1 /**
2  * Code generation 2
3  *
4  * Includes:
5  * - math operators (+ - * / %) and functions (abs, cos, sqrt)
6  * - 'string' functions (strlen, memcpy, memset)
7  * - pointers (address of / dereference)
8  * - struct assign, constructor, destructor
9  *
10  * Compiler implementation of the
11  * $(LINK2 https://www.dlang.org, D programming language).
12  *
13  * Copyright:   Copyright (C) 1984-1998 by Symantec
14  *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
15  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
16  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
17  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod2.d, backend/cod2.d)
18  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod2.d
19  */
20 
21 module dmd.backend.cod2;
22 
// The version identifier COMPILE is set when either SCPP or MARS is defined.
version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;
27 
28 version (COMPILE)
29 {
30 
31 import core.stdc.stdio;
32 import core.stdc.stdlib;
33 import core.stdc.string;
34 
35 import dmd.backend.backend;
36 import dmd.backend.cc;
37 import dmd.backend.cdef;
38 import dmd.backend.code;
39 import dmd.backend.code_x86;
40 import dmd.backend.codebuilder;
41 import dmd.backend.mem;
42 import dmd.backend.el;
43 import dmd.backend.exh;
44 import dmd.backend.global;
45 import dmd.backend.oper;
46 import dmd.backend.ty;
47 import dmd.backend.type;
48 import dmd.backend.xmm;
49 
// Attribute labels: they apply to the declarations that follow, unless a
// declaration carries its own attribute (e.g. extern (D), extern (C), @trusted).
extern (C++):

nothrow:
@safe:

// Globals declared here with `extern`, i.e. without a definition in this module.
extern __gshared CGstate cgstate;
extern __gshared ubyte[FLMAX] segfl;    // one entry per FL value, FLMAX entries
extern __gshared bool[FLMAX] stackfl;   // one entry per FL value, FLMAX entries

// Global defined by this module.
__gshared int cdcmp_flag;
60 
/*******************************
 * Convert a bit (register) number into a bit mask.
 * Params:
 *      m = bit number
 * Returns:
 *      value with only bit `m` set
 */
private extern (D) uint mask(uint m)
{
    return 1 << m;
}
62 
// from divcoeff.c
// Only the prototypes are given here; the functions have C linkage.
// NOTE(review): judging by the names these compute multiplier / shift
// coefficients for a division by the constant d - confirm against divcoeff.c
extern (C)
{
    bool choose_multiplier(int N, ulong d, int prec, ulong *pm, int *pshpost);
    bool udiv_coefficients(int N, ulong d, int *pshpre, ulong *pm, int *pshpost);
}
69 
/*******************************
 * Exchange the contents of two register variables.
 * Params:
 *      a = points to the first register number
 *      b = points to the second register number
 */

private void swap(reg_t *a,reg_t *b)
{
    const saved = *b;
    *b = *a;
    *a = saved;
}
80 
81 
/*******************************************
 * Determine if the effective address of `e` is restricted to MOV instructions.
 *
 * That is the case for OSX64 position independent code when `e` is an OPvar
 * whose symbol has storage class global, extern_, comdat or comdef.
 * Params:
 *      e = elem to check
 * Returns: true if cannot use this EA in anything other than a MOV instruction.
 */

@trusted
bool movOnly(const elem *e)
{
    // The restriction exists only for OSX64 PIC code referencing a variable
    if (!(config.exe & EX_OSX64) || !(config.flags3 & CFG3pic) || e.Eoper != OPvar)
        return false;

    switch (e.EV.Vsym.Sclass)
    {
        case SC.global:
        case SC.extern_:
        case SC.comdat:
        case SC.comdef:
            return true;        // fixups for these can only be done with a MOV

        default:
            return false;
    }
}
99 
/********************************
 * Determine index registers used by addressing mode.
 * Index is rm of modregrm field.
 * Params:
 *      c = instruction whose Irm, Isib and Irex fields are examined
 * Returns:
 *      mask of index registers, 0 if the mod field of Irm is 3
 */

regm_t idxregm(const code* c)
{
    const modrm = c.Irm;

    if ((modrm & 0xC0) == 0xC0)         // mod field is 3: no addressing mode
        return 0;

    const rmfield = modrm & 7;

    if (I16)
    {
        // registers used by each of the 8 rm encodings
        static immutable ubyte[8] idxrm  = [mBX|mSI,mBX|mDI,mSI,mDI,mSI,mDI,0,mBX];
        return idxrm[rmfield];
    }

    const rexB = (c.Irex & REX_B) ? 8 : 0;

    if (rmfield != 4)                   // no sib byte, rm is the register
        return mask(rmfield | rexB);

    // sib byte: scaled index register, extended by REX_X
    const sib = c.Isib;
    const rexX = (c.Irex & REX_X) ? 8 : 0;
    regm_t result = mask(((sib >> 3) & 7) | rexX);

    // add the base register, except for base field 5 with mod field 0
    const base = sib & 7;
    if (!(base == 5 && (modrm & 0xC0) == 0))
        result |= mask(base | rexB);

    return result;
}
138 
139 
/***************************
 * Gen code for call to floating point routine.
 *
 * With config.inline8087 set the work is handed to orth87() instead.
 * Otherwise both operands are evaluated into the registers (or stack)
 * selected for the operand type and callclib() is invoked.
 * Params:
 *      cdb = code builder that receives the generated code
 *      e = binary operator elem, operands are e.EV.E1 and e.EV.E2
 *      pretregs = where the result is wanted, passed on to orth87() / callclib()
 *      clib = library routine for the double version of the operation;
 *             adjusted by (CLIB.fadd - CLIB.dadd) when the left operand is TYfloat
 */

@trusted
void opdouble(ref CodeBuilder cdb, elem *e,regm_t *pretregs,uint clib)
{
    if (config.inline8087)
    {
        orth87(cdb,e,pretregs);
        return;
    }

    // Pick where the left (retregs1) and right (retregs2) operands go
    regm_t retregs1;
    regm_t retregs2;
    if (tybasic(e.EV.E1.Ety) == TYfloat)
    {
        clib += CLIB.fadd - CLIB.dadd;    // convert to float operation
        retregs1 = FLOATREGS;
        retregs2 = FLOATREGS2;
    }
    else if (I32)
    {
        retregs1 = DOUBLEREGS_32;
        retregs2 = DOUBLEREGS2_32;
    }
    else
    {
        retregs1 = mSTACK;
        retregs2 = DOUBLEREGS_16;
    }

    codelem(cdb,e.EV.E1, &retregs1,false);

    // stackclean is raised for the evaluation of the right operand
    // whenever the left operand ended up on the stack
    const leftOnStack = (retregs1 & mSTACK) != 0;
    if (leftOnStack)
        cgstate.stackclean++;
    scodelem(cdb,e.EV.E2, &retregs2, retregs1 & ~mSTACK, false);
    if (leftOnStack)
        cgstate.stackclean--;

    callclib(cdb, e, clib, pretregs, 0);
}
180 
/*****************************
 * Handle operators which are more or less orthogonal
 * ( + - & | ^ )
 *
 * Floating point and XMM operands are forwarded to orthxmm(), orth87() or
 * opdouble(). For the remaining operand types the cases are tried in this
 * order:
 *  - flags-only AND of a variable / indirection with a constant or a
 *    register variable, done with a single TEST
 *  - additions that can be done with LEA (flags not wanted)
 *  - add / subtract involving a huge pointer (TYhptr)
 *  - the general case: evaluate e1 into registers, then apply the operator
 *    with e2 as a register, an immediate, or a memory operand
 *
 * Params:
 *      cdb = code builder that receives the generated code
 *      e = operator elem (OPadd, OPmin, OPor, OPxor or OPand); any other
 *          operator hits assert(0)
 *      pretregs = mask of where the result is wanted (registers and/or mPSW);
 *                 if 0, both operands are still evaluated but no operation
 *                 is generated
 */

@trusted
void cdorth(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdorth(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    if (*pretregs == 0)                   // if don't want result
    {
        codelem(cdb,e1,pretregs,false); // eval left leaf
        *pretregs = 0;                          // in case they got set
        codelem(cdb,e2,pretregs,false);
        return;
    }

    const ty = tybasic(e.Ety);
    const ty1 = tybasic(e1.Ety);

    // Floating point operands: every path inside this block returns or asserts
    if (tyfloating(ty1))
    {
        if (tyvector(ty1) ||
            config.fpxmmregs && tyxmmreg(ty1) &&
            !(*pretregs & mST0) &&
            !(*pretregs & mST01) &&
            !(ty == TYldouble || ty == TYildouble)  // watch out for shrinkLongDoubleConstantIfPossible()
           )
        {
            orthxmm(cdb,e,pretregs);
            return;
        }
        if (config.inline8087)
        {
            orth87(cdb,e,pretregs);
            return;
        }
        if (config.exe & EX_windos)
        {
            opdouble(cdb,e,pretregs,(e.Eoper == OPadd) ? CLIB.dadd
                                                       : CLIB.dsub);
            return;
        }
        else
        {
            assert(0);
        }
    }
    // ty1 is not a floating type here, but it may still be an XMM register type
    if (tyxmmreg(ty1))
    {
        orthxmm(cdb,e,pretregs);
        return;
    }

    opcode_t op1, op2;
    uint mode;
    // Incremented around the getlvalue() call of the LEA case below when
    // e.Ecount is set; while non-zero, the LEA case is not entered.
    __gshared int nest;

    const ty2 = tybasic(e2.Ety);
    const e2oper = e2.Eoper;
    const sz = _tysize[ty];
    const isbyte = (sz == 1);
    // CFopsize for SHORTSIZE operations when not generating 16 bit code
    code_flags_t word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
    bool test = false;                // assume we destroyed lvalue

    // op1:  opcode for a single word operation, or for the least significant
    //       word of a two word operation
    // op2:  opcode for the most significant word of a two word operation
    // mode: value for the reg field of the mod/rm byte of the immediate forms
    //       (opcode 0x81, or 0xF7 for TEST)
    switch (e.Eoper)
    {
        case OPadd:     mode = 0;
                        op1 = 0x03; op2 = 0x13; break;  /* ADD, ADC     */
        case OPmin:     mode = 5;
                        op1 = 0x2B; op2 = 0x1B; break;  /* SUB, SBB     */
        case OPor:      mode = 1;
                        op1 = 0x0B; op2 = 0x0B; break;  /* OR , OR      */
        case OPxor:     mode = 6;
                        op1 = 0x33; op2 = 0x33; break;  /* XOR, XOR     */
        case OPand:     mode = 4;
                        op1 = 0x23; op2 = 0x23;         /* AND, AND     */
                        if (tyreg(ty1) &&
                            *pretregs == mPSW)          /* if flags only */
                        {
                            test = true;
                            op1 = 0x85;                 /* TEST         */
                            mode = 0;
                        }
                        break;

        default:
            assert(0);
    }
    op1 ^= isbyte;                                  /* if byte operation    */

    // Compute numwords, the number of words to operate on.
    int numwords = 1;
    if (!I16)
    {
        /* Cannot operate on longs and then do a 'paint' to a far       */
        /* pointer, because far pointers are 48 bits and longs are 32.  */
        /* Therefore, numwords can never be 2.                          */
        assert(!(tyfv(ty1) && tyfv(ty2)));
        if (sz == 2 * REGSIZE)
        {
            numwords++;
        }
    }
    else
    {
        /* If ty is a TYfptr, but both operands are long, treat the     */
        /* operation as a long.                                         */
        if ((tylong(ty1) || ty1 == TYhptr) &&
            (tylong(ty2) || ty2 == TYhptr))
            numwords++;
    }

    // Special cases where only flags are set
    // (test is only set for OPand with *pretregs == mPSW, see the switch above)
    if (test && _tysize[ty1] <= REGSIZE &&
        (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
        && !movOnly(e1)
       )
    {
        // Handle the case of (var & const)
        if (e2.Eoper == OPconst && el_signx32(e2))
        {
            code cs = void;
            cs.Iflags = 0;
            cs.Irex = 0;
            getlvalue(cdb,&cs,e1,0);
            targ_size_t value = e2.EV.Vpointer;
            if (sz == 2)
                value &= 0xFFFF;
            else if (sz == 4)
                value &= 0xFFFFFFFF;
            reg_t reg;
            // Use a register as second operand if one already holds the value,
            // else use the immediate form of TEST
            if (reghasvalue(isbyte ? BYTEREGS : ALLREGS,value,&reg))
            {
                code_newreg(&cs, reg);
                if (I64 && isbyte && reg >= 4)
                    cs.Irex |= REX;
            }
            else
            {
                if (sz == 8 && !I64)
                {
                    assert(value == cast(int)value);    // sign extend imm32
                }
                op1 = 0xF7;
                cs.IEV2.Vint = cast(targ_int)value;
                cs.IFL2 = FLconst;
            }
            // NOTE(review): op1 already had isbyte XORed in after the opcode
            // switch. Unless op1 was just reset to 0xF7, this second XOR
            // yields the non-byte opcode for byte operands - confirm this
            // is intended.
            cs.Iop = op1 ^ isbyte;
            cs.Iflags |= word | CFpsw;
            freenode(e1);
            freenode(e2);
            cdb.gen(&cs);
            return;
        }

        // Handle (exp & reg)
        reg_t reg;
        regm_t retregs;
        if (isregvar(e2,&retregs,&reg))
        {
            code cs = void;
            cs.Iflags = 0;
            cs.Irex = 0;
            getlvalue(cdb,&cs,e1,0);
            code_newreg(&cs, reg);
            if (I64 && isbyte && reg >= 4)
                cs.Irex |= REX;
            // NOTE(review): op1 was already XORed with isbyte after the opcode
            // switch; XORing again gives the non-byte opcode for byte
            // operands - confirm this is intended.
            cs.Iop = op1 ^ isbyte;
            cs.Iflags |= word | CFpsw;
            freenode(e1);
            freenode(e2);
            cdb.gen(&cs);
            return;
        }
    }

    code cs = void;
    cs.Iflags = 0;
    cs.Irex = 0;

    // Look for possible uses of LEA
    if (e.Eoper == OPadd &&
        !(*pretregs & mPSW) &&                // flags aren't set by LEA
        !nest &&                              // could cause infinite recursion if e.Ecount
        (sz == REGSIZE || (I64 && sz == 4)))  // far pointers aren't handled
    {
        // REX_W is added to the generated LEA when the result is 8 bytes
        const rex = (sz == 8) ? REX_W : 0;

        // Handle the case of (e + &var)
        int e1oper = e1.Eoper;
        if ((e2oper == OPrelconst && (config.target_cpu >= TARGET_Pentium || (!e2.Ecount && stackfl[el_fl(e2)])))
                || // LEA costs too much for simple EAs on older CPUs
            (e2oper == OPconst && (e1.Eoper == OPcall || e1.Eoper == OPcallns) && !(*pretregs & mAX)) ||
            (!I16 && (isscaledindex(e1) || isscaledindex(e2))) ||
            (!I16 && e1oper == OPvar && e1.EV.Vsym.Sfl == FLreg && (e2oper == OPconst || (e2oper == OPvar && e2.EV.Vsym.Sfl == FLreg))) ||
            (e2oper == OPconst && e1oper == OPeq && e1.EV.E1.Eoper == OPvar) ||
            (!I16 && (e2oper == OPrelconst || e2oper == OPconst) && !e1.Ecount &&
             (e1oper == OPmul || e1oper == OPshl) &&
             e1.EV.E2.Eoper == OPconst &&
             ssindex(e1oper,e1.EV.E2.EV.Vuns)
            ) ||
            (!I16 && e1.Ecount)
           )
        {
            // Let getlvalue() build the addressing mode for the whole
            // expression e. nest is raised for the duration of that call
            // when e.Ecount is set, see the !nest test above.
            const inc = e.Ecount != 0;
            nest += inc;
            code csx = void;
            getlvalue(cdb,&csx,e,0);
            nest -= inc;
            reg_t regx;
            allocreg(cdb,pretregs,&regx,ty);
            csx.Iop = LEA;
            code_newreg(&csx, regx);
            cdb.gen(&csx);          // LEA regx,EA
            if (rex)
                code_orrex(cdb.last(), rex);
            return;
        }

        // Handle the case of ((e + c) + e2)
        if (!I16 &&
            e1oper == OPadd &&
            (e1.EV.E2.Eoper == OPconst && el_signx32(e1.EV.E2) ||
             e2oper == OPconst && el_signx32(e2)) &&
            !e1.Ecount
           )
        {
            // edisp: the constant, becomes the displacement of the final LEA
            // ebase: the other operand, evaluated into the base register (reg2)
            elem *ebase;
            elem *edisp;
            if (e2oper == OPconst && el_signx32(e2))
            {   edisp = e2;
                ebase = e1.EV.E2;
            }
            else
            {   edisp = e1.EV.E2;
                ebase = e2;
            }

            auto e11 = e1.EV.E1;
            regm_t retregs = *pretregs & ALLREGS;
            if (!retregs)
                retregs = ALLREGS;
            // ss:  scale field of the first LEA (or index into imm32[] for IMUL)
            //      when ss2 is set, else scale field of the final LEA
            // ss2: if non-zero, scale field of the final LEA
            int ss = 0;
            int ss2 = 0;

            // Handle the case of (((e *  c1) + c2) + e2)
            // Handle the case of (((e << c1) + c2) + e2)
            if ((e11.Eoper == OPmul || e11.Eoper == OPshl) &&
                e11.EV.E2.Eoper == OPconst &&
                !e11.Ecount
               )
            {
                const co1 = cast(targ_size_t)el_tolong(e11.EV.E2);
                if (e11.Eoper == OPshl)
                {
                    if (co1 > 3)
                        goto L13;
                    ss = cast(int)co1;
                }
                else
                {
                    // Multipliers of the form (2^^ss + 1) * 2^^ss2
                    ss2 = 1;
                    switch (co1)
                    {
                        case  6:        ss = 1;                 break;
                        case 12:        ss = 1; ss2 = 2;        break;
                        case 24:        ss = 1; ss2 = 3;        break;
                        case 10:        ss = 2;                 break;
                        case 20:        ss = 2; ss2 = 2;        break;
                        case 40:        ss = 2; ss2 = 3;        break;
                        case 18:        ss = 3;                 break;
                        case 36:        ss = 3; ss2 = 2;        break;
                        case 72:        ss = 3; ss2 = 3;        break;
                        default:
                            ss2 = 0;
                            goto L13;
                    }
                }
                // The multiply / shift is folded into the addressing mode,
                // only its left operand remains to be evaluated
                freenode(e11.EV.E2);
                freenode(e11);
                e11 = e11.EV.E1;
              L13:
                { }
            }

            reg_t reg11;
            regm_t regm;
            if (e11.Eoper == OPvar && isregvar(e11,&regm,&reg11))
            {
                if (tysize(e11.Ety) <= REGSIZE)
                    retregs = mask(reg11); // only want the LSW
                else
                    retregs = regm;
                freenode(e11);
            }
            else
                codelem(cdb,e11,&retregs,false);

            regm_t rretregs = ALLREGS & ~retregs & ~mBP;
            scodelem(cdb,ebase,&rretregs,retregs,true);
            // Allocate the result register, preferring one wanted by the caller
            reg_t reg;
            {
                regm_t sregs = *pretregs & ~rretregs;
                if (!sregs)
                    sregs = ALLREGS & ~rretregs;
                allocreg(cdb,&sregs,&reg,ty);
            }

            assert((retregs & (retregs - 1)) == 0); // must be only one register
            assert((rretregs & (rretregs - 1)) == 0); // must be only one register

            auto  reg1 = findreg(retregs);
            const reg2 = findreg(rretregs);

            if (ss2)
            {
                // First compute reg = reg1 * (2^^ss + 1)
                assert(reg != reg2);
                // NOTE(review): presumably the IMUL is used because this
                // register number cannot serve as SIB base with mod 0 - confirm
                if ((reg1 & 7) == BP)
                {   static immutable uint[4] imm32 = [1+1,2+1,4+1,8+1];

                    // IMUL reg,imm32
                    cdb.genc2(0x69,modregxrmx(3,reg,reg1),imm32[ss]);
                }
                else
                {   // LEA reg,[reg1*ss][reg1]
                    cdb.gen2sib(LEA,modregxrm(0,reg,4),modregrm(ss,reg1 & 7,reg1 & 7));
                    if (reg1 & 8)
                        code_orrex(cdb.last(), REX_X | REX_B);
                }
                if (rex)
                    code_orrex(cdb.last(), rex);
                reg1 = reg;
                ss = ss2;                               // use *2 for scale
            }

            cs.Iop = LEA;                      // LEA reg,c[reg1*ss][reg2]
            cs.Irm = modregrm(2,reg & 7,4);
            cs.Isib = modregrm(ss,reg1 & 7,reg2 & 7);
            assert(reg2 != BP);
            cs.Iflags = CFoff;
            cs.Irex = cast(ubyte)rex;
            if (reg & 8)
                cs.Irex |= REX_R;
            if (reg1 & 8)
                cs.Irex |= REX_X;
            if (reg2 & 8)
                cs.Irex |= REX_B;
            cs.IFL1 = FLconst;
            cs.IEV1.Vsize_t = edisp.EV.Vuns;

            freenode(edisp);
            freenode(e1);
            cdb.gen(&cs);
            fixresult(cdb,e,mask(reg),pretregs);
            return;
        }
    }

    // posregs: registers the left operand / result may be placed in
    regm_t posregs = (isbyte) ? BYTEREGS : (mES | allregs);
    regm_t retregs = *pretregs & posregs;
    if (retregs == 0)                   /* if no return regs speced     */
                                        /* (like if wanted flags only)  */
        retregs = ALLREGS & posregs;    // give us some

    if (ty1 == TYhptr || ty2 == TYhptr)
    {     /* Generate code for add/subtract of huge pointers.
           No attempt is made to generate very good code.
         */
        retregs = (retregs & mLSW) | mDX;
        regm_t rretregs;
        if (ty1 == TYhptr)
        {   // hptr +- long
            rretregs = mLSW & ~(retregs | regcon.mvar);
            if (!rretregs)
                rretregs = mLSW;
            rretregs |= mCX;
            codelem(cdb,e1,&rretregs,0);
            retregs &= ~rretregs;
            if (!(retregs & mLSW))
                retregs |= mLSW & ~rretregs;

            scodelem(cdb,e2,&retregs,rretregs,true);
        }
        else
        {   // long + hptr
            codelem(cdb,e1,&retregs,0);
            rretregs = (mLSW | mCX) & ~retregs;
            if (!(rretregs & mLSW))
                rretregs |= mLSW;
            scodelem(cdb,e2,&rretregs,retregs,true);
        }
        getregs(cdb,rretregs | retregs);
        const mreg = DX;
        const lreg = findreglsw(retregs);
        if (e.Eoper == OPmin)
        {   // negate retregs
            cdb.gen2(0xF7,modregrm(3,3,mreg));     // NEG mreg
            cdb.gen2(0xF7,modregrm(3,3,lreg));     // NEG lreg
            code_orflag(cdb.last(),CFpsw);
            cdb.genc2(0x81,modregrm(3,3,mreg),0);  // SBB mreg,0
        }
        const lrreg = findreglsw(rretregs);
        genregs(cdb,0x03,lreg,lrreg);              // ADD lreg,lrreg
        code_orflag(cdb.last(),CFpsw);
        genmovreg(cdb,lrreg,CX);      // MOV lrreg,CX
        cdb.genc2(0x81,modregrm(3,2,mreg),0);      // ADC mreg,0
        genshift(cdb);                             // MOV CX,offset __AHSHIFT
        cdb.gen2(0xD3,modregrm(3,4,mreg));         // SHL mreg,CL
        genregs(cdb,0x03,mreg,lrreg);              // ADD mreg,MSREG(h)
        fixresult(cdb,e,retregs,pretregs);
        return;
    }

    // General case. Evaluate e1 into retregs; reg is the register the
    // operator gets applied to.
    regm_t rretregs;
    reg_t reg;
    if (_tysize[ty1] > REGSIZE && numwords == 1)
    {     /* The only possibilities are (TYfptr + tyword) or (TYfptr - tyword) */

        debug
        if (_tysize[ty2] != REGSIZE)
        {
            printf("e = %p, e.Eoper = %s e1.Ety = %s e2.Ety = %s\n", e, oper_str(e.Eoper), tym_str(ty1), tym_str(ty2));
            elem_print(e);
        }

        assert(_tysize[ty2] == REGSIZE);

        /* Watch out for the case here where you are going to OP reg,EA */
        /* and both the reg and EA use ES! Prevent this by forcing      */
        /* reg into the regular registers.                              */
        if ((e2oper == OPind ||
            (e2oper == OPvar && el_fl(e2) == FLfardata)) &&
            !e2.Ecount)
        {
            retregs = ALLREGS;
        }

        codelem(cdb,e1,&retregs,test != 0);
        reg = findreglsw(retregs);      /* reg is the register with the offset*/
    }
    else
    {
        regm_t regm;

        /* if (tyword + TYfptr) */
        if (_tysize[ty1] == REGSIZE && _tysize[ty2] > REGSIZE)
        {   retregs = ~*pretregs & ALLREGS;

            /* if retregs doesn't have any regs in it that aren't reg vars */
            if ((retregs & ~regcon.mvar) == 0)
                retregs |= mAX;
        }
        else if (numwords == 2 && retregs & mES)
            retregs = (retregs | mMSW) & ALLREGS;

        // Determine if we should swap operands, because
        //      mov     EAX,x
        //      add     EAX,reg
        // is faster than:
        //      mov     EAX,reg
        //      add     EAX,x
        // Note that e2oper is not updated by the swap; both operands are
        // OPvar in this case.
        else if (e2oper == OPvar &&
                 e1.Eoper == OPvar &&
                 e.Eoper != OPmin &&
                 isregvar(e1,&regm,null) &&
                 regm != retregs &&
                 _tysize[ty1] == _tysize[ty2])
        {
            elem *es = e1;
            e1 = e2;
            e2 = es;
        }
        codelem(cdb,e1,&retregs,test != 0);         // eval left leaf
        reg = findreg(retregs);
    }
    // rreg: register holding the right operand, when it is in a register
    // rval: non-zero if rreg already contains the constant i
    // i:    value of the constant right operand
    reg_t rreg;
    int rval;
    targ_size_t i;
    switch (e2oper)
    {
        case OPind:                                 /* if addressing mode   */
            if (!e2.Ecount)                         /* if not CSE           */
                    goto L1;                        /* try OP reg,EA        */
            goto default;

        default:                                    /* operator node        */
        L2:
            // Evaluate e2 into registers and generate OP reg,rreg
            rretregs = ALLREGS & ~retregs;
            /* Be careful not to do arithmetic on ES        */
            if (_tysize[ty1] == REGSIZE && _tysize[ty2] > REGSIZE && *pretregs != mPSW)
                rretregs = *pretregs & (mES | ALLREGS | mBP) & ~retregs;
            else if (isbyte)
                rretregs &= BYTEREGS;

            scodelem(cdb,e2,&rretregs,retregs,true);       // get rvalue
            rreg = (_tysize[ty2] > REGSIZE) ? findreglsw(rretregs) : findreg(rretregs);
            if (!test)
                getregs(cdb,retregs);          // we will trash these regs
            if (numwords == 1)                              /* ADD reg,rreg */
            {
                /* reverse operands to avoid moving around the segment value */
                if (_tysize[ty2] > REGSIZE)
                {
                    getregs(cdb,rretregs);
                    genregs(cdb,op1,rreg,reg);
                    retregs = rretregs;     // reverse operands
                }
                else
                {
                    genregs(cdb,op1,reg,rreg);
                    if (!I16 && *pretregs & mPSW)
                        cdb.last().Iflags |= word;
                }
                if (I64 && sz == 8)
                    code_orrex(cdb.last(), REX_W);
                if (I64 && isbyte && (reg >= 4 || rreg >= 4))
                    code_orrex(cdb.last(), REX);
            }
            else /* numwords == 2 */                /* ADD lsreg,lsrreg     */
            {
                reg = findreglsw(retregs);
                rreg = findreglsw(rretregs);
                genregs(cdb,op1,reg,rreg);
                if (e.Eoper == OPadd || e.Eoper == OPmin)
                    code_orflag(cdb.last(),CFpsw);
                reg = findregmsw(retregs);
                rreg = findregmsw(rretregs);
                if (!(e2oper == OPu16_32 && // if second operand is 0
                      (op2 == 0x0B || op2 == 0x33)) // and OR or XOR
                   )
                    genregs(cdb,op2,reg,rreg);        // ADC msreg,msrreg
            }
            break;

        case OPrelconst:
            // Use the address as an immediate operand when possible,
            // else fall back to evaluating it into a register (L2 / default)
            if (I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64))
                goto default;
            if (sz != REGSIZE)
                goto L2;
            if (segfl[el_fl(e2)] != 3)              /* if not in data segment */
                goto L2;
            if (evalinregister(e2))
                goto L2;
            cs.IEV2.Voffset = e2.EV.Voffset;
            cs.IEV2.Vsym = e2.EV.Vsym;
            cs.Iflags |= CFoff;
            i = 0;                          /* no INC or DEC opcode         */
            rval = 0;
            goto L3;

        case OPconst:
            if (tyfv(ty2))
                goto L2;
            if (numwords == 1)
            {
                if (!el_signx32(e2))
                    goto L2;
                i = e2.EV.Vpointer;
                if (word)
                {
                    // Drop the operand size flag and mask the constant to
                    // 16 bits when optimizing for speed and flags are not wanted
                    if (!(*pretregs & mPSW) &&
                        config.flags4 & CFG4speed &&
                        (e.Eoper == OPor || e.Eoper == OPxor || test ||
                         (e1.Eoper != OPvar && e1.Eoper != OPind)))
                    {   word = 0;
                        i &= 0xFFFF;
                    }
                }
                rval = reghasvalue(isbyte ? BYTEREGS : ALLREGS,i,&rreg);
                cs.IEV2.Vsize_t = i;
            L3:
                // Generate OP reg,immediate; or OP reg,rreg if rval is set
                if (!test)
                    getregs(cdb,retregs);          // we will trash these regs
                // Undo the isbyte adjustment of op1; it is applied to cs.Iop
                // as a whole right before the instruction is generated
                op1 ^= isbyte;
                cs.Iflags |= word;
                if (rval)
                {   cs.Iop = op1 ^ 2;
                    mode = rreg;
                }
                else
                    cs.Iop = 0x81;
                cs.Irm = modregrm(3,mode&7,reg&7);
                if (mode & 8)
                    cs.Irex |= REX_R;
                if (reg & 8)
                    cs.Irex |= REX_B;
                if (I64 && sz == 8)
                    cs.Irex |= REX_W;
                if (I64 && isbyte && (reg >= 4 || (rval && rreg >= 4)))
                    cs.Irex |= REX;
                cs.IFL2 = cast(ubyte)((e2.Eoper == OPconst) ? FLconst : el_fl(e2));
                /* Modify instruction for special cases */
                switch (e.Eoper)
                {
                    case OPadd:
                    {
                        int iop;

                        if (i == 1)
                            iop = 0;                    /* INC reg      */
                        else if (i == -1)
                            iop = 8;                    /* DEC reg      */
                        else
                            break;
                        cs.Iop = (0x40 | iop | reg) ^ isbyte;
                        // Use the 0xFF mod/rm form instead of the one byte
                        // 0x40+reg form for byte operations wanting flags,
                        // and always for I64
                        if ((isbyte && *pretregs & mPSW) || I64)
                        {
                            cs.Irm = cast(ubyte)(modregrm(3,0,reg & 7) | iop);
                            cs.Iop = 0xFF;
                        }
                        break;
                    }

                    case OPand:
                        if (test)
                            cs.Iop = rval ? op1 : 0xF7; // TEST
                        break;

                    default:
                        break;
                }
                if (*pretregs & mPSW)
                    cs.Iflags |= CFpsw;
                cs.Iop ^= isbyte;
                cdb.gen(&cs);
                cs.Iflags &= ~CFpsw;
            }
            else if (numwords == 2)
            {
                // OP lsreg,low part of the constant followed by
                // OP msreg,high part of the constant
                getregs(cdb,retregs);
                reg = findregmsw(retregs);
                const lsreg = findreglsw(retregs);
                cs.Iop = 0x81;
                cs.Irm = modregrm(3,mode,lsreg);
                cs.IFL2 = FLconst;
                const msw = cast(targ_int)MSREG(e2.EV.Vllong);
                cs.IEV2.Vint = e2.EV.Vlong;
                switch (e.Eoper)
                {
                    case OPadd:
                    case OPmin:
                        cs.Iflags |= CFpsw;
                        break;

                    default:
                        break;
                }
                cdb.gen(&cs);
                cs.Iflags &= ~CFpsw;

                // Same instruction again, now on the most significant register
                cs.Irm = cast(ubyte)((cs.Irm & modregrm(3,7,0)) | reg);
                cs.IEV2.Vint = msw;
                if (e.Eoper == OPadd)
                    cs.Irm |= modregrm(0,2,0);      /* ADC          */
                cdb.gen(&cs);
            }
            else
                assert(0);
            freenode(e2);
            break;

        case OPvar:
            if (movOnly(e2))
                goto L2;
        L1:
            // Generate OP reg,EA with e2 as the EA
            if (tyfv(ty2))
                goto L2;
            if (!test)
                getregs(cdb,retregs);          // we will trash these regs
            loadea(cdb,e2,&cs,op1,
                   ((numwords == 2) ? findreglsw(retregs) : reg),
                   0,retregs,retregs);
            if (!I16 && word)
            {   if (*pretregs & mPSW)
                    code_orflag(cdb.last(),word);
                else
                    cdb.last().Iflags &= ~cast(int)word;
            }
            else if (numwords == 2)
            {
                if (e.Eoper == OPadd || e.Eoper == OPmin)
                    code_orflag(cdb.last(),CFpsw);
                reg = findregmsw(retregs);
                if (!OTleaf(e2.Eoper))
                {   getlvalue_msw(&cs);
                    cs.Iop = op2;
                    NEWREG(cs.Irm,reg);
                    cdb.gen(&cs);                 // ADC reg,data+2
                }
                else
                    loadea(cdb,e2,&cs,op2,reg,REGSIZE,retregs,0);
            }
            else if (I64 && sz == 8)
                code_orrex(cdb.last(), REX_W);
            freenode(e2);
            break;
    }

    // If the flags are wanted and the result fits in a register, mark the
    // last generated instruction as setting them
    if (sz <= REGSIZE && *pretregs & mPSW)
    {
        /* If the expression is (_tls_array + ...), then the flags are not set
         * since the linker may rewrite these instructions into something else.
         */
        if (I64 && e.Eoper == OPadd && e1.Eoper == OPvar)
        {
            const s = e1.EV.Vsym;
            // the length of 10 includes the terminating 0 of "tls_array"
            if (s.Sident[0] == '_' && memcmp(s.Sident.ptr + 1,"tls_array".ptr,10) == 0)
            {
                goto L7;                        // don't assume flags are set
            }
        }
        code_orflag(cdb.last(),CFpsw);
        *pretregs &= ~mPSW;                    // flags already set
    L7: { }
    }
    fixresult(cdb,e,retregs,pretregs);
}
901 
902 
/*****************************
 * Generate code for a multiply expression.
 *
 * Floating point and vector operands are forwarded to orthxmm(), orth87()
 * or opdouble(). Integer multiplies are emitted here, with the strategy
 * chosen by the form of the right operand (e2):
 *  - both operands are the same widening conversion: one widening MUL/IMUL
 *  - constant: register-pair multiply by immediate, LEA/SHL/ADD sequences
 *    for selected factors, or IMUL with an immediate
 *  - variable / non-CSE indirection: MUL/IMUL with a memory operand
 *  - anything else: evaluate both operands into registers and MUL/IMUL,
 *    or call CLIB.lmul for register pairs on older CPUs
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the multiply elem; e.EV.E1 and e.EV.E2 are the operands
 *      pretregs = mask of registers the result is wanted in; 0 means the
 *                 result is not wanted and the operands are only evaluated.
 *                 Handed to fixresult() for final placement of the result.
 */

@trusted
void cdmul(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdmul()\n");
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    if (*pretregs == 0)                         // if don't want result
    {
        // Still evaluate both operands, discarding their values
        codelem(cdb,e1,pretregs,false);      // eval left leaf
        *pretregs = 0;                          // in case they got set
        codelem(cdb,e2,pretregs,false);
        return;
    }

    //printf("cdmul(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    const tyml = tybasic(e1.Ety);
    const ty = tybasic(e.Ety);
    const oper = e.Eoper;

    if (tyfloating(tyml))
    {
        if (tyvector(tyml) ||
            config.fpxmmregs && oper != OPmod && tyxmmreg(tyml) &&
            !(*pretregs & mST0) &&
            !(ty == TYldouble || ty == TYildouble) &&  // watch out for shrinkLongDoubleConstantIfPossible()
            !tycomplex(ty) && // SIMD code is not set up to deal with complex mul/div
            !(ty == TYllong)  //   or passing to function through integer register
           )
        {
            orthxmm(cdb,e,pretregs);
            return;
        }
        if (config.exe & EX_posix)
            orth87(cdb,e,pretregs);
        else
            opdouble(cdb,e,pretregs,(oper == OPmul) ? CLIB.dmul : CLIB.ddiv);

        return;
    }

    if (tyxmmreg(tyml))
    {
        orthxmm(cdb,e,pretregs);
        return;
    }

    const uns = tyuns(tyml) || tyuns(e2.Ety);  // true if either operand is unsigned (selects MUL over IMUL)
    const isbyte = tybyte(e.Ety) != 0;
    const sz = _tysize[tyml];
    const ubyte rex = (I64 && sz == 8) ? REX_W : 0;
    const uint grex = rex << 16;
    const OPER opunslng = I16 ? OPu16_32 : OPu32_64;    // unsigned REGSIZE => 2*REGSIZE conversion

    code cs = void;
    cs.Iflags = 0;
    cs.Irex = 0;

    switch (e2.Eoper)
    {
        case OPu16_32:
        case OPs16_32:
        case OPu32_64:
        case OPs32_64:
        {
            /* Both operands are the same widening conversion of REGSIZE values,
             * so a single REGSIZE multiply leaves the full product in DX:AX
             */
            if (sz != 2 * REGSIZE || e1.Eoper != e2.Eoper ||
                e1.Ecount || e2.Ecount)
                goto default;
            const ubyte opx = (e2.Eoper == opunslng) ? 4 : 5;   // /4 is MUL, /5 is IMUL
            regm_t retregsx = mAX;
            codelem(cdb,e1.EV.E1,&retregsx,false);    // eval left leaf
            if (e2.EV.E1.Eoper == OPvar ||
                (e2.EV.E1.Eoper == OPind && !e2.EV.E1.Ecount)
               )
            {
                loadea(cdb,e2.EV.E1,&cs,0xF7,opx,0,mAX,mAX | mDX);
            }
            else
            {
                regm_t rretregsx = ALLREGS & ~mAX;
                scodelem(cdb,e2.EV.E1,&rretregsx,retregsx,true); // get rvalue
                getregs(cdb,mAX | mDX);
                const rregx = findreg(rretregsx);
                cdb.gen2(0xF7,grex | modregrmx(3,opx,rregx)); // OP AX,rregx
            }
            freenode(e.EV.E1);
            freenode(e2);
            fixresult(cdb,e,mAX | mDX,pretregs);
            return;
        }

        case OPconst:
            const e2factor = cast(targ_size_t)el_tolong(e2);

            // Multiply by a constant
            if (I32 && sz == REGSIZE * 2)
            {
                /*  if (msw)
                      IMUL    EDX,EDX,lsw
                      IMUL    reg,EAX,msw
                      ADD     reg,EDX
                    else
                      IMUL    reg,EDX,lsw
                    MOV       EDX,lsw
                    MUL       EDX
                    ADD       EDX,reg
                 */
                regm_t retregs = mAX | mDX;
                codelem(cdb,e1,&retregs,false);    // eval left leaf
                reg_t reg = allocScratchReg(cdb, allregs & ~(mAX | mDX));
                getregs(cdb,mDX | mAX);

                // Split the constant into its low and high REGSIZE halves
                const lsw = cast(targ_int)(e2factor & ((1L << (REGSIZE * 8)) - 1));
                const msw = cast(targ_int)(e2factor >> (REGSIZE * 8));

                if (msw)
                {
                    genmulimm(cdb,DX,DX,lsw);           // IMUL EDX,EDX,lsw
                    genmulimm(cdb,reg,AX,msw);          // IMUL reg,EAX,msw
                    cdb.gen2(0x03,modregrm(3,reg,DX));  // ADD  reg,EDX
                }
                else
                    genmulimm(cdb,reg,DX,lsw);          // IMUL reg,EDX,lsw

                movregconst(cdb,DX,lsw,0);              // MOV EDX,lsw
                getregs(cdb,mDX);
                cdb.gen2(0xF7,modregrm(3,4,DX));        // MUL EDX
                cdb.gen2(0x03,modregrm(3,DX,reg));      // ADD EDX,reg

                const resregx = mDX | mAX;
                freenode(e2);
                fixresult(cdb,e,resregx,pretregs);
                return;
            }


            if (sz > REGSIZE || !el_signx32(e2))
                goto default;

            if (config.target_cpu >= TARGET_80286)
            {
                if (I32 || I64)
                {
                    // See if we can use an LEA instruction
                    int ss;             // SIB scale of the first LEA
                    int ss2 = 0;        // SIB scale of the optional second LEA
                    int shift;          // SHL count applied to sreg in the L5 sequence

                    switch (e2factor)
                    {
                        case 12:    ss = 1; ss2 = 2; goto L4;
                        case 24:    ss = 1; ss2 = 3; goto L4;

                        case 6:
                        case 3:     ss = 1; goto L4;

                        case 20:    ss = 2; ss2 = 2; goto L4;
                        case 40:    ss = 2; ss2 = 3; goto L4;

                        case 10:
                        case 5:     ss = 2; goto L4;

                        case 36:    ss = 3; ss2 = 2; goto L4;
                        case 72:    ss = 3; ss2 = 3; goto L4;

                        case 18:
                        case 9:     ss = 3; goto L4;

                        L4:
                        {
                            /* First LEA multiplies by 3, 5 or 9; then either a
                             * second LEA scales by 4 or 8, or an ADD doubles it
                             */
                            regm_t resreg = *pretregs & ALLREGS & ~(mBP | mR13);
                            if (!resreg)
                                resreg = isbyte ? BYTEREGS : ALLREGS & ~(mBP | mR13);

                            codelem(cdb,e.EV.E1,&resreg,false);
                            getregs(cdb,resreg);
                            reg_t reg = findreg(resreg);

                            cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                        modregxrmx(ss,reg,reg));        // LEA reg,[ss*reg][reg]
                            assert((reg & 7) != BP);
                            if (ss2)
                            {
                                cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                               modregxrm(ss2,reg,5));
                                cdb.last().IFL1 = FLconst;
                                cdb.last().IEV1.Vint = 0;               // LEA reg,0[ss2*reg]
                            }
                            else if (!(e2factor & 1))                   // if even factor
                            {
                                genregs(cdb,0x03,reg,reg);              // ADD reg,reg
                                code_orrex(cdb.last(),rex);
                            }
                            freenode(e2);
                            fixresult(cdb,e,resreg,pretregs);
                            return;
                        }
                        case 37:
                        case 74:    shift = 2;
                                    goto L5;
                        case 13:
                        case 26:    shift = 0;
                                    goto L5;
                        L5:
                        {
                            /* reg = 5*sreg, optionally sreg <<= shift, then
                             * reg += 8*sreg; an even factor doubles the result
                             */
                            regm_t retregs = isbyte ? BYTEREGS : ALLREGS;
                            regm_t resreg = *pretregs & (ALLREGS | mBP);
                            if (!resreg)
                                resreg = retregs;

                            // Don't use EBP
                            resreg &= ~(mBP | mR13);
                            if (!resreg)
                                resreg = retregs;
                            reg_t reg;
                            allocreg(cdb,&resreg,&reg,TYint);

                            regm_t sregm = (ALLREGS & ~mR13) & ~resreg;
                            codelem(cdb,e.EV.E1,&sregm,false);
                            uint sreg = findreg(sregm);
                            getregs(cdb,resreg | sregm);
                            assert((sreg & 7) != BP);
                            assert((reg & 7) != BP);
                            cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                                  modregxrmx(2,sreg,sreg));       // LEA reg,[sreg*4][sreg]
                            if (shift)
                                cdb.genc2(0xC1,grex | modregrmx(3,4,sreg),shift); // SHL sreg,shift
                            cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                                  modregxrmx(3,sreg,reg));        // LEA reg,[sreg*8][reg]
                            if (!(e2factor & 1))                                  // if even factor
                            {
                                genregs(cdb,0x03,reg,reg);                        // ADD reg,reg
                                code_orrex(cdb.last(),rex);
                            }
                            freenode(e2);
                            fixresult(cdb,e,resreg,pretregs);
                            return;
                        }

                        default:
                            break;
                    }
                }

                regm_t retregs = isbyte ? BYTEREGS : ALLREGS;
                regm_t resreg = *pretregs & (ALLREGS | mBP);
                if (!resreg)
                    resreg = retregs;

                scodelem(cdb,e.EV.E1,&retregs,0,true);     // eval left leaf
                const regx = findreg(retregs);
                reg_t rreg;
                allocreg(cdb,&resreg,&rreg,e.Ety);

                // IMUL rreg,regx,e2factor
                cdb.genc2(0x69,grex | modregxrmx(3,rreg,regx),e2factor);
                freenode(e2);
                fixresult(cdb,e,resreg,pretregs);
                return;
            }
            goto default;

        case OPind:
            if (!e2.Ecount)                        // if not CSE
                    goto case OPvar;                        // try OP reg,EA
            goto default;

        default:                                    // OPconst and operators
            //printf("test2 %p, retregs = %s rretregs = %s resreg = %s\n", e, regm_str(retregs), regm_str(rretregs), regm_str(resreg));
            if (sz <= REGSIZE)
            {
                regm_t retregs = mAX;
                codelem(cdb,e1,&retregs,false);           // eval left leaf
                regm_t rretregs = isbyte ? BYTEREGS & ~mAX
                                         : ALLREGS & ~(mAX|mDX);
                scodelem(cdb,e2,&rretregs,retregs,true);  // get rvalue
                getregs(cdb,mAX | mDX);     // trash these regs
                reg_t rreg = findreg(rretregs);
                cdb.gen2(0xF7 ^ isbyte,grex | modregrmx(3,5 - uns,rreg)); // OP AX,rreg
                if (I64 && isbyte && rreg >= 4)
                    code_orrex(cdb.last(), REX);
                fixresult(cdb,e,mAX,pretregs);
                return;
            }
            else if (sz == 2 * REGSIZE)
            {
                regm_t retregs = mDX | mAX;
                codelem(cdb,e1,&retregs,false);           // eval left leaf
                if (config.target_cpu >= TARGET_PentiumPro)
                {
                    regm_t rretregs = allregs & ~retregs;           // second arg
                    scodelem(cdb,e2,&rretregs,retregs,true); // get rvalue
                    const rlo = findreglsw(rretregs);       // register holding low half of e2
                    const rhi = findregmsw(rretregs);       // register holding high half of e2
                    /*  IMUL    rhi,EAX
                        IMUL    EDX,rlo
                        ADD     rhi,EDX
                        MUL     rlo
                        ADD     EDX,rhi
                     */
                    getregs(cdb,mAX|mDX|mask(rhi));
                    cdb.gen2(0x0FAF,modregrm(3,rhi,AX));    // IMUL rhi,EAX
                    cdb.gen2(0x0FAF,modregrm(3,DX,rlo));    // IMUL EDX,rlo
                    cdb.gen2(0x03,modregrm(3,rhi,DX));      // ADD  rhi,EDX
                    cdb.gen2(0xF7,modregrm(3,4,rlo));       // MUL  rlo
                    cdb.gen2(0x03,modregrm(3,DX,rhi));      // ADD  EDX,rhi
                    fixresult(cdb,e,mDX|mAX,pretregs);
                    return;
                }
                else
                {
                    regm_t rretregs = mCX | mBX;           // second arg
                    scodelem(cdb,e2,&rretregs,retregs,true);  // get rvalue
                    callclib(cdb,e,CLIB.lmul,pretregs,0);
                    return;
                }
            }
            assert(0);

        case OPvar:
            if (!I16 && sz <= REGSIZE)
            {
                if (sz > 1)        // no byte version
                {
                    // Generate IMUL r32,r/m32
                    regm_t retregs = *pretregs & (ALLREGS | mBP);
                    if (!retregs)
                        retregs = ALLREGS;
                    codelem(cdb,e1,&retregs,false);        // eval left leaf
                    regm_t resreg = retregs;
                    loadea(cdb,e2,&cs,0x0FAF,findreg(resreg),0,retregs,retregs);
                    freenode(e2);
                    fixresult(cdb,e,resreg,pretregs);
                    return;
                }
                // byte multiply falls through to the MUL/IMUL EA code below
            }
            else
            {
                if (sz == 2 * REGSIZE)
                {
                    if (e.EV.E1.Eoper != opunslng ||
                        e1.Ecount)
                        goto default;            // have to handle it with codelem()

                    /* Left operand is an unsigned widening of a REGSIZE value:
                     * multiply it by each half of EA and combine the results
                     */
                    regm_t retregs = ALLREGS & ~(mAX | mDX);
                    codelem(cdb,e1.EV.E1,&retregs,false);    // eval left leaf
                    const reg = findreg(retregs);
                    getregs(cdb,mAX);
                    genmovreg(cdb,AX,reg);            // MOV AX,reg
                    loadea(cdb,e2,&cs,0xF7,4,REGSIZE,mAX | mDX | mskl(reg),mAX | mDX);  // MUL EA+2
                    getregs(cdb,retregs);
                    cdb.gen1(0x90 + reg);                          // XCHG AX,reg
                    getregs(cdb,mAX | mDX);
                    if ((cs.Irm & 0xC0) == 0xC0)            // if EA is a register
                        loadea(cdb,e2,&cs,0xF7,4,0,mAX | mskl(reg),mAX | mDX); // MUL EA
                    else
                    {   getlvalue_lsw(&cs);
                        cdb.gen(&cs);                       // MUL EA
                    }
                    cdb.gen2(0x03,modregrm(3,DX,reg));      // ADD DX,reg

                    freenode(e1);
                    fixresult(cdb,e,mAX | mDX,pretregs);
                    return;
                }
                assert(sz <= REGSIZE);
            }

            // loadea() handles CWD or CLR DX for divides
            regm_t retregs = sz <= REGSIZE ? mAX : mDX|mAX;
            codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf
            loadea(cdb,e2,&cs,0xF7 ^ isbyte,5 - uns,0,
                   mAX,
                   mAX | mDX);
            freenode(e2);
            fixresult(cdb,e,mAX,pretregs);
            return;
    }
    assert(0);
}
1288 
1289 
1290 /*****************************
1291  * Handle divide, modulo and remquo.
1292  * Note that modulo isn't defined for doubles.
1293  */
1294 
1295 @trusted
1296 void cddiv(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
1297 {
1298     //printf("cddiv()\n");
1299     elem *e1 = e.EV.E1;
1300     elem *e2 = e.EV.E2;
1301     if (*pretregs == 0)                         // if don't want result
1302     {
1303         codelem(cdb,e1,pretregs,false);      // eval left leaf
1304         *pretregs = 0;                          // in case they got set
1305         codelem(cdb,e2,pretregs,false);
1306         return;
1307     }
1308 
1309     //printf("cddiv(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
1310     const tyml = tybasic(e1.Ety);
1311     const ty = tybasic(e.Ety);
1312     const oper = e.Eoper;
1313 
1314     if (tyfloating(tyml))
1315     {
1316         if (tyvector(tyml) ||
1317             config.fpxmmregs && oper != OPmod && tyxmmreg(tyml) &&
1318             !(*pretregs & mST0) &&
1319             !(ty == TYldouble || ty == TYildouble) &&  // watch out for shrinkLongDoubleConstantIfPossible()
1320             !tycomplex(ty) && // SIMD code is not set up to deal with complex mul/div
1321             !(ty == TYllong)  //   or passing to function through integer register
1322            )
1323         {
1324             orthxmm(cdb,e,pretregs);
1325             return;
1326         }
1327         if (config.exe & EX_posix)
1328             orth87(cdb,e,pretregs);
1329         else
1330             opdouble(cdb,e,pretregs,(oper == OPmul) ? CLIB.dmul : CLIB.ddiv);
1331 
1332         return;
1333     }
1334 
1335     if (tyxmmreg(tyml))
1336     {
1337         orthxmm(cdb,e,pretregs);
1338         return;
1339     }
1340 
1341     const uns = tyuns(tyml) || tyuns(e2.Ety);  // 1 if uint operation, 0 if not
1342     const isbyte = tybyte(e.Ety) != 0;
1343     const sz = _tysize[tyml];
1344     const ubyte rex = (I64 && sz == 8) ? REX_W : 0;
1345     const uint grex = rex << 16;
1346 
1347     code cs = void;
1348     cs.Iflags = 0;
1349     cs.IFL2 = 0;
1350     cs.Irex = 0;
1351 
1352     switch (e2.Eoper)
1353     {
1354         case OPconst:
1355             auto d = cast(targ_size_t)el_tolong(e2);
1356             bool neg = false;
1357             const e2factor = d;
1358             if (!uns && cast(targ_llong)e2factor < 0)
1359             {   neg = true;
1360                 d = -d;
1361             }
1362 
1363             // Signed divide by a constant
1364             if ((d & (d - 1)) &&
1365                 ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))) &&
1366                 config.flags4 & CFG4speed && !uns)
1367             {
1368                 /* R1 / 10
1369                  *
1370                  *  MOV     EAX,m
1371                  *  IMUL    R1
1372                  *  MOV     EAX,R1
1373                  *  SAR     EAX,31
1374                  *  SAR     EDX,shpost
1375                  *  SUB     EDX,EAX
1376                  *  IMUL    EAX,EDX,d
1377                  *  SUB     R1,EAX
1378                  *
1379                  * EDX = quotient
1380                  * R1 = remainder
1381                  */
1382                 assert(sz == 4 || sz == 8);
1383 
1384                 ulong m;
1385                 int shpost;
1386                 const int N = sz * 8;
1387                 const bool mhighbit = choose_multiplier(N, d, N - 1, &m, &shpost);
1388 
1389                 regm_t regm = allregs & ~(mAX | mDX);
1390                 codelem(cdb,e1,&regm,false);       // eval left leaf
1391                 const reg_t reg = findreg(regm);
1392                 getregs(cdb,regm | mDX | mAX);
1393 
1394                 /* Algorithm 5.2
1395                  * if m>=2**(N-1)
1396                  *    q = SRA(n + MULSH(m-2**N,n), shpost) - XSIGN(n)
1397                  * else
1398                  *    q = SRA(MULSH(m,n), shpost) - XSIGN(n)
1399                  * if (neg)
1400                  *    q = -q
1401                  */
1402                 const bool mgt = mhighbit || m >= (1UL << (N - 1));
1403                 movregconst(cdb, AX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EAX,m
1404                 cdb.gen2(0xF7,grex | modregrmx(3,5,reg));               // IMUL R1
1405                 if (mgt)
1406                     cdb.gen2(0x03,grex | modregrmx(3,DX,reg));          // ADD EDX,R1
1407                 getregsNoSave(mAX);                                     // EAX no longer contains 'm'
1408                 genmovreg(cdb, AX, reg);                   // MOV EAX,R1
1409                 cdb.genc2(0xC1,grex | modregrm(3,7,AX),sz * 8 - 1);     // SAR EAX,31
1410                 if (shpost)
1411                     cdb.genc2(0xC1,grex | modregrm(3,7,DX),shpost);     // SAR EDX,shpost
1412                 reg_t r3;
1413                 if (neg && oper == OPdiv)
1414                 {
1415                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));            // SUB EAX,EDX
1416                     r3 = AX;
1417                 }
1418                 else
1419                 {
1420                     cdb.gen2(0x2B,grex | modregrm(3,DX,AX));            // SUB EDX,EAX
1421                     r3 = DX;
1422                 }
1423 
1424                 // r3 is quotient
1425                 regm_t resregx;
1426                 switch (oper)
1427                 {   case OPdiv:
1428                         resregx = mask(r3);
1429                         break;
1430 
1431                     case OPmod:
1432                         assert(reg != AX && r3 == DX);
1433                         if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
1434                         {
1435                             cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);      // IMUL EAX,EDX,d
1436                         }
1437                         else
1438                         {
1439                             movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0); // MOV EAX,d
1440                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));     // IMUL EAX,EDX
1441                             getregsNoSave(mAX);                             // EAX no longer contains 'd'
1442                         }
1443                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));          // SUB R1,EAX
1444                         resregx = regm;
1445                         break;
1446 
1447                     case OPremquo:
1448                         assert(reg != AX && r3 == DX);
1449                         if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
1450                         {
1451                             cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);     // IMUL EAX,EDX,d
1452                         }
1453                         else
1454                         {
1455                             movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0); // MOV EAX,d
1456                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));     // IMUL EAX,EDX
1457                         }
1458                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));          // SUB R1,EAX
1459                         genmovreg(cdb, AX, r3);                // MOV EAX,r3
1460                         if (neg)
1461                             cdb.gen2(0xF7,grex | modregrm(3,3,AX));         // NEG EAX
1462                         genmovreg(cdb, DX, reg);               // MOV EDX,R1
1463                         resregx = mDX | mAX;
1464                         break;
1465 
1466                     default:
1467                         assert(0);
1468                 }
1469                 freenode(e2);
1470                 fixresult(cdb,e,resregx,pretregs);
1471                 return;
1472             }
1473 
1474             // Unsigned divide by a constant
1475             if (e2factor > 2 && (e2factor & (e2factor - 1)) &&
1476                 ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))) &&
1477                 config.flags4 & CFG4speed && uns)
1478             {
1479                 assert(sz == 4 || sz == 8);
1480 
1481                 reg_t r3;
1482                 regm_t regm;
1483                 reg_t reg;
1484                 ulong m;
1485                 int shpre;
1486                 int shpost;
1487                 if (udiv_coefficients(sz * 8, e2factor, &shpre, &m, &shpost))
1488                 {
1489                     /* t1 = MULUH(m, n)
1490                      * q = SRL(t1 + SRL(n - t1, 1), shpost - 1)
1491                      *   MOV   EAX,reg
1492                      *   MOV   EDX,m
1493                      *   MUL   EDX
1494                      *   MOV   EAX,reg
1495                      *   SUB   EAX,EDX
1496                      *   SHR   EAX,1
1497                      *   LEA   R3,[EAX][EDX]
1498                      *   SHR   R3,shpost-1
1499                      */
1500                     assert(shpre == 0);
1501 
1502                     regm = allregs & ~(mAX | mDX);
1503                     codelem(cdb,e1,&regm,false);       // eval left leaf
1504                     reg = findreg(regm);
1505                     getregs(cdb,mAX | mDX);
1506                     genmovreg(cdb,AX,reg);                   // MOV EAX,reg
1507                     movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
1508                     getregs(cdb,regm | mDX | mAX);
1509                     cdb.gen2(0xF7,grex | modregrmx(3,4,DX));              // MUL EDX
1510                     genmovreg(cdb,AX,reg);                   // MOV EAX,reg
1511                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));              // SUB EAX,EDX
1512                     cdb.genc2(0xC1,grex | modregrm(3,5,AX),1);            // SHR EAX,1
1513                     regm_t regm3 = allregs;
1514                     if (oper == OPmod || oper == OPremquo)
1515                     {
1516                         regm3 &= ~regm;
1517                         if (oper == OPremquo || !el_signx32(e2))
1518                             regm3 &= ~mAX;
1519                     }
1520                     allocreg(cdb,&regm3,&r3,TYint);
1521                     cdb.gen2sib(LEA,grex | modregxrm(0,r3,4),modregrm(0,AX,DX)); // LEA R3,[EAX][EDX]
1522                     if (shpost != 1)
1523                         cdb.genc2(0xC1,grex | modregrmx(3,5,r3),shpost-1);   // SHR R3,shpost-1
1524                 }
1525                 else
1526                 {
1527                     /* q = SRL(MULUH(m, SRL(n, shpre)), shpost)
1528                      *   SHR   EAX,shpre
1529                      *   MOV   reg,m
1530                      *   MUL   reg
1531                      *   SHR   EDX,shpost
1532                      */
1533                     regm = mAX;
1534                     if (oper == OPmod || oper == OPremquo)
1535                         regm = allregs & ~(mAX|mDX);
1536                     codelem(cdb,e1,&regm,false);       // eval left leaf
1537                     reg = findreg(regm);
1538 
1539                     if (reg != AX)
1540                     {
1541                         getregs(cdb,mAX);
1542                         genmovreg(cdb,AX,reg);                 // MOV EAX,reg
1543                     }
1544                     if (shpre)
1545                     {
1546                         getregs(cdb,mAX);
1547                         cdb.genc2(0xC1,grex | modregrm(3,5,AX),shpre);      // SHR EAX,shpre
1548                     }
1549                     getregs(cdb,mDX);
1550                     movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
1551                     getregs(cdb,mDX | mAX);
1552                     cdb.gen2(0xF7,grex | modregrmx(3,4,DX));                // MUL EDX
1553                     if (shpost)
1554                         cdb.genc2(0xC1,grex | modregrm(3,5,DX),shpost);     // SHR EDX,shpost
1555                     r3 = DX;
1556                 }
1557 
1558                 regm_t resreg;
1559                 switch (oper)
1560                 {   case OPdiv:
1561                         // r3 = quotient
1562                         resreg = mask(r3);
1563                         break;
1564 
1565                     case OPmod:
1566                         /* reg = original value
1567                          * r3  = quotient
1568                          */
1569                         assert(!(regm & mAX));
1570                         if (el_signx32(e2))
1571                         {
1572                             cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
1573                         }
1574                         else
1575                         {
1576                             assert(!(mask(r3) & mAX));
1577                             movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0);  // MOV EAX,e2factor
1578                             getregs(cdb,mAX);
1579                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
1580                         }
1581                         getregs(cdb,regm);
1582                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
1583                         resreg = regm;
1584                         break;
1585 
1586                     case OPremquo:
1587                         /* reg = original value
1588                          * r3  = quotient
1589                          */
1590                         assert(!(mask(r3) & (mAX|regm)));
1591                         assert(!(regm & mAX));
1592                         if (el_signx32(e2))
1593                         {
1594                             cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
1595                         }
1596                         else
1597                         {
1598                             movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0); // MOV EAX,e2factor
1599                             getregs(cdb,mAX);
1600                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
1601                         }
1602                         getregs(cdb,regm);
1603                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
1604                         genmovreg(cdb, AX, r3);              // MOV EAX,r3
1605                         genmovreg(cdb, DX, reg);             // MOV EDX,reg
1606                         resreg = mDX | mAX;
1607                         break;
1608 
1609                     default:
1610                         assert(0);
1611                 }
1612                 freenode(e2);
1613                 fixresult(cdb,e,resreg,pretregs);
1614                 return;
1615             }
1616 
1617             const int pow2 = ispow2(e2factor);
1618 
1619             // Register pair signed divide by power of 2
1620             if (sz == REGSIZE * 2 &&
1621                 (oper == OPdiv) && !uns &&
1622                 pow2 != -1 &&
1623                 I32 // not set up for I64 cent yet
1624                )
1625             {
1626                 regm_t retregs = mDX | mAX;
1627                 if (pow2 == 63 && !(retregs & BYTEREGS & mLSW))
1628                     retregs = (retregs & mMSW) | (BYTEREGS & mLSW);  // because of SETZ
1629 
1630                 codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
1631                 const rhi = findregmsw(retregs);
1632                 const rlo = findreglsw(retregs);
1633                 freenode(e2);
1634                 getregs(cdb,retregs);
1635 
1636                 if (pow2 < 32)
1637                 {
1638                     reg_t r1 = allocScratchReg(cdb, allregs & ~retregs);
1639 
1640                     genmovreg(cdb,r1,rhi);                                        // MOV  r1,rhi
1641                     if (pow2 == 1)
1642                         cdb.genc2(0xC1,grex | modregrmx(3,5,r1),REGSIZE * 8 - 1); // SHR  r1,31
1643                     else
1644                     {
1645                         cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR  r1,31
1646                         cdb.genc2(0x81,grex | modregrmx(3,4,r1),(1 << pow2) - 1); // AND  r1,mask
1647                     }
1648                     cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD  rlo,r1
1649                     cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC  rhi,0
1650                     cdb.genc2(0x0FAC,grex | modregrm(3,rhi,rlo),pow2);            // SHRD rlo,rhi,pow2
1651                     cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),pow2);               // SAR  rhi,pow2
1652                 }
1653                 else if (pow2 == 32)
1654                 {
1655                     reg_t r1 = allocScratchReg(cdb, allregs & ~retregs);
1656 
1657                     genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
1658                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
1659                     cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD rlo,r1
1660                     cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC rhi,0
1661                     cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
1662                     cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
1663                 }
1664                 else if (pow2 < 63)
1665                 {
1666                     reg_t r1 = allocScratchReg(cdb, allregs & ~retregs);
1667                     reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | mask(r1)));
1668 
1669                     genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
1670                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
1671                     cdb.genmovreg(r2,r1);                                         // MOV r2,r1
1672 
1673                     if (pow2 == 33)
1674                     {
1675                         cdb.gen2(0xF7,modregrmx(3,3,r1));                         // NEG r1
1676                         cdb.gen2(0x03,grex | modregxrmx(3,rlo,r2));               // ADD rlo,r2
1677                         cdb.gen2(0x13,grex | modregxrmx(3,rhi,r1));               // ADC rhi,r1
1678                     }
1679                     else
1680                     {
1681                         cdb.genc2(0x81,grex | modregrmx(3,4,r2),(1 << (pow2-32)) - 1); // AND r2,mask
1682                         cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                    // ADD rlo,r1
1683                         cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                    // ADC rhi,r2
1684                     }
1685 
1686                     cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
1687                     cdb.genc2(0xC1,grex | modregrmx(3,7,rlo),pow2 - 32);          // SAR rlo,pow2-32
1688                     cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
1689                 }
1690                 else
1691                 {
1692                     // This may be better done by cgelem.d
1693                     assert(pow2 == 63);
1694                     cdb.genc2(0x81,grex | modregrmx(3,4,rhi),0x8000_0000); // ADD rhi,0x8000_000
1695                     cdb.genregs(0x09,rlo,rhi);                             // OR  rlo,rhi
1696                     cdb.gen2(0x0F94,modregrmx(3,0,rlo));                   // SETZ rlo
1697                     cdb.genregs(MOVZXb,rlo,rlo);                           // MOVZX rlo,rloL
1698                     movregconst(cdb,rhi,0,0);                              // MOV rhi,0
1699                 }
1700 
1701                 fixresult(cdb,e,retregs,pretregs);
1702                 return;
1703             }
1704 
1705             // Register pair signed modulo by power of 2
1706             if (sz == REGSIZE * 2 &&
1707                 (oper == OPmod) && !uns &&
1708                 pow2 != -1 &&
1709                 I32 // not set up for I64 cent yet
1710                )
1711             {
1712                 regm_t retregs = mDX | mAX;
1713                 codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
1714                 const rhi = findregmsw(retregs);
1715                 const rlo = findreglsw(retregs);
1716                 freenode(e2);
1717                 getregs(cdb,retregs);
1718 
1719                 regm_t scratchm = allregs & ~retregs;
1720                 if (pow2 == 63)
1721                     scratchm &= BYTEREGS;               // because of SETZ
1722                 reg_t r1 = allocScratchReg(cdb, scratchm);
1723 
1724                 if (pow2 < 32)
1725                 {
1726                     cdb.genmovreg(r1,rhi);                                    // MOV r1,rhi
1727                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31
1728                     cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
1729                     cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
1730                     cdb.genc2(0x81,grex | modregrmx(3,4,rlo),(1<<pow2)-1);    // AND rlo,(1<<pow2)-1
1731                     cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
1732                     cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
1733                     cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));              // SBB rhi,rhi
1734                 }
1735                 else if (pow2 == 32)
1736                 {
1737                     cdb.genmovreg(r1,rhi);                                      // MOV r1,rhi
1738                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR r1,31
1739                     cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD rlo,r1
1740                     cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB rlo,r1
1741                     cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));                // SBB rhi,rhi
1742                 }
1743                 else if (pow2 < 63)
1744                 {
1745                     reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | mask(r1)));
1746 
1747                     cdb.genmovreg(r1,rhi);                                      // MOV  r1,rhi
1748                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR  r1,31
1749                     cdb.genmovreg(r2,r1);                                       // MOV  r2,r1
1750                     cdb.genc2(0x0FAC,grex | modregrm(3,r2,r1),64-pow2);         // SHRD r1,r2,64-pow2
1751                     cdb.genc2(0xC1,grex | modregrmx(3,5,r2),64-pow2);           // SHR  r2,64-pow2
1752                     cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD  rlo,r1
1753                     cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                 // ADC  rhi,r2
1754                     cdb.genc2(0x81,grex | modregrmx(3,4,rhi),(1<<(pow2-32))-1); // AND  rhi,(1<<(pow2-32))-1
1755                     cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB  rlo,r1
1756                     cdb.gen2(0x1B,grex | modregxrmx(3,rhi,r2));                 // SBB  rhi,r2
1757                 }
1758                 else
1759                 {
1760                     // This may be better done by cgelem.d
1761                     assert(pow2 == 63);
1762 
1763                     cdb.genc1(LEA,grex | modregxrmx(2,r1,rhi), FLconst, 0x8000_0000); // LEA r1,0x8000_0000[rhi]
1764                     cdb.gen2(0x0B,grex | modregxrmx(3,r1,rlo));               // OR   r1,rlo
1765                     cdb.gen2(0x0F94,modregrmx(3,0,r1));                       // SETZ r1
1766                     cdb.genc2(0xC1,grex | modregrmx(3,4,r1),REGSIZE * 8 - 1); // SHL  r1,31
1767                     cdb.gen2(0x2B,grex | modregxrmx(3,rhi,r1));               // SUB  rhi,r1
1768                 }
1769 
1770                 fixresult(cdb,e,retregs,pretregs);
1771                 return;
1772             }
1773 
1774             if (sz > REGSIZE || !el_signx32(e2))
1775                 goto default;
1776 
1777             // Special code for signed divide or modulo by power of 2
1778             if ((sz == REGSIZE || (I64 && sz == 4)) &&
1779                 (oper == OPdiv || oper == OPmod) && !uns &&
1780                 pow2 != -1 &&
1781                 !(config.target_cpu < TARGET_80286 && pow2 != 1 && oper == OPdiv)
1782                )
1783             {
1784                 if (pow2 == 1 && oper == OPdiv && config.target_cpu > TARGET_80386)
1785                 {
1786                     /* MOV r,reg
1787                        SHR r,31
1788                        ADD reg,r
1789                        SAR reg,1
1790                      */
1791                     regm_t retregs = allregs;
1792                     codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
1793                     const reg = findreg(retregs);
1794                     freenode(e2);
1795                     getregs(cdb,retregs);
1796 
1797                     reg_t r = allocScratchReg(cdb, allregs & ~retregs);
1798                     genmovreg(cdb,r,reg);                        // MOV r,reg
1799                     cdb.genc2(0xC1,grex | modregxrmx(3,5,r),(sz * 8 - 1)); // SHR r,31
1800                     cdb.gen2(0x03,grex | modregxrmx(3,reg,r));   // ADD reg,r
1801                     cdb.gen2(0xD1,grex | modregrmx(3,7,reg));    // SAR reg,1
1802                     regm_t resreg = retregs;
1803                     fixresult(cdb,e,resreg,pretregs);
1804                     return;
1805                 }
1806 
1807                 regm_t resreg;
1808                 switch (oper)
1809                 {
1810                     case OPdiv:
1811                         resreg = mAX;
1812                         break;
1813 
1814                     case OPmod:
1815                         resreg = mDX;
1816                         break;
1817 
1818                     case OPremquo:
1819                         resreg = mDX | mAX;
1820                         break;
1821 
1822                     default:
1823                         assert(0);
1824                 }
1825 
1826                 regm_t retregs = mAX;
1827                 codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
1828                 freenode(e2);
1829                 getregs(cdb,mAX | mDX);             // modify these regs
1830                 cdb.gen1(0x99);                             // CWD
1831                 code_orrex(cdb.last(), rex);
1832                 if (pow2 == 1)
1833                 {
1834                     if (oper == OPdiv)
1835                     {
1836                         cdb.gen2(0x2B,grex | modregrm(3,AX,DX));  // SUB AX,DX
1837                         cdb.gen2(0xD1,grex | modregrm(3,7,AX));   // SAR AX,1
1838                     }
1839                     else // OPmod
1840                     {
1841                         cdb.gen2(0x33,grex | modregrm(3,AX,DX));   // XOR AX,DX
1842                         cdb.genc2(0x81,grex | modregrm(3,4,AX),1); // AND AX,1
1843                         cdb.gen2(0x03,grex | modregrm(3,DX,AX));   // ADD DX,AX
1844                     }
1845                 }
1846                 else
1847                 {   targ_ulong m;
1848 
1849                     m = (1 << pow2) - 1;
1850                     if (oper == OPdiv)
1851                     {
1852                         cdb.genc2(0x81,grex | modregrm(3,4,DX),m);  // AND DX,m
1853                         cdb.gen2(0x03,grex | modregrm(3,AX,DX));    // ADD AX,DX
1854                         // Be careful not to generate this for 8088
1855                         assert(config.target_cpu >= TARGET_80286);
1856                         cdb.genc2(0xC1,grex | modregrm(3,7,AX),pow2); // SAR AX,pow2
1857                     }
1858                     else // OPmod
1859                     {
1860                         cdb.gen2(0x33,grex | modregrm(3,AX,DX));    // XOR AX,DX
1861                         cdb.gen2(0x2B,grex | modregrm(3,AX,DX));    // SUB AX,DX
1862                         cdb.genc2(0x81,grex | modregrm(3,4,AX),m);  // AND AX,mask
1863                         cdb.gen2(0x33,grex | modregrm(3,AX,DX));    // XOR AX,DX
1864                         cdb.gen2(0x2B,grex | modregrm(3,AX,DX));    // SUB AX,DX
1865                         resreg = mAX;
1866                     }
1867                 }
1868                 fixresult(cdb,e,resreg,pretregs);
1869                 return;
1870             }
1871             goto default;
1872 
1873         case OPind:
1874             if (!e2.Ecount)                        // if not CSE
1875                     goto case OPvar;                        // try OP reg,EA
1876             goto default;
1877 
1878         default:                                    // OPconst and operators
1879             //printf("test2 %p, retregs = %s rretregs = %s resreg = %s\n", e, regm_str(retregs), regm_str(rretregs), regm_str(resreg));
1880             regm_t retregs = sz <= REGSIZE ? mAX : mDX | mAX;
1881             codelem(cdb,e1,&retregs,false);           // eval left leaf
1882             regm_t rretregs;
1883             if (sz <= REGSIZE)                  // dedicated regs for div
1884             {
1885                 // pick some other regs
1886                 rretregs = isbyte ? BYTEREGS & ~mAX
1887                                 : ALLREGS & ~(mAX|mDX);
1888             }
1889             else
1890             {
1891                 assert(sz <= 2 * REGSIZE);
1892                 rretregs = mCX | mBX;           // second arg
1893             }
1894             scodelem(cdb,e2,&rretregs,retregs,true);  // get rvalue
1895             if (sz <= REGSIZE)
1896             {
1897                 getregs(cdb,mAX | mDX);     // trash these regs
1898                 if (uns)                        // unsigned divide
1899                 {
1900                     movregconst(cdb,DX,0,(sz == 8) ? 64 : 0);  // MOV DX,0
1901                     getregs(cdb,mDX);
1902                 }
1903                 else
1904                 {
1905                     cdb.gen1(0x99);                 // CWD
1906                     code_orrex(cdb.last(),rex);
1907                 }
1908                 reg_t rreg = findreg(rretregs);
1909                 cdb.gen2(0xF7 ^ isbyte,grex | modregrmx(3,7 - uns,rreg)); // OP AX,rreg
1910                 if (I64 && isbyte && rreg >= 4)
1911                     code_orrex(cdb.last(), REX);
1912                 regm_t resreg;
1913                 switch (oper)
1914                 {
1915                     case OPdiv:
1916                         resreg = mAX;
1917                         break;
1918 
1919                     case OPmod:
1920                         resreg = mDX;
1921                         break;
1922 
1923                     case OPremquo:
1924                         resreg = mDX | mAX;
1925                         break;
1926 
1927                     default:
1928                         assert(0);
1929                 }
1930                 fixresult(cdb,e,resreg,pretregs);
1931             }
1932             else if (sz == 2 * REGSIZE)
1933             {
1934                 uint lib;
1935                 switch (oper)
1936                 {
1937                     case OPdiv:
1938                     case OPremquo:
1939                         lib = uns ? CLIB.uldiv : CLIB.ldiv;
1940                         break;
1941 
1942                     case OPmod:
1943                         lib = uns ? CLIB.ulmod : CLIB.lmod;
1944                         break;
1945 
1946                     default:
1947                         assert(0);
1948                 }
1949 
1950                 regm_t keepregs = I32 ? mSI | mDI : 0;
1951                 callclib(cdb,e,lib,pretregs,keepregs);
1952             }
1953             else
1954                     assert(0);
1955             return;
1956 
1957         case OPvar:
1958             if (I16 || sz == 2 * REGSIZE)
1959                 goto default;            // have to handle it with codelem()
1960 
1961             // loadea() handles CWD or CLR DX for divides
1962             regm_t retregs = mAX;
1963             codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf
1964             loadea(cdb,e2,&cs,0xF7 ^ isbyte,7 - uns,0,
1965                    mAX | mDX,
1966                    mAX | mDX);
1967             freenode(e2);
1968             regm_t resreg;
1969             switch (oper)
1970             {
1971                 case OPdiv:
1972                     resreg = mAX;
1973                     break;
1974 
1975                 case OPmod:
1976                     resreg = mDX;
1977                     break;
1978 
1979                 case OPremquo:
1980                     resreg = mDX | mAX;
1981                     break;
1982 
1983                 default:
1984                     assert(0);
1985             }
1986             fixresult(cdb,e,resreg,pretregs);
1987             return;
1988     }
1989     assert(0);
1990 }
1991 
1992 
1993 /***************************
1994  * Handle OPnot and OPbool.
1995  * Generate:
1996  *      c:      [evaluate e1]
1997  *      cfalse: [save reg code]
1998  *              clr     reg
1999  *              jmp     cnop
2000  *      ctrue:  [save reg code]
2001  *              clr     reg
2002  *              inc     reg
2003  *      cnop:   nop
2004  */
2005 
2006 @trusted
2007 void cdnot(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2008 {
2009     //printf("cdnot()\n");
2010     reg_t reg;
2011     tym_t forflags;
2012     regm_t retregs;
2013     elem *e1 = e.EV.E1;
2014 
2015     if (*pretregs == 0)
2016         goto L1;
2017     if (*pretregs == mPSW)
2018     {   //assert(e.Eoper != OPnot && e.Eoper != OPbool);*/ /* should've been optimized
2019     L1:
2020         codelem(cdb,e1,pretregs,false);      // evaluate e1 for cc
2021         return;
2022     }
2023 
2024     OPER op = e.Eoper;
2025     uint sz = tysize(e1.Ety);
2026     uint rex = (I64 && sz == 8) ? REX_W : 0;
2027     uint grex = rex << 16;
2028 
2029     if (!tyfloating(e1.Ety))
2030     {
2031     if (sz <= REGSIZE && e1.Eoper == OPvar)
2032     {   code cs;
2033 
2034         getlvalue(cdb,&cs,e1,0);
2035         freenode(e1);
2036         if (!I16 && sz == 2)
2037             cs.Iflags |= CFopsize;
2038 
2039         retregs = *pretregs & (ALLREGS | mBP);
2040         if (config.target_cpu >= TARGET_80486 &&
2041             tysize(e.Ety) == 1)
2042         {
2043             if (reghasvalue((sz == 1) ? BYTEREGS : ALLREGS,0,&reg))
2044             {
2045                 cs.Iop = 0x39;
2046                 if (I64 && (sz == 1) && reg >= 4)
2047                     cs.Irex |= REX;
2048             }
2049             else
2050             {   cs.Iop = 0x81;
2051                 reg = 7;
2052                 cs.IFL2 = FLconst;
2053                 cs.IEV2.Vint = 0;
2054             }
2055             cs.Iop ^= (sz == 1);
2056             code_newreg(&cs,reg);
2057             cdb.gen(&cs);                             // CMP e1,0
2058 
2059             retregs &= BYTEREGS;
2060             if (!retregs)
2061                 retregs = BYTEREGS;
2062             allocreg(cdb,&retregs,&reg,TYint);
2063 
2064             const opcode_t iop = (op == OPbool)
2065                 ? 0x0F95    // SETNZ rm8
2066                 : 0x0F94;   // SETZ rm8
2067             cdb.gen2(iop, modregrmx(3,0,reg));
2068             if (reg >= 4)
2069                 code_orrex(cdb.last(), REX);
2070             if (op == OPbool)
2071                 *pretregs &= ~mPSW;
2072             goto L4;
2073         }
2074 
2075         if (reghasvalue((sz == 1) ? BYTEREGS : ALLREGS,1,&reg))
2076             cs.Iop = 0x39;
2077         else
2078         {   cs.Iop = 0x81;
2079             reg = 7;
2080             cs.IFL2 = FLconst;
2081             cs.IEV2.Vint = 1;
2082         }
2083         if (I64 && (sz == 1) && reg >= 4)
2084             cs.Irex |= REX;
2085         cs.Iop ^= (sz == 1);
2086         code_newreg(&cs,reg);
2087         cdb.gen(&cs);                         // CMP e1,1
2088 
2089         allocreg(cdb,&retregs,&reg,TYint);
2090         op ^= (OPbool ^ OPnot);                 // switch operators
2091         goto L2;
2092     }
2093     else if (config.target_cpu >= TARGET_80486 &&
2094         tysize(e.Ety) == 1)
2095     {
2096         int jop = jmpopcode(e.EV.E1);
2097         retregs = mPSW;
2098         codelem(cdb,e.EV.E1,&retregs,false);
2099         retregs = *pretregs & BYTEREGS;
2100         if (!retregs)
2101             retregs = BYTEREGS;
2102         allocreg(cdb,&retregs,&reg,TYint);
2103 
2104         int iop = 0x0F90 | (jop & 0x0F);        // SETcc rm8
2105         if (op == OPnot)
2106             iop ^= 1;
2107         cdb.gen2(iop,grex | modregrmx(3,0,reg));
2108         if (reg >= 4)
2109             code_orrex(cdb.last(), REX);
2110         if (op == OPbool)
2111             *pretregs &= ~mPSW;
2112         goto L4;
2113     }
2114     else if (sz <= REGSIZE &&
2115         // NEG bytereg is too expensive
2116         (sz != 1 || config.target_cpu < TARGET_PentiumPro))
2117     {
2118         retregs = *pretregs & (ALLREGS | mBP);
2119         if (sz == 1 && !(retregs &= BYTEREGS))
2120             retregs = BYTEREGS;
2121         codelem(cdb,e.EV.E1,&retregs,false);
2122         reg = findreg(retregs);
2123         getregs(cdb,retregs);
2124         cdb.gen2(sz == 1 ? 0xF6 : 0xF7,grex | modregrmx(3,3,reg));   // NEG reg
2125         code_orflag(cdb.last(),CFpsw);
2126         if (!I16 && sz == SHORTSIZE)
2127             code_orflag(cdb.last(),CFopsize);
2128     L2:
2129         genregs(cdb,0x19,reg,reg);                  // SBB reg,reg
2130         code_orrex(cdb.last(), rex);
2131         // At this point, reg==0 if e1==0, reg==-1 if e1!=0
2132         if (op == OPnot)
2133         {
2134             if (I64)
2135                 cdb.gen2(0xFF,grex | modregrmx(3,0,reg));    // INC reg
2136             else
2137                 cdb.gen1(0x40 + reg);                        // INC reg
2138         }
2139         else
2140             cdb.gen2(0xF7,grex | modregrmx(3,3,reg));    // NEG reg
2141         if (*pretregs & mPSW)
2142         {   code_orflag(cdb.last(),CFpsw);
2143             *pretregs &= ~mPSW;         // flags are always set anyway
2144         }
2145     L4:
2146         fixresult(cdb,e,retregs,pretregs);
2147         return;
2148     }
2149     }
2150     code *cnop = gennop(null);
2151     code *ctrue = gennop(null);
2152     logexp(cdb,e.EV.E1,(op == OPnot) ? false : true,FLcode,ctrue);
2153     forflags = *pretregs & mPSW;
2154     if (I64 && sz == 8)
2155         forflags |= 64;
2156     assert(tysize(e.Ety) <= REGSIZE);              // result better be int
2157     CodeBuilder cdbfalse;
2158     cdbfalse.ctor();
2159     allocreg(cdbfalse,pretregs,&reg,e.Ety);        // allocate reg for result
2160     code *cfalse = cdbfalse.finish();
2161     CodeBuilder cdbtrue;
2162     cdbtrue.ctor();
2163     cdbtrue.append(ctrue);
2164     for (code *c1 = cfalse; c1; c1 = code_next(c1))
2165         cdbtrue.gen(c1);                                      // duplicate reg save code
2166     CodeBuilder cdbfalse2;
2167     cdbfalse2.ctor();
2168     movregconst(cdbfalse2,reg,0,forflags);                    // mov 0 into reg
2169     regcon.immed.mval &= ~mask(reg);                          // mark reg as unavail
2170     movregconst(cdbtrue,reg,1,forflags);                      // mov 1 into reg
2171     regcon.immed.mval &= ~mask(reg);                          // mark reg as unavail
2172     genjmp(cdbfalse2,JMP,FLcode,cast(block *) cnop);          // skip over ctrue
2173     cdb.append(cfalse);
2174     cdb.append(cdbfalse2);
2175     cdb.append(cdbtrue);
2176     cdb.append(cnop);
2177 }
2178 
2179 
2180 /************************
2181  * Complement operator
2182  */
2183 
2184 @trusted
2185 void cdcom(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2186 {
2187     if (*pretregs == 0)
2188     {
2189         codelem(cdb,e.EV.E1,pretregs,false);
2190         return;
2191     }
2192     tym_t tym = tybasic(e.Ety);
2193     int sz = _tysize[tym];
2194     uint rex = (I64 && sz == 8) ? REX_W : 0;
2195     regm_t possregs = (sz == 1) ? BYTEREGS : allregs;
2196     regm_t retregs = *pretregs & possregs;
2197     if (retregs == 0)
2198         retregs = possregs;
2199     codelem(cdb,e.EV.E1,&retregs,false);
2200     getregs(cdb,retregs);                // retregs will be destroyed
2201 
2202     if (0 && sz == 4 * REGSIZE)
2203     {
2204         cdb.gen2(0xF7,modregrm(3,2,AX));   // NOT AX
2205         cdb.gen2(0xF7,modregrm(3,2,BX));   // NOT BX
2206         cdb.gen2(0xF7,modregrm(3,2,CX));   // NOT CX
2207         cdb.gen2(0xF7,modregrm(3,2,DX));   // NOT DX
2208     }
2209     else
2210     {
2211         const reg = (sz <= REGSIZE) ? findreg(retregs) : findregmsw(retregs);
2212         const op = (sz == 1) ? 0xF6 : 0xF7;
2213         genregs(cdb,op,2,reg);     // NOT reg
2214         code_orrex(cdb.last(), rex);
2215         if (I64 && sz == 1 && reg >= 4)
2216             code_orrex(cdb.last(), REX);
2217         if (sz == 2 * REGSIZE)
2218         {
2219             const reg2 = findreglsw(retregs);
2220             genregs(cdb,op,2,reg2);  // NOT reg+1
2221         }
2222     }
2223     fixresult(cdb,e,retregs,pretregs);
2224 }
2225 
2226 /************************
2227  * Bswap operator
2228  */
2229 
2230 @trusted
2231 void cdbswap(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2232 {
2233     if (*pretregs == 0)
2234     {
2235         codelem(cdb,e.EV.E1,pretregs,false);
2236         return;
2237     }
2238 
2239     const tym = tybasic(e.Ety);
2240     const sz = _tysize[tym];
2241     const posregs = (sz == 2) ? mAX|mBX|mCX|mDX : allregs;
2242     regm_t retregs = *pretregs & posregs;
2243     if (retregs == 0)
2244         retregs = posregs;
2245     codelem(cdb,e.EV.E1,&retregs,false);
2246     getregs(cdb,retregs);        // retregs will be destroyed
2247     if (sz == 2 * REGSIZE)
2248     {
2249         assert(sz != 16);                       // no cent support yet
2250         const msreg = findregmsw(retregs);
2251         cdb.gen1(0x0FC8 + (msreg & 7));         // BSWAP msreg
2252         const lsreg = findreglsw(retregs);
2253         cdb.gen1(0x0FC8 + (lsreg & 7));         // BSWAP lsreg
2254         cdb.gen2(0x87,modregrm(3,msreg,lsreg)); // XCHG msreg,lsreg
2255     }
2256     else
2257     {
2258         const reg = findreg(retregs);
2259         if (sz == 2)
2260         {
2261             genregs(cdb,0x86,reg+4,reg);    // XCHG regL,regH
2262         }
2263         else
2264         {
2265             assert(sz == 4 || sz == 8);
2266             cdb.gen1(0x0FC8 + (reg & 7));      // BSWAP reg
2267             ubyte rex = 0;
2268             if (sz == 8)
2269                 rex |= REX_W;
2270             if (reg & 8)
2271                 rex |= REX_B;
2272             if (rex)
2273                 code_orrex(cdb.last(), rex);
2274         }
2275     }
2276     fixresult(cdb,e,retregs,pretregs);
2277 }
2278 
2279 /*************************
2280  * ?: operator
2281  */
2282 
2283 @trusted
void cdcond(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* Generate code for the conditional expression (e1 ? e21 : e22), where
     * e1 is e.EV.E1 and e21/e22 are the two operands of e.EV.E2.
     *
     * Params:
     *      cdb = code builder that receives the generated code
     *      e = the conditional expression node
     *      pretregs = on entry, mask of registers/flags the caller wants the
     *                 result in; updated on exit
     *
     * Four strategies are tried in order:
     *   1. (e ? e : f)  - condition and true value are the same node
     *   2. (rel ? c1 : c2) with a carry-based condition and two constants,
     *      computed without a branch using AND/ADD (or NOT)
     *   3. (e ? c : f)  - true value is a constant loaded before the jump
     *   4. the general case, with both arms generated separately
     *
     * cgstate.stackclean is incremented below and decremented again on every
     * return path.
     */
    con_t regconold,regconsave;
    uint stackpushold,stackpushsave;
    int ehindexold,ehindexsave;
    uint sz2;

    /* vars to save state of 8087 */
    int stackusedold,stackusedsave;
    NDP[global87.stack.length] _8087old;
    NDP[global87.stack.length] _8087save;

    //printf("cdcond(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    elem *e21 = e2.EV.E1;
    elem *e22 = e2.EV.E2;
    regm_t psw = *pretregs & mPSW;               /* save PSW bit                 */
    const op1 = e1.Eoper;
    uint sz1 = tysize(e1.Ety);
    uint jop = jmpopcode(e1);

    // Jump opcodes for the two arms; used in the general case to decide how
    // a flags (mPSW) result is produced for each arm.
    uint jop1 = jmpopcode(e21);
    uint jop2 = jmpopcode(e22);

    // Note: op1, sz1 and jop above were taken from e1 before this call,
    // which may replace e1 (it is passed by address).
    docommas(cdb,&e1);
    cgstate.stackclean++;

    if (!OTrel(op1) && e1 == e21 &&
        sz1 <= REGSIZE && !tyfloating(e1.Ety))
    {   // Recognize (e ? e : f)

        // Evaluate e once into retregs, also setting the flags. The
        // conditional jump to cnop1 skips the evaluation of f, so on that
        // path the value of e already sitting in retregs is the result.
        code *cnop1 = gennop(null);
        regm_t retregs = *pretregs | mPSW;
        codelem(cdb,e1,&retregs,false);

        cse_flush(cdb,1);                // flush CSEs to memory
        genjmp(cdb,jop,FLcode,cast(block *)cnop1);
        freenode(e21);

        // Remember the register-content state and push count at the branch
        // point; they are reconciled/checked after f has been generated.
        regconsave = regcon;
        stackpushsave = stackpush;

        retregs |= psw;
        if (retregs & (mBP | ALLREGS))
            regimmed_set(findreg(retregs),0);
        codelem(cdb,e22,&retregs,false);  // f goes into the same registers

        andregcon(&regconsave);
        assert(stackpushsave == stackpush);  // f must leave the push count unchanged

        *pretregs = retregs;
        freenode(e2);
        cdb.append(cnop1);
        cgstate.stackclean--;
        return;
    }

    // Recognize (relational ? c1 : c2) where the condition is carry-based
    // (JC/JNC) and both arms are constants that fit in a register.
    // Note: sz2 is assigned as a side effect of evaluating this condition;
    // the first tysize(e2.Ety) test repeats the same check.
    if (OTrel(op1) && sz1 <= REGSIZE && tysize(e2.Ety) <= REGSIZE &&
        !e1.Ecount &&
        (jop == JC || jop == JNC) &&
        (sz2 = tysize(e2.Ety)) <= REGSIZE &&
        e21.Eoper == OPconst &&
        e22.Eoper == OPconst
       )
    {
        uint sz = tysize(e.Ety);
        uint rex = (I64 && sz == 8) ? REX_W : 0;
        uint grex = rex << 16;

        regm_t retregs;
        targ_size_t v1,v2;

        // Pick candidate result registers; a byte-sized result outside of
        // 64-bit mode is restricted to BYTEREGS.
        if (sz2 != 1 || I64)
        {
            retregs = *pretregs & (ALLREGS | mBP);
            if (!retregs)
                retregs = ALLREGS;
        }
        else
        {
            retregs = *pretregs & BYTEREGS;
            if (!retregs)
                retregs = BYTEREGS;
        }

        // NOTE(review): presumably cdcmp_flag asks the comparison code
        // generator to deliver e1 as an all-zeros/all-ones mask in a
        // register, which is what the AND/ADD/NOT sequence below relies on
        // - confirm against the consumer of cdcmp_flag.
        // NOTE(review): the flag is set before the I64 range checks below,
        // so it remains set when those checks fall through to the later
        // strategies - confirm the consumer tolerates/resets it.
        cdcmp_flag = 1 | rex;
        v1 = cast(targ_size_t)e21.EV.Vllong;
        v2 = cast(targ_size_t)e22.EV.Vllong;
        if (jop == JNC)
        {   // inverted condition: swap the two constants
            v1 = v2;
            v2 = cast(targ_size_t)e21.EV.Vllong;
        }

        // 0x81 is the word/dword immediate-group opcode; for byte operands
        // it is decremented to 0x80. The constants are sign-extended from
        // the operand size.
        opcode_t opcode = 0x81;
        switch (sz2)
        {   case 1:     opcode--;
                        v1 = cast(byte) v1;
                        v2 = cast(byte) v2;
                        break;

            case 2:     v1 = cast(short) v1;
                        v2 = cast(short) v2;
                        break;

            case 4:     v1 = cast(int) v1;
                        v2 = cast(int) v2;
                        break;
            default:
                        break;
        }

        // In the two empty branches the constants cannot be encoded as
        // immediates in 64-bit mode; nothing is generated and control falls
        // out of this block to the strategies below.
        if (I64 && v1 != cast(targ_ullong)cast(targ_ulong)v1)
        {
            // only zero-extension from 32-bits is available for 'or'
        }
        else if (I64 && cast(targ_llong)v2 != cast(targ_llong)cast(targ_long)v2)
        {
            // only sign-extension from 32-bits is available for 'and'
        }
        else
        {
            codelem(cdb,e1,&retregs,false);
            const reg = findreg(retregs);

            if (v1 == 0 && v2 == ~cast(targ_size_t)0)
            {
                // Selecting between 0 and all-ones: a single NOT suffices.
                // (opcode & 1) picks 0xF6 for byte operands, 0xF7 otherwise.
                cdb.gen2(0xF6 + (opcode & 1),grex | modregrmx(3,2,reg));  // NOT reg
                if (I64 && sz2 == REGSIZE)
                    code_orrex(cdb.last(), REX_W);
                if (I64 && sz2 == 1 && reg >= 4)
                    code_orrex(cdb.last(), REX);    // REX needed to address the low byte of regs >= 4
            }
            else
            {
                // reg = (reg & (v1 - v2)) + v2
                // which yields v1 when reg is all ones and v2 when reg is 0.
                v1 -= v2;
                cdb.genc2(opcode,grex | modregrmx(3,4,reg),v1);   // AND reg,v1-v2
                if (I64 && sz2 == 1 && reg >= 4)
                    code_orrex(cdb.last(), REX);
                // The one-byte INC/DEC encodings (0x40+reg / 0x48+reg) are
                // REX prefixes in 64-bit mode, hence the !I64 tests.
                if (v2 == 1 && !I64)
                    cdb.gen1(0x40 + reg);                     // INC reg
                else if (v2 == -1L && !I64)
                    cdb.gen1(0x48 + reg);                     // DEC reg
                else
                {   cdb.genc2(opcode,grex | modregrmx(3,0,reg),v2);   // ADD reg,v2
                    if (I64 && sz2 == 1 && reg >= 4)
                        code_orrex(cdb.last(), REX);
                }
            }

            freenode(e21);
            freenode(e22);
            freenode(e2);

            fixresult(cdb,e,retregs,pretregs);
            cgstate.stackclean--;
            return;
        }
    }

    if (op1 != OPcond && op1 != OPandand && op1 != OPoror &&
        op1 != OPnot && op1 != OPbool &&
        e21.Eoper == OPconst &&
        sz1 <= REGSIZE &&
        *pretregs & (mBP | ALLREGS) &&
        tysize(e21.Ety) <= REGSIZE && !tyfloating(e21.Ety))
    {   // Recognize (e ? c : f)

        // Evaluate e for flags only, load c into the result register, then
        // jump over the evaluation of f with the condition from e.
        code *cnop1 = gennop(null);
        regm_t retregs = mPSW;
        jop = jmpopcode(e1);            // get jmp condition
        codelem(cdb,e1,&retregs,false);

        // Set the register with e21 without affecting the flags
        retregs = *pretregs & (ALLREGS | mBP);
        if (retregs & ~regcon.mvar)
            retregs &= ~regcon.mvar;    // don't disturb register variables
        // NOTE: the original author flagged a possible sign extension bug at
        // this point (the details were in an email and are not recorded in
        // the source); status unresolved.
        reg_t reg;
        regwithvalue(cdb,retregs,cast(targ_size_t)e21.EV.Vllong,&reg,tysize(e21.Ety) == 8 ? 64|8 : 8);
        retregs = mask(reg);

        cse_flush(cdb,1);                // flush CSE's to memory
        genjmp(cdb,jop,FLcode,cast(block *)cnop1);
        freenode(e21);

        // State at the branch point, reconciled/checked after f
        regconsave = regcon;
        stackpushsave = stackpush;

        codelem(cdb,e22,&retregs,false);  // f goes into the register holding c

        andregcon(&regconsave);
        assert(stackpushsave == stackpush);

        freenode(e2);
        cdb.append(cnop1);
        fixresult(cdb,e,retregs,pretregs);
        cgstate.stackclean--;
        return;
    }

    /* General case. The code is laid out as:
     *          (evaluate e1, branching to cnop1)
     *          (code for e21)
     *          JMP cnop2
     *  cnop1:  NOP
     *          (code for e22)
     *  cnop2:  NOP
     */
    code *cnop1 = gennop(null);
    code *cnop2 = gennop(null);         // dummy target addresses
    logexp(cdb,e1,false,FLcode,cnop1);  // evaluate condition

    // Snapshot the code generator state after the condition; the e22 arm
    // must be generated starting from this same state.
    regconold = regcon;
    stackusedold = global87.stackused;
    stackpushold = stackpush;
    memcpy(_8087old.ptr,global87.stack.ptr,global87.stack.sizeof);
    regm_t retregs = *pretregs;
    CodeBuilder cdb1;
    cdb1.ctor();
    if (psw && jop1 != JNE)
    {
        // Flags are wanted but e21's jump opcode is not JNE: evaluate e21
        // into registers only, then let fixresult() produce what *pretregs
        // asks for.
        retregs &= ~mPSW;
        if (!retregs)
            retregs = ALLREGS;
        codelem(cdb1,e21,&retregs,false);
        fixresult(cdb1,e21,retregs,pretregs);
    }
    else
        codelem(cdb1,e21,&retregs,false);

    if (CPP && e2.Eoper == OPcolon2)
    {
        code cs;

        // This is necessary so that any cleanup code on one branch
        // is redone on the other branch.
        cs.Iop = ESCAPE | ESCmark2;
        cs.Iflags = 0;
        cs.Irex = 0;
        cdb.gen(&cs);
        cdb.append(cdb1);
        cs.Iop = ESCAPE | ESCrelease2;
        cdb.gen(&cs);
    }
    else
        cdb.append(cdb1);

    // Save the state reached at the end of the e21 arm, and roll back to
    // the post-condition snapshot before generating the e22 arm.
    regconsave = regcon;
    regcon = regconold;

    stackpushsave = stackpush;
    stackpush = stackpushold;

    stackusedsave = global87.stackused;
    global87.stackused = stackusedold;

    memcpy(_8087save.ptr,global87.stack.ptr,global87.stack.sizeof);
    memcpy(global87.stack.ptr,_8087old.ptr,global87.stack.sizeof);

    retregs |= psw;                     // PSW bit may have been trashed
    *pretregs |= psw;
    CodeBuilder cdb2;
    cdb2.ctor();
    if (psw && jop2 != JNE)
    {
        // Same treatment as for e21 above
        retregs &= ~mPSW;
        if (!retregs)
            retregs = ALLREGS;
        codelem(cdb2,e22,&retregs,false);
        fixresult(cdb2,e22,retregs,pretregs);
    }
    else
        codelem(cdb2,e22,&retregs,false);   // use same regs as E1
    *pretregs = retregs | psw;

    // Reconcile the register-content state with both the post-condition
    // snapshot and the state at the end of the e21 arm.
    andregcon(&regconold);
    andregcon(&regconsave);

    // Both arms must end with the same 8087 stack depth and push count
    assert(global87.stackused == stackusedsave);
    assert(stackpush == stackpushsave);
    memcpy(global87.stack.ptr,_8087save.ptr,global87.stack.sizeof);
    freenode(e2);

    // Stitch together: (e21 code already in cdb) JMP cnop2; cnop1: e22; cnop2:
    genjmp(cdb,JMP,FLcode,cast(block *) cnop2);
    cdb.append(cnop1);
    cdb.append(cdb2);
    cdb.append(cnop2);
    if (*pretregs & mST0)
        note87(e,0,0);                  // result was requested in ST0

    cgstate.stackclean--;
}
2565 
/*********************
 * Generate code for the comma operator (OPcomma).
 *
 * The left operand is evaluated with an empty register request, so its
 * value is discarded; the right operand is then evaluated with the
 * caller's register request and supplies the value of the expression.
 * Params:
 *      cdb = code builder that receives the generated code
 *      e = the OPcomma node
 *      pretregs = register request, handed unchanged to the evaluation
 *                 of the right operand
 */

@trusted
void cdcomma(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // Left leaf: no result registers requested, value is ignored
    regm_t discarded = 0;
    elem *lhs = e.EV.E1;
    codelem(cdb, lhs, &discarded, false);

    // Right leaf: its result is the result of the whole expression
    elem *rhs = e.EV.E2;
    codelem(cdb, rhs, pretregs, false);
}
2577 
2578 
/*********************************
 * Do && and || operators.
 * Generate:
 *              (evaluate e1 and e2, if true goto cnop1)
 *      cnop3:  NOP
 *      cg:     [save reg code]         ;if we must preserve reg
 *              CLR     reg             ;false result (set Z also)
 *              JMP     cnop2
 *
 *      cnop1:  NOP                     ;if e1 evaluates to true
 *              [save reg code]         ;preserve reg
 *
 *              MOV     reg,1           ;true result
 *                  or
 *              CLR     reg             ;if return result in flags
 *              INC     reg
 *
 *      cnop2:  NOP                     ;mark end of code
 *
 * The layout above is the general case; the function also has shorter
 * paths for when no result is wanted, when e2 has type TYnoreturn, and
 * when e2 is a call returning a bool of the same size as the result.
 *
 * Params:
 *      cdb = code builder that receives the generated code
 *      e = the OPoror or OPandand node
 *      pretregs = on entry, mask of registers/flags the caller wants the
 *                 result in (0 if no result is wanted); on exit, set to
 *                 the registers actually holding the result
 */

@trusted
void cdloglog(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* We can trip the assert with the following:
     *    if ( (b<=a) ? (c<b || a<=c) : c>=a )
     * We'll generate ugly code for it, but it's too obscure a case
     * to expend much effort on it.
     * assert(*pretregs != mPSW);
     */

    //printf("cdloglog() *pretregs: %s\n", regm_str(*pretregs));
    cgstate.stackclean++;               // decremented again on every return path

    // cdb1 collects the code that starts at label cnop1; it is spliced into
    // cdb at the end of whichever path is taken.
    code *cnop1 = gennop(null);
    CodeBuilder cdb1;
    cdb1.ctor();
    cdb1.append(cnop1);
    code *cnop3 = gennop(null);
    elem *e2 = e.EV.E2;

    // Short circuit on e1:
    //   ||  branches to cnop1 when e1 is true
    //   &&  branches to cnop3 when e1 is false
    (e.Eoper == OPoror)
        ? logexp(cdb,e.EV.E1,1,FLcode,cnop1)
        : logexp(cdb,e.EV.E1,0,FLcode,cnop3);

    // State at the point where the paths diverge (e2 is only evaluated on
    // one of them); reconciled/checked after e2 has been generated.
    con_t regconsave = regcon;
    uint stackpushsave = stackpush;
    if (*pretregs == 0)                 // if don't want result
    {
        // Only the side effects of e2 matter; both labels simply become
        // join points after it.
        int noreturn = !el_returns(e2);
        codelem(cdb,e2,pretregs,false);
        if (noreturn)
        {
            // e2 does not return: restore the pre-e2 register-content
            // state, keeping only the record of which registers were used
            regconsave.used |= regcon.used;
            regcon = regconsave;
        }
        else
            andregcon(&regconsave);
        assert(stackpush == stackpushsave);
        cdb.append(cnop3);
        cdb.append(cdb1);        // eval code, throw away result
        cgstate.stackclean--;
        return;
    }

    if (tybasic(e2.Ety) == TYnoreturn)
    {
        // e2 never yields a value, so the only result that can reach the
        // end comes from the short-circuit on e1: 1 for ||, 0 for &&.
        regm_t retregs2 = 0;
        codelem(cdb, e2, &retregs2, false);
        regconsave.used |= regcon.used;
        regcon = regconsave;
        assert(stackpush == stackpushsave);

        regm_t retregs = *pretregs & (ALLREGS | mBP);
        if (!retregs)
            retregs = ALLREGS;                                   // if mPSW only

        reg_t reg;
        allocreg(cdb1,&retregs,&reg,TYint);                     // allocate reg for result
        movregconst(cdb1,reg,e.Eoper == OPoror,*pretregs & mPSW);
        regcon.immed.mval &= ~mask(reg);                        // mark reg as unavail
        *pretregs = retregs;

        cdb.append(cnop3);
        cdb.append(cdb1);        // short-circuit path: load the constant result
        cgstate.stackclean--;
        return;
    }

    code *cnop2 = gennop(null);
    uint sz = tysize(e.Ety);
    if (tybasic(e2.Ety) == TYbool &&
      sz == tysize(e2.Ety) &&
      !(*pretregs & mPSW) &&
      e2.Eoper == OPcall)
    {
        // e2 is a call returning a bool of the result's size and no flags
        // are wanted: the value of e2 is used directly as the result, and
        // the short-circuit path loads the constant into the same register.
        codelem(cdb,e2,pretregs,false);

        andregcon(&regconsave);

        // stack depth should not change when evaluating E2
        assert(stackpush == stackpushsave);

        assert(sz <= 4);                                        // result better be int
        regm_t retregs = *pretregs & allregs;
        reg_t reg;
        allocreg(cdb1,&retregs,&reg,TYint);                     // allocate reg for result
        movregconst(cdb1,reg,e.Eoper == OPoror,0);             // reg = 1 for ||, 0 for &&
        regcon.immed.mval &= ~mask(reg);                        // mark reg as unavail
        *pretregs = retregs;
        if (e.Eoper == OPoror)
        {
            //          (e1, if true goto cnop1) (e2)
            //  cnop3:  JMP cnop2
            //  cnop1:  reg = 1
            //  cnop2:
            cdb.append(cnop3);
            genjmp(cdb,JMP,FLcode,cast(block *) cnop2);    // JMP cnop2
            cdb.append(cdb1);
            cdb.append(cnop2);
        }
        else
        {
            //          (e1, if false goto cnop3) (e2)
            //          JMP cnop2
            //  cnop3:
            //  cnop1:  reg = 0
            //  cnop2:
            genjmp(cdb,JMP,FLcode,cast(block *) cnop2);    // JMP cnop2
            cdb.append(cnop3);
            cdb.append(cdb1);
            cdb.append(cnop2);
        }
        cgstate.stackclean--;
        return;
    }

    // General case: e2 is evaluated as a condition as well, branching to
    // cnop1 when it is true; falling through means the result is false.
    logexp(cdb,e2,1,FLcode,cnop1);
    andregcon(&regconsave);

    // stack depth should not change when evaluating E2
    assert(stackpush == stackpushsave);

    assert(sz <= 4);                                         // result better be int
    regm_t retregs = *pretregs & (ALLREGS | mBP);
    if (!retregs)
        retregs = ALLREGS;                                   // if mPSW only

    // The register allocation code is generated once into its own builder
    // (cg, the "[save reg code]" of the header comment) and then duplicated
    // into cdb1, so that the false path and the true path each carry it.
    CodeBuilder cdbcg;
    cdbcg.ctor();
    reg_t reg;
    allocreg(cdbcg,&retregs,&reg,TYint);                     // allocate reg for result
    code *cg = cdbcg.finish();
    for (code *c1 = cg; c1; c1 = code_next(c1))              // for each instruction
        cdb1.gen(c1);                                        // duplicate it

    // False path: reg = 0, then skip over the true path
    CodeBuilder cdbcg2;
    cdbcg2.ctor();
    movregconst(cdbcg2,reg,0,*pretregs & mPSW);              // MOV reg,0
    regcon.immed.mval &= ~mask(reg);                         // mark reg as unavail
    genjmp(cdbcg2, JMP,FLcode,cast(block *) cnop2);              // JMP cnop2

    // True path (follows cnop1 in cdb1)
    movregconst(cdb1,reg,1,*pretregs & mPSW);                // reg = 1
    regcon.immed.mval &= ~mask(reg);                         // mark reg as unavail
    *pretregs = retregs;

    // Assemble: cnop3, cg, false path, (cnop1, cg copy, true path), cnop2
    cdb.append(cnop3);
    cdb.append(cg);
    cdb.append(cdbcg2);
    cdb.append(cdb1);
    cdb.append(cnop2);
    cgstate.stackclean--;
    return;
}
2736 
2737 
2738 /*********************
2739  * Generate code for shift left or shift right (OPshl,OPshr,OPashr,OProl,OPror).
2740  */
2741 
2742 @trusted
2743 void cdshift(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2744 {
2745     reg_t resreg;
2746     uint shiftcnt;
2747     regm_t retregs,rretregs;
2748 
2749     //printf("cdshift()\n");
2750     elem *e1 = e.EV.E1;
2751     if (*pretregs == 0)                   // if don't want result
2752     {
2753         codelem(cdb,e1,pretregs,false); // eval left leaf
2754         *pretregs = 0;                  // in case they got set
2755         codelem(cdb,e.EV.E2,pretregs,false);
2756         return;
2757     }
2758 
2759     tym_t tyml = tybasic(e1.Ety);
2760     int sz = _tysize[tyml];
2761     assert(!tyfloating(tyml));
2762     OPER oper = e.Eoper;
2763     uint grex = ((I64 && sz == 8) ? REX_W : 0) << 16;
2764 
2765 version (SCPP)
2766 {
2767     // Do this until the rest of the compiler does OPshr/OPashr correctly
2768     if (oper == OPshr)
2769         oper = (tyuns(tyml)) ? OPshr : OPashr;
2770 }
2771 
2772     uint s1,s2;
2773     switch (oper)
2774     {
2775         case OPshl:
2776             s1 = 4;                     // SHL
2777             s2 = 2;                     // RCL
2778             break;
2779         case OPshr:
2780             s1 = 5;                     // SHR
2781             s2 = 3;                     // RCR
2782             break;
2783         case OPashr:
2784             s1 = 7;                     // SAR
2785             s2 = 3;                     // RCR
2786             break;
2787         case OProl:
2788             s1 = 0;                     // ROL
2789             break;
2790         case OPror:
2791             s1 = 1;                     // ROR
2792             break;
2793         default:
2794             assert(0);
2795     }
2796 
2797     reg_t sreg = NOREG;                   // guard against using value without assigning to sreg
2798     elem *e2 = e.EV.E2;
2799     regm_t forccs = *pretregs & mPSW;            // if return result in CCs
2800     regm_t forregs = *pretregs & (ALLREGS | mBP); // mask of possible return regs
2801     bool e2isconst = false;                    // assume for the moment
2802     uint isbyte = (sz == 1);
2803     switch (e2.Eoper)
2804     {
2805         case OPconst:
2806             e2isconst = true;               // e2 is a constant
2807             shiftcnt = e2.EV.Vint;         // get shift count
2808             if ((!I16 && sz <= REGSIZE) ||
2809                 shiftcnt <= 4 ||            // if sequence of shifts
2810                 (sz == 2 &&
2811                     (shiftcnt == 8 || config.target_cpu >= TARGET_80286)) ||
2812                 (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE)
2813                )
2814             {
2815                 retregs = (forregs) ? forregs
2816                                     : ALLREGS;
2817                 if (isbyte)
2818                 {   retregs &= BYTEREGS;
2819                     if (!retregs)
2820                         retregs = BYTEREGS;
2821                 }
2822                 else if (sz > REGSIZE && sz <= 2 * REGSIZE &&
2823                          !(retregs & mMSW))
2824                     retregs |= mMSW & ALLREGS;
2825                 if (s1 == 7)    // if arithmetic right shift
2826                 {
2827                     if (shiftcnt == 8)
2828                         retregs = mAX;
2829                     else if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE)
2830                         retregs = mDX|mAX;
2831                 }
2832 
2833                 if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE &&
2834                     oper == OPshl &&
2835                     !e1.Ecount &&
2836                     (e1.Eoper == OPs16_32 || e1.Eoper == OPu16_32 ||
2837                      e1.Eoper == OPs32_64 || e1.Eoper == OPu32_64)
2838                    )
2839                 {   // Handle (shtlng)s << 16
2840                     regm_t r = retregs & mMSW;
2841                     codelem(cdb,e1.EV.E1,&r,false);      // eval left leaf
2842                     regwithvalue(cdb,retregs & mLSW,0,&resreg,0);
2843                     getregs(cdb,r);
2844                     retregs = r | mask(resreg);
2845                     if (forccs)
2846                     {   sreg = findreg(r);
2847                         gentstreg(cdb,sreg);
2848                         *pretregs &= ~mPSW;             // already set
2849                     }
2850                     freenode(e1);
2851                     freenode(e2);
2852                     break;
2853                 }
2854 
2855                 // See if we should use LEA reg,xxx instead of shift
2856                 if (!I16 && shiftcnt >= 1 && shiftcnt <= 3 &&
2857                     (sz == REGSIZE || (I64 && sz == 4)) &&
2858                     oper == OPshl &&
2859                     e1.Eoper == OPvar &&
2860                     !(*pretregs & mPSW) &&
2861                     config.flags4 & CFG4speed
2862                    )
2863                 {
2864                     reg_t reg;
2865                     regm_t regm;
2866 
2867                     if (isregvar(e1,&regm,&reg) && !(regm & retregs))
2868                     {   code cs;
2869                         allocreg(cdb,&retregs,&resreg,e.Ety);
2870                         buildEA(&cs,-1,reg,1 << shiftcnt,0);
2871                         cs.Iop = LEA;
2872                         code_newreg(&cs,resreg);
2873                         cs.Iflags = 0;
2874                         if (I64 && sz == 8)
2875                             cs.Irex |= REX_W;
2876                         cdb.gen(&cs);             // LEA resreg,[reg * ss]
2877                         freenode(e1);
2878                         freenode(e2);
2879                         break;
2880                     }
2881                 }
2882 
2883                 codelem(cdb,e1,&retregs,false); // eval left leaf
2884                 //assert((retregs & regcon.mvar) == 0);
2885                 getregs(cdb,retregs);          // modify these regs
2886 
2887                 {
2888                     if (sz == 2 * REGSIZE)
2889                     {   resreg = findregmsw(retregs);
2890                         sreg = findreglsw(retregs);
2891                     }
2892                     else
2893                     {   resreg = findreg(retregs);
2894                         sreg = NOREG;              // an invalid value
2895                     }
2896                     if (config.target_cpu >= TARGET_80286 &&
2897                         sz <= REGSIZE)
2898                     {
2899                         // SHL resreg,shiftcnt
2900                         assert(!(sz == 1 && (mask(resreg) & ~BYTEREGS)));
2901                         cdb.genc2(0xC1 ^ isbyte,grex | modregxrmx(3,s1,resreg),shiftcnt);
2902                         if (shiftcnt == 1)
2903                             cdb.last().Iop += 0x10;     // short form of shift
2904                         if (I64 && sz == 1 && resreg >= 4)
2905                             cdb.last().Irex |= REX;
2906                         // See if we need operand size prefix
2907                         if (!I16 && oper != OPshl && sz == 2)
2908                             cdb.last().Iflags |= CFopsize;
2909                         if (forccs)
2910                             cdb.last().Iflags |= CFpsw;         // need flags result
2911                     }
2912                     else if (shiftcnt == 8)
2913                     {   if (!(retregs & BYTEREGS) || resreg >= 4)
2914                         {
2915                             goto L1;
2916                         }
2917 
2918                         if (pass != BackendPass.final_ && (!forregs || forregs & (mSI | mDI)))
2919                         {
2920                             // e1 might get into SI or DI in a later pass,
2921                             // so don't put CX into a register
2922                             getregs(cdb,mCX);
2923                         }
2924 
2925                         assert(sz == 2);
2926                         switch (oper)
2927                         {
2928                             case OPshl:
2929                                 // MOV regH,regL        XOR regL,regL
2930                                 assert(resreg < 4 && !grex);
2931                                 genregs(cdb,0x8A,resreg+4,resreg);
2932                                 genregs(cdb,0x32,resreg,resreg);
2933                                 break;
2934 
2935                             case OPshr:
2936                             case OPashr:
2937                                 // MOV regL,regH
2938                                 genregs(cdb,0x8A,resreg,resreg+4);
2939                                 if (oper == OPashr)
2940                                     cdb.gen1(0x98);           // CBW
2941                                 else
2942                                     genregs(cdb,0x32,resreg+4,resreg+4); // CLR regH
2943                                 break;
2944 
2945                             case OPror:
2946                             case OProl:
2947                                 // XCHG regL,regH
2948                                 genregs(cdb,0x86,resreg+4,resreg);
2949                                 break;
2950 
2951                             default:
2952                                 assert(0);
2953                         }
2954                         if (forccs)
2955                             gentstreg(cdb,resreg);
2956                     }
2957                     else if (shiftcnt == REGSIZE * 8)   // it's an lword
2958                     {
2959                         if (oper == OPshl)
2960                             swap(&resreg, &sreg);
2961                         genmovreg(cdb,sreg,resreg);  // MOV sreg,resreg
2962                         if (oper == OPashr)
2963                             cdb.gen1(0x99);                       // CWD
2964                         else
2965                             movregconst(cdb,resreg,0,0);  // MOV resreg,0
2966                         if (forccs)
2967                         {
2968                             gentstreg(cdb,sreg);
2969                             *pretregs &= mBP | ALLREGS | mES;
2970                         }
2971                     }
2972                     else
2973                     {
2974                         if (oper == OPshl && sz == 2 * REGSIZE)
2975                             swap(&resreg, &sreg);
2976                         while (shiftcnt--)
2977                         {
2978                             cdb.gen2(0xD1 ^ isbyte,modregrm(3,s1,resreg));
2979                             if (sz == 2 * REGSIZE)
2980                             {
2981                                 code_orflag(cdb.last(),CFpsw);
2982                                 cdb.gen2(0xD1,modregrm(3,s2,sreg));
2983                             }
2984                         }
2985                         if (forccs)
2986                             code_orflag(cdb.last(),CFpsw);
2987                     }
2988                     if (sz <= REGSIZE)
2989                         *pretregs &= mBP | ALLREGS;     // flags already set
2990                 }
2991                 freenode(e2);
2992                 break;
2993             }
2994             goto default;
2995 
2996         default:
2997             retregs = forregs & ~mCX;               // CX will be shift count
2998             if (sz <= REGSIZE)
2999             {
3000                 if (forregs & ~regcon.mvar && !(retregs & ~regcon.mvar))
3001                     retregs = ALLREGS & ~mCX;       // need something
3002                 else if (!retregs)
3003                     retregs = ALLREGS & ~mCX;       // need something
3004                 if (sz == 1)
3005                 {   retregs &= mAX|mBX|mDX;
3006                     if (!retregs)
3007                         retregs = mAX|mBX|mDX;
3008                 }
3009             }
3010             else
3011             {
3012                 if (!(retregs & mMSW))
3013                     retregs = ALLREGS & ~mCX;
3014             }
3015             codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf
3016 
3017             if (sz <= REGSIZE)
3018                 resreg = findreg(retregs);
3019             else
3020             {
3021                 resreg = findregmsw(retregs);
3022                 sreg = findreglsw(retregs);
3023             }
3024         L1:
3025             rretregs = mCX;                 // CX is shift count
3026             if (sz <= REGSIZE)
3027             {
3028                 scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue
3029                 getregs(cdb,retregs);      // trash these regs
3030                 cdb.gen2(0xD3 ^ isbyte,grex | modregrmx(3,s1,resreg)); // Sxx resreg,CX
3031 
3032                 if (!I16 && sz == 2 && (oper == OProl || oper == OPror))
3033                     cdb.last().Iflags |= CFopsize;
3034 
3035                 // Note that a shift by CL does not set the flags if
3036                 // CL == 0. If e2 is a constant, we know it isn't 0
3037                 // (it would have been optimized out).
3038                 if (e2isconst)
3039                     *pretregs &= mBP | ALLREGS; // flags already set with result
3040             }
3041             else if (sz == 2 * REGSIZE &&
3042                      config.target_cpu >= TARGET_80386)
3043             {
3044                 reg_t hreg = resreg;
3045                 reg_t lreg = sreg;
3046                 uint rex = I64 ? (REX_W << 16) : 0;
3047                 if (e2isconst)
3048                 {
3049                     getregs(cdb,retregs);
3050                     if (shiftcnt & (REGSIZE * 8))
3051                     {
3052                         if (oper == OPshr)
3053                         {   //      SHR hreg,shiftcnt
3054                             //      MOV lreg,hreg
3055                             //      XOR hreg,hreg
3056                             cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),shiftcnt - (REGSIZE * 8));
3057                             genmovreg(cdb,lreg,hreg);
3058                             movregconst(cdb,hreg,0,0);
3059                         }
3060                         else if (oper == OPashr)
3061                         {   //      MOV     lreg,hreg
3062                             //      SAR     hreg,31
3063                             //      SHRD    lreg,hreg,shiftcnt
3064                             genmovreg(cdb,lreg,hreg);
3065                             cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),(REGSIZE * 8) - 1);
3066                             cdb.genc2(0x0FAC,rex | modregrm(3,hreg,lreg),shiftcnt - (REGSIZE * 8));
3067                         }
3068                         else
3069                         {   //      SHL lreg,shiftcnt
3070                             //      MOV hreg,lreg
3071                             //      XOR lreg,lreg
3072                             cdb.genc2(0xC1,rex | modregrm(3,s1,lreg),shiftcnt - (REGSIZE * 8));
3073                             genmovreg(cdb,hreg,lreg);
3074                             movregconst(cdb,lreg,0,0);
3075                         }
3076                     }
3077                     else
3078                     {
3079                         if (oper == OPshr || oper == OPashr)
3080                         {   //      SHRD    lreg,hreg,shiftcnt
3081                             //      SHR/SAR hreg,shiftcnt
3082                             cdb.genc2(0x0FAC,rex | modregrm(3,hreg,lreg),shiftcnt);
3083                             cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),shiftcnt);
3084                         }
3085                         else
3086                         {   //      SHLD hreg,lreg,shiftcnt
3087                             //      SHL  lreg,shiftcnt
3088                             cdb.genc2(0x0FA4,rex | modregrm(3,lreg,hreg),shiftcnt);
3089                             cdb.genc2(0xC1,rex | modregrm(3,s1,lreg),shiftcnt);
3090                         }
3091                     }
3092                     freenode(e2);
3093                 }
3094                 else if (config.target_cpu >= TARGET_80486 && REGSIZE == 2)
3095                 {
3096                     scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue in CX
3097                     getregs(cdb,retregs);          // modify these regs
3098                     if (oper == OPshl)
3099                     {
3100                         /*
3101                             SHLD    hreg,lreg,CL
3102                             SHL     lreg,CL
3103                          */
3104 
3105                         cdb.gen2(0x0FA5,modregrm(3,lreg,hreg));
3106                         cdb.gen2(0xD3,modregrm(3,4,lreg));
3107                     }
3108                     else
3109                     {
3110                         /*
3111                             SHRD    lreg,hreg,CL
3112                             SAR             hreg,CL
3113 
3114                             -- or --
3115 
3116                             SHRD    lreg,hreg,CL
3117                             SHR             hreg,CL
3118                          */
3119                         cdb.gen2(0x0FAD,modregrm(3,hreg,lreg));
3120                         cdb.gen2(0xD3,modregrm(3,s1,hreg));
3121                     }
3122                 }
3123                 else
3124                 {   code* cl1,cl2;
3125 
3126                     scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue in CX
3127                     getregs(cdb,retregs | mCX);     // modify these regs
3128                                                             // TEST CL,0x20
3129                     cdb.genc2(0xF6,modregrm(3,0,CX),REGSIZE * 8);
3130                     cl1 = gennop(null);
3131                     CodeBuilder cdb1;
3132                     cdb1.ctor();
3133                     cdb1.append(cl1);
3134                     if (oper == OPshl)
3135                     {
3136                         /*  TEST    CL,20H
3137                             JNE     L1
3138                             SHLD    hreg,lreg,CL
3139                             SHL     lreg,CL
3140                             JMP     L2
3141                         L1: AND     CL,20H-1
3142                             SHL     lreg,CL
3143                             MOV     hreg,lreg
3144                             XOR     lreg,lreg
3145                         L2: NOP
3146                          */
3147 
3148                         if (REGSIZE == 2)
3149                             cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
3150                         cdb1.gen2(0xD3,modregrm(3,4,lreg));
3151                         genmovreg(cdb1,hreg,lreg);
3152                         genregs(cdb1,0x31,lreg,lreg);
3153 
3154                         genjmp(cdb,JNE,FLcode,cast(block *)cl1);
3155                         cdb.gen2(0x0FA5,modregrm(3,lreg,hreg));
3156                         cdb.gen2(0xD3,modregrm(3,4,lreg));
3157                     }
3158                     else
3159                     {   if (oper == OPashr)
3160                         {
3161                             /*  TEST        CL,20H
3162                                 JNE         L1
3163                                 SHRD        lreg,hreg,CL
3164                                 SAR         hreg,CL
3165                                 JMP         L2
3166                             L1: AND         CL,15
3167                                 MOV         lreg,hreg
3168                                 SAR         hreg,31
3169                                 SHRD        lreg,hreg,CL
3170                             L2: NOP
3171                              */
3172 
3173                             if (REGSIZE == 2)
3174                                 cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
3175                             genmovreg(cdb1,lreg,hreg);
3176                             cdb1.genc2(0xC1,modregrm(3,s1,hreg),31);
3177                             cdb1.gen2(0x0FAD,modregrm(3,hreg,lreg));
3178                         }
3179                         else
3180                         {
3181                             /*  TEST        CL,20H
3182                                 JNE         L1
3183                                 SHRD        lreg,hreg,CL
3184                                 SHR         hreg,CL
3185                                 JMP         L2
3186                             L1: AND         CL,15
3187                                 SHR         hreg,CL
3188                                 MOV         lreg,hreg
3189                                 XOR         hreg,hreg
3190                             L2: NOP
3191                              */
3192 
3193                             if (REGSIZE == 2)
3194                                 cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
3195                             cdb1.gen2(0xD3,modregrm(3,5,hreg));
3196                             genmovreg(cdb1,lreg,hreg);
3197                             genregs(cdb1,0x31,hreg,hreg);
3198                         }
3199                         genjmp(cdb,JNE,FLcode,cast(block *)cl1);
3200                         cdb.gen2(0x0FAD,modregrm(3,hreg,lreg));
3201                         cdb.gen2(0xD3,modregrm(3,s1,hreg));
3202                     }
3203                     cl2 = gennop(null);
3204                     genjmp(cdb,JMPS,FLcode,cast(block *)cl2);
3205                     cdb.append(cdb1);
3206                     cdb.append(cl2);
3207                 }
3208                 break;
3209             }
3210             else if (sz == 2 * REGSIZE)
3211             {
3212                 scodelem(cdb,e2,&rretregs,retregs,false);
3213                 getregs(cdb,retregs | mCX);
3214                 if (oper == OPshl)
3215                     swap(&resreg, &sreg);
3216                 if (!e2isconst)                   // if not sure shift count != 0
3217                     cdb.genc2(0xE3,0,6);          // JCXZ .+6
3218                 cdb.gen2(0xD1,modregrm(3,s1,resreg));
3219                 code_orflag(cdb.last(),CFtarg2);
3220                 cdb.gen2(0xD1,modregrm(3,s2,sreg));
3221                 cdb.genc2(0xE2,0,cast(targ_uns)-6);          // LOOP .-6
3222                 regimmed_set(CX,0);         // note that now CX == 0
3223             }
3224             else
3225                 assert(0);
3226             break;
3227     }
3228     fixresult(cdb,e,retregs,pretregs);
3229 }
3230 
3231 
/***************************
 * Generate code for a 'star' reference (indirection): fetch the value
 * addressed by the pointer expression e.EV.E1.
 *
 * Params:
 *      cdb = code sink that receives the generated instructions
 *      e = the indirection elem
 *      pretregs = mask of registers/flags the result is wanted in; it is
 *                 handed to fixresult() (or to the x87 helpers) at the end
 */

@trusted
void cdind(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t resRegs;             // registers that end up holding the result
    reg_t dstReg;               // register chosen by allocreg()

    //printf("cdind(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    tym_t ty = tybasic(e.Ety);

    // With inline x87 code enabled, several floating point cases are delegated
    if (tyfloating(ty) && config.inline8087)
    {
        if (*pretregs & mST0)
        {
            cdind87(cdb, e, pretregs);
            return;
        }

        // A cfloat wanted in general registers on I64 falls through to the
        // code below; every other complex type is loaded by cload87()
        const cfloatInRegs = I64 && ty == TYcfloat && (*pretregs & (ALLREGS | mBP));
        if (!cfloatInRegs && tycomplex(ty))
        {
            cload87(cdb, e, pretregs);
            return;
        }

        if (*pretregs & mPSW)
        {
            cdind87(cdb, e, pretregs);
            return;
        }
    }

    elem *ptrExp = e.EV.E1;
    assert(ptrExp);

    if (ty == TYstruct || ty == TYarray)
    {
        // This case should never happen, why is it here?
        ty = TYnptr;                            // don't confuse allocreg()
        if ((*pretregs & (mES | mCX)) || (e.Ety & mTYfar))
            ty = TYfptr;
    }

    const uint nbytes = _tysize[ty];            // size of the value being loaded
    const uint byteOp = tybyte(ty) != 0;        // 1 for byte types, otherwise 0

    code ea;                                    // instruction template holding the address

    getlvalue(cdb,&ea,e,RMload);                // get addressing mode
    //printf("Irex = %02x, Irm = x%02x, Isib = x%02x\n", cs.Irex, cs.Irm, cs.Isib);
    //fprintf(stderr,"cd2 :\n"); WRcodlst(c);
    if (*pretregs == 0)
    {
        // Nobody wants the value: only a volatile access is still performed
        if (!(e.Ety & mTYvolatile))
            return;
        *pretregs = regmask(e.Ety, 0);          // load into registers
    }

    const regm_t addrRegs = idxregm(&ea);       // mask of index regs used

    if (*pretregs == mPSW)
    {
        // Only the flags are wanted
        if (!I16 && ty == TYfloat)
        {
            resRegs = ALLREGS & ~addrRegs;
            allocreg(cdb,&resRegs,&dstReg,TYfloat);
            ea.Iop = 0x8B;
            code_newreg(&ea,dstReg);
            cdb.gen(&ea);                           // MOV reg,lsw
            cdb.gen2(0xD1,modregrmx(3,4,dstReg));   // SHL reg,1
            code_orflag(cdb.last(), CFpsw);
        }
        else if (nbytes <= REGSIZE)
        {
            ea.Iop = 0x81 ^ byteOp;
            ea.Irm |= modregrm(0,7,0);
            ea.IFL2 = FLconst;
            ea.IEV2.Vsize_t = 0;
            cdb.gen(&ea);                           // CMP [idx],0
        }
        else if (!I16 && nbytes == REGSIZE + 2)     // if far pointer
        {
            resRegs = ALLREGS & ~addrRegs;
            allocreg(cdb,&resRegs,&dstReg,TYint);
            ea.Iop = MOVZXw;
            ea.Irm |= modregrm(0,dstReg,0);
            getlvalue_msw(&ea);
            cdb.gen(&ea);                           // MOVZX reg,msw
            goto LorLsw;
        }
        else if (nbytes <= 2 * REGSIZE)
        {
            resRegs = ALLREGS & ~addrRegs;
            allocreg(cdb,&resRegs,&dstReg,TYint);
            ea.Iop = 0x8B;
            code_newreg(&ea,dstReg);
            getlvalue_msw(&ea);
            cdb.gen(&ea);                           // MOV reg,msw
            if (I32)
            {
                if (ty == TYdouble || ty == TYdouble_alias)
                    cdb.gen2(0xD1,modregrm(3,4,dstReg));    // SHL reg,1
            }
            else if (ty == TYfloat)
                cdb.gen2(0xD1,modregrm(3,4,dstReg));        // SHL reg,1
        LorLsw:
            // Combine with the low half so the flags reflect the whole value
            ea.Iop = 0x0B;
            getlvalue_lsw(&ea);
            ea.Iflags |= CFpsw;
            cdb.gen(&ea);                           // OR reg,lsw
        }
        else if (!I32 && nbytes == 8)
        {
            *pretregs |= DOUBLEREGS_16;             // fake it for now
            goto LintoRegs;
        }
        else
        {
            debug printf("%s\n", tym_str(ty));
            assert(0);
        }
    }
    else                                        // else return result in reg
    {
    LintoRegs:
        resRegs = *pretregs;
        if (nbytes == 8 &&
            (resRegs & (mPSW | mSTACK | ALLREGS | mBP)) == mSTACK)
        {
            // Optimizer should not CSE these, as the result is worse code!
            assert(!e.Ecount);

            // Push the 8 bytes REGSIZE at a time, starting at the highest offset
            ea.Iop = 0xFF;
            ea.Irm |= modregrm(0,6,0);
            ea.IEV1.Voffset += 8 - REGSIZE;
            stackchanged = 1;
            int left = 8 - REGSIZE;
            do
            {
                cdb.gen(&ea);                       // PUSH EA+left
                cdb.genadjesp(REGSIZE);
                ea.IEV1.Voffset -= REGSIZE;
                stackpush += REGSIZE;
                left -= REGSIZE;
            }
            while (left >= 0);
            goto Ldone;
        }
        if (I16 && nbytes == 8)
            resRegs = DOUBLEREGS_16;

        // Watch out for loading an lptr from an lptr! We must have
        // the offset loaded into a different register.
        /*if (resRegs & mES && (ea.Iflags & CFSEG) == CFes)
                resRegs = ALLREGS;*/

        assert(!byteOp || (resRegs & BYTEREGS));
        allocreg(cdb,&resRegs,&dstReg,ty);          // alloc registers

        if (resRegs & XMMREGS)
        {
            assert(nbytes == 4 || nbytes == 8 || nbytes == 16 || nbytes == 32); // float, double or vector
            ea.Iop = xmmload(ty);
            ea.Irex &= ~REX_W;
            code_newreg(&ea,dstReg - XMM0);
            checkSetVex(&ea,ty);
            cdb.gen(&ea);                           // MOV reg,[idx]
        }
        else if (nbytes <= REGSIZE)
        {
            ea.Iop = 0x8B;                          // MOV
            if (nbytes <= 2 && !I16 &&
                config.target_cpu >= TARGET_PentiumPro && config.flags4 & CFG4speed)
            {
                ea.Iop = tyuns(ty) ? MOVZXw : MOVSXw;   // MOVZX/MOVSX
                ea.Iflags &= ~CFopsize;
            }
            ea.Iop ^= byteOp;
        LemitMov:
            code_newreg(&ea,dstReg);
            cdb.gen(&ea);                           // MOV reg,[idx]
            if (byteOp && dstReg >= 4)
                code_orrex(cdb.last(), REX);
        }
        else if ((ty == TYfptr || ty == TYhptr) && resRegs & mES)
        {
            ea.Iop = 0xC4;                          // LES reg,[idx]
            goto LemitMov;
        }
        else if (nbytes <= 2 * REGSIZE)
        {
            uint loReg;                             // register receiving the low half

            ea.Iop = 0x8B;
            // Be careful not to interfere with index registers
            if (!I16)
            {
                // Can't handle if both result registers are used in
                // the addressing mode.
                if ((resRegs & addrRegs) == resRegs)
                {
                    resRegs = mMSW & allregs & ~addrRegs;
                    if (!resRegs)
                        resRegs |= mCX;
                    resRegs |= mLSW & ~addrRegs;

                    // We can run out of registers, so if that's possible,
                    // give us *one* of the idxregs
                    if ((resRegs & ~regcon.mvar & mLSW) == 0)
                    {
                        const regm_t lent = addrRegs & mLSW;
                        if (lent)
                            resRegs |= mask(findreg(lent));     // give us one idxreg
                    }
                    else if ((resRegs & ~regcon.mvar & mMSW) == 0)
                    {
                        const regm_t lent = addrRegs & mMSW;
                        if (lent)
                            resRegs |= mask(findreg(lent));     // give us one idxreg
                    }

                    allocreg(cdb,&resRegs,&dstReg,ty);          // alloc registers
                    assert((resRegs & addrRegs) != resRegs);
                }

                loReg = findreglsw(resRegs);
                if (mask(dstReg) & addrRegs)            // dstReg is in addr mode
                {
                    // Load the low half first so dstReg is overwritten last
                    code_newreg(&ea,loReg);
                    cdb.gen(&ea);                       // MOV loReg,lsw
                    if (nbytes == REGSIZE + 2)
                        ea.Iflags |= CFopsize;
                    loReg = dstReg;
                    getlvalue_msw(&ea);                 // MOV dstReg,msw
                }
                else
                {
                    code_newreg(&ea,dstReg);
                    getlvalue_msw(&ea);
                    cdb.gen(&ea);                       // MOV dstReg,msw
                    if (nbytes == REGSIZE + 2)
                        cdb.last().Iflags |= CFopsize;
                    getlvalue_lsw(&ea);                 // MOV loReg,lsw
                }
                NEWREG(ea.Irm,loReg);
                cdb.gen(&ea);
            }
            else
            {
                // Index registers are always the lsw!
                ea.Irm |= modregrm(0,dstReg,0);
                getlvalue_msw(&ea);
                cdb.gen(&ea);                           // MOV dstReg,msw
                loReg = findreglsw(resRegs);
                NEWREG(ea.Irm,loReg);
                getlvalue_lsw(&ea);                     // MOV loReg,lsw
                cdb.gen(&ea);
            }
        }
        else if (I16 && nbytes == 8)
        {
            // 8 byte value spread over AX, BX, CX and DX
            assert(dstReg == AX);
            ea.Iop = 0x8B;
            ea.IEV1.Voffset += 6;
            cdb.gen(&ea);                           // MOV AX,EA+6
            ea.Irm |= modregrm(0,CX,0);
            ea.IEV1.Voffset -= 4;
            cdb.gen(&ea);                           // MOV CX,EA+2
            NEWREG(ea.Irm,DX);
            ea.IEV1.Voffset -= 2;
            cdb.gen(&ea);                           // MOV DX,EA
            ea.IEV1.Voffset += 4;
            NEWREG(ea.Irm,BX);
            cdb.gen(&ea);                           // MOV BX,EA+4
        }
        else
            assert(0);
    Ldone:
        fixresult(cdb,e,resRegs,pretregs);
    }
    //fprintf(stderr,"cdafter :\n"); WRcodlst(c);
}
3525 
3526 
3527 
/********************************
 * Generate code that loads ES with the segment implied by pointer type ty.
 * Nothing is generated for EX_flat targets or for pointer types not
 * listed in the switch below (far pointers among them).
 *
 * Params:
 *      ty = type of the pointer whose segment must end up in ES
 * Returns:
 *      null for EX_flat targets, otherwise whatever cdb.finish() yields
 *      for the (possibly empty) PUSH segreg / POP ES sequence
 */

@trusted
private code *cod2_setES(tym_t ty)
{
    if (config.exe & EX_flat)
        return null;

    CodeBuilder cdb;
    cdb.ctor();

    // Opcode of the PUSH segreg to emit; 0 means ES is left untouched
    int pushSeg = 0;
    const noEsEqDs = !(config.flags3 & CFG3eseqds);

    switch (tybasic(ty))
    {
        case TYnptr:
            if (noEsEqDs)
                pushSeg = 0x1E;         // PUSH DS
            break;

        case TYcptr:
            pushSeg = 0x0E;             // PUSH CS
            break;

        case TYsptr:
            if ((config.wflags & WFssneds) || noEsEqDs)
                pushSeg = 0x16;         // PUSH SS
            break;

        default:
            break;
    }

    if (pushSeg)
    {
        // Must load ES
        getregs(cdb,mES);
        cdb.gen1(pushSeg);
        cdb.gen1(0x07);                 // POP ES
    }
    return cdb.finish();
}
3570 
/********************************
 * Generate code for the strlen() intrinsic.
 *
 * Params:
 *      cdb = code sink that receives the generated instructions
 *      e = the strlen elem; e.EV.E1 is the string pointer
 *      pretregs = registers/flags the result is wanted in; mPSW is removed
 *                 from it once the flags are produced by the final DEC
 */

@trusted
void cdstrlen(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* The length is computed in CX:
        LES     DI,e1
        CLR     AX                      ;scan for 0
        MOV     CX,-1                   ;largest possible string
        REPNE   SCASB
        NOT     CX
        DEC     CX
     */

    elem *str = e.EV.E1;
    const tym_t strTy = str.Ety;

    // Pointer goes into DI, plus ES when tyreg() rejects its type
    regm_t ptrRegs = mDI;
    if (!tyreg(strTy))
        ptrRegs |= mES;
    codelem(cdb,str,&ptrRegs,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(strTy));

    const ubyte rexW = I64 ? REX_W : 0;

    getregs_imm(cdb,mAX | mCX);
    movregconst(cdb,AX,0,1);                                // MOV AL,0
    movregconst(cdb,CX,-cast(targ_size_t)1,I64 ? 64 : 0);   // MOV CX,-1
    getregs(cdb,mDI|mCX);
    cdb.gen1(0xF2);                                         // REPNE
    cdb.gen1(0xAE);                                         // SCASB
    genregs(cdb,0xF7,2,CX);                                 // NOT CX
    code_orrex(cdb.last(), rexW);
    if (I64)
        cdb.gen2(0xFF,(rexW << 16) | modregrm(3,1,CX));     // DEC reg
    else
        cdb.gen1(0x48 + CX);                                // DEC CX

    // When the flags are wanted, mark the DEC as setting them
    if (*pretregs & mPSW)
    {
        cdb.last().Iflags |= CFpsw;
        *pretregs &= ~mPSW;
    }
    fixresult(cdb,e,mCX,pretregs);
}
3618 
3619 
/*********************************
 * Generate code for the strcmp(s1,s2) intrinsic.
 *
 * Params:
 *      cdb = code sink that receives the generated instructions
 *      e = the strcmp elem; e.EV.E1 is s1 and e.EV.E2 is s2
 *      pretregs = registers/flags the result is wanted in; the value is
 *                 produced in AX and mPSW is cleared from the mask
 */

@trusted
void cdstrcmp(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Emitted sequence:
        MOV     SI,s1                   ;get destination pointer (s1)
        MOV     CX,s1+2
        LES     DI,s2                   ;get source pointer (s2)
        PUSH    DS
        MOV     DS,CX
        CLR     AX                      ;scan for 0
        MOV     CX,-1                   ;largest possible string
        REPNE   SCASB
        NOT     CX                      ;CX = string length of s2
        SUB     DI,CX                   ;point DI back to beginning
        REPE    CMPSB                   ;compare string
        POP     DS
        JE      L1                      ;strings are equal
        SBB     AX,AX
        SBB     AX,-1
    L1:
    */

    // s1 goes into SI, with CX added when tyreg() rejects its type
    const tym_t ty1 = e.EV.E1.Ety;
    regm_t s1Regs = mSI;
    if (!tyreg(ty1))
        s1Regs |= mCX;
    codelem(cdb,e.EV.E1,&s1Regs,false);

    // s2 goes into DI (and ES), without disturbing the s1 registers
    const tym_t ty2 = e.EV.E2.Ety;
    regm_t s2Regs = mDI;
    if (!tyreg(ty2))
        s2Regs |= mES;
    scodelem(cdb,e.EV.E2,&s2Regs,s1Regs,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty2));
    getregs_imm(cdb,mAX | mCX);

    const ubyte rexW = I64 ? REX_W : 0;

    // Load DS with right value; restoreDS records that DS was pushed
    bool restoreDS;
    int seg;
    switch (tybasic(ty1))
    {
        case TYnptr:
        case TYimmutPtr:
            restoreDS = false;
            break;

        case TYsptr:
        case TYcptr:
            if (tybasic(ty1) == TYcptr)
                seg = SEG_CS;
            else if (config.wflags & WFssneds)      // if sptr can't use DS segment
                seg = SEG_SS;
            else
                seg = SEG_DS;
            cdb.gen1(0x1E);                         // PUSH DS
            cdb.gen1(0x06 + (seg << 3));            // PUSH seg
            cdb.gen1(0x1F);                         // POP  DS
            restoreDS = true;
            break;

        case TYfptr:
        case TYvptr:
        case TYhptr:
            cdb.gen1(0x1E);                         // PUSH DS
            cdb.gen2(0x8E,modregrm(3,SEG_DS,CX));   // MOV DS,CX
            restoreDS = true;
            break;

        default:
            assert(0);
    }

    movregconst(cdb,AX,0,0);                                // MOV AX,0
    movregconst(cdb,CX,-cast(targ_size_t)1,I64 ? 64 : 0);   // MOV CX,-1
    getregs(cdb,mSI|mDI|mCX);
    cdb.gen1(0xF2);                             // REPNE
    cdb.gen1(0xAE);                             // SCASB
    genregs(cdb,0xF7,2,CX);                     // NOT CX
    code_orrex(cdb.last(),rexW);
    genregs(cdb,0x2B,DI,CX);                    // SUB DI,CX
    code_orrex(cdb.last(),rexW);
    cdb.gen1(0xF3);                             // REPE
    cdb.gen1(0xA6);                             // CMPSB
    if (restoreDS)
        cdb.gen1(0x1F);                         // POP DS

    code *equal = gennop(null);                 // label L1 of the sequence above
    if (*pretregs != mPSW)                      // if not flags only
    {
        genjmp(cdb,JE,FLcode,cast(block *) equal);  // JE L1
        getregs(cdb,mAX);
        genregs(cdb,0x1B,AX,AX);                // SBB AX,AX
        code_orrex(cdb.last(),rexW);
        cdb.genc2(0x81,(rexW << 16) | modregrm(3,3,AX),cast(targ_uns)-1);   // SBB AX,-1
    }

    *pretregs &= ~mPSW;
    cdb.append(equal);
    fixresult(cdb,e,mAX,pretregs);
}
3727 
/*********************************
 * Generate code for the memcmp(s1,s2,n) intrinsic.
 *
 * Params:
 *      cdb = code sink that receives the generated instructions
 *      e = the memcmp elem; e.EV.E1 must be an OPparam holding s1 and s2,
 *          e.EV.E2 is the byte count n
 *      pretregs = registers/flags the result is wanted in; the value is
 *                 produced in AX and mPSW is cleared from the mask
 */

@trusted
void cdmemcmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* Emitted sequence:
        MOV     SI,s1                   ;get destination pointer (s1)
        MOV     DX,s1+2
        LES     DI,s2                   ;get source pointer (s2)
        MOV     CX,n                    ;get number of bytes to compare
        PUSH    DS
        MOV     DS,DX
        XOR     AX,AX
        REPE    CMPSB                   ;compare string
        POP     DS
        JE      L1                      ;strings are equal
        SBB     AX,AX
        SBB     AX,-1
    L1:
    */

    elem *ptrs = e.EV.E1;
    assert(ptrs.Eoper == OPparam);

    // Get s1 into DX:SI
    const tym_t ty1 = ptrs.EV.E1.Ety;
    regm_t s1Regs = mSI;
    if (!tyreg(ty1))
        s1Regs |= mDX;
    codelem(cdb,ptrs.EV.E1,&s1Regs,false);

    // Get s2 into ES:DI
    const tym_t ty2 = ptrs.EV.E2.Ety;
    regm_t s2Regs = mDI;
    if (!tyreg(ty2))
        s2Regs |= mES;
    scodelem(cdb,ptrs.EV.E2,&s2Regs,s1Regs,false);
    freenode(ptrs);

    // Get nbytes into CX
    regm_t countRegs = mCX;
    scodelem(cdb,e.EV.E2,&countRegs,s2Regs | s1Regs,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty2));

    // Load DS with right value; restoreDS records that DS was pushed
    bool restoreDS;
    int seg;
    switch (tybasic(ty1))
    {
        case TYnptr:
        case TYimmutPtr:
            restoreDS = false;
            break;

        case TYsptr:
        case TYcptr:
            if (tybasic(ty1) == TYcptr)
                seg = SEG_CS;
            else if (config.wflags & WFssneds)      // if sptr can't use DS segment
                seg = SEG_SS;
            else
                seg = SEG_DS;
            cdb.gen1(0x1E);                         // PUSH DS
            cdb.gen1(0x06 + (seg << 3));            // PUSH seg
            cdb.gen1(0x1F);                         // POP  DS
            restoreDS = true;
            break;

        case TYfptr:
        case TYvptr:
        case TYhptr:
            cdb.gen1(0x1E);                         // PUSH DS
            cdb.gen2(0x8E,modregrm(3,SEG_DS,DX));   // MOV DS,DX
            restoreDS = true;
            break;

        default:
            assert(0);
    }

    // AX is cleared unconditionally, even when only the flags are wanted
    getregs(cdb,mAX);
    cdb.gen2(0x33,modregrm(3,AX,AX));           // XOR AX,AX
    code_orflag(cdb.last(), CFpsw);             // keep flags

    getregs(cdb,mCX | mSI | mDI);
    cdb.gen1(0xF3);                             // REPE
    cdb.gen1(0xA6);                             // CMPSB
    if (restoreDS)
        cdb.gen1(0x1F);                         // POP DS

    if (*pretregs != mPSW)                      // if not flags only
    {
        code *equal = gennop(null);             // label L1 of the sequence above
        genjmp(cdb,JE,FLcode,cast(block *) equal);  // JE L1
        getregs(cdb,mAX);
        genregs(cdb,0x1B,AX,AX);                // SBB AX,AX
        cdb.genc2(0x81,modregrm(3,3,AX),cast(targ_uns)-1);    // SBB AX,-1
        cdb.append(equal);
    }

    *pretregs &= ~mPSW;
    fixresult(cdb,e,mAX,pretregs);
}
3842 
/*********************************
 * Generate code for the strcpy(s1,s2) intrinsic.
 *
 * Params:
 *      cdb = code sink that receives the generated instructions
 *      e = the strcpy elem; e.EV.E1 is the destination s1 and
 *          e.EV.E2 is the source s2
 *      pretregs = registers the result is wanted in; when non-zero the
 *                 destination offset is copied to AX before the copy and
 *                 fixresult() is called with mAX | mES
 */

@trusted
void cdstrcpy(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* Emitted sequence:
        LES     DI,s2                   ;ES:DI = s2
        CLR     AX                      ;scan for 0
        MOV     CX,-1                   ;largest possible string
        REPNE   SCASB                   ;find end of s2
        NOT     CX                      ;CX = strlen(s2) + 1 (for EOS)
        SUB     DI,CX
        MOV     SI,DI
        PUSH    DS
        PUSH    ES
        LES     DI,s1
        POP     DS
        MOV     AX,DI                   ;return value is s1
        REP     MOVSB
        POP     DS
    */

    stackchanged = 1;

    // Source pointer s2 goes into DI (and ES)
    const tym_t ty2 = tybasic(e.EV.E2.Ety);
    regm_t ptrRegs = mDI;
    if (!tyreg(ty2))
        ptrRegs |= mES;
    const ubyte rexW = I64 ? REX_W : 0;
    codelem(cdb,e.EV.E2,&ptrRegs,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty2));
    getregs_imm(cdb,mAX | mCX);
    movregconst(cdb,AX,0,1);                    // MOV AL,0
    movregconst(cdb,CX,-1,I64?64:0);            // MOV CX,-1
    getregs(cdb,mAX|mCX|mSI|mDI);
    cdb.gen1(0xF2);                             // REPNE
    cdb.gen1(0xAE);                             // SCASB
    genregs(cdb,0xF7,2,CX);                     // NOT CX
    code_orrex(cdb.last(),rexW);
    genregs(cdb,0x2B,DI,CX);                    // SUB DI,CX
    code_orrex(cdb.last(),rexW);
    genmovreg(cdb,SI,DI);                       // MOV SI,DI

    // Pick the segment register that has to become DS for the source;
    // -1 means DS can be used as it is
    int seg = -1;
    switch (ty2)
    {
        case TYnptr:
        case TYimmutPtr:
            break;

        case TYsptr:
            // if sptr can't use DS segment, SS is pushed instead
            seg = (config.wflags & WFssneds) ? SEG_SS : SEG_DS;
            break;

        case TYcptr:
            seg = SEG_CS;
            break;

        case TYfptr:
        case TYvptr:
        case TYhptr:
            seg = SEG_ES;
            break;

        default:
            assert(0);
    }

    const bool restoreDS = seg != -1;
    if (restoreDS)
    {
        // Old DS and the new segment stay on the stack until after
        // the destination pointer has been evaluated
        cdb.gen1(0x1E);                         // PUSH DS
        cdb.gen1(0x06 + (seg << 3));            // PUSH seg
        cdb.genadjesp(REGSIZE * 2);
    }

    // Destination pointer s1 goes into DI (and ES), preserving CX and SI
    ptrRegs = mDI;
    const tym_t ty1 = tybasic(e.EV.E1.Ety);
    if (!tyreg(ty1))
        ptrRegs |= mES;
    scodelem(cdb,e.EV.E1,&ptrRegs,mCX|mSI,false);
    getregs(cdb,mAX|mCX|mSI|mDI);

    // Make sure ES contains proper segment value; when both pointers
    // are TYnptr, ES is already same as DS
    if (ty2 != TYnptr || ty1 != ty2)
        cdb.append(cod2_setES(ty1));

    if (restoreDS)
        cdb.gen1(0x1F);                         // POP DS (the pushed seg)
    if (*pretregs)
        genmovreg(cdb,AX,DI);                   // MOV AX,DI
    cdb.gen1(0xF3);                             // REP
    cdb.gen1(0xA4);                             // MOVSB

    if (restoreDS)
    {
        cdb.gen1(0x1F);                         // POP DS (the original DS)
        cdb.genadjesp(-(REGSIZE * 2));
    }
    fixresult(cdb,e,mAX | mES,pretregs);
}
3950 
3951 /*********************************
3952  * Generate code for memcpy(s1,s2,n) intrinsic.
3953  *  OPmemcpy
3954  *   /   \
3955  * s1   OPparam
3956  *       /   \
3957  *      s2    n
3958  */
3959 
3960 @trusted
3961 void cdmemcpy(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
3962 {
3963     char need_DS;
3964     int segreg;
3965 
3966     /*
3967         MOV     SI,s2
3968         MOV     DX,s2+2
3969         MOV     CX,n
3970         LES     DI,s1
3971         PUSH    DS
3972         MOV     DS,DX
3973         MOV     AX,DI                   ;return value is s1
3974         REP     MOVSB
3975         POP     DS
3976     */
3977 
3978     elem *e2 = e.EV.E2;
3979     assert(e2.Eoper == OPparam);
3980 
3981     // Get s2 into DX:SI
3982     regm_t retregs2 = mSI;
3983     tym_t ty2 = e2.EV.E1.Ety;
3984     if (!tyreg(ty2))
3985         retregs2 |= mDX;
3986     codelem(cdb,e2.EV.E1,&retregs2,false);
3987 
3988     // Need to check if nbytes is 0 (OPconst of 0 would have been removed by elmemcpy())
3989     const zeroCheck = e2.EV.E2.Eoper != OPconst;
3990 
3991     // Get nbytes into CX
3992     regm_t retregs3 = mCX;
3993     scodelem(cdb,e2.EV.E2,&retregs3,retregs2,false);
3994     freenode(e2);
3995 
3996     // Get s1 into ES:DI
3997     regm_t retregs1 = mDI;
3998     tym_t ty1 = e.EV.E1.Ety;
3999     if (!tyreg(ty1))
4000         retregs1 |= mES;
4001     scodelem(cdb,e.EV.E1,&retregs1,retregs2 | retregs3,false);
4002 
4003     ubyte rex = I64 ? REX_W : 0;
4004 
4005     // Make sure ES contains proper segment value
4006     cdb.append(cod2_setES(ty1));
4007 
4008     // Load DS with right value
4009     switch (tybasic(ty2))
4010     {
4011         case TYnptr:
4012         case TYimmutPtr:
4013             need_DS = false;
4014             break;
4015 
4016         case TYsptr:
4017             if (config.wflags & WFssneds)       // if sptr can't use DS segment
4018                 segreg = SEG_SS;
4019             else
4020                 segreg = SEG_DS;
4021             goto L1;
4022 
4023         case TYcptr:
4024             segreg = SEG_CS;
4025         L1:
4026             cdb.gen1(0x1E);                        // PUSH DS
4027             cdb.gen1(0x06 + (segreg << 3));        // PUSH segreg
4028             cdb.gen1(0x1F);                        // POP  DS
4029             need_DS = true;
4030             break;
4031 
4032         case TYfptr:
4033         case TYvptr:
4034         case TYhptr:
4035             cdb.gen1(0x1E);                        // PUSH DS
4036             cdb.gen2(0x8E,modregrm(3,SEG_DS,DX));  // MOV DS,DX
4037             need_DS = true;
4038             break;
4039 
4040         default:
4041             assert(0);
4042     }
4043 
4044     if (*pretregs)                              // if need return value
4045     {   getregs(cdb,mAX);
4046         genmovreg(cdb,AX,DI);
4047     }
4048 
4049     if (0 && I32 && config.flags4 & CFG4speed)
4050     {
4051         /* This is only faster if the memory is dword aligned, if not
4052          * it is significantly slower than just a rep movsb.
4053          */
4054         /*      mov     EDX,ECX
4055          *      shr     ECX,2
4056          *      jz      L1
4057          *      repe    movsd
4058          * L1:  nop
4059          *      and     EDX,3
4060          *      jz      L2
4061          *      mov     ECX,EDX
4062          *      repe    movsb
4063          * L2:  nop
4064          */
4065         getregs(cdb,mSI | mDI | mCX | mDX);
4066         genmovreg(cdb,DX,CX);                  // MOV EDX,ECX
4067         cdb.genc2(0xC1,modregrm(3,5,CX),2);                 // SHR ECX,2
4068         code *cx = gennop(null);
4069         genjmp(cdb, JE, FLcode, cast(block *)cx);  // JZ L1
4070         cdb.gen1(0xF3);                                     // REPE
4071         cdb.gen1(0xA5);                                     // MOVSW
4072         cdb.append(cx);
4073         cdb.genc2(0x81, modregrm(3,4,DX),3);                // AND EDX,3
4074 
4075         code *cnop = gennop(null);
4076         genjmp(cdb, JE, FLcode, cast(block *)cnop);  // JZ L2
4077         genmovreg(cdb,CX,DX);                    // MOV ECX,EDX
4078         cdb.gen1(0xF3);                          // REPE
4079         cdb.gen1(0xA4);                          // MOVSB
4080         cdb.append(cnop);
4081     }
4082     else
4083     {
4084         getregs(cdb,mSI | mDI | mCX);
4085         code* cnop;
4086         if (zeroCheck)
4087         {
4088             cnop = gennop(null);
4089             gentstreg(cdb,CX);                           // TEST ECX,ECX
4090             if (I64)
4091                 code_orrex(cdb.last, REX_W);
4092             genjmp(cdb, JE, FLcode, cast(block *)cnop);  // JZ cnop
4093         }
4094 
4095         if (I16 && config.flags4 & CFG4speed)          // if speed optimization
4096         {
4097             // Note this doesn't work if CX is 0
4098             cdb.gen2(0xD1,(rex << 16) | modregrm(3,5,CX));        // SHR CX,1
4099             cdb.gen1(0xF3);                              // REPE
4100             cdb.gen1(0xA5);                              // MOVSW
4101             cdb.gen2(0x11,(rex << 16) | modregrm(3,CX,CX));            // ADC CX,CX
4102         }
4103         cdb.gen1(0xF3);                             // REPE
4104         cdb.gen1(0xA4);                             // MOVSB
4105         if (zeroCheck)
4106             cdb.append(cnop);
4107         if (need_DS)
4108             cdb.gen1(0x1F);                         // POP DS
4109     }
4110     fixresult(cdb,e,mES|mAX,pretregs);
4111 }
4112 
4113 
4114 /*********************************
4115  * Generate code for memset(s,value,numbytes) intrinsic.
4116  *      (s OPmemset (numbytes OPparam value))
4117  */
4118 
4119 @trusted
4120 void cdmemset(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
4121 {
4122     regm_t retregs1;
4123     regm_t retregs3;
4124     reg_t reg;
4125     reg_t vreg;
4126     tym_t ty1;
4127     int segreg;
4128     targ_uns numbytes;
4129     uint m;
4130 
4131     //printf("cdmemset(*pretregs = %s)\n", regm_str(*pretregs));
4132     elem *e2 = e.EV.E2;
4133     assert(e2.Eoper == OPparam);
4134 
4135     elem* evalue = e2.EV.E2;
4136     elem* enumbytes = e2.EV.E1;
4137 
4138     const sz = tysize(evalue.Ety);
4139     if (sz > 1)
4140     {
4141         cdmemsetn(cdb, e, pretregs);
4142         return;
4143     }
4144 
4145     const grex = I64 ? (REX_W << 16) : 0;
4146 
4147     bool valueIsConst = false;
4148     targ_size_t value;
4149     if (evalue.Eoper == OPconst)
4150     {
4151         value = el_tolong(evalue) & 0xFF;
4152         value |= value << 8;
4153         if (I32 || I64)
4154         {
4155             value |= value << 16;
4156             static if (value.sizeof == 8)
4157             if (I64)
4158                 value |= value << 32;
4159         }
4160         valueIsConst = true;
4161     }
4162     else if (evalue.Eoper == OPstrpar)  // happens if evalue is a struct of 0 size
4163     {
4164         value = 0;
4165         valueIsConst = true;
4166     }
4167     else
4168         value = 0xDEADBEEF;     // stop annoying false positives that value is not inited
4169 
4170     if (enumbytes.Eoper == OPconst)
4171     {
4172         numbytes = cast(uint)cast(targ_size_t)el_tolong(enumbytes);
4173     }
4174 
4175     // Get nbytes into CX
4176     regm_t retregs2 = 0;
4177     if (enumbytes.Eoper != OPconst)
4178     {
4179         retregs2 = mCX;
4180         codelem(cdb,enumbytes,&retregs2,false);
4181     }
4182 
4183     // Get value into AX
4184     retregs3 = mAX;
4185     if (valueIsConst)
4186     {
4187         regwithvalue(cdb, mAX, value, null, I64?64:0);
4188         freenode(evalue);
4189     }
4190     else
4191     {
4192         scodelem(cdb,evalue,&retregs3,retregs2,false);
4193 
4194         getregs(cdb,mAX);
4195         if (I16)
4196         {
4197             cdb.gen2(0x8A,modregrm(3,AH,AL)); // MOV AH,AL
4198         }
4199         else if (I32)
4200         {
4201             genregs(cdb,MOVZXb,AX,AX);                    // MOVZX EAX,AL
4202             cdb.genc2(0x69,modregrm(3,AX,AX),0x01010101); // IMUL EAX,EAX,0x01010101
4203         }
4204         else
4205         {
4206             genregs(cdb,MOVZXb,AX,AX);                    // MOVZX EAX,AL
4207             regm_t regm = allregs & ~(mAX | retregs2);
4208             reg_t r;
4209             regwithvalue(cdb,regm,cast(targ_size_t)0x01010101_01010101,&r,64); // MOV reg,0x01010101_01010101
4210             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r));        // IMUL RAX,reg
4211         }
4212     }
4213     freenode(e2);
4214 
4215     // Get s into ES:DI
4216     retregs1 = mDI;
4217     ty1 = e.EV.E1.Ety;
4218     if (!tyreg(ty1))
4219         retregs1 |= mES;
4220     scodelem(cdb,e.EV.E1,&retregs1,retregs2 | retregs3,false);
4221     reg = DI; //findreg(retregs1);
4222 
4223     // Make sure ES contains proper segment value
4224     cdb.append(cod2_setES(ty1));
4225 
4226     if (*pretregs)                              // if need return value
4227     {
4228         getregs(cdb,mBX);
4229         genmovreg(cdb,BX,DI);                   // MOV EBX,EDI
4230     }
4231 
4232     if (enumbytes.Eoper == OPconst)
4233     {
4234         getregs(cdb,mDI);
4235         if (const numwords = numbytes / REGSIZE)
4236         {
4237             regwithvalue(cdb,mCX,numwords,null, I64 ? 64 : 0);
4238             getregs(cdb,mCX);
4239             cdb.gen1(0xF3);                     // REP
4240             cdb.gen1(STOS);                     // STOSW/D/Q
4241             if (I64)
4242                 code_orrex(cdb.last(), REX_W);
4243             regimmed_set(CX, 0);                // CX is now 0
4244         }
4245 
4246         auto remainder = numbytes & (REGSIZE - 1);
4247         if (I64 && remainder >= 4)
4248         {
4249             cdb.gen1(STOS);                     // STOSD
4250             remainder -= 4;
4251         }
4252         for (; remainder; --remainder)
4253             cdb.gen1(STOSB);                    // STOSB
4254         fixresult(cdb,e,mES|mBX,pretregs);
4255         return;
4256     }
4257 
4258     getregs(cdb,mDI | mCX);
4259     if (I16)
4260     {
4261         if (config.flags4 & CFG4speed)      // if speed optimization
4262         {
4263             cdb.gen2(0xD1,modregrm(3,5,CX));  // SHR CX,1
4264             cdb.gen1(0xF3);                   // REP
4265             cdb.gen1(STOS);                   // STOSW
4266             cdb.gen2(0x11,modregrm(3,CX,CX)); // ADC CX,CX
4267         }
4268         cdb.gen1(0xF3);                       // REP
4269         cdb.gen1(STOSB);                      // STOSB
4270         regimmed_set(CX, 0);                  // CX is now 0
4271         fixresult(cdb,e,mES|mBX,pretregs);
4272         return;
4273     }
4274 
4275     /*  MOV   sreg,ECX
4276         SHR   ECX,n
4277         REP
4278         STOSD/Q
4279 
4280         ADC   ECX,ECX
4281         REP
4282         STOSD
4283 
4284         MOV   ECX,sreg
4285         AND   ECX,3
4286         REP
4287         STOSB
4288      */
4289     regm_t regs = allregs & (*pretregs ? ~(mAX|mBX|mCX|mDI) : ~(mAX|mCX|mDI));
4290     reg_t sreg;
4291     allocreg(cdb,&regs,&sreg,TYint);
4292     genregs(cdb,0x89,CX,sreg);                        // MOV sreg,ECX (32 bits only)
4293 
4294     const n = I64 ? 3 : 2;
4295     cdb.genc2(0xC1, grex | modregrm(3,5,CX), n);      // SHR ECX,n
4296 
4297     cdb.gen1(0xF3);                                   // REP
4298     cdb.gen1(STOS);                                   // STOSD/Q
4299     if (I64)
4300         code_orrex(cdb.last(), REX_W);
4301 
4302     if (I64)
4303     {
4304         cdb.gen2(0x11,modregrm(3,CX,CX));             // ADC ECX,ECX
4305         cdb.gen1(0xF3);                               // REP
4306         cdb.gen1(STOS);                               // STOSD
4307     }
4308 
4309     genregs(cdb,0x89,sreg,CX);                        // MOV ECX,sreg (32 bits only)
4310     cdb.genc2(0x81, modregrm(3,4,CX), 3);             // AND ECX,3
4311     cdb.gen1(0xF3);                                   // REP
4312     cdb.gen1(STOSB);                                  // STOSB
4313 
4314     regimmed_set(CX, 0);                    // CX is now 0
4315     fixresult(cdb,e,mES|mBX,pretregs);
4316 }
4317 
4318 /***********************************************
4319  * Do memset for values larger than a byte.
4320  * Has many similarities to cod4.cdeq().
4321  * Doesn't work for 16 bit code.
4322  */
4323 @trusted
4324 private void cdmemsetn(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
4325 {
4326     //printf("cdmemsetn(*pretregs = %s)\n", regm_str(*pretregs));
4327     elem *e2 = e.EV.E2;
4328     assert(e2.Eoper == OPparam);
4329 
4330     elem* evalue = e2.EV.E2;
4331     elem* enelems = e2.EV.E1;
4332 
4333     tym_t tymv = tybasic(evalue.Ety);
4334     const sz = tysize(evalue.Ety);
4335     assert(cast(int)sz > 1);
4336 
4337     if (tyxmmreg(tymv) && config.fpxmmregs)
4338         assert(0);      // fix later
4339     if (tyfloating(tymv) && config.inline8087)
4340         assert(0);      // fix later
4341 
4342     const grex = I64 ? (REX_W << 16) : 0;
4343 
4344     // get the count of elems into CX
4345     regm_t mregcx = mCX;
4346     codelem(cdb,enelems,&mregcx,false);
4347 
4348     // Get value into AX
4349     regm_t retregs3 = allregs & ~mregcx;
4350     if (sz == 2 * REGSIZE)
4351         retregs3 &= ~(mBP | IDXREGS);  // BP cannot be used for register pair,
4352                                        // IDXREGS could deplete index regs - see sdtor.d test14815()
4353     scodelem(cdb,evalue,&retregs3,mregcx,false);
4354 
4355     /* Necessary because if evalue calls a function, and that function never returns,
4356      * it doesn't affect registers. Which means those registers can be used for enregistering
4357      * variables, and next pass fails because it can't use those registers, and so cannot
4358      * allocate registers for retregs3. See ice11596.d
4359      */
4360     useregs(retregs3);
4361 
4362     reg_t valreg = findreg(retregs3);
4363     reg_t valreghi;
4364     if (sz == 2 * REGSIZE)
4365     {
4366         valreg = findreglsw(retregs3);
4367         valreghi = findregmsw(retregs3);
4368     }
4369 
4370     freenode(e2);
4371 
4372     // Get s into ES:DI
4373     regm_t mregidx = IDXREGS & ~(mregcx | retregs3);
4374     assert(mregidx);
4375     tym_t ty1 = tybasic(e.EV.E1.Ety);
4376     if (!tyreg(ty1))
4377         mregidx |= mES;
4378     scodelem(cdb,e.EV.E1,&mregidx,mregcx | retregs3,false);
4379     reg_t idxreg = findreg(mregidx);
4380 
4381     // Make sure ES contains proper segment value
4382     cdb.append(cod2_setES(ty1));
4383 
4384     regm_t mregbx = 0;
4385     if (*pretregs)                              // if need return value
4386     {
4387         mregbx = *pretregs & ~(mregidx | mregcx | retregs3);
4388         if (!mregbx)
4389             mregbx = allregs & ~(mregidx | mregcx | retregs3);
4390         reg_t regbx;
4391         allocreg(cdb, &mregbx, &regbx, TYnptr);
4392         getregs(cdb, mregbx);
4393         genmovreg(cdb,regbx,idxreg);            // MOV BX,DI
4394     }
4395 
4396     getregs(cdb,mask(idxreg) | mCX);            // modify DI and CX
4397 
4398     /* Generate:
4399      *  JCXZ L1
4400      * L2:
4401      *  MOV [idxreg],AX
4402      *  ADD idxreg,sz
4403      *  LOOP L2
4404      * L1:
4405      *  NOP
4406      */
4407     code* c1 = gennop(null);
4408     genjmp(cdb, JCXZ, FLcode, cast(block *)c1);
4409     code cs;
4410     buildEA(&cs,idxreg,-1,1,0);
4411     cs.Iop = 0x89;
4412     if (!I16 && sz == 2)
4413         cs.Iflags |= CFopsize;
4414     if (I64 && sz == 8)
4415         cs.Irex |= REX_W;
4416     code_newreg(&cs, valreg);
4417     cdb.gen(&cs);                                       // MOV [idxreg],AX
4418     code* c2 = cdb.last();
4419     if (sz == REGSIZE * 2)
4420     {
4421         cs.IEV1.Vuns = REGSIZE;
4422         code_newreg(&cs, valreghi);
4423         cdb.gen(&cs);                                   // MOV REGSIZE[idxreg],DX
4424     }
4425     cdb.genc2(0x81, grex | modregrmx(3,0,idxreg), sz);  // ADD idxreg,sz
4426     genjmp(cdb, LOOP, FLcode, cast(block *)c2);         // LOOP L2
4427     cdb.append(c1);
4428 
4429     regimmed_set(CX, 0);                  // CX is now 0
4430 
4431     fixresult(cdb,e,mregbx,pretregs);
4432 }
4433 
4434 /**********************
4435  * Do structure assignments.
4436  * This should be fixed so that (s1 = s2) is rewritten to (&s1 = &s2).
4437  * Mebbe call cdstreq() for double assignments???
4438  */
4439 
4440 @trusted
4441 void cdstreq(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
4442 {
4443     char need_DS = false;
4444     elem *e1 = e.EV.E1;
4445     elem *e2 = e.EV.E2;
4446     int segreg;
4447     uint numbytes = cast(uint)type_size(e.ET);          // # of bytes in structure/union
4448     ubyte rex = I64 ? REX_W : 0;
4449 
4450     //printf("cdstreq(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
4451 
4452     // First, load pointer to rvalue into SI
4453     regm_t srcregs = mSI;                      // source is DS:SI
4454     docommas(cdb,&e2);
4455     if (e2.Eoper == OPind)             // if (.. = *p)
4456     {   elem *e21 = e2.EV.E1;
4457 
4458         segreg = SEG_DS;
4459         switch (tybasic(e21.Ety))
4460         {
4461             case TYsptr:
4462                 if (config.wflags & WFssneds)   // if sptr can't use DS segment
4463                     segreg = SEG_SS;
4464                 break;
4465             case TYcptr:
4466                 if (!(config.exe & EX_flat))
4467                     segreg = SEG_CS;
4468                 break;
4469             case TYfptr:
4470             case TYvptr:
4471             case TYhptr:
4472                 srcregs |= mCX;         // get segment also
4473                 need_DS = true;
4474                 break;
4475 
4476             default:
4477                 break;
4478         }
4479         codelem(cdb,e21,&srcregs,false);
4480         freenode(e2);
4481         if (segreg != SEG_DS)           // if not DS
4482         {
4483             getregs(cdb,mCX);
4484             cdb.gen2(0x8C,modregrm(3,segreg,CX)); // MOV CX,segreg
4485             need_DS = true;
4486         }
4487     }
4488     else if (e2.Eoper == OPvar)
4489     {
4490         if (e2.EV.Vsym.ty() & mTYfar) // if e2 is in a far segment
4491         {   srcregs |= mCX;             // get segment also
4492             need_DS = true;
4493             cdrelconst(cdb,e2,&srcregs);
4494         }
4495         else
4496         {
4497             segreg = segfl[el_fl(e2)];
4498             if ((config.wflags & WFssneds) && segreg == SEG_SS || // if source is on stack
4499                 segreg == SEG_CS)               // if source is in CS
4500             {
4501                 need_DS = true;         // we need to reload DS
4502                 // Load CX with segment
4503                 srcregs |= mCX;
4504                 getregs(cdb,mCX);
4505                 cdb.gen2(0x8C,                // MOV CX,[SS|CS]
4506                     modregrm(3,segreg,CX));
4507             }
4508             cdrelconst(cdb,e2,&srcregs);
4509         }
4510         freenode(e2);
4511     }
4512     else
4513     {
4514         if (!(config.exe & EX_flat))
4515         {   need_DS = true;
4516             srcregs |= mCX;
4517         }
4518         codelem(cdb,e2,&srcregs,false);
4519     }
4520 
4521     // now get pointer to lvalue (destination) in ES:DI
4522     regm_t dstregs = (config.exe & EX_flat) ? mDI : mES|mDI;
4523     if (e1.Eoper == OPind)               // if (*p = ..)
4524     {
4525         if (tyreg(e1.EV.E1.Ety))
4526             dstregs = mDI;
4527         cdb.append(cod2_setES(e1.EV.E1.Ety));
4528         scodelem(cdb,e1.EV.E1,&dstregs,srcregs,false);
4529     }
4530     else
4531         cdrelconst(cdb,e1,&dstregs);
4532     freenode(e1);
4533 
4534     getregs(cdb,(srcregs | dstregs) & (mLSW | mDI));
4535     if (need_DS)
4536     {     assert(!(config.exe & EX_flat));
4537         cdb.gen1(0x1E);                     // PUSH DS
4538         cdb.gen2(0x8E,modregrm(3,SEG_DS,CX));    // MOV DS,CX
4539     }
4540     if (numbytes <= REGSIZE * (6 + (REGSIZE == 4)))
4541     {
4542         while (numbytes >= REGSIZE)
4543         {
4544             cdb.gen1(0xA5);         // MOVSW
4545             code_orrex(cdb.last(), rex);
4546             numbytes -= REGSIZE;
4547         }
4548         //if (numbytes)
4549         //    printf("cdstreq numbytes %d\n",numbytes);
4550         if (I64 && numbytes >= 4)
4551         {
4552             cdb.gen1(0xA5);         // MOVSD
4553             numbytes -= 4;
4554         }
4555         while (numbytes--)
4556             cdb.gen1(0xA4);         // MOVSB
4557     }
4558     else
4559     {
4560 static if (1)
4561 {
4562         uint remainder = numbytes & (REGSIZE - 1);
4563         numbytes /= REGSIZE;            // number of words
4564         getregs_imm(cdb,mCX);
4565         movregconst(cdb,CX,numbytes,0);   // # of bytes/words
4566         cdb.gen1(0xF3);                 // REP
4567         if (REGSIZE == 8)
4568             cdb.gen1(REX | REX_W);
4569         cdb.gen1(0xA5);                 // REP MOVSD
4570         regimmed_set(CX,0);             // note that CX == 0
4571         if (I64 && remainder >= 4)
4572         {
4573             cdb.gen1(0xA5);         // MOVSD
4574             remainder -= 4;
4575         }
4576         for (; remainder; remainder--)
4577         {
4578             cdb.gen1(0xA4);             // MOVSB
4579         }
4580 }
4581 else
4582 {
4583         uint movs;
4584         if (numbytes & (REGSIZE - 1))   // if odd
4585             movs = 0xA4;                // MOVSB
4586         else
4587         {
4588             movs = 0xA5;                // MOVSW
4589             numbytes /= REGSIZE;        // # of words
4590         }
4591         getregs_imm(cdb,mCX);
4592         movregconst(cdb,CX,numbytes,0);   // # of bytes/words
4593         cdb.gen1(0xF3);                 // REP
4594         cdb.gen1(movs);
4595         regimmed_set(CX,0);             // note that CX == 0
4596 }
4597     }
4598     if (need_DS)
4599         cdb.gen1(0x1F);                 // POP  DS
4600     assert(!(*pretregs & mPSW));
4601     if (*pretregs)
4602     {   // ES:DI points past what we want
4603 
4604         cdb.genc2(0x81,(rex << 16) | modregrm(3,5,DI), type_size(e.ET));   // SUB DI,numbytes
4605 
4606         const tym = tybasic(e.Ety);
4607         if (tym == TYucent && I64)
4608         {
4609             /* https://issues.dlang.org/show_bug.cgi?id=22175
4610              * The trouble happens when the struct size does not fit exactly into
4611              * 2 registers. Then the type of e becomes a TYucent, not a TYstruct,
4612              * and we need to dereference DI to get the ucent
4613              */
4614 
4615             // dereference DI
4616             code cs;
4617             cs.Iop = 0x8B;
4618             regm_t retregs = *pretregs;
4619             reg_t reg;
4620             allocreg(cdb,&retregs,&reg,tym);
4621 
4622             reg_t msreg = findregmsw(retregs);
4623             buildEA(&cs,DI,-1,1,REGSIZE);
4624             code_newreg(&cs,msreg);
4625             cs.Irex |= REX_W;
4626             cdb.gen(&cs);       // MOV msreg,REGSIZE[DI]        // msreg is never DI
4627 
4628             reg_t lsreg = findreglsw(retregs);
4629             buildEA(&cs,DI,-1,1,0);
4630             code_newreg(&cs,lsreg);
4631             cs.Irex |= REX_W;
4632             cdb.gen(&cs);       // MOV lsreg,[DI];
4633             fixresult(cdb,e,retregs,pretregs);
4634             return;
4635         }
4636 
4637         regm_t retregs = mDI;
4638         if (*pretregs & mMSW && !(config.exe & EX_flat))
4639             retregs |= mES;
4640         fixresult(cdb,e,retregs,pretregs);
4641     }
4642 }
4643 
4644 
4645 /**********************
4646  * Get the address of.
4647  * Is also called by cdstreq() to set up pointer to a structure.
4648  */
4649 
4650 @trusted
4651 void cdrelconst(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
4652 {
4653     //printf("cdrelconst(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
4654 
4655     /* The following should not happen, but cgelem.c is a little stupid.
4656      * Assertion can be tripped by func("string" == 0); and similar
4657      * things. Need to add goals to optelem() to fix this completely.
4658      */
4659     //assert((*pretregs & mPSW) == 0);
4660     if (*pretregs & mPSW)
4661     {
4662         *pretregs &= ~mPSW;
4663         gentstreg(cdb,SP);            // SP is never 0
4664         if (I64)
4665             code_orrex(cdb.last(), REX_W);
4666     }
4667     if (!*pretregs)
4668         return;
4669 
4670     assert(e);
4671     tym_t tym = tybasic(e.Ety);
4672     switch (tym)
4673     {
4674         case TYstruct:
4675         case TYarray:
4676         case TYldouble:
4677         case TYildouble:
4678         case TYcldouble:
4679             tym = TYnptr;               // don't confuse allocreg()
4680             if (*pretregs & (mES | mCX) || e.Ety & mTYfar)
4681             {
4682                 tym = TYfptr;
4683             }
4684             break;
4685 
4686         case TYifunc:
4687             tym = TYfptr;
4688             break;
4689 
4690         default:
4691             if (tyfunc(tym))
4692                 tym =
4693                     tyfarfunc(tym) ? TYfptr :
4694                     TYnptr;
4695             break;
4696     }
4697     //assert(tym & typtr);              // don't fail on (int)&a
4698 
4699     SC sclass;
4700     reg_t mreg,            // segment of the address (TYfptrs only)
4701           lreg;            // offset of the address
4702 
4703     allocreg(cdb,pretregs,&lreg,tym);
4704     if (_tysize[tym] > REGSIZE)            // fptr could've been cast to long
4705     {
4706         if (*pretregs & mES)
4707         {
4708             /* Do not allocate CX or SI here, as cdstreq() needs
4709              * them preserved. cdstreq() should use scodelem()
4710              */
4711             mreg = allocScratchReg(cdb, (mAX|mBX|mDX|mDI) & ~mask(lreg));
4712         }
4713         else
4714         {
4715             mreg = lreg;
4716             lreg = findreglsw(*pretregs);
4717         }
4718 
4719         /* if (get segment of function that isn't necessarily in the
4720          * current segment (i.e. CS doesn't have the right value in it)
4721          */
4722         Symbol *s = e.EV.Vsym;
4723         if (s.Sfl == FLdatseg)
4724         {   assert(0);
4725         }
4726         sclass = s.Sclass;
4727         const ety = tybasic(s.ty());
4728         if ((tyfarfunc(ety) || ety == TYifunc) &&
4729             (sclass == SC.extern_ || ClassInline(sclass) || config.wflags & WFthunk)
4730             || s.Sfl == FLfardata
4731             || (s.ty() & mTYcs && s.Sseg != cseg && (LARGECODE || s.Sclass == SC.comdat))
4732            )
4733         {   // MOV mreg,seg of symbol
4734             cdb.gencs(0xB8 + mreg,0,FLextern,s);
4735             cdb.last().Iflags = CFseg;
4736         }
4737         else
4738         {
4739             const fl = (s.ty() & mTYcs) ? FLcsdata : s.Sfl;
4740             cdb.gen2(0x8C,            // MOV mreg,SEG REGISTER
4741                 modregrm(3,segfl[fl],mreg));
4742         }
4743         if (*pretregs & mES)
4744             cdb.gen2(0x8E,modregrm(3,0,mreg));        // MOV ES,mreg
4745     }
4746     getoffset(cdb,e,lreg);
4747 }
4748 
4749 /*********************************
4750  * Load the offset portion of the address represented by e into
4751  * reg.
4752  */
4753 
4754 @trusted
4755 void getoffset(ref CodeBuilder cdb,elem *e,reg_t reg)
4756 {
4757     //printf("getoffset(e = %p, reg = %d)\n", e, reg);
4758     code cs = void;
4759     cs.Iflags = 0;
4760     ubyte rex = 0;
4761     cs.Irex = rex;
4762     assert(e.Eoper == OPvar || e.Eoper == OPrelconst);
4763     auto fl = el_fl(e);
4764     switch (fl)
4765     {
4766         case FLdatseg:
4767             cs.IEV2.Vpointer = e.EV.Vpointer;
4768             goto L3;
4769 
4770         case FLfardata:
4771             goto L4;
4772 
4773         case FLtlsdata:
4774         if (config.exe & EX_posix)
4775         {
4776           Lposix:
4777             if (config.flags3 & CFG3pic)
4778             {
4779                 if (I64)
4780                 {
4781                     /* Generate:
4782                      *   LEA DI,s@TLSGD[RIP]
4783                      */
4784                     //assert(reg == DI);
4785                     code css = void;
4786                     css.Irex = REX | REX_W;
4787                     css.Iop = LEA;
4788                     css.Irm = modregrm(0,reg,5);
4789                     if (reg & 8)
4790                         css.Irex |= REX_R;
4791                     css.Iflags = CFopsize;
4792                     css.IFL1 = cast(ubyte)fl;
4793                     css.IEV1.Vsym = e.EV.Vsym;
4794                     css.IEV1.Voffset = e.EV.Voffset;
4795                     cdb.gen(&css);
4796                 }
4797                 else
4798                 {
4799                     /* Generate:
4800                      *   LEA EAX,s@TLSGD[1*EBX+0]
4801                      */
4802                     assert(reg == AX);
4803                     load_localgot(cdb);
4804                     code css = void;
4805                     css.Iflags = 0;
4806                     css.Iop = LEA;             // LEA
4807                     css.Irex = 0;
4808                     css.Irm = modregrm(0,AX,4);
4809                     css.Isib = modregrm(0,BX,5);
4810                     css.IFL1 = cast(ubyte)fl;
4811                     css.IEV1.Vsym = e.EV.Vsym;
4812                     css.IEV1.Voffset = e.EV.Voffset;
4813                     cdb.gen(&css);
4814                 }
4815                 return;
4816             }
4817             /* Generate:
4818              *      MOV reg,GS:[00000000]
4819              *      ADD reg, offset s@TLS_LE
4820              * for locals, and for globals:
4821              *      MOV reg,GS:[00000000]
4822              *      ADD reg, s@TLS_IE
4823              * note different fixup
4824              */
4825             int stack = 0;
4826             if (reg == STACK)
4827             {   regm_t retregs = ALLREGS;
4828 
4829                 reg_t regx;
4830                 allocreg(cdb,&retregs,&regx,TYoffset);
4831                 reg = findreg(retregs);
4832                 stack = 1;
4833             }
4834 
4835             code css = void;
4836             css.Irex = rex;
4837             css.Iop = 0x8B;
4838             css.Irm = modregrm(0, 0, BPRM);
4839             code_newreg(&css, reg);
4840             css.Iflags = CFgs;
4841             css.IFL1 = FLconst;
4842             css.IEV1.Vuns = 0;
4843             cdb.gen(&css);               // MOV reg,GS:[00000000]
4844 
4845             if (e.EV.Vsym.Sclass == SC.static_ || e.EV.Vsym.Sclass == SC.locstat)
4846             {   // ADD reg, offset s
4847                 cs.Irex = rex;
4848                 cs.Iop = 0x81;
4849                 cs.Irm = modregrm(3,0,reg & 7);
4850                 if (reg & 8)
4851                     cs.Irex |= REX_B;
4852                 cs.Iflags = CFoff;
4853                 cs.IFL2 = cast(ubyte)fl;
4854                 cs.IEV2.Vsym = e.EV.Vsym;
4855                 cs.IEV2.Voffset = e.EV.Voffset;
4856             }
4857             else
4858             {   // ADD reg, s
4859                 cs.Irex = rex;
4860                 cs.Iop = 0x03;
4861                 cs.Irm = modregrm(0,0,BPRM);
4862                 code_newreg(&cs, reg);
4863                 cs.Iflags = CFoff;
4864                 cs.IFL1 = cast(ubyte)fl;
4865                 cs.IEV1.Vsym = e.EV.Vsym;
4866                 cs.IEV1.Voffset = e.EV.Voffset;
4867             }
4868             cdb.gen(&cs);                // ADD reg, xxxx
4869 
4870             if (stack)
4871             {
4872                 cdb.gen1(0x50 + (reg & 7));      // PUSH reg
4873                 if (reg & 8)
4874                     code_orrex(cdb.last(), REX_B);
4875                 cdb.genadjesp(REGSIZE);
4876                 stackchanged = 1;
4877             }
4878             break;
4879         }
4880         else if (config.exe & EX_windos)
4881         {
4882             if (I64)
4883             {
4884             Lwin64:
4885                 assert(reg != STACK);
4886                 cs.IEV2.Vsym = e.EV.Vsym;
4887                 cs.IEV2.Voffset = e.EV.Voffset;
4888                 cs.Iop = 0xB8 + (reg & 7);      // MOV Ereg,offset s
4889                 if (reg & 8)
4890                     cs.Irex |= REX_B;
4891                 cs.Iflags = CFoff;              // want offset only
4892                 cs.IFL2 = cast(ubyte)fl;
4893                 cdb.gen(&cs);
4894                 break;
4895             }
4896             goto L4;
4897         }
4898         else
4899         {
4900             goto L4;
4901         }
4902 
4903         case FLfunc:
4904             fl = FLextern;                  /* don't want PC relative addresses */
4905             goto L4;
4906 
4907         case FLextern:
4908             if (config.exe & EX_posix && e.EV.Vsym.ty() & mTYthread)
4909                 goto Lposix;
4910             if (config.exe & EX_WIN64 && e.EV.Vsym.ty() & mTYthread)
4911                 goto Lwin64;
4912             goto L4;
4913 
4914         case FLdata:
4915         case FLudata:
4916         case FLgot:
4917         case FLgotoff:
4918         case FLcsdata:
4919         L4:
4920             cs.IEV2.Vsym = e.EV.Vsym;
4921             cs.IEV2.Voffset = e.EV.Voffset;
4922         L3:
4923             if (reg == STACK)
4924             {   stackchanged = 1;
4925                 cs.Iop = 0x68;              /* PUSH immed16                 */
4926                 cdb.genadjesp(REGSIZE);
4927             }
4928             else
4929             {   cs.Iop = 0xB8 + (reg & 7);  // MOV reg,immed16
4930                 if (reg & 8)
4931                     cs.Irex |= REX_B;
4932                 if (I64)
4933                 {   cs.Irex |= REX_W;
4934                     if (config.flags3 & CFG3pic || config.exe == EX_WIN64)
4935                     {   // LEA reg,immed32[RIP]
4936                         cs.Iop = LEA;
4937                         cs.Irm = modregrm(0,reg & 7,5);
4938                         if (reg & 8)
4939                             cs.Irex = (cs.Irex & ~REX_B) | REX_R;
4940                         cs.IFL1 = cast(ubyte)fl;
4941                         cs.IEV1.Vsym = cs.IEV2.Vsym;
4942                         cs.IEV1.Voffset = cs.IEV2.Voffset;
4943                     }
4944                 }
4945             }
4946             cs.Iflags = CFoff;              /* want offset only             */
4947             cs.IFL2 = cast(ubyte)fl;
4948             cdb.gen(&cs);
4949             break;
4950 
4951         case FLreg:
4952             /* Allow this since the tree optimizer puts & in front of       */
4953             /* register doubles.                                            */
4954             goto L2;
4955         case FLauto:
4956         case FLfast:
4957         case FLbprel:
4958         case FLfltreg:
4959             reflocal = true;
4960             goto L2;
4961         case FLpara:
4962             refparam = true;
4963         L2:
4964             if (reg == STACK)
4965             {   regm_t retregs = ALLREGS;
4966 
4967                 reg_t regx;
4968                 allocreg(cdb,&retregs,&regx,TYoffset);
4969                 reg = findreg(retregs);
4970                 loadea(cdb,e,&cs,LEA,reg,0,0,0);    // LEA reg,EA
4971                 if (I64)
4972                     code_orrex(cdb.last(), REX_W);
4973                 cdb.gen1(0x50 + (reg & 7));               // PUSH reg
4974                 if (reg & 8)
4975                     code_orrex(cdb.last(), REX_B);
4976                 cdb.genadjesp(REGSIZE);
4977                 stackchanged = 1;
4978             }
4979             else
4980             {
4981                 loadea(cdb,e,&cs,LEA,reg,0,0,0);   // LEA reg,EA
4982                 if (I64)
4983                     code_orrex(cdb.last(), REX_W);
4984             }
4985             break;
4986 
4987         default:
4988             debug
4989             {
4990                 elem_print(e);
4991                 WRFL(fl);
4992             }
4993             assert(0);
4994     }
4995 }
4996 
4997 
4998 /******************
4999  * OPneg, OPsqrt, OPsin, OPcos, OPrint
5000  */
5001 
5002 @trusted
5003 void cdneg(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
5004 {
5005     //printf("cdneg()\n");
5006     //elem_print(e);
5007     if (*pretregs == 0)
5008     {
5009         codelem(cdb,e.EV.E1,pretregs,false);
5010         return;
5011     }
5012     const tyml = tybasic(e.EV.E1.Ety);
5013     const sz = _tysize[tyml];
5014     if (tyfloating(tyml))
5015     {
5016         if (tycomplex(tyml))
5017         {
5018             neg_complex87(cdb, e, pretregs);
5019             return;
5020         }
5021         if (tyxmmreg(tyml) && e.Eoper == OPneg && *pretregs & XMMREGS)
5022         {
5023             xmmneg(cdb,e,pretregs);
5024             return;
5025         }
5026         if (config.inline8087 &&
5027             ((*pretregs & (ALLREGS | mBP)) == 0 || e.Eoper == OPsqrt || I64))
5028             {
5029                 neg87(cdb,e,pretregs);
5030                 return;
5031             }
5032         regm_t retregs = (I16 && sz == 8) ? DOUBLEREGS_16 : ALLREGS;
5033         codelem(cdb,e.EV.E1,&retregs,false);
5034         getregs(cdb,retregs);
5035         if (I32)
5036         {
5037             const reg = (sz == 8) ? findregmsw(retregs) : findreg(retregs);
5038             cdb.genc2(0x81,modregrm(3,6,reg),0x80000000); // XOR EDX,sign bit
5039         }
5040         else
5041         {
5042             const reg = (sz == 8) ? AX : findregmsw(retregs);
5043             cdb.genc2(0x81,modregrm(3,6,reg),0x8000);     // XOR AX,0x8000
5044         }
5045         fixresult(cdb,e,retregs,pretregs);
5046         return;
5047     }
5048 
5049     const uint isbyte = sz == 1;
5050     const possregs = (isbyte) ? BYTEREGS : allregs;
5051     regm_t retregs = *pretregs & possregs;
5052     if (retregs == 0)
5053         retregs = possregs;
5054     codelem(cdb,e.EV.E1,&retregs,false);
5055     getregs(cdb,retregs);                // retregs will be destroyed
5056     if (sz <= REGSIZE)
5057     {
5058         const reg = findreg(retregs);
5059         uint rex = (I64 && sz == 8) ? REX_W : 0;
5060         if (I64 && sz == 1 && reg >= 4)
5061             rex |= REX;
5062         cdb.gen2(0xF7 ^ isbyte,(rex << 16) | modregrmx(3,3,reg));   // NEG reg
5063         if (!I16 && _tysize[tyml] == SHORTSIZE && *pretregs & mPSW)
5064             cdb.last().Iflags |= CFopsize | CFpsw;
5065         *pretregs &= mBP | ALLREGS;             // flags already set
5066     }
5067     else if (sz == 2 * REGSIZE)
5068     {
5069         const msreg = findregmsw(retregs);
5070         cdb.gen2(0xF7,modregrm(3,3,msreg));       // NEG msreg
5071         const lsreg = findreglsw(retregs);
5072         cdb.gen2(0xF7,modregrm(3,3,lsreg));       // NEG lsreg
5073         code_orflag(cdb.last(), CFpsw);           // need flag result of previous NEG
5074         cdb.genc2(0x81,modregrm(3,3,msreg),0);    // SBB msreg,0
5075     }
5076     else
5077         assert(0);
5078     fixresult(cdb,e,retregs,pretregs);
5079 }
5080 
5081 
5082 /******************
5083  * Absolute value operator
5084  */
5085 
5086 
5087 @trusted
5088 void cdabs(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
5089 {
5090     //printf("cdabs(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
5091     if (*pretregs == 0)
5092     {
5093         codelem(cdb,e.EV.E1,pretregs,false);
5094         return;
5095     }
5096     const tyml = tybasic(e.EV.E1.Ety);
5097     const sz = _tysize[tyml];
5098     const rex = (I64 && sz == 8) ? REX_W : 0;
5099     if (tyfloating(tyml))
5100     {
5101         if (tyxmmreg(tyml) && *pretregs & XMMREGS)
5102         {
5103             xmmabs(cdb,e,pretregs);
5104             return;
5105         }
5106         if (config.inline8087 && ((*pretregs & (ALLREGS | mBP)) == 0 || I64))
5107         {
5108             neg87(cdb,e,pretregs);
5109             return;
5110         }
5111         regm_t retregs = (!I32 && sz == 8) ? DOUBLEREGS_16 : ALLREGS;
5112         codelem(cdb,e.EV.E1,&retregs,false);
5113         getregs(cdb,retregs);
5114         if (I32)
5115         {
5116             const reg = (sz == 8) ? findregmsw(retregs) : findreg(retregs);
5117             cdb.genc2(0x81,modregrm(3,4,reg),0x7FFFFFFF); // AND EDX,~sign bit
5118         }
5119         else
5120         {
5121             const reg = (sz == 8) ? AX : findregmsw(retregs);
5122             cdb.genc2(0x81,modregrm(3,4,reg),0x7FFF);     // AND AX,0x7FFF
5123         }
5124         fixresult(cdb,e,retregs,pretregs);
5125         return;
5126     }
5127 
5128     const uint isbyte = sz == 1;
5129     assert(isbyte == 0);
5130     regm_t possregs = (sz <= REGSIZE) ? cast(regm_t) mAX : allregs;
5131     if (!I16 && sz == REGSIZE)
5132         possregs = allregs;
5133     regm_t retregs = *pretregs & possregs;
5134     if (retregs == 0)
5135         retregs = possregs;
5136     codelem(cdb,e.EV.E1,&retregs,false);
5137     getregs(cdb,retregs);                // retregs will be destroyed
5138     if (sz <= REGSIZE)
5139     {
5140         /*      CWD
5141                 XOR     AX,DX
5142                 SUB     AX,DX
5143            or:
5144                 MOV     r,reg
5145                 SAR     r,63
5146                 XOR     reg,r
5147                 SUB     reg,r
5148          */
5149         reg_t reg;
5150         reg_t r;
5151 
5152         if (!I16 && sz == REGSIZE)
5153         {
5154             reg = findreg(retregs);
5155             r = allocScratchReg(cdb, allregs & ~retregs);
5156             getregs(cdb,retregs);
5157             genmovreg(cdb,r,reg);                     // MOV r,reg
5158             cdb.genc2(0xC1,modregrmx(3,7,r),REGSIZE * 8 - 1);      // SAR r,31/63
5159             code_orrex(cdb.last(), rex);
5160         }
5161         else
5162         {
5163             reg = AX;
5164             r = DX;
5165             getregs(cdb,mDX);
5166             if (!I16 && sz == SHORTSIZE)
5167                 cdb.gen1(0x98);                         // CWDE
5168             cdb.gen1(0x99);                             // CWD
5169             code_orrex(cdb.last(), rex);
5170         }
5171         cdb.gen2(0x33 ^ isbyte,(rex << 16) | modregxrmx(3,reg,r)); // XOR reg,r
5172         cdb.gen2(0x2B ^ isbyte,(rex << 16) | modregxrmx(3,reg,r)); // SUB reg,r
5173         if (!I16 && sz == SHORTSIZE && *pretregs & mPSW)
5174             cdb.last().Iflags |= CFopsize | CFpsw;
5175         if (*pretregs & mPSW)
5176             cdb.last().Iflags |= CFpsw;
5177         *pretregs &= ~mPSW;                     // flags already set
5178     }
5179     else if (sz == 2 * REGSIZE)
5180     {
5181         /*      or      DX,DX
5182                 jns     L2
5183                 neg     DX
5184                 neg     AX
5185                 sbb     DX,0
5186             L2:
5187          */
5188 
5189         code *cnop = gennop(null);
5190         const msreg = findregmsw(retregs);
5191         const lsreg = findreglsw(retregs);
5192         genregs(cdb,0x09,msreg,msreg);            // OR msreg,msreg
5193         genjmp(cdb,JNS,FLcode,cast(block *)cnop);
5194         cdb.gen2(0xF7,modregrm(3,3,msreg));       // NEG msreg
5195         cdb.gen2(0xF7,modregrm(3,3,lsreg));       // NEG lsreg+1
5196         cdb.genc2(0x81,modregrm(3,3,msreg),0);    // SBB msreg,0
5197         cdb.append(cnop);
5198     }
5199     else
5200         assert(0);
5201     fixresult(cdb,e,retregs,pretregs);
5202 }
5203 
5204 /**************************
5205  * Post increment and post decrement.
5206  */
5207 
5208 @trusted
5209 void cdpost(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
5210 {
5211     //printf("cdpost(pretregs = %s)\n", regm_str(*pretregs));
5212     code cs = void;
5213     const op = e.Eoper;                      // OPxxxx
5214     if (*pretregs == 0)                        // if nothing to return
5215     {
5216         cdaddass(cdb,e,pretregs);
5217         return;
5218     }
5219     const tym_t tyml = tybasic(e.EV.E1.Ety);
5220     const sz = _tysize[tyml];
5221     elem *e2 = e.EV.E2;
5222     const rex = (I64 && sz == 8) ? REX_W : 0;
5223 
5224     if (tyfloating(tyml))
5225     {
5226         if (config.fpxmmregs && tyxmmreg(tyml) &&
5227             !tycomplex(tyml) // SIMD code is not set up to deal with complex
5228            )
5229         {
5230             xmmpost(cdb,e,pretregs);
5231             return;
5232         }
5233 
5234         if (config.inline8087)
5235         {
5236             post87(cdb,e,pretregs);
5237             return;
5238         }
5239 if (config.exe & EX_windos)
5240 {
5241         assert(sz <= 8);
5242         getlvalue(cdb,&cs,e.EV.E1,DOUBLEREGS);
5243         freenode(e.EV.E1);
5244         regm_t idxregs = idxregm(&cs);  // mask of index regs used
5245         cs.Iop = 0x8B;                  /* MOV DOUBLEREGS,EA            */
5246         fltregs(cdb,&cs,tyml);
5247         stackchanged = 1;
5248         int stackpushsave = stackpush;
5249         regm_t retregs;
5250         if (sz == 8)
5251         {
5252             if (I32)
5253             {
5254                 cdb.gen1(0x50 + DX);             // PUSH DOUBLEREGS
5255                 cdb.gen1(0x50 + AX);
5256                 stackpush += DOUBLESIZE;
5257                 retregs = DOUBLEREGS2_32;
5258             }
5259             else
5260             {
5261                 cdb.gen1(0x50 + AX);
5262                 cdb.gen1(0x50 + BX);
5263                 cdb.gen1(0x50 + CX);
5264                 cdb.gen1(0x50 + DX);             /* PUSH DOUBLEREGS      */
5265                 stackpush += DOUBLESIZE + DOUBLESIZE;
5266 
5267                 cdb.gen1(0x50 + AX);
5268                 cdb.gen1(0x50 + BX);
5269                 cdb.gen1(0x50 + CX);
5270                 cdb.gen1(0x50 + DX);             /* PUSH DOUBLEREGS      */
5271                 retregs = DOUBLEREGS_16;
5272             }
5273         }
5274         else
5275         {
5276             stackpush += FLOATSIZE;     /* so we know something is on   */
5277             if (!I32)
5278                 cdb.gen1(0x50 + DX);
5279             cdb.gen1(0x50 + AX);
5280             retregs = FLOATREGS2;
5281         }
5282         cdb.genadjesp(stackpush - stackpushsave);
5283 
5284         cgstate.stackclean++;
5285         scodelem(cdb,e2,&retregs,idxregs,false);
5286         cgstate.stackclean--;
5287 
5288         if (tyml == TYdouble || tyml == TYdouble_alias)
5289         {
5290             retregs = DOUBLEREGS;
5291             callclib(cdb,e,(op == OPpostinc) ? CLIB.dadd : CLIB.dsub,
5292                     &retregs,idxregs);
5293         }
5294         else /* tyml == TYfloat */
5295         {
5296             retregs = FLOATREGS;
5297             callclib(cdb,e,(op == OPpostinc) ? CLIB.fadd : CLIB.fsub,
5298                     &retregs,idxregs);
5299         }
5300         cs.Iop = 0x89;                  /* MOV EA,DOUBLEREGS            */
5301         fltregs(cdb,&cs,tyml);
5302         stackpushsave = stackpush;
5303         if (tyml == TYdouble || tyml == TYdouble_alias)
5304         {   if (*pretregs == mSTACK)
5305                 retregs = mSTACK;       /* leave result on stack        */
5306             else
5307             {
5308                 if (I32)
5309                 {
5310                     cdb.gen1(0x58 + AX);
5311                     cdb.gen1(0x58 + DX);
5312                 }
5313                 else
5314                 {
5315                     cdb.gen1(0x58 + DX);
5316                     cdb.gen1(0x58 + CX);
5317                     cdb.gen1(0x58 + BX);
5318                     cdb.gen1(0x58 + AX);
5319                 }
5320                 stackpush -= DOUBLESIZE;
5321                 retregs = DOUBLEREGS;
5322             }
5323         }
5324         else
5325         {
5326             cdb.gen1(0x58 + AX);
5327             if (!I32)
5328                 cdb.gen1(0x58 + DX);
5329             stackpush -= FLOATSIZE;
5330             retregs = FLOATREGS;
5331         }
5332         cdb.genadjesp(stackpush - stackpushsave);
5333         fixresult(cdb,e,retregs,pretregs);
5334         return;
5335 }
5336     }
5337     if (tyxmmreg(tyml))
5338     {
5339         xmmpost(cdb,e,pretregs);
5340         return;
5341     }
5342 
5343     assert(e2.Eoper == OPconst);
5344     uint isbyte = (sz == 1);
5345     regm_t possregs = isbyte ? BYTEREGS : allregs;
5346     getlvalue(cdb,&cs,e.EV.E1,0);
5347     freenode(e.EV.E1);
5348     regm_t idxregs = idxregm(&cs);       // mask of index regs used
5349     if (sz <= REGSIZE && *pretregs == mPSW && (cs.Irm & 0xC0) == 0xC0 &&
5350         (!I16 || (idxregs & (mBX | mSI | mDI | mBP))))
5351     {
5352         // Generate:
5353         //      TEST    reg,reg
5354         //      LEA     reg,n[reg]      // don't affect flags
5355         reg_t reg = cs.Irm & 7;
5356         if (cs.Irex & REX_B)
5357             reg |= 8;
5358         cs.Iop = 0x85 ^ isbyte;
5359         code_newreg(&cs, reg);
5360         cs.Iflags |= CFpsw;
5361         cdb.gen(&cs);             // TEST reg,reg
5362 
5363         // If lvalue is a register variable, we must mark it as modified
5364         modEA(cdb,&cs);
5365 
5366         auto n = e2.EV.Vint;
5367         if (op == OPpostdec)
5368             n = -n;
5369         int rm = reg;
5370         if (I16)
5371         {
5372             static immutable byte[8] regtorm = [ -1,-1,-1, 7,-1, 6, 4, 5 ]; // copied from cod1.c
5373             rm = regtorm[reg];
5374         }
5375         cdb.genc1(LEA,(rex << 16) | buildModregrm(2,reg,rm),FLconst,n); // LEA reg,n[reg]
5376         return;
5377     }
5378     else if (sz <= REGSIZE || tyfv(tyml))
5379     {
5380         code cs2 = void;
5381 
5382         cs.Iop = 0x8B ^ isbyte;
5383         regm_t retregs = possregs & ~idxregs & *pretregs;
5384         if (!tyfv(tyml))
5385         {
5386             if (retregs == 0)
5387                 retregs = possregs & ~idxregs;
5388         }
5389         else /* tyfv(tyml) */
5390         {
5391             if ((retregs &= mLSW) == 0)
5392                 retregs = mLSW & ~idxregs;
5393             /* Can't use LES if the EA uses ES as a seg override    */
5394             if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes)
5395             {   cs.Iop = 0xC4;                      /* LES          */
5396                 getregs(cdb,mES);           // allocate ES
5397             }
5398         }
5399         reg_t reg;
5400         allocreg(cdb,&retregs,&reg,TYint);
5401         code_newreg(&cs, reg);
5402         if (sz == 1 && I64 && reg >= 4)
5403             cs.Irex |= REX;
5404         cdb.gen(&cs);                     // MOV reg,EA
5405         cs2 = cs;
5406 
5407         /* If lvalue is a register variable, we must mark it as modified */
5408         modEA(cdb,&cs);
5409 
5410         cs.Iop = 0x81 ^ isbyte;
5411         cs.Irm &= ~cast(int)modregrm(0,7,0);             // reg field = 0
5412         cs.Irex &= ~REX_R;
5413         if (op == OPpostdec)
5414             cs.Irm |= modregrm(0,5,0);  /* SUB                  */
5415         cs.IFL2 = FLconst;
5416         targ_int n = e2.EV.Vint;
5417         cs.IEV2.Vint = n;
5418         if (n == 1)                     /* can use INC or DEC           */
5419         {
5420             cs.Iop |= 0xFE;             /* xFE is dec byte, xFF is word */
5421             if (op == OPpostdec)
5422                 NEWREG(cs.Irm,1);       // DEC EA
5423             else
5424                 NEWREG(cs.Irm,0);       // INC EA
5425         }
5426         else if (n == -1)               // can use INC or DEC
5427         {
5428             cs.Iop |= 0xFE;             // xFE is dec byte, xFF is word
5429             if (op == OPpostinc)
5430                 NEWREG(cs.Irm,1);       // DEC EA
5431             else
5432                 NEWREG(cs.Irm,0);       // INC EA
5433         }
5434 
5435         // For scheduling purposes, we wish to replace:
5436         //      MOV     reg,EA
5437         //      OP      EA
5438         // with:
5439         //      MOV     reg,EA
5440         //      OP      reg
5441         //      MOV     EA,reg
5442         //      ~OP     reg
5443         if (sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 &&
5444             config.target_cpu >= TARGET_Pentium &&
5445             config.flags4 & CFG4speed)
5446         {
5447             // Replace EA in cs with reg
5448             cs.Irm = (cs.Irm & ~cast(int)modregrm(3,0,7)) | modregrm(3,0,reg & 7);
5449             if (reg & 8)
5450             {   cs.Irex &= ~REX_R;
5451                 cs.Irex |= REX_B;
5452             }
5453             else
5454                 cs.Irex &= ~REX_B;
5455             if (I64 && sz == 1 && reg >= 4)
5456                 cs.Irex |= REX;
5457             cdb.gen(&cs);                        // ADD/SUB reg,const
5458 
5459             // Reverse MOV direction
5460             cs2.Iop ^= 2;
5461             cdb.gen(&cs2);                       // MOV EA,reg
5462 
5463             // Toggle INC <. DEC, ADD <. SUB
5464             cs.Irm ^= (n == 1 || n == -1) ? modregrm(0,1,0) : modregrm(0,5,0);
5465             cdb.gen(&cs);
5466 
5467             if (*pretregs & mPSW)
5468             {   *pretregs &= ~mPSW;              // flags already set
5469                 code_orflag(cdb.last(),CFpsw);
5470             }
5471         }
5472         else
5473             cdb.gen(&cs);                        // ADD/SUB EA,const
5474 
5475         freenode(e2);
5476         if (tyfv(tyml))
5477         {
5478             reg_t preg;
5479 
5480             getlvalue_msw(&cs);
5481             if (*pretregs & mES)
5482             {
5483                 preg = ES;
5484                 /* ES is already loaded if CFes is 0            */
5485                 cs.Iop = ((cs.Iflags & CFSEG) == CFes) ? 0x8E : NOP;
5486                 NEWREG(cs.Irm,0);       /* MOV ES,EA+2          */
5487             }
5488             else
5489             {
5490                 regm_t retregsx = *pretregs & mMSW;
5491                 if (!retregsx)
5492                     retregsx = mMSW;
5493                 allocreg(cdb,&retregsx,&preg,TYint);
5494                 cs.Iop = 0x8B;
5495                 if (I32)
5496                     cs.Iflags |= CFopsize;
5497                 NEWREG(cs.Irm,preg);    /* MOV preg,EA+2        */
5498             }
5499             getregs(cdb,mask(preg));
5500             cdb.gen(&cs);
5501             retregs = mask(reg) | mask(preg);
5502         }
5503         fixresult(cdb,e,retregs,pretregs);
5504         return;
5505     }
5506     else if (tyml == TYhptr)
5507     {
5508         uint rvalue;
5509         reg_t lreg;
5510         reg_t rtmp;
5511         regm_t mtmp;
5512 
5513         rvalue = e2.EV.Vlong;
5514         freenode(e2);
5515 
5516         // If h--, convert to h++
5517         if (e.Eoper == OPpostdec)
5518             rvalue = -rvalue;
5519 
5520         regm_t retregs = mLSW & ~idxregs & *pretregs;
5521         if (!retregs)
5522             retregs = mLSW & ~idxregs;
5523         allocreg(cdb,&retregs,&lreg,TYint);
5524 
5525         // Can't use LES if the EA uses ES as a seg override
5526         if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes)
5527         {   cs.Iop = 0xC4;
5528             retregs |= mES;
5529             getregs(cdb,mES|mCX);       // allocate ES
5530             cs.Irm |= modregrm(0,lreg,0);
5531             cdb.gen(&cs);                       // LES lreg,EA
5532         }
5533         else
5534         {   cs.Iop = 0x8B;
5535             retregs |= mDX;
5536             getregs(cdb,mDX|mCX);
5537             cs.Irm |= modregrm(0,lreg,0);
5538             cdb.gen(&cs);                       // MOV lreg,EA
5539             NEWREG(cs.Irm,DX);
5540             getlvalue_msw(&cs);
5541             cdb.gen(&cs);                       // MOV DX,EA+2
5542             getlvalue_lsw(&cs);
5543         }
5544 
5545         // Allocate temporary register, rtmp
5546         mtmp = ALLREGS & ~mCX & ~idxregs & ~retregs;
5547         allocreg(cdb,&mtmp,&rtmp,TYint);
5548 
5549         movregconst(cdb,rtmp,rvalue >> 16,0);   // MOV rtmp,e2+2
5550         getregs(cdb,mtmp);
5551         cs.Iop = 0x81;
5552         NEWREG(cs.Irm,0);
5553         cs.IFL2 = FLconst;
5554         cs.IEV2.Vint = rvalue;
5555         cdb.gen(&cs);                           // ADD EA,e2
5556         code_orflag(cdb.last(),CFpsw);
5557         cdb.genc2(0x81,modregrm(3,2,rtmp),0);   // ADC rtmp,0
5558         genshift(cdb);                          // MOV CX,offset __AHSHIFT
5559         cdb.gen2(0xD3,modregrm(3,4,rtmp));      // SHL rtmp,CL
5560         cs.Iop = 0x01;
5561         NEWREG(cs.Irm,rtmp);                    // ADD EA+2,rtmp
5562         getlvalue_msw(&cs);
5563         cdb.gen(&cs);
5564         fixresult(cdb,e,retregs,pretregs);
5565         return;
5566     }
5567     else if (sz == 2 * REGSIZE)
5568     {
5569         regm_t retregs = allregs & ~idxregs & *pretregs;
5570         if ((retregs & mLSW) == 0)
5571                 retregs |= mLSW & ~idxregs;
5572         if ((retregs & mMSW) == 0)
5573                 retregs |= ALLREGS & mMSW;
5574         assert(retregs & mMSW && retregs & mLSW);
5575         reg_t reg;
5576         allocreg(cdb,&retregs,&reg,tyml);
5577         uint sreg = findreglsw(retregs);
5578         cs.Iop = 0x8B;
5579         cs.Irm |= modregrm(0,sreg,0);
5580         cdb.gen(&cs);                   // MOV sreg,EA
5581         NEWREG(cs.Irm,reg);
5582         getlvalue_msw(&cs);
5583         cdb.gen(&cs);                   // MOV reg,EA+2
5584         cs.Iop = 0x81;
5585         cs.Irm &= ~cast(int)modregrm(0,7,0);     /* reg field = 0 for ADD        */
5586         if (op == OPpostdec)
5587             cs.Irm |= modregrm(0,5,0);  /* SUB                          */
5588         getlvalue_lsw(&cs);
5589         cs.IFL2 = FLconst;
5590         cs.IEV2.Vlong = e2.EV.Vlong;
5591         cdb.gen(&cs);                   // ADD/SUB EA,const
5592         code_orflag(cdb.last(),CFpsw);
5593         getlvalue_msw(&cs);
5594         cs.IEV2.Vlong = 0;
5595         if (op == OPpostinc)
5596             cs.Irm ^= modregrm(0,2,0);  /* ADC                          */
5597         else
5598             cs.Irm ^= modregrm(0,6,0);  /* SBB                          */
5599         cs.IEV2.Vlong = cast(targ_long)(e2.EV.Vullong >> (REGSIZE * 8));
5600         cdb.gen(&cs);                   // ADC/SBB EA,0
5601         freenode(e2);
5602         fixresult(cdb,e,retregs,pretregs);
5603         return;
5604     }
5605     else
5606     {
5607         assert(0);
5608     }
5609 }
5610 
5611 
/******************
 * Handler for elems that must never reach the code generator.
 * Prints the elem in debug builds, then halts with assert(0).
 */
void cderr(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    debug
    {
        elem_print(e);
    }

    //printf("op = %d, %d\n", e.Eoper, OPstring);
    //printf("string = %p, len = %d\n", e.EV.ss.Vstring, e.EV.ss.Vstrlen);
    //printf("string = '%.*s'\n", cast(int)e.EV.ss.Vstrlen, e.EV.ss.Vstring);
    assert(0);
}
5622 
/******************
 * Generate code for an elem whose left operand (e.EV.E1) is an
 * informational operator and whose right operand (e.EV.E2) carries the value.
 * Dispatches on the operator of the left operand; any operator not handled
 * for the active version is an error (assert).
 */
@trusted
void cdinfo(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    elem *info = e.EV.E1;
    elem *value = e.EV.E2;

    switch (info.Eoper)
    {
version (MARS)
{
        case OPdctor:
            // Value first, then the info elem with no result wanted
            codelem(cdb,value,pretregs,false);
            regm_t noresult = 0;
            codelem(cdb,info,&noresult,false);
            break;
}
version (SCPP)
{
        case OPdtor:
            cdcomma(cdb,e,pretregs);
            break;

        case OPctor:
            // Value first, then the info elem with no result wanted
            codelem(cdb,value,pretregs,false);
            regm_t noresult = 0;
            codelem(cdb,info,&noresult,false);
            break;

        case OPmark:
            if (0 && config.exe == EX_WIN32)
            {
                // Disabled by the constant 0 in the condition
                const idx = except_index_get();
                except_mark();
                codelem(cdb,value,pretregs,false);
                if (config.exe == EX_WIN32 && idx != except_index_get())
                {
                    usednteh |= NTEHcleanup;
                    nteh_gensindex(cdb,idx - 1);
                }
                except_release();
                assert(idx == except_index_get());
            }
            else
            {
                // Bracket the value's code with ESCmark / ESCrelease pseudo-ops
                code esc = void;
                esc.Iop = ESCAPE | ESCmark;
                esc.Iflags = 0;
                esc.Irex = 0;
                cdb.gen(&esc);
                codelem(cdb,value,pretregs,false);
                esc.Iop = ESCAPE | ESCrelease;
                cdb.gen(&esc);
            }
            freenode(info);
            break;
}
        default:
            assert(0);
    }
}
5677 
5678 /*******************************************
5679  * D constructor.
5680  * OPdctor
5681  */
5682 
5683 @trusted
5684 void cddctor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
5685 {
5686     /* Generate:
5687         ESCAPE | ESCdctor
5688         MOV     sindex[BP],index
5689      */
5690     usednteh |= EHcleanup;
5691     if (config.ehmethod == EHmethod.EH_WIN32)
5692     {   usednteh |= NTEHcleanup | NTEH_try;
5693         nteh_usevars();
5694     }
5695     assert(*pretregs == 0);
5696     code cs;
5697     cs.Iop = ESCAPE | ESCdctor;         // mark start of EH range
5698     cs.Iflags = 0;
5699     cs.Irex = 0;
5700     cs.IFL1 = FLctor;
5701     cs.IEV1.Vtor = e;
5702     cdb.gen(&cs);
5703     nteh_gensindex(cdb,0);              // the actual index will be patched in later
5704                                         // by except_fillInEHTable()
5705 }
5706 
5707 /*******************************************
5708  * D destructor.
5709  * OPddtor
5710  */
5711 
5712 @trusted
5713 void cdddtor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
5714 {
5715     if (config.ehmethod == EHmethod.EH_DWARF)
5716     {
5717         usednteh |= EHcleanup;
5718 
5719         code cs;
5720         cs.Iop = ESCAPE | ESCddtor;     // mark end of EH range and where landing pad is
5721         cs.Iflags = 0;
5722         cs.Irex = 0;
5723         cs.IFL1 = FLdtor;
5724         cs.IEV1.Vtor = e;
5725         cdb.gen(&cs);
5726 
5727         // Mark all registers as destroyed
5728         getregsNoSave(allregs);
5729 
5730         assert(*pretregs == 0);
5731         codelem(cdb,e.EV.E1,pretregs,false);
5732         return;
5733     }
5734     else
5735     {
5736         /* Generate:
5737             ESCAPE | ESCddtor
5738             MOV     sindex[BP],index
5739             CALL    dtor
5740             JMP     L1
5741         Ldtor:
5742             ... e.EV.E1 ...
5743             RET
5744         L1: NOP
5745         */
5746         usednteh |= EHcleanup;
5747         if (config.ehmethod == EHmethod.EH_WIN32)
5748         {   usednteh |= NTEHcleanup | NTEH_try;
5749             nteh_usevars();
5750         }
5751 
5752         code cs;
5753         cs.Iop = ESCAPE | ESCddtor;
5754         cs.Iflags = 0;
5755         cs.Irex = 0;
5756         cs.IFL1 = FLdtor;
5757         cs.IEV1.Vtor = e;
5758         cdb.gen(&cs);
5759 
5760         nteh_gensindex(cdb,0);              // the actual index will be patched in later
5761                                             // by except_fillInEHTable()
5762 
5763         // Mark all registers as destroyed
5764         getregsNoSave(allregs);
5765 
5766         assert(*pretregs == 0);
5767         CodeBuilder cdbx;
5768         cdbx.ctor();
5769         codelem(cdbx,e.EV.E1,pretregs,false);
5770         cdbx.gen1(0xC3);                      // RET
5771         code *c = cdbx.finish();
5772 
5773         int nalign = 0;
5774         if (STACKALIGN >= 16)
5775         {
5776             nalign = STACKALIGN - REGSIZE;
5777             cod3_stackadj(cdb, nalign);
5778         }
5779         calledafunc = 1;
5780         genjmp(cdb,0xE8,FLcode,cast(block *)c);   // CALL Ldtor
5781         if (nalign)
5782             cod3_stackadj(cdb, -nalign);
5783 
5784         code *cnop = gennop(null);
5785 
5786         genjmp(cdb,JMP,FLcode,cast(block *)cnop);
5787         cdb.append(cdbx);
5788         cdb.append(cnop);
5789         return;
5790     }
5791 }
5792 
5793 
5794 /*******************************************
5795  * C++ constructor.
5796  */
5797 
5798 @trusted
5799 void cdctor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
5800 {
5801 version (SCPP)
5802 {
5803     usednteh |= EHcleanup;
5804     if (config.exe == EX_WIN32)
5805         usednteh |= NTEHcleanup;
5806     assert(*pretregs == 0);
5807 
5808     code cs = void;
5809     cs.Iop = ESCAPE | ESCctor;
5810     cs.Iflags = 0;
5811     cs.Irex = 0;
5812     cs.IFL1 = FLctor;
5813     cs.IEV1.Vtor = e;
5814     cdb.gen(&cs);
5815 }
5816 }
5817 
5818 /******
5819  * OPdtor
5820  */
5821 void cddtor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
5822 {
5823 version (SCPP)
5824 {
5825     usednteh |= EHcleanup;
5826     if (config.exe == EX_WIN32)
5827         usednteh |= NTEHcleanup;
5828     assert(*pretregs == 0);
5829 
5830     code cs = void;
5831     cs.Iop = ESCAPE | ESCdtor;
5832     cs.Iflags = 0;
5833     cs.Irex = 0;
5834     cs.IFL1 = FLdtor;
5835     cs.IEV1.Vtor = e;
5836     cdb.gen(&cs);
5837 }
5838 }
5839 
/******
 * Code generator entry with an empty body: generates no code and leaves
 * *pretregs untouched.
 */
void cdmark(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
}
5843 
// Only compiled when NTEXCEPTIONS is false; in that configuration this entry
// must never be reached.
static if (!NTEXCEPTIONS)
{
void cdsetjmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(0);
}
}
5851 
5852 /*****************************************
5853  */
5854 
5855 @trusted
5856 void cdvoid(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
5857 {
5858     assert(*pretregs == 0);
5859     codelem(cdb,e.EV.E1,pretregs,false);
5860 }
5861 
5862 /*****************************************
5863  */
5864 
5865 @trusted
5866 void cdhalt(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
5867 {
5868     assert(*pretregs == 0);
5869     cdb.gen1(config.target_cpu >= TARGET_80286 ? UD2 : INT3);
5870 }
5871 
5872 }