1 /**
2  * Code generation 4
3  *
4  * Includes:
5  * - assignment variations of operators (+= -= *= /= %= <<= >>=)
6  * - integer comparison (< > <= >=)
7  * - converting integers to a different size (e.g. short to int)
8  * - bit instructions (bit scan, population count)
9  *
10  * Compiler implementation of the
11  * $(LINK2 https://www.dlang.org, D programming language).
12  *
13  * Mostly code generation for assignment operators.
14  *
15  * Copyright:   Copyright (C) 1985-1998 by Symantec
16  *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
17  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
18  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
19  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod4.d, backend/cod4.d)
20  * Documentation:  https://dlang.org/phobos/dmd_backend_cod4.html
21  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod4.d
22  */
23 
24 module dmd.backend.cod4;
25 
26 version (SCPP)
27     version = COMPILE;
28 version (MARS)
29     version = COMPILE;
30 
31 version (COMPILE)
32 {
33 
34 import core.stdc.stdio;
35 import core.stdc.stdlib;
36 import core.stdc.string;
37 
38 import dmd.backend.cc;
39 import dmd.backend.cdef;
40 import dmd.backend.code;
41 import dmd.backend.code_x86;
42 import dmd.backend.codebuilder;
43 import dmd.backend.mem;
44 import dmd.backend.el;
45 import dmd.backend.global;
46 import dmd.backend.oper;
47 import dmd.backend.ty;
48 import dmd.backend.evalu8 : el_toldoubled;
49 import dmd.backend.xmm;
50 
// Attribute labels: each one applies to every declaration that follows it in
// this scope, so the functions below get C++ linkage ...
51 extern (C++):
52 
// ... and are nothrow and @safe unless a declaration overrides that itself
// (as the functions marked @trusted do).
53 nothrow:
54 @safe:
55 
// Globals defined in other backend modules.
// NOTE(review): datafl presumably holds one flag per FL* value (it is sized by
// FLMAX); it is not referenced in the part of the file visible here - confirm.
56 extern __gshared CGstate cgstate;
57 extern __gshared bool[FLMAX] datafl;
58 
/// Returns: a mask with only bit number `m` set (1 shifted left by `m`).
private extern (D) uint mask(uint m)
{
    return 1 << m;
}
60 
// Successor table, indexed by register number in the order AX,CX,DX,BX.
// Following it from AX gives the chain AX -> BX -> CX -> DX -> NOREG.
// The 16 bit branch of cdeq() walks this chain while it stores a double,
// starting with AX at the highest address and moving down one REGSIZE per step.
61                         /*   AX,CX,DX,BX                */
62 __gshared const reg_t[4] dblreg = [ BX,DX,NOREG,CX ];
63 
64 // from divcoeff.c
// Prototypes with C linkage for routines implemented outside this module.
// Neither is called in the part of the file visible here.
// NOTE(review): judging only by the names and the pm/pshpre/pshpost out
// parameters, these look like they compute the multiplier and shift counts for
// replacing a division by the constant d with a multiply - confirm against the
// implementation before relying on this.
65 extern (C)
66 {
67     bool choose_multiplier(int N, ulong d, int prec, ulong *pm, int *pshpost);
68     bool udiv_coefficients(int N, ulong d, int *pshpre, ulong *pm, int *pshpost);
69 }
70 
/*******************************
 * Count the references to a symbol inside an expression tree.
 * Params:
 *      s = symbol to look for
 *      e = root of the tree to search
 * Returns:
 *      the number of OPvar leaves in e whose symbol is s
 */

@trusted
private int intree(Symbol *s,elem *e)
{
    const OPER oper = e.Eoper;

    if (OTleaf(oper))
    {
        // A leaf contributes 1 only when it is a variable reference to s
        return (oper == OPvar && e.EV.Vsym == s) ? 1 : 0;
    }

    // Interior node: the left operand always exists, the right one only
    // for binary operators
    int count = intree(s, e.EV.E1);
    if (OTbinary(oper))
        count += intree(s, e.EV.E2);
    return count;
}
82 
/***********************************
 * Decide whether expression e may be computed directly in the register
 * that holds register variable s.
 * Care is needed for expressions that read s themselves, such as
 * x=x+x+x and x=a+x.
 * Params:
 *      s = the register variable being assigned to
 *      e = the expression whose value is to be assigned
 * Returns:
 *      1       e can be evaluated directly into s
 *      0       it cannot
 */

@trusted
int doinreg(Symbol *s, elem *e)
{
    for (;;)
    {
        const OPER op = e.Eoper;

        // Indirections, calls and leaves are always acceptable
        if (op == OPind || OTcall(op) || OTleaf(op))
            return 1;

        // So is any tree that never mentions s ...
        const int occurrences = intree(s, e);
        if (occurrences == 0)
            return 1;

        // ... and a unary operator applied directly to a leaf
        if (OTunary(op) && OTleaf(e.EV.E1.Eoper))
            return 1;

        // More than one reference to s: give up
        if (occurrences != 1)
            return 0;

        switch (op)
        {
            case OPadd:
            case OPmin:
            case OPand:
            case OPor:
            case OPxor:
            case OPshl:
            case OPmul:
                // The single reference must not be in the right operand
                if (intree(s, e.EV.E2))
                    return 0;
                // It is somewhere in the left operand: examine that next
                e = e.EV.E1;
                break;

            default:
                return 0;
        }
    }
}
131 
/****************************
 * Called just before an EA is modified.
 * If the EA turns out to be a register, emit (through getregs()) whatever
 * code is needed to save common subexpressions held in that register.
 * Params:
 *      cdb = code builder to append to
 *      c = instruction whose addressing mode is about to be written
 */

void modEA(ref CodeBuilder cdb,code *c)
{
    // Only a mod field with both bits set refers to a register
    if ((c.Irm & 0xC0) != 0xC0)
        return;

    reg_t r = c.Irm & 7;
    if (c.Irex & REX_B)
    {
        // REX.B supplies the fourth register bit; only valid for 64 bit code
        assert(I64);
        r |= 8;
    }
    getregs(cdb, mask(r));
}
150 
151 
152 /****************************
153  * Gen code for op= for doubles.
154  */
/*
 * opassdbl: floating point op= (and post-increment/decrement) done through a
 * runtime library routine. When config.inline8087 is set the work is handed
 * to opass87() instead.
 *
 * Params:
 *      cdb      = code builder that receives the generated instructions
 *      e        = the op= node; e.EV.E1 is the lvalue, e.EV.E2 the right operand
 *      pretregs = where the caller wants the result; passed on to fixresult()
 *      op       = operator; indexes clibtab, so it must lie in OPpostinc..OPdivass
 *
 * Sequence: load (or push) the current value of the lvalue, evaluate the right
 * operand into a second register set, call the library routine, then store the
 * result back through the lvalue's EA.
 */
155 @trusted
156 private void opassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs,OPER op)
157 {
158     assert(config.exe & EX_windos);  // for targets that may not have an 8087
159 
        // Library routine for each operator, indexed by (op - OPpostinc).
        // The OPeq slot holds -1 because plain assignment needs no routine.
160     static immutable uint[OPdivass - OPpostinc + 1] clibtab =
161     /* OPpostinc,OPpostdec,OPeq,OPaddass,OPminass,OPmulass,OPdivass       */
162     [  CLIB.dadd, CLIB.dsub, cast(uint)-1,  CLIB.dadd,CLIB.dsub,CLIB.dmul,CLIB.ddiv ];
163 
164     if (config.inline8087)
165     {
166         opass87(cdb,e,pretregs);
167         return;
168     }
169 
170     code cs;
        // retregs2: registers for the right operand
        // retregs:  registers holding the left operand and then the result
        // idxregs:  registers scodelem() is told to keep intact
171     regm_t retregs2,retregs,idxregs;
172 
173     uint clib = clibtab[op - OPpostinc];
174     elem *e1 = e.EV.E1;
175     tym_t tym = tybasic(e1.Ety);
176     getlvalue(cdb,&cs,e1,DOUBLEREGS | mBX | mCX);
177 
178     if (tym == TYfloat)
179     {
180         clib += CLIB.fadd - CLIB.dadd;    /* convert to float operation   */
181 
182         // Load EA into FLOATREGS
183         getregs(cdb,FLOATREGS);
184         cs.Iop = LOD;
185         cs.Irm |= modregrm(0,AX,0);
186         cdb.gen(&cs);
187 
            // Not 32 bit code: the float takes a second load, into DX, from the
            // most significant word; afterwards the EA is put back on the low word
188         if (!I32)
189         {
190             cs.Irm |= modregrm(0,DX,0);
191             getlvalue_msw(&cs);
192             cdb.gen(&cs);
193             getlvalue_lsw(&cs);
194 
195         }
196         retregs2 = FLOATREGS2;
197         idxregs = FLOATREGS | idxregm(&cs);
198         retregs = FLOATREGS;
199     }
200     else
201     {
202         if (I32)
203         {
204             // Load EA into DOUBLEREGS
205             getregs(cdb,DOUBLEREGS_32);
206             cs.Iop = LOD;
207             cs.Irm |= modregrm(0,AX,0);
208             cdb.gen(&cs);
209             cs.Irm |= modregrm(0,DX,0);
210             getlvalue_msw(&cs);
211             cdb.gen(&cs);
212             getlvalue_lsw(&cs);
213 
214             retregs2 = DOUBLEREGS2_32;
215             idxregs = DOUBLEREGS_32 | idxregm(&cs);
216         }
217         else
218         {
219             // Push EA onto stack
                // Opcode 0xFF with reg field 6 is emitted four times, starting
                // at the highest word of the double.
                // NOTE(review): each getlvalue_lsw() presumably moves the EA down
                // by one word between pushes - confirm against its definition.
220             cs.Iop = 0xFF;
221             cs.Irm |= modregrm(0,6,0);
222             cs.IEV1.Voffset += DOUBLESIZE - REGSIZE;
223             cdb.gen(&cs);
224             getlvalue_lsw(&cs);
225             cdb.gen(&cs);
226             getlvalue_lsw(&cs);
227             cdb.gen(&cs);
228             getlvalue_lsw(&cs);
229             cdb.gen(&cs);
                // account for the bytes just pushed
230             stackpush += DOUBLESIZE;
231 
232             retregs2 = DOUBLEREGS_16;
233             idxregs = idxregm(&cs);
234         }
235         retregs = DOUBLEREGS;
236     }
237 
        // An EA that uses an ES segment override needs ES kept intact as well
238     if ((cs.Iflags & CFSEG) == CFes)
239         idxregs |= mES;
        // Evaluate the right operand into retregs2 with stackclean raised.
        // NOTE(review): presumably this keeps the pushed operand from being
        // cleaned off the stack during evaluation - confirm.
240     cgstate.stackclean++;
241     scodelem(cdb,e.EV.E2,&retregs2,idxregs,false);
242     cgstate.stackclean--;
243     callclib(cdb,e,clib,&retregs,0);
244     if (e1.Ecount)
245         cssave(e1,retregs,!OTleaf(e1.Eoper));             // if lvalue is a CSE
246     freenode(e1);
247     cs.Iop = STO;                              // MOV EA,DOUBLEREGS
248     fltregs(cdb,&cs,tym);
249     fixresult(cdb,e,retregs,pretregs);
250 }
251 
252 /****************************
253  * Gen code for OPnegass for doubles.
254  */
255 
/*
 * opnegassdbl: negate a floating point lvalue in place (OPnegass) without
 * inline x87 code. When config.inline8087 is set the work is handed to
 * cdnegass87() instead.
 *
 * The negation is an XOR of 0x80 into the highest byte of the value (and, for
 * complex types, also into the highest byte of the lower half). The value is
 * reloaded into registers afterwards only if the caller wants a result or the
 * lvalue is a common subexpression.
 *
 * Params:
 *      cdb      = code builder that receives the generated instructions
 *      e        = the OPnegass node; e.EV.E1 is the lvalue
 *      pretregs = where the caller wants the result; 0 if the value is unused
 */
256 @trusted
257 private void opnegassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
258 {
259     assert(config.exe & EX_windos);  // for targets that may not have an 8087
260 
261     if (config.inline8087)
262     {
263         cdnegass87(cdb,e,pretregs);
264         return;
265     }
266     elem *e1 = e.EV.E1;
267     tym_t tym = tybasic(e1.Ety);
268     int sz = _tysize[tym];
269     code cs;
270 
        // Registers are offered to getlvalue() only when a result is wanted
271     getlvalue(cdb,&cs,e1,*pretregs ? DOUBLEREGS | mBX | mCX : 0);
272     modEA(cdb,&cs);
        // Opcode 0x80 with reg field 6 and immediate 0x80, aimed at the last
        // byte of the operand (offset sz - 1)
273     cs.Irm |= modregrm(0,6,0);
274     cs.Iop = 0x80;
275     cs.IEV1.Voffset += sz - 1;
276     cs.IFL2 = FLconst;
277     cs.IEV2.Vuns = 0x80;
278     cdb.gen(&cs);                       // XOR 7[EA],0x80
279     if (tycomplex(tym))
280     {
            // Same instruction again, half the operand size lower
281         cs.IEV1.Voffset -= sz / 2;
282         cdb.gen(&cs);                   // XOR 7[EA],0x80
283     }
284 
285     regm_t retregs;
286     if (*pretregs || e1.Ecount)
287     {
            // Undo the sz - 1 adjustment made above.
            // NOTE(review): for complex types Voffset was also lowered by sz / 2
            // and that is not undone here, so the EA would end up sz / 2 below
            // the start of the operand - confirm whether complex operands can
            // reach this branch.
288         cs.IEV1.Voffset -= sz - 1;
289 
290         if (tym == TYfloat)
291         {
292             // Load EA into FLOATREGS
293             getregs(cdb,FLOATREGS);
294             cs.Iop = LOD;
295             NEWREG(cs.Irm, AX);
296             cdb.gen(&cs);
297 
298             if (!I32)
299             {
300                 NEWREG(cs.Irm, DX);
301                 getlvalue_msw(&cs);
302                 cdb.gen(&cs);
303                 getlvalue_lsw(&cs);
304 
305             }
306             retregs = FLOATREGS;
307         }
308         else
309         {
310             if (I32)
311             {
312                 // Load EA into DOUBLEREGS
313                 getregs(cdb,DOUBLEREGS_32);
314                 cs.Iop = LOD;
                    // clear the reg field (still 6 from the XOR) before selecting AX
315                 cs.Irm &= ~cast(uint)modregrm(0,7,0);
316                 cs.Irm |= modregrm(0,AX,0);
317                 cdb.gen(&cs);
318                 cs.Irm |= modregrm(0,DX,0);
319                 getlvalue_msw(&cs);
320                 cdb.gen(&cs);
321                 getlvalue_lsw(&cs);
322             }
323             else
324             {
                    // The load through fltregs() is the branch compiled in;
                    // the push sequence below is disabled.
325                 static if (1)
326                 {
327                     cs.Iop = LOD;
328                     fltregs(cdb,&cs,TYdouble);     // MOV DOUBLEREGS, EA
329                 }
330                 else
331                 {
332                     // Push EA onto stack
333                     cs.Iop = 0xFF;
334                     cs.Irm |= modregrm(0,6,0);
335                     cs.IEV1.Voffset += DOUBLESIZE - REGSIZE;
336                     cdb.gen(&cs);
337                     cs.IEV1.Voffset -= REGSIZE;
338                     cdb.gen(&cs);
339                     cs.IEV1.Voffset -= REGSIZE;
340                     cdb.gen(&cs);
341                     cs.IEV1.Voffset -= REGSIZE;
342                     cdb.gen(&cs);
343                     stackpush += DOUBLESIZE;
344                 }
345             }
346             retregs = DOUBLEREGS;
347         }
348         if (e1.Ecount)
349             cssave(e1,retregs,!OTleaf(e1.Eoper));         /* if lvalue is a CSE   */
350     }
351     else
352     {
            // Nothing was loaded, so there is no result register to report
353         retregs = 0;
354         assert(e1.Ecount == 0);
355     }
356 
357     freenode(e1);
358     fixresult(cdb,e,retregs,pretregs);
359 }
360 
361 
362 
363 /************************
364  * Generate code for an assignment.
365  */
366 
/*
 * cdeq: code generation for the assignment e1 = e2.
 *
 * Params:
 *      cdb      = code builder that receives the generated instructions
 *      e        = the assignment node; e.EV.E1 is the lvalue, e.EV.E2 the rvalue
 *      pretregs = registers/flags in which the caller wants the value of the
 *                 assignment; 0 means the value is not used
 *
 * Outline:
 *  1. XMM and inline x87 operands are delegated to xmmeq(), complex_eq87()
 *     and eq87().
 *  2. If no result is wanted and the rvalue is a constant (or an acceptable
 *     address constant), the constant is stored straight into the lvalue and
 *     control jumps to Lp.
 *  3. Otherwise the rvalue is evaluated into registers by scodelem() -
 *     directly into the destination register when the lvalue is a register
 *     variable and doinreg() allows it - and then stored through the EA of
 *     the lvalue.
 *  4. At Lp, a pending (*p++ = ...) or (*p-- = ...) adjustment of a register
 *     variable p is applied.
 */
367 @trusted
368 void cdeq(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
369 {
370     tym_t tymll;
371     reg_t reg;
372     code cs;
373     elem *e11;
374     bool regvar;                  // true means evaluate into register variable
375     regm_t varregm;
376     reg_t varreg;
377     targ_int postinc;
378 
379     //printf("cdeq(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
380     elem *e1 = e.EV.E1;
381     elem *e2 = e.EV.E2;
382     int e2oper = e2.Eoper;
383     tym_t tyml = tybasic(e1.Ety);              // type of lvalue
384     regm_t retregs = *pretregs;
385 
        // Types kept in XMM registers are handled entirely by xmmeq()
386     if (tyxmmreg(tyml) && config.fpxmmregs)
387     {
388         xmmeq(cdb, e, CMP, e1, e2, pretregs);
389         return;
390     }
391 
        // Inline x87: eq87() takes every case except a store whose value is
        // unused and whose rvalue is a constant, variable or indirection.
        // Of those exceptions, variables/indirections on PentiumPro and later,
        // and all (imaginary) long doubles, still go to eq87().
392     if (tyfloating(tyml) && config.inline8087)
393     {
394         if (tycomplex(tyml))
395         {
396             complex_eq87(cdb, e, pretregs);
397             return;
398         }
399 
400         if (!(retregs == 0 &&
401               (e2oper == OPconst || e2oper == OPvar || e2oper == OPind))
402            )
403         {
404             eq87(cdb,e,pretregs);
405             return;
406         }
407         if (config.target_cpu >= TARGET_PentiumPro &&
408             (e2oper == OPvar || e2oper == OPind)
409            )
410         {
411             eq87(cdb,e,pretregs);
412             return;
413         }
414         if (tyml == TYldouble || tyml == TYildouble)
415         {
416             eq87(cdb,e,pretregs);
417             return;
418         }
419     }
420 
421     uint sz = _tysize[tyml];           // # of bytes to transfer
422     assert(cast(int)sz > 0);
423 
424     if (retregs == 0)                     // if no return value
425     {
426         int fl;
427 
428         /* If registers are tight, and we might need them for the lvalue,
429          * prefer to not use them for the rvalue
430          */
431         bool plenty = true;
432         if (e1.Eoper == OPind)
433         {
434             /* Will need 1 register for evaluation, +2 registers for
435              * e1's addressing mode
436              */
437             regm_t m = allregs & ~regcon.mvar;  // mask of non-register variables
438             m &= m - 1;         // clear least significant bit
439             m &= m - 1;         // clear least significant bit
440             plenty = m != 0;    // at least 3 registers
441         }
442 
            // Store-immediate path. It is taken when the rvalue is a constant,
            // or an address constant that (a) is not compiled for 64 bit PIC or
            // WIN64, (b) has fl FLdata, FLudata or FLextern and (c) whose symbol
            // is not mTYcs; and in addition the rvalue is not better evaluated
            // in a register while registers are plentiful, and e1 is not a CSE.
            // Note that fl is assigned only while this condition is evaluated
            // for OPrelconst, which is also the only case that reads it below.
443         if ((e2oper == OPconst ||       // if rvalue is a constant
444              e2oper == OPrelconst &&
445              !(I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) &&
446              ((fl = el_fl(e2)) == FLdata ||
447               fl==FLudata || fl == FLextern)
448               && !(e2.EV.Vsym.ty() & mTYcs)
449             ) &&
450             !(evalinregister(e2) && plenty) &&
451             !e1.Ecount)        // and no CSE headaches
452         {
453             // Look for special case of (*p++ = ...), where p is a register variable
454             if (e1.Eoper == OPind &&
455                 ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) &&
456                 e11.EV.E1.Eoper == OPvar &&
457                 e11.EV.E1.EV.Vsym.Sfl == FLreg &&
458                 (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS)
459                )
460             {
461                 Symbol *s = e11.EV.E1.EV.Vsym;
462                 if (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg)
463                 {
464                     regcon.params &= ~s.Spregm();
465                 }
                    // Remember the step (negated for p--); it is applied at Lp
466                 postinc = e11.EV.E2.EV.Vint;
467                 if (e11.Eoper == OPpostdec)
468                     postinc = -postinc;
469                 getlvalue(cdb,&cs,e1,RMstore);
470                 freenode(e11.EV.E2);
471             }
472             else
473             {
474                 postinc = 0;
475                 getlvalue(cdb,&cs,e1,RMstore);
476 
                    // Speed tuning for Pentium and Pentium MMX only: when the EA
                    // has a mod field of binary 10, route the constant through a
                    // register instead of storing an immediate
477                 if (e2oper == OPconst &&
478                     config.flags4 & CFG4speed &&
479                     (config.target_cpu == TARGET_Pentium ||
480                      config.target_cpu == TARGET_PentiumMMX) &&
481                     (cs.Irm & 0xC0) == 0x80
482                    )
483                 {
484                     if (I64 && sz == 8 && e2.EV.Vpointer)
485                     {
486                         // MOV reg,imm64
487                         // MOV EA,reg
488                         regm_t rregm = allregs & ~idxregm(&cs);
489                         reg_t regx;
490                         regwithvalue(cdb,rregm,e2.EV.Vpointer,&regx,64);
491                         cs.Iop = STO;
492                         cs.Irm |= modregrm(0,regx & 7,0);
493                         if (regx & 8)
494                             cs.Irex |= REX_R;
495                         cdb.gen(&cs);
496                         freenode(e2);
497                         goto Lp;
498                     }
499                     if ((sz == REGSIZE || (I64 && sz == 4)) && e2.EV.Vint)
500                     {
501                         // MOV reg,imm
502                         // MOV EA,reg
503                         regm_t rregm = allregs & ~idxregm(&cs);
504                         reg_t regx;
505                         regwithvalue(cdb,rregm,e2.EV.Vint,&regx,0);
506                         cs.Iop = STO;
507                         cs.Irm |= modregrm(0,regx & 7,0);
508                         if (regx & 8)
509                             cs.Irex |= REX_R;
510                         cdb.gen(&cs);
511                         freenode(e2);
512                         goto Lp;
513                     }
                        // Double-width zero: store the same register to both halves
514                     if (sz == 2 * REGSIZE && e2.EV.Vllong == 0)
515                     {
516                         // MOV reg,imm
517                         // MOV EA,reg
518                         // MOV EA+2,reg
519                         regm_t rregm = getscratch() & ~idxregm(&cs);
520                         if (rregm)
521                         {
522                             reg_t regx;
523                             regwithvalue(cdb,rregm,e2.EV.Vint,&regx,0);
524                             cs.Iop = STO;
525                             cs.Irm |= modregrm(0,regx,0);
526                             cdb.gen(&cs);
527                             getlvalue_msw(&cs);
528                             cdb.gen(&cs);
529                             freenode(e2);
530                             goto Lp;
531                         }
532                     }
533                 }
534             }
535 
536             // If loading result into a register
537             if ((cs.Irm & 0xC0) == 0xC0)
538             {
539                 modEA(cdb,&cs);
540                 if (sz == 2 * REGSIZE && cs.IFL1 == FLreg)
541                     getregs(cdb,cs.IEV1.Vsym.Sregm);
542             }
                // Store-immediate opcode: 0xC6 for a byte, 0xC7 otherwise
543             cs.Iop = (sz == 1) ? 0xC6 : 0xC7;
544 
545             if (e2oper == OPrelconst)
546             {
547                 cs.IEV2.Voffset = e2.EV.Voffset;
548                 cs.IFL2 = cast(ubyte)fl;
549                 cs.IEV2.Vsym = e2.EV.Vsym;
550                 cs.Iflags |= CFoff;
551                 cdb.gen(&cs);       // MOV EA,&variable
552                 if (I64 && sz == 8)
553                     code_orrex(cdb.last(), REX_W);
                    // Wider than a register: also store the segment (opcode 0x8C,
                    // reg field 3) into the upper part
554                 if (sz > REGSIZE)
555                 {
556                     cs.Iop = 0x8C;
557                     getlvalue_msw(&cs);
558                     cs.Irm |= modregrm(0,3,0);
559                     cdb.gen(&cs);   // MOV EA+2,DS
560                 }
561             }
562             else
563             {
564                 assert(e2oper == OPconst);
565                 cs.IFL2 = FLconst;
                    // p reads the constant's storage as targ_size_t words
566                 targ_size_t *p = cast(targ_size_t *) &(e2.EV);
567                 cs.IEV2.Vsize_t = *p;
568                 // Look for loading a register variable
569                 if ((cs.Irm & 0xC0) == 0xC0)
570                 {
571                     reg_t regx = cs.Irm & 7;
572 
573                     if (cs.Irex & REX_B)
574                         regx |= 8;
575                     if (I64 && sz == 8)
576                         movregconst(cdb,regx,*p,64);
577                     else
578                         movregconst(cdb,regx,*p,1 ^ (cs.Iop & 1));
                        // Register pair: load the upper half of the constant into
                        // the register selected by getlvalue_msw()
579                     if (sz == 2 * REGSIZE)
580                     {   getlvalue_msw(&cs);
581                         if (REGSIZE == 2)
582                             movregconst(cdb,cs.Irm & 7,(cast(ushort *)p)[1],0);
583                         else if (REGSIZE == 4)
584                             movregconst(cdb,cs.Irm & 7,(cast(uint *)p)[1],0);
585                         else if (REGSIZE == 8)
586                             movregconst(cdb,cs.Irm & 7,p[1],0);
587                         else
588                             assert(0);
589                     }
590                 }
591                 else if (I64 && sz == 8 && *p >= 0x80000000)
592                 {   // Use 64 bit MOV, as the 32 bit one gets sign extended
593                     // MOV reg,imm64
594                     // MOV EA,reg
595                     regm_t rregm = allregs & ~idxregm(&cs);
596                     reg_t regx;
597                     regwithvalue(cdb,rregm,*p,&regx,64);
598                     cs.Iop = STO;
599                     cs.Irm |= modregrm(0,regx & 7,0);
600                     if (regx & 8)
601                         cs.Irex |= REX_R;
602                     cdb.gen(&cs);
603                 }
604                 else
605                 {
                        // General case: store the constant to memory one chunk of
                        // regsize bytes per iteration, advancing both the source
                        // pointer p and the destination offset
606                     int off = sz;
607                     do
608                     {   int regsize = REGSIZE;
609                         if (off >= 4 && I16 && config.target_cpu >= TARGET_80386)
610                         {
611                             regsize = 4;
612                             cs.Iflags |= CFopsize;      // use opsize to do 32 bit operation
613                         }
614                         else if (I64 && sz == 16 && *p >= 0x80000000)
615                         {
                                // Chunk does not fit a sign extended 32 bit
                                // immediate: go through a 64 bit register
616                             regm_t rregm = allregs & ~idxregm(&cs);
617                             reg_t regx;
618                             regwithvalue(cdb,rregm,*p,&regx,64);
619                             cs.Iop = STO;
620                             cs.Irm |= modregrm(0,regx & 7,0);
621                             if (regx & 8)
622                                 cs.Irex |= REX_R;
623                         }
624                         else
625                         {
                                // If some register already holds the value, store
                                // that register instead of an immediate
626                             regm_t retregsx = (sz == 1) ? BYTEREGS : allregs;
627                             reg_t regx;
628                             if (reghasvalue(retregsx,*p,&regx))
629                             {
630                                 cs.Iop = (cs.Iop & 1) | 0x88;
631                                 cs.Irm |= modregrm(0,regx & 7,0); // MOV EA,regx
632                                 if (regx & 8)
633                                     cs.Irex |= REX_R;
634                                 if (I64 && sz == 1 && regx >= 4)
635                                     cs.Irex |= REX;
636                             }
637                             if (!I16 && off == 2)      // if 16 bit operand
638                                 cs.Iflags |= CFopsize;
639                             if (I64 && sz == 8)
640                                 cs.Irex |= REX_W;
641                         }
642                         cdb.gen(&cs);           // MOV EA,const
643 
                            // Reset cs to a store-immediate of the next chunk
644                         p = cast(targ_size_t *)(cast(char *) p + regsize);
645                         cs.Iop = (cs.Iop & 1) | 0xC6;
646                         cs.Irm &= cast(ubyte)~cast(int)modregrm(0,7,0);
647                         cs.Irex &= ~REX_R;
648                         cs.IEV1.Voffset += regsize;
649                         cs.IEV2.Vint = cast(int)*p;
650                         off -= regsize;
651                     } while (off > 0);
652                 }
653             }
654             freenode(e2);
655             goto Lp;
656         }
657         retregs = allregs;        // pick a reg, any reg
658         if (sz == 2 * REGSIZE)
659             retregs &= ~mBP;      // BP cannot be used for register pair
660     }
        // Only the flags are wanted: a register is still needed to hold the value
661     if (retregs == mPSW)
662     {
663         retregs = allregs;
664         if (sz == 2 * REGSIZE)
665             retregs &= ~mBP;      // BP cannot be used for register pair
666     }
667     cs.Iop = STO;
668     if (sz == 1)                  // must have byte regs
669     {
670         cs.Iop = 0x88;
671         retregs &= BYTEREGS;
672         if (!retregs)
673             retregs = BYTEREGS;
674     }
675     else if (retregs & mES &&
676            (
677              (e1.Eoper == OPind &&
678                 ((tymll = tybasic(e1.EV.E1.Ety)) == TYfptr || tymll == TYhptr)) ||
679              (e1.Eoper == OPvar && e1.EV.Vsym.Sfl == FLfardata)
680            )
681           )
682         // getlvalue() needs ES, so we can't return it
683         retregs = allregs;              // no conflicts with ES
684     else if (tyml == TYdouble || tyml == TYdouble_alias || retregs & mST0)
685         retregs = DOUBLEREGS;
686 
687     regvar = false;
688     varregm = 0;
689     if (config.flags4 & CFG4optimized)
690     {
691         // Be careful of cases like (x = x+x+x). We cannot evaluate in
692         // x if x is in a register.
693         if (isregvar(e1,&varregm,&varreg) &&    // if lvalue is register variable
694             doinreg(e1.EV.Vsym,e2) &&       // and we can compute directly into it
695             !(sz == 1 && e1.EV.Voffset == 1)
696            )
697         {
698             if (varregm & XMMREGS)
699             {
700                 // Could be an integer vector in the XMMREGS
701                 xmmeq(cdb, e, CMP, e1, e2, pretregs);
702                 return;
703             }
704             regvar = true;
705             retregs = varregm;
706             reg = varreg;       // evaluate directly in target register
                // The lvalue is one register-sized half of a variable held in a
                // register pair: a non-zero offset selects the most significant
                // register, a zero offset the least significant one
707             if (tysize(e1.Ety) == REGSIZE &&
708                 tysize(e1.EV.Vsym.Stype.Tty) == 2 * REGSIZE)
709             {
710                 if (e1.EV.Voffset)
711                     retregs &= mMSW;
712                 else
713                     retregs &= mLSW;
714                 reg = findreg(retregs);
715             }
716         }
717     }
718     if (*pretregs & mPSW && OTleaf(e1.Eoper))     // if evaluating e1 couldn't change flags
719     {   // Be careful that this lines up with jmpopcode()
720         retregs |= mPSW;
721         *pretregs &= ~mPSW;
722     }
723     scodelem(cdb,e2,&retregs,0,true);    // get rvalue
724 
725     // Look for special case of (*p++ = ...), where p is a register variable
726     if (e1.Eoper == OPind &&
727         ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) &&
728         e11.EV.E1.Eoper == OPvar &&
729         e11.EV.E1.EV.Vsym.Sfl == FLreg &&
730         (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS)
731        )
732     {
733         Symbol *s = e11.EV.E1.EV.Vsym;
734         if (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg)
735         {
736             regcon.params &= ~s.Spregm();
737         }
738 
739         postinc = e11.EV.E2.EV.Vint;
740         if (e11.Eoper == OPpostdec)
741             postinc = -postinc;
            // retregs is passed along with RMstore because it now holds the rvalue
742         getlvalue(cdb,&cs,e1,RMstore | retregs);
743         freenode(e11.EV.E2);
744     }
745     else
746     {
747         postinc = 0;
748         getlvalue(cdb,&cs,e1,RMstore | retregs);     // get lvalue (cl == null if regvar)
749     }
750 
        // varregm is 0 unless the lvalue was found to be a register variable
751     getregs(cdb,varregm);
752 
753     assert(!(retregs & mES && (cs.Iflags & CFSEG) == CFes));
        // Far pointer whose segment is in ES: store the offset register first,
        // then the segment with opcode 0x8C and reg field 0
754     if ((tyml == TYfptr || tyml == TYhptr) && retregs & mES)
755     {
756         reg = findreglsw(retregs);
757         cs.Irm |= modregrm(0,reg,0);
758         cdb.gen(&cs);                   // MOV EA,reg
759         getlvalue_msw(&cs);             // point to where segment goes
760         cs.Iop = 0x8C;
761         NEWREG(cs.Irm,0);
762         cdb.gen(&cs);                   // MOV EA+2,ES
763     }
764     else
765     {
766         if (!I16)
767         {
                // 32/64 bit code: store the low register first, then, for values
                // wider than a register, the high register at the upper half
768             reg = findreg(retregs &
769                     ((sz > REGSIZE) ? mBP | mLSW : mBP | ALLREGS));
770             cs.Irm |= modregrm(0,reg & 7,0);
771             if (reg & 8)
772                 cs.Irex |= REX_R;
773             for (; true; sz -= REGSIZE)
774             {
775                 // Do not generate mov from register onto itself
776                 if (regvar && reg == ((cs.Irm & 7) | (cs.Irex & REX_B ? 8 : 0)))
777                     break;
778                 if (sz == 2)            // if 16 bit operand
779                     cs.Iflags |= CFopsize;
780                 else if (sz == 1 && reg >= 4)
781                     cs.Irex |= REX;
782                 cdb.gen(&cs);           // MOV EA+offset,reg
783                 if (sz <= REGSIZE)
784                     break;
785                 getlvalue_msw(&cs);
786                 reg = findregmsw(retregs);
787                 code_newreg(&cs, reg);
788             }
789         }
790         else
791         {
                // 16 bit code: start at the highest word and work downwards
792             if (sz > REGSIZE)
793                 cs.IEV1.Voffset += sz - REGSIZE;  // 0,2,6
794             reg = findreg(retregs &
795                     (sz > REGSIZE ? mMSW : ALLREGS));
796             if (tyml == TYdouble || tyml == TYdouble_alias)
797                 reg = AX;
798             cs.Irm |= modregrm(0,reg,0);
799             // Do not generate mov from register onto itself
800             if (!regvar || reg != (cs.Irm & 7))
801                 for (; true; sz -= REGSIZE)             // 1,2,4
802                 {
803                     cdb.gen(&cs);             // MOV EA+offset,reg
804                     if (sz <= REGSIZE)
805                         break;
806                     cs.IEV1.Voffset -= REGSIZE;
                        // doubles step through the registers in dblreg[] order
807                     if (tyml == TYdouble || tyml == TYdouble_alias)
808                             reg = dblreg[reg];
809                     else
810                             reg = findreglsw(retregs);
811                     NEWREG(cs.Irm,reg);
812                 }
813         }
814     }
815     if (e1.Ecount ||                    // if lvalue is a CSE or
816         regvar)                         // rvalue can't be a CSE
817     {
818         getregs_imm(cdb,retregs);       // necessary if both lvalue and
819                                         //  rvalue are CSEs (since a reg
820                                         //  can hold only one e at a time)
821         cssave(e1,retregs,!OTleaf(e1.Eoper));     // if lvalue is a CSE
822     }
823 
824     fixresult(cdb,e,retregs,pretregs);
    // Common exit. The store-immediate paths jump here directly, which skips
    // the fixresult() call above.
825 Lp:
826     if (postinc)
827     {
            // Apply the deferred p++ / p-- to the index register of the EA
828         reg_t ireg = findreg(idxregm(&cs));
829         if (*pretregs & mPSW)
830         {   // Use LEA to avoid touching the flags
831             uint rm = cs.Irm & 7;
832             if (cs.Irex & REX_B)
833                 rm |= 8;
834             cdb.genc1(LEA,buildModregrm(2,ireg,rm),FLconst,postinc);
835             if (tysize(e11.EV.E1.Ety) == 8)
836                 code_orrex(cdb.last(), REX_W);
837         }
838         else if (I64)
839         {
                // opcode 0x81 with reg field 0 and postinc as the immediate
840             cdb.genc2(0x81,modregrmx(3,0,ireg),postinc);
841             if (tysize(e11.EV.E1.Ety) == 8)
842                 code_orrex(cdb.last(), REX_W);
843         }
844         else
845         {
846             if (postinc == 1)
847                 cdb.gen1(0x40 + ireg);        // INC ireg
848             else if (postinc == -cast(targ_int)1)
849                 cdb.gen1(0x48 + ireg);        // DEC ireg
850             else
851             {
852                 cdb.genc2(0x81,modregrm(3,0,ireg),postinc);
853             }
854         }
855     }
856     freenode(e1);
857 }
858 
859 
860 /************************
861  * Generate code for += -= &= |= ^= negass
     *
     * OPpostinc and OPpostdec are also handled here: they are rewritten to
     * OPaddass and OPminass before an opcode is selected.
     *
     * Operands that live in XMM registers are handed to xmmopass() (except
     * for negass, or when the result is wanted in ST0); other floating point
     * types are handed to opass87()/cdnegass87() on Posix targets and to
     * opassdbl()/opnegassdbl() otherwise. The remainder of the function
     * generates integer/pointer code that operates on the lvalue in place
     * and then, if the caller wants it, reloads the result into registers.
     *
     * Params:
     *      cdb = code builder that the generated instructions are appended to
     *      e = the operator elem; e.EV.E1 is the lvalue and e.EV.E2 the rvalue
     *      pretregs = mask of where the result is wanted (registers and/or
     *                 mPSW for the flags); forwarded to fixresult()
862  */
863 
864 @trusted
865 void cdaddass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
866 {
867     //printf("cdaddass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
868     OPER op = e.Eoper;
869     regm_t retregs = 0;
870     uint reverse = 0;
871     elem *e1 = e.EV.E1;
872     tym_t tyml = tybasic(e1.Ety);            // type of lvalue
873     int sz = _tysize[tyml];
874     int isbyte = (sz == 1);                     // 1 for byte operation, else 0
875 
876     // See if evaluate in XMM registers
877     if (config.fpxmmregs && tyxmmreg(tyml) && op != OPnegass && !(*pretregs & mST0))
878     {
879         xmmopass(cdb,e,pretregs);
880         return;
881     }
882 
883     if (tyfloating(tyml))
884     {
            // Posix targets go through the x87 routines, all other targets
            // through the opassdbl()/opnegassdbl() routines
885         if (config.exe & EX_posix)
886         {
887             if (op == OPnegass)
888                 cdnegass87(cdb,e,pretregs);
889             else
890                 opass87(cdb,e,pretregs);
891         }
892         else
893         {
894             if (op == OPnegass)
895                 opnegassdbl(cdb,e,pretregs);
896             else
897                 opassdbl(cdb,e,pretregs,op);
898         }
899         return;
900     }
        // In 16 bit code on a 386 or later, long operands are done in one
        // instruction by adding an operand size prefix (CFopsize)
901     uint opsize = (I16 && tylong(tyml) && config.target_cpu >= TARGET_80386)
902         ? CFopsize : 0;
        // Flags applied to the low-word instruction of a two-instruction
        // operation; set to CFpsw below for add and subtract
903     uint cflags = 0;
904     regm_t forccs = *pretregs & mPSW;            // return result in flags
905     regm_t forregs = *pretregs & ~mPSW;          // return result in regs
906     // true if we want the result in a register
907     uint wantres = forregs || (e1.Ecount && !OTleaf(e1.Eoper));
908 
909     reg_t reg;
910     uint op1,op2,mode;
911     code cs;
912     elem *e2;
913     regm_t varregm;
914     reg_t varreg;
915     uint jop;
916 
917 
        // op1  = opcode of the "OP EA,reg" form used for the low (or only) word
        // op2  = opcode used for the high word of a two-word operand
        // mode = reg field selecting the operation when opcode 0x81 (OP EA,imm) is used
918     switch (op)                   // select instruction opcodes
919     {
920         case OPpostinc: op = OPaddass;                  // i++ => +=
921                         goto case OPaddass;
922 
923         case OPaddass:  op1 = 0x01; op2 = 0x11;
924                         cflags = CFpsw;
925                         mode = 0; break;                // ADD, ADC
926 
927         case OPpostdec: op = OPminass;                  // i-- => -=
928                         goto case OPminass;
929 
930         case OPminass:  op1 = 0x29; op2 = 0x19;
931                         cflags = CFpsw;
932                         mode = 5; break;                // SUB, SBB
933 
934         case OPandass:  op1 = op2 = 0x21;
935                         mode = 4; break;                // AND, AND
936 
937         case OPorass:   op1 = op2 = 0x09;
938                         mode = 1; break;                // OR , OR
939 
940         case OPxorass:  op1 = op2 = 0x31;
941                         mode = 6; break;                // XOR, XOR
942 
943         case OPnegass:  op1 = 0xF7;                     // NEG
944                         break;
945 
946         default:
947                 assert(0);
948     }
949     op1 ^= isbyte;                  // bit 0 is 0 for byte operation
950 
951     if (op == OPnegass)
952     {
            // NEG works in place on the lvalue; there is no rvalue to evaluate
953         getlvalue(cdb,&cs,e1,0);
954         modEA(cdb,&cs);
955         cs.Irm |= modregrm(0,3,0);              // reg field 3 selects NEG
956         cs.Iop = op1;
957         switch (_tysize[tyml])
958         {
959             case CHARSIZE:
960                 cdb.gen(&cs);
961                 break;
962 
963             case SHORTSIZE:
964                 cdb.gen(&cs);
965                 if (!I16 && *pretregs & mPSW)
966                     cdb.last().Iflags |= CFopsize | CFpsw;
967                 break;
968 
969             case LONGSIZE:
970                 if (!I16 || opsize)
971                 {   cdb.gen(&cs);
972                     cdb.last().Iflags |= opsize;
973                     break;
974                 }
                    // Two-word negate: NEG msw; NEG lsw; SBB msw,0
975             neg_2reg:
976                 getlvalue_msw(&cs);
977                 cdb.gen(&cs);              // NEG EA+2
978                 getlvalue_lsw(&cs);
979                 cdb.gen(&cs);              // NEG EA
980                 code_orflag(cdb.last(),CFpsw);
981                 cs.Iop = 0x81;             // reg field is still 3, which makes this SBB EA,imm
982                 getlvalue_msw(&cs);
983                 cs.IFL2 = FLconst;
984                 cs.IEV2.Vuns = 0;
985                 cdb.gen(&cs);              // SBB EA+2,0
986                 break;
987 
988             case LLONGSIZE:
989                 if (I16)
990                     assert(0);             // not implemented yet
991                 if (I32)
992                     goto neg_2reg;
993                 cdb.gen(&cs);
994                 break;
995 
996             default:
997                 assert(0);
998         }
999         forccs = 0;             // flags already set by NEG
1000         *pretregs &= ~mPSW;
1001     }
         // rvalue is a constant that is used as an immediate operand
         // (or turned into INC/DEC when it is +1 or -1)
1002     else if ((e2 = e.EV.E2).Eoper == OPconst &&    // if rvalue is a const
1003              el_signx32(e2) &&
1004              // Don't evaluate e2 in register if we can use an INC or DEC
1005              (((sz <= REGSIZE || tyfv(tyml)) &&
1006                (op == OPaddass || op == OPminass) &&
1007                (el_allbits(e2, 1) || el_allbits(e2, -1))
1008               ) ||
1009               (!evalinregister(e2)
1010                && tyml != TYhptr
1011               )
1012              )
1013             )
1014     {
1015         getlvalue(cdb,&cs,e1,0);
1016         modEA(cdb,&cs);
1017         cs.IFL2 = FLconst;
1018         cs.IEV2.Vsize_t = e2.EV.Vint;
1019         if (sz <= REGSIZE || tyfv(tyml) || opsize)
1020         {
1021             targ_int i = cs.IEV2.Vint;
1022 
1023             // Handle shortcuts. Watch out for if result has
1024             // to be in flags.
1025 
                 // reghasvalue() presumably reports a register that already
                 // holds i; if so use the "OP EA,reg" form instead of an immediate
1026             if (reghasvalue(isbyte ? BYTEREGS : ALLREGS,i,&reg) && i != 1 && i != -1 &&
1027                 !opsize)
1028             {
1029                 cs.Iop = op1;
1030                 cs.Irm |= modregrm(0,reg & 7,0);
1031                 if (I64)
1032                 {   if (isbyte && reg >= 4)
1033                         cs.Irex |= REX;
1034                     if (reg & 8)
1035                         cs.Irex |= REX_R;
1036                 }
1037             }
1038             else
1039             {
1040                 cs.Iop = 0x81;
1041                 cs.Irm |= modregrm(0,mode,0);
1042                 switch (op)
1043                 {
1044                     case OPminass:      // convert to +=
1045                         cs.Irm ^= modregrm(0,5,0);      // reg field 5 (SUB) => 0 (ADD)
1046                         i = -i;
1047                         cs.IEV2.Vsize_t = i;
1048                         goto case OPaddass;
1049 
1050                     case OPaddass:
1051                         if (i == 1)             // INC EA
1052                                 goto L1;
1053                         else if (i == -1)       // DEC EA
1054                         {       cs.Irm |= modregrm(0,1,0);
1055                            L1:  cs.Iop = 0xFF;
1056                         }
1057                         break;
1058 
1059                     default:
1060                         break;
1061                 }
1062                 cs.Iop ^= isbyte;             // for byte operations
1063             }
1064             cs.Iflags |= opsize;
1065             if (forccs)
1066                 cs.Iflags |= CFpsw;
                 // Flags are not needed and the instruction carries an operand
                 // size prefix in 32/64 bit code: see if the prefix can be dropped
                 // by doing the operation at full width instead
1067             else if (!I16 && cs.Iflags & CFopsize)
1068             {
1069                 switch (op)
1070                 {   case OPorass:
1071                     case OPxorass:
                             // upper bits of the immediate are 0, so the MSW is unchanged
1072                         cs.IEV2.Vsize_t &= 0xFFFF;
1073                         cs.Iflags &= ~CFopsize; // don't worry about MSW
1074                         break;
1075 
1076                     case OPandass:
                             // upper bits of the immediate are 1, so the MSW is unchanged
1077                         cs.IEV2.Vsize_t |= ~0xFFFFL;
1078                         cs.Iflags &= ~CFopsize; // don't worry about MSW
1079                         break;
1080 
1081                     case OPminass:
1082                     case OPaddass:
1083                         static if (1)
1084                         {
1085                             if ((cs.Irm & 0xC0) == 0xC0)    // EA is register
1086                                 cs.Iflags &= ~CFopsize;
1087                         }
1088                         else
1089                         {
1090                             if ((cs.Irm & 0xC0) == 0xC0 &&  // EA is register and
1091                                 e1.Eoper == OPind)          // not a register var
1092                                 cs.Iflags &= ~CFopsize;
1093                         }
1094                         break;
1095 
1096                     default:
1097                         assert(0);
1098                 }
1099             }
1100 
1101             // For scheduling purposes, we wish to replace:
1102             //    OP    EA
1103             // with:
1104             //    MOV   reg,EA
1105             //    OP    reg
1106             //    MOV   EA,reg
1107             if (forregs && sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 &&
1108                 (config.target_cpu == TARGET_Pentium ||
1109                  config.target_cpu == TARGET_PentiumMMX) &&
1110                 config.flags4 & CFG4speed)
1111             {
1112                 regm_t sregm;
1113                 code cs2;
1114 
1115                 // Determine which registers to use
1116                 sregm = allregs & ~idxregm(&cs);
1117                 if (isbyte)
1118                     sregm &= BYTEREGS;
1119                 if (sregm & forregs)
1120                     sregm &= forregs;
1121 
1122                 allocreg(cdb,&sregm,&reg,tyml);      // allocate register
1123 
1124                 cs2 = cs;
1125                 cs2.Iflags &= ~CFpsw;
1126                 cs2.Iop = LOD ^ isbyte;
1127                 code_newreg(&cs2, reg);
1128                 cdb.gen(&cs2);                      // MOV reg,EA
1129 
                     // keep the operation's reg field, but make reg the r/m operand
1130                 cs.Irm = (cs.Irm & modregrm(0,7,0)) | modregrm(3,0,reg & 7);
1131                 if (reg & 8)
1132                     cs.Irex |= REX_B;
1133                 cdb.gen(&cs);                       // OP reg
1134 
1135                 cs2.Iop ^= 2;
1136                 cdb.gen(&cs2);                      // MOV EA,reg
1137 
                     // result is already in sregm, so skip the reload code below
1138                 retregs = sregm;
1139                 wantres = 0;
1140                 if (e1.Ecount)
1141                     cssave(e1,retregs,!OTleaf(e1.Eoper));
1142             }
1143             else
1144             {
1145                 cdb.gen(&cs);
1146                 cs.Iflags &= ~opsize;
1147                 cs.Iflags &= ~CFpsw;
1148                 if (I16 && opsize)                     // if DWORD operand
1149                     cs.IEV1.Voffset += 2; // compensate for wantres code
1150             }
1151         }
1152         else if (sz == 2 * REGSIZE)
1153         {
                 // Two-word operand: OP on the low word with the low half of the
                 // constant, then the carry-propagating OP on the high word
1154             targ_uns msw;
1155 
1156             cs.Iop = 0x81;
1157             cs.Irm |= modregrm(0,mode,0);
1158             cs.Iflags |= cflags;
1159             cdb.gen(&cs);
1160             cs.Iflags &= ~CFpsw;
1161 
1162             getlvalue_msw(&cs);             // point to msw
1163             msw = cast(uint)MSREG(e.EV.E2.EV.Vllong);
1164             cs.IEV2.Vuns = msw;             // msw of constant
1165             switch (op)
1166             {
1167                 case OPminass:
1168                     cs.Irm ^= modregrm(0,6,0);      // SUB => SBB
1169                     break;
1170 
1171                 case OPaddass:
1172                     cs.Irm |= modregrm(0,2,0);      // ADD => ADC
1173                     break;
1174 
1175                 default:
1176                     break;
1177             }
1178             cdb.gen(&cs);
1179         }
1180         else
1181             assert(0);
1182         freenode(e.EV.E2);        // don't need it anymore
1183     }
         // lvalue is a register variable and rvalue is a variable or
         // indirection: generate OP varreg,EA with EA addressing the rvalue
1184     else if (isregvar(e1,&varregm,&varreg) &&
1185              (e2.Eoper == OPvar || e2.Eoper == OPind) &&
1186             !evalinregister(e2) &&
1187              sz <= REGSIZE)               // deal with later
1188     {
1189         getlvalue(cdb,&cs,e2,0);
1190         freenode(e2);
1191         getregs(cdb,varregm);
1192         code_newreg(&cs, varreg);
1193         if (I64 && sz == 1 && varreg >= 4)
1194             cs.Irex |= REX;
1195         cs.Iop = op1 ^ 2;                       // toggle direction bit
1196         if (forccs)
1197             cs.Iflags |= CFpsw;
1198         reverse = 2;                            // remember we toggled it
1199         cdb.gen(&cs);
1200         retregs = 0;            // to trigger a bug if we attempt to use it
1201     }
1202     else if ((op == OPaddass || op == OPminass) &&
1203              sz <= REGSIZE &&
1204              !e2.Ecount &&
1205              ((jop = jmpopcode(e2)) == JC || jop == JNC ||
1206               (OTconv(e2.Eoper) && !e2.EV.E1.Ecount && ((jop = jmpopcode(e2.EV.E1)) == JC || jop == JNC)))
1207             )
1208     {
1209         /* e1 += (x < y)    ADC EA,0
1210          * e1 -= (x < y)    SBB EA,0
1211          * e1 += (x >= y)   SBB EA,-1
1212          * e1 -= (x >= y)   ADC EA,-1
1213          */
1214         getlvalue(cdb,&cs,e1,0);             // get lvalue
1215         modEA(cdb,&cs);
1216         regm_t keepmsk = idxregm(&cs);
             // evaluate the comparison for its flags only, preserving the
             // registers that address the lvalue
1217         retregs = mPSW;
1218         if (OTconv(e2.Eoper))
1219         {
1220             scodelem(cdb,e2.EV.E1,&retregs,keepmsk,true);
1221             freenode(e2);
1222         }
1223         else
1224             scodelem(cdb,e2,&retregs,keepmsk,true);
1225         cs.Iop = 0x81 ^ isbyte;                   // ADC EA,imm16/32
1226         uint regop = 2;                     // ADC
1227         if ((op == OPaddass) ^ (jop == JC))
1228             regop = 3;                          // SBB
1229         code_newreg(&cs,regop);
1230         cs.Iflags |= opsize;
1231         if (forccs)
1232             cs.Iflags |= CFpsw;
1233         cs.IFL2 = FLconst;
1234         cs.IEV2.Vsize_t = (jop == JC) ? 0 : ~cast(targ_size_t)0;
1235         cdb.gen(&cs);
1236         retregs = 0;            // to trigger a bug if we attempt to use it
1237     }
1238     else // evaluate e2 into register
1239     {
1240         retregs = (isbyte) ? BYTEREGS : ALLREGS;  // pick working reg
1241         if (tyml == TYhptr)
1242             retregs &= ~mCX;                    // need CX for shift count
1243         scodelem(cdb,e.EV.E2,&retregs,0,true);   // get rvalue
1244         getlvalue(cdb,&cs,e1,retregs);         // get lvalue
1245         modEA(cdb,&cs);
1246         cs.Iop = op1;
1247         if (sz <= REGSIZE || tyfv(tyml))
1248         {
1249             reg = findreg(retregs);
1250             code_newreg(&cs, reg);              // OP1 EA,reg
1251             if (sz == 1 && reg >= 4 && I64)
1252                 cs.Irex |= REX;
1253             if (forccs)
1254                 cs.Iflags |= CFpsw;
1255         }
1256         else if (tyml == TYhptr)
1257         {
1258             uint mreg = findregmsw(retregs);
1259             uint lreg = findreglsw(retregs);
1260             getregs(cdb,retregs | mCX);
1261 
1262             // If h -= l, convert to h += -l
1263             if (e.Eoper == OPminass)
1264             {
1265                 cdb.gen2(0xF7,modregrm(3,3,mreg));      // NEG mreg
1266                 cdb.gen2(0xF7,modregrm(3,3,lreg));      // NEG lreg
1267                 code_orflag(cdb.last(),CFpsw);
1268                 cdb.genc2(0x81,modregrm(3,3,mreg),0);   // SBB mreg,0
1269             }
1270             cs.Iop = 0x01;
1271             cs.Irm |= modregrm(0,lreg,0);
1272             cdb.gen(&cs);                               // ADD EA,lreg
1273             code_orflag(cdb.last(),CFpsw);
1274             cdb.genc2(0x81,modregrm(3,2,mreg),0);       // ADC mreg,0
1275             genshift(cdb);                              // MOV CX,offset __AHSHIFT
1276             cdb.gen2(0xD3,modregrm(3,4,mreg));          // SHL mreg,CL
1277             NEWREG(cs.Irm,mreg);                        // ADD EA+2,mreg
1278             getlvalue_msw(&cs);
1279         }
1280         else if (sz == 2 * REGSIZE)
1281         {
1282             cs.Irm |= modregrm(0,findreglsw(retregs),0);
1283             cdb.gen(&cs);                               // OP1 EA,reg+1
1284             code_orflag(cdb.last(),cflags);
1285             cs.Iop = op2;
1286             NEWREG(cs.Irm,findregmsw(retregs)); // OP2 EA+1,reg
1287             getlvalue_msw(&cs);
1288         }
1289         else
1290             assert(0);
             // emits the single instruction, or the final (high word)
             // instruction set up by the two-word cases above
1291         cdb.gen(&cs);
1292         retregs = 0;            // to trigger a bug if we attempt to use it
1293     }
1294 
1295     // See if we need to reload result into a register.
1296     // Need result in registers in case we have a 32 bit
1297     // result and we want the flags as a result.
1298     if (wantres || (sz > REGSIZE && forccs))
1299     {
1300         if (sz <= REGSIZE)
1301         {
1302             regm_t possregs;
1303 
1304             possregs = ALLREGS;
1305             if (isbyte)
1306                 possregs = BYTEREGS;
1307             retregs = forregs & possregs;
1308             if (!retregs)
1309                 retregs = possregs;
1310 
1311             // If reg field is destination
1312             if (cs.Iop & 2 && cs.Iop < 0x40 && (cs.Iop & 7) <= 5)
1313             {
1314                 reg = (cs.Irm >> 3) & 7;
1315                 if (cs.Irex & REX_R)
1316                     reg |= 8;
1317                 retregs = mask(reg);
1318                 allocreg(cdb,&retregs,&reg,tyml);
1319             }
1320             // If lvalue is a register, just use that register
1321             else if ((cs.Irm & 0xC0) == 0xC0)
1322             {
1323                 reg = cs.Irm & 7;
1324                 if (cs.Irex & REX_B)
1325                     reg |= 8;
1326                 retregs = mask(reg);
1327                 allocreg(cdb,&retregs,&reg,tyml);
1328             }
1329             else
1330             {
1331                 allocreg(cdb,&retregs,&reg,tyml);
1332                 cs.Iop = LOD ^ isbyte ^ reverse;
1333                 code_newreg(&cs, reg);
                     // NOTE(review): the other byte-register cases in this function
                     // set REX rather than REX_W - confirm REX_W is intended here
1334                 if (I64 && isbyte && reg >= 4)
1335                     cs.Irex |= REX_W;
1336                 cdb.gen(&cs);               // MOV reg,EA
1337             }
1338         }
1339         else if (tyfv(tyml) || tyml == TYhptr)
1340         {
1341             regm_t idxregs;
1342 
1343             if (tyml == TYhptr)
1344                 getlvalue_lsw(&cs);
1345             idxregs = idxregm(&cs);
1346             retregs = forregs & ~idxregs;
1347             if (!(retregs & IDXREGS))
1348                 retregs |= IDXREGS & ~idxregs;
1349             if (!(retregs & mMSW))
1350                 retregs |= mMSW & ALLREGS;
1351             allocreg(cdb,&retregs,&reg,tyml);
1352             NEWREG(cs.Irm,findreglsw(retregs));
1353             if (retregs & mES)              // if want ES loaded
1354             {
1355                 cs.Iop = 0xC4;
1356                 cdb.gen(&cs);               // LES lreg,EA
1357             }
1358             else
1359             {
1360                 cs.Iop = LOD;
1361                 cdb.gen(&cs);               // MOV lreg,EA
1362                 getlvalue_msw(&cs);
1363                 if (I32)
1364                     cs.Iflags |= CFopsize;
1365                 NEWREG(cs.Irm,reg);
1366                 cdb.gen(&cs);               // MOV mreg,EA+2
1367             }
1368         }
1369         else if (sz == 2 * REGSIZE)
1370         {
1371             regm_t idx = idxregm(&cs);
1372             retregs = forregs;
1373             if (!retregs)
1374                 retregs = ALLREGS;
1375             allocreg(cdb,&retregs,&reg,tyml);
1376             cs.Iop = LOD;
1377             NEWREG(cs.Irm,reg);
1378 
1379             code csl = cs;
1380             NEWREG(csl.Irm,findreglsw(retregs));
1381             getlvalue_lsw(&csl);
1382 
                 // If reg is also used to address the lvalue, load the
                 // other half first so the address is still intact for it
1383             if (mask(reg) & idx)
1384             {
1385                 cdb.gen(&csl);             // MOV reg+1,EA
1386                 cdb.gen(&cs);              // MOV reg,EA+2
1387             }
1388             else
1389             {
1390                 cdb.gen(&cs);              // MOV reg,EA+2
1391                 cdb.gen(&csl);             // MOV reg+1,EA
1392             }
1393         }
1394         else
1395             assert(0);
1396         if (e1.Ecount)                 // if we gen a CSE
1397             cssave(e1,retregs,!OTleaf(e1.Eoper));
1398     }
1399     freenode(e1);
1400     if (sz <= REGSIZE)
1401         *pretregs &= ~mPSW;            // flags are already set
1402     fixresult(cdb,e,retregs,pretregs);
1403 }
1404 
1405 /********************************
1406  * Generate code for *=
      *
      * Operands that live in XMM registers are handed to xmmopass() (unless
      * the result is wanted in ST0); other floating point types are handed to
      * opass87() on Posix targets and to opassdbl() otherwise.
      *
      * For integer operands of at most REGSIZE bytes the strategies tried are,
      * in order: LEA/SHL/ADD sequences for a fixed set of constant factors,
      * IMUL reg,EA,imm for other constants, IMUL reg,EA with the rvalue in a
      * register, and finally the one-operand MUL/IMUL through AX.
      * Operands of 2*REGSIZE bytes are multiplied in the DX:AX pair, either
      * inline or by calling the CLIB.lmul runtime routine.
      *
      * Params:
      *      cdb = code builder that the generated instructions are appended to
      *      e = the *= elem; e.EV.E1 is the lvalue and e.EV.E2 the rvalue
      *      pretregs = mask of where the result is wanted; forwarded to
      *                 opAssStoreReg()/opAssStorePair()
1407  */
1408 
1409 @trusted
1410 void cdmulass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
1411 {
1412     code cs;
1413     regm_t retregs;
1414     reg_t resreg;
1415     uint opr,isbyte;
1416 
1417     //printf("cdmulass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
1418     elem *e1 = e.EV.E1;
1419     elem *e2 = e.EV.E2;
1420     OPER op = e.Eoper;                     // OPxxxx
1421 
1422     tym_t tyml = tybasic(e1.Ety);              // type of lvalue
1423     char uns = tyuns(tyml) || tyuns(e2.Ety);   // unsigned if either operand is
1424     uint sz = _tysize[tyml];
1425 
1426     uint rex = (I64 && sz == 8) ? REX_W : 0;
1427     uint grex = rex << 16;          // 64 bit operands
1428 
1429     // See if evaluate in XMM registers
1430     if (config.fpxmmregs && tyxmmreg(tyml) && !(*pretregs & mST0))
1431     {
1432         xmmopass(cdb,e,pretregs);
1433         return;
1434     }
1435 
1436     if (tyfloating(tyml))
1437     {
1438         if (config.exe & EX_posix)
1439         {
1440             opass87(cdb,e,pretregs);
1441         }
1442         else
1443         {
1444             opassdbl(cdb,e,pretregs,op);
1445         }
1446         return;
1447     }
1448 
1449     if (sz <= REGSIZE)                  // if word or byte
1450     {
1451         if (e2.Eoper == OPconst &&
1452             (I32 || I64) &&
1453             el_signx32(e2) &&
1454             sz >= 4)
1455         {
1456             // See if we can use an LEA instruction
1457 
                 // ss  = scale field of the first LEA:  reg = reg + (reg << ss)
                 // ss2 = scale field of an optional second LEA: reg = reg << ss2
                 // Even factors that have no ss2 are finished with ADD reg,reg
1458             int ss;
1459             int ss2 = 0;
1460             int shift;
1461 
1462             targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
1463             switch (e2factor)
1464             {
1465                 case 12:    ss = 1; ss2 = 2; goto L4;
1466                 case 24:    ss = 1; ss2 = 3; goto L4;
1467 
1468                 case 6:
1469                 case 3:     ss = 1; goto L4;
1470 
1471                 case 20:    ss = 2; ss2 = 2; goto L4;
1472                 case 40:    ss = 2; ss2 = 3; goto L4;
1473 
1474                 case 10:
1475                 case 5:     ss = 2; goto L4;
1476 
1477                 case 36:    ss = 3; ss2 = 2; goto L4;
1478                 case 72:    ss = 3; ss2 = 3; goto L4;
1479 
1480                 case 18:
1481                 case 9:     ss = 3; goto L4;
1482                 L4:
1483                 {
1484                     getlvalue(cdb,&cs,e1,0);           // get EA
1485                     modEA(cdb,&cs);
1486                     freenode(e2);
1487                     regm_t idxregs = idxregm(&cs);
1488                     regm_t regm = *pretregs & ~(idxregs | mBP | mR13);  // don't use EBP
1489                     if (!regm)
1490                         regm = allregs & ~(idxregs | mBP | mR13);
1491                     reg_t reg;
1492                     allocreg(cdb,&regm,&reg,tyml);
1493                     cs.Iop = LOD;
1494                     code_newreg(&cs,reg);
1495                     cs.Irex |= rex;
1496                     cdb.gen(&cs);                       // MOV reg,EA
1497 
1498                     assert((reg & 7) != BP);
1499                     cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1500                                 modregxrmx(ss,reg,reg));  // LEA reg,[ss*reg][reg]
1501                     if (ss2)
1502                     {
1503                         cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1504                                        modregxrm(ss2,reg,5));
1505                         cdb.last().IFL1 = FLconst;
1506                         cdb.last().IEV1.Vint = 0;       // LEA reg,0[ss2*reg]
1507                     }
1508                     else if (!(e2factor & 1))    // if even factor
1509                     {
1510                         genregs(cdb,0x03,reg,reg); // ADD reg,reg
1511                         code_orrex(cdb.last(),rex);
1512                     }
1513                     opAssStoreReg(cdb,cs,e,reg,pretregs);
1514                     return;
1515                 }
1516 
                     // Factors built as 5*x + 8*(x << shift), doubled when even:
                     //   13 = 5 + 8,  37 = 5 + 8*4,  26 = 2*13,  74 = 2*37
1517                 case 37:
1518                 case 74:    shift = 2;
1519                             goto L5;
1520                 case 13:
1521                 case 26:    shift = 0;
1522                             goto L5;
1523                 L5:
1524                 {
1525                     getlvalue(cdb,&cs,e1,0);           // get EA
1526                     modEA(cdb,&cs);
1527                     freenode(e2);
1528                     regm_t idxregs = idxregm(&cs);
1529                     regm_t regm = *pretregs & ~(idxregs | mBP | mR13);  // don't use EBP
1530                     if (!regm)
1531                         regm = allregs & ~(idxregs | mBP | mR13);
1532                     reg_t reg;                          // return register
1533                     allocreg(cdb,&regm,&reg,tyml);
1534 
1535                     reg_t sreg = allocScratchReg(cdb, allregs & ~(regm | idxregs | mBP | mR13));
1536 
1537                     cs.Iop = LOD;
1538                     code_newreg(&cs,sreg);
1539                     cs.Irex |= rex;
1540                     cdb.gen(&cs);                                         // MOV sreg,EA
1541 
1542                     assert((sreg & 7) != BP);
1543                     assert((reg & 7) != BP);
1544                     cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1545                                           modregxrmx(2,sreg,sreg));       // LEA reg,[sreg*4][sreg]
1546                     if (shift)
1547                         cdb.genc2(0xC1,grex | modregrmx(3,4,sreg),shift); // SHL sreg,shift
1548                     cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1549                                           modregxrmx(3,sreg,reg));        // LEA reg,[sreg*8][reg]
1550                     if (!(e2factor & 1))                                  // if even factor
1551                     {
1552                         genregs(cdb,0x03,reg,reg);                        // ADD reg,reg
1553                         code_orrex(cdb.last(),rex);
1554                     }
1555                     opAssStoreReg(cdb,cs,e,reg,pretregs);
1556                     return;
1557                 }
1558 
1559                 default:
1560                     break;
1561             }
1562         }
1563 
1564         isbyte = (sz == 1);             // 1 for byte operation
1565 
             // Constant rvalue: use the three-operand IMUL reg,EA,imm
1566         if (config.target_cpu >= TARGET_80286 &&
1567             e2.Eoper == OPconst && !isbyte)
1568         {
1569             targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
                 // A 64 bit factor that does not survive truncation to int
                 // cannot be an immediate; load it into a register instead
1570             if (I64 && sz == 8 && e2factor != cast(int)e2factor)
1571                 goto L1;
1572             freenode(e2);
1573             getlvalue(cdb,&cs,e1,0);     // get EA
1574             regm_t idxregs = idxregm(&cs);
1575             retregs = *pretregs & (ALLREGS | mBP) & ~idxregs;
1576             if (!retregs)
1577                 retregs = ALLREGS & ~idxregs;
1578             allocreg(cdb,&retregs,&resreg,tyml);
1579             cs.Iop = 0x69;                  // IMUL reg,EA,e2value
1580             cs.IFL2 = FLconst;
1581             cs.IEV2.Vint = cast(int)e2factor;
1582             opr = resreg;
1583         }
1584         else if (!I16 && !isbyte)
1585         {
1586          L1:
1587             retregs = *pretregs & (ALLREGS | mBP);
1588             if (!retregs)
1589                 retregs = ALLREGS;
1590             codelem(cdb,e2,&retregs,false); // load rvalue in reg
1591             getlvalue(cdb,&cs,e1,retregs);  // get EA
1592             getregs(cdb,retregs);           // destroy these regs
1593             cs.Iop = 0x0FAF;                        // IMUL resreg,EA
1594             resreg = findreg(retregs);
1595             opr = resreg;
1596         }
1597         else
1598         {
                 // Byte operand or 16 bit code: one-operand multiply through AX
1599             retregs = mAX;
1600             codelem(cdb,e2,&retregs,false);      // load rvalue in AX
1601             getlvalue(cdb,&cs,e1,mAX);           // get EA
1602             getregs(cdb,isbyte ? mAX : mAX | mDX); // destroy these regs
1603             cs.Iop = 0xF7 ^ isbyte;                        // [I]MUL EA
1604             opr = uns ? 4 : 5;              // MUL/IMUL
1605             resreg = AX;                    // result register for *
1606         }
             // opr is either the destination register or the /4 (MUL), /5 (IMUL)
             // opcode extension; both go into the reg field of the instruction
1607         code_newreg(&cs,opr);
1608         cdb.gen(&cs);
1609 
1610         opAssStoreReg(cdb, cs, e, resreg, pretregs);
1611         return;
1612     }
1613     else if (sz == 2 * REGSIZE)
1614     {
1615         if (e2.Eoper == OPconst && I32)
1616         {
1617             /*  if (msw)
1618                   IMUL    EDX,EDX,lsw
1619                   IMUL    reg,EAX,msw
1620                   ADD     reg,EDX
1621                 else
1622                   IMUL    reg,EDX,lsw
1623                 MOV       EDX,lsw
1624                 MUL       EDX
1625                 ADD       EDX,reg
1626              */
1627             freenode(e2);
1628             retregs = mDX|mAX;
1629             reg_t rhi, rlo;
1630             opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
1631             const regm_t keepmsk = idxregm(&cs);
1632 
1633             reg_t reg = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));
1634 
                 // NOTE(review): e2 is read here after the freenode(e2) above, whereas
                 // the IMUL reg,EA,imm path earlier in this function reads the constant
                 // before calling freenode() - confirm this ordering is safe
1635             targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
1636             const lsw = cast(targ_int)(e2factor & ((1L << (REGSIZE * 8)) - 1));
1637             const msw = cast(targ_int)(e2factor >> (REGSIZE * 8));
1638 
1639             if (msw)
1640             {
1641                 genmulimm(cdb,DX,DX,lsw);          // IMUL EDX,EDX,lsw
1642                 genmulimm(cdb,reg,AX,msw);         // IMUL reg,EAX,msw
1643                 cdb.gen2(0x03,modregrm(3,reg,DX)); // ADD reg,EDX
1644             }
1645             else
1646                 genmulimm(cdb,reg,DX,lsw);         // IMUL reg,EDX,lsw
1647 
1648             movregconst(cdb,DX,lsw,0);             // MOV EDX,lsw
1649             getregs(cdb,mDX);
1650             cdb.gen2(0xF7,modregrm(3,4,DX));       // MUL EDX
1651             cdb.gen2(0x03,modregrm(3,DX,reg));     // ADD EDX,reg
1652         }
1653         else
1654         {
1655             retregs = mDX | mAX;
                 // rvalue goes into any other register pair on PentiumPro and
                 // later (inline multiply), otherwise into CX,BX for the lmul call
1656             regm_t rretregs = (config.target_cpu >= TARGET_PentiumPro) ? allregs & ~retregs : mCX | mBX;
1657             codelem(cdb,e2,&rretregs,false);
1658             getlvalue(cdb,&cs,e1,retregs | rretregs);
1659             getregs(cdb,retregs);
1660             cs.Iop = LOD;
1661             cdb.gen(&cs);                   // MOV AX,EA
1662             getlvalue_msw(&cs);
1663             cs.Irm |= modregrm(0,DX,0);
1664             cdb.gen(&cs);                   // MOV DX,EA+2
1665             getlvalue_lsw(&cs);
1666             if (config.target_cpu >= TARGET_PentiumPro)
1667             {
1668                 regm_t rlo = findreglsw(rretregs);
1669                 regm_t rhi = findregmsw(rretregs);
1670                 /*  IMUL    rhi,EAX
1671                     IMUL    EDX,rlo
1672                     ADD     rhi,EDX
1673                     MUL     rlo
1674                     ADD     EDX,rhi
1675                  */
1676                  getregs(cdb,mAX|mDX|mask(rhi));
1677                  cdb.gen2(0x0FAF,modregrm(3,rhi,AX));
1678                  cdb.gen2(0x0FAF,modregrm(3,DX,rlo));
1679                  cdb.gen2(0x03,modregrm(3,rhi,DX));
1680                  cdb.gen2(0xF7,modregrm(3,4,rlo));
1681                  cdb.gen2(0x03,modregrm(3,DX,rhi));
1682             }
1683             else
1684             {
1685                 callclib(cdb,e,CLIB.lmul,&retregs,idxregm(&cs));
1686             }
1687         }
1688 
             // product is in the retregs pair; store it back to the lvalue
1689         opAssStorePair(cdb, cs, e, findregmsw(retregs), findreglsw(retregs), pretregs);
1690         return;
1691     }
1692     else
1693     {
1694         assert(0);
1695     }
1696 }
1697 
1698 
1699 /********************************
1700  * Generate code for /= %=
      *
      * Dispatches on the type and size of the lvalue (e.EV.E1):
      *  - XMM types (config.fpxmmregs set, operator is not %=, and mST0 is not
      *    requested in *pretregs) are handed to xmmopass()
      *  - any other floating type goes to opass87() for Posix targets and to
      *    opassdbl() otherwise
      *  - integers with size <= REGSIZE: with CFG4speed and a constant divisor,
      *    multiply/shift sequences replace the divide; otherwise DIV/IDIV is
      *    generated with the dividend in DX:AX
      *  - integers with size 2*REGSIZE: the divisor is loaded into CX|BX, the
      *    lvalue into DX|AX, and a CLIB runtime routine is called
      *
      * Params:
      *      cdb = code builder that receives the generated instructions
      *      e = the /= or %= elem; E1 is the lvalue, E2 the divisor
      *      pretregs = register mask pointer; read here only for mST0 and
      *                 otherwise forwarded to the helper / store routines
      *                 (presumably the registers the result is wanted in)
1701  */
1702 
1703 @trusted
1704 void cddivass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
1705 {
1706     elem *e1 = e.EV.E1;
1707     elem *e2 = e.EV.E2;
1708 
1709     tym_t tyml = tybasic(e1.Ety);              // type of lvalue
1710     OPER op = e.Eoper;                     // OPxxxx
1711 
1712     // See if evaluate in XMM registers
1713     if (config.fpxmmregs && tyxmmreg(tyml) && op != OPmodass && !(*pretregs & mST0))
1714     {
1715         xmmopass(cdb,e,pretregs);
1716         return;
1717     }
1718 
         // Floating point lvalue not handled above: opass87() for Posix
         // targets, opassdbl() for everything else
1719     if (tyfloating(tyml))
1720     {
1721         if (config.exe & EX_posix)
1722         {
1723             opass87(cdb,e,pretregs);
1724         }
1725         else
1726         {
1727             opassdbl(cdb,e,pretregs,op);
1728         }
1729         return;
1730     }
1731 
         // Instruction template for the lvalue's effective address. Deliberately
         // left uninitialized; every path below passes it to getlvalue() or
         // opAssLoadPair() before using it (presumably those fill it in).
1732     code cs = void;
1733 
1734     //printf("cddivass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
         // Unsigned operation if either operand has an unsigned type
1735     char uns = tyuns(tyml) || tyuns(e2.Ety);
1736     uint sz = _tysize[tyml];
1737 
         // rex is REX_W only for 8 byte operands in 64 bit mode; grex is the
         // same value shifted so it can be OR'ed into the modregrm argument
         // of gen2()/genc2() below
1738     uint rex = (I64 && sz == 8) ? REX_W : 0;
1739     uint grex = rex << 16;          // 64 bit operands
1740 
1741     if (sz <= REGSIZE)                  // if word or byte
1742     {
1743         uint isbyte = (sz == 1);        // 1 for byte operation
1744         reg_t resreg;
1745         targ_size_t e2factor;
1746         targ_size_t d;
1747         bool neg;
1748         int pow2;
1749 
1750         assert(!isbyte);                      // should never happen
1751         assert(I16 || sz != SHORTSIZE);
1752 
             // Constant divisor:
             //   e2factor = value of the constant
             //   pow2     = ispow2(e2factor)
             //   d        = e2factor, negated (and neg set) when the operation
             //              is signed and the constant is negative
1753         if (e2.Eoper == OPconst)
1754         {
1755             e2factor = cast(targ_size_t)el_tolong(e2);
1756             pow2 = ispow2(e2factor);
1757             d = e2factor;
1758             if (!uns && cast(targ_llong)e2factor < 0)
1759             {
1760                 neg = true;
1761                 d = -d;
1762             }
1763         }
1764 
1765         // Signed divide by a constant
             // (d & (d - 1)) is nonzero only when d has more than one bit set,
             // so a divisor of 0 or a power of 2 does not take this path
1766         if (config.flags4 & CFG4speed &&
1767             e2.Eoper == OPconst &&
1768             !uns &&
1769             (d & (d - 1)) &&
1770             ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))))
1771         {
1772             /* R1 / 10
1773              *
1774              *  MOV     EAX,m
1775              *  IMUL    R1
1776              *  MOV     EAX,R1
1777              *  SAR     EAX,31
1778              *  SAR     EDX,shpost
1779              *  SUB     EDX,EAX
1780              *  IMUL    EAX,EDX,d
1781              *  SUB     R1,EAX
1782              *
1783              * EDX = quotient
1784              * R1 = remainder
1785              */
1786             assert(sz == 4 || sz == 8);
1787 
1788             ulong m;
1789             int shpost;
1790             const int N = sz * 8;
1791             const bool mhighbit = choose_multiplier(N, d, N - 1, &m, &shpost);
1792 
1793             freenode(e2);
1794 
                 // Load the lvalue into a register that is none of AX, DX or
                 // an index register of the EA; AX and DX are used below for
                 // the multiply
1795             getlvalue(cdb,&cs,e1,mAX | mDX);
1796             reg_t reg;
1797             opAssLoadReg(cdb, cs, e, reg, allregs & ~( mAX | mDX | idxregm(&cs)));    // MOV reg,EA
1798             getregs(cdb, mAX|mDX);
1799 
1800             /* Algorithm 5.2
1801              * if m>=2**(N-1)
1802              *    q = SRA(n + MULSH(m-2**N,n), shpost) - XSIGN(n)
1803              * else
1804              *    q = SRA(MULSH(m,n), shpost) - XSIGN(n)
1805              * if (neg)
1806              *    q = -q
1807              */
1808             const bool mgt = mhighbit || m >= (1UL << (N - 1));
1809             movregconst(cdb, AX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EAX,m
1810             cdb.gen2(0xF7,grex | modregrmx(3,5,reg));               // IMUL reg
1811             if (mgt)
1812                 cdb.gen2(0x03,grex | modregrmx(3,DX,reg));          // ADD EDX,reg
1813             getregsNoSave(mAX);                                     // EAX no longer contains 'm'
1814             genmovreg(cdb, AX, reg);                                // MOV EAX,reg
1815             cdb.genc2(0xC1,grex | modregrm(3,7,AX),sz * 8 - 1);     // SAR EAX,31
1816             if (shpost)
1817                 cdb.genc2(0xC1,grex | modregrm(3,7,DX),shpost);     // SAR EDX,shpost
                 // For /= by a negative constant the subtraction is done the
                 // other way round (EAX - EDX), which yields the negated
                 // quotient directly in EAX. For %= the quotient always ends
                 // up in EDX (asserted in the OPmodass case below).
1818             reg_t r3;
1819             if (neg && op == OPdivass)
1820             {
1821                 cdb.gen2(0x2B,grex | modregrm(3,AX,DX));            // SUB EAX,EDX
1822                 r3 = AX;
1823             }
1824             else
1825             {
1826                 cdb.gen2(0x2B,grex | modregrm(3,DX,AX));            // SUB EDX,EAX
1827                 r3 = DX;
1828             }
1829 
1830             // r3 is quotient
1831             reg_t resregx;
1832             switch (op)
1833             {   case OPdivass:
1834                     resregx = r3;
1835                     break;
1836 
1837                 case OPmodass:
                         // remainder = reg - quotient * d, computed via EAX.
                         // The immediate form of IMUL is used when d fits in
                         // a targ_long; otherwise d is loaded into EAX first.
1838                     assert(reg != AX && r3 == DX);
1839                     if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
1840                     {
1841                         cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);      // IMUL EAX,EDX,d
1842                     }
1843                     else
1844                     {
1845                         movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0);     // MOV EAX,d
1846                         cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));     // IMUL EAX,EDX
1847                         getregsNoSave(mAX);                             // EAX no longer contains 'd'
1848                     }
1849                     cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));          // SUB R1,EAX
1850                     resregx = reg;
1851                     break;
1852 
1853                 default:
1854                     assert(0);
1855             }
1856 
1857             opAssStoreReg(cdb, cs, e, resregx, pretregs);
1858             return;
1859         }
1860 
1861         // Unsigned divide by a constant
             // Nested helper: emits the multiply-by-reciprocal sequence for an
             // unsigned /= or %= by the constant e2factor and stores the result.
             // It reads e, e1, e2, e2factor, op, sz, grex and pretregs from the
             // enclosing function, but declares its own local `cs`.
1862         void unsignedDivideByConstant(ref CodeBuilder cdb)
1863         {
1864             assert(sz == 4 || sz == 8);
1865 
1866             reg_t r3;
1867             reg_t reg;
1868             ulong m;
1869             int shpre;
1870             int shpost;
1871             code cs = void;
1872 
                 // udiv_coefficients() returning true selects the longer
                 // subtract/shift/add form (which requires shpre == 0);
                 // false selects the plain shift-multiply-shift form
1873             if (udiv_coefficients(sz * 8, e2factor, &shpre, &m, &shpost))
1874             {
1875                 /* t1 = MULUH(m, n)
1876                  * q = SRL(t1 + SRL(n - t1, 1), shpost - 1)
1877                  *   MOV   EAX,reg
1878                  *   MOV   EDX,m
1879                  *   MUL   EDX
1880                  *   MOV   EAX,reg
1881                  *   SUB   EAX,EDX
1882                  *   SHR   EAX,1
1883                  *   LEA   R3,[EAX][EDX]
1884                  *   SHR   R3,shpost-1
1885                  */
1886                 assert(shpre == 0);
1887 
1888                 freenode(e2);
1889                 getlvalue(cdb,&cs,e1,mAX | mDX);
1890                 regm_t idxregs = idxregm(&cs);
1891                 opAssLoadReg(cdb, cs, e, reg, allregs & ~(mAX|mDX | idxregs)); // MOV reg,EA
1892                 getregs(cdb, mAX|mDX);
1893 
1894                 genmovreg(cdb,AX,reg);                                // MOV EAX,reg
1895                 movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0); // MOV EDX,m
1896                 getregs(cdb,mask(reg) | mDX | mAX);
1897                 cdb.gen2(0xF7,grex | modregrmx(3,4,DX));              // MUL EDX
1898                 genmovreg(cdb,AX,reg);                                // MOV EAX,reg
1899                 cdb.gen2(0x2B,grex | modregrm(3,AX,DX));              // SUB EAX,EDX
1900                 cdb.genc2(0xC1,grex | modregrm(3,5,AX),1);            // SHR EAX,1
                     // Pick r3 outside the EA's index registers. For %= it
                     // must also differ from reg (the original value is still
                     // needed) and, when e2 fails el_signx32(), from AX (which
                     // then holds e2factor in the OPmodass case below).
1901                 regm_t regm3 = allregs & ~idxregs;
1902                 if (op == OPmodass)
1903                 {
1904                     regm3 &= ~mask(reg);
1905                     if (!el_signx32(e2))
1906                         regm3 &= ~mAX;
1907                 }
1908                 allocreg(cdb,&regm3,&r3,TYint);
1909                 cdb.gen2sib(LEA,grex | modregxrm(0,r3,4),modregrm(0,AX,DX)); // LEA R3,[EAX][EDX]
1910                 if (shpost != 1)
1911                     cdb.genc2(0xC1,grex | modregrmx(3,5,r3),shpost-1);   // SHR R3,shpost-1
1912             }
1913             else
1914             {
1915                 /* q = SRL(MULUH(m, SRL(n, shpre)), shpost)
1916                  *   SHR   EAX,shpre
1917                  *   MOV   reg,m
1918                  *   MUL   reg
1919                  *   SHR   EDX,shpost
1920                  */
1921 
1922                 freenode(e2);
1923                 getlvalue(cdb,&cs,e1,mAX | mDX);
1924                 regm_t idxregs = idxregm(&cs);
1925                 opAssLoadReg(cdb, cs, e, reg, allregs & ~(mAX|mDX | idxregs)); // MOV reg,EA
1926                 getregs(cdb, mAX|mDX);
1927 
1928                 if (reg != AX)
1929                 {
1930                     getregs(cdb,mAX);
1931                     genmovreg(cdb,AX,reg);                              // MOV EAX,reg
1932                 }
1933                 if (shpre)
1934                 {
1935                     getregs(cdb,mAX);
1936                     cdb.genc2(0xC1,grex | modregrm(3,5,AX),shpre);      // SHR EAX,shpre
1937                 }
1938                 getregs(cdb,mDX);
1939                 movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
1940                 getregs(cdb,mDX | mAX);
1941                 cdb.gen2(0xF7,grex | modregrmx(3,4,DX));                // MUL EDX
1942                 if (shpost)
1943                     cdb.genc2(0xC1,grex | modregrm(3,5,DX),shpost);     // SHR EDX,shpost
1944                 r3 = DX;
1945             }
1946 
1947             reg_t resregx;
1948             switch (op)
1949             {
1950                 case OPdivass:
1951                     // r3 = quotient
1952                     resregx = r3;
1953                     break;
1954 
1955                 case OPmodass:
1956                     /* reg = original value
1957                      * r3  = quotient
1958                      */
                         // remainder = reg - r3 * e2factor, computed via EAX
1959                     assert(reg != AX);
1960                     if (el_signx32(e2))
1961                     {
1962                         cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
1963                     }
1964                     else
1965                     {
1966                         assert(!(mask(r3) & mAX));
1967                         movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0);  // MOV EAX,e2factor
1968                         getregs(cdb,mAX);
1969                         cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
1970                     }
1971                     getregs(cdb,mask(reg));
1972                     cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
1973                     resregx = reg;
1974                     break;
1975 
1976                 default:
1977                     assert(0);
1978             }
1979 
1980             opAssStoreReg(cdb, cs, e, resregx, pretregs);
1981             return;
1982         }
1983 
             // Unsigned divide by a constant greater than 2 that has more than
             // one bit set (i.e. is not a power of 2)
1984         if (config.flags4 & CFG4speed &&
1985             e2.Eoper == OPconst &&
1986             uns &&
1987             e2factor > 2 && (e2factor & (e2factor - 1)) &&
1988             ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))))
1989         {
1990             unsignedDivideByConstant(cdb);
1991             return;
1992         }
1993 
             // Signed divide or modulo by a constant for which ispow2() did not
             // return -1 (presumably: the constant is a power of 2) and whose
             // value survives a round trip through int. Pre-80286 targets are
             // excluded for /= unless pow2 == 1, because the multi-bit shift
             // form (opcode 0xC1) is used below.
1994         if (config.flags4 & CFG4speed &&
1995             e2.Eoper == OPconst && !uns &&
1996             (sz == REGSIZE || (I64 && sz == 4)) &&
1997             pow2 != -1 &&
1998             e2factor == cast(int)e2factor &&
1999             !(config.target_cpu < TARGET_80286 && pow2 != 1 && op == OPdivass)
2000            )
2001         {
2002             freenode(e2);
2003             if (pow2 == 1 && op == OPdivass && config.target_cpu > TARGET_80386)
2004             {
2005                 /* This is better than the code further down because it is
2006                  * not constrained to using AX and DX.
2007                  */
2008                 getlvalue(cdb,&cs,e1,0);
2009                 regm_t idxregs = idxregm(&cs);
2010                 reg_t reg;
2011                 opAssLoadReg(cdb,cs,e,reg,allregs & ~idxregs); // MOV reg,EA
2012 
                     // r = sign bit of reg (0 or 1); adding it before the
                     // arithmetic shift rounds negative values toward zero
2013                 reg_t r = allocScratchReg(cdb, allregs & ~(idxregs | mask(reg)));
2014                 genmovreg(cdb,r,reg);                        // MOV r,reg
2015                 cdb.genc2(0xC1,grex | modregxrmx(3,5,r),(sz * 8 - 1)); // SHR r,31
2016                 cdb.gen2(0x03,grex | modregxrmx(3,reg,r));   // ADD reg,r
2017                 cdb.gen2(0xD1,grex | modregrmx(3,7,reg));    // SAR reg,1
2018 
2019                 opAssStoreReg(cdb, cs, e, reg, pretregs);
2020                 return;
2021             }
2022 
2023             // Signed divide or modulo by power of 2
                 // The value is loaded into AX and CWD puts its sign extension
                 // into DX (all ones if negative, else zero); the sequences
                 // below use DX as the sign mask.
2024             getlvalue(cdb,&cs,e1,mAX | mDX);
2025             reg_t reg;
2026             opAssLoadReg(cdb,cs,e,reg,mAX);
2027 
2028             getregs(cdb,mDX);                   // DX is scratch register
2029             cdb.gen1(0x99);                     // CWD
2030             code_orrex(cdb.last(), rex);
2031             if (pow2 == 1)
2032             {
2033                 if (op == OPdivass)
2034                 {
2035                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
2036                     cdb.gen2(0xD1,grex | modregrm(3,7,AX));        // SAR AX,1
2037                     resreg = AX;
2038                 }
2039                 else // OPmodass
2040                 {
2041                     cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
2042                     cdb.genc2(0x81,grex | modregrm(3,4,AX),1);     // AND AX,1
2043                     cdb.gen2(0x03,grex | modregrm(3,DX,AX));       // ADD DX,AX
2044                     resreg = DX;
2045                 }
2046             }
2047             else
2048             {
2049                 assert(pow2 < 32);
                     // m = mask of the low pow2 bits
2050                 targ_ulong m = (1 << pow2) - 1;
2051                 if (op == OPdivass)
2052                 {
2053                     cdb.genc2(0x81,grex | modregrm(3,4,DX),m);     // AND DX,m
2054                     cdb.gen2(0x03,grex | modregrm(3,AX,DX));       // ADD AX,DX
2055                     // Be careful not to generate this for 8088
2056                     assert(config.target_cpu >= TARGET_80286);
2057                     cdb.genc2(0xC1,grex | modregrm(3,7,AX),pow2);  // SAR AX,pow2
2058                     resreg = AX;
2059                 }
2060                 else // OPmodass
2061                 {
                         // XOR/SUB with the sign mask negates AX when it was
                         // negative; mask the magnitude, then apply the same
                         // XOR/SUB again to restore the sign
2062                     cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
2063                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
2064                     cdb.genc2(0x81,grex | modregrm(3,4,AX),m);     // AND AX,m
2065                     cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
2066                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
2067                     resreg = AX;
2068                 }
2069             }
2070         }
2071         else
2072         {
                 // General case: divisor in a register other than AX/DX,
                 // dividend loaded from the EA into AX and extended into DX
                 // (zero for unsigned, CWD for signed), then DIV or IDIV.
                 // The quotient is taken from AX, the remainder from DX.
2073             regm_t retregs = ALLREGS & ~(mAX|mDX);     // DX gets sign extension
2074             codelem(cdb,e2,&retregs,false);            // load rvalue in retregs
2075             reg_t reg = findreg(retregs);
2076             getlvalue(cdb,&cs,e1,mAX | mDX | retregs); // get EA
2077             getregs(cdb,mAX | mDX);         // destroy these regs
2078             cs.Irm |= modregrm(0,AX,0);
2079             cs.Iop = LOD;
2080             cdb.gen(&cs);                   // MOV AX,EA
2081             if (uns)                        // if uint
2082                 movregconst(cdb,DX,0,0);    // CLR DX
2083             else                            // else signed
2084             {
2085                 cdb.gen1(0x99);             // CWD
2086                 code_orrex(cdb.last(),rex);
2087             }
2088             getregs(cdb,mDX | mAX); // DX and AX will be destroyed
2089             const uint opr = uns ? 6 : 7;     // DIV/IDIV
2090             genregs(cdb,0xF7,opr,reg);   // OPR reg
2091             code_orrex(cdb.last(),rex);
2092             resreg = (op == OPmodass) ? DX : AX;        // result register
2093         }
2094         opAssStoreReg(cdb, cs, e, resreg, pretregs);
2095         return;
2096     }
2097 
         // From here on the operand occupies a register pair
2098     assert(sz == 2 * REGSIZE);
2099 
2100     targ_size_t e2factor;
2101     int pow2;
2102     if (e2.Eoper == OPconst)
2103     {
2104         e2factor = cast(targ_size_t)el_tolong(e2);
2105         pow2 = ispow2(e2factor);
2106     }
2107 
2108     // Register pair signed divide by power of 2
         // NOTE(review): `op` is a copy of e.Eoper, so `op == OPdivass` and
         // `e.Eoper == OPconst` cannot both hold (assuming distinct opcodes);
         // as written this branch looks unreachable. `e2.Eoper == OPconst` was
         // presumably intended - confirm, and review the body (see the notes
         // inside) before enabling it.
2109     if (op == OPdivass &&
2110         !uns &&
2111         e.Eoper == OPconst &&
2112         pow2 != -1 &&
2113         I32 // not set up for I16 or I64 cent
2114        )
2115     {
2116         freenode(e2);
2117         regm_t retregs = mDX|mAX | mCX|mBX;     // LSW must be byte reg because of later SETZ
2118         reg_t rhi, rlo;
2119         opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
2120         const regm_t keepmsk = idxregm(&cs);
2121         retregs = mask(rhi) | mask(rlo);
2122 
2123         if (pow2 < 32)
2124         {
2125             reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));
2126 
2127             genmovreg(cdb,r1,rhi);                                        // MOV  r1,rhi
2128             if (pow2 == 1)
2129                 cdb.genc2(0xC1,grex | modregrmx(3,5,r1),REGSIZE * 8 - 1); // SHR  r1,31
2130             else
2131             {
2132                 cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR  r1,31
2133                 cdb.genc2(0x81,grex | modregrmx(3,4,r1),(1 << pow2) - 1); // AND  r1,mask
2134             }
2135             cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD  rlo,r1
2136             cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC  rhi,0
2137             cdb.genc2(0x0FAC,grex | modregrm(3,rhi,rlo),pow2);            // SHRD rlo,rhi,pow2
2138             cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),pow2);               // SAR  rhi,pow2
2139         }
2140         else if (pow2 == 32)
2141         {
2142             reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));
2143 
2144             genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
2145             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
2146             cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD rlo,r1
2147             cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC rhi,0
2148             cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
2149             cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
2150         }
2151         else if (pow2 < 63)
2152         {
2153             reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));
2154             reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk | mask(r1)));
2155 
2156             genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
2157             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
2158             cdb.genmovreg(r2,r1);                                         // MOV r2,r1
2159 
2160             if (pow2 == 33)
2161             {
2162                 cdb.gen2(0xF7,modregrmx(3,3,r1));                         // NEG r1
2163                 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r2));               // ADD rlo,r2
2164                 cdb.gen2(0x13,grex | modregxrmx(3,rhi,r1));               // ADC rhi,r1
2165             }
2166             else
2167             {
2168                 cdb.genc2(0x81,grex | modregrmx(3,4,r2),(1 << (pow2-32)) - 1); // AND r2,mask
2169                 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                    // ADD rlo,r1
2170                 cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                    // ADC rhi,r2
2171             }
2172 
2173             cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
2174             cdb.genc2(0xC1,grex | modregrmx(3,7,rlo),pow2 - 32);          // SAR rlo,pow2-32
2175             cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
2176         }
2177         else
2178         {
2179             // This may be better done by cgelem.d
2180             assert(pow2 == 63);
2181             assert(mask(rlo) & BYTEREGS);                          // for SETZ
                 // NOTE(review): opcode 0x81 with reg field 4 is commented as
                 // AND everywhere else in this function, yet the comment on
                 // the next line says ADD (and its constant is missing a
                 // digit). Verify which instruction is intended.
2182             cdb.genc2(0x81,grex | modregrmx(3,4,rhi),0x8000_0000); // ADD rhi,0x8000_000
2183             cdb.genregs(0x09,rlo,rhi);                             // OR  rlo,rhi
2184             cdb.gen2(0x0F94,modregrmx(3,0,rlo));                   // SETZ rlo
2185             cdb.genregs(MOVZXb,rlo,rlo);                           // MOVZX rlo,rloL
2186             movregconst(cdb,rhi,0,0);                              // MOV rhi,0
2187         }
2188 
             // NOTE(review): the register arguments here are (rlo, rhi), while
             // the general path at the end of this function passes the msw
             // register first and the lsw register second. Check against
             // opAssStorePair's parameter order.
2189         opAssStorePair(cdb, cs, e, rlo, rhi, pretregs);
2190         return;
2191     }
2192 
2193     // Register pair signed modulo by power of 2
         // NOTE(review): same concern as the divide case above -
         // `op == OPmodass && e.Eoper == OPconst` cannot both hold since `op`
         // is e.Eoper; `e2.Eoper` was presumably intended. Confirm before
         // relying on this path.
2194     if (op == OPmodass &&
2195         !uns &&
2196         e.Eoper == OPconst &&
2197         pow2 != -1 &&
2198         I32 // not set up for I64 cent yet
2199        )
2200     {
2201         freenode(e2);
2202         regm_t retregs = mDX|mAX;
2203         reg_t rhi, rlo;
2204         opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
2205         const regm_t keepmsk = idxregm(&cs);
2206 
2207         regm_t scratchm = allregs & ~(retregs | keepmsk);
2208         if (pow2 == 63)
2209             scratchm &= BYTEREGS;               // because of SETZ
2210         reg_t r1 = allocScratchReg(cdb, scratchm);
2211 
2212         if (pow2 < 32)
2213         {
2214             cdb.genmovreg(r1,rhi);                                    // MOV r1,rhi
2215             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31
2216             cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
2217             cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
2218             cdb.genc2(0x81,grex | modregrmx(3,4,rlo),(1<<pow2)-1);    // AND rlo,(1<<pow2)-1
2219             cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
2220             cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
2221             cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));              // SBB rhi,rhi
2222         }
2223         else if (pow2 == 32)
2224         {
2225             cdb.genmovreg(r1,rhi);                                      // MOV r1,rhi
2226             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR r1,31
2227             cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD rlo,r1
2228             cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB rlo,r1
2229             cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));                // SBB rhi,rhi
2230         }
2231         else if (pow2 < 63)
2232         {
                 // NOTE(review): scratchm is still the mask r1 was allocated
                 // from, so this expression removes every register of that
                 // mask (not just r1) and no longer excludes keepmsk. Verify
                 // that this is the intended candidate set for r2.
2233             scratchm = allregs & ~(retregs | scratchm);
2234             reg_t r2;
2235             allocreg(cdb,&scratchm,&r2,TYint);
2236 
2237             cdb.genmovreg(r1,rhi);                                      // MOV  r1,rhi
2238             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR  r1,31
2239             cdb.genmovreg(r2,r1);                                       // MOV  r2,r1
2240             cdb.genc2(0x0FAC,grex | modregrm(3,r2,r1),64-pow2);         // SHRD r1,r2,64-pow2
2241             cdb.genc2(0xC1,grex | modregrmx(3,5,r2),64-pow2);           // SHR  r2,64-pow2
2242             cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD  rlo,r1
2243             cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                 // ADC  rhi,r2
2244             cdb.genc2(0x81,grex | modregrmx(3,4,rhi),(1<<(pow2-32))-1); // AND  rhi,(1<<(pow2-32))-1
2245             cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB  rlo,r1
2246             cdb.gen2(0x1B,grex | modregxrmx(3,rhi,r2));                 // SBB  rhi,r2
2247         }
2248         else
2249         {
2250             // This may be better done by cgelem.d
2251             assert(pow2 == 63);
2252 
2253             cdb.genc1(LEA,grex | modregxrmx(2,r1,rhi), FLconst, 0x8000_0000); // LEA r1,0x8000_0000[rhi]
2254             cdb.gen2(0x0B,grex | modregxrmx(3,r1,rlo));               // OR   r1,rlo
2255             cdb.gen2(0x0F94,modregrmx(3,0,r1));                       // SETZ r1
2256             cdb.genc2(0xC1,grex | modregrmx(3,4,r1),REGSIZE * 8 - 1); // SHL  r1,31
2257             cdb.gen2(0x2B,grex | modregxrmx(3,rhi,r1));               // SUB  rhi,r1
2258         }
2259 
             // NOTE(review): (rlo, rhi) argument order - see the matching note
             // in the divide case; verify against opAssStorePair.
2260         opAssStorePair(cdb, cs, e, rlo, rhi, pretregs);
2261         return;
2262     }
2263 
         // General register pair case: divisor in CX|BX, lvalue in DX|AX,
         // then call the runtime library routine
2264     regm_t rretregs = mCX|mBX;
2265     codelem(cdb,e2,&rretregs,false);    // load e2 into CX|BX
2266 
2267     reg_t rlo;
2268     reg_t rhi;
2269     opAssLoadPair(cdb, cs, e, rhi, rlo, mDX|mAX, rretregs);
2270 
         // The result is taken from CX|BX for %= and from DX|AX for /=.
         // `++lib` selects the CLIB entry immediately after the divide routine;
         // this assumes each modulo routine directly follows its divide
         // routine in CLIB - verify against the CLIB declaration.
2271     regm_t retregs = (op == OPmodass) ? mCX|mBX : mDX|mAX;
2272     uint lib = uns ? CLIB.uldiv : CLIB.ldiv;
2273     if (op == OPmodass)
2274         ++lib;
2275     callclib(cdb,e,lib,&retregs,idxregm(&cs));
2276 
2277     opAssStorePair(cdb, cs, e, findregmsw(retregs), findreglsw(retregs), pretregs);
2278 }
2279 
2280 
2281 /********************************
2282  * Generate code for <<= and >>=
2283  */
2284 
2285 @trusted
2286 void cdshass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2287 {
2288     code cs;
2289     regm_t retregs;
2290     uint op1,op2;
2291     reg_t reg;
2292 
2293     elem *e1 = e.EV.E1;
2294     elem *e2 = e.EV.E2;
2295 
2296     tym_t tyml = tybasic(e1.Ety);              // type of lvalue
2297     uint sz = _tysize[tyml];
2298     uint isbyte = tybyte(e.Ety) != 0;        // 1 for byte operations
2299     tym_t tym = tybasic(e.Ety);                // type of result
2300     OPER oper = e.Eoper;
2301     assert(tysize(e2.Ety) <= REGSIZE);
2302 
2303     uint rex = (I64 && sz == 8) ? REX_W : 0;
2304 
2305     // if our lvalue is a cse, make sure we evaluate for result in register
2306     if (e1.Ecount && !(*pretregs & (ALLREGS | mBP)) && !isregvar(e1,&retregs,&reg))
2307         *pretregs |= ALLREGS;
2308 
2309     version (SCPP)
2310     {
2311         // Do this until the rest of the compiler does OPshr/OPashr correctly
2312         if (oper == OPshrass)
2313             oper = tyuns(tyml) ? OPshrass : OPashrass;
2314     }
2315 
2316     // Select opcodes. op2 is used for msw for long shifts.
2317 
2318     switch (oper)
2319     {
2320         case OPshlass:
2321             op1 = 4;                    // SHL
2322             op2 = 2;                    // RCL
2323             break;
2324 
2325         case OPshrass:
2326             op1 = 5;                    // SHR
2327             op2 = 3;                    // RCR
2328             break;
2329 
2330         case OPashrass:
2331             op1 = 7;                    // SAR
2332             op2 = 3;                    // RCR
2333             break;
2334 
2335         default:
2336             assert(0);
2337     }
2338 
2339 
2340     uint v = 0xD3;                  // for SHIFT xx,CL cases
2341     uint loopcnt = 1;
2342     uint conste2 = false;
2343     uint shiftcnt = 0;              // avoid "use before initialized" warnings
2344     if (e2.Eoper == OPconst)
2345     {
2346         conste2 = true;                 // e2 is a constant
2347         shiftcnt = e2.EV.Vint;         // byte ordering of host
2348         if (config.target_cpu >= TARGET_80286 &&
2349             sz <= REGSIZE &&
2350             shiftcnt != 1)
2351             v = 0xC1;                   // SHIFT xx,shiftcnt
2352         else if (shiftcnt <= 3)
2353         {
2354             loopcnt = shiftcnt;
2355             v = 0xD1;                   // SHIFT xx,1
2356         }
2357     }
2358 
2359     if (v == 0xD3)                        // if COUNT == CL
2360     {
2361         retregs = mCX;
2362         codelem(cdb,e2,&retregs,false);
2363     }
2364     else
2365         freenode(e2);
2366     getlvalue(cdb,&cs,e1,mCX);          // get lvalue, preserve CX
2367     modEA(cdb,&cs);             // check for modifying register
2368 
2369     if (*pretregs == 0 ||               // if don't return result
2370         (*pretregs == mPSW && conste2 && _tysize[tym] <= REGSIZE) ||
2371         sz > REGSIZE
2372        )
2373     {
2374         retregs = 0;            // value not returned in a register
2375         cs.Iop = v ^ isbyte;
2376         while (loopcnt--)
2377         {
2378             NEWREG(cs.Irm,op1);           // make sure op1 is first
2379             if (sz <= REGSIZE)
2380             {
2381                 if (conste2)
2382                 {
2383                     cs.IFL2 = FLconst;
2384                     cs.IEV2.Vint = shiftcnt;
2385                 }
2386                 cdb.gen(&cs);             // SHIFT EA,[CL|1]
2387                 if (*pretregs & mPSW && !loopcnt && conste2)
2388                   code_orflag(cdb.last(),CFpsw);
2389             }
2390             else // TYlong
2391             {
2392                 cs.Iop = 0xD1;            // plain shift
2393                 code *ce = gennop(null);                  // ce: NOP
2394                 if (v == 0xD3)
2395                 {
2396                     getregs(cdb,mCX);
2397                     if (!conste2)
2398                     {
2399                         assert(loopcnt == 0);
2400                         genjmp(cdb,JCXZ,FLcode,cast(block *) ce);   // JCXZ ce
2401                     }
2402                 }
2403                 code *cg;
2404                 if (oper == OPshlass)
2405                 {
2406                     cdb.gen(&cs);               // cg: SHIFT EA
2407                     cg = cdb.last();
2408                     code_orflag(cg,CFpsw);
2409                     getlvalue_msw(&cs);
2410                     NEWREG(cs.Irm,op2);
2411                     cdb.gen(&cs);               // SHIFT EA
2412                     getlvalue_lsw(&cs);
2413                 }
2414                 else
2415                 {
2416                     getlvalue_msw(&cs);
2417                     cdb.gen(&cs);
2418                     cg = cdb.last();
2419                     code_orflag(cg,CFpsw);
2420                     NEWREG(cs.Irm,op2);
2421                     getlvalue_lsw(&cs);
2422                     cdb.gen(&cs);
2423                 }
2424                 if (v == 0xD3)                    // if building a loop
2425                 {
2426                     genjmp(cdb,LOOP,FLcode,cast(block *) cg); // LOOP cg
2427                     regimmed_set(CX,0);           // note that now CX == 0
2428                 }
2429                 cdb.append(ce);
2430             }
2431         }
2432 
2433         // If we want the result, we must load it from the EA
2434         // into a register.
2435 
2436         if (sz == 2 * REGSIZE && *pretregs)
2437         {
2438             retregs = *pretregs & (ALLREGS | mBP);
2439             if (retregs)
2440             {
2441                 retregs &= ~idxregm(&cs);
2442                 allocreg(cdb,&retregs,&reg,tym);
2443                 cs.Iop = LOD;
2444 
2445                 // be careful not to trash any index regs
2446                 // do MSW first (which can't be an index reg)
2447                 getlvalue_msw(&cs);
2448                 NEWREG(cs.Irm,reg);
2449                 cdb.gen(&cs);
2450                 getlvalue_lsw(&cs);
2451                 reg = findreglsw(retregs);
2452                 NEWREG(cs.Irm,reg);
2453                 cdb.gen(&cs);
2454                 if (*pretregs & mPSW)
2455                     tstresult(cdb,retregs,tyml,true);
2456             }
2457             else        // flags only
2458             {
2459                 retregs = ALLREGS & ~idxregm(&cs);
2460                 allocreg(cdb,&retregs,&reg,TYint);
2461                 cs.Iop = LOD;
2462                 NEWREG(cs.Irm,reg);
2463                 cdb.gen(&cs);           // MOV reg,EA
2464                 cs.Iop = 0x0B;          // OR reg,EA+2
2465                 cs.Iflags |= CFpsw;
2466                 getlvalue_msw(&cs);
2467                 cdb.gen(&cs);
2468             }
2469         }
2470         if (e1.Ecount && !(retregs & regcon.mvar))   // if lvalue is a CSE
2471             cssave(e1,retregs,!OTleaf(e1.Eoper));
2472         freenode(e1);
2473         *pretregs = retregs;
2474         return;
2475     }
2476     else                                // else must evaluate in register
2477     {
2478         if (sz <= REGSIZE)
2479         {
2480             regm_t possregs = ALLREGS & ~mCX & ~idxregm(&cs);
2481             if (isbyte)
2482                 possregs &= BYTEREGS;
2483             retregs = *pretregs & possregs;
2484             if (retregs == 0)
2485                 retregs = possregs;
2486             allocreg(cdb,&retregs,&reg,tym);
2487             cs.Iop = LOD ^ isbyte;
2488             code_newreg(&cs, reg);
2489             if (isbyte && I64 && (reg >= 4))
2490                 cs.Irex |= REX;
2491             cdb.gen(&cs);                     // MOV reg,EA
2492             if (!I16)
2493             {
2494                 assert(!isbyte || (mask(reg) & BYTEREGS));
2495                 cdb.genc2(v ^ isbyte,modregrmx(3,op1,reg),shiftcnt);
2496                 if (isbyte && I64 && (reg >= 4))
2497                     cdb.last().Irex |= REX;
2498                 code_orrex(cdb.last(), rex);
2499                 // We can do a 32 bit shift on a 16 bit operand if
2500                 // it's a left shift and we're not concerned about
2501                 // the flags. Remember that flags are not set if
2502                 // a shift of 0 occurs.
2503                 if (_tysize[tym] == SHORTSIZE &&
2504                     (oper == OPshrass || oper == OPashrass ||
2505                      (*pretregs & mPSW && conste2)))
2506                      cdb.last().Iflags |= CFopsize;            // 16 bit operand
2507             }
2508             else
2509             {
2510                 while (loopcnt--)
2511                 {   // Generate shift instructions.
2512                     cdb.genc2(v ^ isbyte,modregrm(3,op1,reg),shiftcnt);
2513                 }
2514             }
2515             if (*pretregs & mPSW && conste2)
2516             {
2517                 assert(shiftcnt);
2518                 *pretregs &= ~mPSW;     // result is already in flags
2519                 code_orflag(cdb.last(),CFpsw);
2520             }
2521 
2522             opAssStoreReg(cdb,cs,e,reg,pretregs);
2523             return;
2524         }
2525         assert(0);
2526     }
2527 }
2528 
2529 
2530 /**********************************
2531  * Generate code for compares.
2532  * Handles lt,gt,le,ge,eqeq,ne for all data types.
      *
      * The operator is e.Eoper (asserted to satisfy OTrel()), the operands are
      * e.EV.E1 and e.EV.E2, and the operand type and size are taken from E1.
      *
      * Strategy, in the order the checks are made:
      *  - *pretregs == 0: both operands are evaluated with no result requested
      *    and the function returns.
      *  - vector operands are handed to orthxmm().
      *  - floating operands use orthxmm()/orth87() with a flags result, or a
      *    runtime library call when neither config.fpxmmregs nor
      *    config.inline8087 is set (EX_windos only; asserts otherwise).
      *  - lt/gt/le/ge on TYlong (I16) or TYllong (I32) load E1 into DX:AX and
      *    E2 into CX:BX, then call CLIB.lcmp (I16) or emit an inline sequence.
      *  - otherwise a CMP, a TEST, or a sign-bit sequence that needs no flags is
      *    emitted; the form is selected by the operator of E2.
      * At label L3, if *pretregs asks for a general register, the condition is
      * turned into a register value using SETcc, SBB, or MOV 1 / Jcc / MOV 0.
      *
      * The global cdcmp_flag is read into a local and reset to 0 on entry; when
      * it was non-zero the SBB result is left as is (see the "cdcond() will
      * handle it" branch).
      *
      * Params:
      *      cdb = code builder the generated instructions are appended to
      *      e = the comparison elem
      *      pretregs = on entry, mask of where the result is wanted (0: not
      *                 wanted, mPSW: flags, register bits: a register);
      *                 may be updated to where the result was actually put
2533  */
2534 
2535 @trusted
2536 void cdcmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2537 {
2538     regm_t retregs,rretregs;            // register masks for the left (e1) and right (e2) operands
2539     reg_t reg,rreg;                     // registers picked out of retregs / rretregs
2540     int fl;                             // FL kind of an OPrelconst right operand
2541 
2542     //printf("cdcmp(e = %p, pretregs = %s)\n",e,regm_str(*pretregs));
2543     // Collect extra parameter. This is pretty ugly...
2544     int flag = cdcmp_flag;
2545     cdcmp_flag = 0;                     // reset the global once it has been read
2546 
2547     elem *e1 = e.EV.E1;
2548     elem *e2 = e.EV.E2;
2549     if (*pretregs == 0)                 // if don't want result
2550     {
2551         codelem(cdb,e1,pretregs,false);
2552         *pretregs = 0;                  // in case e1 changed it
2553         codelem(cdb,e2,pretregs,false);
2554         return;
2555     }
2556 
2557     if (tyvector(tybasic(e1.Ety)))
2558         return orthxmm(cdb,e,pretregs); // vector compares are delegated to orthxmm()
2559 
2560     uint jop = jmpopcode(e);        // must be computed before
2561                                         // leaves are freed
2562     uint reverse = 0;               // becomes 2 when the CMP operand order is flipped; XORed into the opcode
2563 
2564     OPER op = e.Eoper;
2565     assert(OTrel(op));
2566     bool eqorne = (op == OPeqeq) || (op == OPne);   // true for == and != (no ordering needed)
2567 
2568     tym_t tym = tybasic(e1.Ety);
2569     uint sz = _tysize[tym];         // operand size in bytes
2570     uint isbyte = sz == 1;          // 1 for byte operands; XORed into opcodes to select the byte form
2571 
2572     uint rex = (I64 && sz == 8) ? REX_W : 0;   // REX.W needed for 8 byte operands in 64 bit mode
2573     uint grex = rex << 16;          // 64 bit operands
2574 
2575     code cs;                        // instruction template filled in for CMP with an immediate or EA
2576     code *ce;                       // NOP that is the target of the JNE between MSW and LSW compares
2577     if (tyfloating(tym))                  // if floating operation
2578     {
2579         if (config.fpxmmregs)
2580         {
2581             retregs = mPSW;             // ask for the result in the flags
2582             if (tyxmmreg(tym))
2583                 orthxmm(cdb,e,&retregs);
2584             else
2585                 orth87(cdb,e,&retregs);
2586         }
2587         else if (config.inline8087)
2588         {   retregs = mPSW;
2589             orth87(cdb,e,&retregs);
2590         }
2591         else
2592         {
2593             if (config.exe & EX_windos)
2594             {
2595                 int clib;
2596 
2597                 retregs = 0;                /* skip result for now          */
2598                 if (iffalse(e2))            /* second operand is constant 0 */
2599                 {
2600                     assert(!eqorne);        /* should be OPbool or OPnot    */
2601                     if (tym == TYfloat)
2602                     {
2603                         retregs = FLOATREGS;
2604                         clib = CLIB.ftst0;
2605                     }
2606                     else
2607                     {
2608                         retregs = DOUBLEREGS;
2609                         clib = CLIB.dtst0;
2610                     }
2611                     if (rel_exception(op))
2612                         clib += CLIB.dtst0exc - CLIB.dtst0;   // switch to the "exc" variant of the routine
2613                     codelem(cdb,e1,&retregs,false);
2614                     retregs = 0;
2615                     callclib(cdb,e,clib,&retregs,0);
2616                     freenode(e2);
2617                 }
2618                 else
2619                 {
2620                     clib = CLIB.dcmp;
2621                     if (rel_exception(op))
2622                         clib += CLIB.dcmpexc - CLIB.dcmp;     // switch to the "exc" variant of the routine
2623                     opdouble(cdb,e,&retregs,clib);
2624                 }
2625             }
2626             else
2627             {
2628                 assert(0);
2629             }
2630         }
2631         goto L3;                        // skip the integer compare; go to result generation
2632     }
2633 
2634     /* If it's a signed comparison of longs, we have to call a library    */
2635     /* routine, because we don't know the target of the signed branch     */
2636     /* (have to set up flags so that jmpopcode() will do it right)        */
2637     if (!eqorne &&
2638         (I16 && tym == TYlong  && tybasic(e2.Ety) == TYlong ||
2639          I32 && tym == TYllong && tybasic(e2.Ety) == TYllong)
2640        )
2641     {
2642         assert(jop != JC && jop != JNC);
2643         retregs = mDX | mAX;            // e1 goes in DX:AX
2644         codelem(cdb,e1,&retregs,false);
2645         retregs = mCX | mBX;            // e2 goes in CX:BX
2646         scodelem(cdb,e2,&retregs,mDX | mAX,false);
2647 
2648         if (I16)
2649         {
2650             retregs = 0;
2651             callclib(cdb,e,CLIB.lcmp,&retregs,0);    // gross, but it works
2652         }
2653         else
2654         {
2655             /* Generate:
2656              *      CMP  EDX,ECX
2657              *      JNE  C1
2658              *      XOR  EDX,EDX
2659              *      CMP  EAX,EBX
2660              *      JZ   C1
2661              *      JA   C3
2662              *      DEC  EDX
2663              *      JMP  C1
2664              * C3:  INC  EDX
2665              * C1:
2666              */
2667              getregs(cdb,mDX);
2668              genregs(cdb,0x39,CX,DX);             // CMP EDX,ECX
2669              code *c1 = gennop(null);
2670              genjmp(cdb,JNE,FLcode,cast(block *)c1);  // JNE C1
2671              movregconst(cdb,DX,0,0);             // XOR EDX,EDX
2672              genregs(cdb,0x39,BX,AX);             // CMP EAX,EBX
2673              genjmp(cdb,JE,FLcode,cast(block *)c1);   // JZ C1
2674              code *c3 = gen1(null,0x40 + DX);                  // INC EDX
2675              genjmp(cdb,JA,FLcode,cast(block *)c3);   // JA C3
2676              cdb.gen1(0x48 + DX);                              // DEC EDX
2677              genjmp(cdb,JMPS,FLcode,cast(block *)c1); // JMP C1
2678              cdb.append(c3);
2679              cdb.append(c1);
2680              getregs(cdb,mDX);
2681              retregs = mPSW;
2682         }
2683         goto L3;                        // go to result generation
2684     }
2685 
2686     /* See if we should reverse the comparison, so a JA => JC, and JBE => JNC
2687      * (This is already reflected in the jop)
2688      */
2689     if ((jop == JC || jop == JNC) &&
2690         (op == OPgt || op == OPle) &&
2691         (tyuns(tym) || tyuns(e2.Ety))
2692        )
2693     {   // jmpopcode() sez comparison should be reversed
2694         assert(e2.Eoper != OPconst && e2.Eoper != OPrelconst);
2695         reverse ^= 2;                   // toggles the direction bit of the CMP opcode (0x39 <-> 0x3B)
2696     }
2697 
2698     /* See if we should swap operands     */
2699     if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2))
2700     {
2701         e1 = e.EV.E2;
2702         e2 = e.EV.E1;
2703         reverse ^= 2;                   // operands were swapped, so flip the CMP direction as well
2704     }
2705 
2706     retregs = allregs;                  // candidate registers for e1
2707     if (isbyte)
2708         retregs = BYTEREGS;
2709 
2710     ce = null;
2711     cs.Iflags = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
2712     cs.Irex = cast(ubyte)rex;
2713     if (sz > REGSIZE)
2714         ce = gennop(ce);                // target for the JNE emitted after the MSW compare
2715 
2716     switch (e2.Eoper)                   // choose the form of the compare from the right operand
2717     {
2718         default:
2719         L2:                             // general case: both operands in registers
2720             scodelem(cdb,e1,&retregs,0,true);      // compute left leaf
2721             rretregs = allregs & ~retregs;
2722             if (isbyte)
2723                 rretregs &= BYTEREGS;
2724             scodelem(cdb,e2,&rretregs,retregs,true);     // get right leaf
2725             if (sz <= REGSIZE)                              // CMP reg,rreg
2726             {
2727                 reg = findreg(retregs);             // get reg that e1 is in
2728                 rreg = findreg(rretregs);
2729                 genregs(cdb,0x3B ^ isbyte ^ reverse,reg,rreg);
2730                 code_orrex(cdb.last(), rex);
2731                 if (!I16 && sz == SHORTSIZE)
2732                     cdb.last().Iflags |= CFopsize;          // compare only 16 bits
2733                 if (I64 && isbyte && (reg >= 4 || rreg >= 4))
2734                     cdb.last().Irex |= REX;                 // address byte registers
2735             }
2736             else
2737             {
2738                 assert(sz <= 2 * REGSIZE);
2739 
2740                 // Compare MSW, if they're equal then compare the LSW
2741                 reg = findregmsw(retregs);
2742                 rreg = findregmsw(rretregs);
2743                 genregs(cdb,0x3B ^ reverse,reg,rreg);  // CMP reg,rreg
2744                 if (I32 && sz == 6)
2745                     cdb.last().Iflags |= CFopsize;         // seg is only 16 bits
2746                 else if (I64)
2747                     code_orrex(cdb.last(), REX_W);
2748                 genjmp(cdb,JNE,FLcode,cast(block *) ce);   // JNE nop
2749 
2750                 reg = findreglsw(retregs);
2751                 rreg = findreglsw(rretregs);
2752                 genregs(cdb,0x3B ^ reverse,reg,rreg);  // CMP reg,rreg
2753                 if (I64)
2754                     code_orrex(cdb.last(), REX_W);
2755             }
2756             break;
2757 
2758         case OPrelconst:
2759             if (I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64))
2760                 goto L2;                // do not use the address as an immediate here; load it into a register
2761             fl = el_fl(e2);
2762             switch (fl)
2763             {
2764                 case FLfunc:
2765                     fl = FLextern;          // so it won't be self-relative
2766                     break;
2767 
2768                 case FLdata:
2769                 case FLudata:
2770                 case FLextern:
2771                     if (sz > REGSIZE)       // compare against DS, not DGROUP
2772                         goto L2;
2773                     break;
2774 
2775                 case FLfardata:
2776                     break;
2777 
2778                 default:
2779                     goto L2;
2780             }
2781             cs.IFL2 = cast(ubyte)fl;
2782             cs.IEV2.Vsym = e2.EV.Vsym;
2783             if (sz > REGSIZE)
2784             {
2785                 cs.Iflags |= CFseg;         // MSW compare uses the segment part first
2786                 cs.IEV2.Voffset = 0;
2787             }
2788             else
2789             {
2790                 cs.Iflags |= CFoff;
2791                 cs.IEV2.Voffset = e2.EV.Voffset;
2792             }
2793             goto L4;                    // share the compare-with-immediate code of OPconst
2794 
2795         case OPconst:
2796             // If compare against 0
2797             if (sz <= REGSIZE && *pretregs == mPSW && !boolres(e2) &&
2798                 isregvar(e1,&retregs,&reg)
2799                )
2800             {   // Just do a TEST instruction
2801                 genregs(cdb,0x85 ^ isbyte,reg,reg);      // TEST reg,reg
2802                 cdb.last().Iflags |= (cs.Iflags & CFopsize) | CFpsw;
2803                 code_orrex(cdb.last(), rex);
2804                 if (I64 && isbyte && reg >= 4)
2805                     cdb.last().Irex |= REX;                 // address byte registers
2806                 retregs = mPSW;
2807                 break;
2808             }
2809 
2810             if (!tyuns(tym) && !tyuns(e2.Ety) &&
2811                 !boolres(e2) && !(*pretregs & mPSW) &&
2812                 (sz == REGSIZE || (I64 && sz == 4)) &&
2813                 (!I16 || op == OPlt || op == OPge))
2814             {
2815                 assert(*pretregs & (allregs));
2816                 codelem(cdb,e1,pretregs,false);     // e1 is loaded straight into the requested result register
2817                 reg = findreg(*pretregs);
2818                 getregs(cdb,mask(reg));
2819                 switch (op)
2820                 {
2821                     case OPle:
2822                         cdb.genc2(0x81,grex | modregrmx(3,0,reg),cast(uint)-1);   // ADD reg,-1
2823                         code_orflag(cdb.last(), CFpsw);
2824                         cdb.genc2(0x81,grex | modregrmx(3,2,reg),0);          // ADC reg,0
2825                         goto oplt;
2826 
2827                     case OPgt:
2828                         cdb.gen2(0xF7,grex | modregrmx(3,3,reg));         // NEG reg
2829                             /* Flips the sign bit unless the value is 0 or int.min.
2830                             Also sets the carry bit when the value is not 0. */
2831                         code_orflag(cdb.last(), CFpsw);
2832                         cdb.genc2(0x81,grex | modregrmx(3,3,reg),0);  // SBB reg,0
2833                             /* Subtracts the carry bit. This turns int.min into
2834                             int.max, flipping the sign bit.
2835                             For other negative and positive values, subtracting 1
2836                             doesn't affect the sign bit.
2837                             For 0, the carry bit is not set, so this does nothing
2838                             and the sign bit is not affected. */
2839                         goto oplt;
2840 
2841                     case OPlt:
2842                     oplt:
2843                         // Get the sign bit, i.e. 1 if the value is negative.
2844                         if (!I16)
2845                             cdb.genc2(0xC1,grex | modregrmx(3,5,reg),sz * 8 - 1); // SHR reg,31
2846                         else
2847                         {   /* 8088-286 do not have a barrel shifter, so use this
2848                                faster sequence
2849                              */
2850                             genregs(cdb,0xD1,0,reg);   // ROL reg,1
2851                             reg_t regi;
2852                             if (reghasvalue(allregs,1,&regi))
2853                                 genregs(cdb,0x23,reg,regi);  // AND reg,regi
2854                             else
2855                                 cdb.genc2(0x81,modregrm(3,4,reg),1); // AND reg,1
2856                         }
2857                         break;
2858 
2859                     case OPge:
2860                         genregs(cdb,0xD1,4,reg);        // SHL reg,1
2861                         code_orrex(cdb.last(),rex);
2862                         code_orflag(cdb.last(), CFpsw);
2863                         genregs(cdb,0x19,reg,reg);      // SBB reg,reg
2864                         code_orrex(cdb.last(),rex);
2865                         if (I64)
2866                         {
2867                             cdb.gen2(0xFF,modregrmx(3,0,reg));       // INC reg
2868                             code_orrex(cdb.last(), rex);
2869                         }
2870                         else
2871                             cdb.gen1(0x40 + reg);                    // INC reg
2872                         break;
2873 
2874                     default:
2875                         assert(0);
2876                 }
2877                 freenode(e2);
2878                 goto ret;               // value was computed directly in reg; skip the L3 conversion
2879             }
2880 
2881             cs.IFL2 = FLconst;
2882             if (sz == 16)
2883                 cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vcent.hi;
2884             else if (sz > REGSIZE)
2885                 cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong);
2886             else
2887                 cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vllong;
2888 
2889             // The cmp immediate relies on sign extension of the 32 bit immediate value
2890             if (I64 && sz >= REGSIZE && cs.IEV2.Vsize_t != cast(int)cs.IEV2.Vint)
2891                 goto L2;                // value is not a sign-extended 32 bit immediate; compare registers instead
2892           L4:                           // compare against an immediate (constant or address)
2893             cs.Iop = 0x81 ^ isbyte;
2894 
2895             /* if ((e1 is data or a '*' reference) and it's not a
2896              * common subexpression
2897              */
2898 
2899             if ((e1.Eoper == OPvar && datafl[el_fl(e1)] ||
2900                  e1.Eoper == OPind) &&
2901                 !evalinregister(e1))
2902             {
2903                 getlvalue(cdb,&cs,e1,RMload);
2904                 freenode(e1);
2905                 if (evalinregister(e2))
2906                 {
2907                     retregs = idxregm(&cs);
2908                     if ((cs.Iflags & CFSEG) == CFes)
2909                         retregs |= mES;             // take no chances
2910                     rretregs = allregs & ~retregs;
2911                     if (isbyte)
2912                         rretregs &= BYTEREGS;
2913                     scodelem(cdb,e2,&rretregs,retregs,true);
2914                     cs.Iop = 0x39 ^ isbyte ^ reverse;
2915                     if (sz > REGSIZE)
2916                     {
2917                         rreg = findregmsw(rretregs);
2918                         cs.Irm |= modregrm(0,rreg,0);
2919                         getlvalue_msw(&cs);
2920                         cdb.gen(&cs);              // CMP EA+2,rreg
2921                         if (I32 && sz == 6)
2922                             cdb.last().Iflags |= CFopsize;      // seg is only 16 bits
2923                         if (I64 && isbyte && rreg >= 4)
2924                             cdb.last().Irex |= REX;
2925                         genjmp(cdb,JNE,FLcode,cast(block *) ce); // JNE nop
2926                         rreg = findreglsw(rretregs);
2927                         NEWREG(cs.Irm,rreg);
2928                         getlvalue_lsw(&cs);
2929                     }
2930                     else
2931                     {
2932                         rreg = findreg(rretregs);
2933                         code_newreg(&cs, rreg);
2934                         if (I64 && isbyte && rreg >= 4)
2935                             cs.Irex |= REX;
2936                     }
2937                 }
2938                 else
2939                 {
2940                     cs.Irm |= modregrm(0,7,0);      // /7 selects CMP in the 0x80/0x81 immediate group
2941                     if (sz > REGSIZE)
2942                     {
2943                         if (sz == 6)
2944                             assert(0);
2945                         if (e2.Eoper == OPrelconst)
2946                         {   cs.Iflags = (cs.Iflags & ~(CFoff | CFseg)) | CFseg;
2947                             cs.IEV2.Voffset = 0;
2948                         }
2949                         getlvalue_msw(&cs);
2950                         cdb.gen(&cs);              // CMP EA+2,const
2951                         if (!I16 && sz == 6)
2952                             cdb.last().Iflags |= CFopsize;      // seg is only 16 bits
2953                         genjmp(cdb,JNE,FLcode, cast(block *) ce); // JNE nop
2954                         if (e2.Eoper == OPconst)
2955                             cs.IEV2.Vint = cast(int)e2.EV.Vllong;   // now the low word of the constant
2956                         else if (e2.Eoper == OPrelconst)
2957                         {   // Turn off CFseg, on CFoff
2958                             cs.Iflags ^= CFseg | CFoff;
2959                             cs.IEV2.Voffset = e2.EV.Voffset;
2960                         }
2961                         else
2962                             assert(0);
2963                         getlvalue_lsw(&cs);
2964                     }
2965                     freenode(e2);
2966                 }
2967                 cdb.gen(&cs);               // emit the (LSW) compare against the EA
2968                 break;
2969             }
2970 
2971             if (evalinregister(e2) && !OTassign(e1.Eoper) &&
2972                 !isregvar(e1,null,null))
2973             {
2974                 regm_t m;
2975 
2976                 m = allregs & ~regcon.mvar;
2977                 if (isbyte)
2978                     m &= BYTEREGS;
2979                 if (m & (m - 1))    // if more than one free register
2980                     goto L2;
2981             }
2982             if ((e1.Eoper == OPstrcmp || (OTassign(e1.Eoper) && sz <= REGSIZE)) &&
2983                 !boolres(e2) && !evalinregister(e1))
2984             {
2985                 retregs = mPSW;             // request only the flags from evaluating e1; no CMP is emitted here
2986                 scodelem(cdb,e1,&retregs,0,false);
2987                 freenode(e2);
2988                 break;
2989             }
2990             if (sz <= REGSIZE && !boolres(e2) && e1.Eoper == OPadd && *pretregs == mPSW)
2991             {
2992                 retregs |= mPSW;            // request the flags from the OPadd as well; no CMP is emitted here
2993                 scodelem(cdb,e1,&retregs,0,false);
2994                 freenode(e2);
2995                 break;
2996             }
2997             scodelem(cdb,e1,&retregs,0,true);  // compute left leaf
2998             if (sz == 1)
2999             {
3000                 reg = findreg(retregs & allregs);   // get reg that e1 is in
3001                 cs.Irm = modregrm(3,7,reg & 7);
3002                 if (reg & 8)
3003                     cs.Irex |= REX_B;
3004                 if (e1.Eoper == OPvar && e1.EV.Voffset == 1 && e1.EV.Vsym.Sfl == FLreg)
3005                 {   assert(reg < 4);
3006                     cs.Irm |= 4;                    // use upper register half
3007                 }
3008                 if (I64 && reg >= 4)
3009                     cs.Irex |= REX;                 // address byte registers
3010             }
3011             else if (sz <= REGSIZE)
3012             {   // CMP reg,const
3013                 reg = findreg(retregs & allregs);   // get reg that e1 is in
3014                 rretregs = allregs & ~retregs;
3015                 if (cs.IFL2 == FLconst && reghasvalue(rretregs,cs.IEV2.Vint,&rreg))
3016                 {
3017                     genregs(cdb,0x3B,reg,rreg);     // CMP reg,rreg: some register already holds the constant
3018                     code_orrex(cdb.last(), rex);
3019                     if (!I16)
3020                         cdb.last().Iflags |= cs.Iflags & CFopsize;
3021                     freenode(e2);
3022                     break;
3023                 }
3024                 cs.Irm = modregrm(3,7,reg & 7);
3025                 if (reg & 8)
3026                     cs.Irex |= REX_B;
3027             }
3028             else if (sz <= 2 * REGSIZE)
3029             {
3030                 reg = findregmsw(retregs);          // get reg that e1 is in
3031                 cs.Irm = modregrm(3,7,reg);
3032                 cdb.gen(&cs);                       // CMP reg,MSW
3033                 if (I32 && sz == 6)
3034                     cdb.last().Iflags |= CFopsize;  // seg is only 16 bits
3035                 genjmp(cdb,JNE,FLcode, cast(block *) ce);  // JNE ce
3036 
3037                 reg = findreglsw(retregs);
3038                 cs.Irm = modregrm(3,7,reg);
3039                 if (e2.Eoper == OPconst)
3040                     cs.IEV2.Vint = e2.EV.Vlong;
3041                 else if (e2.Eoper == OPrelconst)
3042                 {   // Turn off CFseg, on CFoff
3043                     cs.Iflags ^= CFseg | CFoff;
3044                     cs.IEV2.Voffset = e2.EV.Voffset;
3045                 }
3046                 else
3047                     assert(0);
3048             }
3049             else
3050                 assert(0);
3051             cdb.gen(&cs);                         // CMP sucreg,LSW
3052             freenode(e2);
3053             break;
3054 
3055         case OPind:
3056             if (e2.Ecount)
3057                 goto L2;                // e2 has a non-zero Ecount: evaluate it into a register
3058             goto L5;                    // otherwise compare against it as a memory operand
3059 
3060         case OPvar:
3061             if (config.exe & (EX_OSX | EX_OSX64))
3062             {
3063                 if (movOnly(e2))
3064                     goto L2;
3065             }
3066             if ((e1.Eoper == OPvar &&
3067                  isregvar(e2,&rretregs,&reg) &&
3068                  sz <= REGSIZE
3069                 ) ||
3070                 (e1.Eoper == OPind &&
3071                  isregvar(e2,&rretregs,&reg) &&
3072                  !evalinregister(e1) &&
3073                  sz <= REGSIZE
3074                 )
3075                )
3076             {
3077                 // CMP EA,e2
3078                 getlvalue(cdb,&cs,e1,RMload);
3079                 freenode(e1);
3080                 cs.Iop = 0x39 ^ isbyte ^ reverse;
3081                 code_newreg(&cs,reg);
3082                 if (I64 && isbyte && reg >= 4)
3083                     cs.Irex |= REX;                 // address byte registers
3084                 cdb.gen(&cs);
3085                 freenode(e2);
3086                 break;
3087             }
3088           L5:                           // e1 in a register, e2 used as a memory operand
3089             scodelem(cdb,e1,&retregs,0,true);      // compute left leaf
3090             if (sz <= REGSIZE)                      // CMP reg,EA
3091             {
3092                 reg = findreg(retregs & allregs);   // get reg that e1 is in
3093                 uint opsize = cs.Iflags & CFopsize; // saved before loadea() and applied to the generated instruction below
3094                 loadea(cdb,e2,&cs,0x3B ^ isbyte ^ reverse,reg,0,RMload | retregs,0);
3095                 code_orflag(cdb.last(),opsize);
3096             }
3097             else if (sz <= 2 * REGSIZE)
3098             {
3099                 reg = findregmsw(retregs);   // get reg that e1 is in
3100                 // CMP reg,EA
3101                 loadea(cdb,e2,&cs,0x3B ^ reverse,reg,REGSIZE,RMload | retregs,0);
3102                 if (I32 && sz == 6)
3103                     cdb.last().Iflags |= CFopsize;        // seg is only 16 bits
3104                 genjmp(cdb,JNE,FLcode, cast(block *) ce);  // JNE ce
3105                 reg = findreglsw(retregs);
3106                 if (e2.Eoper == OPind)
3107                 {
3108                     NEWREG(cs.Irm,reg);
3109                     getlvalue_lsw(&cs);
3110                     cdb.gen(&cs);
3111                 }
3112                 else
3113                     loadea(cdb,e2,&cs,0x3B ^ reverse,reg,0,RMload | retregs,0);
3114             }
3115             else
3116                 assert(0);
3117             freenode(e2);
3118             break;
3119     }
3120     cdb.append(ce);                     // place the JNE target (ce is null when sz <= REGSIZE)
3121 
3122 L3:                                     // produce a register result if one was requested
3123     if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
3124     {
3125         if (config.target_cpu >= TARGET_80386 && !flag && !(jop & 0xFF00))   // SETcc can be used
3126         {
3127             regm_t resregs = retregs;
3128             if (!I64)
3129             {
3130                 resregs &= BYTEREGS;
3131                 if (!resregs)
3132                     resregs = BYTEREGS;
3133             }
3134             allocreg(cdb,&resregs,&reg,TYint);
3135             cdb.gen2(0x0F90 + (jop & 0x0F),modregrmx(3,0,reg)); // SETcc reg
3136             if (I64 && reg >= 4)
3137                 code_orrex(cdb.last(),REX);
3138             if (tysize(e.Ety) > 1)
3139             {
3140                 genregs(cdb,MOVZXb,reg,reg);       // MOVZX reg,reg
3141                 if (I64 && sz == 8)
3142                     code_orrex(cdb.last(),REX_W);
3143                 if (I64 && reg >= 4)
3144                     code_orrex(cdb.last(),REX);
3145             }
3146             *pretregs &= ~mPSW;
3147             fixresult(cdb,e,resregs,pretregs);
3148         }
3149         else
3150         {
3151             code *nop = null;
3152             regm_t save = regcon.immed.mval;    // saved so it can be restored after allocreg()
3153             allocreg(cdb,&retregs,&reg,TYint);
3154             regcon.immed.mval = save;
3155             if ((*pretregs & mPSW) == 0 &&
3156                 (jop == JC || jop == JNC))
3157             {
3158                 getregs(cdb,retregs);
3159                 genregs(cdb,0x19,reg,reg);     // SBB reg,reg
3160                 if (rex || flag & REX_W)
3161                     code_orrex(cdb.last(), REX_W);
3162                 if (flag)
3163                 { }                                         // cdcond() will handle it
3164                 else if (jop == JNC)
3165                 {
3166                     if (I64)
3167                     {
3168                         cdb.gen2(0xFF,modregrmx(3,0,reg));  // INC reg
3169                         code_orrex(cdb.last(), rex);
3170                     }
3171                     else
3172                         cdb.gen1(0x40 + reg);               // INC reg
3173                 }
3174                 else
3175                 {
3176                     cdb.gen2(0xF7,modregrmx(3,3,reg));      // NEG reg
3177                     code_orrex(cdb.last(), rex);
3178                 }
3179             }
3180             else if (I64 && sz == 8)
3181             {
3182                 assert(!flag);
3183                 movregconst(cdb,reg,1,64|8);   // MOV reg,1
3184                 nop = gennop(nop);
3185                 genjmp(cdb,jop,FLcode,cast(block *) nop);  // Jtrue nop
3186                                                             // MOV reg,0
3187                 movregconst(cdb,reg,0,(*pretregs & mPSW) ? 64|8 : 64);
3188                 regcon.immed.mval &= ~mask(reg);    // reg holds 0 or 1 depending on the jump, so drop its tracked immediate
3189             }
3190             else
3191             {
3192                 assert(!flag);
3193                 movregconst(cdb,reg,1,8);      // MOV reg,1
3194                 nop = gennop(nop);
3195                 genjmp(cdb,jop,FLcode,cast(block *) nop);  // Jtrue nop
3196                                                             // MOV reg,0
3197                 movregconst(cdb,reg,0,(*pretregs & mPSW) ? 8 : 0);
3198                 regcon.immed.mval &= ~mask(reg);    // reg holds 0 or 1 depending on the jump, so drop its tracked immediate
3199             }
3200             *pretregs = retregs;
3201             cdb.append(nop);                // jump target (null on the SBB path)
3202         }
3203     }
3204 ret:                                    // common exit; also reached directly from the sign-bit sequence
3205     { }
3206 }
3207 
3208 
/**********************************
 * Generate code for signed compare of longs.
 *
 * The value is handled as a most significant word (MSW) and a least
 * significant word (LSW). The MSWs are compared first using the signed
 * jumps from `jopmsw`; only when neither of those jumps is taken does
 * execution reach the LSW compare, which uses the unsigned jumps from
 * `joplsw`.
 *
 * Params:
 *      cdb    = code builder receiving the generated instructions
 *      e      = relational elem; its operator indexes the jump tables as
 *               `op - OPle` (table order: <=, >, <, >=). Freed on exit.
 *      jcond  = when bit 0 is clear, the jump conditions are toggled
 *      fltarg = FL kind describing `targ`, passed through to genjmp
 *      targ   = block* or code* to jump to
 */

@trusted
void longcmp(ref CodeBuilder cdb,elem *e,bool jcond,uint fltarg,code *targ)
{
    // Jump opcodes indexed by (op - OPle):
                                         // <=  >   <   >=
    static immutable ubyte[4] jopmsw = [JL, JG, JL, JG ];       // signed, used after the MSW compare
    static immutable ubyte[4] joplsw = [JBE, JA, JB, JAE ];     // unsigned, used after the LSW compare

    //printf("longcmp(e = %p)\n", e);
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    OPER op = e.Eoper;

    // See if we should swap operands
    // (both are variables and e2 is the one to be evaluated into a register;
    // the relation is mirrored with swaprel() to compensate)
    if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2))
    {
        e1 = e.EV.E2;
        e2 = e.EV.E1;
        op = swaprel(op);
    }

    // Instruction template reused for the memory/immediate forms of CMP below
    code cs;
    cs.Iflags = 0;
    cs.Irex = 0;

    // ce is a NOP appended at the very end; it is the target of the
    // "result already decided, skip the LSW compare" jump.
    code *ce = gennop(null);
    regm_t retregs = ALLREGS;
    regm_t rretregs;
    reg_t reg,rreg;

    // Build, once, the pair of jumps that follows the MSW compare. Each path
    // through the switch below appends cdbjmp right after its MSW CMP.
    uint jop = jopmsw[op - OPle];
    if (!(jcond & 1)) jop ^= (JL ^ JG);                   // toggle jump condition (swaps JL <-> JG)
    CodeBuilder cdbjmp;
    cdbjmp.ctor();
    genjmp(cdbjmp,jop,fltarg, cast(block *) targ);             // Jx targ
    genjmp(cdbjmp,jop ^ (JL ^ JG),FLcode, cast(block *) ce);   // Jy nop

    switch (e2.Eoper)
    {
        default:
        L2:
            // General case: both operands are evaluated into register pairs
            scodelem(cdb,e1,&retregs,0,true);      // compute left leaf
            rretregs = ALLREGS & ~retregs;         // right leaf must not overlap the left
            scodelem(cdb,e2,&rretregs,retregs,true);     // get right leaf
            cse_flush(cdb,1);
            // Compare MSW, if they're equal then compare the LSW
            reg = findregmsw(retregs);
            rreg = findregmsw(rretregs);
            genregs(cdb,0x3B,reg,rreg);        // CMP reg,rreg
            cdb.append(cdbjmp);

            reg = findreglsw(retregs);
            rreg = findreglsw(rretregs);
            genregs(cdb,0x3B,reg,rreg);        // CMP reg,rreg
            break;

        case OPconst:
            // Prime cs as opcode 0x81 with the constant's MSW as immediate;
            // the /7 reg field selecting CMP is filled in by the paths below.
            cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong);            // MSW first
            cs.IFL2 = FLconst;
            cs.Iop = 0x81;

            /* if ((e1 is data or a '*' reference) and it's not a
             * common subexpression
             */

            if ((e1.Eoper == OPvar && datafl[el_fl(e1)] ||
                 e1.Eoper == OPind) &&
                !evalinregister(e1))
            {
                // Compare directly against e1 in memory
                getlvalue(cdb,&cs,e1,0);
                freenode(e1);
                if (evalinregister(e2))
                {
                    // The constant goes into registers, avoiding any register
                    // used to address e1.
                    retregs = idxregm(&cs);
                    if ((cs.Iflags & CFSEG) == CFes)
                            retregs |= mES;         // take no chances
                    rretregs = ALLREGS & ~retregs;
                    scodelem(cdb,e2,&rretregs,retregs,true);
                    cse_flush(cdb,1);
                    rreg = findregmsw(rretregs);
                    cs.Iop = 0x39;
                    cs.Irm |= modregrm(0,rreg,0);
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);           // CMP EA+2,rreg
                    cdb.append(cdbjmp);
                    rreg = findreglsw(rretregs);
                    NEWREG(cs.Irm,rreg);    // switch the reg field to the LSW register
                }
                else
                {
                    cse_flush(cdb,1);
                    cs.Irm |= modregrm(0,7,0);
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);           // CMP EA+2,const
                    cdb.append(cdbjmp);
                    cs.IEV2.Vint = e2.EV.Vlong;     // immediate becomes the constant's LSW
                    freenode(e2);
                }
                // Common tail: re-aim cs at the LSW of e1 and emit the second compare
                getlvalue_lsw(&cs);
                cdb.gen(&cs);                   // CMP EA,rreg/const
                break;
            }
            if (evalinregister(e2))
                goto L2;                        // constant wanted in a register: use the general case

            // e1 in registers, compared against immediates
            scodelem(cdb,e1,&retregs,0,true);    // compute left leaf
            cse_flush(cdb,1);
            reg = findregmsw(retregs);              // get reg that e1 is in
            cs.Irm = modregrm(3,7,reg);

            cdb.gen(&cs);                           // CMP reg,MSW
            cdb.append(cdbjmp);
            reg = findreglsw(retregs);
            cs.Irm = modregrm(3,7,reg);
            cs.IEV2.Vint = e2.EV.Vlong;
            cdb.gen(&cs);                           // CMP sucreg,LSW
            freenode(e2);
            break;

        case OPvar:
            if (!e1.Ecount && e1.Eoper == OPs32_64)
            {
                // e1 is a sign extension that is not a common subexpression:
                // evaluate only its operand, then build the MSW by copying
                // the value and arithmetic-shifting the sign bit across it.
                reg_t msreg;

                retregs = allregs;
                scodelem(cdb,e1.EV.E1,&retregs,0,true);
                freenode(e1);
                reg = findreg(retregs);
                retregs = allregs & ~retregs;
                allocreg(cdb,&retregs,&msreg,TYint);
                genmovreg(cdb,msreg,reg);                  // MOV msreg,reg
                cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1);    // SAR msreg,31
                cse_flush(cdb,1);
                loadea(cdb,e2,&cs,0x3B,msreg,REGSIZE,mask(reg),0);     // compare against e2 at offset REGSIZE (its MSW)
                cdb.append(cdbjmp);
                loadea(cdb,e2,&cs,0x3B,reg,0,mask(reg),0);             // compare against e2 at offset 0 (its LSW)
                freenode(e2);
            }
            else
            {
                // e1 in a register pair, e2 compared straight from its storage
                scodelem(cdb,e1,&retregs,0,true);  // compute left leaf
                cse_flush(cdb,1);
                reg = findregmsw(retregs);   // get reg that e1 is in
                loadea(cdb,e2,&cs,0x3B,reg,REGSIZE,retregs,0);
                cdb.append(cdbjmp);
                reg = findreglsw(retregs);
                loadea(cdb,e2,&cs,0x3B,reg,0,retregs,0);
                freenode(e2);
            }
            break;
    }

    // Jump on the outcome of the LSW compare (unsigned condition)
    jop = joplsw[op - OPle];
    if (!(jcond & 1)) jop ^= 1;                           // toggle jump condition
    genjmp(cdb,jop,fltarg,cast(block *) targ);   // Jcond targ

    cdb.append(ce);         // landing pad for the "Jy nop" emitted after the MSW compare
    freenode(e);
}
3373 
/*****************************
 * Do conversions.
 * Depends on OPd_s32 and CLIB.dbllng being in sequence.
 * NOTE(review): the `clib` table below pairs each operator with its CLIB
 * routine explicitly and is searched linearly, so the ordering dependence
 * stated above may be historical - confirm before relying on it.
 *
 * When `config.inline8087` is set, most conversions are dispatched to the
 * x87/XMM helpers and return early. Everything else evaluates the operand
 * and calls the runtime library routine found in `clib`.
 *
 * Params:
 *      cdb      = code builder receiving the generated instructions
 *      e        = conversion elem; e.EV.E1 is the operand being converted
 *      pretregs = in: registers the result is wanted in (0 means the result
 *                 is not needed); passed on to the helpers that place the result
 */

@trusted
void cdcnvt(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdcnvt: %p *pretregs = %s\n", e, regm_str(*pretregs));
    //elem_print(e);

    // Conversion operator -> runtime library routine performing it
    static immutable ubyte[2][16] clib =
    [
        [ OPd_s32,        CLIB.dbllng   ],
        [ OPs32_d,        CLIB.lngdbl   ],
        [ OPd_s16,        CLIB.dblint   ],
        [ OPs16_d,        CLIB.intdbl   ],
        [ OPd_u16,        CLIB.dbluns   ],
        [ OPu16_d,        CLIB.unsdbl   ],
        [ OPd_u32,        CLIB.dblulng  ],
        [ OPu32_d,        CLIB.ulngdbl  ],
        [ OPd_s64,        CLIB.dblllng  ],
        [ OPs64_d,        CLIB.llngdbl  ],
        [ OPd_u64,        CLIB.dblullng ],
        [ OPu64_d,        CLIB.ullngdbl ],
        [ OPd_f,          CLIB.dblflt   ],
        [ OPf_d,          CLIB.fltdbl   ],
        [ OPvp_fp,        CLIB.vptrfptr ],
        [ OPcvp_fp,       CLIB.cvptrfptr]
    ];

    // No result wanted: just evaluate the operand
    if (!*pretregs)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    regm_t retregs;
    if (config.inline8087)
    {
        switch (e.Eoper)
        {
            case OPld_d:
            case OPd_ld:
            {
                if (tycomplex(e.EV.E1.Ety))
                {
            Lcomplex:
                    // Complex operand: evaluate into ST0/ST1 and let
                    // fixresult_complex87 move it where the caller wants it.
                    regm_t retregsx = mST01 | (*pretregs & mPSW);
                    codelem(cdb,e.EV.E1, &retregsx, false);
                    fixresult_complex87(cdb, e, retregsx, pretregs);
                    return;
                }
                // Real operand: evaluate into ST0, then fix up the result location
                regm_t retregsx = mST0 | (*pretregs & mPSW);
                codelem(cdb,e.EV.E1, &retregsx, false);
                fixresult87(cdb, e, retregsx, pretregs);
                return;
            }

            case OPf_d:
            case OPd_f:
                if (tycomplex(e.EV.E1.Ety))
                    goto Lcomplex;
                if (config.fpxmmregs && *pretregs & XMMREGS)
                {
                    xmmcnvt(cdb, e, pretregs);
                    return;
                }

                /* if won't do us much good to transfer back and        */
                /* forth between 8088 registers and 8087 registers      */
                if (OTcall(e.EV.E1.Eoper) && !(*pretregs & allregs))
                {
                    // Operand is a call: look at where the callee returns its value
                    retregs = regmask(e.EV.E1.Ety, e.EV.E1.EV.E1.Ety);
                    if (retregs & (mXMM1 | mXMM0 |mST01 | mST0))       // if return in ST0
                    {
                        codelem(cdb,e.EV.E1,pretregs,false);
                        if (*pretregs & mST0)
                            note87(e, 0, 0);
                        return;
                    }
                    else
                        break;          // fall back to the library call path
                }
                goto Lload87;

            case OPs64_d:
                if (!I64)
                    goto Lload87;
                goto case OPs32_d;

            case OPs32_d:
                if (config.fpxmmregs && *pretregs & XMMREGS)
                {
                    xmmcnvt(cdb, e, pretregs);
                    return;
                }
                goto Lload87;

            case OPs16_d:
            case OPu16_d:
            Lload87:
                // Integer/float -> double via load87
                load87(cdb,e,0,pretregs,null,-1);
                return;

            case OPu32_d:
                if (I64 && config.fpxmmregs && *pretregs & XMMREGS)
                {
                    xmmcnvt(cdb,e,pretregs);
                    return;
                }
                else if (!I16)
                {
                    // Store the 32-bit value at offset 0 and a register holding 0
                    // at offset 4 (via genfltreg), then load the 8 bytes with
                    // FILD m64int.
                    regm_t retregsx = ALLREGS;
                    codelem(cdb,e.EV.E1, &retregsx, false);
                    reg_t reg = findreg(retregsx);
                    cdb.genfltreg(STO, reg, 0);
                    regwithvalue(cdb,ALLREGS,0,&reg,0);     // reg = a register containing 0
                    cdb.genfltreg(STO, reg, 4);

                    push87(cdb);
                    cdb.genfltreg(0xDF,5,0);     // FILD m64int

                    regm_t retregsy = mST0 /*| (*pretregs & mPSW)*/;
                    fixresult87(cdb, e, retregsy, pretregs);
                    return;
                }
                break;

            case OPd_s64:
                if (!I64)
                    goto Lcnvt87;
                goto case OPd_s32;

            case OPd_s16:
            case OPd_s32:
                if (config.fpxmmregs)
                {
                    xmmcnvt(cdb,e,pretregs);
                    return;
                }
                goto Lcnvt87;

            case OPd_u16:
            Lcnvt87:
                // double -> integer via cnvt87
                cnvt87(cdb,e,pretregs);
                return;

            case OPd_u32:               // use subroutine, not 8087
                if (I64 && config.fpxmmregs)
                {
                    xmmcnvt(cdb,e,pretregs);
                    return;
                }
                if (I32 || I64)
                {
                    cdd_u32(cdb,e,pretregs);
                    return;
                }
                // Remaining targets: pick where the operand goes for the library
                // call, depending on the executable format.
                if (config.exe & EX_posix)
                {
                    retregs = mST0;
                }
                else
                {
                    retregs = DOUBLEREGS;
                }
                goto L1;

            case OPd_u64:
                if (I32 || I64)
                {
                    cdd_u64(cdb,e,pretregs);
                    return;
                }
                retregs = DOUBLEREGS;
                goto L1;

            case OPu64_d:
                if (*pretregs & mST0)
                {
                    // Result wanted on the x87 stack: operand in AX (I64) or DX:AX,
                    // then call the u64 -> long double routine.
                    regm_t retregsx = I64 ? mAX : mAX|mDX;
                    codelem(cdb,e.EV.E1,&retregsx,false);
                    callclib(cdb,e,CLIB.u64_ldbl,pretregs,0);
                    return;
                }
                break;

            case OPld_u64:
            {
                if (I32 || I64)
                {
                    cdd_u64(cdb,e,pretregs);
                    return;
                }
                regm_t retregsx = mST0;
                codelem(cdb,e.EV.E1,&retregsx,false);
                callclib(cdb,e,CLIB.ld_u64,pretregs,0);
                return;
            }

            default:
                break;
        }
    }
    // Library call path: operand goes where a function returning a value of
    // the operand's type would return it.
    retregs = regmask(e.EV.E1.Ety, TYnfunc);
L1:
    codelem(cdb,e.EV.E1,&retregs,false);
    // Linear search of the table; the assert fires if the operator has no entry.
    for (int i = 0; 1; i++)
    {
        assert(i < clib.length);
        if (clib[i][0] == e.Eoper)
        {
            callclib(cdb,e,clib[i][1],pretregs,0);
            break;
        }
    }
}
3592 
3593 
/***************************
 * Convert short to long.
 * For OPs16_32, OPu16_32, OPnp_fp, OPu32_64, OPs32_64,
 * OPu64_128, OPs64_128
 *
 * The strategy is chosen by the chain of conditions below, in order:
 *  1. no register result wanted: evaluate the operand only
 *  2. result is a register pair and the upper half is either zero or a
 *     segment register (OPnp_fp)
 *  3. I64 zero extension of 32 to 64 bits within one register
 *  4. I64 OPs32_64 of a relational operator result (nothing to do)
 *  5. widening within a single register (MOVZX/MOVSX/MOVSXD/CWDE/CDQE/AND)
 *  6. sign extension into DX:AX with CWD/CDQ
 *  7. sign extension into an arbitrary register pair using MOV + SAR
 *
 * Params:
 *      cdb      = code builder receiving the generated instructions
 *      e        = conversion elem; e.EV.E1 is the narrower operand
 *      pretregs = in: registers/flags wanted for the result; mPSW may be
 *                 cleared here on paths where evaluating the operand has
 *                 already set the flags
 */

@trusted
void cdshtlng(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    reg_t reg;
    regm_t retregs;

    //printf("cdshtlng(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    int e1comsub = e.EV.E1.Ecount;          // remember whether the operand is a common subexpression
    ubyte op = e.Eoper;
    if ((*pretregs & (ALLREGS | mBP)) == 0)    // if don't need result in regs
    {
        codelem(cdb,e.EV.E1,pretregs,false);     // then conversion isn't necessary
        return;
    }
    else if (
             op == OPnp_fp ||
             (I16 && op == OPu16_32) ||
             (I32 && op == OPu32_64) ||
             (I64 && op == OPu64_128)
            )
    {
        /* Result goes into a register pair.
         * Zero extend by putting a zero into most significant reg.
         */

        regm_t retregsx = *pretregs & mLSW;
        assert(retregsx);
        tym_t tym1 = tybasic(e.EV.E1.Ety);  // captured before codelem() processes the operand
        codelem(cdb,e.EV.E1,&retregsx,false);

        regm_t regm = *pretregs & (mMSW & ALLREGS);
        if (regm == 0)                  // *pretregs could be mES
            regm = mMSW & ALLREGS;
        allocreg(cdb,&regm,&reg,TYint);
        if (e1comsub)
            getregs(cdb,retregsx);
        if (op == OPnp_fp)
        {
            // Upper half is the segment register selected by the pointer type
            int segreg;

            // BUG: what about pointers to functions?
            switch (tym1)
            {
                case TYimmutPtr:
                case TYnptr:    segreg = SEG_DS;        break;
                case TYcptr:    segreg = SEG_CS;        break;
                case TYsptr:    segreg = SEG_SS;        break;
                default:        assert(0);
            }
            cdb.gen2(0x8C,modregrm(3,segreg,reg));  // MOV reg,segreg
        }
        else
            movregconst(cdb,reg,0,0);  // 0 extend

        fixresult(cdb,e,retregsx | regm,pretregs);
        return;
    }
    else if (I64 && op == OPu32_64)
    {
        elem *e1 = e.EV.E1;
        retregs = *pretregs;
        if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
        {
            // Operand is in memory: load it directly
            code cs;

            allocreg(cdb,&retregs,&reg,TYint);
            loadea(cdb,e1,&cs,LOD,reg,0,retregs,retregs);  //  MOV Ereg,EA
            freenode(e1);
        }
        else
        {
            *pretregs &= ~mPSW;                 // flags are set by eval of e1
            codelem(cdb,e1,&retregs,false);
            /* Determine if high 32 bits are already 0
             */
            if (e1.Eoper == OPu16_32 && !e1.Ecount)
            {
                // nothing to emit in this case
            }
            else
            {
                // Zero high 32 bits
                getregs(cdb,retregs);
                reg = findreg(retregs);
                // Don't use 0x89 because that will get optimized away
                genregs(cdb,LOD,reg,reg);  // MOV Ereg,Ereg
            }
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (I64 && op == OPs32_64 && OTrel(e.EV.E1.Eoper) && !e.EV.E1.Ecount)
    {
        /* Due to how e1 is calculated, the high 32 bits of the register
         * are already 0.
         */
        retregs = *pretregs;
        codelem(cdb,e.EV.E1,&retregs,false);
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (!I16 && (op == OPs16_32 || op == OPu16_32) ||
              I64 && op == OPs32_64)
    {
        // Widening that stays within one register
        elem *e11;
        elem *e1 = e.EV.E1;

        if (e1.Eoper == OPu8_16 && !e1.Ecount &&
            ((e11 = e1.EV.E1).Eoper == OPvar || (e11.Eoper == OPind && !e11.Ecount))
           )
        {
            // Operand is itself a zero-extended byte in memory: clear a
            // byte-addressable register and load just the low byte, folding
            // both conversions into one.
            code cs;

            retregs = *pretregs & BYTEREGS;
            if (!retregs)
                retregs = BYTEREGS;
            allocreg(cdb,&retregs,&reg,TYint);
            movregconst(cdb,reg,0,0);                   //  XOR reg,reg
            loadea(cdb,e11,&cs,0x8A,reg,0,retregs,retregs);  //  MOV regL,EA
            freenode(e11);
            freenode(e1);
        }
        else if (e1.Eoper == OPvar ||
            (e1.Eoper == OPind && !e1.Ecount))
        {
            // Operand is in memory: extend while loading
            code cs = void;

            // When optimizing for speed on I32, OPu16_32 takes the L2 path instead
            if (I32 && op == OPu16_32 && config.flags4 & CFG4speed)
                goto L2;
            retregs = *pretregs;
            allocreg(cdb,&retregs,&reg,TYint);
            const opcode = (op == OPu16_32) ? MOVZXw : MOVSXw; // MOVZX/MOVSX reg,EA
            if (op == OPs32_64)
            {
                assert(I64);
                // MOVSXD reg,e1
                loadea(cdb,e1,&cs,0x63,reg,0,0,retregs);
                code_orrex(cdb.last(), REX_W);
            }
            else
                loadea(cdb,e1,&cs,opcode,reg,0,0,retregs);
            freenode(e1);
        }
        else
        {
        L2:
            // Evaluate the operand into a register, then extend in place
            retregs = *pretregs;
            if (op == OPs32_64)
                retregs = mAX | (*pretregs & mPSW);
            *pretregs &= ~mPSW;             // flags are already set
            // The operand's code is built separately so that its last
            // instruction can be inspected (and patched) below.
            CodeBuilder cdbx;
            cdbx.ctor();
            codelem(cdbx,e1,&retregs,false);
            code *cx = cdbx.finish();
            cdb.append(cdbx);
            getregs(cdb,retregs);
            if (op == OPu16_32 && cx)
            {
                cx = code_last(cx);
                // Is the last instruction opcode 0x81 /4 (AND reg,imm) on the result register?
                if (cx.Iop == 0x81 && (cx.Irm & modregrm(3,7,0)) == modregrm(3,4,0) &&
                    mask(cx.Irm & 7) == retregs)
                {
                    // Convert AND of a word to AND of a dword, zeroing upper word
                    if (cx.Irex & REX_B)
                        retregs = mask(8 | (cx.Irm & 7));
                    cx.Iflags &= ~CFopsize;
                    cx.IEV2.Vint &= 0xFFFF;
                    goto L1;
                }
            }
            if (op == OPs16_32 && retregs == mAX)
                cdb.gen1(0x98);         // CWDE
            else if (op == OPs32_64 && retregs == mAX)
            {
                cdb.gen1(0x98);         // CDQE
                code_orrex(cdb.last(), REX_W);
            }
            else
            {
                reg = findreg(retregs);
                if (config.flags4 & CFG4speed && op == OPu16_32)
                {   // AND reg,0xFFFF
                    cdb.genc2(0x81,modregrmx(3,4,reg),0xFFFFu);
                }
                else
                {
                    opcode_t iop = (op == OPu16_32) ? MOVZXw : MOVSXw; // MOVZX/MOVSX reg,reg
                    genregs(cdb,iop,reg,reg);
                }
            }
         L1:
            if (e1comsub)
                getregs(cdb,retregs);
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (*pretregs & mPSW || config.target_cpu < TARGET_80286)
    {
        // OPs16_32, OPs32_64
        // CWD doesn't affect flags, so we can depend on the integer
        // math to provide the flags.
        retregs = mAX | mPSW;               // want integer result in AX
        *pretregs &= ~mPSW;                 // flags are already set
        codelem(cdb,e.EV.E1,&retregs,false);
        getregs(cdb,mDX);           // sign extend into DX
        cdb.gen1(0x99);                     // CWD/CDQ
        if (e1comsub)
            getregs(cdb,retregs);
        fixresult(cdb,e,mDX | retregs,pretregs);
        return;
    }
    else
    {
        // OPs16_32, OPs32_64, OPs64_128
        // Sign extend into any register pair: copy the low word into the
        // high register and arithmetic-shift it right by (word size - 1).
        uint msreg,lsreg;

        retregs = *pretregs & mLSW;
        assert(retregs);
        codelem(cdb,e.EV.E1,&retregs,false);
        retregs |= *pretregs & mMSW;
        allocreg(cdb,&retregs,&reg,e.Ety);
        msreg = findregmsw(retregs);
        lsreg = findreglsw(retregs);
        genmovreg(cdb,msreg,lsreg);                // MOV msreg,lsreg
        assert(config.target_cpu >= TARGET_80286);              // 8088 can't handle SAR reg,imm8
        cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1);    // SAR msreg,31
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
}
3830 
3831 
/***************************
 * Convert byte to int.
 * For OPu8_16 and OPs8_16.
 *
 * Params:
 *      cdb      = code builder receiving the generated instructions
 *      e        = conversion elem; e.EV.E1 is the byte-sized operand
 *      pretregs = in: registers/flags wanted for the result; mPSW is cleared
 *                 on paths where the flags are already set by the generated code
 */

@trusted
void cdbyteint(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    char size;              // result size; only assigned (and only read) on the !I16 path

    if ((*pretregs & (ALLREGS | mBP | XMMREGS)) == 0) // if don't need result in regs
    {
        codelem(cdb,e.EV.E1,pretregs,false);      // then conversion isn't necessary
        return;
    }

    //printf("cdbyteint(e = %p, *pretregs = %s\n", e, regm_str(*pretregs));
    char op = e.Eoper;
    elem *e1 = e.EV.E1;
    if (e1.Eoper == OPcomma)
        docommas(cdb,&e1);          // handle the comma operands; e1 is updated to the remaining operand
    if (!I16)
    {
        if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
        {
            // Operand is in memory: extend while loading
            code cs;

            regm_t retregsx = *pretregs;
            reg_t reg;
            allocreg(cdb,&retregsx,&reg,TYint);
            if (config.flags4 & CFG4speed &&
                op == OPu8_16 && mask(reg) & BYTEREGS &&
                config.target_cpu < TARGET_PentiumPro)
            {
                // Speed-optimized zero extension for pre-PentiumPro targets:
                // clear the register, then load only its low byte.
                movregconst(cdb,reg,0,0);                 //  XOR reg,reg
                loadea(cdb,e1,&cs,0x8A,reg,0,retregsx,retregsx); //  MOV regL,EA
            }
            else
            {
                const opcode = (op == OPu8_16) ? MOVZXb : MOVSXb; // MOVZX/MOVSX reg,EA
                loadea(cdb,e1,&cs,opcode,reg,0,0,retregsx);
            }
            freenode(e1);
            fixresult(cdb,e,retregsx,pretregs);
            return;
        }
        size = tysize(e.Ety);
        // Operand must be evaluated into a byte-addressable register
        retregs = *pretregs & BYTEREGS;
        if (retregs == 0)
            retregs = BYTEREGS;
        retregs |= *pretregs & mPSW;        // have the operand's evaluation set the flags
        *pretregs &= ~mPSW;
    }
    else
    {
        if (op == OPu8_16)              // if uint conversion
        {
            retregs = *pretregs & BYTEREGS;
            if (retregs == 0)
                retregs = BYTEREGS;
        }
        else
        {
            // CBW doesn't affect flags, so we can depend on the integer
            // math to provide the flags.
            retregs = mAX | (*pretregs & mPSW); // want integer result in AX
        }
    }

    // The operand's code is built separately so we can tell whether any
    // instructions were generated for it (c1 is null if none were).
    CodeBuilder cdb1;
    cdb1.ctor();
    codelem(cdb1,e1,&retregs,false);
    code *c1 = cdb1.finish();
    cdb.append(cdb1);
    reg_t reg = findreg(retregs);
    code *c;
    if (!c1)
        goto L1;

    // If previous instruction is an AND bytereg,value
    // (for the signed conversion this is only usable when the mask leaves bit 7 clear)
    c = cdb.last();
    if (c.Iop == 0x80 && c.Irm == modregrm(3,4,reg & 7) &&
        (op == OPu8_16 || (c.IEV2.Vuns & 0x80) == 0))
    {
        if (*pretregs & mPSW)
            c.Iflags |= CFpsw;
        c.Iop |= 1;                    // convert to word operation
        c.IEV2.Vuns &= 0xFF;           // dump any high order bits
        *pretregs &= ~mPSW;             // flags already set
    }
    else
    {
     L1:
        if (!I16)
        {
            if (op == OPs8_16 && reg == AX && size == 2)
            {
                cdb.gen1(0x98);                  // CBW
                cdb.last().Iflags |= CFopsize;  // don't do a CWDE
            }
            else
            {
                // We could do better by not forcing the src and dst
                // registers to be the same.

                if (config.flags4 & CFG4speed && op == OPu8_16)
                {   // AND reg,0xFF
                    cdb.genc2(0x81,modregrmx(3,4,reg),0xFF);
                }
                else
                {
                    opcode_t iop = (op == OPu8_16) ? MOVZXb : MOVSXb; // MOVZX/MOVSX reg,reg
                    genregs(cdb,iop,reg,reg);
                    // In 64-bit mode registers numbered 4 and up get a REX prefix
                    // on the instruction just generated.
                    if (I64 && reg >= 4)
                        code_orrex(cdb.last(), REX);
                }
            }
        }
        else
        {
            if (op == OPu8_16)
                genregs(cdb,0x30,reg+4,reg+4);  // XOR regH,regH
            else
            {
                cdb.gen1(0x98);                 // CBW
                *pretregs &= ~mPSW;             // flags already set
            }
        }
    }
    getregs(cdb,retregs);
    fixresult(cdb,e,retregs,pretregs);
}
3965 
3966 
/***************************
 * Narrowing conversions that simply keep the low part of a value:
 *   OP32_16   long to short
 *   OPoffset  offset part of a far pointer
 *   OP16_8    int to byte
 *   OP64_32   long long to long
 *   OP128_64
 *
 * Params:
 *      cdb      = code builder receiving the generated instructions
 *      e        = conversion elem; e.EV.E1 is the wider operand
 *      pretregs = registers wanted for the result (0 if no result is needed)
 */

@trusted
void cdlngsht(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    debug
    {
        // Only the operators listed in the header comment may arrive here
        const bool handled = e.Eoper == OP32_16 ||
                             e.Eoper == OPoffset ||
                             e.Eoper == OP16_8 ||
                             e.Eoper == OP64_32 ||
                             e.Eoper == OP128_64;
        if (!handled)
            assert(0);
    }

    regm_t lowregs;
    elem *operand = e.EV.E1;
    const bool toByte = e.Eoper == OP16_8;

    if (!toByte && operand.Eoper == OPrelconst)
    {
        offsetinreg(cdb,operand,&lowregs);
    }
    else
    {
        // Byte results need a byte-addressable register; anything else may
        // use any general register. Request nothing if no result is wanted.
        if (!*pretregs)
            lowregs = 0;
        else if (toByte)
            lowregs = BYTEREGS;
        else
            lowregs = ALLREGS;
        codelem(cdb,operand,&lowregs,false);

        if (!toByte)
        {
            const bool isOffset = e.Eoper == OPoffset;
            const bool pairOperand =
                I16 ||
                (I32 && (isOffset || e.Eoper == OP64_32)) ||
                (I64 && (isOffset || e.Eoper == OP128_64));
            if (pairOperand)
                lowregs &= mLSW;        // keep only the least significant word
        }
    }

    /* Giving a register the result of a new elem counts as "destroying" it,
     * even though the value does not change: the CSE bookkeeping lets a
     * register hold the contents of only one elem at a time.
     */
    if (!e.Ecount)
        useregs(lowregs);
    else
        getregs(cdb,lowregs);

    const bool resultMissing = *pretregs && !lowregs;

    debug
    {
        if (resultMissing)
        {
            printf("%s *pretregs = %s, retregs = %s, e = %p\n",oper_str(e.Eoper),regm_str(*pretregs),regm_str(lowregs),e);
        }
    }

    assert(!resultMissing);
    fixresult(cdb,e,lowregs,pretregs);  // least significant word only
}
4034 
/**********************************************
 * OPmsw: extract the most significant half of a double-width value, i.e.
 *   the top 32 bits of a 64 bit value (I32),
 *   the top 16 bits of a 32 bit value (I16), or
 *   the top 64 bits of a 128 bit value (I64).
 *
 * Params:
 *      cdb      = code builder receiving the generated instructions
 *      e        = the OPmsw elem; e.EV.E1 is the double-width operand
 *      pretregs = registers wanted for the result (0 if no result is needed)
 */

@trusted
void cdmsw(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(e.Eoper == OPmsw);

    // Evaluate the whole operand (requesting nothing when no result is
    // wanted), then narrow the register set to the most significant word.
    regm_t hiregs = 0;
    if (*pretregs)
        hiregs = ALLREGS;
    codelem(cdb,e.EV.E1,&hiregs,false);
    hiregs &= mMSW;

    /* Giving a register the result of a new elem counts as "destroying" it,
     * even though the value does not change: the CSE bookkeeping lets a
     * register hold the contents of only one elem at a time.
     */
    if (!e.Ecount)
        useregs(hiregs);
    else
        getregs(cdb,hiregs);

    const bool resultMissing = *pretregs && !hiregs;

    debug
    {
        if (resultMissing)
        {
            printf("%s *pretregs = %s, retregs = %s\n",oper_str(e.Eoper),regm_str(*pretregs),regm_str(hiregs));
            elem_print(e);
        }
    }

    assert(!resultMissing);
    fixresult(cdb,e,hiregs,pretregs);  // most significant word only
}
4070 
4071 
4072 
/******************************
 * Generate code for the port I/O operators OPinp and OPoutp.
 *
 * The opcode is built from the base value 0xE4 by OR-ing in:
 *   0x08  port number is taken from DX instead of an immediate byte
 *   0x02  OUT instead of IN
 *   0x01  word-sized instead of byte-sized transfer
 *
 * Params:
 *      cdb      = code builder receiving the generated instructions
 *      e        = OPinp/OPoutp elem; e.EV.E1 is the port number and, for
 *                 OPoutp, e.EV.E2 is the value to write
 *      pretregs = registers wanted for the result
 */

@trusted
void cdport(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdport\n");
    elem *portElem = e.EV.E1;

    // The immediate form of IN/OUT is usable for a constant port number that
    // fits in a byte, provided it need not be in a register (or DX is taken
    // by a register variable).
    const bool immediatePort =
        portElem.Eoper == OPconst && portElem.EV.Vuns <= 255 &&
        (!evalinregister(portElem) || regcon.mvar & mDX);

    ubyte port = 0;             // stays 0 for the DX form
    if (immediatePort)
    {
        port = cast(ubyte)portElem.EV.Vuns;
        freenode(portElem);
    }
    else
    {
        regm_t portregs = mDX;          // port number is always DX
        codelem(cdb,portElem,&portregs,false);
    }

    const bool isOutput = e.Eoper == OPoutp;
    uint sz;
    if (isOutput)
    {
        // Take the size before the value elem is handed to scodelem()
        sz = tysize(e.EV.E2.Ety);
        regm_t valregs = mAX;           // byte/word to output is in AL/AX
        scodelem(cdb,e.EV.E2,&valregs,(immediatePort ? 0 : mDX),true);
    }
    else // OPinp
    {
        getregs(cdb,mAX);
        sz = tysize(e.Ety);
    }

    const bool wordOp = sz != 1;

    ubyte op = 0xE4;            // root of all IN/OUT opcodes
    if (!immediatePort)
        op |= 0x08;             // DX version of opcode
    if (isOutput)
        op |= 0x02;             // OUT opcode
    if (wordOp)
        op |= 1;                // word operation

    cdb.genc2(op,0,port);               // IN/OUT AL/AX,DX/port
    if (wordOp && sz != REGSIZE)        // if need size override
        cdb.last().Iflags |= CFopsize;

    regm_t resultregs = mAX;
    fixresult(cdb,e,resultregs,pretregs);
}
4123 
/************************
 * Generate code for an asm elem.
 *
 * The bytes held by the elem (e.EV.Vstring, length e.EV.Vstrlen) are
 * emitted through CodeBuilder.genasm; the result is taken to be in AX
 * (DX:AX when I16).
 *
 * Params:
 *      cdb      = code builder receiving the generated instructions
 *      e        = the asm elem
 *      pretregs = registers wanted for the result
 */

@trusted
void cdasm(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // Assume the asm code destroys only the registers a function call
    // would normally destroy.
    const regm_t clobbered = (ALLREGS | mES) & ~fregsaved;
    getregs(cdb,clobbered);

    cdb.genasm(cast(char *)e.EV.Vstring, cast(uint) e.EV.Vstrlen);

    regm_t resultregs = mAX;
    if (I16)
        resultregs = mDX | mAX;
    fixresult(cdb,e,resultregs,pretregs);
}
4136 
/************************
 * Generate code for OPnp_f16p and OPf16p_np.
 *
 * Both conversions rotate the upper 16 bits of the pointer into the lower
 * half of the register, shift that 16 bit word by 3, then rotate it back.
 *
 * OPnp_f16p:
 *              OR  ereg,ereg
 *              JE  L1
 *              ROR ereg,16
 *              SHL reg,3
 *              MOV rx,SS
 *              AND rx,3                ;mask off CPL bits
 *              OR  rl,4                ;run on LDT bit
 *              OR  regl,rl
 *              ROL ereg,16
 *          L1: NOP
 *
 * OPf16p_np:
 *              ROR ereg,16
 *              SHR reg,3
 *              ROL ereg,16
 *
 * The instructions are emitted in exactly the order listed: the 16 bit
 * shift must sit between the two rotates, and the label L1 must be part of
 * the instruction stream so that the jump over the conversion has a target.
 * Params:
 *      cdb = store generated code here
 *      e = the OPnp_f16p or OPf16p_np expression
 *      pretregs = registers the operand is evaluated into and converted in place
 */

@trusted
void cdfar16(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    assert(I32);
    codelem(cdb,e.EV.E1,pretregs,false);
    reg_t reg = findreg(*pretregs);
    getregs(cdb,*pretregs);      // we will destroy the regs

    const bool tofar16 = e.Eoper == OPnp_f16p;

    code cs;
    cs.Iop = 0xC1;                      // rotate/shift r/m by imm8
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.IFL2 = FLconst;

    code *cnop = null;                  // label L1, only used by OPnp_f16p
    reg_t rx;                           // scratch register, only used by OPnp_f16p
    if (tofar16)
    {
        regm_t retregs = BYTEREGS & ~*pretregs;
        allocreg(cdb,&retregs,&rx,TYint);
        cnop = gennop(null);
        int jop = JCXZ;                 // CX can be tested by the jump itself
        if (reg != CX)
        {
            gentstreg(cdb,reg);
            jop = JE;
        }
        genjmp(cdb,jop,FLcode, cast(block *)cnop);  // Jop L1
    }

    cs.Irm = modregrm(3,1,reg);
    cs.IEV2.Vuns = 16;
    cdb.gen(&cs);                       // ROR ereg,16

    cs.Irm = modregrm(3,(tofar16 ? 4 : 5),reg);
    cs.IEV2.Vuns = 3;
    cs.Iflags |= CFopsize;              // operate on the 16 bit word only
    cdb.gen(&cs);                       // SHL reg,3 or SHR reg,3

    if (tofar16)
    {
        genregs(cdb,0x8C,2,rx);                         // MOV rx,SS
        int isbyte = (mask(reg) & BYTEREGS) == 0;
        cdb.genc2(0x80 | isbyte,modregrm(3,4,rx),3);    // AND rl,3
        cdb.genc2(0x80,modregrm(3,1,rx),4);             // OR  rl,4
        genregs(cdb,0x0A | isbyte,reg,rx);              // OR  regl,rl
    }

    cs.Irm = modregrm(3,0,reg);
    cs.IEV2.Vuns = 16;
    cs.Iflags = 0;                      // back to a full 32 bit operation
    cdb.gen(&cs);                       // ROL ereg,16

    cdb.append(cnop);                   // L1: NOP (null for OPf16p_np)
}
4210 
/*************************
 * Generate code for OPbtst
 *
 * Emits a BT instruction that tests bit e2 of operand e1, then, if a
 * register result is requested, converts the carry flag into a value
 * in a register.
 * Params:
 *      cdb = store generated code here
 *      e = the OPbtst expression; e.EV.E1 is the operand, e.EV.E2 the bit index
 *      pretregs = registers the caller wants the result in; 0 means the
 *                 operands are evaluated but no test is generated
 */

@trusted
void cdbtst(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    regm_t retregs;
    reg_t reg;

    //printf("cdbtst(e = %p, *pretregs = %s\n", e, regm_str(*pretregs));

    opcode_t op = 0xA3;                        // BT EA,value
    int mode = 4;                              // reg field used with the imm8 form (0x0FBA)

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    code cs;
    cs.Iflags = 0;

    if (*pretregs == 0)                   // if don't want result
    {
        codelem(cdb,e1,pretregs,false);  // eval left leaf
        *pretregs = 0;                    // in case they got set
        codelem(cdb,e2,pretregs,false);
        return;
    }

    // Set up cs to address the operand: directly in memory when e1 is a
    // variable or a non-CSE indirection, otherwise in a register that e1
    // is evaluated into. idxregs records the registers that must survive
    // the evaluation of e2.
    regm_t idxregs;
    if ((e1.Eoper == OPind && !e1.Ecount) || e1.Eoper == OPvar)
    {
        getlvalue(cdb, &cs, e1, RMload);    // get addressing mode
        idxregs = idxregm(&cs);             // mask if index regs used
    }
    else
    {
        retregs = tysize(e1.Ety) == 1 ? BYTEREGS : allregs;
        codelem(cdb,e1, &retregs, false);
        reg = findreg(retregs);
        cs.Irm = modregrm(3,0,reg & 7);     // register-direct operand
        cs.Iflags = 0;
        cs.Irex = 0;
        if (reg & 8)                        // extended register needs REX.B
            cs.Irex |= REX_B;
        idxregs = retregs;
    }

    tym_t ty1 = tybasic(e1.Ety);
    const sz = tysize(e1.Ety);
    // operand size prefix for 16 bit operands outside of 16 bit code
    ubyte word = (!I16 && _tysize[ty1] == SHORTSIZE) ? CFopsize : 0;

//    if (e2.Eoper == OPconst && e2.EV.Vuns < 0x100)  // should do this instead?
    if (e2.Eoper == OPconst)
    {
        cs.Iop = 0x0FBA;                         // BT rm,imm8
        cs.Irm |= modregrm(0,mode,0);
        cs.Iflags |= CFpsw | word;
        cs.IFL2 = FLconst;
        // The bit index is reduced modulo the operand width
        if (sz <= SHORTSIZE)
        {
            cs.IEV2.Vint = e2.EV.Vint & 15;
        }
        else if (sz == 4)
        {
            cs.IEV2.Vint = e2.EV.Vint & 31;
        }
        else
        {
            cs.IEV2.Vint = e2.EV.Vint & 63;
            if (I64)
                cs.Irex |= REX_W;
        }
        cdb.gen(&cs);
    }
    else
    {
        // Bit index goes in any general register not used to address e1
        retregs = ALLREGS & ~idxregs;

        /* A register variable may not have its upper 32
         * bits 0, so pick a different register to force
         * a MOV which will clear it
         */
        if (I64 && sz == 8 && tysize(e2.Ety) == 4)
        {
            regm_t rregm;
            if (isregvar(e2, &rregm, null))
                retregs &= ~rregm;
        }

        scodelem(cdb,e2,&retregs,idxregs,true);
        reg = findreg(retregs);

        cs.Iop = 0x0F00 | op;                     // BT rm,reg
        code_newreg(&cs,reg);
        cs.Iflags |= CFpsw | word;
        if (I64 && _tysize[ty1] == 8)
            cs.Irex |= REX_W;
        cdb.gen(&cs);
    }

    // The tested bit is now in the carry flag; materialize it if asked to
    if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
    {
        if (tysize(e.Ety) == 1)
        {
            assert(I64 || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,TYint);
            cdb.gen2(0x0F92,modregrmx(3,0,reg));        // SETC reg
            if (I64 && reg >= 4)
                code_orrex(cdb.last(), REX);
            *pretregs = retregs;
        }
        else
        {
            code *cnop = null;
            // keep regcon.immed.mval as it was before allocreg
            regm_t save = regcon.immed.mval;
            allocreg(cdb,&retregs,&reg,TYint);
            regcon.immed.mval = save;
            if ((*pretregs & mPSW) == 0)
            {
                // flags are not wanted afterwards: reg = carry ? 1 : 0
                getregs(cdb,retregs);
                genregs(cdb,0x19,reg,reg);     // SBB reg,reg
                cdb.gen2(0xF7,modregrmx(3,3,reg));          // NEG reg
            }
            else
            {
                // flags are wanted too: load 1, and replace it with 0
                // unless the carry is set
                movregconst(cdb,reg,1,8);      // MOV reg,1
                cnop = gennop(null);
                genjmp(cdb,JC,FLcode, cast(block *) cnop);  // Jtrue nop
                                                            // MOV reg,0
                movregconst(cdb,reg,0,8);
                // reg does not hold one known constant on both paths
                regcon.immed.mval &= ~mask(reg);
            }
            *pretregs = retregs;
            cdb.append(cnop);                   // jump target; null when no jump was generated
        }
    }
}
4348 
/*************************
 * Generate code for OPbt, OPbtc, OPbtr, OPbts
 *
 * Emits the matching BT/BTC/BTR/BTS instruction on the effective address
 * obtained from e, with the bit index taken from e.EV.E2, then, if a
 * register result is requested, converts the carry flag into a value
 * in a register.
 * Params:
 *      cdb = store generated code here
 *      e = the bit test expression
 *      pretregs = registers the caller wants the result in
 */

@trusted
void cdbt(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdbt(%p, %s)\n", e, regm_str(*pretregs));
    regm_t retregs;
    reg_t reg;
    opcode_t op;        // second opcode byte of the "rm,reg" form
    int mode;           // reg field selecting the operation in the "rm,imm8" form

    switch (e.Eoper)
    {
        case OPbt:      op = 0xA3; mode = 4; break;
        case OPbtc:     op = 0xBB; mode = 7; break;
        case OPbtr:     op = 0xB3; mode = 6; break;
        case OPbts:     op = 0xAB; mode = 5; break;

        default:
            assert(0);
    }

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    code cs;
    cs.Iflags = 0;

    getlvalue(cdb, &cs, e, RMload);      // get addressing mode
    if (e.Eoper == OPbt && *pretregs == 0)
    {
        // A plain test whose result is unused: only evaluate the index
        codelem(cdb,e2,pretregs,false);
        return;
    }

    const ty1 = tybasic(e1.Ety);
    const ty2 = tybasic(e2.Ety);
    // operand size prefix for 16 bit operands outside of 16 bit code
    ubyte word = (!I16 && _tysize[ty1] == SHORTSIZE) ? CFopsize : 0;
    regm_t idxregs = idxregm(&cs);         // mask if index regs used

//    if (e2.Eoper == OPconst && e2.EV.Vuns < 0x100)  // should do this instead?
    if (e2.Eoper == OPconst)
    {
        cs.Iop = 0x0FBA;                         // BT rm,imm8
        cs.Irm |= modregrm(0,mode,0);
        cs.Iflags |= CFpsw | word;
        cs.IFL2 = FLconst;
        // Split the constant index: the part beyond the operand width is
        // folded into the address as a byte offset, the rest is the imm8
        if (_tysize[ty1] == SHORTSIZE)
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~15) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 15;
        }
        else if (_tysize[ty1] == 4)
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~31) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 31;
        }
        else
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~63) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 63;
            if (I64)
                cs.Irex |= REX_W;
        }
        cdb.gen(&cs);
    }
    else
    {
        // Bit index goes in any general register not used by the address
        retregs = ALLREGS & ~idxregs;
        scodelem(cdb,e2,&retregs,idxregs,true);
        reg = findreg(retregs);

        cs.Iop = 0x0F00 | op;                     // BT rm,reg
        code_newreg(&cs,reg);
        cs.Iflags |= CFpsw | word;
        // NOTE(review): REX.W keys off the index type here, while cdbtst
        // keys off the operand type - confirm this is intended
        if (_tysize[ty2] == 8 && I64)
            cs.Irex |= REX_W;
        cdb.gen(&cs);
    }

    // The tested bit is now in the carry flag; materialize it if asked to
    if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
    {
        // tysize() masks off the type modifier bits before indexing the
        // size table; e.Ety must not be used as an index directly
        if (tysize(e.Ety) == 1)
        {
            assert(I64 || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,TYint);
            cdb.gen2(0x0F92,modregrmx(3,0,reg));        // SETC reg
            if (I64 && reg >= 4)
                code_orrex(cdb.last(), REX);
            *pretregs = retregs;
        }
        else
        {
            code *cnop = null;
            // keep regcon.immed.mval as it was before allocreg
            const save = regcon.immed.mval;
            allocreg(cdb,&retregs,&reg,TYint);
            regcon.immed.mval = save;
            if ((*pretregs & mPSW) == 0)
            {
                // flags are not wanted afterwards: reg = carry ? 1 : 0
                getregs(cdb,retregs);
                genregs(cdb,0x19,reg,reg);                  // SBB reg,reg
                cdb.gen2(0xF7,modregrmx(3,3,reg));          // NEG reg
            }
            else
            {
                // flags are wanted too: load 1, and replace it with 0
                // unless the carry is set
                movregconst(cdb,reg,1,8);      // MOV reg,1
                cnop = gennop(null);
                genjmp(cdb,JC,FLcode, cast(block *) cnop);    // Jtrue nop
                                                            // MOV reg,0
                movregconst(cdb,reg,0,8);
                // reg does not hold one known constant on both paths
                regcon.immed.mval &= ~mask(reg);
            }
            *pretregs = retregs;
            cdb.append(cnop);                   // jump target; null when no jump was generated
        }
    }
}
4467 
/*************************************
 * Generate code for OPbsf and OPbsr.
 *
 * The operand is e.EV.E1 and must be 2, 4 or 8 bytes in size. It is
 * scanned in place when it can be addressed directly, otherwise it is
 * first evaluated into a register.
 * Params:
 *      cdb = store generated code here
 *      e = the OPbsf or OPbsr expression
 *      pretregs = registers the caller wants the result in; 0 means only
 *                 the operand is evaluated
 */

@trusted
void cdbscan(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    //printf("cdbscan()\n");
    //elem_print(e);
    elem *e1 = e.EV.E1;
    if (*pretregs == 0)
    {
        // Result is not wanted: just evaluate the operand
        codelem(cdb,e1,pretregs,false);
        return;
    }

    const opsize = _tysize[tybasic(e1.Ety)];
    assert(opsize == 2 || opsize == 4 || opsize == 8);

    // Describe the source operand in cs
    code cs = void;
    const bool viaEA = e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount);
    if (viaEA)
    {
        getlvalue(cdb, &cs, e1, RMload);    // operand is addressed directly
    }
    else
    {
        regm_t srcregs = allregs;
        codelem(cdb,e1, &srcregs, false);
        const src = findreg(srcregs);
        cs.Irm = modregrm(3,0,src & 7);     // register-direct operand
        cs.Iflags = 0;
        cs.Irex = 0;
        if (src & 8)                        // extended register needs REX.B
            cs.Irex |= REX_B;
    }

    // Pick the destination register
    regm_t dstregs = *pretregs & allregs;
    if (dstregs == 0)
        dstregs = allregs;
    reg_t dst;
    allocreg(cdb,&dstregs, &dst, e.Ety);

    // BSF/BSR dst,EA
    cs.Iop = (e.Eoper == OPbsf) ? 0x0FBC : 0x0FBD;
    code_newreg(&cs, dst);
    if (!I16 && opsize == SHORTSIZE)
        cs.Iflags |= CFopsize;
    cdb.gen(&cs);
    if (opsize == 8)
        code_orrex(cdb.last(), REX_W);

    fixresult(cdb,e,dstregs,pretregs);
}
4520 
/************************
 * Generate code for the OPpopcnt operator.
 *
 * The operand is e.EV.E1 and must be 2, 4 or (for 64 bit code) 8 bytes in
 * size. It is counted in place when it can be addressed directly,
 * otherwise it is first evaluated into a register. Not available for
 * 16 bit code.
 * Params:
 *      cdb = store generated code here
 *      e = the OPpopcnt expression
 *      pretregs = registers the caller wants the result in; 0 means only
 *                 the operand is evaluated
 */

@trusted
void cdpopcnt(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdpopcnt()\n");
    //elem_print(e);
    assert(!I16);
    elem *e1 = e.EV.E1;
    if (*pretregs == 0)
    {
        // Result is not wanted: just evaluate the operand
        codelem(cdb,e1,pretregs,false);
        return;
    }

    const opsize = _tysize[tybasic(e1.Ety)];
    assert(opsize == 2 || opsize == 4 || (opsize == 8 && I64));     // no byte op

    // Describe the source operand in cs
    code cs = void;
    const bool viaEA = e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount);
    if (viaEA)
    {
        getlvalue(cdb, &cs, e1, RMload);    // operand is addressed directly
    }
    else
    {
        regm_t srcregs = allregs;
        codelem(cdb,e1, &srcregs, false);
        const src = findreg(srcregs);
        cs.Irm = modregrm(3,0,src & 7);     // register-direct operand
        cs.Iflags = 0;
        cs.Irex = 0;
        if (src & 8)                        // extended register needs REX.B
            cs.Irex |= REX_B;
    }

    // Pick the destination register
    regm_t dstregs = *pretregs & allregs;
    if (dstregs == 0)
        dstregs = allregs;
    reg_t dst;
    allocreg(cdb,&dstregs, &dst, e.Ety);

    // POPCNT dst,EA
    cs.Iop = POPCNT;
    code_newreg(&cs, dst);
    if (opsize == SHORTSIZE)
        cs.Iflags |= CFopsize;
    if (*pretregs & mPSW)
        cs.Iflags |= CFpsw;
    cdb.gen(&cs);
    if (opsize == 8)
        code_orrex(cdb.last(), REX_W);

    // The instruction has already set the flags, so stop asking for them
    *pretregs &= mBP | ALLREGS;

    fixresult(cdb,e,dstregs,pretregs);
}
4578 
4579 
/*******************************************
 * Generate code for OPpair, OPrpair.
 *
 * The two operands are evaluated into the two halves of a register pair.
 * For OPrpair the halves are exchanged. A result destined for the x87
 * stack is handed to loadPair87.
 * Params:
 *      cdb = store generated code here
 *      e = the OPpair or OPrpair expression
 *      pretregs = registers the caller wants the result in; 0 means both
 *                 operands are evaluated and the values discarded
 */

@trusted
void cdpair(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    if (*pretregs == 0)
    {
        // Result is not wanted: evaluate both leaves
        codelem(cdb,e.EV.E1,pretregs,false);
        *pretregs = 0;                          // in case they got set
        codelem(cdb,e.EV.E2,pretregs,false);
        return;
    }

    //printf("\ncdpair(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    //WRTYxx(e.Ety);printf("\n");
    //printf("Ecount = %d\n", e.Ecount);

    regm_t wanted = *pretregs;

    // Only flags wanted for a complex value: choose floating point registers
    if (wanted == mPSW && tycomplex(e.Ety) && config.inline8087)
        wanted |= config.fpxmmregs ? (mXMM0 | mXMM1) : mST01;

    if (wanted & mST01)
    {
        loadPair87(cdb, e, pretregs);
        return;
    }

    // Split the candidates into one register set per operand
    regm_t firstregs;
    regm_t secondregs;
    if (wanted & XMMREGS)
    {
        wanted &= XMMREGS;
        const first = findreg(wanted);
        firstregs = mask(first);
        secondregs = mask(findreg(wanted & ~firstregs));
    }
    else
    {
        wanted &= allregs;
        if (wanted == 0)
            wanted = allregs;
        firstregs = wanted & mLSW;
        secondregs = wanted & mMSW;
    }

    if (e.Eoper == OPrpair)
    {
        // operands go into the opposite halves
        const regm_t tmp = firstregs;
        firstregs = secondregs;
        secondregs = tmp;
    }
    //printf("1: regs1 = %s, regs2 = %s\n", regm_str(firstregs), regm_str(secondregs));

    codelem(cdb,e.EV.E1, &firstregs, false);
    scodelem(cdb,e.EV.E2, &secondregs, firstregs, false);

    // An operand that is a common subexpression has its registers
    // claimed with getregs before they become part of the result
    if (e.EV.E1.Ecount)
        getregs(cdb,firstregs);
    if (e.EV.E2.Ecount)
        getregs(cdb,secondregs);

    fixresult(cdb,e,firstregs | secondregs,pretregs);
}
4650 
/*************************
 * Generate code for OPcmpxchg
 *
 * Emits CMPXCHG8B for an 8 byte lvalue in 32 bit code, and CMPXCHG
 * otherwise. A LOCK prefix is generated only when the lvalue's type is
 * volatile. If a register result is requested it is produced with SETZ,
 * i.e. from the zero flag left by the exchange instruction.
 * Params:
 *      cdb = store generated code here
 *      e = the OPcmpxchg expression
 *      pretregs = registers the caller wants the result in
 */

@trusted
void cdcmpxchg(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* The form is:
     *     OPcmpxchg
     *    /     \
     * lvalue   OPparam
     *          /     \
     *        old     new
     */

    //printf("cdcmpxchg(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    assert(e2.Eoper == OPparam);
    assert(!e2.Ecount);

    const tyml = tybasic(e1.Ety);                   // type of lvalue
    const sz = _tysize[tyml];

    if (I32 && sz == 8)
    {
        // 8 byte exchange in 32 bit code: old value in DX:AX, new value in CX:BX
        regm_t retregsx = mDX|mAX;
        codelem(cdb,e2.EV.E1,&retregsx,false);          // [DX,AX] = e2.EV.E1

        regm_t retregs = mCX|mBX;
        scodelem(cdb,e2.EV.E2,&retregs,mDX|mAX,false);  // [CX,BX] = e2.EV.E2

        code cs = void;
        getlvalue(cdb,&cs,e1,mCX|mBX|mAX|mDX);        // get EA

        getregs(cdb,mDX|mAX);                 // CMPXCHG destroys these regs

        if (e1.Ety & mTYvolatile)
            cdb.gen1(LOCK);                           // LOCK prefix
        cs.Iop = 0x0FC7;                              // CMPXCHG8B EA
        cs.Iflags |= CFpsw;
        code_newreg(&cs,1);                           // reg field 1 of the 0x0FC7 opcode
        cdb.gen(&cs);

        assert(!e1.Ecount);
        freenode(e1);
    }
    else
    {
        const uint isbyte = (sz == 1);            // 1 for byte operation
        // operand size prefix for 16 bit operands outside of 16 bit code
        const ubyte word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
        const uint rex = (I64 && sz == 8) ? REX_W : 0;

        // old value in AX, new value in any other general register
        regm_t retregsx = mAX;
        codelem(cdb,e2.EV.E1,&retregsx,false);       // AX = e2.EV.E1

        regm_t retregs = (ALLREGS | mBP) & ~mAX;
        scodelem(cdb,e2.EV.E2,&retregs,mAX,false);   // load rvalue in reg

        code cs = void;
        getlvalue(cdb,&cs,e1,mAX | retregs); // get EA

        getregs(cdb,mAX);                  // CMPXCHG destroys AX

        if (e1.Ety & mTYvolatile)
            cdb.gen1(LOCK);                        // LOCK prefix
        cs.Iop = 0x0FB1 ^ isbyte;                    // CMPXCHG EA,reg
        cs.Iflags |= CFpsw | word;
        cs.Irex |= rex;
        const reg = findreg(retregs);
        code_newreg(&cs,reg);
        cdb.gen(&cs);

        assert(!e1.Ecount);
        freenode(e1);
    }

    if (regm_t retregs = *pretregs & (ALLREGS | mBP)) // if return result in register
    {
        // The result is a single byte taken from the zero flag
        assert(tysize(e.Ety) == 1);
        assert(I64 || retregs & BYTEREGS);
        reg_t reg;
        allocreg(cdb,&retregs,&reg,TYint);
        uint ea = modregrmx(3,0,reg);
        // presumably gen2 picks the REX prefix up from bits 16 and above
        // of its second argument - TODO confirm
        if (I64 && reg >= 4)
            ea |= REX << 16;
        cdb.gen2(0x0F94,ea);        // SETZ reg
        *pretregs = retregs;
    }
}
4741 
/*************************
 * Generate code for OPprefetch
 *
 * e.EV.E1 supplies the address to prefetch; e.EV.E2 must be a constant
 * selecting the instruction:
 *    0: prefetch0
 *    1: prefetch1
 *    2: prefetch2
 *    3: prefetchnta
 *    4: prefetchw
 *    5: prefetchwt1
 * Params:
 *      cdb = store generated code here
 *      e = the OPprefetch expression
 *      pretregs = must point to 0, as no result is produced
 */

@trusted
void cdprefetch(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    //printf("cdprefetch\n");
    elem *eaddr = e.EV.E1;

    assert(*pretregs == 0);
    assert(e.EV.E2.Eoper == OPconst);

    // Translate the selector into an opcode and the reg field of its modregrm byte
    opcode_t opcode;
    reg_t regfield;
    switch (e.EV.E2.EV.Vuns)
    {
        case 0:                                 // PREFETCH0
            opcode = PREFETCH;
            regfield = 1;
            break;

        case 1:                                 // PREFETCH1
            opcode = PREFETCH;
            regfield = 2;
            break;

        case 2:                                 // PREFETCH2
            opcode = PREFETCH;
            regfield = 3;
            break;

        case 3:                                 // PREFETCHNTA
            opcode = PREFETCH;
            regfield = 0;
            break;

        case 4:                                 // PREFETCHW
            opcode = 0x0F0D;
            regfield = 1;
            break;

        case 5:                                 // PREFETCHWT1
            opcode = 0x0F0D;
            regfield = 2;
            break;

        default:
            assert(0);
    }

    freenode(e.EV.E2);

    code cs = void;
    getlvalue(cdb,&cs,eaddr,0);
    cs.Iop = opcode;
    cs.Irm |= modregrm(0,regfield,0);
    cs.Iflags |= CFvolatile;            // do not schedule
    cdb.gen(&cs);
}
4784 
4785 
/*********************
 * Load a register from the EA of an assignment operation.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with the EA already set in it; on return it holds
 *           the load instruction that was generated
 *      e = assignment expression that will be evaluated (not referenced here)
 *      reg = set to the register that was loaded from the EA
 *      retregs = register candidates for reg
 */
@trusted
private
void opAssLoadReg(ref CodeBuilder cdb, ref code cs, elem* e, out reg_t reg, regm_t retregs)
{
    modEA(cdb, &cs);
    allocreg(cdb, &retregs, &reg, TYoffset);    // choose reg among the candidates

    // MOV reg,EA
    cs.Iop = LOD;
    code_newreg(&cs, reg);
    cdb.gen(&cs);
}
4806 
/*********************
 * Load a register pair from the EA of an assignment operation.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction to fill in with the EA of e.EV.E1; on return it
 *           addresses the least significant half again
 *      e = assignment expression that will be evaluated
 *      rhi = set to the most significant register of the pair
 *      rlo = set to the least significant register of the pair
 *      retregs = register candidates for rhi, rlo
 *      keepmsk = registers to not modify
 */
@trusted
private
void opAssLoadPair(ref CodeBuilder cdb, ref code cs, elem* e, out reg_t rhi, out reg_t rlo, regm_t retregs, regm_t keepmsk)
{
    elem* lvalue = e.EV.E1;
    getlvalue(cdb, &cs, lvalue, retregs | keepmsk);

    // Allocate the pair according to the type of the lvalue
    const tym_t lvaluety = tybasic(lvalue.Ety);
    reg_t unused;
    allocreg(cdb, &retregs, &unused, lvaluety);
    rhi = findregmsw(retregs);
    rlo = findreglsw(retregs);

    // MOV rlo,EA
    cs.Iop = LOD;
    code_newreg(&cs, rlo);
    cdb.gen(&cs);

    // MOV rhi,EA+2
    getlvalue_msw(&cs);
    code_newreg(&cs, rhi);
    cdb.gen(&cs);

    getlvalue_lsw(&cs);             // point cs back at the low half
}
4838 
4839 
/*********************************************************
 * Store the register result of an assignment operation to its EA.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with the EA already set in it
 *      e = assignment expression that was evaluated
 *      reg = register holding the result
 *      pretregs = registers to return the result in
 */
@trusted
private
void opAssStoreReg(ref CodeBuilder cdb, ref code cs, elem* e, reg_t reg, regm_t* pretregs)
{
    elem* lvalue = e.EV.E1;

    // A one byte lvalue uses the byte form of the store opcode
    const bool byteop = _tysize[tybasic(lvalue.Ety)] == 1;
    cs.Iop = byteop ? (STO ^ 1) : STO;
    code_newreg(&cs, reg);
    cdb.gen(&cs);                               // MOV EA,reg

    if (lvalue.Ecount)                          // lvalue is a common subexpression
        cssave(lvalue, mask(reg), !OTleaf(lvalue.Eoper));
    freenode(lvalue);
    fixresult(cdb, e, mask(reg), pretregs);
}
4865 
/*********************************************************
 * Store the register pair result of an assignment operation to its EA.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with the EA already set in it
 *      e = assignment expression that was evaluated
 *      rhi = most significant register of the pair
 *      rlo = least significant register of the pair
 *      pretregs = registers to return the result in
 */
@trusted
private
void opAssStorePair(ref CodeBuilder cdb, ref code cs, elem* e, reg_t rhi, reg_t rlo, regm_t* pretregs)
{
    // MOV EA,rlo
    cs.Iop = STO;
    code_newreg(&cs, rlo);
    cdb.gen(&cs);

    // MOV EA+REGSIZE,rhi
    code_newreg(&cs, rhi);
    getlvalue_msw(&cs);
    cdb.gen(&cs);

    elem* lvalue = e.EV.E1;
    const regm_t pairregs = mask(rhi) | mask(rlo);
    if (lvalue.Ecount)              // lvalue is a common subexpression
        cssave(lvalue, pairregs, !OTleaf(lvalue.Eoper));
    freenode(lvalue);
    fixresult(cdb, e, pairregs, pretregs);
}
4893 
4894 
4895 }