1 /**
2  * Code generation 3
3  *
4  * Includes:
 * - generating a function prolog (pushing return address, loading parameters)
6  * - generating a function epilog (restoring registers, returning)
7  * - generation / peephole optimizations of jump / branch instructions
8  *
9  * Compiler implementation of the
10  * $(LINK2 https://www.dlang.org, D programming language).
11  *
12  * Copyright:   Copyright (C) 1994-1998 by Symantec
13  *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
14  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
15  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
16  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod3.d, backend/cod3.d)
17  * Documentation:  https://dlang.org/phobos/dmd_backend_cod3.html
18  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod3.d
19  */
20 
21 module dmd.backend.cod3;
22 
// This module is compiled for both the SCPP and the MARS builds:
// either one turns on version COMPILE.
version (SCPP)
{
    version = COMPILE;
}
version (MARS)
{
    version = COMPILE;
}
27 
28 version (COMPILE)
29 {
30 
31 import core.bitop;
32 import core.stdc.stdio;
33 import core.stdc.stdlib;
34 import core.stdc.string;
35 
36 import dmd.backend.backend;
37 import dmd.backend.barray;
38 import dmd.backend.cc;
39 import dmd.backend.cdef;
40 import dmd.backend.cgcse;
41 import dmd.backend.code;
42 import dmd.backend.code_x86;
43 import dmd.backend.codebuilder;
44 import dmd.backend.dlist;
45 import dmd.backend.dvec;
46 import dmd.backend.melf;
47 import dmd.backend.mem;
48 import dmd.backend.el;
49 import dmd.backend.exh;
50 import dmd.backend.global;
51 import dmd.backend.obj;
52 import dmd.backend.oper;
53 import dmd.backend.rtlsym;
54 import dmd.backend.symtab;
55 import dmd.backend.ty;
56 import dmd.backend.type;
57 import dmd.backend.xmm;
58 
59 version (SCPP)
60 {
61     import parser;
62     import precomp;
63 }
64 
65 extern (C++):
66 
67 nothrow:
68 @safe:
69 
/// Compile-time boolean mirroring `version (MARS)`, usable in ordinary expressions.
version (MARS)
{
    enum MARS = true;
}
else
{
    enum MARS = false;
}
74 
75 extern __gshared CGstate cgstate;
76 
/// Returns: a value with only bit number `m` set, i.e. `1 << m`.
private extern (D) uint mask(uint m)
{
    return 1 << m;
}
78 
79 //private void genorreg(ref CodeBuilder c, uint t, uint f) { genregs(c, 0x09, f, t); }
80 
81 extern __gshared targ_size_t retsize;
82 
/// Disabled: benchmarking shows it's slower
enum JMPJMPTABLE = false;

/// 64-bit pattern with only the top bit set
enum MINLL = 0x8000_0000_0000_0000L;
/// 64-bit pattern with every bit set except the top one
enum MAXLL = 0x7FFF_FFFF_FFFF_FFFFL;
87 
/*************
 * Flag bits used by the instruction size tables below.
 *
 * Each table entry describes one opcode; an entry of 0 means illegal
 * instruction. The low bits 2..0 hold the size in bytes of the
 * instruction (excluding optional bytes); the upper bits are the
 * flags declared here.
 */

enum
{
    M = 1 << 7,     // there is a modregrm field (EV1 is reserved for modregrm)
    T = 1 << 6,     // there is a second operand (EV2)
    E = 1 << 5,     // second operand is only 8 bits
    A = 1 << 4,     // a short version exists for the AX reg
    R = 1 << 3,     // a short version exists for regs
    W = 0,          // contributes no bits; used as a marker in inssize2 entries
}
108 
// Descriptor for each one-byte opcode: flag bits (M, T, E, A, R) or'ed
// with the instruction size. Mutable because cod3_set32() and
// cod3_set64() patch entries 0xA0..0xA3 at startup.
private __gshared ubyte[256] inssize =
[
    M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,                  // 00
    M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,                  // 08
    M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,                  // 10
    M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,                  // 18
    M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,                  // 20
    M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,                  // 28
    M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,                  // 30
    M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,                  // 38
    1,1,1,1,                1,1,1,1,                        // 40
    1,1,1,1,                1,1,1,1,                        // 48
    1,1,1,1,                1,1,1,1,                        // 50
    1,1,1,1,                1,1,1,1,                        // 58
    1,1,M|2,M|2,            1,1,1,1,                        // 60
    T|3,M|T|4,T|E|2,M|T|E|3, 1,1,1,1,                       // 68
    T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2,       // 70
    T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2,       // 78
    M|T|E|A|3,M|T|A|4,M|T|E|3,M|T|E|3,      M|2,M|2,M|2,M|A|R|2, // 80
    M|A|2,M|A|2,M|A|2,M|A|2,        M|2,M|2,M|2,M|R|2,      // 88
    1,1,1,1,                1,1,1,1,                        // 90
    1,1,T|5,1,              1,1,1,1,                        // 98

    // cod3_set32() patches this row to:
    //  T|5,T|5,T|5,T|5,    1,1,1,1,                        // A0
    T|3,T|3,T|3,T|3,        1,1,1,1,                        // A0

    T|E|2,T|3,1,1,          1,1,1,1,                        // A8
    T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2,       // B0
    T|3,T|3,T|3,T|3,        T|3,T|3,T|3,T|3,                // B8
    M|T|E|3,M|T|E|3,T|3,1,  M|2,M|2,M|T|E|R|3,M|T|R|4,      // C0
    T|E|4,1,T|3,1,          1,T|E|2,1,1,                    // C8
    M|2,M|2,M|2,M|2,        T|E|2,T|E|2,0,1,                // D0
    // For the floating instructions, allow room for the FWAIT
    M|2,M|2,M|2,M|2,        M|2,M|2,M|2,M|2,                // D8
    T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2,       // E0
    T|3,T|3,T|5,T|E|2,              1,1,1,1,                // E8
    1,0,1,1,                1,1,M|A|2,M|A|2,                // F0
    1,1,1,1,                1,1,M|2,M|R|2                   // F8
];
148 
// Plain size value for each one-byte opcode (no flag bits are or'ed in).
// An entry of 0 marks an opcode with no size assigned.
private __gshared const ubyte[256] inssize32 =
[
    2,2,2,2,        2,5,1,1,                // 00
    2,2,2,2,        2,5,1,1,                // 08
    2,2,2,2,        2,5,1,1,                // 10
    2,2,2,2,        2,5,1,1,                // 18
    2,2,2,2,        2,5,1,1,                // 20
    2,2,2,2,        2,5,1,1,                // 28
    2,2,2,2,        2,5,1,1,                // 30
    2,2,2,2,        2,5,1,1,                // 38
    1,1,1,1,        1,1,1,1,                // 40
    1,1,1,1,        1,1,1,1,                // 48
    1,1,1,1,        1,1,1,1,                // 50
    1,1,1,1,        1,1,1,1,                // 58
    1,1,2,2,        1,1,1,1,                // 60
    5,6,2,3,        1,1,1,1,                // 68
    2,2,2,2,        2,2,2,2,                // 70
    2,2,2,2,        2,2,2,2,                // 78
    3,6,3,3,        2,2,2,2,                // 80
    2,2,2,2,        2,2,2,2,                // 88
    1,1,1,1,        1,1,1,1,                // 90
    1,1,7,1,        1,1,1,1,                // 98
    5,5,5,5,        1,1,1,1,                // A0
    2,5,1,1,        1,1,1,1,                // A8
    2,2,2,2,        2,2,2,2,                // B0
    5,5,5,5,        5,5,5,5,                // B8
    3,3,3,1,        2,2,3,6,                // C0
    4,1,3,1,        1,2,1,1,                // C8
    2,2,2,2,        2,2,0,1,                // D0
    // For the floating instructions, don't need room for the FWAIT
    2,2,2,2,        2,2,2,2,                // D8

    2,2,2,2,        2,2,2,2,                // E0
    5,5,7,2,        1,1,1,1,                // E8
    1,0,1,1,        1,1,2,2,                // F0
    1,1,1,1,        1,1,2,2                 // F8
];
185 
/* Descriptors for the 2 byte opcodes starting with 0x0F, indexed by the
 * second opcode byte. Mutable because cod3_set32() and cod3_set64()
 * rewrite entries 0x80..0x8F.
 */
private __gshared ubyte[256] inssize2 =
[
    M|3,M|3,M|3,M|3,        2,2,2,2,                        // 00
    2,2,M|3,2,              2,M|3,2,M|T|E|4,                // 08
    M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,                // 10
    M|3,2,2,2,              2,2,2,2,                        // 18
    M|3,M|3,M|3,M|3,        M|3,2,M|3,2,                    // 20
    M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,                // 28
    2,2,2,2,                2,2,2,2,                        // 30
    M|4,2,M|T|E|5,2,        2,2,2,2,                        // 38
    M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,                // 40
    M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,                // 48
    M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,                // 50
    M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,                // 58
    M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,                // 60
    M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,                // 68
    M|T|E|4,M|T|E|4,M|T|E|4,M|T|E|4, M|3,M|3,M|3,2,         // 70
    2,2,2,2,                M|3,M|3,M|3,M|3,                // 78
    W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4,       // 80
    W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4,       // 88
    M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,                       // 90
    M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,                       // 98
    2,2,2,M|3,      M|T|E|4,M|3,2,2,                        // A0
    2,2,2,M|3,      M|T|E|4,M|3,M|3,M|3,                    // A8
    M|E|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,                     // B0
    M|3,2,M|T|E|4,M|3, M|3,M|3,M|3,M|3,                     // B8
    M|3,M|3,M|T|E|4,M|3, M|T|E|4,M|T|E|4,M|T|E|4,M|3,       // C0
    2,2,2,2,        2,2,2,2,                                // C8
    M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,                       // D0
    M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,                       // D8
    M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,                       // E0
    M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,                       // E8
    M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,                       // F0
    M|3,M|3,M|3,M|3, M|3,M|3,M|3,2                          // F8
];
221 
/*************************************************
 * Emit code that stores `reg` into the next free slot of the `regsave`
 * stack area and report which slot was used.
 *
 * XMM registers get a 16 byte slot at a 16 byte aligned offset; all other
 * registers get a REGSIZE slot. `regsave.top` tracks the largest offset
 * ever reached.
 * Params:
 *      regsave = register save area on stack; its `idx`, `alignment`
 *                and `top` fields are updated
 *      cdb = where to write generated code
 *      reg = register to save
 *      idx = set to location in regsave for use in REGSAVE_restore()
 */

@trusted
void REGSAVE_save(ref REGSAVE regsave, ref CodeBuilder cdb, reg_t reg, out uint idx)
{
    if (!isXMMreg(reg))
    {
        // First non-XMM save establishes the alignment if none is set yet
        if (!regsave.alignment)
            regsave.alignment = REGSIZE;

        idx = regsave.idx;
        regsave.idx += REGSIZE;

        // MOV idx[RBP],reg
        cdb.genc1(0x89, modregxrm(2, reg, BPRM), FLregsave, cast(targ_uns) idx);
        if (I64)
            code_orrex(cdb.last(), REX_W);
    }
    else
    {
        regsave.alignment = 16;

        // Round the slot offset up to a multiple of 16, then reserve 16 bytes
        idx = (regsave.idx + 15) & ~15;
        regsave.idx = idx + 16;

        // Store xmm to idx[RBP]: STOAPD normally, STOUPD on 32 bit Linux
        // (Haven't yet figured out why stack is not aligned to 16 there)
        const opcode_t storeOp = (TARGET_LINUX && I32) ? STOUPD : STOAPD;
        cdb.genc1(storeOp, modregxrm(2, reg - XMM0, BPRM), FLregsave, cast(targ_uns) idx);
    }

    reflocal = true;

    // keep high water mark
    if (regsave.top < regsave.idx)
        regsave.top = regsave.idx;
}
262 
/*******************************
 * Emit code that reloads `reg` from slot `idx` of the `regsave` area.
 * Complement of REGSAVE_save().
 * Params:
 *      regsave = register save area on stack (only read)
 *      cdb = where to write generated code
 *      reg = register to restore
 *      idx = slot location as returned by REGSAVE_save()
 */

@trusted
void REGSAVE_restore(const ref REGSAVE regsave, ref CodeBuilder cdb, reg_t reg, uint idx)
{
    if (!isXMMreg(reg))
    {
        // MOV reg,idx[RBP]
        cdb.genc1(0x8B, modregxrm(2, reg, BPRM), FLregsave, cast(targ_uns) idx);
        if (I64)
            code_orrex(cdb.last(), REX_W);
        return;
    }

    // An XMM register can only have been saved with 16 byte alignment
    assert(regsave.alignment == 16);

    // Load xmm from idx[RBP]: LODAPD normally, LODUPD on 32 bit Linux
    // (Haven't yet figured out why stack is not aligned to 16 there)
    const opcode_t loadOp = (TARGET_LINUX && I32) ? LODUPD : LODAPD;
    cdb.genc1(loadOp, modregxrm(2, reg - XMM0, BPRM), FLregsave, cast(targ_uns) idx);
}
288 
/************************************
 * Size for vex encoded instruction.
 *
 * The result is derived from the `inssize2` entry selected by the VEX
 * fields of `c`, plus 1 or 2.
 * Params:
 *      c = instruction; must have CFvex set and a VEX prefix byte of 0xC4
 * Returns:
 *      the adjusted `inssize2` entry, truncated to ubyte
 */

@trusted
ubyte vex_inssize(code *c)
{
    assert(c.Iflags & CFvex && c.Ivex.pfx == 0xC4);

    // Without CFvex3: look the opcode up directly
    if (!(c.Iflags & CFvex3))
        return cast(ubyte)(inssize2[c.Ivex.op] + 1);

    // With CFvex3: the mmmm field selects the opcode map
    switch (c.Ivex.mmmm)
    {
        case 0: // no prefix
        case 1: // 0F
            return cast(ubyte)(inssize2[c.Ivex.op] + 2);

        case 2: // 0F 38
            return cast(ubyte)(inssize2[0x38] + 1);

        case 3: // 0F 3A
            return cast(ubyte)(inssize2[0x3A] + 1);

        default:
            printf("Iop = %x mmmm = %x\n", c.Iop, c.Ivex.mmmm);
            assert(0);
    }
}
323 
/************************************
 * Determine if there is a modregrm byte for code.
 * Params:
 *      c = instruction to examine
 * Returns:
 *      the M flag of the matching size table entry: non-zero if the
 *      instruction has a modregrm byte, 0 if not (always 0 for ESCAPE)
 */

@trusted
int cod3_EA(code *c)
{
    const opcode_t op = c.Iop;
    const opcode_t lowByte = op & 0xFF;

    // ESCAPE pseudo-instructions never have a modregrm byte
    if (lowByte == ESCAPE)
        return 0;

    uint entry;
    if ((op & 0xFFFD00) == 0x0F3800)        // 0F 38 xx and 0F 3A xx forms
        entry = inssize2[(op >> 8) & 0xFF];
    else if ((op & 0xFF00) == 0x0F00)       // 0F xx forms
        entry = inssize2[lowByte];
    else                                    // one byte opcodes
        entry = inssize[lowByte];

    return entry & M;
}
343 
/********************************
 * Set up the ALLREGS and BYTEREGS register masks for the current target.
 * Called by: codgen
 */

@trusted
void cod3_initregs()
{
    if (!I64)
    {
        ALLREGS = ALLREGS_INIT;
        BYTEREGS = BYTEREGS_INIT;
        return;
    }

    // 64 bit: the same set of registers is used for both masks
    const regm_t regs64 = mAX|mBX|mCX|mDX|mSI|mDI| mR8|mR9|mR10|mR11|mR12|mR13|mR14|mR15;
    ALLREGS = regs64;
    BYTEREGS = regs64;
}
363 
/********************************
 * Set initial global variable values: `fregsaved` becomes BP, SI and DI.
 * cod3_set32() and cod3_set64() later replace this value.
 */

@trusted
void cod3_setdefault()
{
    fregsaved = mDI | mSI | mBP;
}
373 
/********************************
 * Fix global variables for 386: patches the instruction size tables and
 * sets the register masks and stack alignment used by the code generator.
 */
@trusted
void cod3_set32()
{
    // Opcodes A0..A3 and 0F 80..0F 8F get new size table entries
    foreach (op; 0xA0 .. 0xA4)
        inssize[op] = T|5;
    foreach (op; 0x80 .. 0x90)
        inssize2[op] = W|T|6;

    BPRM = 5;                       // [EBP] addressing mode

    // saved across function calls
    fregsaved = mBP | mBX | mSI | mDI;
    if (config.flags3 & CFG3eseqds)
        fregsaved |= mES;

    FLOATREGS = FLOATREGS_32;
    FLOATREGS2 = FLOATREGS2_32;
    DOUBLEREGS = DOUBLEREGS_32;

    TARGET_STACKALIGN = config.fpxmmregs ? 16 : 4;
}
397 
/********************************
 * Fix global variables for I64: patches the instruction size tables and
 * sets the register masks and stack alignment used by the code generator.
 */

@trusted
void cod3_set64()
{
    // A0: MOV AL,mem   A1: MOV RAX,mem   A2: MOV mem,AL   A3: MOV mem,RAX
    foreach (op; 0xA0 .. 0xA4)
        inssize[op] = T|5;
    foreach (op; 0x80 .. 0x90)
        inssize2[op] = W|T|6;

    BPRM = 5;                           // [RBP] addressing mode

    // Registers saved across function calls differ between the
    // EX_windos targets and the others
    if (config.exe & EX_windos)
        fregsaved = mBP | mBX | mDI | mSI | mR12 | mR13 | mR14 | mR15 | mES | mXMM6 | mXMM7; // also XMM8..15;
    else
        fregsaved = mBP | mBX | mR12 | mR13 | mR14 | mR15 | mES;

    FLOATREGS = FLOATREGS_64;
    FLOATREGS2 = FLOATREGS2_64;
    DOUBLEREGS = DOUBLEREGS_64;

    ALLREGS = mAX|mBX|mCX|mDX|mSI|mDI|  mR8|mR9|mR10|mR11|mR12|mR13|mR14|mR15;
    BYTEREGS = ALLREGS;

    TARGET_STACKALIGN = config.fpxmmregs ? 16 : 8;
}
427 
/*********************************
 * Write `nbytes` of NOP padding to segment `seg`.
 *
 * When the target is I64 or config.fpxmmregs is set, and more than one
 * byte is needed, the multi-byte NOP forms are used, at most 9 bytes
 * per write. Otherwise single byte NOPs (0x90) are written, at most
 * 15 per write.
 * Params:
 *      seg = segment to write alignment bytes to
 *      nbytes = number of alignment bytes to write
 */
@trusted
void cod3_align_bytes(int seg, size_t nbytes)
{
    /* Table 4-2 from Intel Instruction Set Reference M-Z
     * (only for CPUs: CPUID.01H.EAX[Bytes 11:8] = 0110B or 1111B).
     * Entry i holds the NOP that is i + 2 bytes long.
     */
    static immutable string[8] longNops =
    [
        "\x66\x90",                             // 66 NOP
        "\x0F\x1F\x00",                         // NOP DWORD ptr [EAX]
        "\x0F\x1F\x40\x00",                     // NOP DWORD ptr [EAX + 00H]
        "\x0F\x1F\x44\x00\x00",                 // NOP DWORD ptr [EAX + EAX*1 + 00H]
        "\x66\x0F\x1F\x44\x00\x00",             // 66 NOP DWORD ptr [EAX + EAX*1 + 00H]
        "\x0F\x1F\x80\x00\x00\x00\x00",         // NOP DWORD ptr [EAX + 00000000H]
        "\x0F\x1F\x84\x00\x00\x00\x00\x00",     // NOP DWORD ptr [EAX + EAX*1 + 00000000H]
        "\x66\x0F\x1F\x84\x00\x00\x00\x00\x00", // 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H]
    ];

    // XCHG AX,AX
    static immutable ubyte[15] shortNops = [
        0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
    ];

    assert(SegData[seg].SDseg == seg);

    while (nbytes != 0)
    {
        size_t n;           // bytes written by this iteration
        const(char)* p;     // bytes to write

        if (nbytes > 1 && (I64 || config.fpxmmregs))
        {
            // Longest multi-byte NOP that fits, capped at 9 bytes
            n = nbytes < 9 ? nbytes : 9;
            p = longNops[n - 2].ptr;
        }
        else
        {
            n = nbytes < shortNops.length ? nbytes : shortNops.length;
            p = cast(const(char)*) shortNops.ptr;
        }

        objmod.write_bytes(SegData[seg], cast(uint) n, cast(char*) p);
        nbytes -= n;
    }
}
483 
/****************************
 * Align start of function by padding segment `seg`.
 *
 * For targets outside EX_windos the segment is always padded to a
 * multiple of 8. For EX_windos targets padding to a multiple of 16 is
 * done only when optimizing for speed, the target CPU is an 80486 or a
 * PentiumPro or later, and fewer than 8 bytes are needed.
 * Params:
 *      seg = segment of function
 */
@trusted
void cod3_align(int seg)
{
    if (!(config.exe & EX_windos))
    {
        const nbytes = -Offset(seg) & 7;
        cod3_align_bytes(seg, nbytes);
        return;
    }

    // Only align when optimized for speed
    if (!(config.flags4 & CFG4speed))
        return;

    // Pick alignment based on CPU target
    if (config.target_cpu != TARGET_80486 &&
        config.target_cpu < TARGET_PentiumPro)
        return;

    // 486 does reads on 16 byte boundaries, so if we are near
    // such a boundary, align us to it
    const nbytes = -Offset(seg) & 15;
    if (nbytes < 8)
        cod3_align_bytes(seg, nbytes);
}
514 
515 
/**********************************
 * Generate code to adjust the stack pointer by `nbytes`.
 *
 * A positive `nbytes` emits SUB ESP,nbytes; zero or a negative value
 * emits ADD ESP,-nbytes. A REX_W prefix is added for I64.
 * Params:
 *      cdb = code builder
 *      nbytes = number of bytes to adjust stack pointer
 */
void cod3_stackadj(ref CodeBuilder cdb, int nbytes)
{
    //printf("cod3_stackadj(%d)\n", nbytes);
    const uint grex = I64 ? REX_W << 16 : 0;
    const bool subtract = nbytes > 0;

    const uint rm = subtract
        ? modregrm(3,5,SP)      // SUB ESP,nbytes
        : modregrm(3,0,SP);     // ADD ESP,nbytes

    cdb.genc2(0x81, grex | rm, subtract ? nbytes : -nbytes);
}
536 
/**********************************
 * Generate code to align the stack pointer at `nbytes` by emitting
 * AND ESP,-nbytes (with a REX_W prefix for I64).
 * Params:
 *      cdb = code builder
 *      nbytes = number of bytes to align stack pointer
 */
void cod3_stackalign(ref CodeBuilder cdb, int nbytes)
{
    //printf("cod3_stackalign(%d)\n", nbytes);

    // AND ESP,-nbytes
    cdb.genc2(0x81, (I64 ? REX_W << 16 : 0) | modregrm(3, 4, SP), -nbytes);
}
550 
/* Constructor that links the ModuleReference to the head of
 * the list pointed to by _Dmoduleref
 *
 * For ELF object files.
 *
 * Note: the function is wrapped in `static if (0)` and so is not compiled.
 */
static if (0)
{
void cod3_buildmodulector(OutBuffer* buf, int codeOffset, int refOffset)
{
    /* Code written to buf:
     *      mov     EAX,&ModuleReference
     *      mov     ECX,_DmoduleRef
     *      mov     EDX,[ECX]
     *      mov     [EAX],EDX
     *      mov     [ECX],EAX
     *      ret
     */

    const int seg = CODE;
    const bool pic64 = I64 && (config.flags3 & CFG3pic);

    if (!pic64)
    {
        const uint reltype = I64 ? R_X86_64_32 : R_386_32;

        /* movl ModuleReference*, %eax */
        buf.writeByte(0xB8);
        codeOffset += 1;
        codeOffset += Obj.writerel(seg, codeOffset, reltype, 3 /*STI_DATA*/, refOffset);

        /* movl _Dmodule_ref, %ecx */
        buf.writeByte(0xB9);
        codeOffset += 1;
        codeOffset += Obj.writerel(seg, codeOffset, reltype, Obj.external_def("_Dmodule_ref"), 0);
    }
    else
    {
        // LEA RAX,ModuleReference[RIP]
        buf.writeByte(REX | REX_W);
        buf.writeByte(LEA);
        buf.writeByte(modregrm(0,AX,5));
        codeOffset += 3;
        codeOffset += Obj.writerel(seg, codeOffset, R_X86_64_PC32, 3 /*STI_DATA*/, refOffset - 4);

        // MOV RCX,_DmoduleRef@GOTPCREL[RIP]
        buf.writeByte(REX | REX_W);
        buf.writeByte(0x8B);
        buf.writeByte(modregrm(0,CX,5));
        codeOffset += 3;
        codeOffset += Obj.writerel(seg, codeOffset, R_X86_64_GOTPCREL, Obj.external_def("_Dmodule_ref"), -4);
    }

    // The three list-linking moves, each preceded by REX.W for I64
    static immutable ubyte[2][3] linkMoves =
    [
        [0x8B, 0x11],   /* movl (%ecx), %edx */
        [0x89, 0x10],   /* movl %edx, (%eax) */
        [0x89, 0x01],   /* movl %eax, (%ecx) */
    ];
    foreach (ref mov; linkMoves)
    {
        if (I64)
            buf.writeByte(REX | REX_W);
        buf.writeByte(mov[0]);
        buf.writeByte(mov[1]);
    }

    buf.writeByte(0xC3); /* ret */
}
}
616 
/*****************************
 * Given a type, return a mask of
 * registers to hold that type.
 * Params:
 *      tym = type to find registers for; only its tybasic() part is examined
 *      tyf = function type; only consulted for TYcfloat
 * Returns:
 *      mask of registers, 0 for void, noreturn, struct and array types
 */

@trusted
regm_t regmask(tym_t tym, tym_t tyf)
{
    switch (tybasic(tym))
    {
        case TYvoid, TYnoreturn, TYstruct, TYarray:
            return 0;

        case TYbool, TYwchar_t, TYchar16,
             TYchar, TYschar, TYuchar,
             TYshort, TYushort,
             TYint, TYuint,
             TYnullptr, TYnptr, TYnref, TYsptr, TYcptr,
             TYimmutPtr, TYsharePtr, TYrestrictPtr, TYfgPtr:
            return mAX;

        case TYfloat, TYifloat:
            if (I64)
                return mXMM0;
            if (config.exe & EX_flat)
                return mST0;
            goto case;          // otherwise handled like TYlong

        case TYlong, TYulong, TYdchar:
            if (I16)
                return mDX | mAX;
            return mAX;

        case TYfptr, TYhptr:
            return mDX | mAX;

        case TYcent, TYucent:
            assert(I64);
            return mDX | mAX;

        case TYvptr:
            return mDX | mBX;

        case TYdouble, TYdouble_alias, TYidouble:
            if (I64)
                return mXMM0;
            if (config.exe & EX_flat)
                return mST0;
            return DOUBLEREGS;

        case TYllong, TYullong:
            if (I64)
                return mAX;
            if (I32)
                return mDX | mAX;
            return DOUBLEREGS;

        case TYldouble, TYildouble:
            return mST0;

        case TYcfloat:
            if (config.exe & EX_posix && I32 && tybasic(tyf) == TYnfunc)
                return mDX | mAX;
            goto case;          // otherwise handled like TYcdouble

        case TYcdouble:
            if (I64)
                return mXMM0 | mXMM1;
            goto case;          // otherwise handled like TYcldouble

        case TYcldouble:
            return mST01;

        // SIMD vector types
        case TYfloat4, TYdouble2,
             TYschar16, TYuchar16,
             TYshort8, TYushort8,
             TYlong4, TYulong4,
             TYllong2, TYullong2,

             TYfloat8, TYdouble4,
             TYschar32, TYuchar32,
             TYshort16, TYushort16,
             TYlong8, TYulong8,
             TYllong4, TYullong4:
            if (config.fpxmmregs)
                return mXMM0;
            printf("SIMD operations not supported on this platform\n");
            exit(1);
            return mXMM0;       // not reached: exit() does not return

        default:
            debug printf("%s\n", tym_str(tym));
            assert(0);
    }
}
746 
/*******************************
 * Setup register allocator parameters with platform specific data.
 * Params:
 *      dst_integer_reg = set to AX
 *      dst_float_reg = set to XMM0
 */
void cgreg_dst_regs(reg_t* dst_integer_reg, reg_t* dst_float_reg)
{
    *dst_float_reg   = XMM0;
    *dst_integer_reg = AX;
}
755 
/*******************************
 * Select the order in which registers are tried for a value of type `ty`.
 * Each sequence is an array of registers terminated by NOREG.
 * Params:
 *      ty = type of the value
 *      pseq = set to the register sequence
 *      pseqmsw = set to the sequence for the most significant word, but only
 *                for 32/64 bit targets when the type is two registers wide;
 *                left untouched otherwise
 */
@trusted
void cgreg_set_priorities(tym_t ty, const(reg_t)** pseq, const(reg_t)** pseqmsw)
{
    //printf("cgreg_set_priorities %x\n", ty);
    const sz = tysize(ty);

    if (tyxmmreg(ty))
    {
        static immutable ubyte[9] xmmOrder = [XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,NOREG];
        *pseq = xmmOrder.ptr;
        return;
    }

    if (I64 || I32)
    {
        if (sz == REGSIZE * 2)
        {
            // Register pair: the same sequences serve 32 and 64 bit targets
            static immutable ubyte[5] pairLsw = [AX,BX,SI,DI,NOREG];
            static immutable ubyte[3] pairMsw = [CX,DX,NOREG];
            *pseq = pairLsw.ptr;
            *pseqmsw = pairMsw.ptr;
        }
        else if (I64)
        {
            // R10 is reserved for the static link
            static immutable ubyte[15] order64 = [AX,CX,DX,SI,DI,R8,R9,R11,BX,R12,R13,R14,R15,BP,NOREG];
            *pseq = order64.ptr;
        }
        else
        {
            static immutable ubyte[8] order32 = [AX,CX,DX,BX,SI,DI,BP,NOREG];
            *pseq = order32.ptr;
        }
        return;
    }

    assert(I16);
    if (typtr(ty))
    {
        // For pointer types, try to pick index register first
        static immutable ubyte[8] indexFirst = [BX,SI,DI,AX,CX,DX,BP,NOREG];
        *pseq = indexFirst.ptr;
    }
    else
    {
        // Otherwise, try to pick index registers last
        static immutable ubyte[8] indexLast = [AX,CX,DX,BX,SI,DI,BP,NOREG];
        *pseq = indexLast.ptr;
    }
}
813 
/*******************************************
 * Call finally block.
 *
 * The generated sequence is: save `retregs`, optional stack adjustment,
 * CALL bf, undo the stack adjustment, restore `retregs`.
 * Also sets `calledFinally` and clears `regcon.immed.mval`.
 * Params:
 *      bf = block to call
 *      retregs = registers to preserve across call
 * Returns:
 *      code generated
 */
@trusted
private code *callFinallyBlock(block *bf, regm_t retregs)
{
    CodeBuilder cdbSave;    cdbSave.ctor();     // save code, then the call itself
    CodeBuilder cdbRestore; cdbRestore.ctor();  // matching restore code

    calledFinally = true;
    uint npush = gensaverestore(retregs, cdbSave, cdbRestore);

    int nalign = 0;
    if (STACKALIGN >= 16)
    {
        npush += REGSIZE;
        const misalign = npush & (STACKALIGN - 1);
        if (misalign)
        {
            // Pad up to the next multiple of STACKALIGN
            nalign = STACKALIGN - misalign;
            cod3_stackadj(cdbSave, nalign);
        }
    }

    // CALL bf
    cdbSave.genc(0xE8,0,0,0,FLblock,cast(targ_size_t)bf);
    regcon.immed.mval = 0;

    if (nalign)
        cod3_stackadj(cdbSave, -nalign);

    cdbSave.append(cdbRestore);
    return cdbSave.finish();
}
846 
847 /*******************************
848  * Generate block exit code
849  */
850 @trusted
851 void outblkexitcode(ref CodeBuilder cdb, block *bl, ref int anyspill, const(char)* sflsave, Symbol** retsym, const regm_t mfuncregsave)
852 {
853     CodeBuilder cdb2; cdb2.ctor();
854     elem *e = bl.Belem;
855     block *nextb;
856     regm_t retregs = 0;
857 
858     if (bl.BC != BCasm)
859         assert(bl.Bcode == null);
860 
861     switch (bl.BC)                     /* block exit condition         */
862     {
863         case BCiftrue:
864         {
865             bool jcond = true;
866             block *bs1 = bl.nthSucc(0);
867             block *bs2 = bl.nthSucc(1);
868             if (bs1 == bl.Bnext)
869             {   // Swap bs1 and bs2
870                 block *btmp;
871 
872                 jcond ^= 1;
873                 btmp = bs1;
874                 bs1 = bs2;
875                 bs2 = btmp;
876             }
877             logexp(cdb,e,jcond,FLblock,cast(code *) bs1);
878             nextb = bs2;
879         }
880         L5:
881             if (configv.addlinenumbers && bl.Bsrcpos.Slinnum &&
882                 !(funcsym_p.ty() & mTYnaked))
883             {
884                 //printf("BCiftrue: %s(%u)\n", bl.Bsrcpos.Sfilename ? bl.Bsrcpos.Sfilename : "", bl.Bsrcpos.Slinnum);
885                 cdb.genlinnum(bl.Bsrcpos);
886             }
887             if (nextb != bl.Bnext)
888             {
889                 assert(!(bl.Bflags & BFLepilog));
890                 genjmp(cdb,JMP,FLblock,nextb);
891             }
892             break;
893 
894         case BCjmptab:
895         case BCifthen:
896         case BCswitch:
897         {
898             assert(!(bl.Bflags & BFLepilog));
899             doswitch(cdb,bl);               // hide messy details
900             break;
901         }
902 version (MARS)
903 {
904         case BCjcatch:          // D catch clause of try-catch
905             assert(ehmethod(funcsym_p) != EHmethod.EH_NONE);
906             // Mark all registers as destroyed. This will prevent
907             // register assignments to variables used in catch blocks.
908             getregs(cdb,lpadregs());
909 
910             if (config.ehmethod == EHmethod.EH_DWARF)
911             {
912                 /* Each block must have ESP set to the same value it was at the end
913                  * of the prolog. But the unwinder calls catch blocks with ESP set
914                  * at the value it was when the throwing function was called, which
915                  * may have arguments pushed on the stack.
916                  * This instruction will reset ESP to the correct offset from EBP.
917                  */
918                 cdb.gen1(ESCAPE | ESCfixesp);
919             }
920             goto case_goto;
921 }
922 version (SCPP)
923 {
924         case BCcatch:           // C++ catch clause of try-catch
925             // Mark all registers as destroyed. This will prevent
926             // register assignments to variables used in catch blocks.
927             getregs(cdb,allregs | mES);
928             goto case_goto;
929 
930         case BCtry:
931             usednteh |= EHtry;
932             if (config.exe == EX_WIN32)
933                 usednteh |= NTEHtry;
934             goto case_goto;
935 }
936         case BCgoto:
937             nextb = bl.nthSucc(0);
938             if ((MARS ||
939                  funcsym_p.Sfunc.Fflags3 & Fnteh) &&
940                 ehmethod(funcsym_p) != EHmethod.EH_DWARF &&
941                 bl.Btry != nextb.Btry &&
942                 nextb.BC != BC_finally)
943             {
944                 regm_t retregsx = 0;
945                 gencodelem(cdb,e,&retregsx,true);
946                 int toindex = nextb.Btry ? nextb.Btry.Bscope_index : -1;
947                 assert(bl.Btry);
948                 int fromindex = bl.Btry.Bscope_index;
949 version (MARS)
950 {
951                 if (toindex + 1 == fromindex)
952                 {   // Simply call __finally
953                     if (bl.Btry &&
954                         bl.Btry.nthSucc(1).BC == BCjcatch)
955                     {
956                         goto L5;        // it's a try-catch, not a try-finally
957                     }
958                 }
959 }
960                 if (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) ||
961                     config.ehmethod == EHmethod.EH_SEH)
962                 {
963                     nteh_unwind(cdb,0,toindex);
964                 }
965                 else
966                 {
967 version (MARS)
968 {
969                 if (toindex + 1 <= fromindex)
970                 {
971                     //c = cat(c, linux_unwind(0, toindex));
972                     block *bt;
973 
974                     //printf("B%d: fromindex = %d, toindex = %d\n", bl.Bdfoidx, fromindex, toindex);
975                     bt = bl;
976                     while ((bt = bt.Btry) != null && bt.Bscope_index != toindex)
977                     {   block *bf;
978 
979                         //printf("\tbt.Bscope_index = %d, bt.Blast_index = %d\n", bt.Bscope_index, bt.Blast_index);
980                         bf = bt.nthSucc(1);
981                         // Only look at try-finally blocks
982                         if (bf.BC == BCjcatch)
983                             continue;
984 
985                         if (bf == nextb)
986                             continue;
987                         //printf("\tbf = B%d, nextb = B%d\n", bf.Bdfoidx, nextb.Bdfoidx);
988                         if (nextb.BC == BCgoto &&
989                             !nextb.Belem &&
990                             bf == nextb.nthSucc(0))
991                             continue;
992 
993                         // call __finally
994                         cdb.append(callFinallyBlock(bf.nthSucc(0), retregsx));
995                     }
996                 }
997 }
998                 }
999                 goto L5;
1000             }
1001         case_goto:
1002         {
1003             regm_t retregsx = 0;
1004             gencodelem(cdb,e,&retregsx,true);
1005             if (anyspill)
1006             {   // Add in the epilog code
1007                 CodeBuilder cdbstore; cdbstore.ctor();
1008                 CodeBuilder cdbload;  cdbload.ctor();
1009 
1010                 for (int i = 0; i < anyspill; i++)
1011                 {   Symbol *s = globsym[i];
1012 
1013                     if (s.Sflags & SFLspill &&
1014                         vec_testbit(dfoidx,s.Srange))
1015                     {
1016                         s.Sfl = sflsave[i];    // undo block register assignments
1017                         cgreg_spillreg_epilog(bl,s,cdbstore,cdbload);
1018                     }
1019                 }
1020                 cdb.append(cdbstore);
1021                 cdb.append(cdbload);
1022             }
1023             nextb = bl.nthSucc(0);
1024             goto L5;
1025         }
1026 
1027         case BC_try:
1028             if (config.ehmethod == EHmethod.EH_NONE || funcsym_p.Sfunc.Fflags3 & Feh_none)
1029             {
1030                 /* Need to use frame pointer to access locals, not the stack pointer,
1031                  * because we'll be calling the BC_finally blocks and the stack will be off.
1032                  */
1033                 needframe = 1;
1034             }
1035             else if (config.ehmethod == EHmethod.EH_SEH || config.ehmethod == EHmethod.EH_WIN32)
1036             {
1037                 usednteh |= NTEH_try;
1038                 nteh_usevars();
1039             }
1040             else
1041                 usednteh |= EHtry;
1042             goto case_goto;
1043 
1044         case BC_finally:
1045             if (ehmethod(funcsym_p) == EHmethod.EH_DWARF)
1046             {
1047                 // Mark scratch registers as destroyed.
1048                 getregsNoSave(lpadregs());
1049 
1050                 regm_t retregsx = 0;
1051                 gencodelem(cdb,bl.Belem,&retregsx,true);
1052 
1053                 // JMP bl.nthSucc(1)
1054                 nextb = bl.nthSucc(1);
1055 
1056                 goto L5;
1057             }
1058             else
1059             {
1060                 if (config.ehmethod == EHmethod.EH_SEH ||
1061                     config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none))
1062                 {
1063                     // Mark all registers as destroyed. This will prevent
1064                     // register assignments to variables used in finally blocks.
1065                     getregsNoSave(lpadregs());
1066                 }
1067 
1068                 assert(!e);
1069                 // Generate CALL to finalizer code
1070                 cdb.append(callFinallyBlock(bl.nthSucc(0), 0));
1071 
1072                 // JMP bl.nthSucc(1)
1073                 nextb = bl.nthSucc(1);
1074 
1075                 goto L5;
1076             }
1077 
1078         case BC_lpad:
1079         {
1080             assert(ehmethod(funcsym_p) == EHmethod.EH_DWARF);
1081             // Mark all registers as destroyed. This will prevent
1082             // register assignments to variables used in finally blocks.
1083             getregsNoSave(lpadregs());
1084 
1085             regm_t retregsx = 0;
1086             gencodelem(cdb,bl.Belem,&retregsx,true);
1087 
1088             // JMP bl.nthSucc(0)
1089             nextb = bl.nthSucc(0);
1090             goto L5;
1091         }
1092 
1093         case BC_ret:
1094         {
1095             regm_t retregsx = 0;
1096             gencodelem(cdb,e,&retregsx,true);
1097             if (ehmethod(funcsym_p) == EHmethod.EH_DWARF)
1098             {
1099             }
1100             else
1101                 cdb.gen1(0xC3);   // RET
1102             break;
1103         }
1104 
1105 static if (NTEXCEPTIONS)
1106 {
1107         case BC_except:
1108         {
1109             assert(!e);
1110             usednteh |= NTEH_except;
1111             nteh_setsp(cdb,0x8B);
1112             getregsNoSave(allregs);
1113             nextb = bl.nthSucc(0);
1114             goto L5;
1115         }
1116         case BC_filter:
1117         {
1118             nteh_filter(cdb, bl);
1119             // Mark all registers as destroyed. This will prevent
1120             // register assignments to variables used in filter blocks.
1121             getregsNoSave(allregs);
1122             regm_t retregsx = regmask(e.Ety, TYnfunc);
1123             gencodelem(cdb,e,&retregsx,true);
1124             cdb.gen1(0xC3);   // RET
1125             break;
1126         }
1127 }
1128 
1129         case BCretexp:
1130             reg_t reg1, reg2, lreg, mreg;
1131             retregs = allocretregs(e.Ety, e.ET, funcsym_p.ty(), reg1, reg2);
1132             //printf("allocretregs returns %s\n", regm_str(mask(reg1) | mask(reg2)));
1133 
1134             lreg = mreg = NOREG;
1135             if (reg1 == NOREG)
1136             {}
1137             else if (tybasic(e.Ety) == TYcfloat)
1138                 lreg = ST01;
1139             else if (mask(reg1) & (mST0 | mST01))
1140                 lreg = reg1;
1141             else if (reg2 == NOREG)
1142                 lreg = reg1;
1143             else if (mask(reg1) & XMMREGS)
1144             {
1145                 lreg = XMM0;
1146                 mreg = XMM1;
1147             }
1148             else
1149             {
1150                 lreg = mask(reg1) & mLSW ? reg1 : AX;
1151                 mreg = mask(reg2) & mMSW ? reg2 : DX;
1152             }
1153             if (reg1 != NOREG)
1154                 retregs = (mask(lreg) | mask(mreg)) & ~mask(NOREG);
1155 
1156             // For the final load into the return regs, don't set regcon.used,
1157             // so that the optimizer can potentially use retregs for register
1158             // variable assignments.
1159 
1160             if (config.flags4 & CFG4optimized)
1161             {   regm_t usedsave;
1162 
1163                 docommas(cdb,&e);
1164                 usedsave = regcon.used;
1165                 if (!OTleaf(e.Eoper))
1166                     gencodelem(cdb,e,&retregs,true);
1167                 else
1168                 {
1169                     if (e.Eoper == OPconst)
1170                         regcon.mvar = 0;
1171                     gencodelem(cdb,e,&retregs,true);
1172                     regcon.used = usedsave;
1173                     if (e.Eoper == OPvar)
1174                     {   Symbol *s = e.EV.Vsym;
1175 
1176                         if (s.Sfl == FLreg && s.Sregm != mAX)
1177                             *retsym = s;
1178                     }
1179                 }
1180             }
1181             else
1182             {
1183                 gencodelem(cdb,e,&retregs,true);
1184             }
1185 
1186             if (reg1 == NOREG)
1187             {
1188             }
1189             else if ((mask(reg1) | mask(reg2)) & (mST0 | mST01))
1190             {
1191                 assert(reg1 == lreg && reg2 == NOREG);
1192                 regm_t pretregs = mask(reg1) | mask(reg2);
1193                 fixresult87(cdb, e, retregs, &pretregs, true);
1194             }
1195             // fix return registers
1196             else if (tybasic(e.Ety) == TYcfloat)
1197             {
1198                 assert(lreg == ST01);
1199                 if (I64)
1200                 {
1201                     assert(reg2 == NOREG);
1202                     // spill
1203                     pop87();
1204                     pop87();
1205                     cdb.genfltreg(0xD9, 3, tysize(TYfloat));
1206                     genfwait(cdb);
1207                     cdb.genfltreg(0xD9, 3, 0);
1208                     genfwait(cdb);
1209                     // reload
1210                     if (config.exe == EX_WIN64)
1211                     {
1212                         assert(reg1 == AX);
1213                         cdb.genfltreg(LOD, reg1, 0);
1214                         code_orrex(cdb.last(), REX_W);
1215                     }
1216                     else
1217                     {
1218                         assert(reg1 == XMM0);
1219                         cdb.genxmmreg(xmmload(TYdouble), reg1, 0, TYdouble);
1220                     }
1221                 }
1222                 else
1223                 {
1224                     assert(reg1 == AX && reg2 == DX);
1225                     regm_t pretregs = mask(reg1) | mask(reg2);
1226                     fixresult_complex87(cdb, e, retregs, &pretregs, true);
1227                 }
1228             }
1229             else if (reg2 == NOREG)
1230                 assert(lreg == reg1);
1231             else for (int v = 0; v < 2; v++)
1232             {
1233                 if (v ^ (reg1 != mreg))
1234                     genmovreg(cdb, reg1, lreg);
1235                 else
1236                     genmovreg(cdb, reg2, mreg);
1237             }
1238             if (reg1 != NOREG)
1239                 retregs = (mask(reg1) | mask(reg2)) & ~mask(NOREG);
1240             goto L4;
1241 
1242         case BCret:
1243             retregs = 0;
1244             gencodelem(cdb,e,&retregs,true);
1245         L4:
1246             if (retregs == mST0)
1247             {   assert(global87.stackused == 1);
1248                 pop87();                // account for return value
1249             }
1250             else if (retregs == mST01)
1251             {   assert(global87.stackused == 2);
1252                 pop87();
1253                 pop87();                // account for return value
1254             }
1255 
1256             if (MARS || usednteh & NTEH_try)
1257             {
1258                 block *bt = bl;
1259                 while ((bt = bt.Btry) != null)
1260                 {
1261                     block *bf = bt.nthSucc(1);
1262 version (MARS)
1263 {
1264                     // Only look at try-finally blocks
1265                     if (bf.BC == BCjcatch)
1266                     {
1267                         continue;
1268                     }
1269 }
1270                     if (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) ||
1271                         config.ehmethod == EHmethod.EH_SEH)
1272                     {
1273                         if (bt.Bscope_index == 0)
1274                         {
1275                             // call __finally
1276                             CodeBuilder cdbs; cdbs.ctor();
1277                             CodeBuilder cdbr; cdbr.ctor();
1278 
1279                             nteh_gensindex(cdb,-1);
1280                             gensaverestore(retregs,cdbs,cdbr);
1281                             cdb.append(cdbs);
1282                             cdb.genc(0xE8,0,0,0,FLblock,cast(targ_size_t)bf.nthSucc(0));
1283                             regcon.immed.mval = 0;
1284                             cdb.append(cdbr);
1285                         }
1286                         else
1287                         {
1288                             nteh_unwind(cdb,retregs,~0);
1289                         }
1290                         break;
1291                     }
1292                     else
1293                     {
1294                         // call __finally
1295                         cdb.append(callFinallyBlock(bf.nthSucc(0), retregs));
1296                     }
1297                 }
1298             }
1299             break;
1300 
1301         case BCexit:
1302             retregs = 0;
1303             gencodelem(cdb,e,&retregs,true);
1304             if (config.flags4 & CFG4optimized)
1305                 mfuncreg = mfuncregsave;
1306             break;
1307 
1308         case BCasm:
1309         {
1310             assert(!e);
1311             // Mark destroyed registers
1312             CodeBuilder cdbx; cdbx.ctor();
1313             getregs(cdbx,iasm_regs(bl));         // mark destroyed registers
1314             code *c = cdbx.finish();
1315             if (bl.Bsucc)
1316             {   nextb = bl.nthSucc(0);
1317                 if (!bl.Bnext)
1318                 {
1319                     cdb.append(bl.Bcode);
1320                     cdb.append(c);
1321                     goto L5;
1322                 }
1323                 if (nextb != bl.Bnext &&
1324                     bl.Bnext &&
1325                     !(bl.Bnext.BC == BCgoto &&
1326                      !bl.Bnext.Belem &&
1327                      nextb == bl.Bnext.nthSucc(0)))
1328                 {
1329                     // See if already have JMP at end of block
1330                     code *cl = code_last(bl.Bcode);
1331                     if (!cl || cl.Iop != JMP)
1332                     {
1333                         cdb.append(bl.Bcode);
1334                         cdb.append(c);
1335                         goto L5;        // add JMP at end of block
1336                     }
1337                 }
1338             }
1339             cdb.append(bl.Bcode);
1340             break;
1341         }
1342 
1343         default:
1344             debug
1345             printf("bl.BC = %d\n",bl.BC);
1346             assert(0);
1347     }
1348 }
1349 
/***************************
 * Determine which registers carry a function's return value.
 *
 * Params:
 *    ty    = type of the value being returned
 *    t     = extended type information for `ty`; asserted to be non-null
 *            when a struct is returned on a Posix target
 *    tyf   = type of the returning function
 *    reg1  = receives the register for the first part of the value, else NOREG
 *    reg2  = receives the register for the second part of the value, else NOREG
 *
 * Returns:
 *    bit mask of the registers holding the return value;
 *    0 if the value is returned on the stack or the function returns void.
 */
@trusted
regm_t allocretregs(const tym_t ty, type* t, const tym_t tyf, out reg_t reg1, out reg_t reg2)
{
    //printf("allocretregs() ty: %s\n", tym_str(ty));
    reg1 = NOREG;
    reg2 = NOREG;

    if ((config.exe & EX_posix) == 0)
        return regmask(ty, tyf);    // non-Posix ABI: defer to regmask()

    /* Everything below is for the Itanium ABI
     */

    const basicTy = tybasic(ty);
    if (basicTy == TYvoid || basicTy == TYnoreturn)
        return 0;

    // Types of the (at most) two parts the value is returned in.
    tym_t part1 = basicTy;
    tym_t part2 = TYMAX;        // TYMAX means a single register suffices

    if (ty & mTYxmmgpr)
    {
        part1 = TYdouble;
        part2 = TYllong;
    }
    else if (ty & mTYgprxmm)
    {
        part1 = TYllong;
        part2 = TYdouble;
    }

    if (basicTy == TYstruct)
    {
        assert(t);
        part1 = t.Tty;
    }

    const funcTy = tybasic(tyf);
    switch (tyrelax(part1))
    {
        case TYcent:
            if (I32)
                return 0;
            part1 = part2 = TYllong;
            break;

        case TYcdouble:
            if (I32)
            {
                // only TYjfunc functions keep it in registers on 32 bit targets
                if (funcTy != TYjfunc)
                    return 0;
            }
            else
                part1 = part2 = TYdouble;
            break;

        case TYcfloat:
            if (!I32)
            {
                part1 = TYdouble;
                break;
            }
            if (funcTy == TYjfunc)
                break;
            goto case TYllong;      // 32 bit: treated like a long long

        case TYcldouble:
            if (I32 && funcTy != TYjfunc)
                return 0;
            break;

        case TYllong:
            if (I32)
                part1 = part2 = TYlong;
            break;

        case TYarray:
            type* first, second;
            argtypes(t, first, second);
            if (!first)
                return 0;
            part1 = first.Tty;
            if (second)
                part2 = second.Tty;
            break;

        case TYstruct:
            assert(t);
            if (!I64)
                return 0;
            assert(tybasic(t.Tty) == TYstruct);
            if (const first = t.Ttag.Sstruct.Sarg1type)
                part1 = first.Tty;
            else
                return 0;
            if (const second = t.Ttag.Sstruct.Sarg2type)
                part2 = second.Tty;
            break;

        default:
            break;
    }

    /* part1 and part2 are now settled; hand out a register for each.
     * General purpose and XMM registers are consumed in a fixed order,
     * tracked by the two counters below.
     */

    static immutable reg_t[2] gprOrder = [AX, DX];
    static immutable reg_t[2] xmmOrder = [XMM0, XMM1];

    uint gprTaken = 0;
    uint xmmTaken = 0;

    reg_t nextGpr() { return gprOrder[gprTaken++]; }
    reg_t nextXmm() { return xmmOrder[xmmTaken++]; }

    // Choose the register for one part of type `tym`; NOREG for TYMAX.
    reg_t pickReg(tym_t tym)
    {
        if (tym == TYMAX)
            return NOREG;

        switch (tysize(tym))
        {
        case 8:
            if (tycomplex(tym))
            {
                assert(funcTy == TYjfunc && I32);
                return ST01;
            }
            if (tysimd(tym))
                return nextXmm();
            assert(I64 || tyfloating(tym));
            goto case 4;

        case 1:
        case 2:
        case 4:
            if (!tyfloating(tym))
                return nextGpr();
            return I64 ? nextXmm() : ST0;

        default:
            const b = tybasic(tym);
            if (b == TYldouble || b == TYildouble)
                return ST0;
            if (b == TYcldouble)
                return ST01;
            if (tycomplex(tym) && funcTy == TYjfunc && I32)
                return ST01;
            if (tysimd(tym))
                return nextXmm();

            debug printf("%s\n", tym_str(tym));
            assert(0);
        }
    }

    reg1 = pickReg(part1);
    reg2 = pickReg(part2);

    return (mask(reg1) | mask(reg2)) & ~mask(NOREG);
}
1541 
/***********************************************
 * Pairing of a switch case value with its jump target, plus the
 * comparison callback used to sort an array of them with qsort().
 */

private alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*);
extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar);

extern (C)  // qsort cmp functions need to be "C"
{
struct CaseVal
{
    targ_ullong val;    // case value; ordered as an unsigned quantity by cmp()
    block *target;      // block to jump to when the switch value equals val

    /* qsort() callback: orders two CaseVals by ascending val.
     * Returns -1, 0 or 1 for less than, equal, greater than.
     */
    @trusted
    extern (C) static nothrow int cmp(scope const(void*) p, scope const(void*) q)
    {
        const lhs = (cast(const(CaseVal)*)p).val;
        const rhs = (cast(const(CaseVal)*)q).val;
        if (lhs < rhs)
            return -1;
        return lhs == rhs ? 0 : 1;
    }
}
}
1566 
/***
 * Emit a comparison of the value in [reg2,reg] against the constant val.
 *
 * Params:
 *      cdb  = code builder the instructions are added to
 *      val  = constant to compare against
 *      sz   = size in bytes of the value being compared
 *      reg  = register holding the value, or its least significant part
 *             when reg2 is in use
 *      reg2 = register holding the most significant part, NOREG if none
 *      sreg = scratch register, required when a 64 bit val is not a
 *             sign-extended 32 bit quantity
 */
@trusted
private void cmpval(ref CodeBuilder cdb, targ_llong val, uint sz, reg_t reg, reg_t reg2, reg_t sreg)
{
    const imm = cast(targ_size_t)val;

    if (I64 && sz == 8)
    {
        // The whole value lives in one 64 bit register
        assert(reg2 == NOREG);
        if (val != cast(int)val)
        {
            // val is not a sign-extended 32 bit value: load it into sreg first
            assert(sreg != NOREG);
            movregconst(cdb, sreg, imm, 64);            // MOV sreg,val64
            genregs(cdb, 0x3B, reg, sreg);              // CMP reg,sreg
            code_orrex(cdb.last(), REX_W);
            getregsNoSave(mask(sreg));                  // don't remember we loaded this constant
            return;
        }
        cdb.genc2(0x81, modregrmx(3,7,reg), imm);       // CMP reg,value32
        cdb.last().Irex |= REX_W;                       // 64 bit operand
        return;
    }

    if (reg2 == NOREG)
    {
        cdb.genc2(0x81, modregrmx(3,7,reg), imm);       // CMP reg,casevalue
        return;
    }

    // Value is split across two registers: compare the most significant
    // part first and only compare the rest when those are equal.
    cdb.genc2(0x81, modregrm(3,7,reg2), cast(targ_size_t)MSREG(val));  // CMP reg2,MSREG(casevalue)
    code *done = gennop(null);
    genjmp(cdb, JNE, FLcode, cast(block *) done);       // JNE done
    cdb.genc2(0x81, modregrm(3,7,reg), imm);            // CMP reg,casevalue
    cdb.append(done);
}
1601 
/***
 * Emit the compare-and-branch code that dispatches a switch value to
 * its case targets.
 *
 * With 4 or more cases and CFG4speed set, the sorted cases are split
 * around a middle pivot and each half is handled recursively, giving a
 * binary search at run time. Otherwise a plain sequence of CMP/JE is
 * produced.
 *
 * Params:
 *      cdb      = code builder the instructions are added to
 *      casevals = the cases to dispatch; the binary search relies on them
 *                 being in ascending unsigned order
 *      ncases   = number of entries in casevals
 *      sz       = size in bytes of the switch value
 *      reg      = register holding the value (least significant part
 *                 when reg2 is in use)
 *      reg2     = register holding the most significant part, or NOREG
 *      sreg     = scratch register passed through to cmpval()
 *      bdefault = block to go to when no case matches
 *      last     = true if a JMP to bdefault must follow the compares
 */
@trusted
private void ifthen(ref CodeBuilder cdb, CaseVal *casevals, size_t ncases,
        uint sz, reg_t reg, reg_t reg2, reg_t sreg, block *bdefault, bool last)
{
    const bool binarySearch = ncases >= 4 && (config.flags4 & CFG4speed);

    if (!binarySearch)
    {
        // Not worth doing a binary search, just do a sequence of CMP/JE
        foreach (n; 0 .. ncases)
        {
            targ_llong caseval = casevals[n].val;
            cmpval(cdb, caseval, sz, reg, reg2, sreg);

            code *skip = null;
            if (reg2 != NOREG)
            {
                skip = gennop(null);
                genjmp(cdb, JNE, FLcode, cast(block *) skip);   // JNE skip
                cdb.genc2(0x81, modregrm(3,7,reg2), cast(targ_size_t)MSREG(caseval));   // CMP reg2,MSREG(casevalue)
            }
            genjmp(cdb, JE, FLblock, casevals[n].target);       // JE caseaddr
            cdb.append(skip);
        }

        if (last)       // if default is not next block
            genjmp(cdb, JMP, FLblock, bdefault);
        return;
    }

    const size_t mid = ncases >> 1;

    // Lower half, casevals[0..mid]; it always needs its own jump to the
    // default because the upper half's code follows it
    CodeBuilder lower; lower.ctor();
    ifthen(lower, casevals, mid, sz, reg, reg2, sreg, bdefault, true);

    // Upper half, casevals[mid+1..ncases]
    CodeBuilder upper; upper.ctor();
    ifthen(upper, casevals + mid + 1, ncases - mid - 1, sz, reg, reg2, sreg, bdefault, last);
    code *upperEntry = gennop(null);

    // Test the pivot itself, then choose a half
    cmpval(cdb, casevals[mid].val, sz, reg, reg2, sreg);
    genjmp(cdb, JE, FLblock, casevals[mid].target);             // JE target
    // Unsigned jump, because the cases were sorted with unsigned comparisons
    genjmp(cdb, JA, FLcode, cast(block *) upperEntry);          // JA upperEntry

    cdb.append(lower);
    cdb.append(upperEntry);
    cdb.append(upper);
}
1650 
1651 /*******************************
1652  * Generate code for blocks ending in a switch statement.
1653  * Take BCswitch and decide on
1654  *      BCifthen        use if - then code
1655  *      BCjmptab        index into jump table
1656  *      BCswitch        search table for match
1657  */
1658 
1659 @trusted
1660 void doswitch(ref CodeBuilder cdb, block *b)
1661 {
1662     targ_ulong msw;
1663 
1664     // If switch tables are in code segment and we need a CS: override to get at them
1665     bool csseg = cast(bool)(config.flags & CFGromable);
1666 
1667     //printf("doswitch(%d)\n", b.BC);
1668     elem *e = b.Belem;
1669     elem_debug(e);
1670     docommas(cdb,&e);
1671     cgstate.stackclean++;
1672     tym_t tys = tybasic(e.Ety);
1673     int sz = _tysize[tys];
1674     bool dword = (sz == 2 * REGSIZE);
1675     bool mswsame = true;                // assume all msw's are the same
1676     targ_llong *p = b.Bswitch;          // pointer to case data
1677     assert(p);
1678     uint ncases = cast(uint)*p++;       // number of cases
1679 
1680     targ_llong vmax = MINLL;            // smallest possible llong
1681     targ_llong vmin = MAXLL;            // largest possible llong
1682     for (uint n = 0; n < ncases; n++)   // find max and min case values
1683     {
1684         targ_llong val = *p++;
1685         if (val > vmax) vmax = val;
1686         if (val < vmin) vmin = val;
1687         if (REGSIZE == 2)
1688         {
1689             ushort ms = (val >> 16) & 0xFFFF;
1690             if (n == 0)
1691                 msw = ms;
1692             else if (msw != ms)
1693                 mswsame = 0;
1694         }
1695         else // REGSIZE == 4
1696         {
1697             targ_ulong ms = (val >> 32) & 0xFFFFFFFF;
1698             if (n == 0)
1699                 msw = ms;
1700             else if (msw != ms)
1701                 mswsame = 0;
1702         }
1703     }
1704     p -= ncases;
1705     //dbg_printf("vmax = x%lx, vmin = x%lx, vmax-vmin = x%lx\n",vmax,vmin,vmax - vmin);
1706 
1707     /* Three kinds of switch strategies - pick one
1708      */
1709     if (ncases <= 3)
1710         goto Lifthen;
1711     else if (I16 && cast(targ_ullong)(vmax - vmin) <= ncases * 2)
1712         goto Ljmptab;           // >=50% of the table is case values, rest is default
1713     else if (cast(targ_ullong)(vmax - vmin) <= ncases * 3)
1714         goto Ljmptab;           // >= 33% of the table is case values, rest is default
1715     else if (I16)
1716         goto Lswitch;
1717     else
1718         goto Lifthen;
1719 
1720     /*************************************************************************/
1721     {   // generate if-then sequence
1722     Lifthen:
1723         regm_t retregs = ALLREGS;
1724         b.BC = BCifthen;
1725         scodelem(cdb,e,&retregs,0,true);
1726         reg_t reg, reg2;
1727         if (dword)
1728         {   reg = findreglsw(retregs);
1729             reg2 = findregmsw(retregs);
1730         }
1731         else
1732         {
1733             reg = findreg(retregs);     // reg that result is in
1734             reg2 = NOREG;
1735         }
1736         list_t bl = b.Bsucc;
1737         block *bdefault = b.nthSucc(0);
1738         if (dword && mswsame)
1739         {
1740             cdb.genc2(0x81,modregrm(3,7,reg2),msw);   // CMP reg2,MSW
1741             genjmp(cdb,JNE,FLblock,bdefault);  // JNE default
1742             reg2 = NOREG;
1743         }
1744 
1745         reg_t sreg = NOREG;                          // may need a scratch register
1746 
1747         // Put into casevals[0..ncases] so we can sort then slice
1748         assert(ncases < size_t.max / (2 * CaseVal.sizeof));
1749         CaseVal *casevals = cast(CaseVal *)malloc(ncases * CaseVal.sizeof);
1750         assert(casevals);
1751         for (uint n = 0; n < ncases; n++)
1752         {
1753             casevals[n].val = p[n];
1754             bl = list_next(bl);
1755             casevals[n].target = list_block(bl);
1756 
1757             // See if we need a scratch register
1758             if (sreg == NOREG && I64 && sz == 8 && p[n] != cast(int)p[n])
1759             {   regm_t regm = ALLREGS & ~mask(reg);
1760                 allocreg(cdb,&regm, &sreg, TYint);
1761             }
1762         }
1763 
1764         // Sort cases so we can do a runtime binary search
1765         qsort(casevals, ncases, CaseVal.sizeof, &CaseVal.cmp);
1766 
1767         //for (uint n = 0; n < ncases; n++)
1768             //printf("casevals[%lld] = x%x\n", n, casevals[n].val);
1769 
1770         // Generate binary tree of comparisons
1771         ifthen(cdb, casevals, ncases, sz, reg, reg2, sreg, bdefault, bdefault != b.Bnext);
1772 
1773         free(casevals);
1774 
1775         cgstate.stackclean--;
1776         return;
1777     }
1778 
1779     /*************************************************************************/
1780     {
1781         // Use switch value to index into jump table
1782     Ljmptab:
1783         //printf("Ljmptab:\n");
1784 
1785         b.BC = BCjmptab;
1786 
1787         /* If vmin is small enough, we can just set it to 0 and the jump
1788          * table entries from 0..vmin-1 can be set with the default target.
1789          * This saves the SUB instruction.
1790          * Must be same computation as used in outjmptab().
1791          */
1792         if (vmin > 0 && vmin <= _tysize[TYint])
1793             vmin = 0;
1794 
1795         b.Btablesize = cast(int) (vmax - vmin + 1) * tysize(TYnptr);
1796         regm_t retregs = IDXREGS;
1797         if (dword)
1798             retregs |= mMSW;
1799         if (config.exe & EX_posix && I32 && config.flags3 & CFG3pic)
1800             retregs &= ~mBX;                            // need EBX for GOT
1801         bool modify = (I16 || I64 || vmin);
1802         scodelem(cdb,e,&retregs,0,!modify);
1803         reg_t reg = findreg(retregs & IDXREGS); // reg that result is in
1804         reg_t reg2;
1805         if (dword)
1806             reg2 = findregmsw(retregs);
1807         if (modify)
1808         {
1809             assert(!(retregs & regcon.mvar));
1810             getregs(cdb,retregs);
1811         }
1812         if (vmin)                       // if there is a minimum
1813         {
1814             cdb.genc2(0x81,modregrm(3,5,reg),cast(targ_size_t)vmin); // SUB reg,vmin
1815             if (dword)
1816             {   cdb.genc2(0x81,modregrm(3,3,reg2),cast(targ_size_t)MSREG(vmin)); // SBB reg2,vmin
1817                 genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default
1818             }
1819         }
1820         else if (dword)
1821         {   gentstreg(cdb,reg2);              // TEST reg2,reg2
1822             genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default
1823         }
1824         if (vmax - vmin != REGMASK)     // if there is a maximum
1825         {                               // CMP reg,vmax-vmin
1826             cdb.genc2(0x81,modregrm(3,7,reg),cast(targ_size_t)(vmax-vmin));
1827             if (I64 && sz == 8)
1828                 code_orrex(cdb.last(), REX_W);
1829             genjmp(cdb,JA,FLblock,b.nthSucc(0));  // JA default
1830         }
1831         if (I64)
1832         {
1833             if (!vmin)
1834             {   // Need to clear out high 32 bits of reg
1835                 // Use 8B instead of 89, as 89 will be optimized away as a NOP
1836                 genregs(cdb,0x8B,reg,reg);                 // MOV reg,reg
1837             }
1838             if (config.flags3 & CFG3pic || config.exe == EX_WIN64)
1839             {
1840                 /* LEA    R1,disp[RIP]          48 8D 05 00 00 00 00
1841                  * MOVSXD R2,[reg*4][R1]        48 63 14 B8
1842                  * LEA    R1,[R1][R2]           48 8D 04 02
1843                  * JMP    R1                    FF E0
1844                  */
1845                 reg_t r1;
1846                 regm_t scratchm = ALLREGS & ~mask(reg);
1847                 allocreg(cdb,&scratchm,&r1,TYint);
1848                 reg_t r2;
1849                 scratchm = ALLREGS & ~(mask(reg) | mask(r1));
1850                 allocreg(cdb,&scratchm,&r2,TYint);
1851 
1852                 CodeBuilder cdbe; cdbe.ctor();
1853                 cdbe.genc1(LEA,(REX_W << 16) | modregxrm(0,r1,5),FLswitch,0);        // LEA R1,disp[RIP]
1854                 cdbe.last().IEV1.Vswitch = b;
1855                 cdbe.gen2sib(0x63,(REX_W << 16) | modregxrm(0,r2,4), modregxrmx(2,reg,r1)); // MOVSXD R2,[reg*4][R1]
1856                 cdbe.gen2sib(LEA,(REX_W << 16) | modregxrm(0,r1,4),modregxrmx(0,r1,r2));    // LEA R1,[R1][R2]
1857                 cdbe.gen2(0xFF,modregrmx(3,4,r1));                                          // JMP R1
1858 
1859                 b.Btablesize = cast(int) (vmax - vmin + 1) * 4;
1860                 code *ce = cdbe.finish();
1861                 pinholeopt(ce, null);
1862 
1863                 cdb.append(cdbe);
1864             }
1865             else
1866             {
1867                 cdb.genc1(0xFF,modregrm(0,4,4),FLswitch,0);   // JMP disp[reg*8]
1868                 cdb.last().IEV1.Vswitch = b;
1869                 cdb.last().Isib = modregrm(3,reg & 7,5);
1870                 if (reg & 8)
1871                     cdb.last().Irex |= REX_X;
1872             }
1873         }
1874         else if (I32)
1875         {
1876 static if (JMPJMPTABLE)
1877 {
1878             /* LEA jreg,offset ctable[reg][reg * 4]
1879                JMP jreg
1880               ctable:
1881                JMP case0
1882                JMP case1
1883                ...
1884              */
1885             CodeBuilder ctable; ctable.ctor();
1886             block *bdef = b.nthSucc(0);
1887             targ_llong u;
1888             for (u = vmin; ; u++)
1889             {   block *targ = bdef;
1890                 for (n = 0; n < ncases; n++)
1891                 {
1892                     if (p[n] == u)
1893                     {   targ = b.nthSucc(n + 1);
1894                         break;
1895                     }
1896                 }
1897                 genjmp(ctable,JMP,FLblock,targ);
1898                 ctable.last().Iflags |= CFjmp5;           // don't shrink these
1899                 if (u == vmax)
1900                     break;
1901             }
1902 
1903             // Allocate scratch register jreg
1904             regm_t scratchm = ALLREGS & ~mask(reg);
1905             uint jreg = AX;
1906             allocreg(cdb,&scratchm,&jreg,TYint);
1907 
1908             // LEA jreg, offset ctable[reg][reg*4]
1909             cdb.genc1(LEA,modregrm(2,jreg,4),FLcode,6);
1910             cdb.last().Isib = modregrm(2,reg,reg);
1911             cdb.gen2(0xFF,modregrm(3,4,jreg));      // JMP jreg
1912             cdb.append(ctable);
1913             b.Btablesize = 0;
1914             cgstate.stackclean--;
1915             return;
1916 }
1917 else
1918 {
1919         if (config.exe & (EX_OSX | EX_OSX64))
1920         {
1921             /*     CALL L1
1922              * L1: POP  R1
1923              *     ADD  R1,disp[reg*4][R1]
1924              *     JMP  R1
1925              */
1926             // Allocate scratch register r1
1927             regm_t scratchm = ALLREGS & ~mask(reg);
1928             reg_t r1;
1929             allocreg(cdb,&scratchm,&r1,TYint);
1930 
1931             cdb.genc2(CALL,0,0);                           //     CALL L1
1932             cdb.gen1(0x58 + r1);                           // L1: POP R1
1933             cdb.genc1(0x03,modregrm(2,r1,4),FLswitch,0);   // ADD R1,disp[reg*4][EBX]
1934             cdb.last().IEV1.Vswitch = b;
1935             cdb.last().Isib = modregrm(2,reg,r1);
1936             cdb.gen2(0xFF,modregrm(3,4,r1));               // JMP R1
1937         }
1938         else
1939         {
1940             if (config.flags3 & CFG3pic)
1941             {
1942                 /* MOV  R1,EBX
1943                  * SUB  R1,funcsym_p@GOTOFF[offset][reg*4][EBX]
1944                  * JMP  R1
1945                  */
1946 
1947                 // Load GOT in EBX
1948                 load_localgot(cdb);
1949 
1950                 // Allocate scratch register r1
1951                 regm_t scratchm = ALLREGS & ~(mask(reg) | mBX);
1952                 reg_t r1;
1953                 allocreg(cdb,&scratchm,&r1,TYint);
1954 
1955                 genmovreg(cdb,r1,BX);              // MOV R1,EBX
1956                 cdb.genc1(0x2B,modregxrm(2,r1,4),FLswitch,0);   // SUB R1,disp[reg*4][EBX]
1957                 cdb.last().IEV1.Vswitch = b;
1958                 cdb.last().Isib = modregrm(2,reg,BX);
1959                 cdb.gen2(0xFF,modregrmx(3,4,r1));               // JMP R1
1960             }
1961             else
1962             {
1963                 cdb.genc1(0xFF,modregrm(0,4,4),FLswitch,0);     // JMP disp[idxreg*4]
1964                 cdb.last().IEV1.Vswitch = b;
1965                 cdb.last().Isib = modregrm(2,reg,5);
1966             }
1967         }
1968 }
1969         }
1970         else if (I16)
1971         {
1972             cdb.gen2(0xD1,modregrm(3,4,reg));                   // SHL reg,1
1973             uint rm = getaddrmode(retregs) | modregrm(0,4,0);
1974             cdb.genc1(0xFF,rm,FLswitch,0);                  // JMP [CS:]disp[idxreg]
1975             cdb.last().IEV1.Vswitch = b;
1976             cdb.last().Iflags |= csseg ? CFcs : 0;                       // segment override
1977         }
1978         else
1979             assert(0);
1980         cgstate.stackclean--;
1981         return;
1982     }
1983 
1984     /*************************************************************************/
1985     {
1986         /* Scan a table of case values, and jump to corresponding address.
1987          * Since it relies on REPNE SCASW, it has really nothing to recommend it
1988          * over Lifthen for 32 and 64 bit code.
1989          * Note that it has not been tested with MACHOBJ (OSX).
1990          */
1991     Lswitch:
1992         regm_t retregs = mAX;                  // SCASW requires AX
1993         if (dword)
1994             retregs |= mDX;
1995         else if (ncases <= 6 || config.flags4 & CFG4speed)
1996             goto Lifthen;
1997         scodelem(cdb,e,&retregs,0,true);
1998         if (dword && mswsame)
1999         {   /* CMP DX,MSW       */
2000             cdb.genc2(0x81,modregrm(3,7,DX),msw);
2001             genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default
2002         }
2003         getregs(cdb,mCX|mDI);
2004 
2005         if (config.flags3 & CFG3pic && config.exe & EX_posix)
2006         {   // Add in GOT
2007             getregs(cdb,mDX);
2008             cdb.genc2(CALL,0,0);        //     CALL L1
2009             cdb.gen1(0x58 + DI);        // L1: POP EDI
2010 
2011                                         //     ADD EDI,_GLOBAL_OFFSET_TABLE_+3
2012             Symbol *gotsym = Obj.getGOTsym();
2013             cdb.gencs(0x81,modregrm(3,0,DI),FLextern,gotsym);
2014             cdb.last().Iflags = CFoff;
2015             cdb.last().IEV2.Voffset = 3;
2016 
2017             makeitextern(gotsym);
2018 
2019             genmovreg(cdb, DX, DI);    // MOV EDX, EDI
2020                                         // ADD EDI,offset of switch table
2021             cdb.gencs(0x81,modregrm(3,0,DI),FLswitch,null);
2022             cdb.last().IEV2.Vswitch = b;
2023         }
2024 
2025         if (!(config.flags3 & CFG3pic))
2026         {
2027                                         // MOV DI,offset of switch table
2028             cdb.gencs(0xC7,modregrm(3,0,DI),FLswitch,null);
2029             cdb.last().IEV2.Vswitch = b;
2030         }
2031         movregconst(cdb,CX,ncases,0);    // MOV CX,ncases
2032 
2033         /* The switch table will be accessed through ES:DI.
2034          * Therefore, load ES with proper segment value.
2035          */
2036         if (config.flags3 & CFG3eseqds)
2037         {
2038             assert(!csseg);
2039             getregs(cdb,mCX);           // allocate CX
2040         }
2041         else
2042         {
2043             getregs(cdb,mES|mCX);       // allocate ES and CX
2044             cdb.gen1(csseg ? 0x0E : 0x1E);      // PUSH CS/DS
2045             cdb.gen1(0x07);                     // POP  ES
2046         }
2047 
2048         targ_size_t disp = (ncases - 1) * _tysize[TYint];  // displacement to jump table
2049         if (dword && !mswsame)
2050         {
2051 
2052             /* Build the following:
2053                 L1:     SCASW
2054                         JNE     L2
2055                         CMP     DX,[CS:]disp[DI]
2056                 L2:     LOOPNE  L1
2057              */
2058 
2059             const int mod = (disp > 127) ? 2 : 1;         // displacement size
2060             code *cloop = genc2(null,0xE0,0,-7 - mod - csseg);   // LOOPNE scasw
2061             cdb.gen1(0xAF);                                      // SCASW
2062             code_orflag(cdb.last(),CFtarg2);                     // target of jump
2063             genjmp(cdb,JNE,FLcode,cast(block *) cloop); // JNE loop
2064                                                                  // CMP DX,[CS:]disp[DI]
2065             cdb.genc1(0x39,modregrm(mod,DX,5),FLconst,disp);
2066             cdb.last().Iflags |= csseg ? CFcs : 0;              // possible seg override
2067             cdb.append(cloop);
2068             disp += ncases * _tysize[TYint];           // skip over msw table
2069         }
2070         else
2071         {
2072             cdb.gen1(0xF2);              // REPNE
2073             cdb.gen1(0xAF);              // SCASW
2074         }
2075         genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default
2076         const int mod = (disp > 127) ? 2 : 1;     // 1 or 2 byte displacement
2077         if (csseg)
2078             cdb.gen1(SEGCS);            // table is in code segment
2079 
2080         if (config.flags3 & CFG3pic &&
2081             config.exe & EX_posix)
2082         {                               // ADD EDX,(ncases-1)*2[EDI]
2083             cdb.genc1(0x03,modregrm(mod,DX,7),FLconst,disp);
2084                                         // JMP EDX
2085             cdb.gen2(0xFF,modregrm(3,4,DX));
2086         }
2087 
2088         if (!(config.flags3 & CFG3pic))
2089         {                               // JMP (ncases-1)*2[DI]
2090             cdb.genc1(0xFF,modregrm(mod,4,(I32 ? 7 : 5)),FLconst,disp);
2091             cdb.last().Iflags |= csseg ? CFcs : 0;
2092         }
2093         b.Btablesize = disp + _tysize[TYint] + ncases * tysize(TYnptr);
2094         //assert(b.Bcode);
2095         cgstate.stackclean--;
2096         return;
2097     }
2098 }
2099 
/******************************
 * Emit the data for a jump table block (BCjmptab).
 *
 * One entry is written for every value in [vmin .. vmax]. A value that
 * matches no case gets the address of the default successor, which is
 * b.nthSucc(0); case n maps to b.nthSucc(n + 1).
 *
 * Params:
 *      b = switch block whose jump table is to be written
 */

@trusted
void outjmptab(block *b)
{
    if (JMPJMPTABLE && I32)
        return;

    targ_llong *cases = b.Bswitch;                  // first element holds the count
    const size_t ncases = cast(size_t)*cases++;     // cases now points at the case values

    /* Determine the span of the table, [vmin .. vmax].
     * Must be same computation as used in doswitch().
     */
    targ_llong vmin = MAXLL;                        // largest possible llong
    targ_llong vmax = MINLL;                        // smallest possible llong
    foreach (const targ_llong v; cases[0 .. ncases])
    {
        if (v > vmax) vmax = v;
        if (v < vmin) vmin = v;
    }
    if (vmin > 0 && vmin <= _tysize[TYint])
        vmin = 0;
    assert(vmin <= vmax);

    // Segment that receives the table, and the running offset within it
    const int jmpseg = objmod.jmpTableSegment(funcsym_p);
    targ_size_t *poffset = &Offset(jmpseg);

    // Pad up to the table's alignment; we must then be at the precomputed offset
    const targ_size_t padding = _align(0,*poffset) - *poffset;
    objmod.lidata(jmpseg,*poffset,padding);
    assert(*poffset == b.Btableoffset);

    // The target configuration does not change while the table is written
    const bool pic     = (config.flags3 & CFG3pic) != 0;
    const bool posix64 = (config.exe & (EX_LINUX64 | EX_FREEBSD64 | EX_OPENBSD64 | EX_DRAGONFLYBSD64 | EX_SOLARIS64)) != 0;
    const bool posix32 = (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS)) != 0;
    const bool osx     = (config.exe & (EX_OSX | EX_OSX64)) != 0;

    Symbol *gotsym = null;                          // looked up on first use
    const targ_size_t defaultAddr = b.nthSucc(0).Boffset;

    targ_llong u = vmin;
    while (true)
    {
        // Address of the successor for case value u, or the default
        targ_size_t targ = defaultAddr;
        foreach (size_t n; 0 .. ncases)
        {
            if (cases[n] == u)
            {
                targ = b.nthSucc(cast(int)(n + 1)).Boffset;
                break;
            }
        }

        if (posix64)
        {
            if (pic)
            {
                objmod.reftodatseg(jmpseg,*poffset,cast(targ_size_t)(targ + (u - vmin) * 4),funcsym_p.Sseg,CFswitch);
                *poffset += 4;
            }
            else
            {
                objmod.reftodatseg(jmpseg,*poffset,targ,funcsym_p.Sxtrnnum,CFoffset64 | CFswitch);
                *poffset += 8;
            }
        }
        else if (posix32)
        {
            if (pic)
            {
                assert(config.flags & CFGromable);
                // Want a GOTPC fixup to _GLOBAL_OFFSET_TABLE_
                if (!gotsym)
                    gotsym = Obj.getGOTsym();
                objmod.reftoident(jmpseg,*poffset,gotsym,*poffset - targ,CFswitch);
            }
            else
                objmod.reftocodeseg(jmpseg,*poffset,targ);
            *poffset += 4;
        }
        else if (osx)
        {
            // 4 byte entry, relative to the table offset (64 bit) or the table base (32 bit)
            targ_size_t val = targ;
            if (I64)
                val -= b.Btableoffset;
            else
                val -= b.Btablebase;
            objmod.write_bytes(SegData[jmpseg],4,&val);
        }
        else if (I64)
        {
            // 4 byte entry, relative to the table offset
            targ_size_t val = targ - b.Btableoffset;
            objmod.write_bytes(SegData[jmpseg],4,&val);
        }
        else
        {
            objmod.reftocodeseg(jmpseg,*poffset,targ);
            *poffset += tysize(TYnptr);
        }

        // Test before stepping, for case that (vmax == ~0)
        if (u == vmax)
            break;
        ++u;
    }
}
2204 
2205 
/******************************
 * Emit the data for a switch table block.
 *
 * The layout is, in order:
 *   1. the case values, _tysize[TYint] bytes each
 *   2. only when b.Btablesize == ncases * (REGSIZE * 2 + tysize(TYnptr)):
 *      the most significant words (MSREG) of the case values, REGSIZE bytes each
 *   3. the addresses of the case blocks, tysize(TYnptr) bytes each
 *
 * Params:
 *      b = switch block whose table is to be written
 */

@trusted
void outswitab(block *b)
{
    //printf("outswitab()\n");
    targ_llong *caseData = b.Bswitch;            // [0] is the count, values follow
    const uint ncases = cast(uint)*caseData;
    targ_llong *caseVals = caseData + 1;

    const int seg = objmod.jmpTableSegment(funcsym_p);
    targ_size_t *poffset = &Offset(seg);

    // `expected` shadows the segment offset so each stage can be cross-checked
    targ_size_t expected = *poffset;
    const targ_size_t alignbytes = _align(0,*poffset) - *poffset;
    objmod.lidata(seg,*poffset,alignbytes);      // any alignment bytes necessary
    assert(*poffset == expected + alignbytes);

    const uint valsize = _tysize[TYint];
    assert(SegData[seg].SDseg == seg);

    // 1. value table
    foreach (uint n; 0 .. ncases)
    {
        //printf("\tcase %d, offset = x%x\n", n, *poffset);
        objmod.write_bytes(SegData[seg],valsize,&caseVals[n]);
    }
    expected += alignbytes + valsize * ncases;
    assert(*poffset == expected);

    // 2. MSW table, present only when the table size says so
    if (b.Btablesize == ncases * (REGSIZE * 2 + tysize(TYnptr)))
    {
        foreach (uint n; 0 .. ncases)
        {
            targ_size_t msw = cast(targ_size_t)MSREG(caseVals[n]);
            objmod.write_bytes(SegData[seg],REGSIZE,&msw);
        }
        expected += REGSIZE * ncases;
        assert(*poffset == expected);
    }

    // 3. address table; the first successor is skipped, the rest are the cases
    list_t succ = b.Bsucc;
    foreach (uint n; 0 .. ncases)
    {
        succ = list_next(succ);
        objmod.reftocodeseg(seg,*poffset,list_block(succ).Boffset);
        *poffset += tysize(TYnptr);
    }
    assert(*poffset == expected + ncases * tysize(TYnptr));
}
2260 
/*****************************
 * Return a jump opcode relevant to the elem for a JMP true.
 *
 * OPcomma nodes, and OPeq nodes whose left operand is a leaf, are stepped
 * over first: it is their right operand that gets examined.
 *
 * Params:
 *      e = expression whose truth is being branched on; must not be null
 * Returns:
 *      A conditional jump opcode. On the floating point paths the value may
 *      additionally have XP (JP << 8) or XNP (JNP << 8) or'ed in above the
 *      low byte.
 */

@trusted
int jmpopcode(elem *e)
{
    //printf("jmpopcode()\n"); elem_print(e);
    tym_t tym;
    int zero,i,jp,op;

    /* Integer comparisons, indexed as jops[i][zero][op - OPle]:
     *   i    = 0 for signed, 1 for unsigned
     *   zero = 1 when the right operand is a constant for which boolres() is false
     */
    static immutable ubyte[6][2][2] jops =
    [   /* <=  >   <   >=  ==  !=    <=0 >0  <0  >=0 ==0 !=0    */
       [ [JLE,JG ,JL ,JGE,JE ,JNE],[JLE,JG ,JS ,JNS,JE ,JNE] ], /* signed   */
       [ [JBE,JA ,JB ,JAE,JE ,JNE],[JE ,JNE,JB ,JAE,JE ,JNE] ], /* uint */
/+
       [ [JLE,JG ,JL ,JGE,JE ,JNE],[JLE,JG ,JL ,JGE,JE ,JNE] ], /* real     */
       [ [JBE,JA ,JB ,JAE,JE ,JNE],[JBE,JA ,JB ,JAE,JE ,JNE] ], /* 8087     */
       [ [JA ,JBE,JAE,JB ,JE ,JNE],[JBE,JA ,JB ,JAE,JE ,JNE] ], /* 8087 R   */
+/
    ];

    // Markers placed in the second byte of the result, above the jump opcode
    enum
    {
        XP     = (JP  << 8),
        XNP    = (JNP << 8),
    }

    // Floating point comparisons, indexed as jfops[0][op - OPle]
    static immutable uint[26][1] jfops =
    /*   le     gt lt     ge  eqeq    ne     unord lg  leg  ule ul uge  */
    [
      [ XNP|JBE,JA,XNP|JB,JAE,XNP|JE, XP|JNE,JP,   JNE,JNP, JBE,JC,XP|JAE,

    /*  ug    ue ngt nge nlt    nle    ord nlg nleg nule nul nuge    nug     nue */
        XP|JA,JE,JBE,JB, XP|JAE,XP|JA, JNP,JE, JP,  JA,  JNC,XNP|JB, XNP|JBE,JNE        ], /* 8087     */
    ];

    assert(e);
    while (e.Eoper == OPcomma ||
        /* The OTleaf(e.EV.E1.Eoper) is to line up with the case in cdeq() where  */
        /* we decide if mPSW is passed on when evaluating E2 or not.    */
         (e.Eoper == OPeq && OTleaf(e.EV.E1.Eoper)))
    {
        e = e.EV.E2;                      /* right operand determines it  */
    }

    op = e.Eoper;
    tym_t tymx = tybasic(e.Ety);
    /* A floating point value tested for truth needs the XP marker on its
     * JNE in the cases listed here.
     * NOTE(review): the last clause hands tybasic(e.EV.E1.Eoper), i.e. an
     * operator rather than a type, to regmask() as its second argument -
     * confirm whether e.EV.E1.Ety was intended.
     */
    bool needsNanCheck = tyfloating(tymx) && config.inline8087 &&
        (tymx == TYldouble || tymx == TYildouble || tymx == TYcldouble ||
         tymx == TYcdouble || tymx == TYcfloat ||
         (tyxmmreg(tymx) && config.fpxmmregs && e.Ecount != e.Ecomsub) ||
         op == OPind ||
         (OTcall(op) && (regmask(tymx, tybasic(e.EV.E1.Eoper)) & (mST0 | XMMREGS))));

    if (!needsNanCheck)
    {
        /* If e is in an XMM register, need to use XP.
         * Match same test in loaddata()
         */
        Symbol* s;
        needsNanCheck = e.Eoper == OPvar &&
            (s = e.EV.Vsym).Sfl == FLreg &&
             s.Sregm & XMMREGS &&
             (tymx == TYfloat || tymx == TYifloat || tymx == TYdouble || tymx ==TYidouble);
    }

    // The early returns below do not pass through the opcode assert at L1
    if (e.Ecount != e.Ecomsub)          // comsubs just get Z bit set
    {
        if (needsNanCheck) // except for floating point values that need a NaN check
            return XP|JNE;
        else
            return JNE;
    }
    if (!OTrel(op))                       // not relational operator
    {
        if (needsNanCheck)
            return XP|JNE;

        // Look through zero-extending conversions to find the underlying operator
        if (op == OPu32_64) { e = e.EV.E1; op = e.Eoper; }
        if (op == OPu16_32) { e = e.EV.E1; op = e.Eoper; }
        if (op == OPu8_16) op = e.EV.E1.Eoper;
        // Operators in OPbt..OPbts, and OPbtst, get JC; everything else JNE
        return ((op >= OPbt && op <= OPbts) || op == OPbtst) ? JC : JNE;
    }

    // From here on e is a relational operator
    if (e.EV.E2.Eoper == OPconst)
        zero = !boolres(e.EV.E2);
    else
        zero = 0;

    tym = e.EV.E1.Ety;
    if (tyfloating(tym))
    {
static if (1)
{
        // On this path i is only reported by the debug printf at L1;
        // the opcode always comes from jfops[0]
        i = 0;
        if (config.inline8087)
        {   i = 1;

static if (1)
{
            // Decide whether the relation has to be swapped before the table lookup
            if (rel_exception(op) || config.flags4 & CFG4fastfloat)
            {
                const bool NOSAHF = (I64 || config.fpxmmregs);
                if (zero)
                {
                    if (NOSAHF)
                        op = swaprel(op);
                }
                else if (NOSAHF)
                    op = swaprel(op);
                else if (cmporder87(e.EV.E2))
                    op = swaprel(op);
                else
                { }
            }
            else
            {
                if (zero && config.target_cpu < TARGET_80386)
                { }
                else
                    op = swaprel(op);
            }
}
else
{
            // Disabled alternative, kept compilable by the enclosing static if
            if (zero && !rel_exception(op) && config.target_cpu >= TARGET_80386)
                op = swaprel(op);
            else if (!zero &&
                (cmporder87(e.EV.E2) || !(rel_exception(op) || config.flags4 & CFG4fastfloat)))
                /* compare is reversed */
                op = swaprel(op);
}
        }
        jp = jfops[0][op - OPle];
        goto L1;
}
else
{
        // Disabled alternative that would index further rows of a table
        i = (config.inline8087) ? (3 + cmporder87(e.EV.E2)) : 2;
}
    }
    else if (tyuns(tym) || tyuns(e.EV.E2.Ety))
        i = 1;                            // either operand unsigned: unsigned row
    else if (tyintegral(tym) || typtr(tym))
        i = 0;                            // signed row
    else
    {
        // `debug` governs only the elem_print; the printf is compiled in all builds
        debug
        elem_print(e);
        printf("%s\n", tym_str(tym));
        assert(0);
    }

    jp = jops[i][zero][op - OPle];        /* table starts with OPle       */

    /* Try to rewrite uint comparisons so they rely on just the Carry flag
     */
    if (i == 1 && (jp == JA || jp == JBE) &&
        (e.EV.E2.Eoper != OPconst && e.EV.E2.Eoper != OPrelconst))
    {
        jp = (jp == JA) ? JC : JNC;
    }

L1:
    // Only bits 4..7 are checked, so an XP/XNP marker above the low byte is not looked at
    debug
    if ((jp & 0xF0) != 0x70)
    {
        printf("%s i %d zero %d op x%x jp x%x\n",oper_str(op),i,zero,op,jp);
    }

    assert((jp & 0xF0) == 0x70);
    return jp;
}
2433 
/**********************************
 * Append code to cdb which validates pointer described by
 * addressing mode in *pcs. Modify addressing mode in *pcs.
 *
 * The generated sequence pushes the address (preceded by a segment
 * register in 16 bit code) and calls the RTLSYM.PTRCHK runtime function.
 * If the address is not already held in a single suitable register, it is
 * first computed into a scratch register with LEA.
 * On return *pcs addresses 0[reg], where reg is the register that was pushed.
 *
 * Nothing is generated for 32 bit code when *pcs carries a segment override.
 * Must not be called for 64 bit code.
 *
 * Params:
 *    cdb = append generated code to this
 *    pcs = original addressing mode to be updated
 *    keepmsk = mask of registers we must not destroy or use
 *              if (keepmsk & RMstore), this will be only a store operation
 *              into the lvalue
 */

@trusted
void cod3_ptrchk(ref CodeBuilder cdb,code *pcs,regm_t keepmsk)
{
    assert(!I64);
    if (!I16 && pcs.Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs))
        return;         // not designed to deal with 48 bit far pointers

    ubyte rm = pcs.Irm;
    assert(!(rm & 0x40));       // no disp8 or reg addressing modes

    // If the addressing mode is already a register
    reg_t reg = rm & 7;
    if (I16)
    {   static immutable ubyte[8] imode = [ BP,BP,BP,BP,SI,DI,BP,BX ];

        reg = imode[reg];               // convert [SI] to SI, etc.
    }
    regm_t idxregs = mask(reg);
    if ((rm & 0x80 && (pcs.IFL1 != FLoffset || pcs.IEV1.Vuns)) ||
        !(idxregs & ALLREGS)
       )
    {
        // Load the offset into a register, so we can push the address
        regm_t idxregs2 = (I16 ? IDXREGS : ALLREGS) & ~keepmsk; // only these can be index regs
        assert(idxregs2);
        allocreg(cdb,&idxregs2,&reg,TYoffset);

        /* The address now lives in the newly allocated reg, so idxregs has
         * to follow it. Otherwise the addressing mode written back into
         * *pcs below would still name the original base register, without
         * its displacement, instead of the register that gets pushed.
         */
        idxregs = mask(reg);

        const opsave = pcs.Iop;
        const flagsave = pcs.Iflags;
        pcs.Iop = LEA;
        pcs.Irm |= modregrm(0,reg,0);
        pcs.Iflags &= ~(CFopsize | CFss | CFes | CFcs);        // no prefix bytes needed
        cdb.gen(pcs);                 // LEA reg,EA

        // Put back the caller's opcode and flags; the LEA was only borrowing *pcs
        pcs.Iflags = flagsave;
        pcs.Iop = opsave;
    }

    // registers destroyed by the function call
    //used = (mBP | ALLREGS | mES) & ~fregsaved;
    regm_t used = 0;                           // much less code generated this way

    // With used == 0 there is nothing to save, and the loop below does not execute
    code *cs2 = null;
    regm_t tosave = used & (keepmsk | idxregs);
    for (int i = 0; tosave; i++)
    {
        regm_t mi = mask(i);

        assert(i < REGMAX);
        if (mi & tosave)        /* i = register to save                 */
        {
            int push,pop;

            stackchanged = 1;
            if (i == ES)
            {   push = 0x06;
                pop = 0x07;
            }
            else
            {   push = 0x50 + i;
                pop = push | 8;
            }
            cdb.gen1(push);                     // PUSH i
            cs2 = cat(gen1(null,pop),cs2);      // POP i, prepended so pops run in reverse order
            tosave &= ~mi;
        }
    }

    // For 16 bit models, push a far pointer
    if (I16)
    {
        int segreg;

        // Select the PUSH opcode for the segment named by the override, DS if none
        switch (pcs.Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs))
        {   case CFes:  segreg = 0x06;  break;
            case CFss:  segreg = 0x16;  break;
            case CFcs:  segreg = 0x0E;  break;
            case 0:     segreg = 0x1E;  break;  // DS
            default:
                assert(0);
        }

        // See if we should default to SS:
        // (Happens when BP is part of the addressing mode)
        if (segreg == 0x1E && (rm & 0xC0) != 0xC0 &&
            rm & 2 && (rm & 7) != 7)
        {
            segreg = 0x16;
            if (config.wflags & WFssneds)
                pcs.Iflags |= CFss;    // because BP won't be there anymore
        }
        cdb.gen1(segreg);               // PUSH segreg
    }

    cdb.gen1(0x50 + reg);               // PUSH reg

    // Rewrite the addressing mode in *pcs so it is just 0[reg]
    setaddrmode(pcs, idxregs);
    pcs.IFL1 = FLoffset;
    pcs.IEV1.Vuns = 0;

    // Call the validation function
    {
        makeitextern(getRtlsym(RTLSYM.PTRCHK));

        used &= ~(keepmsk | idxregs);           // regs destroyed by this exercise
        getregs(cdb,used);
                                                // CALL __ptrchk
        cdb.gencs((LARGECODE) ? 0x9A : CALL,0,FLfunc,getRtlsym(RTLSYM.PTRCHK));
    }

    cdb.append(cs2);                    // restore whatever was saved above
}
2562 
/***********************************
 * Decide whether BP is free to serve as a general purpose register
 * in the current function (funcsym_p).
 * Note parallels between this routine and prolog().
 *
 * Side effects: once the cheap checks have passed, stackoffsets() is run
 * and the global localsize is set to an estimate of the local frame size.
 *
 * Returns:
 *      0       can't be used, needed for frame
 *      mBP     can be used
 */

@trusted
regm_t cod3_useBP()
{
    // Note that DOSX memory model cannot use EBP as a general purpose
    // register, as SS != DS.
    if (!(config.exe & EX_flat) || config.flags & (CFGalwaysframe | CFGnoebp))
        return 0;

    if (anyiasm)
        return 0;

    const tym_t tyf = funcsym_p.ty();
    if (tyf & mTYnaked)                 // if no prolog/epilog for function
        return 0;

    if (funcsym_p.Sfunc.Fflags3 & Ffakeeh)
        return 0;                       // need consistent stack frame

    const tym_t tym = tybasic(tyf);
    if (tym == TYifunc)
        return 0;

    stackoffsets(globsym, true);                // estimate stack offsets
    localsize = Auto.offset + Fast.offset;      // an estimate only

    // Any one of these conditions means the frame pointer is kept
    if (!(config.flags4 & CFG4speed) ||
        config.target_cpu < TARGET_Pentium ||
        tyfarfunc(tym) ||
        config.flags & CFGstack ||
        localsize >= 0x100 ||       // arbitrary value < 0x1000
        (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) ||
        calledFinally ||
        Alloca.size
       )
    {
        return 0;
    }

    return mBP;
}
2618 
/*************************************************
 * Generate code segment to be used later to restore a cse.
 *
 * Two shapes of e are recognized. In both, the variable operand must be a
 * register variable (per isregvar()) that is not flagged SFLspill:
 *   - var + const, REGSIZE bytes wide, not for 16 bit code: becomes an LEA
 *   - *var, at most REGSIZE bytes wide: becomes a MOV (0x8A for 1 byte, else 0x8B)
 *
 * Params:
 *      c = instruction to fill in; zeroed and written only when true is returned
 *      e = the common subexpression
 * Returns:
 *      true if *c now holds the instruction, false if e has neither shape
 */

@trusted
bool cse_simple(code *c, elem *e)
{
    regm_t regm;
    reg_t reg;
    const int sz = tysize(e.Ety);

    const bool addOfConst =
        !I16 &&                                  // don't bother with 16 bit code
        e.Eoper == OPadd &&
        sz == REGSIZE &&
        e.EV.E2.Eoper == OPconst &&
        e.EV.E1.Eoper == OPvar &&
        isregvar(e.EV.E1,&regm,&reg) &&
        !(e.EV.E1.EV.Vsym.Sflags & SFLspill);

    if (addOfConst)
    {
        memset(c,0,code.sizeof);

        // Make this an LEA instruction, with the constant as displacement
        c.Iop = LEA;
        buildEA(c,reg,-1,1,e.EV.E2.EV.Vuns);
        if (I64 && sz == 8)
            c.Irex |= REX_W;

        return true;
    }

    const bool loadThroughReg =
        e.Eoper == OPind &&
        sz <= REGSIZE &&
        e.EV.E1.Eoper == OPvar &&
        isregvar(e.EV.E1,&regm,&reg) &&
        (I32 || I64 || regm & IDXREGS) &&
        !(e.EV.E1.EV.Vsym.Sflags & SFLspill);

    if (!loadThroughReg)
        return false;

    memset(c,0,code.sizeof);

    // Make this a MOV instruction
    c.Iop = (sz == 1) ? 0x8A : 0x8B;       // MOV reg,EA
    buildEA(c,reg,-1,1,0);
    if (sz == 2 && I32)
        c.Iflags |= CFopsize;
    else if (I64 && sz == 8)
        c.Irex |= REX_W;

    return true;
}
2675 
/**************************
 * Emit a store of `reg` into entry `slot` of the common subexpression
 * save area (a BP-relative FLcs operand).
 * Params:
 *      cdb = where to write code to
 *      tym = type of value that's in `reg`
 *      reg = register to save
 *      slot = index into common subexpression save area
 */
@trusted
void gen_storecse(ref CodeBuilder cdb, tym_t tym, reg_t reg, size_t slot)
{
    // MOV slot[BP],reg
    if (isXMMreg(reg) && config.fpxmmregs) // watch out for ES
    {
        // Vector types may only use the aligned form if the stack is 16 byte aligned
        const aligned = !tyvector(tym) || STACKALIGN >= 16;
        cdb.genc1(xmmstore(tym, aligned),modregxrm(2, reg - XMM0, BPRM),FLcs,cast(targ_size_t)slot);
        return;
    }

    const bool isSegES = (reg == ES);
    const opcode_t op = isSegES ? 0x8C : STO;   // segment reg mov : normal mov
    const uint regfield = isSegES ? 0 : reg;    // 0 is the real reg number of ES

    cdb.genc1(op,modregxrm(2, regfield, BPRM),FLcs,cast(targ_uns)slot);
    if (I64)
        code_orrex(cdb.last(), REX_W);
}
2705 
/**************************
 * Emit a compare against zero of entry `slot` of the common subexpression
 * save area.
 * Params:
 *      cdb = where to write code to
 *      tym = type of the saved value (not used by this function)
 *      sz = size in bytes of the saved value; selects the byte opcode,
 *           the operand size prefix or REX_W
 *      slot = index into common subexpression save area
 */
@trusted
void gen_testcse(ref CodeBuilder cdb, tym_t tym, uint sz, size_t slot)
{
    // CMP slot[BP],0
    const opcode_t op = (sz == 1) ? 0x80 : 0x81;
    cdb.genc(op,modregrm(2,7,BPRM),
                FLcs,cast(targ_uns)slot, FLconst,cast(targ_uns) 0);

    code* cmp = cdb.last();
    if ((I64 || I32) && sz == 2)
        cmp.Iflags |= CFopsize;
    if (I64 && sz == 8)
        code_orrex(cmp, REX_W);
}
2717 
/**************************
 * Emit a load of `reg` from entry `slot` of the common subexpression
 * save area (a BP-relative FLcs operand).
 * Params:
 *      cdb = where to write code to
 *      tym = type of the saved value
 *      reg = register to load
 *      slot = index into common subexpression save area
 */
@trusted
void gen_loadcse(ref CodeBuilder cdb, tym_t tym, reg_t reg, size_t slot)
{
    // MOV reg,slot[BP]
    if (isXMMreg(reg) && config.fpxmmregs)
    {
        // Vector types may only use the aligned form if the stack is 16 byte aligned
        const aligned = !tyvector(tym) || STACKALIGN >= 16;
        cdb.genc1(xmmload(tym, aligned),modregxrm(2, reg - XMM0, BPRM),FLcs,cast(targ_size_t)slot);
        return;
    }

    const bool isSegES = (reg == ES);
    const opcode_t op = isSegES ? 0x8E : LOD;   // segment reg mov : normal mov
    const uint regfield = isSegES ? 0 : reg;    // ES is encoded as register 0

    cdb.genc1(op,modregxrm(2,regfield,BPRM),FLcs,cast(targ_uns)slot);
    if (I64)
        code_orrex(cdb.last(), REX_W);
}
2739 
/***************************************
 * Gen code for OPframeptr.
 *
 * Allocates a general register and emits an ESCAPE | ESCframeptr
 * pseudo-instruction naming that register in its Irm field.
 * Params:
 *      cdb = where to write code to
 *      e = the OPframeptr elem
 *      pretregs = requested result registers; passed on to fixresult()
 */

@trusted
void cdframeptr(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    // Honor the caller's register preference if it names any general register
    regm_t candidates = *pretregs & allregs;
    if (candidates == 0)
        candidates = allregs;

    reg_t target;
    allocreg(cdb,&candidates, &target, TYint);

    code escape;
    escape.Iop = ESCAPE | ESCframeptr;
    escape.Irm = cast(ubyte)target;
    escape.Iflags = 0;
    escape.Irex = 0;
    cdb.gen(&escape);

    fixresult(cdb,e,candidates,pretregs);
}
2761 
/***************************************
 * Gen code for load of _GLOBAL_OFFSET_TABLE_.
 * This value gets cached in the local variable 'localgot'.
 *
 * Only OSX and other posix targets are supported; any other target
 * hits assert(0) before any code is generated.
 * Params:
 *      cdb = where to write code to
 *      e = the elem being evaluated
 *      pretregs = requested result registers; passed on to fixresult()
 */

@trusted
void cdgot(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    // The OSX test must win over the posix test, so decide it first
    const bool isOSX = (config.exe & (EX_OSX | EX_OSX64)) != 0;
    if (!isOSX && !(config.exe & EX_posix))
        assert(0);

    // Both supported targets start by picking a general register
    regm_t retregs = *pretregs & allregs;
    if (!retregs)
        retregs = allregs;
    reg_t reg;
    allocreg(cdb,&retregs, &reg, TYnptr);

    if (isOSX)
    {
        cdb.genc(CALL,0,0,0,FLgot,0);     //     CALL L1
        cdb.gen1(0x58 + reg);             // L1: POP reg
    }
    else
    {
        cdb.genc2(CALL,0,0);        //     CALL L1
        cdb.gen1(0x58 + reg);       // L1: POP reg

                                    //     ADD reg,_GLOBAL_OFFSET_TABLE_+3
        Symbol *gotsym = Obj.getGOTsym();
        cdb.gencs(0x81,modregrm(3,0,reg),FLextern,gotsym);

        /* Because the 2:3 offset from L1: is hardcoded,
         * this sequence of instructions must not
         * have any instructions in between,
         * so set CFvolatile to prevent the scheduler from rearranging it.
         */
        code *addgot = cdb.last();
        addgot.Iflags = CFoff | CFvolatile;
        addgot.IEV2.Voffset = (reg == AX) ? 2 : 3;

        makeitextern(gotsym);
    }

    fixresult(cdb,e,retregs,pretregs);
}
2812 
/**************************************************
 * Load contents of localgot into EBX.
 *
 * Does nothing unless the target is Linux/FreeBSD/OpenBSD/Solaris and
 * position independent code (CFG3pic) is being generated.
 * Params:
 *      cdb = where to write code to
 */

@trusted
void load_localgot(ref CodeBuilder cdb)
{
    if (!(config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS))) // note: I32 only
        return;
    if (!(config.flags3 & CFG3pic))
        return;

    elem *e;
    if (localgot && !(localgot.Sflags & SFLdead))
    {
        // Reload the cached value
        localgot.Sflags &= ~GTregcand;     // because this hack doesn't work with reg allocator
        e = el_var(localgot);
    }
    else
    {
        // No usable cached value: evaluate an OPgot node instead
        e = el_long(TYnptr, 0);
        e.Eoper = OPgot;
    }

    regm_t retregs = mBX;
    codelem(cdb,e,&retregs,false);
    el_free(e);
}
2843 
/*****************************
 * Store `name` at `p` as a length-prefixed string.
 *
 * Names of up to 255 characters are written as one length byte followed
 * by the characters. Longer names are written as the bytes 0xFF, 0,
 * a 16 bit little-endian length, then the characters.
 * No terminating 0 is written.
 *
 * The 16 bit length is stored one byte at a time, so the result does not
 * depend on the alignment of `p` or on the byte order of the host.
 * Lengths above 0xFFFF are truncated to their low 16 bits in the prefix.
 *
 * Params:
 *      p = destination buffer; must have room for strlen(name) + 4 bytes
 *      name = 0-terminated name to store
 * Returns:
 *      # of bytes stored
 */


@trusted
int obj_namestring(char *p,const(char)* name)
{
    enum ONS_OHD = 4;           // max # of extra bytes added by obj_namestring()

    size_t len = strlen(name);
    if (len > 255)
    {
        p[0] = cast(char)0xFF;
        p[1] = 0;
        p[2] = cast(char)(len & 0xFF);          // length, low byte
        p[3] = cast(char)((len >> 8) & 0xFF);   // length, high byte
        memcpy(p + ONS_OHD,name,len);
        len += ONS_OHD;
    }
    else
    {
        p[0] = cast(char)len;
        memcpy(p + 1,name,len);
        len++;
    }
    return cast(int)len;
}
2872 
/**************************
 * Emit a two-register instruction: opcode `op` with a mod=3 ModR/M byte
 * built from `dstreg` and `srcreg`.
 */
void genregs(ref CodeBuilder cdb,opcode_t op,uint dstreg,uint srcreg)
{
    const modrm = modregxrmx(3,dstreg,srcreg);
    cdb.gen2(op,modrm);
}
2877 
/**************************
 * Emit `TEST t,t` and mark the instruction with CFpsw.
 */
void gentstreg(ref CodeBuilder cdb, uint t)
{
    const modrm = modregxrmx(3,t,t);
    cdb.gen2(0x85,modrm);               // TEST t,t
    code_orflag(cdb.last(),CFpsw);
}
2883 
/**************************
 * Emit `PUSH reg`. Registers numbered 8 and up get a REX_B prefix.
 */
void genpush(ref CodeBuilder cdb, reg_t reg)
{
    const bool extended = (reg & 8) != 0;

    cdb.gen1(0x50 + (reg & 7));
    if (extended)
        code_orrex(cdb.last(), REX_B);
}
2890 
/**************************
 * Emit `POP reg`. Registers numbered 8 and up get a REX_B prefix.
 */
void genpop(ref CodeBuilder cdb, reg_t reg)
{
    const bool extended = (reg & 8) != 0;

    cdb.gen1(0x58 + (reg & 7));
    if (extended)
        code_orrex(cdb.last(), REX_B);
}
2897 
/**************************
 * Generate a MOV to,from register instruction.
 * Smart enough to dump redundant register moves, and segment
 * register moves.
 *
 * This overload collects the code in a private CodeBuilder and
 * returns what that builder's finish() yields.
 */

code *genmovreg(uint to,uint from)
{
    CodeBuilder scratch;
    scratch.ctor();
    genmovreg(scratch, to, from);
    return scratch.finish();
}
2910 
/// Ditto, appending to `cdb`. Passes TYMAX as the type, which the typed
/// overload replaces with TYsize_t.
void genmovreg(ref CodeBuilder cdb,uint to,uint from)
{
    const tym_t unspecified = TYMAX;
    genmovreg(cdb, to, from, unspecified);
}
2915 
/**************************
 * Generate a register to register move of a value of type `tym`.
 * Nothing is generated when `to` and `from` are the same register.
 * Params:
 *      cdb = where to write code to
 *      to = destination register
 *      from = source register
 *      tym = type of the value moved; TYMAX means "unspecified" and is
 *            treated as TYsize_t
 */
@trusted
void genmovreg(ref CodeBuilder cdb, uint to, uint from, tym_t tym)
{
    // Map a register to the representative of its kind. ex: GPR,XMM,SEG
    static uint kindOf(uint reg)
    {
        switch (reg)
        {
        case ES:                   return ES;
        case XMM15:
        case XMM0: .. case XMM7:   return XMM0;
        case AX:   .. case R15:    return AX;
        default:                   return reg;
        }
    }

    // Combine destination and source kinds into one switchable value (order kept)
    static uint kinds(uint dst, uint src) { return (kindOf(dst) << 8) + kindOf(src); }

    if (to == from)
        return;                         // redundant move

    if (tym == TYMAX)
        tym = TYsize_t;                 // avoid register slicing

    switch (kinds(to, from))
    {
        case kinds(AX, AX):
            genregs(cdb, 0x89, from, to);    // MOV to,from
            if (I64 && tysize(tym) >= 8)
                code_orrex(cdb.last(), REX_W);
            break;

        case kinds(XMM0, XMM0):          // MOVD/Q to,from
            genregs(cdb, xmmload(tym), to-XMM0, from-XMM0);
            checkSetVex(cdb.last(), tym);
            break;

        case kinds(AX, XMM0):            // MOVD/Q to,from
            genregs(cdb, STOD, from-XMM0, to);
            if (I64 && tysize(tym) >= 8)
                code_orrex(cdb.last(), REX_W);
            checkSetVex(cdb.last(), tym);
            break;

        case kinds(XMM0, AX):            // MOVD/Q to,from
            genregs(cdb, LODD, to-XMM0, from);
            if (I64 && tysize(tym) >= 8)
                code_orrex(cdb.last(),  REX_W);
            checkSetVex(cdb.last(), tym);
            break;

        case kinds(ES, AX):
            assert(tysize(tym) <= REGSIZE);
            genregs(cdb, 0x8E, 0, from);
            break;

        case kinds(AX, ES):
            assert(tysize(tym) <= REGSIZE);
            genregs(cdb, 0x8C, 0, to);
            break;

        default:
            // Any other pairing of register kinds is not supported
            debug printf("genmovreg(to = %s, from = %s)\n"
                , regm_str(mask(to)), regm_str(mask(from)));
            assert(0);
    }
}
2982 
/***************************************
 * Generate immediate multiply instruction for r1=r2*imm.
 *
 * Two multipliers are special cased: 1 becomes a register move and
 * 5 becomes a single LEA. Everything else is an IMUL r1,r2,imm.
 * Params:
 *      cdb = where to write code to
 *      r1 = destination register
 *      r2 = source register
 *      imm = the constant multiplier
 */

@trusted
void genmulimm(ref CodeBuilder cdb,uint r1,uint r2,targ_int imm)
{
    // These optimizations should probably be put into pinholeopt()
    if (imm == 1)
    {
        genmovreg(cdb,r1,r2);
    }
    else if (imm == 5)
    {
        // LEA with r2 passed to buildEA as both registers and a scale of 4
        code lea;
        lea.Iop = LEA;
        lea.Iflags = 0;
        lea.Irex = 0;
        buildEA(&lea,r2,r2,4,0);
        lea.orReg(r1);
        cdb.gen(&lea);
    }
    else
    {
        cdb.genc2(0x69,modregxrmx(3,r1,r2),imm);    // IMUL r1,r2,imm
    }
}
3015 
/******************************
 * Load CX with the value of _AHSHIFT.
 *
 * Only implemented for SCPP builds; any other build hits assert(0).
 */

void genshift(ref CodeBuilder cdb)
{
    version (SCPP)
    {
        // Set up ahshift to trick ourselves into giving the right fixup,
        // which must be seg-relative, external frame, external target.
        Symbol* ahshift = getRtlsym(RTLSYM.AHSHIFT);
        cdb.gencs(0xC7,modregrm(3,0,CX),FLfunc,ahshift);
        cdb.last().Iflags |= CFoff;
    }
    else
    {
        assert(0);
    }
}
3032 
/******************************
 * Move constant value into reg.
 * Take advantage of existing values in registers, as tracked in
 * regcon.immed, and update that tracking for the new value.
 *
 * If flags & mPSW
 *      set flags based on result
 * Else if flags & 8
 *      do not disturb flags
 * Else
 *      don't care about flags
 * If flags & 1 then byte move
 * If flags & 2 then short move (for I32 and I64)
 * If flags & 4 then don't disturb unused portion of register
 * If flags & 16 then reg is a byte register AL..BH
 * If flags & 64 (0x40) then 64 bit move (I64 only)
 *
 * Params:
 *      cdb = where the generated code (if any) is appended
 *      reg = destination register
 *      value = constant to load
 *      flags = option bits, see above
 */

@trusted
void movregconst(ref CodeBuilder cdb,reg_t reg,targ_size_t value,regm_t flags)
{
    reg_t r;
    regm_t mreg;

    //printf("movregconst(reg=%s, value= %lld (%llx), flags=%x)\n", regm_str(mask(reg)), value, value, flags);

    // regm is non-zero if reg currently holds a known constant, regv is that constant
    regm_t regm = regcon.immed.mval & mask(reg);
    targ_size_t regv = regcon.immed.value[reg];

    if (flags & 1)      // 8 bits
    {
        value &= 0xFF;
        regm &= BYTEREGS;

        // If we already have the right value in the right register
        if (regm && (regv & 0xFF) == value)
            goto L2;

        if (flags & 16 && reg & 4 &&    // if an H byte register
            regcon.immed.mval & mask(reg & 3) &&
            (((regv = regcon.immed.value[reg & 3]) >> 8) & 0xFF) == value)
            goto L2;

        /* Avoid byte register loads to avoid dependency stalls.
         */
        if ((I32 || I64) &&
            config.target_cpu >= TARGET_PentiumPro && !(flags & 4))
            goto L3;

        // See if another register has the right value
        r = 0;
        for (mreg = (regcon.immed.mval & BYTEREGS); mreg; mreg >>= 1)
        {
            if (mreg & 1)
            {
                if ((regcon.immed.value[r] & 0xFF) == value)
                {
                    genregs(cdb,0x8A,reg,r);          // MOV regL,rL
                    /* A REX prefix is only meaningful for 64 bit code, so the
                     * register tests must both be guarded by I64. Without the
                     * parentheses, `r >= 4` alone would request the prefix.
                     */
                    if (I64 && (reg >= 4 || r >= 4))
                        code_orrex(cdb.last(), REX);
                    goto L2;
                }
                if (!(I64 && reg >= 4) &&
                    r < 4 && ((regcon.immed.value[r] >> 8) & 0xFF) == value)
                {
                    genregs(cdb,0x8A,reg,r | 4);      // MOV regL,rH
                    goto L2;
                }
            }
            r++;
        }

        if (value == 0 && !(flags & 8))
        {
            if (!(flags & 4) &&                 // if we can set the whole register
                !(flags & 16 && reg & 4))       // and reg is not an H register
            {
                genregs(cdb,0x31,reg,reg);      // XOR reg,reg
                regimmed_set(reg,value);
                regv = 0;
            }
            else
                genregs(cdb,0x30,reg,reg);      // XOR regL,regL
            flags &= ~mPSW;                     // flags already set by XOR
        }
        else
        {
            cdb.genc2(0xC6,modregrmx(3,0,reg),value);  // MOV regL,value
            if (reg >= 4 && I64)
            {
                code_orrex(cdb.last(), REX);
            }
        }
    L2:
        if (flags & mPSW)
            genregs(cdb,0x84,reg,reg);            // TEST regL,regL

        if (regm)
            // Set just the 'L' part of the register value
            regimmed_set(reg,(regv & ~cast(targ_size_t)0xFF) | value);
        else if (flags & 16 && reg & 4 && regcon.immed.mval & mask(reg & 3))
            // Set just the 'H' part of the register value
            regimmed_set((reg & 3),(regv & ~cast(targ_size_t)0xFF00) | (value << 8));
        return;
    }
L3:
    // Full register (or 16 bit) load; also reached from the byte case above
    if (I16)
        value = cast(targ_short) value;             // sign-extend MSW
    else if (I32)
        value = cast(targ_int) value;

    if (!I16 && flags & 2)                      // load 16 bit value
    {
        value &= 0xFFFF;
        if (value && !(flags & mPSW))
        {
            cdb.genc2(0xC7,modregrmx(3,0,reg),value); // MOV reg,value
            regimmed_set(reg, value);
            return;
        }
    }

    // If we already have the right value in the right register
    if (regm && (regv & 0xFFFFFFFF) == (value & 0xFFFFFFFF) && !(flags & 64))
    {
        if (flags & mPSW)
            gentstreg(cdb,reg);
    }
    else if (flags & 64 && regm && regv == value)
    {   // Look at the full 64 bits
        if (flags & mPSW)
        {
            gentstreg(cdb,reg);
            code_orrex(cdb.last(), REX_W);
        }
    }
    else
    {
        if (flags & mPSW)
        {
            switch (value)
            {
                case 0:
                    genregs(cdb,0x31,reg,reg);
                    break;

                case 1:
                    if (I64)
                        goto L4;
                    genregs(cdb,0x31,reg,reg);
                    goto inc;

                case ~cast(targ_size_t)0:
                    if (I64)
                        goto L4;
                    genregs(cdb,0x31,reg,reg);
                    goto dec;

                default:
                L4:
                    if (flags & 64)
                    {
                        cdb.genc2(0xB8 + (reg&7),REX_W << 16 | (reg&8) << 13,value); // MOV reg,value64
                        gentstreg(cdb,reg);
                        code_orrex(cdb.last(), REX_W);
                    }
                    else
                    {
                        value &= 0xFFFFFFFF;
                        cdb.genc2(0xB8 + (reg&7),(reg&8) << 13,value); // MOV reg,value
                        gentstreg(cdb,reg);
                    }
                    break;
            }
        }
        else
        {
            // Look for single byte conversion
            if (regcon.immed.mval & mAX)
            {
                if (I32)
                {
                    if (reg == AX && value == cast(targ_short) regv)
                    {
                        cdb.gen1(0x98);               // CWDE
                        goto done;
                    }
                    if (reg == DX &&
                        value == (regcon.immed.value[AX] & 0x80000000 ? 0xFFFFFFFF : 0) &&
                        !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
                       )
                    {
                        cdb.gen1(0x99);               // CDQ
                        goto done;
                    }
                }
                else if (I16)
                {
                    if (reg == AX &&
                        cast(targ_short) value == cast(byte) regv)
                    {
                        cdb.gen1(0x98);               // CBW
                        goto done;
                    }

                    if (reg == DX &&
                        cast(targ_short) value == (regcon.immed.value[AX] & 0x8000 ? cast(targ_short) 0xFFFF : cast(targ_short) 0) &&
                        !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
                       )
                    {
                        cdb.gen1(0x99);               // CWD
                        goto done;
                    }
                }
            }
            if (value == 0 && !(flags & 8) && config.target_cpu >= TARGET_80486)
            {
                genregs(cdb,0x31,reg,reg);              // XOR reg,reg
                goto done;
            }

            if (!I64 && regm && !(flags & 8))
            {
                if (regv + 1 == value ||
                    // Catch case of (0xFFFF+1 == 0) for 16 bit compiles
                    (I16 && cast(targ_short)(regv + 1) == cast(targ_short)value))
                {
                inc:
                    cdb.gen1(0x40 + reg);     // INC reg
                    goto done;
                }
                if (regv - 1 == value)
                {
                dec:
                    cdb.gen1(0x48 + reg);     // DEC reg
                    goto done;
                }
            }

            // See if another register has the right value
            r = 0;
            for (mreg = regcon.immed.mval; mreg; mreg >>= 1)
            {
                debug
                assert(!I16 || regcon.immed.value[r] == cast(targ_short)regcon.immed.value[r]);

                if (mreg & 1 && regcon.immed.value[r] == value)
                {
                    genmovreg(cdb,reg,r);
                    goto done;
                }
                r++;
            }

            if (value == 0 && !(flags & 8))
            {
                genregs(cdb,0x31,reg,reg);              // XOR reg,reg
            }
            else
            {   // See if we can just load a byte
                if (regm & BYTEREGS &&
                    !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_PentiumPro)
                   )
                {
                    if ((regv & ~cast(targ_size_t)0xFF) == (value & ~cast(targ_size_t)0xFF))
                    {
                        movregconst(cdb,reg,value,(flags & 8) |4|1);  // load regL
                        return;
                    }
                    if (regm & (mAX|mBX|mCX|mDX) &&
                        (regv & ~cast(targ_size_t)0xFF00) == (value & ~cast(targ_size_t)0xFF00) &&
                        !I64)
                    {
                        movregconst(cdb,4|reg,value >> 8,(flags & 8) |4|1|16); // load regH
                        return;
                    }
                }
                if (flags & 64)
                    cdb.genc2(0xB8 + (reg&7),REX_W << 16 | (reg&8) << 13,value); // MOV reg,value64
                else
                {
                    value &= 0xFFFFFFFF;
                    cdb.genc2(0xB8 + (reg&7),(reg&8) << 13,value); // MOV reg,value
                }
            }
        }
    done:
        // Record that reg now holds value
        regimmed_set(reg,value);
    }
}
3323 
/**************************
 * Generate a jump instruction.
 *
 * The low byte of `op` is the jump opcode. The second byte may carry a
 * second condition (JP or JNP, from floating point comparisons), which
 * is ignored when CFG4fastfloat is set.
 * Params:
 *      cdb = where to write code to
 *      op = jump opcode, optionally with a second opcode in bits 8..15
 *      fltarg = FLblock (or FLcode)
 *      targ = target block (or code, when fltarg is FLcode)
 */

@trusted
void genjmp(ref CodeBuilder cdb,opcode_t op,uint fltarg,block *targ)
{
    const bool alreadyLong = (op == JMP || op == 0xE8);

    code cs;
    cs.Iop = op & 0xFF;
    cs.Iflags = alreadyLong ? 0 : CFjmp16;  // assume long branch for op = 0x7x
    cs.Irex = 0;
    cs.IFL2 = cast(ubyte)fltarg;        // FLblock (or FLcode)
    cs.IEV2.Vblock = targ;              // target block (or code)
    if (fltarg == FLcode)
        (cast(code *)targ).Iflags |= CFtarg;

    // With fast floating point the second jump opcode is never looked at,
    // which is the same as there being none.
    const opcode_t second = (config.flags4 & CFG4fastfloat) ? 0 : (op & 0xFF00);

    switch (second)
    {
        // The JP and JNP come from floating point comparisons
        case JP << 8:
            // Jump on the primary condition, then also on parity
            cdb.gen(&cs);
            cs.Iop = JP;
            cdb.gen(&cs);
            break;

        case JNP << 8:
        {
            // Do a JP around the jump instruction
            code *skip = gennop(null);
            genjmp(cdb,JP,FLcode,cast(block *) skip);
            cdb.gen(&cs);
            cdb.append(skip);
            break;
        }

        case 1 << 8:                    // toggled no jump
        case 0 << 8:
            cdb.gen(&cs);
            break;

        default:
            debug
            printf("jop = x%x\n",op);
            assert(0);
    }
}
3378 
/*********************************************
 * Generate first part of prolog for interrupt function.
 *
 * Emits a fixed sequence of one byte opcodes (chosen by target cpu),
 * then MOV BP,SP, then adjusts the stack by `localsize` if that is
 * non-zero. Finally mTYloadds is set in `*tyf`.
 * Params:
 *      cdb = where to write code to
 *      tyf = function type flags, updated in place
 */
@trusted
void prolog_ifunc(ref CodeBuilder cdb, tym_t* tyf)
{
    // Opcode sequence for 80286 and later targets
    static immutable ubyte[3] seq286 = [ 0x60,0x1E,0x06 ];
    // Opcode sequence for earlier targets
    static immutable ubyte[10] seq8086 = [ 0x50,0x51,0x52,0x53,
                                           0x54,0x55,0x56,0x57,
                                           0x1E,0x06 ];

    immutable(ubyte)[] seq = (config.target_cpu >= TARGET_80286) ? seq286[] : seq8086[];
    foreach (opbyte; seq)
        cdb.gen1(opbyte);

    genregs(cdb,0x8B,BP,SP);     // MOV BP,SP
    if (localsize)
        cod3_stackadj(cdb, cast(int)localsize);

    *tyf |= mTYloadds;
}
3401 
/*********************************************
 * Generate the part of the prolog that reloads DS, and CLD for
 * interrupt functions.
 * Params:
 *      cdb = where to write code to
 *      tyf = function type flags; DS is reloaded if mTYloadds is set
 *      tym = basic function type; TYifunc gets a CLD
 *      pushds = true if DS has already been pushed
 */
@trusted
void prolog_ifunc2(ref CodeBuilder cdb, tym_t tyf, tym_t tym, bool pushds)
{
    const bool reloadDS = (tyf & mTYloadds) != 0;
    if (reloadDS)
    {
        if (!pushds)                           // if not already pushed
            cdb.gen1(0x1E);                    // PUSH DS
        spoff += _tysize[TYint];

        cdb.genc(0xC7,modregrm(3,0,AX),0,0,FLdatseg,cast(targ_uns) 0); // MOV  AX,DGROUP
        code *movdgroup = cdb.last();
        movdgroup.IEV2.Vseg = DATA;
        movdgroup.Iflags ^= CFseg | CFoff;     // turn off CFoff, on CFseg

        cdb.gen2(0x8E,modregrm(3,3,AX));       // MOV  DS,AX
        useregs(mAX);
    }

    if (tym == TYifunc)
        cdb.gen1(0xFC);                        // CLD
}
3422 
/*********************************************
 * Generate the prolog for a 16 bit Windows far function, as selected
 * by config.wflags.
 * Params:
 *      cdb = where to write code to
 *      tyf = function type flags; mTYloadds may be cleared here
 *      pushds = set to true if a PUSH DS was generated
 */
@trusted
void prolog_16bit_windows_farfunc(ref CodeBuilder cdb, tym_t* tyf, bool* pushds)
{
    int wflags = config.wflags;
    if (wflags & WFreduced && !(*tyf & mTYexport))
    {   // reduced prolog/epilog for non-exported functions
        wflags &= ~(WFdgroup | WFds | WFss);
    }

    getregsNoSave(mAX);                     // should not have any value in AX

    // Exactly one (or none) of these may be selected as the source of DS
    const int dsSource = wflags & (WFdgroup | WFds | WFss);
    switch (dsSource)
    {
        case WFdgroup:                      // MOV  AX,DGROUP
        {
            if (wflags & WFreduced)
                *tyf &= ~mTYloadds;          // remove redundancy
            cdb.genc(0xC7,modregrm(3,0,AX),0,0,FLdatseg,cast(targ_uns) 0);
            code *movdgroup = cdb.last();
            movdgroup.IEV2.Vseg = DATA;
            movdgroup.Iflags ^= CFseg | CFoff;     // turn off CFoff, on CFseg
            break;
        }

        case WFss:
        case WFds:
        {
            const int segreg = (dsSource == WFss) ? 2   // SS
                                                  : 3;  // DS
            cdb.gen2(0x8C,modregrm(3,segreg,AX)); // MOV AX,segreg
            if (wflags & WFds)
                cdb.gen1(0x90);             // NOP
            break;
        }

        case 0:
            break;

        default:
            debug
            printf("config.wflags = x%x\n",config.wflags);
            assert(0);
    }

    if (wflags & WFincbp)
        cdb.gen1(0x40 + BP);              // INC  BP
    cdb.gen1(0x50 + BP);                  // PUSH BP
    genregs(cdb,0x8B,BP,SP);              // MOV  BP,SP

    if (wflags & (WFsaveds | WFds | WFss | WFdgroup))
    {
        cdb.gen1(0x1E);                       // PUSH DS
        *pushds = true;
        BPoff = -REGSIZE;
    }
    if (wflags & (WFds | WFss | WFdgroup))
        cdb.gen2(0x8E,modregrm(3,3,AX));      // MOV  DS,AX
}
3481 
/**********************************************
 * Set up frame register.
 *
 * Either generates PUSH BP / MOV BP,SP (plus exception handling and
 * DWARF bookkeeping), or generates no frame code and reports through
 * `enter` that an ENTER instruction can be used instead.
 * Params:
 *      cdb        = write generated code here
 *      farfunc    = true if a far function
 *      xlocalsize = amount of local variables, set to amount to be subtracted from stack pointer
 *      enter      = set to true if ENTER instruction can be used, false otherwise
 *      cfa_offset = set to frame pointer's offset from the CFA
 */
@trusted
void prolog_frame(ref CodeBuilder cdb, bool farfunc, ref uint xlocalsize, out bool enter, out int cfa_offset)
{
    //printf("prolog_frame\n");
    cfa_offset = 0;

    // Alternative Win64 sequence; disabled by the constant 0
    if (0 && config.exe == EX_WIN64)
    {
        // PUSH RBP
        // LEA RBP,0[RSP]
        cdb.gen1(0x50 + BP);
        cdb.genc1(LEA,(REX_W<<16) | (modregrm(0,4,SP)<<8) | modregrm(2,BP,4),FLconst,0);
        enter = false;
        return;
    }

    if (config.wflags & WFincbp && farfunc)
        cdb.gen1(0x40 + BP);      // INC  BP

    // Any one of these means the frame is set up with explicit instructions
    const bool explicitFrame =
        config.target_cpu < TARGET_80286 ||
        config.exe & (EX_posix | EX_WIN64) ||
        !localsize ||
        config.flags & CFGstack ||
        (xlocalsize >= 0x1000 && config.exe & EX_flat) ||
        localsize >= 0x10000 ||
        (NTEXCEPTIONS == 2 &&
         (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || config.ehmethod == EHmethod.EH_SEH))) ||
        (config.target_cpu >= TARGET_80386 &&
         config.flags4 & CFG4speed);

    if (!explicitFrame)
    {
        enter = true;
        return;
    }

    enter = false;                /* do not use ENTER instruction */

    cdb.gen1(0x50 + BP);          // PUSH BP
    genregs(cdb,0x8B,BP,SP);      // MOV  BP,SP
    if (I64)
        code_orrex(cdb.last(), REX_W);   // MOV RBP,RSP
    if ((config.objfmt & (OBJ_ELF | OBJ_MACH)) && config.fulltypes)
        // Don't reorder instructions, as dwarf CFA relies on it
        code_orflag(cdb.last(), CFvolatile);

    static if (NTEXCEPTIONS == 2)
    {
        if (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || config.ehmethod == EHmethod.EH_SEH))
        {
            nteh_prolog(cdb);
            int sz = nteh_contextsym_size();
            assert(sz != 0);        // should be 5*4, not 0
            xlocalsize -= sz;       // sz is already subtracted from ESP
                                    // by nteh_prolog()
        }
    }

    const bool wantCFA = config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D ||
                         config.ehmethod == EHmethod.EH_DWARF;
    if (wantCFA)
    {
        int off = 2 * REGSIZE;      // 1 for the return address + 1 for the PUSH EBP
        dwarf_CFA_set_loc(1);           // address after PUSH EBP
        dwarf_CFA_set_reg_offset(SP, off); // CFA is now 8[ESP]
        dwarf_CFA_offset(BP, -off);       // EBP is at 0[ESP]
        dwarf_CFA_set_loc(I64 ? 4 : 3);   // address after MOV EBP,ESP
        /* Oddly, the CFA is not the same as the frame pointer,
         * which is why the offset of BP is set to 8
         */
        dwarf_CFA_set_reg_offset(BP, off);        // CFA is now 0[EBP]
        cfa_offset = off;  // remember the difference between the CFA and the frame pointer
    }
}
3560 
/**********************************************
 * Enforce stack alignment.
 *
 * Does nothing unless `enforcealign` is set. Otherwise the number of bytes
 * already on the stack at this point (the return address, plus the saved
 * frame pointer when `hasframe` is set) is examined, and cod3_stackalign()
 * is invoked when that amount is not a multiple of STACKALIGN or when
 * TARGET_STACKALIGN is smaller than STACKALIGN.
 * Params:
 *      cdb = code builder that receives the generated instructions
 */
@trusted
void prolog_stackalign(ref CodeBuilder cdb)
{
    if (enforcealign)
    {
        // one slot for the return address, one more for the PUSH EBP
        const slots = hasframe ? 2 : 1;
        const pushed = slots * REGSIZE;
        const bool misaligned = (pushed & (STACKALIGN - 1)) != 0;
        if (misaligned || TARGET_STACKALIGN < STACKALIGN)
            cod3_stackalign(cdb, STACKALIGN);
    }
}
3578 
/**********************************************
 * Generate the prolog code that allocates `xlocalsize` bytes of stack.
 *
 * One of these strategies is chosen:
 * - when a stack check is required: a call to the CHKSTK runtime routine
 *   (16 bit code), or a loop that steps the stack pointer down 0x1000 bytes
 *   at a time and touches each step, followed by the remainder;
 * - otherwise, an ENTER instruction if `enter` is set;
 * - otherwise, a single PUSH when exactly REGSIZE bytes are wanted and
 *   CFG4optimized is set;
 * - otherwise, a plain stack pointer adjustment.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      tyf = function type; TYmfunc selects CX instead of AX as the register to PUSH
 *      xlocalsize = number of bytes to allocate
 *      enter = true if the ENTER instruction is to be used
 *      pushalloc = set to true if the allocation was done with a PUSH,
 *                  not written otherwise
 */
@trusted
void prolog_frameadj(ref CodeBuilder cdb, tym_t tyf, uint xlocalsize, bool enter, bool* pushalloc)
{
    // seems that Linux doesn't need to fault in stack pages
    const bool onLinux = (config.exe & (EX_LINUX | EX_LINUX64)) != 0;
    const bool probe = !onLinux &&
        (((config.flags & CFGstack) && !(I32 && xlocalsize < 0x1000)) // if stack overflow check
         || ((config.exe & (EX_windos & EX_flat)) && xlocalsize >= 0x1000));

    if (!probe)
    {
        if (enter)
        {
            // ENTER xlocalsize,0
            cdb.genc(ENTER,0,FLconst,xlocalsize,FLconst,cast(targ_uns) 0);
            assert(!(config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D)); // didn't emit Dwarf data
            return;
        }
        if (xlocalsize == REGSIZE && (config.flags4 & CFG4optimized))
        {
            const uint pushreg = (tyf == TYmfunc) ? CX : AX;
            cdb.gen1(0x50 + pushreg);               // PUSH AX (CX for TYmfunc)
            // Marked volatile so that an -x[EBP] access is not moved
            // in front of the push.
            code_orflag(cdb.last(),CFvolatile);
            *pushalloc = true;
            return;
        }
        cod3_stackadj(cdb, xlocalsize);
        return;
    }

    if (I16)
    {
        // BUG: Won't work if parameter is passed in AX
        movregconst(cdb,AX,xlocalsize,false);       // MOV AX,localsize
        makeitextern(getRtlsym(RTLSYM.CHKSTK));
        // CALL _chkstk (far call opcode for large code models)
        cdb.gencs((LARGECODE) ? 0x9A : CALL,0,FLfunc,getRtlsym(RTLSYM.CHKSTK));
        useregs((ALLREGS | mBP | mES) & ~getRtlsym(RTLSYM.CHKSTK).Sregsaved);
        return;
    }

    /* Emit:
     *      MOV     EDX, xlocalsize/0x1000
     *  L1: SUB     ESP, 0x1000
     *      TEST    [ESP],ESP
     *      DEC     EDX
     *      JNE     L1
     *      SUB     ESP, xlocalsize % 0x1000
     *
     * In 64 bit code EDX may carry a register parameter, so R11 is the
     * scratch register there.
     */
    const reg_t scratch = I64 ? R11 : DX;

    movregconst(cdb, scratch, xlocalsize / 0x1000, false);
    cod3_stackadj(cdb, 0x1000);
    code_orflag(cdb.last(), CFtarg2);               // this SUB is the loop target L1
    cdb.gen2sib(0x85, modregrm(0,SP,4),modregrm(0,4,SP));   // TEST [ESP],ESP
    if (!I64)
    {
        cdb.gen1(0x48 + DX);                        // DEC EDX
        cdb.genc2(JNE,0,cast(targ_uns)-12);         // hard-coded displacement back to L1
    }
    else
    {
        cdb.gen2(0xFF, modregrmx(3,1,R11));         // DEC R11D
        cdb.genc2(JNE,0,cast(targ_uns)-15);         // hard-coded displacement back to L1
    }
    regimmed_set(scratch,0);                        // scratch is now 0
    cod3_stackadj(cdb, xlocalsize & 0xFFF);
    useregs(mask(scratch));
}
3651 
/**********************************************
 * Allocate `xlocalsize` bytes of stack, using PUSH instructions when the
 * amount is exactly one or two registers' worth.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      tyf = function type; TYmfunc selects CX instead of AX as the register to PUSH
 *      xlocalsize = number of bytes to allocate
 *      pushalloc = set to true if PUSH was used for the allocation,
 *                  not written otherwise
 */
void prolog_frameadj2(ref CodeBuilder cdb, tym_t tyf, uint xlocalsize, bool* pushalloc)
{
    const bool oneSlot = xlocalsize == REGSIZE;
    const bool twoSlots = !oneSlot && xlocalsize == 2 * REGSIZE;

    if (!oneSlot && !twoSlots)
    {
        // any other size: adjust the stack pointer directly
        cod3_stackadj(cdb, xlocalsize);
        return;
    }

    const uint pushreg = (tyf == TYmfunc) ? CX : AX;
    const int npush = oneSlot ? 1 : 2;
    foreach (n; 0 .. npush)
        cdb.gen1(0x50 + pushreg);           // PUSH AX (CX for TYmfunc)
    *pushalloc = true;
}
3669 
/**********************************************
 * Generate the instruction that sets up the magic parameter for alloca().
 *
 * A store of the immediate value `localsize - BPoff` is emitted to the
 * BP relative location `Alloca.offset + BPoff`; in 64 bit code the
 * instruction is given a REX.W prefix.
 * Params:
 *      cdb = code builder that receives the generated instruction
 */
@trusted
void prolog_setupalloca(ref CodeBuilder cdb)
{
    //printf("prolog_setupalloca() offset x%x size x%x alignment x%x\n",
        //cast(int)Alloca.offset, cast(int)Alloca.size, cast(int)Alloca.alignment);
    const slot = Alloca.offset + BPoff;     // displacement of the magic parameter
    const magic = localsize - BPoff;        // value stored there

    // MOV slot[BP],magic
    cdb.genc(0xC7, modregrm(2,0,BPRM), FLconst, slot, FLconst, magic);
    if (I64)
        code_orrex(cdb.last(), REX_W);
}
3683 
/**************************************
 * Save registers that the function destroys,
 * but that the ABI says should be preserved across
 * function calls.
 *
 * When `pushoffuse` is set the registers are stored into a preallocated
 * area of the stack frame (XMM registers first, 16 bytes each, followed by
 * the general purpose registers). Otherwise they are pushed one at a time,
 * and `EBPtoESP` and `spoff` are advanced by the amount pushed.
 *
 * Emit Dwarf info for these saves. Only the general purpose register saves
 * get a Dwarf record; the XMM saves do not.
 * Params:
 *      cdb = append generated instructions to this
 *      topush = mask of registers to push
 *      cfa_offset = offset of frame pointer from CFA
 */

@trusted
void prolog_saveregs(ref CodeBuilder cdb, regm_t topush, int cfa_offset)
{
    // Dwarf frame records are wanted for Dwarf debug info and for Dwarf EH
    const bool emitCFA = config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D ||
                         config.ehmethod == EHmethod.EH_DWARF;

    if (pushoffuse)
    {
        // Save to preallocated section in the stack frame
        const bool viaBP = hasframe && !enforcealign;       // address the slots off EBP, else off ESP
        const int xmmtopush = popcnt(topush & XMMREGS);     // XMM regs take 16 bytes
        targ_size_t xmmoffset = pushoff + BPoff;
        if (!viaBP)
            xmmoffset += EBPtoESP;                          // rebase the offset onto ESP
        targ_size_t gpoffset = xmmoffset + xmmtopush * 16;  // GP slots follow the XMM slots
        while (topush)
        {
            reg_t reg = findreg(topush);
            topush &= ~mask(reg);
            if (isXMMreg(reg))
            {
                if (viaBP)
                {
                    // MOVUPD xmmoffset[EBP],xmm
                    cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,BPRM),FLconst,xmmoffset);
                }
                else
                {
                    // MOVUPD xmmoffset[ESP],xmm
                    cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,xmmoffset);
                }
                xmmoffset += 16;
            }
            else
            {
                if (viaBP)
                {
                    // MOV gpoffset[EBP],reg
                    cdb.genc1(0x89,modregxrm(2,reg,BPRM),FLconst,gpoffset);
                }
                else
                {
                    // MOV gpoffset[ESP],reg
                    cdb.genc1(0x89,modregxrm(2,reg,4) + 256*modregrm(0,4,SP),FLconst,gpoffset);
                }
                if (I64)
                    code_orrex(cdb.last(), REX_W);
                if (emitCFA)
                {   // Emit debug_frame data giving location of saved register
                    // The code generated so far is finished and sized so the
                    // record can carry the address just past the save, then
                    // it is put back into the builder.
                    code *c = cdb.finish();
                    pinholeopt(c, null);
                    dwarf_CFA_set_loc(calcblksize(c));  // address after save
                    dwarf_CFA_offset(reg, cast(int)(gpoffset - cfa_offset));
                    cdb.reset();
                    cdb.append(c);
                }
                gpoffset += REGSIZE;
            }
        }
    }
    else
    {
        while (topush)                      /* while registers to push      */
        {
            reg_t reg = findreg(topush);
            topush &= ~mask(reg);
            if (isXMMreg(reg))
            {
                // SUB RSP,16
                cod3_stackadj(cdb, 16);
                // MOVUPD 0[RSP],xmm
                cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,0);
                EBPtoESP += 16;
                spoff += 16;
            }
            else
            {
                genpush(cdb, reg);
                EBPtoESP += REGSIZE;
                spoff += REGSIZE;
                if (emitCFA)
                {   // Emit debug_frame data giving location of saved register
                    // relative to 0[EBP]
                    code *c = cdb.finish();
                    pinholeopt(c, null);
                    dwarf_CFA_set_loc(calcblksize(c));  // address after PUSH reg
                    dwarf_CFA_offset(reg, -EBPtoESP - cfa_offset);
                    cdb.reset();
                    cdb.append(c);
                }
            }
        }
    }
}
3789 
/**************************************
 * Undo prolog_saveregs()
 *
 * When `pushoffuse` is set the registers are reloaded from the preallocated
 * area of the stack frame, using the same layout as prolog_saveregs()
 * (XMM registers first, 16 bytes each, then the general purpose registers).
 * Otherwise they are popped off the stack, scanning the register numbers
 * downwards from the starting register.
 * Params:
 *      cdb = append generated instructions to this
 *      topop = mask of registers to restore; may only contain XMM registers
 *              and the registers in the low 16 bits of the mask
 */

@trusted
private void epilog_restoreregs(ref CodeBuilder cdb, regm_t topop)
{
    debug
    if (topop & ~(XMMREGS | 0xFFFF))
        printf("fregsaved = %s, mfuncreg = %s\n",regm_str(fregsaved),regm_str(mfuncreg));

    assert(!(topop & ~(XMMREGS | 0xFFFF)));
    if (pushoffuse)
    {
        // Restore from preallocated section in the stack frame
        const bool viaBP = hasframe && !enforcealign;       // address the slots off EBP, else off ESP
        const int xmmtopop = popcnt(topop & XMMREGS);       // XMM regs take 16 bytes
        targ_size_t xmmoffset = pushoff + BPoff;
        if (!viaBP)
            xmmoffset += EBPtoESP;                          // rebase the offset onto ESP
        targ_size_t gpoffset = xmmoffset + xmmtopop * 16;   // GP slots follow the XMM slots
        while (topop)
        {
            reg_t reg = findreg(topop);
            topop &= ~mask(reg);
            if (isXMMreg(reg))
            {
                if (viaBP)
                {
                    // MOVUPD xmm,xmmoffset[EBP]
                    cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,BPRM),FLconst,xmmoffset);
                }
                else
                {
                    // MOVUPD xmm,xmmoffset[ESP]
                    cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,xmmoffset);
                }
                xmmoffset += 16;
            }
            else
            {
                if (viaBP)
                {
                    // MOV reg,gpoffset[EBP]
                    cdb.genc1(0x8B,modregxrm(2,reg,BPRM),FLconst,gpoffset);
                }
                else
                {
                    // MOV reg,gpoffset[ESP]
                    cdb.genc1(0x8B,modregxrm(2,reg,4) + 256*modregrm(0,4,SP),FLconst,gpoffset);
                }
                if (I64)
                    code_orrex(cdb.last(), REX_W);
                gpoffset += REGSIZE;
            }
        }
    }
    else
    {
        // Starting register for the downward scan. When no XMM register is
        // in the mask the scan starts at R15.
        reg_t reg = I64 ? XMM7 : DI;
        if (!(topop & XMMREGS))
            reg = R15;
        regm_t regm = 1 << reg;

        while (topop)
        {   if (topop & regm)
            {
                if (isXMMreg(reg))
                {
                    // MOVUPD xmm,0[RSP]
                    cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,0);
                    // ADD RSP,16
                    cod3_stackadj(cdb, -16);
                }
                else
                {
                    cdb.gen1(0x58 + (reg & 7));         // POP reg
                    if (reg & 8)
                        code_orrex(cdb.last(), REX_B);  // registers 8 and above need REX.B
                }
                topop &= ~regm;
            }
            regm >>= 1;
            reg--;
        }
    }
}
3877 
version (SCPP)
{
/**************************************
 * Generate the call to the function-entry trace routine, followed by the
 * name of the current function embedded inline in the code stream.
 *
 * For ELF and Mach-O the name is `funcsym_p.Sident` with a length prefix:
 * a single length byte when the name is at most 254 bytes long, otherwise
 * the bytes 0xFF, 0, followed by the low and high bytes of the length.
 * For other object formats the name is produced by `objmod.mangle()`.
 * Params:
 *      cdb = code builder that receives the generated code
 *      farfunc = true selects the TRACE_PRO_F runtime symbol, false TRACE_PRO_N
 *      regsaved = set to the `Sregsaved` mask of the trace routine that is called
 */
@trusted
void prolog_trace(ref CodeBuilder cdb, bool farfunc, uint* regsaved)
{
    Symbol *s = getRtlsym(farfunc ? RTLSYM.TRACE_PRO_F : RTLSYM.TRACE_PRO_N);
    makeitextern(s);
    cdb.gencs(I16 ? 0x9A : CALL,0,FLfunc,s);      // CALL _trace
    if (!I16)
        code_orflag(cdb.last(),CFoff | CFselfrel);
    /* Embedding the function name inline after the call works, but it
     * makes disassembling the code annoying.
     */
    static if (ELFOBJ || MACHOBJ)
    {
        // Generate length prefixed name that is recognized by profiler
        const(char)* ident = funcsym_p.Sident.ptr;
        size_t len = strlen(ident);
        char *buffer = cast(char *)malloc(len + 4);   // room for the longest (4 byte) prefix
        assert(buffer);
        if (len <= 254)
        {
            // short form: one length byte
            buffer[0] = cast(char)len;
            memcpy(buffer + 1, ident, len);
            len++;
        }
        else
        {
            // long form: 0xFF, 0, then a 16 bit little endian length
            buffer[0] = 0xFF;
            buffer[1] = 0;
            buffer[2] = cast(char)(len & 0xFF);
            buffer[3] = cast(char)((len >> 8) & 0xFF);
            memcpy(buffer + 4, ident, len);
            len += 4;
        }
        cdb.genasm(buffer, len);         // append func name
        free(buffer);
    }
    else
    {
        char [IDMAX+IDOHD+1] name = void;
        size_t len = objmod.mangle(funcsym_p,name.ptr);
        assert(len < name.length);
        cdb.genasm(name.ptr,len);             // append func name
    }
    *regsaved = s.Sregsaved;
}
}
3925 
/******************************
 * Generate special varargs prolog for Posix 64 bit systems.
 *
 * The argument registers that do not carry named parameters are stored
 * into the stack variable `sv`, and its bookkeeping fields are filled in.
 * The whole sequence is run through pinholeopt() and RAX and R11 are
 * marked as used, since the generated code destroys them.
 * `Para.offset` is rounded up to a multiple of REGSIZE as a side effect.
 * Params:
 *      cdb = sink for generated code
 *      sv = symbol for __va_argsave
 *      namedargs = registers that named parameters (not ... arguments) were passed in.
 */
@trusted
void prolog_genvarargs(ref CodeBuilder cdb, Symbol* sv, regm_t namedargs)
{
    /* Generate code to move any arguments passed in registers into
     * the stack variable __va_argsave,
     * so we can reference it via pointers through va_arg().
     *   struct __va_argsave_t {
     *     size_t[6] regs;
     *     real[8] fpregs;
     *     uint offset_regs;
     *     uint offset_fpregs;
     *     void* stack_args;
     *     void* reg_args;
     *   }
     * The MOVAPS instructions seg fault if data is not aligned on
     * 16 bytes, so this gives us a nice check to ensure no mistakes.
        MOV     voff+0*8[RBP],EDI
        MOV     voff+1*8[RBP],ESI
        MOV     voff+2*8[RBP],RDX
        MOV     voff+3*8[RBP],RCX
        MOV     voff+4*8[RBP],R8
        MOV     voff+5*8[RBP],R9
        MOVZX   EAX,AL                      // AL = 0..8, # of XMM registers used
        SHL     EAX,2                       // 4 bytes for each MOVAPS
        LEA     R11,offset L2[RIP]
        SUB     R11,RAX
        LEA     RAX,voff+6*8+0x7F[RBP]
        JMP     R11d
        MOVAPS  -0x0F[RAX],XMM7             // only save XMM registers if actually used
        MOVAPS  -0x1F[RAX],XMM6
        MOVAPS  -0x2F[RAX],XMM5
        MOVAPS  -0x3F[RAX],XMM4
        MOVAPS  -0x4F[RAX],XMM3
        MOVAPS  -0x5F[RAX],XMM2
        MOVAPS  -0x6F[RAX],XMM1
        MOVAPS  -0x7F[RAX],XMM0
      L2:
        MOV     1[RAX],offset_regs          // set __va_argsave.offset_regs
        MOV     5[RAX],offset_fpregs        // set __va_argsave.offset_fpregs
        LEA     R11, Para.size+Para.offset[RBP]
        MOV     9[RAX],R11                  // set __va_argsave.stack_args
        SUB     RAX,6*8+0x7F                // point to start of __va_argsave
        MOV     6*8+8*16+4+4+8[RAX],RAX     // set __va_argsave.reg_args
    * RAX and R11 are destroyed.
    */

    /* Save registers into the voff area on the stack
     */
    targ_size_t voff = Auto.size + BPoff + sv.Soffset;  // EBP offset of start of sv
    const int vregnum = 6;                              // number of general purpose argument registers

    static immutable ubyte[vregnum] regs = [ DI,SI,DX,CX,R8,R9 ];

    // Without a usable frame pointer everything is addressed off RSP instead
    if (!hasframe || enforcealign)
        voff += EBPtoESP;

    for (int i = 0; i < vregnum; i++)
    {
        uint r = regs[i];
        if (!(mask(r) & namedargs))  // unnamed arguments would be the ... ones
        {
            uint ea = (REX_W << 16) | modregxrm(2,r,BPRM);
            if (!hasframe || enforcealign)
                ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,r,4);
            cdb.genc1(0x89,ea,FLconst,voff + i*8);
        }
    }

    genregs(cdb,MOVZXb,AX,AX);                 // MOVZX EAX,AL
    cdb.genc2(0xC1,modregrm(3,4,AX),2);                     // SHL EAX,2
    int raxoff = cast(int)(voff+6*8+0x7F);
    // Hard-coded distance from the LEA R11 below to label L2; it depends on
    // the encoded size of the LEA RAX displacement and on the sib byte.
    uint L2offset = (raxoff < -0x7F) ? 0x2D : 0x2A;
    if (!hasframe || enforcealign)
        L2offset += 1;                                      // +1 for sib byte
    // LEA R11,offset L2[RIP]
    cdb.genc1(LEA,(REX_W << 16) | modregxrm(0,R11,5),FLconst,L2offset);
    genregs(cdb,0x29,AX,R11);                  // SUB R11,RAX
    code_orrex(cdb.last(), REX_W);
    // LEA RAX,voff+6*8+0x7F[RBP]
    uint ea = (REX_W << 16) | modregrm(2,AX,BPRM);
    if (!hasframe || enforcealign)
        // add sib byte for [RSP] addressing
        ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,AX,4);
    cdb.genc1(LEA,ea,FLconst,raxoff);
    cdb.gen2(0xFF,modregrmx(3,4,R11));                      // JMP R11d
    for (int i = 0; i < 8; i++)
    {
        // MOVAPS -15-16*i[RAX],XMM7-i
        cdb.genc1(0x0F29,modregrm(0,XMM7-i,0),FLconst,-15-16*i);
    }

    /* Compute offset_regs and offset_fpregs
     * by skipping over the slots used by the named parameters
     */
    uint offset_regs = 0;
    uint offset_fpregs = vregnum * 8;
    for (int i = AX; i <= XMM7; i++)
    {
        regm_t m = mask(i);
        if (m & namedargs)
        {
            if (m & (mDI|mSI|mDX|mCX|mR8|mR9))
                offset_regs += 8;
            else if (m & XMMREGS)
                offset_fpregs += 16;
            namedargs &= ~m;
            if (!namedargs)
                break;                  // no named registers left to account for
        }
    }
    // MOV 1[RAX],offset_regs
    cdb.genc(0xC7,modregrm(2,0,AX),FLconst,1,FLconst,offset_regs);

    // MOV 5[RAX],offset_fpregs
    cdb.genc(0xC7,modregrm(2,0,AX),FLconst,5,FLconst,offset_fpregs);

    // LEA R11, Para.size+Para.offset[RBP]
    ea = modregxrm(2,R11,BPRM);
    // NOTE(review): this frameless form names DX in the reg field without
    // REX.R, whereas the framed form targets R11, and unlike the stores
    // above it tests only !hasframe and adds no EBPtoESP. Left unchanged;
    // confirm the intended encoding.
    if (!hasframe)
        ea = (modregrm(0,4,SP) << 8) | modregrm(2,DX,4);
    Para.offset = (Para.offset + (REGSIZE - 1)) & ~(REGSIZE - 1);
    cdb.genc1(LEA,(REX_W << 16) | ea,FLconst,Para.size + Para.offset);

    // MOV 9[RAX],R11
    cdb.genc1(0x89,(REX_W << 16) | modregxrm(2,R11,AX),FLconst,9);

    // SUB RAX,6*8+0x7F             // point to start of __va_argsave
    cdb.genc2(0x2D,0,6*8+0x7F);
    code_orrex(cdb.last(), REX_W);

    // MOV 6*8+8*16+4+4+8[RAX],RAX  // set __va_argsave.reg_args
    cdb.genc1(0x89,(REX_W << 16) | modregrm(2,AX,AX),FLconst,6*8+8*16+4+4+8);

    pinholeopt(cdb.peek(), null);
    useregs(mAX|mR11);
}
4069 
/******************************
 * Win64 counterpart of prolog_genvarargs(); currently generates no code.
 *
 * Under the Microsoft scheme
 * (https://msdn.microsoft.com/en-US/library/dd2wa36c%28v=vs.100%29)
 * the register arguments are copied onto the stack:
 *
 *      mov     8[RSP],RCX
 *      mov     010h[RSP],RDX
 *      mov     018h[RSP],R8
 *      mov     020h[RSP],R9
 *
 * Those stores are emitted by prolog_loadparams() for Win64 variadic
 * functions, which leaves nothing to do here.
 * Params:
 *      cdb = sink for generated code (not used)
 */
void prolog_gen_win64_varargs(ref CodeBuilder cdb)
{
    // intentionally empty
}
4081 
/************************************
 * Generate the prolog code that moves the incoming parameters to the
 * locations the function body expects them in.
 *
 * The work is done in this order:
 * 1. parameters passed in registers (SC.fastpar, SC.shadowreg) that were
 *    not assigned a register are stored into their stack locations;
 * 2. for Win64 variadic functions, those of CX, DX, R8 and R9 that step 1
 *    did not handle are stored into the parameter area;
 * 3. parameters passed in registers that were assigned a register are
 *    moved from the incoming register to the assigned one;
 * 4. parameters passed on the stack (SC.regpar, SC.parameter) that are
 *    enregistered are loaded from the stack, when `refparam` is set or
 *    (for MARS) the parameter type is volatile.
 * Params:
 *      cdb = generated code sink
 *      tyf = what's the type of the function
 *      pushalloc = use PUSH to allocate on the stack rather than subtracting from SP
 *      namedargs = set to the registers that named parameters were passed in
 */
@trusted
void prolog_loadparams(ref CodeBuilder cdb, tym_t tyf, bool pushalloc, out regm_t namedargs)
{
    //printf("prolog_loadparams() %s\n", funcsym_p.Sident.ptr);
    debug
    for (SYMIDX si = 0; si < globsym.length; si++)
    {
        Symbol *s = globsym[si];
        if (debugr && (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg))
        {
            printf("symbol '%s' is fastpar in register [l %s, m %s]\n", s.Sident.ptr,
                regm_str(mask(s.Spreg)),
                (s.Spreg2 == NOREG ? "NOREG" : regm_str(mask(s.Spreg2))));
            if (s.Sfl == FLreg)
                printf("\tassigned to register %s\n", regm_str(mask(s.Sreglsw)));
        }
    }

    // Strip any levels of static array off a type, yielding the element type
    static type* type_arrayBase(type* ta)
    {
        while (tybasic(ta.Tty) == TYarray)
            ta = ta.Tnext;
        return ta;
    }

    // Register that a PUSH based frame allocation would have pushed
    uint pushallocreg = (tyf == TYmfunc) ? CX : AX;

    /* Copy SCfastpar and SCshadowreg (parameters passed in registers) that were not assigned
     * registers into their stack locations.
     */
    regm_t shadowregm = 0;      // parameter registers stored by this loop
    for (SYMIDX si = 0; si < globsym.length; si++)
    {
        Symbol *s = globsym[si];
        uint sz = cast(uint)type_size(s.Stype);

        if (!((s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg) && s.Sfl != FLreg))
            continue;
        // Argument is passed in a register

        type *t = s.Stype;
        type *t2 = null;

        tym_t tyb = tybasic(t.Tty);

        // This logic is same as FuncParamRegs_alloc function at src/dmd/backend/cod1.d
        //
        // Find suitable SROA based on the element type
        // (Don't put volatile parameters in registers on Windows)
        if (tyb == TYarray && (config.exe != EX_WIN64 || !(t.Tty & mTYvolatile)))
        {
            type *targ1;
            argtypes(t, targ1, t2);
            if (targ1)
                t = targ1;
        }

        // If struct just wraps another type
        if (tyb == TYstruct)
        {
            // On windows 64 bits, structs occupy a general purpose register,
            // regardless of the struct size or the number & types of its fields.
            if (config.exe != EX_WIN64)
            {
                type *targ1 = t.Ttag.Sstruct.Sarg1type;
                t2 = t.Ttag.Sstruct.Sarg2type;
                if (targ1)
                    t = targ1;
            }
        }

        if (Symbol_Sisdead(*s, anyiasm))
        {
            // Ignore it, as it is never referenced
            continue;
        }

        targ_size_t offset = Fast.size + BPoff;
        if (s.Sclass == SC.shadowreg)
            offset = Para.size;
        offset += s.Soffset;
        if (!hasframe || (enforcealign && s.Sclass != SC.shadowreg))
            offset += EBPtoESP;     // will be addressed off ESP rather than EBP

        reg_t preg = s.Spreg;
        foreach (i; 0 .. 2)     // twice, once for each possible parameter register
        {
            shadowregm |= mask(preg);
            const opcode_t op = isXMMreg(preg)
                ? xmmstore(type_arrayBase(t).Tty)
                : 0x89;    // MOV x[EBP],preg
            // No store is generated for a fastpar whose register is the one
            // used by a PUSH based frame allocation
            if (!(pushalloc && preg == pushallocreg) || s.Sclass == SC.shadowreg)
            {
                if (hasframe && (!enforcealign || s.Sclass == SC.shadowreg))
                {
                    // MOV x[EBP],preg
                    cdb.genc1(op,modregxrm(2,preg,BPRM),FLconst,offset);
                    if (isXMMreg(preg))
                    {
                        checkSetVex(cdb.last(), t.Tty);
                    }
                    else
                    {
                        //printf("%s Fast.size = %d, BPoff = %d, Soffset = %d, sz = %d\n",
                        //         s.Sident, (int)Fast.size, (int)BPoff, (int)s.Soffset, (int)sz);
                        if (I64 && sz > 4)
                            code_orrex(cdb.last(), REX_W);
                    }
                }
                else
                {
                    // MOV offset[ESP],preg
                    // BUG: byte size?
                    cdb.genc1(op,
                              (modregrm(0,4,SP) << 8) |
                               modregxrm(2,preg,4),FLconst,offset);
                    if (isXMMreg(preg))
                    {
                        checkSetVex(cdb.last(), t.Tty);
                    }
                    else
                    {
                        if (I64 && sz > 4)
                            cdb.last().Irex |= REX_W;
                    }
                }
            }
            preg = s.Spreg2;
            if (preg == NOREG)
                break;
            if (t2)
                t = t2;
            offset += REGSIZE;
        }
    }

    if (config.exe == EX_WIN64 && variadic(funcsym_p.Stype))
    {
        /* The Microsoft scheme.
         * https://msdn.microsoft.com/en-US/library/dd2wa36c%28v=vs.100%29
         * Copy registers onto stack.
             mov     8[RSP],RCX or XMM0
             mov     010h[RSP],RDX or XMM1
             mov     018h[RSP],R8 or XMM2
             mov     020h[RSP],R9 or XMM3
         */
        static immutable reg_t[4] vregs = [ CX,DX,R8,R9 ];
        for (int i = 0; i < vregs.length; ++i)
        {
            uint preg = vregs[i];
            uint offset = cast(uint)(Para.size + i * REGSIZE);
            // Skip slots whose register (or matching XMM register) was
            // already stored by the loop above
            if (!(shadowregm & (mask(preg) | mask(XMM0 + i))))
            {
                if (hasframe)
                {
                    // MOV x[EBP],preg
                    cdb.genc1(0x89,
                                     modregxrm(2,preg,BPRM),FLconst, offset);
                }
                else
                {
                    // MOV offset[ESP],preg
                    cdb.genc1(0x89,
                                     (modregrm(0,4,SP) << 8) |
                                     modregxrm(2,preg,4),FLconst,offset + EBPtoESP);
                }
                cdb.last().Irex |= REX_W;       // 64 bit store for either addressing form
            }
        }
    }

    /* Copy SCfastpar and SCshadowreg (parameters passed in registers) that were assigned registers
     * into their assigned registers.
     * Note that we have a big problem if Pa is passed in R1 and assigned to R2,
     * and Pb is passed in R2 but assigned to R1. Detect it and assert.
     */
    regm_t assignregs = 0;      // assigned registers written so far
    for (SYMIDX si = 0; si < globsym.length; si++)
    {
        Symbol *s = globsym[si];
        uint sz = cast(uint)type_size(s.Stype);

        // Every register parameter counts as a named argument,
        // whether or not it was assigned a register
        if (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg)
            namedargs |= s.Spregm();

        if (!((s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg) && s.Sfl == FLreg))
        {
            // Not a register parameter that was assigned a register
            continue;
        }

        type *t = s.Stype;
        type *t2 = null;
        if (tybasic(t.Tty) == TYstruct && config.exe != EX_WIN64)
        {   type *targ1 = t.Ttag.Sstruct.Sarg1type;
            t2 = t.Ttag.Sstruct.Sarg2type;
            if (targ1)
                t = targ1;
        }

        reg_t preg = s.Spreg;
        reg_t r = s.Sreglsw;
        for (int i = 0; i < 2; ++i)
        {
            if (preg == NOREG)
                break;
            assert(!(mask(preg) & assignregs));         // not already stepped on
            assignregs |= mask(r);

            // MOV reg,preg
            if (r == preg)
            {
                // already in the right register
            }
            else if (mask(preg) & XMMREGS)
            {
                const op = xmmload(t.Tty);      // MOVSS/D xreg,preg
                uint xreg = r - XMM0;
                cdb.gen2(op,modregxrmx(3,xreg,preg - XMM0));
            }
            else
            {
                //printf("test1 mov %s, %s\n", regstring[r], regstring[preg]);
                genmovreg(cdb,r,preg);
                if (I64 && sz == 8)
                    code_orrex(cdb.last(), REX_W);
            }
            preg = s.Spreg2;
            r = s.Sregmsw;
            if (t2)
                t = t2;
        }
    }

    /* For parameters that were passed on the stack, but are enregistered,
     * initialize the registers with the parameter stack values.
     * Do not use assignaddr(), as it will replace the stack reference with
     * the register.
     */
    for (SYMIDX si = 0; si < globsym.length; si++)
    {
        Symbol *s = globsym[si];
        uint sz = cast(uint)type_size(s.Stype);

        if (!((s.Sclass == SC.regpar || s.Sclass == SC.parameter) &&
            s.Sfl == FLreg &&
            (refparam
                // This variable has been reference by a nested function
                || MARS && s.Stype.Tty & mTYvolatile
                )))
        {
            continue;
        }
        // MOV reg,param[BP]
        //assert(refparam);
        if (mask(s.Sreglsw) & XMMREGS)
        {
            const op = xmmload(s.Stype.Tty);  // MOVSS/D xreg,mem
            uint xreg = s.Sreglsw - XMM0;
            cdb.genc1(op,modregxrm(2,xreg,BPRM),FLconst,Para.size + s.Soffset);
            if (!hasframe)
            {   // Convert to ESP relative address rather than EBP
                code *c = cdb.last();
                c.Irm = cast(ubyte)modregxrm(2,xreg,4);
                c.Isib = modregrm(0,4,SP);
                c.IEV1.Vpointer += EBPtoESP;
            }
            continue;
        }

        // byte sized parameters use the byte form of MOV
        cdb.genc1(sz == 1 ? 0x8A : 0x8B,
            modregxrm(2,s.Sreglsw,BPRM),FLconst,Para.size + s.Soffset);
        code *c = cdb.last();
        if (!I16 && sz == SHORTSIZE)
            c.Iflags |= CFopsize; // operand size
        if (I64 && sz >= REGSIZE)
            c.Irex |= REX_W;
        if (I64 && sz == 1 && s.Sreglsw >= 4)
            c.Irex |= REX;
        if (!hasframe)
        {   // Convert to ESP relative address rather than EBP
            assert(!I16);
            c.Irm = cast(ubyte)modregxrm(2,s.Sreglsw,4);
            c.Isib = modregrm(0,4,SP);
            c.IEV1.Vpointer += EBPtoESP;
        }
        if (sz > REGSIZE)
        {
            // the most significant half goes into the second register
            cdb.genc1(0x8B,
                modregxrm(2,s.Sregmsw,BPRM),FLconst,Para.size + s.Soffset + REGSIZE);
            code *cx = cdb.last();
            if (I64)
                cx.Irex |= REX_W;
            if (!hasframe)
            {   // Convert to ESP relative address rather than EBP
                assert(!I16);
                cx.Irm = cast(ubyte)modregxrm(2,s.Sregmsw,4);
                cx.Isib = modregrm(0,4,SP);
                cx.IEV1.Vpointer += EBPtoESP;
            }
        }
    }
}
4391 
/*******************************
 * Generate the function epilog for a block that ends the function and
 * append it to the block's existing code (b.Bcode).
 *
 * If BFLepilog is not set in b.Bflags, only the return instruction is
 * generated. For naked functions (mTYnaked) nothing is generated and
 * b.Bcode is left untouched.
 *
 * Params:
 *      b = block to generate the epilog for
 * Output:
 *      retsize         Size of function epilog. It is reset to 0 when a
 *                      full epilog is requested and then increased by the
 *                      size of the code generated here.
 */

@trusted
void epilog(block *b)
{
    code *cpopds;
    regm_t topop;
    targ_size_t xlocalsize = localsize;

    CodeBuilder cdbx; cdbx.ctor();
    tym_t tyf = funcsym_p.ty();
    tym_t tym = tybasic(tyf);
    bool farfunc = tyfarfunc(tym) != 0;

    /* Scratch register that's not a return reg.
     * It must be chosen before the early `goto Lret` below: the
     * POP regx / ADD ESP,n / JMP regx sequence under Lret uses it, and
     * previously that path saw only the default-initialized value (AX),
     * even for BCretexp blocks.
     */
    reg_t regx = (b.BC == BCret) ? AX : CX;

    if (!(b.Bflags & BFLepilog))       // if no epilog code
        goto Lret;                      // just generate RET

    retsize = 0;

    if (tyf & mTYnaked)                 // if no prolog/epilog
        return;

    if (tym == TYifunc)
    {
        // Zero-terminated opcode sequences, emitted one byte per instruction.
        // ops2 (80286 and later) uses opcode 0x61 where ops0 spells out the
        // individual POPs; both end with 0xCF.
        static immutable ubyte[5] ops2 = [ 0x07,0x1F,0x61,0xCF,0 ];
        static immutable ubyte[12] ops0 = [ 0x07,0x1F,0x5F,0x5E,
                                        0x5D,0x5B,0x5B,0x5A,
                                        0x59,0x58,0xCF,0 ];

        genregs(cdbx,0x8B,SP,BP);              // MOV SP,BP
        auto p = (config.target_cpu >= TARGET_80286) ? ops2.ptr : ops0.ptr;
        do
            cdbx.gen1(*p);
        while (*++p);
        goto Lopt;
    }

    if (config.flags & CFGtrace &&
        (!(config.flags4 & CFG4allcomdat) ||
         funcsym_p.Sclass == SC.comdat ||
         funcsym_p.Sclass == SC.global ||
         (config.flags2 & CFG2comdat && SymInline(funcsym_p))
        )
       )
    {
        Symbol *s = getRtlsym(farfunc ? RTLSYM.TRACE_EPI_F : RTLSYM.TRACE_EPI_N);
        makeitextern(s);
        cdbx.gencs(I16 ? 0x9A : CALL,0,FLfunc,s);      // CALLF _trace
        if (!I16)
            code_orflag(cdbx.last(),CFoff | CFselfrel);
        useregs((ALLREGS | mBP | mES) & ~s.Sregsaved);
    }

    if (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.exe == EX_WIN32 || MARS))
    {
        nteh_epilog(cdbx);
    }

    cpopds = null;
    if (tyf & mTYloadds)
    {
        cdbx.gen1(0x1F);             // POP DS
        cpopds = cdbx.last();        // remembered so it can be NOPed out below
    }

    /* Pop all the general purpose registers saved on the stack
     * by the prolog code. Remember to do them in the reverse
     * order they were pushed.
     */
    topop = fregsaved & ~mfuncreg;
    epilog_restoreregs(cdbx, topop);

    version (MARS)
    {
        if (usednteh & NTEHjmonitor)
        {
            regm_t retregs = 0;
            if (b.BC == BCretexp)
                retregs = regmask(b.Belem.Ety, tym);
            nteh_monitor_epilog(cdbx,retregs);
            xlocalsize += 8;
        }
    }

    if (config.wflags & WFwindows && farfunc)
    {
        int wflags = config.wflags;
        if (wflags & WFreduced && !(tyf & mTYexport))
        {   // reduced prolog/epilog for non-exported functions
            wflags &= ~(WFdgroup | WFds | WFss);
            if (!(wflags & WFsaveds))
                goto L4;
        }

        if (localsize)
        {
            cdbx.genc1(LEA,modregrm(1,SP,6),FLconst,cast(targ_uns)-2); /* LEA SP,-2[BP] */
        }
        if (wflags & (WFsaveds | WFds | WFss | WFdgroup))
        {
            if (cpopds)
                cpopds.Iop = NOP;              // don't need previous one
            cdbx.gen1(0x1F);                    // POP DS
        }
        cdbx.gen1(0x58 + BP);                   // POP BP
        if (config.wflags & WFincbp)
            cdbx.gen1(0x48 + BP);               // DEC BP
        assert(hasframe);
    }
    else
    {
        if (needframe || (xlocalsize && hasframe))
        {
        L4:
            assert(hasframe);
            if (xlocalsize || enforcealign)
            {
                if (config.flags2 & CFG2stomp)
                {   /*   MOV  ECX,0xBEAF
                     * L1:
                     *   MOV  [ESP],ECX
                     *   ADD  ESP,4
                     *   CMP  EBP,ESP
                     *   JNE  L1
                     *   POP  EBP
                     */
                    /* Value should be:
                     * 1. != 0 (code checks for null pointers)
                     * 2. be odd (to mess up alignment)
                     * 3. fall in first 64K (likely marked as inaccessible)
                     * 4. be a value that stands out in the debugger
                     */
                    assert(I32 || I64);
                    targ_size_t value = 0x0000BEAF;
                    reg_t regcx = CX;
                    mfuncreg &= ~mask(regcx);
                    uint grex = I64 ? REX_W << 16 : 0;
                    cdbx.genc2(0xC7,grex | modregrmx(3,0,regcx),value);   // MOV regcx,value
                    cdbx.gen2sib(0x89,grex | modregrm(0,regcx,4),modregrm(0,4,SP)); // MOV [ESP],regcx
                    code *c1 = cdbx.last();                               // loop head (L1)
                    cdbx.genc2(0x81,grex | modregrm(3,0,SP),REGSIZE);     // ADD ESP,REGSIZE
                    genregs(cdbx,0x39,SP,BP);                             // CMP EBP,ESP
                    if (I64)
                        code_orrex(cdbx.last(),REX_W);
                    genjmp(cdbx,JNE,FLcode,cast(block *)c1);                  // JNE L1
                    // explicitly mark as short jump, needed for correct retsize calculation (Bugzilla 15779)
                    cdbx.last().Iflags &= ~CFjmp16;
                    cdbx.gen1(0x58 + BP);                                 // POP BP
                }
                else if (config.exe == EX_WIN64)
                {   // See https://msdn.microsoft.com/en-us/library/tawsa7cb%28v=vs.100%29.aspx
                    // LEA RSP,0[RBP]
                    cdbx.genc1(LEA,(REX_W<<16)|modregrm(2,SP,BPRM),FLconst,0);
                    cdbx.gen1(0x58 + BP);      // POP RBP
                }
                else if (config.target_cpu >= TARGET_80286 &&
                    !(config.target_cpu >= TARGET_80386 && config.flags4 & CFG4speed)
                   )
                    cdbx.gen1(LEAVE);          // LEAVE
                else if (0 && xlocalsize == REGSIZE && Alloca.size == 0 && I32)
                {   // This doesn't work - I should figure out why
                    mfuncreg &= ~mask(regx);
                    cdbx.gen1(0x58 + regx);    // POP regx
                    cdbx.gen1(0x58 + BP);      // POP BP
                }
                else
                {
                    genregs(cdbx,0x8B,SP,BP);  // MOV SP,BP
                    if (I64)
                        code_orrex(cdbx.last(), REX_W);   // MOV RSP,RBP
                    cdbx.gen1(0x58 + BP);      // POP BP
                }
            }
            else
                cdbx.gen1(0x58 + BP);          // POP BP
            if (config.wflags & WFincbp && farfunc)
                cdbx.gen1(0x48 + BP);              // DEC BP
        }
        else if (xlocalsize == REGSIZE && (!I16 || b.BC == BCret))
        {
            // A single POP into the scratch register releases the one slot of locals
            mfuncreg &= ~mask(regx);
            cdbx.gen1(0x58 + regx);                    // POP regx
        }
        else if (xlocalsize)
            cod3_stackadj(cdbx, cast(int)-xlocalsize);
    }
    if (b.BC == BCret || b.BC == BCretexp)
    {
Lret:
        // Start from the "RET imm16" form; op+1 is the plain RET of the same kind
        opcode_t op = farfunc ? 0xCA : 0xC2;
        if (tym == TYhfunc)
        {
            cdbx.genc2(0xC2,0,4);                       // RET 4
        }
        else if (!typfunc(tym) ||                       // if caller cleans the stack
                 config.exe == EX_WIN64 ||
                 Para.offset == 0)                      // or nothing pushed on the stack anyway
        {
            op++;                                       // to a regular RET
            cdbx.gen1(op);
        }
        else
        {   // Stack is always aligned on register size boundary
            Para.offset = (Para.offset + (REGSIZE - 1)) & ~(REGSIZE - 1);
            if (Para.offset >= 0x10000)
            {
                /* Too large for the 16 bit immediate of RET imm16:
                    POP REG
                    ADD ESP, Para.offset
                    JMP REG
                */
                cdbx.gen1(0x58+regx);
                cdbx.genc2(0x81, modregrm(3,0,SP), Para.offset);
                if (I64)
                    code_orrex(cdbx.last(), REX_W);
                cdbx.genc2(0xFF, modregrm(3,4,regx), 0);
                if (I64)
                    code_orrex(cdbx.last(), REX_W);
            }
            else
                cdbx.genc2(op,0,Para.offset);          // RET Para.offset
        }
    }

Lopt:
    // If last instruction in ce is ADD SP,imm, and first instruction
    // in c sets SP, we can dump the ADD.
    CodeBuilder cdb; cdb.ctor();
    cdb.append(b.Bcode);
    code *cr = cdb.last();              // last instruction of the block's own code
    code *c = cdbx.peek();              // first instruction of the epilog
    if (cr && c && !I64)
    {
        if (cr.Iop == 0x81 && cr.Irm == modregrm(3,0,SP))     // if ADD SP,imm
        {
            if (
                c.Iop == LEAVE ||                                // LEAVE
                (c.Iop == 0x8B && c.Irm == modregrm(3,SP,BP)) || // MOV SP,BP
                (c.Iop == LEA && c.Irm == modregrm(1,SP,6))     // LEA SP,-imm[BP]
               )
                cr.Iop = NOP;
            else if (c.Iop == 0x58 + BP)                       // if POP BP
            {
                cr.Iop = 0x8B;
                cr.Irm = modregrm(3,SP,BP);                    // MOV SP,BP
            }
        }
        else
        {
static if (0)
{
        // These optimizations don't work if the called function
        // cleans off the stack.
        if (c.Iop == 0xC3 && cr.Iop == CALL)     // CALL near
        {
            cr.Iop = 0xE9;                             // JMP near
            c.Iop = NOP;
        }
        else if (c.Iop == 0xCB && cr.Iop == 0x9A)     // CALL far
        {
            cr.Iop = 0xEA;                             // JMP far
            c.Iop = NOP;
        }
}
        }
    }

    pinholeopt(c, null);
    retsize += calcblksize(c);          // compute size of function epilog
    cdb.append(cdbx);
    b.Bcode = cdb.finish();
}
4670 
/*******************************
 * Compute the offset of SP from BP.
 * Returns:
 *      the sum of spoff and localsize
 */

@trusted
targ_size_t cod3_spoff()
{
    //printf("spoff = x%x, localsize = x%x\n", cast(int)spoff, cast(int)localsize);
    const targ_size_t offsetFromBP = spoff + localsize;
    return offsetFromBP;
}
4681 
/*******************************
 * Generate the move(s) that transfer symbol s between its assigned
 * register(s) and its memory operand.
 * Params:
 *      cdb =   code builder that receives the generated instructions
 *      s =     symbol whose register(s) Sreglsw (and Sregmsw for values
 *              larger than REGSIZE) are loaded or stored
 *      toreg = true to load the register(s) from the operand,
 *              false to store the register(s) to the operand
 */
@trusted
void gen_spill_reg(ref CodeBuilder cdb, Symbol* s, bool toreg)
{
    /* Returns: true if `ins` is a reg,reg form whose two register fields
     * (including their REX_R / REX_B extension bits) name the same register,
     * i.e. a MOV of a register onto itself that need not be emitted.
     */
    static bool movesOntoItself(ref const code ins)
    {
        return (ins.Irm & 0xC0) == 0xC0 &&                  // reg,reg
               (((ins.Irm >> 3) ^ ins.Irm) & 7) == 0 &&     // registers match
               (((ins.Irex >> 2) ^ ins.Irex) & 1) == 0;     // REX_R and REX_B match
    }

    code cs;
    const regm_t keepmsk = toreg ? RMload : RMstore;

    // A temporary variable elem lets getlvalue() build the operand for us
    elem* evar = el_var(s);

    if (mask(s.Sreglsw) & XMMREGS)
    {
        // XMM register: MOVSS/D xreg,mem when loading, MOVSS/D mem,xreg when storing
        cs.Iop = toreg ? xmmload(s.Stype.Tty) : xmmstore(s.Stype.Tty);
        getlvalue(cdb,&cs,evar,keepmsk);
        cs.orReg(s.Sreglsw - XMM0);
        cdb.gen(&cs);
    }
    else
    {
        const int nbytes = cast(int)type_size(s.Stype);

        // MOV reg,mem[ESP] : MOV mem[ESP],reg
        // For byte-sized values the low opcode bit is flipped
        cs.Iop = (toreg ? 0x8B : 0x89) ^ (nbytes == 1 ? 1 : 0);
        getlvalue(cdb,&cs,evar,keepmsk);
        cs.orReg(s.Sreglsw);
        if (I64 && nbytes == 1 && s.Sreglsw >= 4)
            cs.Irex |= REX;
        if (!movesOntoItself(cs))                   // skip MOV reg,reg
            cdb.gen(&cs);

        if (nbytes > REGSIZE)
        {
            // Same again for the most significant half
            cs.setReg(s.Sregmsw);
            getlvalue_msw(&cs);
            if (!movesOntoItself(cs))               // skip MOV reg,reg
                cdb.gen(&cs);
        }
    }

    el_free(evar);
}
4730 
/****************************
 * Generate code for, and output a thunk.
 *
 * The thunk first adjusts the 'this' pointer by d, then either jumps
 * directly to sfunc or, for a virtual call, loads the vptr and jumps
 * through the vtbl[] entry at offset i.
 *
 * Params:
 *      sthunk =  Symbol of thunk; its Soffset, Ssize and Sseg are set here
 *      sfunc =   Symbol of thunk's target function
 *      p =       ESP parameter offset to this pointer (the return address,
 *                and the hidden pointer for TYhfunc targets, are skipped
 *                over by this function)
 *      thisty =  Type of this pointer
 *      d =       offset to add to 'this' pointer; in non-16-bit code a
 *                value that is negative when viewed as an int is turned
 *                into a SUB of -d
 *      i =       offset into vtbl[]; if the low 16 bits are all set the
 *                thunk is a direct JMP to sfunc instead of a virtual call
 *      d2 =      offset from 'this' to vptr
 */

@trusted
void cod3_thunk(Symbol *sthunk,Symbol *sfunc,uint p,tym_t thisty,
        uint d,int i,uint d2)
{
    targ_size_t thunkoffset;

    int seg = sthunk.Sseg;
    cod3_align(seg);

    // Skip over return address
    tym_t thunkty = tybasic(sthunk.ty());
    if (tyfarfunc(thunkty))
        p += I32 ? 8 : tysize(TYfptr);          // far function
    else
        p += tysize(TYnptr);
    if (tybasic(sfunc.ty()) == TYhfunc)
        p += tysize(TYnptr);                    // skip over hidden pointer

    // Step 1: adjust the 'this' pointer by d
    CodeBuilder cdb; cdb.ctor();
    if (!I16)
    {
        /*
           Generate:
            ADD p[ESP],d
           For direct call:
            JMP sfunc
           For virtual call:
            MOV EAX, p[ESP]                     EAX = this
            MOV EAX, d2[EAX]                    EAX = this.vptr
            JMP i[EAX]                          jump to virtual function
         */
        // reg is the /digit field of opcode 0x81: 0 selects ADD, 5 selects SUB
        reg_t reg = 0;
        if (cast(int)d < 0)
        {
            d = -d;
            reg = 5;                            // switch from ADD to SUB
        }
        if (thunkty == TYmfunc)
        {                                       // ADD ECX,d
            if (d)
                cdb.genc2(0x81,modregrm(3,reg,CX),d);
        }
        else if (thunkty == TYjfunc || (I64 && thunkty == TYnfunc))
        {                                       // ADD EAX,d
            // 'this' is adjusted in a register; which one depends on the target:
            // AX by default, CX for EX_WIN64, otherwise SI or DI in 64 bit code
            int rm = AX;
            if (config.exe == EX_WIN64)
                rm = CX;
            else if (I64)
                rm = (thunkty == TYnfunc && (sfunc.Sfunc.Fflags3 & F3hiddenPtr)) ? SI : DI;
            if (d)
                cdb.genc2(0x81,modregrm(3,reg,rm),d);
        }
        else
        {
            // 'this' is adjusted in place on the stack
            cdb.genc(0x81,modregrm(2,reg,4),
                FLconst,p,                      // to this
                FLconst,d);                     // ADD p[ESP],d
            cdb.last().Isib = modregrm(0,4,SP);
        }
        // Nothing is generated when d is 0 in the register cases, hence the peek()
        if (I64 && cdb.peek())
            cdb.last().Irex |= REX_W;
    }
    else
    {
        /*
           Generate:
            MOV BX,SP
            ADD [SS:] p[BX],d
           For direct call:
            JMP sfunc
           For virtual call:
            MOV BX, p[BX]                       BX = this
            MOV BX, d2[BX]                      BX = this.vptr
            JMP i[BX]                           jump to virtual function
         */

        genregs(cdb,0x89,SP,BX);           // MOV BX,SP
        cdb.genc(0x81,modregrm(2,0,7),
            FLconst,p,                                  // to this
            FLconst,d);                                 // ADD p[BX],d
        if (config.wflags & WFssneds ||
            // If DS needs reloading from SS,
            // then assume SS != DS on thunk entry
            (LARGEDATA && config.wflags & WFss))
            cdb.last().Iflags |= CFss;                 // SS:
    }

    // Step 2: transfer control to the target function
    if ((i & 0xFFFF) != 0xFFFF)                 // if virtual call
    {
        const bool FARTHIS = (tysize(thisty) > REGSIZE);
        const bool FARVPTR = FARTHIS;

        assert(thisty != TYvptr);               // can't handle this case

        if (!I16)
        {
            assert(!FARTHIS && !LARGECODE);
            if (thunkty == TYmfunc)     // if 'this' is in ECX
            {
                // MOV EAX,d2[ECX]
                cdb.genc1(0x8B,modregrm(2,AX,CX),FLconst,d2);
            }
            else if (thunkty == TYjfunc)        // if 'this' is in EAX
            {
                // MOV EAX,d2[EAX]
                cdb.genc1(0x8B,modregrm(2,AX,AX),FLconst,d2);
            }
            else
            {
                // MOV EAX,p[ESP]
                cdb.genc1(0x8B,(modregrm(0,4,SP) << 8) | modregrm(2,AX,4),FLconst,cast(targ_uns) p);
                if (I64)
                    cdb.last().Irex |= REX_W;

                // MOV EAX,d2[EAX]
                cdb.genc1(0x8B,modregrm(2,AX,AX),FLconst,d2);
            }
            // Applies REX_W to whichever vptr load was generated last above
            if (I64)
                code_orrex(cdb.last(), REX_W);
                                                        // JMP i[EAX]
            cdb.genc1(0xFF,modregrm(2,4,0),FLconst,cast(targ_uns) i);
        }
        else
        {
            // MOV/LES BX,[SS:] p[BX]
            cdb.genc1((FARTHIS ? 0xC4 : 0x8B),modregrm(2,BX,7),FLconst,cast(targ_uns) p);
            if (config.wflags & WFssneds ||
                // If DS needs reloading from SS,
                // then assume SS != DS on thunk entry
                (LARGEDATA && config.wflags & WFss))
                cdb.last().Iflags |= CFss;             // SS:

            // MOV/LES BX,[ES:]d2[BX]
            cdb.genc1((FARVPTR ? 0xC4 : 0x8B),modregrm(2,BX,7),FLconst,d2);
            if (FARTHIS)
                cdb.last().Iflags |= CFes;             // ES:

                                                        // JMP i[BX]
            cdb.genc1(0xFF,modregrm(2,(LARGECODE ? 5 : 4),7),FLconst,cast(targ_uns) i);
            if (FARVPTR)
                cdb.last().Iflags |= CFes;             // ES:
        }
    }
    else
    {
        // Direct call: jump straight to sfunc
        if (config.flags3 & CFG3pic)
        {
            localgot = null;                // no local variables
            CodeBuilder cdbgot; cdbgot.ctor();
            load_localgot(cdbgot);          // load GOT in EBX
            code *c1 = cdbgot.finish();
            if (c1)
            {
                assignaddrc(c1);
                cdb.append(c1);
            }
        }
        cdb.gencs((LARGECODE ? 0xEA : 0xE9),0,FLfunc,sfunc); // JMP sfunc
        cdb.last().Iflags |= LARGECODE ? (CFseg | CFoff) : (CFselfrel | CFoff);
    }

    // Step 3: write the thunk's code into the segment and record where it went
    thunkoffset = Offset(seg);
    code *c = cdb.finish();
    pinholeopt(c,null);
    codout(seg,c,null);
    code_free(c);

    sthunk.Soffset = thunkoffset;
    sthunk.Ssize = Offset(seg) - thunkoffset; // size of thunk
    sthunk.Sseg = seg;
    if (config.exe & EX_posix ||
       config.objfmt == OBJ_MSCOFF)
    {
        objmod.pubdef(seg,sthunk,sthunk.Soffset);
    }
    searchfixlist(sthunk);              // resolve forward refs
}
4920 
/*****************************
 * Treat symbol s as an external: unless it already has a nonzero
 * Sxtrnnum, give it storage class SC.extern_ and pass it to
 * objmod.external().
 */

@trusted
void makeitextern(Symbol *s)
{
    if (s.Sxtrnnum != 0)
        return;                         // nonzero Sxtrnnum: nothing to do

    /*printf("makeitextern(x%x)\n",s);*/
    s.Sclass = SC.extern_;              // external
    objmod.external(s);
}
4935 
4936 
/*******************************
 * Replace JMPs in Bgotocode with JMP SHORTs wherever possible.
 * This routine depends on FLcode jumps to only be forward
 * referenced.
 * BFLjmpoptdone is set to true if nothing more can be done
 * with this block.
 *
 * Besides shortening, a branch to the immediately following instruction
 * is replaced by a NOP and, when optimizing, jumps to jumps are
 * collapsed and a conditional jump around a call to a function that
 * never returns is folded into a conditional jump to that function.
 *
 * Params:
 *      bl =    block whose code (bl.Bcode) is scanned and modified in place
 *      flag =  !=0 means don't have correct Boffsets yet
 * Returns:
 *      number of bytes saved
 */

@trusted
int branch(block *bl,int flag)
{
    int bytesaved;
    code* c,cn,ct;
    targ_size_t offset,disp;
    targ_size_t csize;

    if (!flag)
        bl.Bflags |= BFLjmpoptdone;      // assume this will be all
    c = bl.Bcode;
    if (!c)
        return 0;
    bytesaved = 0;
    offset = bl.Boffset;                 /* offset of start of block     */
    while (1)
    {
        ubyte op;

        csize = calccodsize(c);
        cn = code_next(c);
        op = cast(ubyte)c.Iop;
        // Candidates: opcodes 0x70..0x7F still flagged CFjmp16, and JMPs
        // that are not flagged CFjmp5
        if ((op & ~0x0F) == 0x70 && c.Iflags & CFjmp16 ||
            (op == JMP && !(c.Iflags & CFjmp5)))
        {
          L1:
            // Compute disp, the distance from the end of this instruction
            // to the branch target
            switch (c.IFL2)
            {
                case FLblock:
                    if (flag)           // no offsets yet, don't optimize
                        goto L3;
                    disp = c.IEV2.Vblock.Boffset - offset - csize;

                    /* If this is a forward branch, and there is an aligned
                     * block intervening, it is possible that shrinking
                     * the jump instruction will cause it to be out of
                     * range of the target. This happens if the alignment
                     * prevents the target block from moving correspondingly
                     * closer.
                     */
                    if (disp >= 0x7F-4 && c.IEV2.Vblock.Boffset > offset)
                    {   /* Look for intervening alignment
                         */
                        for (block *b = bl.Bnext; b; b = b.Bnext)
                        {
                            if (b.Balign)
                            {
                                bl.Bflags &= ~BFLjmpoptdone;   // some JMPs left
                                goto L3;
                            }
                            if (b == c.IEV2.Vblock)
                                break;
                        }
                    }

                    break;

                case FLcode:
                {
                    code *cr;

                    disp = 0;

                    ct = c.IEV2.Vcode;         /* target of branch     */
                    assert(ct.Iflags & (CFtarg | CFtarg2));
                    // Forward search: add up the sizes of the instructions
                    // between this one and the target
                    for (cr = cn; cr; cr = code_next(cr))
                    {
                        if (cr == ct)
                            break;
                        disp += calccodsize(cr);
                    }

                    if (!cr)
                    {   // Didn't find it in forward search. Try backwards jump
                        // Add up the sizes from the target up to and including
                        // the branch instruction itself
                        int s = 0;
                        disp = 0;
                        for (cr = bl.Bcode; cr != cn; cr = code_next(cr))
                        {
                            assert(cr != null); // must have found it
                            if (cr == ct)
                                s = 1;
                            if (s)
                                disp += calccodsize(cr);
                        }
                    }

                    if (config.flags4 & CFG4optimized && !flag)
                    {
                        /* Propagate branch forward past junk   */
                        while (1)
                        {
                            if (ct.Iop == NOP ||
                                ct.Iop == (ESCAPE | ESClinnum))
                            {
                                ct = code_next(ct);
                                if (!ct)
                                    goto L2;
                            }
                            else
                            {
                                c.IEV2.Vcode = ct;
                                ct.Iflags |= CFtarg;
                                break;
                            }
                        }

                        /* And eliminate jmps to jmps   */
                        if ((op == ct.Iop || ct.Iop == JMP) &&
                            (op == JMP || c.Iflags & CFjmp16))
                        {
                            // Retarget to wherever the second jump goes, then
                            // re-evaluate the displacement from scratch
                            c.IFL2 = ct.IFL2;
                            c.IEV2.Vcode = ct.IEV2.Vcode;
                            /*printf("eliminating branch\n");*/
                            goto L1;
                        }
                     L2:
                        { }
                    }
                }
                    break;

                default:
                    goto L3;
            }

            if (disp == 0)                      // bra to next instruction
            {
                bytesaved += csize;
                c.Iop = NOP;                   // del branch instruction
                c.IEV2.Vcode = null;
                c = cn;
                if (!c)
                    break;
                continue;
            }
            // Shorten only if both disp and disp - 2 survive a round trip
            // through targ_schar
            else if (cast(targ_size_t)cast(targ_schar)(disp - 2) == (disp - 2) &&
                     cast(targ_size_t)cast(targ_schar)disp == disp)
            {
                if (op == JMP)
                {
                    c.Iop = JMPS;              // JMP SHORT
                    bytesaved += I16 ? 1 : 3;
                }
                else                            // else Jcond
                {
                    c.Iflags &= ~CFjmp16;      // a branch is ok
                    bytesaved += I16 ? 3 : 4;

                    // Replace a cond jump around a call to a function that
                    // never returns with a cond jump to that function.
                    if (config.flags4 & CFG4optimized &&
                        config.target_cpu >= TARGET_80386 &&
                        disp == (I16 ? 3 : 5) &&
                        cn &&
                        cn.Iop == CALL &&
                        cn.IFL2 == FLfunc &&
                        cn.IEV2.Vsym.Sflags & SFLexit &&
                        !(cn.Iflags & (CFtarg | CFtarg2))
                       )
                    {
                        // The CALL becomes opcode 0x0F8x, where x is the
                        // condition nibble of the original jump with its
                        // low bit flipped
                        cn.Iop = 0x0F00 | ((c.Iop & 0x0F) ^ 0x81);
                        c.Iop = NOP;
                        c.IEV2.Vcode = null;
                        bytesaved++;

                        // If nobody else points to ct, we can remove the CFtarg
                        if (flag && ct)
                        {
                            code *cx;
                            for (cx = bl.Bcode; 1; cx = code_next(cx))
                            {
                                if (!cx)
                                {
                                    ct.Iflags &= ~CFtarg;
                                    break;
                                }
                                if (cx.IEV2.Vcode == ct)
                                    break;
                            }
                        }
                    }
                }
                csize = calccodsize(c);         // instruction may have shrunk
            }
            else
                bl.Bflags &= ~BFLjmpoptdone;   // some JMPs left
        }
L3:
        // Advance to the next instruction, keeping offset in step
        if (cn)
        {
            offset += csize;
            c = cn;
        }
        else
            break;
    }
    //printf("bytesaved = x%x\n",bytesaved);
    return bytesaved;
}
5148 
5149 
/************************************************
 * Walk globsym and rebase the Soffset of every stack variable so
 * that all of them are relative to the frame pointer.
 */

version (MARS)
{
@trusted
void cod3_adjSymOffsets()
{
    //printf("cod3_adjSymOffsets()\n");
    for (SYMIDX idx = 0; idx < globsym.length; idx++)
    {
        //printf("\tglobsym[%d] = %p\n",idx,globsym[idx]);
        Symbol *sym = globsym[idx];

        switch (sym.Sclass)
        {
            case SC.parameter:
            case SC.regpar:
            case SC.shadowreg:
                //printf("s = '%s', Soffset = x%x, Para.size = x%x, EBPtoESP = x%x\n", sym.Sident, sym.Soffset, Para.size, EBPtoESP);
                sym.Soffset += Para.size;
                if (0 && !(funcsym_p.Sfunc.Fflags3 & Fmember))
                {   // disabled adjustment, kept as it was
                    if (!hasframe)
                        sym.Soffset += EBPtoESP;
                    if (funcsym_p.Sfunc.Fflags3 & Fnested)
                        sym.Soffset += REGSIZE;
                }
                break;

            case SC.fastpar:
                //printf("\tfastpar %s %p Soffset %x Fast.size %x BPoff %x\n", sym.Sident, sym, cast(int)sym.Soffset, cast(int)Fast.size, cast(int)BPoff);
                sym.Soffset += Fast.size + BPoff;
                break;

            case SC.auto_:
            case SC.register:
                // Symbols with Sfl == FLfast use Fast.size, all others Auto.size
                //printf("s = '%s', Soffset = x%x, Auto.size = x%x, BPoff = x%x EBPtoESP = x%x\n", sym.Sident, cast(int)sym.Soffset, cast(int)Auto.size, cast(int)BPoff, cast(int)EBPtoESP);
                sym.Soffset += ((sym.Sfl == FLfast) ? Fast.size : Auto.size) + BPoff;
                break;

            case SC.bprel:
                break;                  // offset is left unchanged

            default:
                break;                  // other storage classes are not adjusted
        }
    }
}

}
5214 
5215 /*******************************
5216  * Take symbol info in union ev and replace it with a real address
5217  * in Vpointer.
5218  */
5219 
5220 @trusted
5221 void assignaddr(block *bl)
5222 {
5223     int EBPtoESPsave = EBPtoESP;
5224     int hasframesave = hasframe;
5225 
5226     if (bl.Bflags & BFLoutsideprolog)
5227     {
5228         EBPtoESP = -REGSIZE;
5229         hasframe = 0;
5230     }
5231     assignaddrc(bl.Bcode);
5232     hasframe = hasframesave;
5233     EBPtoESP = EBPtoESPsave;
5234 }
5235 
/*******************************
 * Walk the code list c and replace the symbolic operand descriptions
 * (IFL1/IEV1 for the first operand, IFL2/IEV2 for the second) with
 * concrete offsets computed from the current frame globals
 * (EBPtoESP, BPoff, hasframe, enforcealign and the section sizes).
 * Operands that get fully resolved are re-tagged as FLconst.
 *
 * ESCAPE pseudo-instructions are rewritten in place:
 *      ESCadjesp   adds its operand to EBPtoESP and becomes a NOP
 *      ESCfixesp   becomes LEA ESP,-EBPtoESP[EBP] when hasframe is set,
 *                  followed by AND ESP,-STACKALIGN when enforcealign is set
 *      ESCframeptr becomes MOV reg,EBP or LEA reg,EBPtoESP[ESP]
 *
 * Instructions that reference a dead symbol, or a common subexpression
 * slot that was never loaded, are turned into NOPs.
 *
 * Params:
 *      c = start of the code list to process (may be null)
 */
@trusted
void assignaddrc(code *c)
{
    int sn;
    Symbol *s;
    ubyte ins,rm;
    targ_size_t soff;
    targ_size_t base;

    // EBPtoESP can change while walking (ESCadjesp); FLstack operands are
    // adjusted by the difference from this starting value
    base = EBPtoESP;
    for (; c; c = code_next(c))
    {
        debug
        {
        if (0)
        {       printf("assignaddrc()\n");
                code_print(c);
        }
        // catch a two-instruction cycle in the list
        if (code_next(c) && code_next(code_next(c)) == c)
            assert(0);
        }

        // Fetch the operand-description flags (M, T, ...) for this opcode
        if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
            ins = vex_inssize(c);
        else if ((c.Iop & 0xFFFD00) == 0x0F3800)
            ins = inssize2[(c.Iop >> 8) & 0xFF];
        else if ((c.Iop & 0xFF00) == 0x0F00)
            ins = inssize2[c.Iop & 0xFF];
        else if ((c.Iop & 0xFF) == ESCAPE)
        {
            if (c.Iop == (ESCAPE | ESCadjesp))
            {
                //printf("adjusting EBPtoESP (%d) by %ld\n",EBPtoESP,cast(long)c.IEV1.Vint);
                EBPtoESP += c.IEV1.Vint;
                c.Iop = NOP;
            }
            else if (c.Iop == (ESCAPE | ESCfixesp))
            {
                //printf("fix ESP\n");
                if (hasframe)
                {
                    // LEA ESP,-EBPtoESP[EBP]
                    c.Iop = LEA;
                    if (c.Irm & 8)
                        c.Irex |= REX_R;
                    c.Irm = modregrm(2,SP,BP);
                    c.Iflags = CFoff;
                    c.IFL1 = FLconst;
                    c.IEV1.Vuns = -EBPtoESP;
                    if (enforcealign)
                    {
                        // AND ESP, -STACKALIGN
                        code *cn = code_calloc();
                        cn.Iop = 0x81;
                        cn.Irm = modregrm(3, 4, SP);
                        cn.Iflags = CFoff;
                        cn.IFL2 = FLconst;
                        cn.IEV2.Vsize_t = -STACKALIGN;
                        // The REX_W must go on the new AND instruction: a
                        // 32 bit AND in 64 bit mode would zero the upper
                        // half of RSP. (c itself gets REX_W below.)
                        if (I64)
                            cn.Irex |= REX_W;
                        // splice cn in right after c
                        cn.next = c.next;
                        c.next = cn;
                    }
                }
            }
            else if (c.Iop == (ESCAPE | ESCframeptr))
            {   // Convert to load of frame pointer
                // c.Irm is the register to use
                if (hasframe && !enforcealign)
                {   // MOV reg,EBP
                    c.Iop = 0x89;
                    if (c.Irm & 8)
                        c.Irex |= REX_B;
                    c.Irm = modregrm(3,BP,c.Irm & 7);
                }
                else
                {   // LEA reg,EBPtoESP[ESP]
                    c.Iop = LEA;
                    if (c.Irm & 8)
                        c.Irex |= REX_R;
                    c.Irm = modregrm(2,c.Irm & 7,4);
                    c.Isib = modregrm(0,4,SP);
                    c.Iflags = CFoff;
                    c.IFL1 = FLconst;
                    c.IEV1.Vuns = EBPtoESP;
                }
            }
            if (I64)
                c.Irex |= REX_W;
            continue;
        }
        else
            ins = inssize[c.Iop & 0xFF];

        /* ---- first operand (IFL1 / IEV1) ---- */
        if (!(ins & M) ||
            ((rm = c.Irm) & 0xC0) == 0xC0)
            goto do2;           /* if no first operand          */
        if (is32bitaddr(I32,c.Iflags))
        {

            // mod==0 carries a displacement only for rm==5, or rm==4 with SIB base 5
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5))
               )
                goto do2;       /* if no first operand  */
        }
        else
        {
            // 16 bit addressing: mod==0 carries a displacement only for rm==6
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 6))
               )
                goto do2;       /* if no first operand  */
        }
        s = c.IEV1.Vsym;
        switch (c.IFL1)
        {
            case FLdata:
                if (config.objfmt == OBJ_OMF && s.Sclass != SC.comdat && s.Sclass != SC.extern_)
                {
                    version (MARS)
                    {
                        c.IEV1.Vseg = s.Sseg;
                    }
                    else
                    {
                        c.IEV1.Vseg = DATA;
                    }
                    c.IEV1.Vpointer += s.Soffset;
                    c.IFL1 = FLdatseg;
                }
                else
                    c.IFL1 = FLextern;
                goto do2;

            case FLudata:
                if (config.objfmt == OBJ_OMF)
                {
                    version (MARS)
                    {
                        c.IEV1.Vseg = s.Sseg;
                    }
                    else
                    {
                        c.IEV1.Vseg = UDATA;
                    }
                    c.IEV1.Vpointer += s.Soffset;
                    c.IFL1 = FLdatseg;
                }
                else
                    c.IFL1 = FLextern;
                goto do2;

            case FLtlsdata:
                if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
                    c.IFL1 = FLextern;
                goto do2;

            case FLdatseg:
                //c.IEV1.Vseg = DATA;
                goto do2;

            case FLfardata:
            case FLcsdata:
            case FLpseudo:
                goto do2;

            case FLstack:
                //printf("Soffset = %d, EBPtoESP = %d, base = %d, pointer = %d\n",
                //s.Soffset,EBPtoESP,base,c.IEV1.Vpointer);
                c.IEV1.Vpointer += s.Soffset + EBPtoESP - base - EEStack.offset;
                break;

            case FLfast:
                soff = Fast.size;
                goto L1;

            case FLreg:
            case FLauto:
                soff = Auto.size;
            L1:
                // soff is the size of the section the symbol lives in
                if (Symbol_Sisdead(*s, anyiasm))
                {
                    c.Iop = NOP;               // remove references to it
                    continue;
                }
                if (s.Sfl == FLreg && c.IEV1.Vpointer < 2)
                {
                    // Symbol is in a register: turn the memory operand
                    // into a register operand (mod == 3)
                    reg_t reg = s.Sreglsw;

                    assert(!(s.Sregm & ~mask(reg)));
                    if (c.IEV1.Vpointer == 1)
                    {
                        assert(reg < 4);    /* must be a BYTEREGS   */
                        reg |= 4;           /* convert to high byte reg */
                    }
                    if (reg & 8)
                    {
                        assert(I64);
                        c.Irex |= REX_B;
                        reg &= 7;
                    }
                    c.Irm = (c.Irm & modregrm(0,7,0))
                            | modregrm(3,0,reg);
                    assert(c.Iop != LES && c.Iop != LEA);
                    goto do2;
                }
                else
                {   c.IEV1.Vpointer += s.Soffset + soff + BPoff;
                    if (s.Sflags & SFLunambig)
                        c.Iflags |= CFunambig;
            L2:
                    if (!hasframe || (enforcealign && c.IFL1 != FLpara))
                    {   /* Convert to ESP relative address instead of EBP */
                        assert(!I16);
                        c.IEV1.Vpointer += EBPtoESP;
                        ubyte crm = c.Irm;
                        if ((crm & 7) == 4)              // if SIB byte
                        {
                            assert((c.Isib & 7) == BP);
                            assert((crm & 0xC0) != 0);
                            c.Isib = (c.Isib & ~7) | modregrm(0,0,SP);
                        }
                        else
                        {
                            // no SIB yet: switch rm to 4 and add a SIB with base SP
                            assert((crm & 7) == 5);
                            c.Irm = (crm & modregrm(0,7,0))
                                    | modregrm(2,0,4);
                            c.Isib = modregrm(0,4,SP);
                        }
                    }
                }
                break;

            case FLpara:
                //printf("s = %s, Soffset = %d, Para.size = %d, BPoff = %d, EBPtoESP = %d, Vpointer = %d\n",
                //s.Sident.ptr, cast(int)s.Soffset, cast(int)Para.size, cast(int)BPoff,
                //cast(int)EBPtoESP, cast(int)c.IEV1.Vpointer);
                soff = Para.size - BPoff;    // cancel out add of BPoff
                goto L1;

            case FLfltreg:
                c.IEV1.Vpointer += Foff + BPoff;
                c.Iflags |= CFunambig;
                goto L2;

            case FLallocatmp:
                c.IEV1.Vpointer += Alloca.offset + BPoff;
                goto L2;

            case FLfuncarg:
                c.IEV1.Vpointer += cgstate.funcarg.offset + BPoff;
                goto L2;

            case FLbprel:
                c.IEV1.Vpointer += s.Soffset;
                break;

            case FLcs:
                sn = c.IEV1.Vuns;
                if (!CSE.loaded(sn))            // if never loaded
                {
                    c.Iop = NOP;
                    continue;
                }
                c.IEV1.Vpointer = CSE.offset(sn) + CSoff + BPoff;
                c.Iflags |= CFunambig;
                goto L2;

            case FLregsave:
                sn = c.IEV1.Vuns;
                c.IEV1.Vpointer = sn + regsave.off + BPoff;
                c.Iflags |= CFunambig;
                goto L2;

            case FLndp:
                version (MARS)
                {
                    assert(c.IEV1.Vuns < global87.save.length);
                }
                c.IEV1.Vpointer = c.IEV1.Vuns * tysize(TYldouble) + NDPoff + BPoff;
                c.Iflags |= CFunambig;
                goto L2;

            case FLoffset:
                break;

            case FLlocalsize:
                c.IEV1.Vpointer += localsize;
                break;

            case FLconst:
            default:
                goto do2;
        }
        // first operand is now a plain constant displacement
        c.IFL1 = FLconst;
    do2:
        /* ---- second operand (IFL2 / IEV2) ---- */
        /* Ignore TEST (F6 and F7) opcodes      */
        if (!(ins & T)) goto done;              /* if no second operand */
        s = c.IEV2.Vsym;
        switch (c.IFL2)
        {
            case FLdata:
                if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
                {
                    c.IFL2 = FLextern;
                    goto do2;
                }
                else
                {
                    if (s.Sclass == SC.comdat)
                    {   c.IFL2 = FLextern;
                        goto do2;
                    }
                    c.IEV2.Vseg = MARS ? s.Sseg : DATA;
                    c.IEV2.Vpointer += s.Soffset;
                    c.IFL2 = FLdatseg;
                    goto done;
                }

            case FLudata:
                if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
                {
                    c.IFL2 = FLextern;
                    goto do2;
                }
                else
                {
                    c.IEV2.Vseg = MARS ? s.Sseg : UDATA;
                    c.IEV2.Vpointer += s.Soffset;
                    c.IFL2 = FLdatseg;
                    goto done;
                }

            case FLtlsdata:
                if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
                {
                    c.IFL2 = FLextern;
                    goto do2;
                }
                goto done;

            case FLdatseg:
                //c.IEV2.Vseg = DATA;
                goto done;

            case FLcsdata:
            case FLfardata:
                goto done;

            case FLreg:
            case FLpseudo:
                assert(0);
                /* NOTREACHED */

            case FLfast:
                c.IEV2.Vpointer += s.Soffset + Fast.size + BPoff;
                break;

            case FLauto:
                c.IEV2.Vpointer += s.Soffset + Auto.size + BPoff;
            L3:
                if (!hasframe || (enforcealign && c.IFL2 != FLpara))
                    /* Convert to ESP relative address instead of EBP */
                    c.IEV2.Vpointer += EBPtoESP;
                break;

            case FLpara:
                c.IEV2.Vpointer += s.Soffset + Para.size;
                goto L3;

            case FLfltreg:
                c.IEV2.Vpointer += Foff + BPoff;
                goto L3;

            case FLallocatmp:
                c.IEV2.Vpointer += Alloca.offset + BPoff;
                goto L3;

            case FLfuncarg:
                c.IEV2.Vpointer += cgstate.funcarg.offset + BPoff;
                goto L3;

            case FLbprel:
                c.IEV2.Vpointer += s.Soffset;
                break;

            case FLstack:
                c.IEV2.Vpointer += s.Soffset + EBPtoESP - base;
                break;

            case FLcs:
            case FLndp:
            case FLregsave:
                assert(0);

            case FLconst:
                break;

            case FLlocalsize:
                c.IEV2.Vpointer += localsize;
                break;

            default:
                goto done;
        }
        // second operand is now a plain constant
        c.IFL2 = FLconst;
  done:
        { }
    }
}
5644 
5645 /*******************************
5646  * Return offset from BP of symbol s.
5647  */
5648 
5649 @trusted
5650 targ_size_t cod3_bpoffset(Symbol *s)
5651 {
5652     targ_size_t offset;
5653 
5654     symbol_debug(s);
5655     offset = s.Soffset;
5656     switch (s.Sfl)
5657     {
5658         case FLpara:
5659             offset += Para.size;
5660             break;
5661 
5662         case FLfast:
5663             offset += Fast.size + BPoff;
5664             break;
5665 
5666         case FLauto:
5667             offset += Auto.size + BPoff;
5668             break;
5669 
5670         default:
5671             WRFL(cast(FL)s.Sfl);
5672             symbol_print(s);
5673             assert(0);
5674     }
5675     assert(hasframe);
5676     return offset;
5677 }
5678 
5679 
5680 /*******************************
5681  * Find shorter versions of the same instructions.
5682  * Does these optimizations:
5683  *      replaces jmps to the next instruction with NOPs
5684  *      sign extension of modregrm displacement
5685  *      sign extension of immediate data (can't do it for OR, AND, XOR
5686  *              as the opcodes are not defined)
5687  *      short versions for AX EA
5688  *      short versions for reg EA
5689  * Code is neither removed nor added.
5690  * Params:
5691  *      b = block for code (or null)
5692  *      c = code list to optimize
5693  */
5694 
5695 @trusted
5696 void pinholeopt(code *c,block *b)
5697 {
5698     targ_size_t a;
5699     uint mod;
5700     ubyte ins;
5701     int usespace;
5702     int useopsize;
5703     int space;
5704     block *bn;
5705 
5706     debug
5707     {
5708         __gshared int tested; if (!tested) { tested++; pinholeopt_unittest(); }
5709     }
5710 
5711     debug
5712     {
5713         code *cstart = c;
5714         if (debugc)
5715         {
5716             printf("+pinholeopt(%p)\n",c);
5717         }
5718     }
5719 
5720     if (b)
5721     {
5722         bn = b.Bnext;
5723         usespace = (config.flags4 & CFG4space && b.BC != BCasm);
5724         useopsize = (I16 || (config.flags4 & CFG4space && b.BC != BCasm));
5725     }
5726     else
5727     {
5728         bn = null;
5729         usespace = (config.flags4 & CFG4space);
5730         useopsize = (I16 || config.flags4 & CFG4space);
5731     }
5732     for (; c; c = code_next(c))
5733     {
5734     L1:
5735         opcode_t op = c.Iop;
5736         if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
5737             ins = vex_inssize(c);
5738         else if ((op & 0xFFFD00) == 0x0F3800)
5739             ins = inssize2[(op >> 8) & 0xFF];
5740         else if ((op & 0xFF00) == 0x0F00)
5741             ins = inssize2[op & 0xFF];
5742         else
5743             ins = inssize[op & 0xFF];
5744         if (ins & M)            // if modregrm byte
5745         {
5746             int shortop = (c.Iflags & CFopsize) ? !I16 : I16;
5747             int local_BPRM = BPRM;
5748 
5749             if (c.Iflags & CFaddrsize)
5750                 local_BPRM ^= 5 ^ 6;    // toggle between 5 and 6
5751 
5752             uint rm = c.Irm;
5753             reg_t reg = rm & modregrm(0,7,0);          // isolate reg field
5754             reg_t ereg = rm & 7;
5755             //printf("c = %p, op = %02x rm = %02x\n", c, op, rm);
5756 
5757             /* If immediate second operand      */
5758             if ((ins & T ||
5759                  ((op == 0xF6 || op == 0xF7) && (reg < modregrm(0,2,0) || reg > modregrm(0,3,0)))
5760                 ) &&
5761                 c.IFL2 == FLconst)
5762             {
5763                 int flags = c.Iflags & CFpsw;      /* if want result in flags */
5764                 targ_long u = c.IEV2.Vuns;
5765                 if (ins & E)
5766                     u = cast(byte) u;
5767                 else if (shortop)
5768                     u = cast(short) u;
5769 
5770                 // Replace CMP reg,0 with TEST reg,reg
5771                 if ((op & 0xFE) == 0x80 &&              // 80 is CMP R8,imm8; 81 is CMP reg,imm
5772                     rm >= modregrm(3,7,AX) &&
5773                     u == 0)
5774                 {
5775                     c.Iop = (op & 1) | 0x84;
5776                     c.Irm = modregrm(3,ereg,ereg);
5777                     if (c.Irex & REX_B)
5778                         c.Irex |= REX_R;
5779                     goto L1;
5780                 }
5781 
5782                 /* Optimize ANDs with an immediate constant             */
5783                 if ((op == 0x81 || op == 0x80) && reg == modregrm(0,4,0))
5784                 {
5785                     if (rm >= modregrm(3,4,AX))         // AND reg,imm
5786                     {
5787                         if (u == 0)
5788                         {
5789                             /* Replace with XOR reg,reg     */
5790                             c.Iop = 0x30 | (op & 1);
5791                             c.Irm = modregrm(3,ereg,ereg);
5792                             if (c.Irex & REX_B)
5793                                 c.Irex |= REX_R;
5794                             goto L1;
5795                         }
5796                         if (u == 0xFFFFFFFF && !flags)
5797                         {
5798                             c.Iop = NOP;
5799                             goto L1;
5800                         }
5801                     }
5802                     if (op == 0x81 && !flags)
5803                     {   // If we can do the operation in one byte
5804 
5805                         // If EA is not SI or DI
5806                         if ((rm < modregrm(3,4,SP) || I64) &&
5807                             (config.flags4 & CFG4space ||
5808                              config.target_cpu < TARGET_PentiumPro)
5809                            )
5810                         {
5811                             if ((u & 0xFFFFFF00) == 0xFFFFFF00)
5812                                 goto L2;
5813                             else if (rm < modregrm(3,0,0) || (!c.Irex && ereg < 4))
5814                             {
5815                                 if (!shortop)
5816                                 {
5817                                     if ((u & 0xFFFF00FF) == 0xFFFF00FF)
5818                                         goto L3;
5819                                 }
5820                                 else
5821                                 {
5822                                     if ((u & 0xFF) == 0xFF)
5823                                         goto L3;
5824                                 }
5825                             }
5826                         }
5827                         if (!shortop && useopsize)
5828                         {
5829                             if ((u & 0xFFFF0000) == 0xFFFF0000)
5830                             {
5831                                 c.Iflags ^= CFopsize;
5832                                 goto L1;
5833                             }
5834                             if ((u & 0xFFFF) == 0xFFFF && rm < modregrm(3,4,AX))
5835                             {
5836                                 c.IEV1.Voffset += 2; /* address MSW      */
5837                                 c.IEV2.Vuns >>= 16;
5838                                 c.Iflags ^= CFopsize;
5839                                 goto L1;
5840                             }
5841                             if (rm >= modregrm(3,4,AX))
5842                             {
5843                                 if (u == 0xFF && (rm <= modregrm(3,4,BX) || I64))
5844                                 {
5845                                     c.Iop = MOVZXb;     // MOVZX
5846                                     c.Irm = modregrm(3,ereg,ereg);
5847                                     if (c.Irex & REX_B)
5848                                         c.Irex |= REX_R;
5849                                     goto L1;
5850                                 }
5851                                 if (u == 0xFFFF)
5852                                 {
5853                                     c.Iop = MOVZXw;     // MOVZX
5854                                     c.Irm = modregrm(3,ereg,ereg);
5855                                     if (c.Irex & REX_B)
5856                                         c.Irex |= REX_R;
5857                                     goto L1;
5858                                 }
5859                             }
5860                         }
5861                     }
5862                 }
5863 
5864                 /* Look for ADD,OR,SUB,XOR with u that we can eliminate */
5865                 if (!flags &&
5866                     (op == 0x81 || op == 0x80) &&
5867                     (reg == modregrm(0,0,0) || reg == modregrm(0,1,0) ||  // ADD,OR
5868                      reg == modregrm(0,5,0) || reg == modregrm(0,6,0))    // SUB, XOR
5869                    )
5870                 {
5871                     if (u == 0)
5872                     {
5873                         c.Iop = NOP;
5874                         goto L1;
5875                     }
5876                     if (u == ~0 && reg == modregrm(0,6,0))  /* XOR  */
5877                     {
5878                         c.Iop = 0xF6 | (op & 1);       /* NOT  */
5879                         c.Irm ^= modregrm(0,6^2,0);
5880                         goto L1;
5881                     }
5882                     if (!shortop &&
5883                         useopsize &&
5884                         op == 0x81 &&
5885                         (u & 0xFFFF0000) == 0 &&
5886                         (reg == modregrm(0,6,0) || reg == modregrm(0,1,0)))
5887                     {
5888                         c.Iflags ^= CFopsize;
5889                         goto L1;
5890                     }
5891                 }
5892 
5893                 /* Look for TEST or OR or XOR with an immediate constant */
5894                 /* that we can replace with a byte operation            */
5895                 if (op == 0xF7 && reg == modregrm(0,0,0) ||
5896                     op == 0x81 && reg == modregrm(0,6,0) && !flags ||
5897                     op == 0x81 && reg == modregrm(0,1,0))
5898                 {
5899                     // See if we can replace a dword with a word
5900                     // (avoid for 32 bit instructions, because CFopsize
5901                     //  is too slow)
5902                     if (!shortop && useopsize)
5903                     {
5904                         if ((u & 0xFFFF0000) == 0)
5905                         {
5906                             c.Iflags ^= CFopsize;
5907                             goto L1;
5908                         }
5909                         /* If memory (not register) addressing mode     */
5910                         if ((u & 0xFFFF) == 0 && rm < modregrm(3,0,AX))
5911                         {
5912                             c.IEV1.Voffset += 2; /* address MSW  */
5913                             c.IEV2.Vuns >>= 16;
5914                             c.Iflags ^= CFopsize;
5915                             goto L1;
5916                         }
5917                     }
5918 
5919                     // If EA is not SI or DI
5920                     if (rm < (modregrm(3,0,SP) | reg) &&
5921                         (usespace ||
5922                          config.target_cpu < TARGET_PentiumPro)
5923                        )
5924                     {
5925                         if ((u & 0xFFFFFF00) == 0)
5926                         {
5927                         L2: c.Iop--;           /* to byte instruction  */
5928                             c.Iflags &= ~CFopsize;
5929                             goto L1;
5930                         }
5931                         if (((u & 0xFFFF00FF) == 0 ||
5932                              (shortop && (u & 0xFF) == 0)) &&
5933                             (rm < modregrm(3,0,0) || (!c.Irex && ereg < 4)))
5934                         {
5935                         L3:
5936                             c.IEV2.Vuns >>= 8;
5937                             if (rm >= (modregrm(3,0,AX) | reg))
5938                                 c.Irm |= 4;    /* AX.AH, BX.BH, etc. */
5939                             else
5940                                 c.IEV1.Voffset += 1;
5941                             goto L2;
5942                         }
5943                     }
5944 
5945                     // BUG: which is right?
5946                     //else if ((u & 0xFFFF0000) == 0)
5947 
5948                     else if (0 && op == 0xF7 &&
5949                              rm >= modregrm(3,0,SP) &&
5950                              (u & 0xFFFF0000) == 0)
5951 
5952                         c.Iflags &= ~CFopsize;
5953                 }
5954 
5955                 // Try to replace TEST reg,-1 with TEST reg,reg
5956                 if (op == 0xF6 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7)) // TEST regL,immed8
5957                 {
5958                     if ((u & 0xFF) == 0xFF)
5959                     {
5960                       L4:
5961                         c.Iop = 0x84;          // TEST regL,regL
5962                         c.Irm = modregrm(3,ereg,ereg);
5963                         if (c.Irex & REX_B)
5964                             c.Irex |= REX_R;
5965                         c.Iflags &= ~CFopsize;
5966                         goto L1;
5967                     }
5968                 }
5969                 if (op == 0xF7 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7) && (I64 || ereg < 4))
5970                 {
5971                     if (u == 0xFF)
5972                     {
5973                         if (ereg & 4) // SIL,DIL,BPL,SPL need REX prefix
5974                             c.Irex |= REX;
5975                         goto L4;
5976                     }
5977                     if ((u & 0xFFFF) == 0xFF00 && shortop && !c.Irex && ereg < 4)
5978                     {
5979                         ereg |= 4;                /* to regH      */
5980                         goto L4;
5981                     }
5982                 }
5983 
5984                 /* Look for sign extended immediate data */
5985                 if (cast(byte) u == u)
5986                 {
5987                     if (op == 0x81)
5988                     {
5989                         if (reg != 0x08 && reg != 0x20 && reg != 0x30)
5990                             c.Iop = op = 0x83;         /* 8 bit sgn ext */
5991                     }
5992                     else if (op == 0x69)                /* IMUL rw,ew,dw */
5993                         c.Iop = op = 0x6B;             /* IMUL rw,ew,db */
5994                 }
5995 
5996                 // Look for SHIFT EA,imm8 we can replace with short form
5997                 if (u == 1 && ((op & 0xFE) == 0xC0))
5998                     c.Iop |= 0xD0;
5999 
6000             } /* if immediate second operand */
6001 
6002             /* Look for AX short form */
6003             if (ins & A)
6004             {
6005                 if (rm == modregrm(0,AX,local_BPRM) &&
6006                     !(c.Irex & REX_R) &&               // and it's AX, not R8
6007                     (op & ~3) == 0x88 &&
6008                     !I64)
6009                 {
6010                     op = ((op & 3) + 0xA0) ^ 2;
6011                     /* 8A. A0 */
6012                     /* 8B. A1 */
6013                     /* 88. A2 */
6014                     /* 89. A3 */
6015                     c.Iop = op;
6016                     c.IFL2 = c.IFL1;
6017                     c.IEV2 = c.IEV1;
6018                 }
6019 
6020                 /* Replace MOV REG1,REG2 with MOV EREG1,EREG2   */
6021                 else if (!I16 &&
6022                          (op == 0x89 || op == 0x8B) &&
6023                          (rm & 0xC0) == 0xC0 &&
6024                          (!b || b.BC != BCasm)
6025                         )
6026                     c.Iflags &= ~CFopsize;
6027 
6028                 // If rm is AX
6029                 else if ((rm & modregrm(3,0,7)) == modregrm(3,0,AX) && !(c.Irex & (REX_R | REX_B)))
6030                 {
6031                     switch (op)
6032                     {
6033                         case 0x80:  op = reg | 4; break;
6034                         case 0x81:  op = reg | 5; break;
6035                         case 0x87:  op = 0x90 + (reg>>3); break;    // XCHG
6036 
6037                         case 0xF6:
6038                             if (reg == 0)
6039                                 op = 0xA8;  /* TEST AL,immed8       */
6040                             break;
6041 
6042                         case 0xF7:
6043                             if (reg == 0)
6044                                 op = 0xA9;  /* TEST AX,immed16      */
6045                             break;
6046 
6047                         default:
6048                             break;
6049                     }
6050                     c.Iop = op;
6051                 }
6052             }
6053 
6054             /* Look for reg short form */
6055             if ((ins & R) && (rm & 0xC0) == 0xC0)
6056             {
6057                 switch (op)
6058                 {
6059                     case 0xC6:  op = 0xB0 + ereg; break;
6060                     case 0xC7: // if no sign extension
6061                         if (!(c.Irex & REX_W && c.IEV2.Vint < 0))
6062                         {
6063                             c.Irm = 0;
6064                             c.Irex &= ~REX_W;
6065                             op = 0xB8 + ereg;
6066                         }
6067                         break;
6068 
6069                     case 0xFF:
6070                         switch (reg)
6071                         {   case 6<<3: op = 0x50+ereg; break;/* PUSH*/
6072                             case 0<<3: if (!I64) op = 0x40+ereg; break; /* INC*/
6073                             case 1<<3: if (!I64) op = 0x48+ereg; break; /* DEC*/
6074                             default: break;
6075                         }
6076                         break;
6077 
6078                     case 0x8F:  op = 0x58 + ereg; break;
6079                     case 0x87:
6080                         if (reg == 0 && !(c.Irex & (REX_R | REX_B))) // Issue 12968: Needed to ensure it's referencing RAX, not R8
6081                             op = 0x90 + ereg;
6082                         break;
6083 
6084                     default:
6085                         break;
6086                 }
6087                 c.Iop = op;
6088             }
6089 
6090             // Look to remove redundant REX prefix on XOR
6091             if (c.Irex == REX_W // ignore ops involving R8..R15
6092                 && (op == 0x31 || op == 0x33) // XOR
6093                 && ((rm & 0xC0) == 0xC0) // register direct
6094                 && ((reg >> 3) == ereg)) // register with itself
6095             {
6096                 c.Irex = 0;
6097             }
6098 
6099             // Look to replace SHL reg,1 with ADD reg,reg
6100             if ((op & ~1) == 0xD0 &&
6101                      (rm & modregrm(3,7,0)) == modregrm(3,4,0) &&
6102                      config.target_cpu >= TARGET_80486)
6103             {
6104                 c.Iop &= 1;
6105                 c.Irm = cast(ubyte)((rm & modregrm(3,0,7)) | (ereg << 3));
6106                 if (c.Irex & REX_B)
6107                     c.Irex |= REX_R;
6108                 if (!(c.Iflags & CFpsw) && !I16)
6109                     c.Iflags &= ~CFopsize;
6110                 goto L1;
6111             }
6112 
6113             /* Look for sign extended modregrm displacement, or 0
6114              * displacement.
6115              */
6116 
6117             if (((rm & 0xC0) == 0x80) && // it's a 16/32 bit disp
6118                 c.IFL1 == FLconst)      // and it's a constant
6119             {
6120                 a = c.IEV1.Vpointer;
6121                 if (a == 0 && (rm & 7) != local_BPRM &&         // if 0[disp]
6122                     !(local_BPRM == 5 && (rm & 7) == 4 && (c.Isib & 7) == BP)
6123                    )
6124                     c.Irm &= 0x3F;
6125                 else if (!I16)
6126                 {
6127                     if (cast(targ_size_t)cast(targ_schar)a == a)
6128                         c.Irm ^= 0xC0;                 /* do 8 sx      */
6129                 }
6130                 else if ((cast(targ_size_t)cast(targ_schar)a & 0xFFFF) == (a & 0xFFFF))
6131                     c.Irm ^= 0xC0;                     /* do 8 sx      */
6132             }
6133 
6134             /* Look for LEA reg,[ireg], replace with MOV reg,ireg       */
6135             if (op == LEA)
6136             {
6137                 rm = c.Irm & 7;
6138                 mod = c.Irm & modregrm(3,0,0);
6139                 if (mod == 0)
6140                 {
6141                     if (!I16)
6142                     {
6143                         switch (rm)
6144                         {
6145                             case 4:
6146                             case 5:
6147                                 break;
6148 
6149                             default:
6150                                 c.Irm |= modregrm(3,0,0);
6151                                 c.Iop = 0x8B;
6152                                 break;
6153                         }
6154                     }
6155                     else
6156                     {
6157                         switch (rm)
6158                         {
6159                             case 4:     rm = modregrm(3,0,SI);  goto L6;
6160                             case 5:     rm = modregrm(3,0,DI);  goto L6;
6161                             case 7:     rm = modregrm(3,0,BX);  goto L6;
6162                             L6:     c.Irm = cast(ubyte)(rm + reg);
6163                                     c.Iop = 0x8B;
6164                                     break;
6165 
6166                             default:
6167                                     break;
6168                         }
6169                     }
6170                 }
6171 
6172                 /* replace LEA reg,0[BP] with MOV reg,BP        */
6173                 else if (mod == modregrm(1,0,0) && rm == local_BPRM &&
6174                         c.IFL1 == FLconst && c.IEV1.Vpointer == 0)
6175                 {
6176                     c.Iop = 0x8B;          /* MOV reg,BP   */
6177                     c.Irm = cast(ubyte)(modregrm(3,0,BP) + reg);
6178                 }
6179             }
6180 
6181             // Replace [R13] with 0[R13]
6182             if (c.Irex & REX_B && ((c.Irm & modregrm(3,0,7)) == modregrm(0,0,BP) ||
6183                                     issib(c.Irm) && (c.Irm & modregrm(3,0,0)) == 0 && (c.Isib & 7) == BP))
6184             {
6185                 c.Irm |= modregrm(1,0,0);
6186                 c.IFL1 = FLconst;
6187                 c.IEV1.Vpointer = 0;
6188             }
6189         }
6190         else if (!(c.Iflags & CFvex))
6191         {
6192             switch (op)
6193             {
6194                 default:
6195                     // Look for MOV r64, immediate
6196                     if ((c.Irex & REX_W) && (op & ~7) == 0xB8)
6197                     {
6198                         /* Look for zero extended immediate data */
6199                         if (c.IEV2.Vsize_t == c.IEV2.Vuns)
6200                         {
6201                             c.Irex &= ~REX_W;
6202                         }
6203                         /* Look for sign extended immediate data */
6204                         else if (c.IEV2.Vsize_t == c.IEV2.Vint)
6205                         {
6206                             c.Irm = modregrm(3,0,op & 7);
6207                             c.Iop = op = 0xC7;
6208                             c.IEV2.Vsize_t = c.IEV2.Vuns;
6209                         }
6210                     }
6211                     if ((op & ~0x0F) != 0x70)
6212                         break;
6213                     goto case JMP;
6214 
6215                 case JMP:
6216                     switch (c.IFL2)
6217                     {
6218                         case FLcode:
6219                             if (c.IEV2.Vcode == code_next(c))
6220                             {
6221                                 c.Iop = NOP;
6222                                 continue;
6223                             }
6224                             break;
6225 
6226                         case FLblock:
6227                             if (!code_next(c) && c.IEV2.Vblock == bn)
6228                             {
6229                                 c.Iop = NOP;
6230                                 continue;
6231                             }
6232                             break;
6233 
6234                         case FLconst:
6235                         case FLfunc:
6236                         case FLextern:
6237                             break;
6238 
6239                         default:
6240                             WRFL(cast(FL)c.IFL2);
6241                             assert(0);
6242                     }
6243                     break;
6244 
6245                 case 0x68:                      // PUSH immed16
6246                     if (c.IFL2 == FLconst)
6247                     {
6248                         targ_long u = c.IEV2.Vuns;
6249                         if (I64 ||
6250                             ((c.Iflags & CFopsize) ? I16 : I32))
6251                         {   // PUSH 32/64 bit operand
6252                             if (u == cast(byte) u)
6253                                 c.Iop = 0x6A;          // PUSH immed8
6254                         }
6255                         else // PUSH 16 bit operand
6256                         {
6257                             if (cast(short)u == cast(byte) u)
6258                                 c.Iop = 0x6A;          // PUSH immed8
6259                         }
6260                     }
6261                     break;
6262             }
6263         }
6264     }
6265 
6266     debug
6267     if (debugc)
6268     {
6269         printf("-pinholeopt(%p)\n",cstart);
6270         for (c = cstart; c; c = code_next(c))
6271             code_print(c);
6272     }
6273 }
6274 
6275 
debug
{
/***********************************
 * Table-driven self check for pinholeopt().
 *
 * Every row pairs an instruction handed to pinholeopt() with the instruction
 * expected back. Rows that name a memory model (16, 32 or 64) are skipped
 * unless that model is the current one; rows with model 0 always run.
 */
@trusted
private void pinholeopt_unittest()
{
    //printf("pinholeopt_unittest()\n");

    // Snapshot of the instruction fields that are set up / compared
    static struct CS
    {
        uint model,op,ea;
        targ_size_t ev1,ev2;
        uint flags;
    }

    // [ input instruction, expected result ]
    __gshared CS[2][22] tests =
    [
        // XOR reg,immed                            NOT regL
        [ { 16,0x81,modregrm(3,6,BX),0,0xFF,0 },    { 0,0xF6,modregrm(3,2,BX),0,0xFF } ],

        // MOV 0[BX],3                               MOV [BX],3
        [ { 16,0xC7,modregrm(2,0,7),0,3 },          { 0,0xC7,modregrm(0,0,7),0,3 } ],

/+      // only if config.flags4 & CFG4space
        // TEST regL,immed8
        [ { 0,0xF6,modregrm(3,0,BX),0,0xFF,0 },    { 0,0x84,modregrm(3,BX,BX),0,0xFF }],
        [ { 0,0xF7,modregrm(3,0,BX),0,0xFF,0 },    { 0,0x84,modregrm(3,BX,BX),0,0xFF }],
        [ { 64,0xF6,modregrmx(3,0,R8),0,0xFF,0 },  { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }],
        [ { 64,0xF7,modregrmx(3,0,R8),0,0xFF,0 },  { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }],
+/

        // PUSH immed => PUSH immed8
        [ { 0,0x68,0,0,0 },    { 0,0x6A,0,0,0 }],
        [ { 0,0x68,0,0,0x7F }, { 0,0x6A,0,0,0x7F }],
        [ { 0,0x68,0,0,0x80 }, { 0,0x68,0,0,0x80 }],
        [ { 16,0x68,0,0,0,CFopsize },    { 0,0x6A,0,0,0,CFopsize }],
        [ { 16,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }],
        [ { 16,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }],
        [ { 16,0x68,0,0,0x10000,0 },     { 0,0x6A,0,0,0x10000,0 }],
        [ { 16,0x68,0,0,0x10000,CFopsize }, { 0,0x68,0,0,0x10000,CFopsize }],
        [ { 32,0x68,0,0,0,CFopsize },    { 0,0x6A,0,0,0,CFopsize }],
        [ { 32,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }],
        [ { 32,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }],
        [ { 32,0x68,0,0,0x10000,CFopsize },    { 0,0x6A,0,0,0x10000,CFopsize }],
        [ { 32,0x68,0,0,0x8000,CFopsize }, { 0,0x68,0,0,0x8000,CFopsize }],

        // clear r64, for r64 != R8..R15
        [ { 64,0x31,0x800C0,0,0,0 }, { 0,0x31,0xC0,0,0,0}],
        [ { 64,0x33,0x800C0,0,0,0 }, { 0,0x33,0xC0,0,0,0}],

        // MOV r64, immed
        [ { 64,0xC7,0x800C0,0,0xFFFFFFFF,0 }, { 0,0xC7,0x800C0,0,0xFFFFFFFF,0}],
        [ { 64,0xC7,0x800C0,0,0x7FFFFFFF,0 }, { 0,0xB8,0,0,0x7FFFFFFF,0}],
        [ { 64,0xB8,0x80000,0,0xFFFFFFFF,0 }, { 0,0xB8,0,0,0xFFFFFFFF,0 }],
        [ { 64,0xB8,0x80000,0,cast(targ_size_t)0x1FFFFFFFF,0 }, { 0,0xB8,0x80000,0,cast(targ_size_t)0x1FFFFFFFF,0 }],
        [ { 64,0xB8,0x80000,0,cast(targ_size_t)0xFFFFFFFFFFFFFFFF,0 }, { 0,0xC7,0x800C0,0,cast(targ_size_t)0xFFFFFFFF,0}],
    ];

    //config.flags4 |= CFG4space;
    foreach (int i; 0 .. cast(int)tests.length)
    {
        CS* given    = &tests[i][0];
        CS* expected = &tests[i][1];

        // A non-zero model restricts the row to that memory model only
        if (given.model &&
            ((I16 && given.model != 16) ||
             (I32 && given.model != 32) ||
             (I64 && given.model != 64)))
            continue;

        //printf("[%d]\n", i);

        // Build a zeroed instruction carrying just the fields under test
        code cs = void;
        memset(&cs, 0, cs.sizeof);
        cs.Iop = given.op;
        cs.Iea = given.ea;
        cs.IFL1 = FLconst;
        cs.IFL2 = FLconst;
        cs.IEV1.Vsize_t = given.ev1;
        cs.IEV2.Vsize_t = given.ev2;
        cs.Iflags = given.flags;

        pinholeopt(&cs, null);

        // Report the opcode mismatch before failing; the rest just assert
        if (cs.Iop != expected.op)
        {
            printf("[%d] Iop = x%02x, pout = x%02x\n", i, cs.Iop, expected.op);
            assert(0);
        }
        assert(cs.Iea == expected.ea);
        assert(cs.IEV1.Vsize_t == expected.ev1);
        assert(cs.IEV2.Vsize_t == expected.ev2);
        assert(cs.Iflags == expected.flags);
    }
}
}
6366 
/**************************
 * Try to turn an `op EA,immediate` instruction (opcode 0x80 or 0x81) into
 * the register form `op EA,reg`.
 *
 * The rewrite is done only when optimizing, the second operand is FLconst,
 * reghasvalue() reports a register that holds the immediate's value, and the
 * instruction is not a 16 bit mode instruction carrying CFopsize.
 *
 * Params:
 *      c = instruction to rewrite in place
 */
@trusted
void simplify_code(code* c)
{
    if (!(config.flags4 & CFG4optimized))
        return;

    // Only the 0x80 (byte) and 0x81 (word) immediate groups qualify
    const bool byteForm = (c.Iop == 0x80);
    if (!byteForm && c.Iop != 0x81)
        return;

    if (c.IFL2 != FLconst)
        return;

    // Is the constant already sitting in a usable register?
    reg_t holder;
    if (!reghasvalue(byteForm ? BYTEREGS : ALLREGS,
                     I64 ? c.IEV2.Vsize_t : c.IEV2.Vlong,
                     &holder))
        return;

    if (I16 && (c.Iflags & CFopsize))
        return;

    // Base opcode of the register form, indexed by the reg field of the modregrm byte
    static immutable ubyte[8] baseOpcode =
            [ 0x00,0x08,0x10,0x18,0x20,0x28,0x30,0x38 ];

    //printf("replacing 0x%02x, val = x%lx\n",c.Iop,c.IEV2.Vlong);
    const uint group = (c.Irm & modregrm(0,7,0)) >> 3;
    c.Iop = baseOpcode[group] | (c.Iop & 1);    // keep the byte/word bit
    code_newreg(c, holder);

    // Byte form using register 4..7 in 64 bit mode gets a REX prefix
    if (I64 && !(c.Iop & 1) && (holder & 4))
        c.Irex |= REX;
}
6389 
6390 /**************************
6391  * Compute jump addresses for FLcode.
6392  * Note: only works for forward referenced code.
6393  *       only direct jumps and branches are detected.
6394  *       LOOP instructions only work for backward refs.
6395  */
6396 
6397 @trusted
6398 void jmpaddr(code *c)
6399 {
6400     code* ci,cn,ctarg,cstart;
6401     targ_size_t ad;
6402 
6403     //printf("jmpaddr()\n");
6404     cstart = c;                           /* remember start of code       */
6405     while (c)
6406     {
6407         const op = c.Iop;
6408         if (op <= 0xEB &&
6409             inssize[op] & T &&   // if second operand
6410             c.IFL2 == FLcode &&
6411             ((op & ~0x0F) == 0x70 || op == JMP || op == JMPS || op == JCXZ || op == CALL))
6412         {
6413             ci = code_next(c);
6414             ctarg = c.IEV2.Vcode;  /* target code                  */
6415             ad = 0;                 /* IP displacement              */
6416             while (ci && ci != ctarg)
6417             {
6418                 ad += calccodsize(ci);
6419                 ci = code_next(ci);
6420             }
6421             if (!ci)
6422                 goto Lbackjmp;      // couldn't find it
6423             if (!I16 || op == JMP || op == JMPS || op == JCXZ || op == CALL)
6424                 c.IEV2.Vpointer = ad;
6425             else                    /* else conditional             */
6426             {
6427                 if (!(c.Iflags & CFjmp16))     /* if branch    */
6428                     c.IEV2.Vpointer = ad;
6429                 else            /* branch around a long jump    */
6430                 {
6431                     cn = code_next(c);
6432                     c.next = code_calloc();
6433                     code_next(c).next = cn;
6434                     c.Iop = op ^ 1;        /* converse jmp */
6435                     c.Iflags &= ~CFjmp16;
6436                     c.IEV2.Vpointer = I16 ? 3 : 5;
6437                     cn = code_next(c);
6438                     cn.Iop = JMP;          /* long jump    */
6439                     cn.IFL2 = FLconst;
6440                     cn.IEV2.Vpointer = ad;
6441                 }
6442             }
6443             c.IFL2 = FLconst;
6444         }
6445         if (op == LOOP && c.IFL2 == FLcode)    /* backwards refs       */
6446         {
6447           Lbackjmp:
6448             ctarg = c.IEV2.Vcode;
6449             for (ci = cstart; ci != ctarg; ci = code_next(ci))
6450                 if (!ci || ci == c)
6451                     assert(0);
6452             ad = 2;                 /* - IP displacement            */
6453             while (ci != c)
6454             {
6455                 assert(ci);
6456                 ad += calccodsize(ci);
6457                 ci = code_next(ci);
6458             }
6459             c.IEV2.Vpointer = (-ad) & 0xFF;
6460             c.IFL2 = FLconst;
6461         }
6462         c = code_next(c);
6463     }
6464 }
6465 
6466 /*******************************
6467  * Calculate bl.Bsize.
6468  */
6469 
6470 uint calcblksize(code *c)
6471 {
6472     uint size;
6473     for (size = 0; c; c = code_next(c))
6474     {
6475         uint sz = calccodsize(c);
6476         //printf("off=%02x, sz = %d, code %p: op=%02x\n", size, sz, c, c.Iop);
6477         size += sz;
6478     }
6479     //printf("calcblksize(c = x%x) = %d\n", c, size);
6480     return size;
6481 }
6482 
6483 /*****************************
6484  * Calculate and return code size of a code.
6485  * Note that NOPs are sometimes used as markers, but are
6486  * never output. LINNUMs are never output.
6487  * Note: This routine must be fast. Profiling shows it is significant.
6488  */
6489 
6490 @trusted
6491 uint calccodsize(code *c)
6492 {
6493     uint size;
6494     ubyte rm,mod,ins;
6495     uint iflags;
6496     uint i32 = I32 || I64;
6497     uint a32 = i32;
6498 
6499     debug
6500     assert((a32 & ~1) == 0);
6501 
6502     iflags = c.Iflags;
6503     opcode_t op = c.Iop;
6504     //printf("calccodsize(x%08x), Iflags = x%x\n", op, iflags);
6505     if (iflags & CFvex && c.Ivex.pfx == 0xC4)
6506     {
6507         ins = vex_inssize(c);
6508         size = ins & 7;
6509         goto Lmodrm;
6510     }
6511     else if ((op & 0xFF00) == 0x0F00 || (op & 0xFFFD00) == 0x0F3800)
6512         op = 0x0F;
6513     else
6514         op &= 0xFF;
6515     switch (op)
6516     {
6517         case 0x0F:
6518             if ((c.Iop & 0xFFFD00) == 0x0F3800)
6519             {   // 3 byte op ( 0F38-- or 0F3A-- )
6520                 ins = inssize2[(c.Iop >> 8) & 0xFF];
6521                 size = ins & 7;
6522                 if (c.Iop & 0xFF000000)
6523                   size++;
6524             }
6525             else
6526             {   // 2 byte op ( 0F-- )
6527                 ins = inssize2[c.Iop & 0xFF];
6528                 size = ins & 7;
6529                 if (c.Iop & 0xFF0000)
6530                   size++;
6531             }
6532             break;
6533 
6534         case 0x90:
6535             size = (c.Iop == PAUSE) ? 2 : 1;
6536             goto Lret2;
6537 
6538         case NOP:
6539         case ESCAPE:
6540             size = 0;                   // since these won't be output
6541             goto Lret2;
6542 
6543         case ASM:
6544             if (c.Iflags == CFaddrsize)        // kludge for DA inline asm
6545                 size = _tysize[TYnptr];
6546             else
6547                 size = cast(uint)c.IEV1.len;
6548             goto Lret2;
6549 
6550         case 0xA1:
6551         case 0xA3:
6552             if (c.Irex)
6553             {
6554                 size = 9;               // 64 bit immediate value for MOV to/from RAX
6555                 goto Lret;
6556             }
6557             goto Ldefault;
6558 
6559         case 0xF6:                      /* TEST mem8,immed8             */
6560             ins = inssize[op];
6561             size = ins & 7;
6562             if (i32)
6563                 size = inssize32[op];
6564             if ((c.Irm & (7<<3)) == 0)
6565                 size++;                 /* size of immed8               */
6566             break;
6567 
6568         case 0xF7:
6569             ins = inssize[op];
6570             size = ins & 7;
6571             if (i32)
6572                 size = inssize32[op];
6573             if ((c.Irm & (7<<3)) == 0)
6574                 size += (i32 ^ ((iflags & CFopsize) !=0)) ? 4 : 2;
6575             break;
6576 
6577         default:
6578         Ldefault:
6579             ins = inssize[op];
6580             size = ins & 7;
6581             if (i32)
6582                 size = inssize32[op];
6583     }
6584 
6585     if (iflags & (CFwait | CFopsize | CFaddrsize | CFSEG))
6586     {
6587         if (iflags & CFwait)    // if add FWAIT prefix
6588             size++;
6589         if (iflags & CFSEG)     // if segment override
6590             size++;
6591 
6592         // If the instruction has a second operand that is not an 8 bit,
6593         // and the operand size prefix is present, then fix the size computation
6594         // because the operand size will be different.
6595         // Walter, I had problems with this bit at the end.  There can still be
6596         // an ADDRSIZE prefix for these and it does indeed change the operand size.
6597 
6598         if (iflags & (CFopsize | CFaddrsize))
6599         {
6600             if ((ins & (T|E)) == T)
6601             {
6602                 if ((op & 0xAC) == 0xA0)
6603                 {
6604                     if (iflags & CFaddrsize && !I64)
6605                     {   if (I32)
6606                             size -= 2;
6607                         else
6608                             size += 2;
6609                     }
6610                 }
6611                 else if (iflags & CFopsize)
6612                 {   if (I16)
6613                         size += 2;
6614                     else
6615                         size -= 2;
6616                 }
6617             }
6618             if (iflags & CFaddrsize)
6619             {   if (!I64)
6620                     a32 ^= 1;
6621                 size++;
6622             }
6623             if (iflags & CFopsize)
6624                 size++;                         /* +1 for OPSIZE prefix         */
6625         }
6626     }
6627 
6628 Lmodrm:
6629     if ((op & ~0x0F) == 0x70)
6630     {
6631         if (iflags & CFjmp16)           // if long branch
6632             size += I16 ? 3 : 4;        // + 3(4) bytes for JMP
6633     }
6634     else if (ins & M)                   // if modregrm byte
6635     {
6636         rm = c.Irm;
6637         mod = rm & 0xC0;
6638         if (a32 || I64)
6639         {   // 32 bit addressing
6640             if (issib(rm))
6641                 size++;
6642             switch (mod)
6643             {   case 0:
6644                     if (issib(rm) && (c.Isib & 7) == 5 ||
6645                         (rm & 7) == 5)
6646                         size += 4;      /* disp32                       */
6647                     if (c.Irex & REX_B && (rm & 7) == 5)
6648                         /* Instead of selecting R13, this mode is an [RIP] relative
6649                          * address. Although valid, it's redundant, and should not
6650                          * be generated. Instead, generate 0[R13] instead of [R13].
6651                          */
6652                         assert(0);
6653                     break;
6654 
6655                 case 0x40:
6656                     size++;             /* disp8                        */
6657                     break;
6658 
6659                 case 0x80:
6660                     size += 4;          /* disp32                       */
6661                     break;
6662 
6663                 default:
6664                     break;
6665             }
6666         }
6667         else
6668         {   // 16 bit addressing
6669             if (mod == 0x40)            /* 01: 8 bit displacement       */
6670                 size++;
6671             else if (mod == 0x80 || (mod == 0 && (rm & 7) == 6))
6672                 size += 2;
6673         }
6674     }
6675 
6676 Lret:
6677     if (!(iflags & CFvex) && c.Irex)
6678     {
6679         size++;
6680         if (c.Irex & REX_W && (op & ~7) == 0xB8)
6681             size += 4;
6682     }
6683 Lret2:
6684     //printf("op = x%02x, size = %d\n",op,size);
6685     return size;
6686 }
6687 
6688 /********************************
6689  * Return !=0 if codes match.
6690  */
6691 
6692 static if (0)
6693 {
6694 
6695 int code_match(code *c1,code *c2)
6696 {
6697     code cs1,cs2;
6698     ubyte ins;
6699 
6700     if (c1 == c2)
6701         goto match;
6702     cs1 = *c1;
6703     cs2 = *c2;
6704     if (cs1.Iop != cs2.Iop)
6705         goto nomatch;
6706     switch (cs1.Iop)
6707     {
6708         case ESCAPE | ESCctor:
6709         case ESCAPE | ESCdtor:
6710             goto nomatch;
6711 
6712         case NOP:
6713             goto match;
6714 
6715         case ASM:
6716             if (cs1.IEV1.len == cs2.IEV1.len &&
6717                 memcmp(cs1.IEV1.bytes,cs2.IEV1.bytes,cs1.EV1.len) == 0)
6718                 goto match;
6719             else
6720                 goto nomatch;
6721 
6722         default:
6723             if ((cs1.Iop & 0xFF) == ESCAPE)
6724                 goto match;
6725             break;
6726     }
6727     if (cs1.Iflags != cs2.Iflags)
6728         goto nomatch;
6729 
6730     ins = inssize[cs1.Iop & 0xFF];
6731     if ((cs1.Iop & 0xFFFD00) == 0x0F3800)
6732     {
6733         ins = inssize2[(cs1.Iop >> 8) & 0xFF];
6734     }
6735     else if ((cs1.Iop & 0xFF00) == 0x0F00)
6736     {
6737         ins = inssize2[cs1.Iop & 0xFF];
6738     }
6739 
6740     if (ins & M)                // if modregrm byte
6741     {
6742         if (cs1.Irm != cs2.Irm)
6743             goto nomatch;
6744         if ((cs1.Irm & 0xC0) == 0xC0)
6745             goto do2;
6746         if (is32bitaddr(I32,cs1.Iflags))
6747         {
6748             if (issib(cs1.Irm) && cs1.Isib != cs2.Isib)
6749                 goto nomatch;
6750             if (
6751                 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5))
6752                )
6753                 goto do2;       /* if no first operand  */
6754         }
6755         else
6756         {
6757             if (
6758                 ((rm & 0xC0) == 0 && !((rm & 7) == 6))
6759                )
6760                 goto do2;       /* if no first operand  */
6761         }
6762         if (cs1.IFL1 != cs2.IFL1)
6763             goto nomatch;
6764         if (flinsymtab[cs1.IFL1] && cs1.IEV1.Vsym != cs2.IEV1.Vsym)
6765             goto nomatch;
6766         if (cs1.IEV1.Voffset != cs2.IEV1.Voffset)
6767             goto nomatch;
6768     }
6769 
6770 do2:
6771     if (!(ins & T))                     // if no second operand
6772         goto match;
6773     if (cs1.IFL2 != cs2.IFL2)
6774         goto nomatch;
6775     if (flinsymtab[cs1.IFL2] && cs1.IEV2.Vsym != cs2.IEV2.Vsym)
6776         goto nomatch;
6777     if (cs1.IEV2.Voffset != cs2.IEV2.Voffset)
6778         goto nomatch;
6779 
6780 match:
6781     return 1;
6782 
6783 nomatch:
6784     return 0;
6785 }
6786 
6787 }
6788 
6789 /************************
6790  * Little buffer allocated on the stack to accumulate instruction bytes to
6791  * later be sent along to objmod
6792  */
private struct MiniCodeBuf
{
nothrow:
    uint index;                 /// number of bytes currently held in `bytes`
    uint offset;                /// segment offset passed to objmod.bytes() on the next flush
    int seg;                    /// segment the bytes are written to
    Barray!ubyte* disasmBuf;    /// if not null, also receives every emitted byte
    ubyte[256] bytes; // = void;

    /******************************
     * Start an empty buffer positioned at the current Offset(seg).
     * Params:
     *      seg = segment to emit into
     */
    @trusted
    this(int seg)
    {
        index = 0;
        this.offset = cast(uint)Offset(seg);
        this.seg = seg;
    }

    /******************************
     * Unconditionally hand the accumulated bytes to objmod (and to
     * `disasmBuf` when set), then empty the buffer.
     */
    @trusted
    void flushx()
    {
        // Emit accumulated bytes to code segment
        debug assert(index < bytes.length);

        if (disasmBuf)                     // write to buffer for disassembly
        {
            foreach (c; bytes[0 .. index]) // not efficient, but for verbose output anyway
                disasmBuf.push(c);
        }

        // advance by whatever objmod.bytes() reports
        offset += objmod.bytes(seg, offset, index, bytes.ptr);
        index = 0;
    }

    /// Append one byte; the caller is responsible for there being room
    @trusted
    void gen(ubyte c) { bytes[index++] = c; }

    /// Append `n` bytes copied from `p`; the caller is responsible for there being room
    @trusted
    void genp(uint n, void *p) { memcpy(&bytes[index], p, n); index += n; }

    /// Flush only if something is buffered
    @trusted
    void flush() { if (index) flushx(); }

    /// Offset in the segment of the next byte to be appended
    @trusted
    uint getOffset() { return offset + index; }

    /// Number of bytes that can still be appended before the buffer is full
    @trusted
    uint available() { return cast(uint)bytes.length - index; }

    /******************************
     * write64/write32/write16 write `value` to `disasmBuf`
     * in little-endian byte order. They do nothing when `disasmBuf`
     * is null and never touch `bytes`.
     */
    @trusted
    void write64(ulong value)
    {
        if (disasmBuf)
        {
            // Least significant byte first, one byte (8 bits) per step:
            // shifts 0, 8, 16, 24, 32, 40, 48, 56
            foreach (i; 0 .. 8)
                disasmBuf.push(cast(ubyte)(value >> (i * 8)));
        }
    }

    /// ditto
    pragma(inline, true)
    @trusted
    void write32(uint value)
    {
        if (disasmBuf)
        {
            disasmBuf.push(cast(ubyte)value);
            disasmBuf.push(cast(ubyte)(value >>  8));
            disasmBuf.push(cast(ubyte)(value >> 16));
            disasmBuf.push(cast(ubyte)(value >> 24));
        }
    }

    /// ditto
    pragma(inline, true)
    @trusted
    void write16(uint value)
    {
        if (disasmBuf)
        {
            disasmBuf.push(cast(ubyte)value);
            disasmBuf.push(cast(ubyte)(value >> 8));
        }
    }
}
6884 
/**************************
 * Convert instructions to object code and write them to objmod.
 *
 * Instruction bytes are accumulated in a small stack buffer (MiniCodeBuf)
 * and sent to objmod in batches; operands that need a fixup are emitted
 * through the do8bit/do16bit/do32bit/do64bit helpers.
 * On return Offset(seg) has been advanced to the end of the emitted code.
 *
 * The instruction list may be modified in place: a conditional jump flagged
 * CFjmp16 has its opcode rewritten (and, for 16 bit code, a JMP instruction
 * linked in after it), and a `PUSH` of an FLblock operand has its IFL2
 * changed to FLblockoff.
 * Params:
 *      seg = code segment to write to, code starts at Offset(seg)
 *      c = list of instructions to write
 *      disasmBuf = if not null, then also write object code here
 * Returns:
 *      offset of end of code emitted
 */

@trusted
uint codout(int seg, code *c, Barray!ubyte* disasmBuf)
{
    ubyte rm,mod;
    ubyte ins;
    code *cn;
    uint flags;
    Symbol *s;

    debug
    if (debugc) printf("codout(%p), Coffset = x%llx\n",c,cast(ulong)Offset(seg));

    // The byte array is deliberately left uninitialized (`= void`);
    // every other field is set explicitly here.
    MiniCodeBuf ggen = void;
    ggen.index = 0;
    ggen.offset = cast(uint)Offset(seg);
    ggen.seg = seg;
    ggen.disasmBuf = disasmBuf;

    for (; c; c = code_next(c))
    {
        debug
        {
        if (debugc) { printf("off=%02x, sz=%d, ",cast(int)ggen.getOffset(),cast(int)calccodsize(c)); code_print(c); }
        uint startoffset = ggen.getOffset();
        }

        opcode_t op = c.Iop;
        ins = inssize[op & 0xFF];       // operand layout flags (M, T, E) for a one byte opcode
        // First weed out the pseudo-instructions, which are recognized by the
        // low byte of the opcode. Each case that handles one ends in `continue`;
        // a `break` means "this is a real instruction after all".
        switch (op & 0xFF)
        {
            case ESCAPE:
                /* Check for SSE4 opcode v/pmaxuw xmm1,xmm2/m128 */
                // (that opcode, and VEX encoded instructions, are real
                // instructions even though the low byte matches ESCAPE)
                if(op == 0x660F383E || c.Iflags & CFvex) break;

                switch (op & 0xFFFF00)
                {   case ESClinnum:
                        /* put out line number stuff    */
                        objmod.linnum(c.IEV1.Vsrcpos,seg,ggen.getOffset());
                        break;
version (SCPP)
{
static if (1)
{
                    case ESCctor:
                    case ESCdtor:
                    case ESCoffset:
                        if (config.exe != EX_WIN32)
                            except_pair_setoffset(c,ggen.getOffset() - funcoffset);
                        break;

                    case ESCmark:
                    case ESCrelease:
                    case ESCmark2:
                    case ESCrelease2:
                        break;
}
else
{
                    case ESCctor:
                        except_push(ggen.getOffset() - funcoffset,c.IEV1.Vtor,null);
                        break;

                    case ESCdtor:
                        except_pop(ggen.getOffset() - funcoffset,c.IEV1.Vtor,null);
                        break;

                    case ESCmark:
                        except_mark();
                        break;

                    case ESCrelease:
                        except_release();
                        break;
}
}
                    case ESCadjesp:
                        //printf("adjust ESP %ld\n", cast(long)c.IEV1.Vint);
                        break;

                    default:
                        break;
                }

                // escapes never produce any object code
                debug
                assert(calccodsize(c) == 0);

                continue;

            case NOP:                   /* don't send them out          */
                if (op != NOP)          // longer opcode that merely ends in the NOP byte
                    break;
                debug
                assert(calccodsize(c) == 0);

                continue;

            case ASM:
                if (op != ASM)          // longer opcode that merely ends in the ASM byte
                    break;
                // The data is written straight to objmod at ggen.offset,
                // so anything still buffered has to go out first.
                ggen.flush();
                if (c.Iflags == CFaddrsize)    // kludge for DA inline asm
                {
                    do32bit(&ggen, FLblockoff,&c.IEV1,0,0);
                }
                else
                {
                    // NOTE(review): these bytes bypass ggen and are therefore
                    // not copied to disasmBuf - confirm this is intended
                    ggen.offset += objmod.bytes(seg,ggen.offset,cast(uint)c.IEV1.len,c.IEV1.bytes);
                }
                debug
                assert(calccodsize(c) == c.IEV1.len);

                continue;

            default:
                break;
        }
        flags = c.Iflags;

        // See if we need to flush (don't have room for largest code sequence)
        // (25 bytes is used as the upper bound for what one instruction may add)
        if (ggen.available() < (1+4+4+8+8))
            ggen.flush();

        // see if we need to put out prefix bytes
        if (flags & (CFwait | CFPREFIX | CFjmp16))
        {
            int override_;

            if (flags & CFwait)
                ggen.gen(0x9B);                      // FWAIT
                                                /* ? SEGES : SEGSS      */
            // segment override prefix, if one is requested
            switch (flags & CFSEG)
            {   case CFes:      override_ = SEGES;       goto segover;
                case CFss:      override_ = SEGSS;       goto segover;
                case CFcs:      override_ = SEGCS;       goto segover;
                case CFds:      override_ = SEGDS;       goto segover;
                case CFfs:      override_ = SEGFS;       goto segover;
                case CFgs:      override_ = SEGGS;       goto segover;
                segover:        ggen.gen(cast(ubyte)override_);
                                break;

                default:        break;
            }

            if (flags & CFaddrsize)
                ggen.gen(0x67);                      // address size prefix

            // Do this last because of instructions like ADDPD
            if (flags & CFopsize)
                ggen.gen(0x66);                      /* operand size         */

            // A short conditional jump (opcode 0x70..0x7F) that has been
            // flagged CFjmp16 needs a longer displacement.
            if ((op & ~0x0F) == 0x70 && flags & CFjmp16) /* long condit jmp */
            {
                if (!I16)
                {   // Put out 16 bit conditional jump
                    // i.e. switch to the two byte 0x0F 0x8x opcode with the same condition
                    c.Iop = op = 0x0F00 | (0x80 | (op & 0x0F));
                }
                else
                {
                    // 16 bit code: emit the inverted condition jumping over an
                    // unconditional JMP that is linked in right after this instruction
                    cn = code_calloc();
                    /*cxcalloc++;*/
                    cn.next = code_next(c);
                    c.next= cn;          // link into code
                    cn.Iop = JMP;              // JMP block
                    cn.IFL2 = c.IFL2;
                    cn.IEV2.Vblock = c.IEV2.Vblock;
                    c.Iop = op ^= 1;           // toggle condition
                    c.IFL2 = FLconst;
                    c.IEV2.Vpointer = I16 ? 3 : 5; // skip over JMP block
                    c.Iflags &= ~CFjmp16;
                    // note that the local `flags` still has CFjmp16 set; it is
                    // masked down to CFseg | CFoff | CFselfrel further below
                }
            }
        }

        // VEX encoded instruction: the VEX prefix takes the place of the REX
        // prefix and the opcode escape bytes, so emit it and go straight to the
        // mod/rm handling.
        if (flags & CFvex)
        {
            if (flags & CFvex3)
            {
                // three byte form
                ggen.gen(0xC4);
                ggen.gen(cast(ubyte)VEX3_B1(c.Ivex));
                ggen.gen(cast(ubyte)VEX3_B2(c.Ivex));
                ggen.gen(c.Ivex.op);
            }
            else
            {
                // two byte form
                ggen.gen(0xC5);
                ggen.gen(cast(ubyte)VEX2_B1(c.Ivex));
                ggen.gen(c.Ivex.op);
            }
            ins = vex_inssize(c);
            goto Lmodrm;
        }

        // Emit the REX prefix (if c.Irex is set) and the opcode bytes, most
        // significant opcode byte first.
        if (op > 0xFF)
        {
            // multi-byte opcode: look up the operand layout in the two byte table
            if ((op & 0xFFFD00) == 0x0F3800)            // 0x0F38xx or 0x0F3Axx
                ins = inssize2[(op >> 8) & 0xFF];
            else if ((op & 0xFF00) == 0x0F00)           // 0x0Fxx
                ins = inssize2[op & 0xFF];

            if (op & 0xFF000000)
            {
                // four byte opcode
                ubyte op1 = op >> 24;
                if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66)
                {
                    // leading byte is a prefix: REX goes between it and the rest
                    ggen.gen(op1);
                    if (c.Irex)
                        ggen.gen(c.Irex | REX);
                }
                else
                {
                    if (c.Irex)
                        ggen.gen(c.Irex | REX);
                    ggen.gen(op1);
                }
                ggen.gen((op >> 16) & 0xFF);
                ggen.gen((op >> 8) & 0xFF);
                ggen.gen(op & 0xFF);
            }
            else if (op & 0xFF0000)
            {
                // three byte opcode, same prefix/REX ordering as above
                ubyte op1 = cast(ubyte)(op >> 16);
                if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66)
                {
                    ggen.gen(op1);
                    if (c.Irex)
                        ggen.gen(c.Irex | REX);
                }
                else
                {
                    if (c.Irex)
                        ggen.gen(c.Irex | REX);
                    ggen.gen(op1);
                }
                ggen.gen((op >> 8) & 0xFF);
                ggen.gen(op & 0xFF);
            }
            else
            {
                // two byte opcode
                if (c.Irex)
                    ggen.gen(c.Irex | REX);
                ggen.gen((op >> 8) & 0xFF);
                ggen.gen(op & 0xFF);
            }
        }
        else
        {
            // one byte opcode
            if (c.Irex)
                ggen.gen(c.Irex | REX);
            ggen.gen(cast(ubyte)op);
        }
  Lmodrm:
        if (ins & M)            /* if modregrm byte             */
        {
            rm = c.Irm;
            ggen.gen(rm);

            // Look for an address size override when working with the
            // MOD R/M and SIB bytes

            if (is32bitaddr( I32, flags))
            {
                if (issib(rm))
                    ggen.gen(c.Isib);
                // the top two bits of the mod/rm byte select the displacement size
                switch (rm & 0xC0)
                {
                    case 0x40:
                        do8bit(&ggen, cast(FL) c.IFL1,&c.IEV1);     // 8 bit
                        break;

                    case 0:
                        // no displacement, except when the base (SIB) or r/m
                        // field is 5, which takes a 32 bit displacement
                        if (!(issib(rm) && (c.Isib & 7) == 5 ||
                              (rm & 7) == 5))
                            break;
                        goto case 0x80;

                    case 0x80:
                    {
                        int cfflags = CFoff;
                        targ_size_t val = 0;
                        if (I64)
                        {
                            if ((rm & modregrm(3,0,7)) == modregrm(0,0,5))      // if disp32[RIP]
                            {
                                // `val` is the distance from the end of the
                                // instruction back to the start of the displacement:
                                // the 4 displacement bytes plus whatever immediate
                                // operand follows them (1, 2 or 4 bytes).
                                cfflags |= CFpc32;
                                val = -4;
                                reg_t reg = rm & modregrm(0,7,0);
                                if (ins & T ||
                                    ((op == 0xF6 || op == 0xF7) && (reg == modregrm(0,0,0) || reg == modregrm(0,1,0))))
                                {   if (ins & E || op == 0xF6)
                                        val = -5;
                                    else if (c.Iflags & CFopsize)
                                        val = -6;
                                    else
                                        val = -8;
                                }

                                if (config.exe & (EX_OSX64 | EX_WIN64))
                                    /* Mach-O and Win64 fixups already take the 4 byte size
                                     * into account, so bias by 4
                                     */
                                    val += 4;
                            }
                        }
                        do32bit(&ggen, cast(FL)c.IFL1,&c.IEV1,cfflags,cast(int)val);
                        break;
                    }

                    default:
                        break;
                }
            }
            else
            {
                // 16 bit addressing: no SIB byte, displacements are 8 or 16 bits
                switch (rm & 0xC0)
                {   case 0x40:
                        do8bit(&ggen, cast(FL) c.IFL1,&c.IEV1);     // 8 bit
                        break;

                    case 0:
                        // no displacement, except for r/m == 6 which takes a
                        // 16 bit displacement
                        if ((rm & 7) != 6)
                            break;
                        goto case 0x80;

                    case 0x80:
                        do16bit(&ggen, cast(FL)c.IFL1,&c.IEV1,CFoff);
                        break;

                    default:
                        break;
                }
            }
        }
        else
        {
            // ENTER has no mod/rm byte but still takes a 16 bit first operand
            if (op == ENTER)
                do16bit(&ggen, cast(FL)c.IFL1,&c.IEV1,0);
        }
        // from here on only the fixup related flags are passed to the helpers
        flags &= CFseg | CFoff | CFselfrel;
        if (ins & T)                    /* if second operand            */
        {
            // The labels do16, do32, do64, ptr1616 and ptr1632 are shared
            // between the 32/64 bit switch and the 16 bit switch below.
            if (ins & E)            /* if data-8                    */
                do8bit(&ggen, cast(FL) c.IFL2,&c.IEV2);
            else if (!I16)
            {
                switch (op)
                {
                    case 0xC2:              /* RETN imm16           */
                    case 0xCA:              /* RETF imm16           */
                    do16:
                        do16bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags);
                        break;

                    case 0xA1:
                    case 0xA3:
                        // with a REX prefix in 64 bit mode the operand is 64 bits
                        if (I64 && c.Irex)
                        {
                    do64:
                            do64bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags);
                            break;
                        }
                        goto case 0xA0;

                    case 0xA0:              /* MOV AL,byte ptr []   */
                    case 0xA2:
                        // operand size follows the address size here
                        if (c.Iflags & CFaddrsize && !I64)
                            goto do16;
                        else
                    do32:
                            do32bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags,0);
                        break;

                    case 0x9A:
                    case 0xEA:
                        // far pointer operand; handled in the 16 bit switch below
                        if (c.Iflags & CFopsize)
                            goto ptr1616;
                        else
                            goto ptr1632;

                    case 0x68:              // PUSH immed32
                        if (cast(FL)c.IFL2 == FLblock)
                        {
                            // push the address of the block rather than a
                            // displacement to it
                            c.IFL2 = FLblockoff;
                            goto do32;
                        }
                        else
                            goto case_default;

                    case CALL:              // CALL rel
                    case JMP:               // JMP  rel
                        flags |= CFselfrel;
                        goto case_default;

                    default:
                        if ((op|0xF) == 0x0F8F) // Jcc rel16 rel32
                            flags |= CFselfrel;
                        // 0xB8..0xBF with REX.W set takes a 64 bit immediate
                        if (I64 && (op & ~7) == 0xB8 && c.Irex & REX_W)
                            goto do64;
                    case_default:
                        // the operand size prefix selects 16 bits in 32/64 bit code
                        if (c.Iflags & CFopsize)
                            goto do16;
                        else
                            goto do32;
                }
            }
            else
            {
                // 16 bit code: the defaults are reversed, a size prefix selects 32 bits
                switch (op)
                {
                    case 0xC2:
                    case 0xCA:
                        goto do16;

                    case 0xA0:
                    case 0xA1:
                    case 0xA2:
                    case 0xA3:
                        if (c.Iflags & CFaddrsize)
                            goto do32;
                        else
                            goto do16;

                    case 0x9A:
                    case 0xEA:
                        if (c.Iflags & CFopsize)
                            goto ptr1632;
                        else
                            goto ptr1616;

                    ptr1616:
                    ptr1632:
                        // Both far pointer forms are emitted the same way: as a
                        // fixup written directly by objmod, hence the flush.
                        //assert(c.IFL2 == FLfunc);
                        ggen.flush();
                        if (c.IFL2 == FLdatseg)
                        {
                            objmod.reftodatseg(seg,ggen.offset,c.IEV2.Vpointer,
                                    c.IEV2.Vseg,flags);
                            ggen.offset += 4;
                        }
                        else
                        {
                            s = c.IEV2.Vsym;
                            ggen.offset += objmod.reftoident(seg,ggen.offset,s,0,flags);
                        }
                        break;

                    case 0x68:              // PUSH immed16
                        if (cast(FL)c.IFL2 == FLblock)
                        {   c.IFL2 = FLblockoff;
                            goto do16;
                        }
                        else
                            goto case_default16;

                    case CALL:
                    case JMP:
                        flags |= CFselfrel;
                        goto default;

                    default:
                    case_default16:
                        if (c.Iflags & CFopsize)
                            goto do32;
                        else
                            goto do16;
                }
            }
        }
        // 0xF6 and 0xF7 only take an immediate operand when the reg field of
        // the mod/rm byte is 0 (TEST), so they are handled here separately.
        else if (op == 0xF6)            /* TEST mem8,immed8             */
        {
            if ((rm & (7<<3)) == 0)
                do8bit(&ggen, cast(FL)c.IFL2,&c.IEV2);
        }
        else if (op == 0xF7)
        {
            if ((rm & (7<<3)) == 0)     /* TEST mem16/32,immed16/32     */
            {
                // 32 bit immediate unless the operand size prefix flips the default
                if ((I32 || I64) ^ ((c.Iflags & CFopsize) != 0))
                    do32bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags,0);
                else
                    do16bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags);
            }
        }

        // sanity check: the number of bytes generated for this instruction
        // must agree with what calccodsize() predicts
        debug
        if (ggen.getOffset() - startoffset != calccodsize(c))
        {
            printf("actual: %d, calc: %d\n", cast(int)(ggen.getOffset() - startoffset), cast(int)calccodsize(c));
            code_print(c);
            assert(0);
        }
    }
    ggen.flush();
    Offset(seg) = ggen.offset;          // record the new end of the segment
    //printf("-codout(), Coffset = x%x\n", Offset(seg));
    return cast(uint)ggen.offset;                      /* ending address               */
}
7391 
7392 
/**************************
 * Emit the 8 byte operand of an instruction.
 *
 * Operands whose value is known here are appended to the byte buffer.
 * Operands that need a fixup are handed to objmod: the buffer is flushed
 * first so that `pbuf.offset` is the location of the fixup, the value is
 * mirrored into the disassembly buffer with write64(), and `pbuf.offset`
 * is then stepped over the 8 bytes.
 * Params:
 *      pbuf = code buffer being filled
 *      fl = kind of operand
 *      uev = the operand
 *      flags = CFxxxx fixup flags, CFoffset64 is added for symbol and data segment fixups
 */
@trusted
private void do64bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags)
{
    Symbol *sym;
    targ_size_t value;

    assert(I64);
    switch (fl)
    {
        case FLconst:
            // plain constant, goes straight into the buffer
            value = *cast(targ_size_t *) uev;
            pbuf.genp(8,&value);
            return;

        case FLdatseg:
            pbuf.flush();
            pbuf.write64(uev.Vpointer);
            objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,CFoffset64 | flags);
            break;

        case FLframehandler:
            // remember where the operand is and emit a zero placeholder
            framehandleroffset = pbuf.getOffset();
            value = 0;
            pbuf.genp(8,&value);
            return;

        case FLswitch:
            pbuf.flush();
            value = uev.Vswitch.Btableoffset;
            pbuf.write64(value);
            if (config.flags & CFGromable)
                objmod.reftocodeseg(pbuf.seg,pbuf.offset,value);
            else
                objmod.reftodatseg(pbuf.seg,pbuf.offset,value,objmod.jmpTableSegment(funcsym_p),CFoff);
            break;

        case FLcsdata:
        case FLfardata:
            //symbol_print(uev.Vsym);
            // NOTE: In ELFOBJ all symbol refs have been tagged FLextern
            // strings and statics are treated like offsets from a
            // un-named external with is the start of .rodata or .data
        case FLextern:                      /* external data symbol         */
        case FLtlsdata:
            pbuf.flush();
            sym = uev.Vsym;
            pbuf.write64(uev.Voffset);
            objmod.reftoident(pbuf.seg,pbuf.offset,sym,uev.Voffset,CFoffset64 | flags);
            break;

        case FLgotoff:
            // only handled for posix targets other than OSX
            if ((config.exe & (EX_OSX | EX_OSX64)) || !(config.exe & EX_posix))
                assert(0);

            pbuf.flush();
            sym = uev.Vsym;
            pbuf.write64(uev.Voffset);
            objmod.reftoident(pbuf.seg,pbuf.offset,sym,uev.Voffset,CFoffset64 | flags);
            break;

        case FLgot:
            if (config.exe & (EX_OSX | EX_OSX64))
            {
                // OSX: record the location in the function symbol and emit a zero placeholder
                funcsym_p.Slocalgotoffset = pbuf.getOffset();
                value = 0;
                pbuf.genp(8,&value);
                return;
            }
            if (!(config.exe & EX_posix))
                assert(0);

            pbuf.flush();
            sym = uev.Vsym;
            pbuf.write64(uev.Voffset);
            objmod.reftoident(pbuf.seg,pbuf.offset,sym,uev.Voffset,CFoffset64 | flags);
            break;

        case FLfunc:                        /* function call                */
            sym = uev.Vsym;
            assert(TARGET_SEGMENTED || !tyfarfunc(sym.ty()));
            pbuf.flush();
            pbuf.write64(0);
            objmod.reftoident(pbuf.seg,pbuf.offset,sym,0,CFoffset64 | flags);
            break;

        case FLblock:                       /* displacement to another block */
            value = uev.Vblock.Boffset - pbuf.getOffset() - 4;
            //printf("FLblock: funcoffset = %x, pbuf.getOffset = %x, Boffset = %x, ad = %x\n", funcoffset, pbuf.getOffset(), uev.Vblock.Boffset, ad);
            pbuf.genp(8,&value);
            return;

        case FLblockoff:
            // address of a block, as opposed to a displacement to it
            pbuf.flush();
            assert(uev.Vblock);
            //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", pbuf.offset, uev.Vblock.Boffset, funcoffset);
            pbuf.write64(uev.Vblock.Boffset);
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset);
            break;

        default:
            WRFL(fl);
            assert(0);
    }

    // Only the fixup cases get here: account for the 8 bytes that did not
    // pass through the byte buffer.
    pbuf.offset += 8;
}
7505 
7506 
/**************************
 * Emit the 4 byte operand (displacement, address or immediate) of an instruction.
 *
 * Operands whose value is known here are appended to the byte buffer.
 * Operands that need a fixup are handed to objmod: the buffer is flushed
 * first so that `pbuf.offset` is the location of the fixup, the value is
 * mirrored into the disassembly buffer with write32(), and `pbuf.offset`
 * is then stepped over the 4 bytes.
 * Params:
 *      pbuf = code buffer being filled
 *      fl = kind of operand
 *      uev = the operand; for FLswitch its Btablebase may be set
 *      flags = CFxxxx fixup flags
 *      val = adjustment added to the offset of symbol references
 */
@trusted
private void do32bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags, int val)
{
    Symbol *sym;
    targ_size_t value;

    //printf("do32bit(flags = x%x)\n", flags);
    switch (fl)
    {
        case FLconst:
            // plain constant, goes straight into the buffer
            assert(targ_size_t.sizeof == 4 || targ_size_t.sizeof == 8);
            value = * cast(targ_size_t *) uev;
            pbuf.genp(4,&value);
            return;

        case FLdatseg:
            pbuf.flush();
            objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,flags);
            pbuf.write32(cast(uint)uev.Vpointer);
            break;

        case FLframehandler:
            // remember where the operand is and emit a zero placeholder
            framehandleroffset = pbuf.getOffset();
            value = 0;
            pbuf.genp(4,&value);
            return;

        case FLswitch:
            pbuf.flush();
            value = uev.Vswitch.Btableoffset;
            if (!(config.flags & CFGromable))
            {
                // jump table lives in its own data segment
                objmod.reftodatseg(pbuf.seg,pbuf.offset,value,objmod.jmpTableSegment(funcsym_p),CFoff);
            }
            else if ((config.exe & (EX_OSX | EX_OSX64)) ||
                     ((config.exe & EX_windos) && I64))
            {
                // OSX and 64 bit Windows: the table is addressed relative to
                // Btablebase, so the operand is a known value and needs no fixup.
                // These are magic values based on the exact code generated for the switch jump
                if (I64)
                    uev.Vswitch.Btablebase = pbuf.getOffset() + 4;
                else
                    uev.Vswitch.Btablebase = pbuf.getOffset() + 4 - 8;
                value -= uev.Vswitch.Btablebase;
                pbuf.genp(4,&value);
                return;
            }
            else
            {
                // everything else: jump table is in the code segment
                objmod.reftocodeseg(pbuf.seg,pbuf.offset,value);
            }
            pbuf.write32(cast(uint)value);
            break;

        case FLcode:
            //assert(JMPJMPTABLE);            // the only use case
            pbuf.flush();
            value = *cast(targ_size_t *) uev + pbuf.getOffset();
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,value);
            pbuf.write32(cast(uint)value);
            break;

        case FLcsdata:
        case FLfardata:
            //symbol_print(uev.Vsym);

            // NOTE: In ELFOBJ all symbol refs have been tagged FLextern
            // strings and statics are treated like offsets from a
            // un-named external with is the start of .rodata or .data
        case FLextern:                      /* external data symbol         */
        case FLtlsdata:
        {
            pbuf.flush();
            sym = uev.Vsym;
            auto symoff = uev.Voffset;
            if ((config.exe & EX_windos) && I64 && (flags & CFpc32))
            {
                /* This is for those funky fixups where the location to be fixed up
                 * is a 'val' amount back from the current RIP, biased by adding 4.
                 * 'val' travels in the CFREL bits of the flags instead of being
                 * added to the offset.
                 */
                assert(val >= -5 && val <= 0);
                flags |= (-val & 7) << 24;          // set CFREL value
                assert(CFREL == (7 << 24));
            }
            else
            {
                symoff += val;
            }
            objmod.reftoident(pbuf.seg,pbuf.offset,sym,symoff,flags);
            pbuf.write32(cast(uint)symoff);
            break;
        }

        case FLgotoff:
            // only handled for posix targets other than OSX
            if ((config.exe & (EX_OSX | EX_OSX64)) || !(config.exe & EX_posix))
                assert(0);

            pbuf.flush();
            sym = uev.Vsym;
            objmod.reftoident(pbuf.seg,pbuf.offset,sym,uev.Voffset + val,flags);
            pbuf.write32(cast(uint)(uev.Voffset + val));
            break;

        case FLgot:
            if (config.exe & (EX_OSX | EX_OSX64))
            {
                // OSX: record the location in the function symbol and emit a zero placeholder
                funcsym_p.Slocalgotoffset = pbuf.getOffset();
                value = 0;
                pbuf.genp(4,&value);
                return;
            }
            if (!(config.exe & EX_posix))
                assert(0);

            pbuf.flush();
            sym = uev.Vsym;
            objmod.reftoident(pbuf.seg,pbuf.offset,sym,uev.Voffset + val,flags);
            pbuf.write32(cast(uint)(uev.Voffset + val));
            break;

        case FLfunc:                        /* function call                */
            sym = uev.Vsym;
            if (tyfarfunc(sym.ty()))
            {   /* Large code references are always absolute    */
                // reftoident reports the size it wrote; 4 of that is
                // accounted for at the end of this function
                pbuf.flush();
                pbuf.offset += objmod.reftoident(pbuf.seg,pbuf.offset,sym,0,flags) - 4;
                pbuf.write32(0);
                break;
            }

            if (sym.Sseg == pbuf.seg &&
                (sym.Sclass == SC.static_ || sym.Sclass == SC.global) &&
                sym.Sxtrnnum == 0 && (flags & CFselfrel))
            {   /* if we know it's relative address     */
                value = sym.Soffset - pbuf.getOffset() - 4;
                pbuf.genp(4,&value);
                return;
            }

            assert(TARGET_SEGMENTED || !tyfarfunc(sym.ty()));
            pbuf.flush();
            objmod.reftoident(pbuf.seg,pbuf.offset,sym,val,flags);
            pbuf.write32(cast(uint)(val));
            break;

        case FLblock:                       /* displacement to another block */
            // relative to the end of this 4 byte operand
            value = uev.Vblock.Boffset - pbuf.getOffset() - 4;
            //printf("FLblock: funcoffset = %x, pbuf.getOffset = %x, Boffset = %x, ad = %x\n", funcoffset, pbuf.getOffset(), uev.Vblock.Boffset, ad);
            pbuf.genp(4,&value);
            return;

        case FLblockoff:
            // address of a block, as opposed to a displacement to it
            pbuf.flush();
            assert(uev.Vblock);
            //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", pbuf.offset, uev.Vblock.Boffset, funcoffset);
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset);
            pbuf.write32(cast(uint)(uev.Vblock.Boffset));
            break;

        default:
            WRFL(fl);
            assert(0);
    }

    // Only the fixup cases get here: account for the 4 bytes that did not
    // pass through the byte buffer.
    pbuf.offset += 4;
}
7685 
7686 
/*************************************
 * Emit a 16 bit operand into the code buffer.
 *
 * Cases that end in `return` hand the value to `pbuf.genp()`. Cases that end
 * in `break` flush the buffer, emit the operand with `pbuf.write16()` (after
 * registering a fixup with `objmod`), and share the `pbuf.offset += 2` that
 * follows the switch.
 * Params:
 *      pbuf = code buffer the operand is emitted into
 *      fl = FLxxx kind of the operand held in `uev`
 *      uev = operand value; which member is read depends on `fl`
 *      flags = CFxxx flags; CFselfrel is tested here, and the value is
 *              forwarded to the objmod fixup routines
 */
@trusted
private void do16bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags)
{
    Symbol *s;
    targ_size_t ad;

    switch (fl)
    {
        case FLconst:
            // constant: genp is given a byte count of 2 and the address of the value
            pbuf.genp(2,cast(char *) uev);
            return;

        case FLdatseg:
            pbuf.flush();
            objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,flags);
            pbuf.write16(cast(uint)uev.Vpointer);
            break;

        case FLswitch:
            pbuf.flush();
            ad = uev.Vswitch.Btableoffset;
            // the fixup is made against the code segment when CFGromable is set,
            // otherwise against the segment returned by jmpTableSegment()
            if (config.flags & CFGromable)
                objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad);
            else
                objmod.reftodatseg(pbuf.seg,pbuf.offset,ad,objmod.jmpTableSegment(funcsym_p),CFoff);
            pbuf.write16(cast(uint)ad);
            break;

        case FLcsdata:
        case FLfardata:
        case FLextern:                      /* external data symbol         */
        case FLtlsdata:
            //assert(SIXTEENBIT || TARGET_SEGMENTED);
            pbuf.flush();
            s = uev.Vsym;               /* symbol pointer               */
            objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,flags);
            pbuf.write16(cast(uint)uev.Voffset);
            break;

        case FLfunc:                        /* function call                */
            //assert(SIXTEENBIT || TARGET_SEGMENTED);
            s = uev.Vsym;               /* symbol pointer               */
            if (tyfarfunc(s.ty()))
            {   /* Large code references are always absolute    */
                pbuf.flush();
                // Only (result - 2) is added here; the `pbuf.offset += 2` after
                // the switch supplies the remaining 2.
                pbuf.offset += objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags) - 2;
            }
            else if (s.Sseg == pbuf.seg &&
                     (s.Sclass == SC.static_ || s.Sclass == SC.global) &&
                     s.Sxtrnnum == 0 && flags & CFselfrel)
            {   /* if we know it's relative address     */
                ad = s.Soffset - pbuf.getOffset() - 2;
                goto L1;                // emitted directly, no fixup requested
            }
            else
            {
                pbuf.flush();
                objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags);
            }
            // both fixup paths above write a zero placeholder
            pbuf.write16(0);
            break;

        case FLblock:                       /* displacement to another block */
            ad = uev.Vblock.Boffset - pbuf.getOffset() - 2;
            debug
            {
                // debug builds verify that the displacement fits in a signed 16 bit value
                targ_ptrdiff_t delta = uev.Vblock.Boffset - pbuf.getOffset() - 2;
                assert(cast(short)delta == delta);
            }
        L1:
            // also reached by `goto L1` from the FLfunc self-relative case
            pbuf.genp(2,&ad);                    // displacement
            return;

        case FLblockoff:
            pbuf.flush();
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset);
            pbuf.write16(cast(uint)uev.Vblock.Boffset);
            break;

        default:
            // unexpected operand kind: print it, then halt
            WRFL(fl);
            assert(0);
    }
    pbuf.offset += 2;
}
7773 
7774 
/*************************************
 * Emit an 8 bit operand into the code buffer.
 *
 * Only FLconst (the low byte of `uev.Vuns`) and FLblock (a displacement to
 * another block) are accepted; any other kind trips `assert(0)`.
 * A block displacement that does not survive a round trip through `byte`
 * is reported with printf, followed by a call to `err_exit()`.
 * Params:
 *      pbuf = code buffer the byte is emitted into
 *      fl = FLxxx kind of the operand held in `uev`
 *      uev = operand value
 */
@trusted
private void do8bit(MiniCodeBuf *pbuf, FL fl, evc *uev)
{
    char value;

    if (fl == FLconst)
    {
        value = cast(char)uev.Vuns;
    }
    else if (fl == FLblock)
    {
        // displacement from the current position (less 1) to the target block
        const targ_ptrdiff_t disp = uev.Vblock.Boffset - pbuf.getOffset() - 1;
        const bool fits = cast(byte)disp == disp;
        if (!fits)
        {
            version (MARS)
            {
                // prefix the message with the source position when a line number is recorded
                if (uev.Vblock.Bsrcpos.Slinnum)
                    printf("%s(%d): ", uev.Vblock.Bsrcpos.Sfilename, uev.Vblock.Bsrcpos.Slinnum);
            }
            printf("block displacement of %lld exceeds the maximum offset of -128 to 127.\n", cast(long)disp);
            err_exit();
        }
        value = cast(char)disp;
        debug assert(uev.Vblock.Boffset > pbuf.getOffset() || value != 0x7F);
    }
    else
    {
        debug printf("fl = %d\n",fl);
        assert(0);
    }

    pbuf.gen(value);
}
7809 
7810 
7811 /**********************************
7812  */
7813 
7814 version (SCPP)
7815 {
7816 static if (HYDRATE)
7817 {
/**********************************
 * Hydrate a list of code nodes (compiled only for SCPP when HYDRATE is set).
 *
 * Walks the list whose head pointer is `*pc`. Each node pointer is passed
 * through `ph_hydrate`, then the pointers stored in the node's operands
 * (IEV1 / IEV2) are hydrated according to the operand kinds IFL1 / IFL2.
 * Params:
 *      pc = address of the pointer to the first node; asserted to be non-null
 */
@trusted
void code_hydrate(code **pc)
{
    code *c;
    ubyte ins,rm;
    FL fl;

    assert(pc);
    while (*pc)
    {
        c = cast(code *) ph_hydrate(cast(void**)pc);
        // Fetch the instruction's attribute byte. The source depends on the
        // encoding: VEX (pfx 0xC4), a 0F38/0F3A opcode, a 0F opcode, or a
        // one byte opcode.
        if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
            ins = vex_inssize(c);
        else if ((c.Iop & 0xFFFD00) == 0x0F3800)
            ins = inssize2[(c.Iop >> 8) & 0xFF];
        else if ((c.Iop & 0xFF00) == 0x0F00)
            ins = inssize2[c.Iop & 0xFF];
        else
            ins = inssize[c.Iop & 0xFF];
        // Pseudo ops keep their payload in IEV1 and skip operand decoding entirely
        switch (c.Iop)
        {
            default:
                break;

            case ESCAPE | ESClinnum:
                srcpos_hydrate(&c.IEV1.Vsrcpos);
                goto done;

            case ESCAPE | ESCctor:
            case ESCAPE | ESCdtor:
                el_hydrate(&c.IEV1.Vtor);
                goto done;

            case ASM:
                ph_hydrate(cast(void**)&c.IEV1.bytes);
                goto done;
        }
        // IEV1 is skipped when the M attribute is clear or the mod field of
        // the mod/rm byte is 3 (0xC0)
        if (!(ins & M) ||
            ((rm = c.Irm) & 0xC0) == 0xC0)
            goto do2;           /* if no first operand          */
        if (is32bitaddr(I32,c.Iflags))
        {

            // with mod == 0, IEV1 is processed only for rm == 5, or rm == 4
            // with (Isib & 7) == 5
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5))
               )
                goto do2;       /* if no first operand  */
        }
        else
        {
            // with mod == 0, IEV1 is processed only for rm == 6
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 6))
               )
                goto do2;       /* if no first operand  */
        }
        fl = cast(FL) c.IFL1;
        switch (fl)
        {
            // operand kinds that carry a Symbol pointer
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLfast:
            case FLbprel:
            case FLpara:
            case FLcsdata:
            case FLfardata:
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
                assert(flinsymtab[fl]);
                symbol_hydrate(&c.IEV1.Vsym);
                symbol_debug(c.IEV1.Vsym);
                break;

            // operand kinds for which nothing is hydrated
            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                assert(!flinsymtab[fl]);
                break;

            case FLcode:
                ph_hydrate(cast(void**)&c.IEV1.Vcode);
                break;

            case FLblock:
            case FLblockoff:
                ph_hydrate(cast(void**)&c.IEV1.Vblock);
                break;
version (SCPP)
{
            case FLctor:
            case FLdtor:
                el_hydrate(cast(elem**)&c.IEV1.Vtor);
                break;
}
            case FLasm:
                ph_hydrate(cast(void**)&c.IEV1.bytes);
                break;

            default:
                // unexpected operand kind: print it, then halt
                WRFL(fl);
                assert(0);
        }
    do2:
        /* Ignore TEST (F6 and F7) opcodes      */
        if (!(ins & T))
            goto done;          /* if no second operand */

        // Second operand: same treatment as the first, applied to IEV2.
        // FLctor, FLdtor and FLasm have no case here and reach `default`.
        fl = cast(FL) c.IFL2;
        switch (fl)
        {
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLfast:
            case FLbprel:
            case FLpara:
            case FLcsdata:
            case FLfardata:
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
                assert(flinsymtab[fl]);
                symbol_hydrate(&c.IEV2.Vsym);
                symbol_debug(c.IEV2.Vsym);
                break;

            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                assert(!flinsymtab[fl]);
                break;

            case FLcode:
                ph_hydrate(cast(void**)&c.IEV2.Vcode);
                break;

            case FLblock:
            case FLblockoff:
                ph_hydrate(cast(void**)&c.IEV2.Vblock);
                break;

            default:
                WRFL(fl);
                assert(0);
        }
  done:
        { }

        // continue with the `next` slot of the node just processed
        pc = &c.next;
    }
}
7986 }
7987 
7988 /**********************************
7989  */
7990 
7991 static if (DEHYDRATE)
7992 {
/**********************************
 * Dehydrate a list of code nodes (compiled only for SCPP when DEHYDRATE is set).
 *
 * Walks the list whose head pointer is `*pc`. The pointer slot that led to
 * each node is passed through `ph_dehydrate`, then the pointers stored in
 * the node's operands (IEV1 / IEV2) are dehydrated according to the operand
 * kinds IFL1 / IFL2. The node itself is still reached through the local `c`,
 * which is read before its slot is dehydrated.
 * Params:
 *      pc = address of the pointer to the first node
 */
@trusted
void code_dehydrate(code **pc)
{
    code *c;
    ubyte ins,rm;
    FL fl;

    while ((c = *pc) != null)
    {
        ph_dehydrate(pc);

        // Fetch the instruction's attribute byte. The source depends on the
        // encoding: VEX (pfx 0xC4), a 0F38/0F3A opcode, a 0F opcode, or a
        // one byte opcode.
        if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
            ins = vex_inssize(c);
        else if ((c.Iop & 0xFFFD00) == 0x0F3800)
            ins = inssize2[(c.Iop >> 8) & 0xFF];
        else if ((c.Iop & 0xFF00) == 0x0F00)
            ins = inssize2[c.Iop & 0xFF];
        else
            ins = inssize[c.Iop & 0xFF];
        // Pseudo ops keep their payload in IEV1 and skip operand decoding entirely
        switch (c.Iop)
        {
            default:
                break;

            case ESCAPE | ESClinnum:
                srcpos_dehydrate(&c.IEV1.Vsrcpos);
                goto done;

            case ESCAPE | ESCctor:
            case ESCAPE | ESCdtor:
                el_dehydrate(&c.IEV1.Vtor);
                goto done;

            case ASM:
                ph_dehydrate(&c.IEV1.bytes);
                goto done;
        }

        // IEV1 is skipped when the M attribute is clear or the mod field of
        // the mod/rm byte is 3 (0xC0)
        if (!(ins & M) ||
            ((rm = c.Irm) & 0xC0) == 0xC0)
            goto do2;           /* if no first operand          */
        if (is32bitaddr(I32,c.Iflags))
        {

            // with mod == 0, IEV1 is processed only for rm == 5, or rm == 4
            // with (Isib & 7) == 5
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5))
               )
                goto do2;       /* if no first operand  */
        }
        else
        {
            // with mod == 0, IEV1 is processed only for rm == 6
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 6))
               )
                goto do2;       /* if no first operand  */
        }
        fl = cast(FL) c.IFL1;
        switch (fl)
        {
            // operand kinds that carry a Symbol pointer
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLfast:
            case FLbprel:
            case FLpara:
            case FLcsdata:
            case FLfardata:
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
                assert(flinsymtab[fl]);
                symbol_dehydrate(&c.IEV1.Vsym);
                break;

            // operand kinds for which nothing is dehydrated
            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                assert(!flinsymtab[fl]);
                break;

            case FLcode:
                ph_dehydrate(&c.IEV1.Vcode);
                break;

            case FLblock:
            case FLblockoff:
                ph_dehydrate(&c.IEV1.Vblock);
                break;
version (SCPP)
{
            case FLctor:
            case FLdtor:
                el_dehydrate(&c.IEV1.Vtor);
                break;
}
            case FLasm:
                ph_dehydrate(&c.IEV1.bytes);
                break;

            default:
                // unexpected operand kind: print it, then halt
                WRFL(fl);
                assert(0);
        }
    do2:
        /* Ignore TEST (F6 and F7) opcodes      */
        if (!(ins & T))
            goto done;          /* if no second operand */

        // Second operand: same treatment as the first, applied to IEV2.
        // FLctor, FLdtor and FLasm have no case here and reach `default`.
        fl = cast(FL) c.IFL2;
        switch (fl)
        {
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLfast:
            case FLbprel:
            case FLpara:
            case FLcsdata:
            case FLfardata:
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
                assert(flinsymtab[fl]);
                symbol_dehydrate(&c.IEV2.Vsym);
                break;

            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                assert(!flinsymtab[fl]);
                break;

            case FLcode:
                ph_dehydrate(&c.IEV2.Vcode);
                break;

            case FLblock:
            case FLblockoff:
                ph_dehydrate(&c.IEV2.Vblock);
                break;

            default:
                WRFL(fl);
                assert(0);
        }
  done:
        // Take the address of the `next` field itself, as code_hydrate does,
        // rather than the address of a code_next() call result.
        pc = &c.next;
    }
}
8160 }
8161 }
8162 
8163 /***************************
8164  * Debug code to dump code structure.
8165  */
8166 
8167 void WRcodlst(code *c)
8168 {
8169     for (; c; c = code_next(c))
8170         code_print(c);
8171 }
8172 
/***************************
 * Debug code to print a single code node with printf.
 *
 * Prints the node address and its successor, VEX / REX prefix information,
 * the opcode, the flags, the mod/rm and SIB bytes, and the operands that the
 * instruction's attribute byte says are present.
 * Params:
 *      c = code node to print; null prints "code 0" and returns
 */
@trusted
extern (C) void code_print(scope code* c)
{
    ubyte ins;
    ubyte rexb;

    if (c == null)
    {
        printf("code 0\n");
        return;
    }

    const op = c.Iop;
    // Fetch the instruction's attribute byte. The source depends on the
    // encoding: VEX (pfx 0xC4), a 0F38/0F3A opcode, a 0F opcode, or a
    // one byte opcode.
    if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
        ins = vex_inssize(c);
    else if ((op & 0xFFFD00) == 0x0F3800)
        ins = inssize2[(op >> 8) & 0xFF];
    else if ((op & 0xFF00) == 0x0F00)
        ins = inssize2[op & 0xFF];
    else
        ins = inssize[op & 0xFF];

    printf("code %p: nxt=%p ",c,code_next(c));

    if (c.Iflags & CFvex)
    {
        if (c.Iflags & CFvex3)
        {
            printf("vex=0xC4");
            printf(" 0x%02X", VEX3_B1(c.Ivex));
            printf(" 0x%02X", VEX3_B2(c.Ivex));
            // Build REX-style bits from the VEX fields: W is taken as is,
            // R, X and B are set when the corresponding VEX bit is clear.
            rexb =
                ( c.Ivex.w ? REX_W : 0) |
                (!c.Ivex.r ? REX_R : 0) |
                (!c.Ivex.x ? REX_X : 0) |
                (!c.Ivex.b ? REX_B : 0);
        }
        else
        {
            printf("vex=0xC5");
            printf(" 0x%02X", VEX2_B1(c.Ivex));
            rexb = !c.Ivex.r ? REX_R : 0;
        }
        printf(" ");
    }
    else
        rexb = c.Irex;

    if (rexb)
    {
        // Print the value the W/R/X/B letters below are decoded from. For
        // VEX instructions that is the value derived above, not c.Irex.
        printf("rex=0x%02X ", rexb);
        if (rexb & REX_W)
            printf("W");
        if (rexb & REX_R)
            printf("R");
        if (rexb & REX_X)
            printf("X");
        if (rexb & REX_B)
            printf("B");
        printf(" ");
    }
    printf("op=0x%02X",op);

    if ((op & 0xFF) == ESCAPE)
    {
        if ((op & 0xFF00) == ESClinnum)
        {
            // line number pseudo op: print the line and nothing else
            printf(" linnum = %d\n",c.IEV1.Vsrcpos.Slinnum);
            return;
        }
        printf(" ESCAPE %d",op >> 8);
    }
    if (c.Iflags)
        printf(" flg=%x",c.Iflags);
    if (ins & M)
    {
        uint rm = c.Irm;
        // mod/rm byte, also split into its mod, reg and r/m fields
        printf(" rm=0x%02X=%d,%d,%d",rm,(rm>>6)&3,(rm>>3)&7,rm&7);
        if (!I16 && issib(rm))
        {
            ubyte sib = c.Isib;
            printf(" sib=%02x=%d,%d,%d",sib,(sib>>6)&3,(sib>>3)&7,sib&7);
        }
        // the first operand is printed only for BPRM or for mod == 1 or 2
        if ((rm & 0xC7) == BPRM || (rm & 0xC0) == 0x80 || (rm & 0xC0) == 0x40)
        {
            switch (c.IFL1)
            {
                case FLconst:
                case FLoffset:
                    printf(" int = %4d",c.IEV1.Vuns);
                    break;

                case FLblock:
                    printf(" block = %p",c.IEV1.Vblock);
                    break;

                case FLswitch:
                case FLblockoff:
                case FLlocalsize:
                case FLframehandler:
                case 0:
                    break;

                case FLdatseg:
                    printf(" FLdatseg %d.%llx",c.IEV1.Vseg,cast(ulong)c.IEV1.Vpointer);
                    break;

                case FLauto:
                case FLfast:
                case FLreg:
                case FLdata:
                case FLudata:
                case FLpara:
                case FLbprel:
                case FLtlsdata:
                case FLextern:
                    printf(" ");
                    WRFL(cast(FL)c.IFL1);
                    printf(" sym='%s'",c.IEV1.Vsym.Sident.ptr);
                    if (c.IEV1.Voffset)
                        printf(".%d", cast(int)c.IEV1.Voffset);
                    break;

                default:
                    WRFL(cast(FL)c.IFL1);
                    break;
            }
        }
    }
    if (ins & T)
    {
        // second operand: its kind is always printed first, then any detail
        printf(" ");
        WRFL(cast(FL)c.IFL2);
        switch (c.IFL2)
        {
            case FLconst:
                printf(" int = %4d",c.IEV2.Vuns);
                break;

            case FLblock:
                printf(" block = %p",c.IEV2.Vblock);
                break;

            case FLswitch:
            case FLblockoff:
            case 0:
            case FLlocalsize:
            case FLframehandler:
                break;

            case FLdatseg:
                printf(" %d.%llx",c.IEV2.Vseg,cast(ulong)c.IEV2.Vpointer);
                break;

            case FLauto:
            case FLfast:
            case FLreg:
            case FLpara:
            case FLbprel:
            case FLfunc:
            case FLdata:
            case FLudata:
            case FLtlsdata:
                printf(" sym='%s'",c.IEV2.Vsym.Sident.ptr);
                break;

            case FLcode:
                printf(" code = %p",c.IEV2.Vcode);
                break;

            default:
                WRFL(cast(FL)c.IFL2);
                break;
        }
    }
    printf("\n");
}
8350 
8351 /**************************************
8352  * Pretty-print a CF mask.
8353  * Params:
8354  *      cf = CF mask
8355  */
8356 @trusted
8357 extern (C) void CF_print(uint cf)
8358 {
8359     void print(uint mask, const(char)* string)
8360     {
8361         if (cf & mask)
8362         {
8363             printf(string);
8364             cf &= ~mask;
8365             if (cf)
8366                 printf("|");
8367         }
8368     }
8369 
8370     print(CFindirect, "CFindirect");
8371     print(CFswitch, "CFswitch");
8372     print(CFjmp5, "CFjmp5");
8373     print(CFvex3, "CFvex3");
8374     print(CFvex, "CFvex");
8375     print(CFpc32, "CFpc32");
8376     print(CFoffset64, "CFoffset64");
8377     print(CFclassinit, "CFclassinit");
8378     print(CFvolatile, "CFvolatile");
8379     print(CFtarg2, "CFtarg2");
8380     print(CFunambig, "CFunambig");
8381     print(CFselfrel, "CFselfrel");
8382     print(CFwait, "CFwait");
8383     print(CFfs, "CFfs");
8384     print(CFcs, "CFcs");
8385     print(CFds, "CFds");
8386     print(CFss, "CFss");
8387     print(CFes, "CFes");
8388     print(CFaddrsize, "CFaddrsize");
8389     print(CFopsize, "CFopsize");
8390     print(CFpsw, "CFpsw");
8391     print(CFoff, "CFoff");
8392     print(CFseg, "CFseg");
8393     print(CFtarg, "CFtarg");
8394     print(CFjmp16, "CFjmp16");
8395     printf("\n");
8396 }
8397 
8398 }