1 /**
2  * Constants and data structures specific to the x86 platform.
3  *
4  * Copyright:   Copyright (C) 1985-1998 by Symantec
5  *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
6  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
7  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
8  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/code_x86.d, backend/code_x86.d)
9  * Documentation:  https://dlang.org/phobos/dmd_backend_code_x86.html
10  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/code_x86.d
11  */
12 
13 module dmd.backend.code_x86;
14 
15 // Online documentation: https://dlang.org/phobos/dmd_backend_code_x86.html
16 
17 import dmd.backend.cdef;
18 import dmd.backend.cc : config;
19 import dmd.backend.code;
20 import dmd.backend.codebuilder : CodeBuilder;
21 import dmd.backend.el : elem;
22 import dmd.backend.ty : I64;
23 import dmd.backend.barray;
24 
25 nothrow:
26 @safe:
27 
/// Integer type used by this module to hold a CPU opcode value.
alias opcode_t = uint;          // CPU opcode
/// Sentinel stored in an `opcode_t` to mean "no opcode".
enum opcode_t NoOpcode = 0xFFFF;              // not a valid opcode_t
30 
31 /* Register definitions */
32 
// Register numbers. 0..7 are the classic registers, 8..15 are R8..R15,
// and the XMM registers start at 16.
enum
{
    AX      = 0,
    CX      = 1,
    DX      = 2,
    BX      = 3,
    SP      = 4,
    BP      = 5,
    SI      = 6,
    DI      = 7,

    // #defining R12-R15 interfere with setjmps' _JUMP_BUFFER members

    R8       = 8,
    R9       = 9,
    R10      = 10,
    R11      = 11,
    R12      = 12,
    R13      = 13,
    R14      = 14,
    R15      = 15,

    XMM0    = 16,
    XMM1    = 17,
    XMM2    = 18,
    XMM3    = 19,
    XMM4    = 20,
    XMM5    = 21,
    XMM6    = 22,
    XMM7    = 23,
/* There are also XMM8..XMM14 */
    // No named constants exist for XMM8..XMM14; presumably they occupy 24..30.
    // NOTE(review): that range is also used by ES (24), PSW (25), STACK (26),
    // ST0 (27), ST01 (28) and NOREG (29) defined below.
    XMM15   = 31,
}
66 
/**
 * Range test on a register number.
 * Params:
 *      reg = register number to classify
 * Returns:
 *      true when XMM0 <= reg <= XMM15 (i.e. 16..31 inclusive)
 * Note:
 *      ES, PSW, STACK, ST0, ST01 and NOREG are numbered 24..29 and therefore
 *      also satisfy this test.
 */
bool isXMMreg(reg_t reg) pure
{
    return XMM0 <= reg && reg <= XMM15;
}
68 
// Register designated as PICREG (it is BX).
enum PICREG = BX;

// Register number assigned to ES.
enum ES     = 24;

// R15 is register number 15, so numbers 0..NUMGENREGS-1 are AX..R15.
enum NUMGENREGS = 16;

// fishy naming as it covers XMM7 but not XMM15
// currently only used as a replacement for mES in cgcod.c
enum NUMREGS = 25;

// Pseudo register numbers following ES.
enum PSW     = 25;
enum STACK   = 26;      // top of stack
enum ST0     = 27;      // 8087 top of stack register
enum ST01    = 28;      // top two 8087 registers; for complex types

enum reg_t NOREG   = 29;     // no register
85 
// Byte register numbers. AL..BL share the numbers of AX..BX; AH..BH reuse
// the numbers 4..7 (the same values as SP, BP, SI, DI above).
enum
{
    AL      = 0,
    CL      = 1,
    DL      = 2,
    BL      = 3,
    AH      = 4,
    CH      = 5,
    DH      = 6,
    BH      = 7,
}
97 
// Register masks: each mXXX is a single bit, (1 << XXX), for the register
// number XXX defined above. mAX..mDI are written out as literals but follow
// the same rule (1 << 0 .. 1 << 7).
enum
{
    mAX     = 1,
    mCX     = 2,
    mDX     = 4,
    mBX     = 8,
    mSP     = 0x10,
    mBP     = 0x20,
    mSI     = 0x40,
    mDI     = 0x80,

    mR8     = (1 << R8),
    mR9     = (1 << R9),
    mR10    = (1 << R10),
    mR11    = (1 << R11),
    mR12    = (1 << R12),
    mR13    = (1 << R13),
    mR14    = (1 << R14),
    mR15    = (1 << R15),

    mXMM0   = (1 << XMM0),
    mXMM1   = (1 << XMM1),
    mXMM2   = (1 << XMM2),
    mXMM3   = (1 << XMM3),
    mXMM4   = (1 << XMM4),
    mXMM5   = (1 << XMM5),
    mXMM6   = (1 << XMM6),
    mXMM7   = (1 << XMM7),
    // Only XMM0..XMM7 are included in this set.
    XMMREGS = (mXMM0 |mXMM1 |mXMM2 |mXMM3 |mXMM4 |mXMM5 |mXMM6 |mXMM7),

    mES     = (1 << ES),      // 0x1000000
    mPSW    = (1 << PSW),     // 0x2000000

    mSTACK  = (1 << STACK),   // 0x4000000

    mST0    = (1 << ST0),     // 0x8000000
    mST01   = (1 << ST01),    // 0x10000000
}
136 
// Flags for getlvalue (must fit in regm_t)
// They occupy bits 30 and 31, above every register mask bit defined in
// this file (the highest of those is mST01, bit 28).
enum RMload  = (1 << 30);
enum RMstore = (1 << 31);
140 
    // To support position-independent code,
    // must be able to remove BX from available registers
    // (BX is PICREG). The *_PIC variants are the same sets without mBX.
    enum ALLREGS_INIT          = (mAX|mBX|mCX|mDX|mSI|mDI);
    enum ALLREGS_INIT_PIC      = (mAX|mCX|mDX|mSI|mDI);
    enum BYTEREGS_INIT         = (mAX|mBX|mCX|mDX);
    enum BYTEREGS_INIT_PIC     = (mAX|mCX|mDX);

/* We use the same IDXREGS for the 386 as the 8088, because if
   we used ALLREGS, it would interfere with mMSW
 */
enum IDXREGS         = (mBX|mSI|mDI);

// Register sets for float/double values; the FLOATREGS2/DOUBLEREGS2 names
// are a second set.
// NOTE(review): the _64/_32/_16 suffix presumably selects the target word
// size - confirm against the users of these constants.
enum FLOATREGS_64    = mAX;
enum FLOATREGS2_64   = mDX;
enum DOUBLEREGS_64   = mAX;
enum DOUBLEREGS2_64  = mDX;

enum FLOATREGS_32    = mAX;
enum FLOATREGS2_32   = mDX;
enum DOUBLEREGS_32   = (mAX|mDX);
enum DOUBLEREGS2_32  = (mCX|mBX);

enum FLOATREGS_16    = (mDX|mAX);
enum FLOATREGS2_16   = (mCX|mBX);
enum DOUBLEREGS_16   = (mAX|mBX|mCX|mDX);

/*#define _8087REGS (mST0|mST1|mST2|mST3|mST4|mST5|mST6|mST7)*/
168 
169 /* Segment registers    */
// Segment register indices, numbered 0..3 in the order ES, CS, SS, DS.
enum
{
    SEG_ES  = 0,
    SEG_CS  = 1,
    SEG_SS  = 2,
    SEG_DS  = 3,
}
177 
178 /*********************
179  * Masks for register pairs.
180  * Note that index registers are always LSWs. This is for the convenience
181  * of implementing far pointers.
182  */
183 
// The first branch is disabled by `static if (0)`; the definitions in the
// `else` branch are the ones in effect. The two variants differ only in
// which side mDI is placed on.
static if (0)
{
// Give us an extra one so we can enregister a long
enum mMSW = mCX|mDX|mDI|mES;       // most significant regs
enum mLSW = mAX|mBX|mSI;           // least significant regs
}
else
{
enum mMSW = mCX|mDX|mES;           // most significant regs
enum mLSW = mAX|mBX|mSI|mDI;       // least significant regs
}
195 
/**
 * Test whether a mod/reg/rm byte is followed by a SIB byte.
 * Params:
 *      rm = mod/reg/rm byte
 * Returns:
 *      1 if the r/m field (low 3 bits) is 4 and the mod field (top 2 bits)
 *      is not 3, otherwise 0
 */
uint issib(uint rm)
{
    const bool rmFieldIs4 = (rm & 7) == 4;
    const bool modIs3     = (rm & 0xC0) == 0xC0;
    return rmFieldIs4 && !modIs3;
}
198 
static if (0)
{
// relocation field size is always 32bits
//enum is32bitaddr(x,Iflags) (1)
}
else
{
/**
 * Decide whether an address is 32 bits wide.
 *
 * Only meaningful when `x` is 0 or 1. That holds for the current definition
 * of I32; if that definition changes, this function must change with it.
 *
 * Note: even for linux targets, CFaddrsize can be set by the inline
 * assembler.
 *
 * Params:
 *      x = base setting, toggled when the address size prefix is present
 *      Iflags = instruction flags; only CFaddrsize is examined
 * Returns:
 *      true if I64 is set; otherwise `x` inverted when CFaddrsize is set
 *      in `Iflags`, and `x` unchanged when it is not
 */
bool is32bitaddr(bool x,code_flags_t Iflags)
{
    if (I64)
        return true;

    const bool addrSizePrefix = (Iflags & CFaddrsize) != 0;
    return x != addrSizePrefix;
}
}
215 
216 
217 /**********************
218  * C library routines.
219  * See callclib().
220  */
221 
// Identifiers for the C library routines (see callclib()).
// Members are numbered implicitly from 0 in declaration order, so the order
// below is significant and MAX equals the number of routines listed.
enum CLIB
{
    lcmp,
    lmul,
    ldiv,
    lmod,
    uldiv,
    ulmod,

    dmul,ddiv,dtst0,dtst0exc,dcmp,dcmpexc,dneg,dadd,dsub,
    fmul,fdiv,ftst0,ftst0exc,fcmp,fcmpexc,fneg,fadd,fsub,

    dbllng,lngdbl,dblint,intdbl,
    dbluns,unsdbl,
    dblulng,
    ulngdbl,
    dblflt,fltdbl,
    dblllng,
    llngdbl,
    dblullng,
    ullngdbl,
    dtst,
    vptrfptr,cvptrfptr,

    _87topsw,fltto87,dblto87,dblint87,dbllng87,
    ftst,
    fcompp,
    ftest,
    ftest0,
    fdiv87,

    // Complex numbers
    cmul,
    cdiv,
    ccmp,

    u64_ldbl,
    ld_u64,
    MAX         // one past the last routine; not itself a routine
}
262 
/// Bit set of CFxxx flags, stored in code.Iflags.
alias code_flags_t = uint;
enum
{
    CFes        =        1,     // generate an ES: segment override for this instr
    CFjmp16     =        2,     // need 16 bit jump offset (long branch)
    CFtarg      =        4,     // this code is the target of a jump
    CFseg       =        8,     // get segment of immediate value
    CFoff       =     0x10,     // get offset of immediate value
    CFss        =     0x20,     // generate an SS: segment override (not with
                                // CFes at the same time, though!)
    CFpsw       =     0x40,     // we need the flags result after this instruction
    CFopsize    =     0x80,     // prefix with operand size
    CFaddrsize  =    0x100,     // prefix with address size
    CFds        =    0x200,     // need DS override (not with ES, SS, or CS )
    CFcs        =    0x400,     // need CS override
    CFfs        =    0x800,     // need FS override
    // GS has no bit of its own: it is the combination of the CS and FS bits
    // (0xC00), so testing `flags & CFcs` alone is also true for a GS override.
    CFgs        =   CFcs | CFfs,   // need GS override
    CFwait      =   0x1000,     // If I32 it indicates when to output a WAIT
    CFselfrel   =   0x2000,     // if self-relative
    CFunambig   =   0x4000,     // indicates cannot be accessed by other addressing
                                // modes
    CFtarg2     =   0x8000,     // like CFtarg, but we can't optimize this away
    CFvolatile  =  0x10000,     // volatile reference, do not schedule
    CFclassinit =  0x20000,     // class init code
    CFoffset64  =  0x40000,     // offset is 64 bits
    CFpc32      =  0x80000,     // I64: PC relative 32 bit fixup

    CFvex       =  0x10_0000,    // vex prefix
    CFvex3      =  0x20_0000,    // 3 byte vex prefix

    CFjmp5      =  0x40_0000,    // always a 5 byte jmp
    CFswitch    =  0x80_0000,    // kludge for switch table fixups

    CFindirect  = 0x100_0000,    // OSX32: indirect fixups

    /* These are for CFpc32 fixups, they're the negative of the offset of the fixup
     * from the program counter
     */
    // NOTE(review): this 3-bit field (bits 24..26) includes the CFindirect bit.
    CFREL       = 0x700_0000,

    // All segment override flags (CFgs adds no new bits beyond CFcs | CFfs).
    CFSEG       = CFes | CFss | CFds | CFcs | CFfs | CFgs,
    // All flags that select an instruction prefix.
    CFPREFIX    = CFSEG | CFopsize | CFaddrsize,
}
306 
// Externally defined (C linkage) routine taking a set of CFxxx flags;
// presumably prints them for debugging - the implementation is not in this file.
@trusted
extern (C) void CF_print(uint cf);
309 
/**
 * One instruction in a linked list of generated code.
 */
struct code
{
    code *next;                 // following instruction in the list
    code_flags_t Iflags;        // CFxxx flags

    // The opcode, overlaid with the VEX encoded form of it
    union
    {
        opcode_t Iop;
        struct Svex
        {
          nothrow:
          align(1):
            ubyte  op;

            // Packed fields, most significant bit first:
            // [R X B m-mmmm]  [W vvvv L pp]
            ushort _pp;

            // Read the field of _pp that starts at bit `shift`; `mask` is the
            // field's mask before shifting.
            private ushort getBits(uint shift, uint mask) const
            {
                return cast(ushort)((_pp >> shift) & mask);
            }

            // Replace the field of _pp that starts at bit `shift` with the
            // low bits of `v`, leaving all other bits untouched.
            private void setBits(uint shift, uint mask, ushort v)
            {
                _pp = cast(ushort)((_pp & ~(mask << shift)) | ((v & mask) << shift));
            }

            // bits 0..1
            @property ushort pp() const   { return getBits(0, 3); }
            @property void pp(ushort v)   { setBits(0, 3, v); }

            // bit 2
            @property ushort l() const    { return getBits(2, 1); }
            @property void l(ushort v)    { setBits(2, 1, v); }

            // bits 3..6
            @property ushort vvvv() const { return getBits(3, 0x0F); }
            @property void vvvv(ushort v) { setBits(3, 0x0F, v); }

            // bit 7
            @property ushort w() const    { return getBits(7, 1); }
            @property void w(ushort v)    { setBits(7, 1, v); }

            // bits 8..12
            @property ushort mmmm() const { return getBits(8, 0x1F); }
            @property void mmmm(ushort v) { setBits(8, 0x1F, v); }

            // bit 13
            @property ushort b() const    { return getBits(13, 1); }
            @property void b(ushort v)    { setBits(13, 1, v); }

            // bit 14
            @property ushort x() const    { return getBits(14, 1); }
            @property void x(ushort v)    { setBits(14, 1, v); }

            // bit 15
            @property ushort r() const    { return getBits(15, 1); }
            @property void r(ushort v)    { setBits(15, 1, v); }

            ubyte pfx; // always 0xC4
        }
        Svex Ivex;
    }

    /* The "effective address" of the instruction: the modregrm byte, the sib
     * byte and the REX prefix byte, also accessible together as Iea.
     * The 16 bit code generator just used the modregrm, the 32 bit x86 added
     * the sib, and the 64 bit one added the rex.
     */
    union
    {
        uint Iea;
        struct
        {
            ubyte Irm;          // reg/mode
            ubyte Isib;         // SIB byte
            ubyte Irex;         // REX prefix
        }
    }

    /* IFL1/IEV1 describe the first operand, which usually winds up being the
     * offset to the Effective Address; IFL1 is the tag saying which variant
     * of IEV1 is in use. IFL2/IEV2 do the same for the second operand,
     * usually for immediate instructions.
     */

    ubyte IFL1;         // FLavor of 1st operand
    ubyte IFL2;         // FLavor of 2nd operand
    evc IEV1;           // 1st operand, if any
    evc IEV2;           // 2nd operand, if any

  nothrow:

    /// OR register number `reg` (0..15) into the reg field of Irm,
    /// setting REX_R in Irex when `reg` has bit 3 set. Existing bits are kept.
    void orReg(uint reg)
    {
        if ((reg & 8) != 0)
            Irex |= REX_R;
        Irm |= modregrm(0, reg & 7, 0);
    }

    /// Clear REX_R and the reg field of Irm, then store `reg` via orReg().
    void setReg(uint reg)
    {
        const ubyte regField = modregrm(0, 7, 0);
        Irex &= ~REX_R;
        Irm &= cast(ubyte)~cast(uint)regField;
        orReg(reg);
    }

    /// True if Iop is JMP or JMPS.
    bool isJumpOP()
    {
        return Iop == JMP || Iop == JMPS;
    }

    /// Pretty-printer: hands this instruction to code_print().
    extern (C++) void print()
    {
        code_print(&this);
    }
}
401 
// Externally defined (C linkage) routine; code.print() calls it as the
// pretty-printer for a single instruction.
extern (C) void code_print(scope code*);
403 
404 /*******************
405  * Some instructions.
406  */
407 
// Opcode values. Values above 0xFF carry more than one byte, e.g. the
// 0x0Fxx opcodes and PAUSE (0xF390).
enum
{
    // Segment override prefixes
    SEGES   = 0x26,
    SEGCS   = 0x2E,
    SEGSS   = 0x36,
    SEGDS   = 0x3E,
    SEGFS   = 0x64,
    SEGGS   = 0x65,

    CMP     = 0x3B,
    CALL    = 0xE8,
    JMP     = 0xE9,    // Intra-Segment Direct
    JMPS    = 0xEB,    // JMP SHORT
    JCXZ    = 0xE3,
    LOOP    = 0xE2,
    LES     = 0xC4,
    LEA     = 0x8D,
    LOCK    = 0xF0,
    INT3    = 0xCC,
    HLT     = 0xF4,
    ENTER   = 0xC8,
    LEAVE   = 0xC9,
    MOVSXb  = 0x0FBE,
    MOVSXw  = 0x0FBF,
    MOVZXb  = 0x0FB6,
    MOVZXw  = 0x0FB7,

    STOSB   = 0xAA,
    STOS    = 0xAB,

    STO     = 0x89,
    LOD     = 0x8B,

    // Conditional jumps. JC/JB share 0x72 and JNC/JAE share 0x73.
    JO      = 0x70,
    JNO     = 0x71,
    JC      = 0x72,
    JB      = 0x72,
    JNC     = 0x73,
    JAE     = 0x73,
    JE      = 0x74,
    JNE     = 0x75,
    JBE     = 0x76,
    JA      = 0x77,
    JS      = 0x78,
    JNS     = 0x79,
    JP      = 0x7A,
    JNP     = 0x7B,
    JL      = 0x7C,
    JGE     = 0x7D,
    JLE     = 0x7E,
    JG      = 0x7F,

    UD2     = 0x0F0B,
    PAUSE   = 0xF390,  // aka REP NOP

    // The three pseudo opcodes below reuse segment prefix values.

    // NOP is used as a placeholder in the linked list of instructions, no
    // actual code will be generated for it.
    NOP     = SEGCS,   // don't use 0x90 because the
                       // Windows stuff wants to output 0x90's

    ASM     = SEGSS,   // string of asm bytes

    ESCAPE  = SEGDS,   // marker that special information is here
                       // (Iop2 is the type of special information)
                       // NOTE(review): struct code has no Iop2 field; the
                       // type presumably lives in the bits of Iop above
                       // ESCAPEmask (see the ESCxxx values) - confirm.
}
473 
474 
enum ESCAPEmask = 0xFF; // code.Iop & ESCAPEmask ==> actual Iop

// Kinds of special information. Each value is a small integer shifted left
// by 8, so it lies entirely above the bits covered by ESCAPEmask.
enum
{
    ESClinnum   = (1 << 8),      // line number information
    ESCctor     = (2 << 8),      // object is constructed
    ESCdtor     = (3 << 8),      // object is destructed
    ESCmark     = (4 << 8),      // mark eh stack
    ESCrelease  = (5 << 8),      // release eh stack
    ESCoffset   = (6 << 8),      // set code offset for eh
    ESCadjesp   = (7 << 8),      // adjust ESP by IEV2.Vint
    ESCmark2    = (8 << 8),      // mark eh stack
    ESCrelease2 = (9 << 8),      // release eh stack
    ESCframeptr = (10 << 8),     // replace with load of frame pointer
    ESCdctor    = (11 << 8),     // D object is constructed
    ESCddtor    = (12 << 8),     // D object is destructed
    ESCadjfpu   = (13 << 8),     // adjust fpustackused by IEV2.Vint
    ESCfixesp   = (14 << 8),     // reset ESP to end of local frame
}
494 
495 /*********************************
496  * Macros to ease generating code
497  * modregrm:    generate mod reg r/m field
498  * modregxrm:   reg could be R8..R15
499  * modregrmx:   rm could be R8..R15
500  * modregxrmx:  reg or rm could be R8..R15
501  * NEWREG:      change reg field of x to r
502  * genorreg:    OR  t,f
503  */
504 
/// Pack mod `m`, reg `r` and r/m `rm` into one byte as (m << 6) | (r << 3) | rm.
/// The arguments are not masked; bits beyond the low 8 of the result are dropped.
ubyte modregrm(uint m, uint r, uint rm)
{
    const uint packed = rm | (r << 3) | (m << 6);
    return cast(ubyte) packed;
}

/// Like modregrm(), but `r` may be 0..15: bit 3 of `r` is moved to bit 18
/// of the result and only its low 3 bits go into the reg field.
uint modregxrm(uint m, uint r, uint rm)
{
    const uint regHigh = (r & 8) << 15;
    return regHigh | modregrm(m, r & 7, rm);
}

/// Like modregrm(), but `rm` may be 0..15: bit 3 of `rm` is moved to bit 16
/// of the result and only its low 3 bits go into the r/m field.
uint modregrmx(uint m, uint r, uint rm)
{
    const uint rmHigh = (rm & 8) << 13;
    return rmHigh | modregrm(m, r, rm & 7);
}

/// Combination of modregxrm() and modregrmx(): both `r` and `rm` may be 0..15.
uint modregxrmx(uint m, uint r, uint rm)
{
    const uint regHigh = (r & 8) << 15;
    const uint rmHigh  = (rm & 8) << 13;
    return regHigh | rmHigh | modregrm(m, r & 7, rm & 7);
}
509 
/// Replace the REX_R bit of `x` with bit 3 of register number `r`;
/// all other bits of `x` are preserved.
void NEWREXR(ref ubyte x, uint r)
{
    const uint highBit = (r & 8) >> 1;      // 4 when bit 3 of r is set, else 0
    x = cast(ubyte)((x & ~REX_R) | highBit);
}
/// Change the reg field (bits 3..5) of `x` to `r`. `r` is not masked, so
/// callers pass a value in 0..7.
void NEWREG (ref ubyte x, uint r)
{
    enum uint regFieldMask = 7 << 3;
    const uint others = x & ~regFieldMask;
    x = cast(ubyte)(others | (r << 3));
}
/// Set the register of instruction `c` to `r` (0..15): the low 3 bits go into
/// the reg field of c.Irm, bit 3 into the REX_R bit of c.Irex.
void code_newreg(code* c, uint r)
{
    NEWREG(c.Irm, r & 7);
    NEWREXR(c.Irex, r);
}
513 
514 //#define genorreg(c,t,f)         genregs((c),0x09,(f),(t))
515 
// REX prefix: the base value REX and the four flag bits OR'd into it.
enum
{
    REX     = 0x40,        // REX prefix byte, OR'd with the following bits:
    REX_W   = 8,           // 0 = default operand size, 1 = 64 bit operand size
    REX_R   = 4,           // high bit of reg field of modregrm
    REX_X   = 2,           // high bit of sib index reg
    REX_B   = 1,           // high bit of rm field, sib base reg, or opcode reg
}
524 
/// Assemble a byte from the fields of `ivex`, laid out from the most
/// significant bit down as: r (1 bit), vvvv (4 bits), l (1 bit), pp (2 bits).
uint VEX2_B1(code.Svex ivex)
{
    const uint r    = ivex.r;
    const uint vvvv = ivex.vvvv;
    const uint l    = ivex.l;
    const uint pp   = ivex.pp;
    return (r << 7) | (vvvv << 3) | (l << 2) | pp;
}
533 
/// Assemble a byte from the fields of `ivex`, laid out from the most
/// significant bit down as: r, x, b (1 bit each), then mmmm (5 bits).
uint VEX3_B1(code.Svex ivex)
{
    const uint r    = ivex.r;
    const uint x    = ivex.x;
    const uint b    = ivex.b;
    const uint mmmm = ivex.mmmm;
    return (r << 7) | (x << 6) | (b << 5) | mmmm;
}
542 
/// Assemble a byte from the fields of `ivex`, laid out from the most
/// significant bit down as: w (1 bit), vvvv (4 bits), l (1 bit), pp (2 bits).
uint VEX3_B2(code.Svex ivex)
{
    const uint w    = ivex.w;
    const uint vvvv = ivex.vvvv;
    const uint l    = ivex.l;
    const uint pp   = ivex.pp;
    return (w << 7) | (vvvv << 3) | (l << 2) | pp;
}
551 
/// True when the configured target CPU is TARGET_80286 or lower.
@trusted
bool ADDFWAIT()
{
    return !(config.target_cpu > TARGET_80286);
}
554 
555 /************************************
556  */
557 
558 extern (C++):
559 
/// Describes the value held in one 8087 stack slot or spill slot
/// (see Globals87, which stores arrays of these).
struct NDP
{
    elem *e;                    // which elem is stored here (NULL if none)
    uint offset;            // offset from e (used for complex numbers)
}
565 
/// Bookkeeping for the 8087 register stack: what each of the 8 slots
/// holds, how many are in use, and which values were spilled to memory.
struct Globals87
{
    NDP[8] stack;              // 8087 stack
    int stackused = 0;         // number of items on the 8087 stack

    Barray!NDP save;           // 8087 values spilled to memory
}