/**
 * Code generation 3
 *
 * Includes:
 * - generating a function prolog (pushing return address, loading parameters)
 * - generating a function epilog (restoring registers, returning)
 * - generation / peephole optimizations of jump / branch instructions
 *
 * Compiler implementation of the
 * $(LINK2 https://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) 1994-1998 by Symantec
 *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod3.d, backend/cod3.d)
 * Documentation:  https://dlang.org/phobos/dmd_backend_cod3.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod3.d
 */

module dmd.backend.cod3;

version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;

version (COMPILE)
{

import core.bitop;
import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.backend;
import dmd.backend.barray;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.cgcse;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.dlist;
import dmd.backend.dvec;
import dmd.backend.melf;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.exh;
import dmd.backend.global;
import dmd.backend.obj;
import dmd.backend.oper;
import dmd.backend.rtlsym;
import dmd.backend.symtab;
import dmd.backend.ty;
import dmd.backend.type;
import dmd.backend.xmm;

version (SCPP)
{
    import parser;
    import precomp;
}

extern (C++):

nothrow:
@safe:

version (MARS)
    enum MARS = true;
else
    enum MARS = false;

extern __gshared CGstate cgstate;

private extern (D) uint mask(uint m) { return 1 << m; }

//private void genorreg(ref CodeBuilder c, uint t, uint f) { genregs(c, 0x09, f, t); }

extern __gshared targ_size_t retsize;

enum JMPJMPTABLE = false;           // benchmarking shows it's slower

enum MINLL = 0x8000_0000_0000_0000L;
enum MAXLL = 0x7FFF_FFFF_FFFF_FFFFL;

/*************
 * Size in bytes of each instruction.
 * 0 means illegal instruction.
91 * bit M: if there is a modregrm field (EV1 is reserved for modregrm) 92 * bit T: if there is a second operand (EV2) 93 * bit E: if second operand is only 8 bits 94 * bit A: a short version exists for the AX reg 95 * bit R: a short version exists for regs 96 * bits 2..0: size of instruction (excluding optional bytes) 97 */ 98 99 enum 100 { 101 M = 0x80, 102 T = 0x40, 103 E = 0x20, 104 A = 0x10, 105 R = 0x08, 106 W = 0, 107 } 108 109 private __gshared ubyte[256] inssize = 110 [ M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 00 */ 111 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 08 */ 112 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 10 */ 113 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 18 */ 114 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 20 */ 115 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 28 */ 116 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 30 */ 117 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 38 */ 118 1,1,1,1, 1,1,1,1, /* 40 */ 119 1,1,1,1, 1,1,1,1, /* 48 */ 120 1,1,1,1, 1,1,1,1, /* 50 */ 121 1,1,1,1, 1,1,1,1, /* 58 */ 122 1,1,M|2,M|2, 1,1,1,1, /* 60 */ 123 T|3,M|T|4,T|E|2,M|T|E|3, 1,1,1,1, /* 68 */ 124 T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* 70 */ 125 T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* 78 */ 126 M|T|E|A|3,M|T|A|4,M|T|E|3,M|T|E|3, M|2,M|2,M|2,M|A|R|2, /* 80 */ 127 M|A|2,M|A|2,M|A|2,M|A|2, M|2,M|2,M|2,M|R|2, /* 88 */ 128 1,1,1,1, 1,1,1,1, /* 90 */ 129 1,1,T|5,1, 1,1,1,1, /* 98 */ 130 131 // cod3_set32() patches this 132 // T|5,T|5,T|5,T|5, 1,1,1,1, /* A0 */ 133 T|3,T|3,T|3,T|3, 1,1,1,1, /* A0 */ 134 135 T|E|2,T|3,1,1, 1,1,1,1, /* A8 */ 136 T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* B0 */ 137 T|3,T|3,T|3,T|3, T|3,T|3,T|3,T|3, /* B8 */ 138 M|T|E|3,M|T|E|3,T|3,1, M|2,M|2,M|T|E|R|3,M|T|R|4, /* C0 */ 139 T|E|4,1,T|3,1, 1,T|E|2,1,1, /* C8 */ 140 M|2,M|2,M|2,M|2, T|E|2,T|E|2,0,1, /* D0 */ 141 /* For the floating instructions, allow room for the FWAIT */ 142 M|2,M|2,M|2,M|2, M|2,M|2,M|2,M|2, /* D8 */ 143 T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* E0 */ 144 T|3,T|3,T|5,T|E|2, 1,1,1,1, /* E8 */ 145 1,0,1,1, 1,1,M|A|2,M|A|2, /* F0 */ 146 1,1,1,1, 1,1,M|2,M|R|2 /* F8 */ 147 ]; 148 149 private __gshared const ubyte[256] inssize32 = 150 [ 2,2,2,2, 2,5,1,1, /* 00 */ 151 2,2,2,2, 2,5,1,1, /* 08 */ 152 2,2,2,2, 2,5,1,1, /* 10 */ 153 2,2,2,2, 2,5,1,1, /* 18 */ 154 2,2,2,2, 2,5,1,1, /* 20 */ 155 2,2,2,2, 2,5,1,1, /* 28 */ 156 2,2,2,2, 2,5,1,1, /* 30 */ 157 2,2,2,2, 2,5,1,1, /* 38 */ 158 1,1,1,1, 1,1,1,1, /* 40 */ 159 1,1,1,1, 1,1,1,1, /* 48 */ 160 1,1,1,1, 1,1,1,1, /* 50 */ 161 1,1,1,1, 1,1,1,1, /* 58 */ 162 1,1,2,2, 1,1,1,1, /* 60 */ 163 5,6,2,3, 1,1,1,1, /* 68 */ 164 2,2,2,2, 2,2,2,2, /* 70 */ 165 2,2,2,2, 2,2,2,2, /* 78 */ 166 3,6,3,3, 2,2,2,2, /* 80 */ 167 2,2,2,2, 2,2,2,2, /* 88 */ 168 1,1,1,1, 1,1,1,1, /* 90 */ 169 1,1,7,1, 1,1,1,1, /* 98 */ 170 5,5,5,5, 1,1,1,1, /* A0 */ 171 2,5,1,1, 1,1,1,1, /* A8 */ 172 2,2,2,2, 2,2,2,2, /* B0 */ 173 5,5,5,5, 5,5,5,5, /* B8 */ 174 3,3,3,1, 2,2,3,6, /* C0 */ 175 4,1,3,1, 1,2,1,1, /* C8 */ 176 2,2,2,2, 2,2,0,1, /* D0 */ 177 /* For the floating instructions, don't need room for the FWAIT */ 178 2,2,2,2, 2,2,2,2, /* D8 */ 179 180 2,2,2,2, 2,2,2,2, /* E0 */ 181 5,5,7,2, 1,1,1,1, /* E8 */ 182 1,0,1,1, 1,1,2,2, /* F0 */ 183 1,1,1,1, 1,1,2,2 /* F8 */ 184 ]; 185 186 /* For 2 byte opcodes starting with 0x0F */ 187 private __gshared ubyte[256] inssize2 = 188 [ M|3,M|3,M|3,M|3, 2,2,2,2, // 00 189 2,2,M|3,2, 2,M|3,2,M|T|E|4, // 08 190 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 10 191 M|3,2,2,2, 2,2,2,2, // 18 192 M|3,M|3,M|3,M|3, M|3,2,M|3,2, // 20 193 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 28 194 2,2,2,2, 
                             2,2,2,2,               // 30
    M|4,2,M|T|E|5,2,         2,2,2,2,               // 38
    M|3,M|3,M|3,M|3,         M|3,M|3,M|3,M|3,       // 40
    M|3,M|3,M|3,M|3,         M|3,M|3,M|3,M|3,       // 48
    M|3,M|3,M|3,M|3,         M|3,M|3,M|3,M|3,       // 50
    M|3,M|3,M|3,M|3,         M|3,M|3,M|3,M|3,       // 58
    M|3,M|3,M|3,M|3,         M|3,M|3,M|3,M|3,       // 60
    M|3,M|3,M|3,M|3,         M|3,M|3,M|3,M|3,       // 68
    M|T|E|4,M|T|E|4,M|T|E|4,M|T|E|4, M|3,M|3,M|3,2, // 70
    2,2,2,2,                 M|3,M|3,M|3,M|3,       // 78
    W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4, // 80
    W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4, // 88
    M|3,M|3,M|3,M|3,         M|3,M|3,M|3,M|3,       // 90
    M|3,M|3,M|3,M|3,         M|3,M|3,M|3,M|3,       // 98
    2,2,2,M|3,               M|T|E|4,M|3,2,2,       // A0
    2,2,2,M|3,               M|T|E|4,M|3,M|3,M|3,   // A8
    M|E|3,M|3,M|3,M|3,       M|3,M|3,M|3,M|3,       // B0
    M|3,2,M|T|E|4,M|3,       M|3,M|3,M|3,M|3,       // B8
    M|3,M|3,M|T|E|4,M|3,     M|T|E|4,M|T|E|4,M|T|E|4,M|3, // C0
    2,2,2,2,                 2,2,2,2,               // C8
    M|3,M|3,M|3,M|3,         M|3,M|3,M|3,M|3,       // D0
    M|3,M|3,M|3,M|3,         M|3,M|3,M|3,M|3,       // D8
    M|3,M|3,M|3,M|3,         M|3,M|3,M|3,M|3,       // E0
    M|3,M|3,M|3,M|3,         M|3,M|3,M|3,M|3,       // E8
    M|3,M|3,M|3,M|3,         M|3,M|3,M|3,M|3,       // F0
    M|3,M|3,M|3,M|3,         M|3,M|3,M|3,2          // F8
];

/*************************************************
 * Generate code to save `reg` in `regsave` stack area.
 * Params:
 *      regsave = register save area on stack
 *      cdb = where to write generated code
 *      reg = register to save
 *      idx = set to location in regsave for use in REGSAVE_restore()
 */

@trusted
void REGSAVE_save(ref REGSAVE regsave, ref CodeBuilder cdb, reg_t reg, out uint idx)
{
    if (isXMMreg(reg))
    {
        regsave.alignment = 16;
        regsave.idx = (regsave.idx + 15) & ~15;
        idx = regsave.idx;
        regsave.idx += 16;
        // MOVD idx[RBP],xmm
        opcode_t op = STOAPD;
        if (TARGET_LINUX && I32)
            // Haven't yet figured out why stack is not aligned to 16
            op = STOUPD;
        cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLregsave,cast(targ_uns) idx);
    }
    else
    {
        if (!regsave.alignment)
            regsave.alignment = REGSIZE;
        idx = regsave.idx;
        regsave.idx += REGSIZE;
        // MOV idx[RBP],reg
        cdb.genc1(0x89,modregxrm(2, reg, BPRM),FLregsave,cast(targ_uns) idx);
        if (I64)
            code_orrex(cdb.last(), REX_W);
    }
    reflocal = true;
    if (regsave.idx > regsave.top)
        regsave.top = regsave.idx;              // keep high water mark
}

/*******************************
 * Restore `reg` from `regsave` area.
 * Complement REGSAVE_save().
 */

@trusted
void REGSAVE_restore(const ref REGSAVE regsave, ref CodeBuilder cdb, reg_t reg, uint idx)
{
    if (isXMMreg(reg))
    {
        assert(regsave.alignment == 16);
        // MOVD xmm,idx[RBP]
        opcode_t op = LODAPD;
        if (TARGET_LINUX && I32)
            // Haven't yet figured out why stack is not aligned to 16
            op = LODUPD;
        cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLregsave,cast(targ_uns) idx);
    }
    else
    {   // MOV reg,idx[RBP]
        cdb.genc1(0x8B,modregxrm(2, reg, BPRM),FLregsave,cast(targ_uns) idx);
        if (I64)
            code_orrex(cdb.last(), REX_W);
    }
}

/************************************
 * Size for vex encoded instruction.
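 * A two-byte VEX prefix adds one byte on top of the corresponding 0F-map
 * entry in inssize2[]; a three-byte VEX prefix adds two (or one byte on top
 * of the 0F 38 / 0F 3A base entries), which is what the computation below does.
 * Example (illustrative only):
 * ---
 * // query the size of a VEX-encoded instruction `c` (c.Iflags & CFvex is set)
 * const ubyte sz = vex_inssize(c);
 * ---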
291 */ 292 293 @trusted 294 ubyte vex_inssize(code *c) 295 { 296 assert(c.Iflags & CFvex && c.Ivex.pfx == 0xC4); 297 ubyte ins; 298 if (c.Iflags & CFvex3) 299 { 300 switch (c.Ivex.mmmm) 301 { 302 case 0: // no prefix 303 case 1: // 0F 304 ins = cast(ubyte)(inssize2[c.Ivex.op] + 2); 305 break; 306 case 2: // 0F 38 307 ins = cast(ubyte)(inssize2[0x38] + 1); 308 break; 309 case 3: // 0F 3A 310 ins = cast(ubyte)(inssize2[0x3A] + 1); 311 break; 312 default: 313 printf("Iop = %x mmmm = %x\n", c.Iop, c.Ivex.mmmm); 314 assert(0); 315 } 316 } 317 else 318 { 319 ins = cast(ubyte)(inssize2[c.Ivex.op] + 1); 320 } 321 return ins; 322 } 323 324 /************************************ 325 * Determine if there is a modregrm byte for code. 326 */ 327 328 @trusted 329 int cod3_EA(code *c) 330 { uint ins; 331 332 opcode_t op1 = c.Iop & 0xFF; 333 if (op1 == ESCAPE) 334 ins = 0; 335 else if ((c.Iop & 0xFFFD00) == 0x0F3800) 336 ins = inssize2[(c.Iop >> 8) & 0xFF]; 337 else if ((c.Iop & 0xFF00) == 0x0F00) 338 ins = inssize2[op1]; 339 else 340 ins = inssize[op1]; 341 return ins & M; 342 } 343 344 /******************************** 345 * setup ALLREGS and BYTEREGS 346 * called by: codgen 347 */ 348 349 @trusted 350 void cod3_initregs() 351 { 352 if (I64) 353 { 354 ALLREGS = mAX|mBX|mCX|mDX|mSI|mDI| mR8|mR9|mR10|mR11|mR12|mR13|mR14|mR15; 355 BYTEREGS = ALLREGS; 356 } 357 else 358 { 359 ALLREGS = ALLREGS_INIT; 360 BYTEREGS = BYTEREGS_INIT; 361 } 362 } 363 364 /******************************** 365 * set initial global variable values 366 */ 367 368 @trusted 369 void cod3_setdefault() 370 { 371 fregsaved = mBP | mSI | mDI; 372 } 373 374 /******************************** 375 * Fix global variables for 386. 376 */ 377 @trusted 378 void cod3_set32() 379 { 380 inssize[0xA0] = T|5; 381 inssize[0xA1] = T|5; 382 inssize[0xA2] = T|5; 383 inssize[0xA3] = T|5; 384 BPRM = 5; /* [EBP] addressing mode */ 385 fregsaved = mBP | mBX | mSI | mDI; // saved across function calls 386 FLOATREGS = FLOATREGS_32; 387 FLOATREGS2 = FLOATREGS2_32; 388 DOUBLEREGS = DOUBLEREGS_32; 389 if (config.flags3 & CFG3eseqds) 390 fregsaved |= mES; 391 392 foreach (ref v; inssize2[0x80 .. 0x90]) 393 v = W|T|6; 394 395 TARGET_STACKALIGN = config.fpxmmregs ? 16 : 4; 396 } 397 398 /******************************** 399 * Fix global variables for I64. 400 */ 401 402 @trusted 403 void cod3_set64() 404 { 405 inssize[0xA0] = T|5; // MOV AL,mem 406 inssize[0xA1] = T|5; // MOV RAX,mem 407 inssize[0xA2] = T|5; // MOV mem,AL 408 inssize[0xA3] = T|5; // MOV mem,RAX 409 BPRM = 5; // [RBP] addressing mode 410 411 fregsaved = (config.exe & EX_windos) 412 ? mBP | mBX | mDI | mSI | mR12 | mR13 | mR14 | mR15 | mES | mXMM6 | mXMM7 // also XMM8..15; 413 : mBP | mBX | mR12 | mR13 | mR14 | mR15 | mES; // saved across function calls 414 415 FLOATREGS = FLOATREGS_64; 416 FLOATREGS2 = FLOATREGS2_64; 417 DOUBLEREGS = DOUBLEREGS_64; 418 419 ALLREGS = mAX|mBX|mCX|mDX|mSI|mDI| mR8|mR9|mR10|mR11|mR12|mR13|mR14|mR15; 420 BYTEREGS = ALLREGS; 421 422 foreach (ref v; inssize2[0x80 .. 0x90]) 423 v = W|T|6; 424 425 TARGET_STACKALIGN = config.fpxmmregs ? 16 : 8; 426 } 427 428 /********************************* 429 * Word or dword align start of function. 
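 * On targets with multi-byte NOP support, each chunk of padding is emitted as
 * a single long NOP (see the table in the function body); otherwise plain
 * 0x90 bytes are used.
 * Example of typical use (illustrative; mirrors cod3_align() below):
 * ---
 * const nbytes = -Offset(seg) & 7;   // bytes needed to reach an 8-byte boundary
 * cod3_align_bytes(seg, nbytes);
 * ---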
430 * Params: 431 * seg = segment to write alignment bytes to 432 * nbytes = number of alignment bytes to write 433 */ 434 @trusted 435 void cod3_align_bytes(int seg, size_t nbytes) 436 { 437 /* Table 4-2 from Intel Instruction Set Reference M-Z 438 * 1 bytes NOP 90 439 * 2 bytes 66 NOP 66 90 440 * 3 bytes NOP DWORD ptr [EAX] 0F 1F 00 441 * 4 bytes NOP DWORD ptr [EAX + 00H] 0F 1F 40 00 442 * 5 bytes NOP DWORD ptr [EAX + EAX*1 + 00H] 0F 1F 44 00 00 443 * 6 bytes 66 NOP DWORD ptr [EAX + EAX*1 + 00H] 66 0F 1F 44 00 00 444 * 7 bytes NOP DWORD ptr [EAX + 00000000H] 0F 1F 80 00 00 00 00 445 * 8 bytes NOP DWORD ptr [EAX + EAX*1 + 00000000H] 0F 1F 84 00 00 00 00 00 446 * 9 bytes 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] 66 0F 1F 84 00 00 00 00 00 447 * only for CPUs: CPUID.01H.EAX[Bytes 11:8] = 0110B or 1111B 448 */ 449 450 assert(SegData[seg].SDseg == seg); 451 452 while (nbytes) 453 { size_t n = nbytes; 454 const(char)* p; 455 456 if (nbytes > 1 && (I64 || config.fpxmmregs)) 457 { 458 switch (n) 459 { 460 case 2: p = "\x66\x90"; break; 461 case 3: p = "\x0F\x1F\x00"; break; 462 case 4: p = "\x0F\x1F\x40\x00"; break; 463 case 5: p = "\x0F\x1F\x44\x00\x00"; break; 464 case 6: p = "\x66\x0F\x1F\x44\x00\x00"; break; 465 case 7: p = "\x0F\x1F\x80\x00\x00\x00\x00"; break; 466 case 8: p = "\x0F\x1F\x84\x00\x00\x00\x00\x00"; break; 467 default: p = "\x66\x0F\x1F\x84\x00\x00\x00\x00\x00"; n = 9; break; 468 } 469 } 470 else 471 { 472 static immutable ubyte[15] nops = [ 473 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90 474 ]; // XCHG AX,AX 475 if (n > nops.length) 476 n = nops.length; 477 p = cast(char*)nops; 478 } 479 objmod.write_bytes(SegData[seg],cast(uint)n,cast(char*)p); 480 nbytes -= n; 481 } 482 } 483 484 /**************************** 485 * Align start of function. 486 * Params: 487 * seg = segment of function 488 */ 489 @trusted 490 void cod3_align(int seg) 491 { 492 if (config.exe & EX_windos) 493 { 494 if (config.flags4 & CFG4speed) // if optimized for speed 495 { 496 // Pick alignment based on CPU target 497 if (config.target_cpu == TARGET_80486 || 498 config.target_cpu >= TARGET_PentiumPro) 499 { // 486 does reads on 16 byte boundaries, so if we are near 500 // such a boundary, align us to it 501 502 const nbytes = -Offset(seg) & 15; 503 if (nbytes < 8) 504 cod3_align_bytes(seg, nbytes); 505 } 506 } 507 } 508 else 509 { 510 const nbytes = -Offset(seg) & 7; 511 cod3_align_bytes(seg, nbytes); 512 } 513 } 514 515 516 /********************************** 517 * Generate code to adjust the stack pointer by `nbytes` 518 * Params: 519 * cdb = code builder 520 * nbytes = number of bytes to adjust stack pointer 521 */ 522 void cod3_stackadj(ref CodeBuilder cdb, int nbytes) 523 { 524 //printf("cod3_stackadj(%d)\n", nbytes); 525 uint grex = I64 ? REX_W << 16 : 0; 526 uint rm; 527 if (nbytes > 0) 528 rm = modregrm(3,5,SP); // SUB ESP,nbytes 529 else 530 { 531 nbytes = -nbytes; 532 rm = modregrm(3,0,SP); // ADD ESP,nbytes 533 } 534 cdb.genc2(0x81, grex | rm, nbytes); 535 } 536 537 /********************************** 538 * Generate code to align the stack pointer at `nbytes` 539 * Params: 540 * cdb = code builder 541 * nbytes = number of bytes to align stack pointer 542 */ 543 void cod3_stackalign(ref CodeBuilder cdb, int nbytes) 544 { 545 //printf("cod3_stackalign(%d)\n", nbytes); 546 const grex = I64 ? 
REX_W << 16 : 0; 547 const rm = modregrm(3, 4, SP); // AND ESP,-nbytes 548 cdb.genc2(0x81, grex | rm, -nbytes); 549 } 550 551 /* Constructor that links the ModuleReference to the head of 552 * the list pointed to by _Dmoduleref 553 * 554 * For ELF object files. 555 */ 556 static if (0) 557 { 558 void cod3_buildmodulector(OutBuffer* buf, int codeOffset, int refOffset) 559 { 560 /* ret 561 * codeOffset: 562 * pushad 563 * mov EAX,&ModuleReference 564 * mov ECX,_DmoduleRef 565 * mov EDX,[ECX] 566 * mov [EAX],EDX 567 * mov [ECX],EAX 568 * popad 569 * ret 570 */ 571 572 const int seg = CODE; 573 574 if (I64 && config.flags3 & CFG3pic) 575 { // LEA RAX,ModuleReference[RIP] 576 buf.writeByte(REX | REX_W); 577 buf.writeByte(LEA); 578 buf.writeByte(modregrm(0,AX,5)); 579 codeOffset += 3; 580 codeOffset += Obj.writerel(seg, codeOffset, R_X86_64_PC32, 3 /*STI_DATA*/, refOffset - 4); 581 582 // MOV RCX,_DmoduleRef@GOTPCREL[RIP] 583 buf.writeByte(REX | REX_W); 584 buf.writeByte(0x8B); 585 buf.writeByte(modregrm(0,CX,5)); 586 codeOffset += 3; 587 codeOffset += Obj.writerel(seg, codeOffset, R_X86_64_GOTPCREL, Obj.external_def("_Dmodule_ref"), -4); 588 } 589 else 590 { 591 /* movl ModuleReference*, %eax */ 592 buf.writeByte(0xB8); 593 codeOffset += 1; 594 const uint reltype = I64 ? R_X86_64_32 : R_386_32; 595 codeOffset += Obj.writerel(seg, codeOffset, reltype, 3 /*STI_DATA*/, refOffset); 596 597 /* movl _Dmodule_ref, %ecx */ 598 buf.writeByte(0xB9); 599 codeOffset += 1; 600 codeOffset += Obj.writerel(seg, codeOffset, reltype, Obj.external_def("_Dmodule_ref"), 0); 601 } 602 603 if (I64) 604 buf.writeByte(REX | REX_W); 605 buf.writeByte(0x8B); buf.writeByte(0x11); /* movl (%ecx), %edx */ 606 if (I64) 607 buf.writeByte(REX | REX_W); 608 buf.writeByte(0x89); buf.writeByte(0x10); /* movl %edx, (%eax) */ 609 if (I64) 610 buf.writeByte(REX | REX_W); 611 buf.writeByte(0x89); buf.writeByte(0x01); /* movl %eax, (%ecx) */ 612 613 buf.writeByte(0xC3); /* ret */ 614 } 615 } 616 617 /***************************** 618 * Given a type, return a mask of 619 * registers to hold that type. 620 * Input: 621 * tyf function type 622 */ 623 624 @trusted 625 regm_t regmask(tym_t tym, tym_t tyf) 626 { 627 switch (tybasic(tym)) 628 { 629 case TYvoid: 630 case TYnoreturn: 631 case TYstruct: 632 case TYarray: 633 return 0; 634 635 case TYbool: 636 case TYwchar_t: 637 case TYchar16: 638 case TYchar: 639 case TYschar: 640 case TYuchar: 641 case TYshort: 642 case TYushort: 643 case TYint: 644 case TYuint: 645 case TYnullptr: 646 case TYnptr: 647 case TYnref: 648 case TYsptr: 649 case TYcptr: 650 case TYimmutPtr: 651 case TYsharePtr: 652 case TYrestrictPtr: 653 case TYfgPtr: 654 return mAX; 655 656 case TYfloat: 657 case TYifloat: 658 if (I64) 659 return mXMM0; 660 if (config.exe & EX_flat) 661 return mST0; 662 goto case TYlong; 663 664 case TYlong: 665 case TYulong: 666 case TYdchar: 667 if (!I16) 668 return mAX; 669 goto case TYfptr; 670 671 case TYfptr: 672 case TYhptr: 673 return mDX | mAX; 674 675 case TYcent: 676 case TYucent: 677 assert(I64); 678 return mDX | mAX; 679 680 case TYvptr: 681 return mDX | mBX; 682 683 case TYdouble: 684 case TYdouble_alias: 685 case TYidouble: 686 if (I64) 687 return mXMM0; 688 if (config.exe & EX_flat) 689 return mST0; 690 return DOUBLEREGS; 691 692 case TYllong: 693 case TYullong: 694 return I64 ? cast(regm_t) mAX : (I32 ? 
mDX | mAX : DOUBLEREGS); 695 696 case TYldouble: 697 case TYildouble: 698 return mST0; 699 700 case TYcfloat: 701 if (config.exe & EX_posix && I32 && tybasic(tyf) == TYnfunc) 702 return mDX | mAX; 703 goto case TYcdouble; 704 705 case TYcdouble: 706 if (I64) 707 return mXMM0 | mXMM1; 708 goto case TYcldouble; 709 710 case TYcldouble: 711 return mST01; 712 713 // SIMD vector types 714 case TYfloat4: 715 case TYdouble2: 716 case TYschar16: 717 case TYuchar16: 718 case TYshort8: 719 case TYushort8: 720 case TYlong4: 721 case TYulong4: 722 case TYllong2: 723 case TYullong2: 724 725 case TYfloat8: 726 case TYdouble4: 727 case TYschar32: 728 case TYuchar32: 729 case TYshort16: 730 case TYushort16: 731 case TYlong8: 732 case TYulong8: 733 case TYllong4: 734 case TYullong4: 735 if (!config.fpxmmregs) 736 { printf("SIMD operations not supported on this platform\n"); 737 exit(1); 738 } 739 return mXMM0; 740 741 default: 742 debug printf("%s\n", tym_str(tym)); 743 assert(0); 744 } 745 } 746 747 /******************************* 748 * setup register allocator parameters with platform specific data 749 */ 750 void cgreg_dst_regs(reg_t* dst_integer_reg, reg_t* dst_float_reg) 751 { 752 *dst_integer_reg = AX; 753 *dst_float_reg = XMM0; 754 } 755 756 @trusted 757 void cgreg_set_priorities(tym_t ty, const(reg_t)** pseq, const(reg_t)** pseqmsw) 758 { 759 //printf("cgreg_set_priorities %x\n", ty); 760 const sz = tysize(ty); 761 762 if (tyxmmreg(ty)) 763 { 764 static immutable ubyte[9] sequence = [XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,NOREG]; 765 *pseq = sequence.ptr; 766 } 767 else if (I64) 768 { 769 if (sz == REGSIZE * 2) 770 { 771 static immutable ubyte[3] seqmsw1 = [CX,DX,NOREG]; 772 static immutable ubyte[5] seqlsw1 = [AX,BX,SI,DI,NOREG]; 773 *pseq = seqlsw1.ptr; 774 *pseqmsw = seqmsw1.ptr; 775 } 776 else 777 { // R10 is reserved for the static link 778 static immutable ubyte[15] sequence2 = [AX,CX,DX,SI,DI,R8,R9,R11,BX,R12,R13,R14,R15,BP,NOREG]; 779 *pseq = cast(ubyte*)sequence2.ptr; 780 } 781 } 782 else if (I32) 783 { 784 if (sz == REGSIZE * 2) 785 { 786 static immutable ubyte[5] seqlsw3 = [AX,BX,SI,DI,NOREG]; 787 static immutable ubyte[3] seqmsw3 = [CX,DX,NOREG]; 788 *pseq = seqlsw3.ptr; 789 *pseqmsw = seqmsw3.ptr; 790 } 791 else 792 { 793 static immutable ubyte[8] sequence4 = [AX,CX,DX,BX,SI,DI,BP,NOREG]; 794 *pseq = sequence4.ptr; 795 } 796 } 797 else 798 { assert(I16); 799 if (typtr(ty)) 800 { 801 // For pointer types, try to pick index register first 802 static immutable ubyte[8] seqidx5 = [BX,SI,DI,AX,CX,DX,BP,NOREG]; 803 *pseq = seqidx5.ptr; 804 } 805 else 806 { 807 // Otherwise, try to pick index registers last 808 static immutable ubyte[8] sequence6 = [AX,CX,DX,BX,SI,DI,BP,NOREG]; 809 *pseq = sequence6.ptr; 810 } 811 } 812 } 813 814 /******************************************* 815 * Call finally block. 
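 * The registers in `retregs` are saved and restored around the call, and the
 * stack is realigned to STACKALIGN if necessary before the CALL is emitted.
 * Example of use (illustrative; this is how outblkexitcode() below uses it):
 * ---
 * // invoke the finally handler's body block, preserving live return registers
 * cdb.append(callFinallyBlock(bf.nthSucc(0), retregs));
 * ---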
816 * Params: 817 * bf = block to call 818 * retregs = registers to preserve across call 819 * Returns: 820 * code generated 821 */ 822 @trusted 823 private code *callFinallyBlock(block *bf, regm_t retregs) 824 { 825 CodeBuilder cdbs; cdbs.ctor(); 826 CodeBuilder cdbr; cdbr.ctor(); 827 int nalign = 0; 828 829 calledFinally = true; 830 uint npush = gensaverestore(retregs,cdbs,cdbr); 831 832 if (STACKALIGN >= 16) 833 { npush += REGSIZE; 834 if (npush & (STACKALIGN - 1)) 835 { nalign = STACKALIGN - (npush & (STACKALIGN - 1)); 836 cod3_stackadj(cdbs, nalign); 837 } 838 } 839 cdbs.genc(0xE8,0,0,0,FLblock,cast(targ_size_t)bf); 840 regcon.immed.mval = 0; 841 if (nalign) 842 cod3_stackadj(cdbs, -nalign); 843 cdbs.append(cdbr); 844 return cdbs.finish(); 845 } 846 847 /******************************* 848 * Generate block exit code 849 */ 850 @trusted 851 void outblkexitcode(ref CodeBuilder cdb, block *bl, ref int anyspill, const(char)* sflsave, Symbol** retsym, const regm_t mfuncregsave) 852 { 853 CodeBuilder cdb2; cdb2.ctor(); 854 elem *e = bl.Belem; 855 block *nextb; 856 regm_t retregs = 0; 857 858 if (bl.BC != BCasm) 859 assert(bl.Bcode == null); 860 861 switch (bl.BC) /* block exit condition */ 862 { 863 case BCiftrue: 864 { 865 bool jcond = true; 866 block *bs1 = bl.nthSucc(0); 867 block *bs2 = bl.nthSucc(1); 868 if (bs1 == bl.Bnext) 869 { // Swap bs1 and bs2 870 block *btmp; 871 872 jcond ^= 1; 873 btmp = bs1; 874 bs1 = bs2; 875 bs2 = btmp; 876 } 877 logexp(cdb,e,jcond,FLblock,cast(code *) bs1); 878 nextb = bs2; 879 } 880 L5: 881 if (configv.addlinenumbers && bl.Bsrcpos.Slinnum && 882 !(funcsym_p.ty() & mTYnaked)) 883 { 884 //printf("BCiftrue: %s(%u)\n", bl.Bsrcpos.Sfilename ? bl.Bsrcpos.Sfilename : "", bl.Bsrcpos.Slinnum); 885 cdb.genlinnum(bl.Bsrcpos); 886 } 887 if (nextb != bl.Bnext) 888 { 889 assert(!(bl.Bflags & BFLepilog)); 890 genjmp(cdb,JMP,FLblock,nextb); 891 } 892 break; 893 894 case BCjmptab: 895 case BCifthen: 896 case BCswitch: 897 { 898 assert(!(bl.Bflags & BFLepilog)); 899 doswitch(cdb,bl); // hide messy details 900 break; 901 } 902 version (MARS) 903 { 904 case BCjcatch: // D catch clause of try-catch 905 assert(ehmethod(funcsym_p) != EHmethod.EH_NONE); 906 // Mark all registers as destroyed. This will prevent 907 // register assignments to variables used in catch blocks. 908 getregs(cdb,lpadregs()); 909 910 if (config.ehmethod == EHmethod.EH_DWARF) 911 { 912 /* Each block must have ESP set to the same value it was at the end 913 * of the prolog. But the unwinder calls catch blocks with ESP set 914 * at the value it was when the throwing function was called, which 915 * may have arguments pushed on the stack. 916 * This instruction will reset ESP to the correct offset from EBP. 917 */ 918 cdb.gen1(ESCAPE | ESCfixesp); 919 } 920 goto case_goto; 921 } 922 version (SCPP) 923 { 924 case BCcatch: // C++ catch clause of try-catch 925 // Mark all registers as destroyed. This will prevent 926 // register assignments to variables used in catch blocks. 927 getregs(cdb,allregs | mES); 928 goto case_goto; 929 930 case BCtry: 931 usednteh |= EHtry; 932 if (config.exe == EX_WIN32) 933 usednteh |= NTEHtry; 934 goto case_goto; 935 } 936 case BCgoto: 937 nextb = bl.nthSucc(0); 938 if ((MARS || 939 funcsym_p.Sfunc.Fflags3 & Fnteh) && 940 ehmethod(funcsym_p) != EHmethod.EH_DWARF && 941 bl.Btry != nextb.Btry && 942 nextb.BC != BC_finally) 943 { 944 regm_t retregsx = 0; 945 gencodelem(cdb,e,&retregsx,true); 946 int toindex = nextb.Btry ? 
nextb.Btry.Bscope_index : -1; 947 assert(bl.Btry); 948 int fromindex = bl.Btry.Bscope_index; 949 version (MARS) 950 { 951 if (toindex + 1 == fromindex) 952 { // Simply call __finally 953 if (bl.Btry && 954 bl.Btry.nthSucc(1).BC == BCjcatch) 955 { 956 goto L5; // it's a try-catch, not a try-finally 957 } 958 } 959 } 960 if (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || 961 config.ehmethod == EHmethod.EH_SEH) 962 { 963 nteh_unwind(cdb,0,toindex); 964 } 965 else 966 { 967 version (MARS) 968 { 969 if (toindex + 1 <= fromindex) 970 { 971 //c = cat(c, linux_unwind(0, toindex)); 972 block *bt; 973 974 //printf("B%d: fromindex = %d, toindex = %d\n", bl.Bdfoidx, fromindex, toindex); 975 bt = bl; 976 while ((bt = bt.Btry) != null && bt.Bscope_index != toindex) 977 { block *bf; 978 979 //printf("\tbt.Bscope_index = %d, bt.Blast_index = %d\n", bt.Bscope_index, bt.Blast_index); 980 bf = bt.nthSucc(1); 981 // Only look at try-finally blocks 982 if (bf.BC == BCjcatch) 983 continue; 984 985 if (bf == nextb) 986 continue; 987 //printf("\tbf = B%d, nextb = B%d\n", bf.Bdfoidx, nextb.Bdfoidx); 988 if (nextb.BC == BCgoto && 989 !nextb.Belem && 990 bf == nextb.nthSucc(0)) 991 continue; 992 993 // call __finally 994 cdb.append(callFinallyBlock(bf.nthSucc(0), retregsx)); 995 } 996 } 997 } 998 } 999 goto L5; 1000 } 1001 case_goto: 1002 { 1003 regm_t retregsx = 0; 1004 gencodelem(cdb,e,&retregsx,true); 1005 if (anyspill) 1006 { // Add in the epilog code 1007 CodeBuilder cdbstore; cdbstore.ctor(); 1008 CodeBuilder cdbload; cdbload.ctor(); 1009 1010 for (int i = 0; i < anyspill; i++) 1011 { Symbol *s = globsym[i]; 1012 1013 if (s.Sflags & SFLspill && 1014 vec_testbit(dfoidx,s.Srange)) 1015 { 1016 s.Sfl = sflsave[i]; // undo block register assignments 1017 cgreg_spillreg_epilog(bl,s,cdbstore,cdbload); 1018 } 1019 } 1020 cdb.append(cdbstore); 1021 cdb.append(cdbload); 1022 } 1023 nextb = bl.nthSucc(0); 1024 goto L5; 1025 } 1026 1027 case BC_try: 1028 if (config.ehmethod == EHmethod.EH_NONE || funcsym_p.Sfunc.Fflags3 & Feh_none) 1029 { 1030 /* Need to use frame pointer to access locals, not the stack pointer, 1031 * because we'll be calling the BC_finally blocks and the stack will be off. 1032 */ 1033 needframe = 1; 1034 } 1035 else if (config.ehmethod == EHmethod.EH_SEH || config.ehmethod == EHmethod.EH_WIN32) 1036 { 1037 usednteh |= NTEH_try; 1038 nteh_usevars(); 1039 } 1040 else 1041 usednteh |= EHtry; 1042 goto case_goto; 1043 1044 case BC_finally: 1045 if (ehmethod(funcsym_p) == EHmethod.EH_DWARF) 1046 { 1047 // Mark scratch registers as destroyed. 1048 getregsNoSave(lpadregs()); 1049 1050 regm_t retregsx = 0; 1051 gencodelem(cdb,bl.Belem,&retregsx,true); 1052 1053 // JMP bl.nthSucc(1) 1054 nextb = bl.nthSucc(1); 1055 1056 goto L5; 1057 } 1058 else 1059 { 1060 if (config.ehmethod == EHmethod.EH_SEH || 1061 config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none)) 1062 { 1063 // Mark all registers as destroyed. This will prevent 1064 // register assignments to variables used in finally blocks. 1065 getregsNoSave(lpadregs()); 1066 } 1067 1068 assert(!e); 1069 // Generate CALL to finalizer code 1070 cdb.append(callFinallyBlock(bl.nthSucc(0), 0)); 1071 1072 // JMP bl.nthSucc(1) 1073 nextb = bl.nthSucc(1); 1074 1075 goto L5; 1076 } 1077 1078 case BC_lpad: 1079 { 1080 assert(ehmethod(funcsym_p) == EHmethod.EH_DWARF); 1081 // Mark all registers as destroyed. This will prevent 1082 // register assignments to variables used in finally blocks. 
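            // (A landing pad can be entered by the unwinder from any throw
            // point inside the try body, so no register contents survive here.)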
1083 getregsNoSave(lpadregs()); 1084 1085 regm_t retregsx = 0; 1086 gencodelem(cdb,bl.Belem,&retregsx,true); 1087 1088 // JMP bl.nthSucc(0) 1089 nextb = bl.nthSucc(0); 1090 goto L5; 1091 } 1092 1093 case BC_ret: 1094 { 1095 regm_t retregsx = 0; 1096 gencodelem(cdb,e,&retregsx,true); 1097 if (ehmethod(funcsym_p) == EHmethod.EH_DWARF) 1098 { 1099 } 1100 else 1101 cdb.gen1(0xC3); // RET 1102 break; 1103 } 1104 1105 static if (NTEXCEPTIONS) 1106 { 1107 case BC_except: 1108 { 1109 assert(!e); 1110 usednteh |= NTEH_except; 1111 nteh_setsp(cdb,0x8B); 1112 getregsNoSave(allregs); 1113 nextb = bl.nthSucc(0); 1114 goto L5; 1115 } 1116 case BC_filter: 1117 { 1118 nteh_filter(cdb, bl); 1119 // Mark all registers as destroyed. This will prevent 1120 // register assignments to variables used in filter blocks. 1121 getregsNoSave(allregs); 1122 regm_t retregsx = regmask(e.Ety, TYnfunc); 1123 gencodelem(cdb,e,&retregsx,true); 1124 cdb.gen1(0xC3); // RET 1125 break; 1126 } 1127 } 1128 1129 case BCretexp: 1130 reg_t reg1, reg2, lreg, mreg; 1131 retregs = allocretregs(e.Ety, e.ET, funcsym_p.ty(), reg1, reg2); 1132 //printf("allocretregs returns %s\n", regm_str(mask(reg1) | mask(reg2))); 1133 1134 lreg = mreg = NOREG; 1135 if (reg1 == NOREG) 1136 {} 1137 else if (tybasic(e.Ety) == TYcfloat) 1138 lreg = ST01; 1139 else if (mask(reg1) & (mST0 | mST01)) 1140 lreg = reg1; 1141 else if (reg2 == NOREG) 1142 lreg = reg1; 1143 else if (mask(reg1) & XMMREGS) 1144 { 1145 lreg = XMM0; 1146 mreg = XMM1; 1147 } 1148 else 1149 { 1150 lreg = mask(reg1) & mLSW ? reg1 : AX; 1151 mreg = mask(reg2) & mMSW ? reg2 : DX; 1152 } 1153 if (reg1 != NOREG) 1154 retregs = (mask(lreg) | mask(mreg)) & ~mask(NOREG); 1155 1156 // For the final load into the return regs, don't set regcon.used, 1157 // so that the optimizer can potentially use retregs for register 1158 // variable assignments. 
1159 1160 if (config.flags4 & CFG4optimized) 1161 { regm_t usedsave; 1162 1163 docommas(cdb,&e); 1164 usedsave = regcon.used; 1165 if (!OTleaf(e.Eoper)) 1166 gencodelem(cdb,e,&retregs,true); 1167 else 1168 { 1169 if (e.Eoper == OPconst) 1170 regcon.mvar = 0; 1171 gencodelem(cdb,e,&retregs,true); 1172 regcon.used = usedsave; 1173 if (e.Eoper == OPvar) 1174 { Symbol *s = e.EV.Vsym; 1175 1176 if (s.Sfl == FLreg && s.Sregm != mAX) 1177 *retsym = s; 1178 } 1179 } 1180 } 1181 else 1182 { 1183 gencodelem(cdb,e,&retregs,true); 1184 } 1185 1186 if (reg1 == NOREG) 1187 { 1188 } 1189 else if ((mask(reg1) | mask(reg2)) & (mST0 | mST01)) 1190 { 1191 assert(reg1 == lreg && reg2 == NOREG); 1192 regm_t pretregs = mask(reg1) | mask(reg2); 1193 fixresult87(cdb, e, retregs, &pretregs, true); 1194 } 1195 // fix return registers 1196 else if (tybasic(e.Ety) == TYcfloat) 1197 { 1198 assert(lreg == ST01); 1199 if (I64) 1200 { 1201 assert(reg2 == NOREG); 1202 // spill 1203 pop87(); 1204 pop87(); 1205 cdb.genfltreg(0xD9, 3, tysize(TYfloat)); 1206 genfwait(cdb); 1207 cdb.genfltreg(0xD9, 3, 0); 1208 genfwait(cdb); 1209 // reload 1210 if (config.exe == EX_WIN64) 1211 { 1212 assert(reg1 == AX); 1213 cdb.genfltreg(LOD, reg1, 0); 1214 code_orrex(cdb.last(), REX_W); 1215 } 1216 else 1217 { 1218 assert(reg1 == XMM0); 1219 cdb.genxmmreg(xmmload(TYdouble), reg1, 0, TYdouble); 1220 } 1221 } 1222 else 1223 { 1224 assert(reg1 == AX && reg2 == DX); 1225 regm_t pretregs = mask(reg1) | mask(reg2); 1226 fixresult_complex87(cdb, e, retregs, &pretregs, true); 1227 } 1228 } 1229 else if (reg2 == NOREG) 1230 assert(lreg == reg1); 1231 else for (int v = 0; v < 2; v++) 1232 { 1233 if (v ^ (reg1 != mreg)) 1234 genmovreg(cdb, reg1, lreg); 1235 else 1236 genmovreg(cdb, reg2, mreg); 1237 } 1238 if (reg1 != NOREG) 1239 retregs = (mask(reg1) | mask(reg2)) & ~mask(NOREG); 1240 goto L4; 1241 1242 case BCret: 1243 retregs = 0; 1244 gencodelem(cdb,e,&retregs,true); 1245 L4: 1246 if (retregs == mST0) 1247 { assert(global87.stackused == 1); 1248 pop87(); // account for return value 1249 } 1250 else if (retregs == mST01) 1251 { assert(global87.stackused == 2); 1252 pop87(); 1253 pop87(); // account for return value 1254 } 1255 1256 if (MARS || usednteh & NTEH_try) 1257 { 1258 block *bt = bl; 1259 while ((bt = bt.Btry) != null) 1260 { 1261 block *bf = bt.nthSucc(1); 1262 version (MARS) 1263 { 1264 // Only look at try-finally blocks 1265 if (bf.BC == BCjcatch) 1266 { 1267 continue; 1268 } 1269 } 1270 if (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || 1271 config.ehmethod == EHmethod.EH_SEH) 1272 { 1273 if (bt.Bscope_index == 0) 1274 { 1275 // call __finally 1276 CodeBuilder cdbs; cdbs.ctor(); 1277 CodeBuilder cdbr; cdbr.ctor(); 1278 1279 nteh_gensindex(cdb,-1); 1280 gensaverestore(retregs,cdbs,cdbr); 1281 cdb.append(cdbs); 1282 cdb.genc(0xE8,0,0,0,FLblock,cast(targ_size_t)bf.nthSucc(0)); 1283 regcon.immed.mval = 0; 1284 cdb.append(cdbr); 1285 } 1286 else 1287 { 1288 nteh_unwind(cdb,retregs,~0); 1289 } 1290 break; 1291 } 1292 else 1293 { 1294 // call __finally 1295 cdb.append(callFinallyBlock(bf.nthSucc(0), retregs)); 1296 } 1297 } 1298 } 1299 break; 1300 1301 case BCexit: 1302 retregs = 0; 1303 gencodelem(cdb,e,&retregs,true); 1304 if (config.flags4 & CFG4optimized) 1305 mfuncreg = mfuncregsave; 1306 break; 1307 1308 case BCasm: 1309 { 1310 assert(!e); 1311 // Mark destroyed registers 1312 CodeBuilder cdbx; cdbx.ctor(); 1313 getregs(cdbx,iasm_regs(bl)); // mark destroyed registers 1314 code *c = cdbx.finish(); 1315 if 
(bl.Bsucc) 1316 { nextb = bl.nthSucc(0); 1317 if (!bl.Bnext) 1318 { 1319 cdb.append(bl.Bcode); 1320 cdb.append(c); 1321 goto L5; 1322 } 1323 if (nextb != bl.Bnext && 1324 bl.Bnext && 1325 !(bl.Bnext.BC == BCgoto && 1326 !bl.Bnext.Belem && 1327 nextb == bl.Bnext.nthSucc(0))) 1328 { 1329 // See if already have JMP at end of block 1330 code *cl = code_last(bl.Bcode); 1331 if (!cl || cl.Iop != JMP) 1332 { 1333 cdb.append(bl.Bcode); 1334 cdb.append(c); 1335 goto L5; // add JMP at end of block 1336 } 1337 } 1338 } 1339 cdb.append(bl.Bcode); 1340 break; 1341 } 1342 1343 default: 1344 debug 1345 printf("bl.BC = %d\n",bl.BC); 1346 assert(0); 1347 } 1348 } 1349 1350 /*************************** 1351 * Allocate registers for function return values. 1352 * 1353 * Params: 1354 * ty = return type 1355 * t = return type extended info 1356 * tyf = function type 1357 * reg1 = set to the first part register, else NOREG 1358 * reg2 = set to the second part register, else NOREG 1359 * 1360 * Returns: 1361 * a bit mask of return registers. 1362 * 0 if function returns on the stack or returns void. 1363 */ 1364 @trusted 1365 regm_t allocretregs(const tym_t ty, type* t, const tym_t tyf, out reg_t reg1, out reg_t reg2) 1366 { 1367 //printf("allocretregs() ty: %s\n", tym_str(ty)); 1368 reg1 = reg2 = NOREG; 1369 1370 if (!(config.exe & EX_posix)) 1371 return regmask(ty, tyf); // for non-Posix ABI 1372 1373 /* The rest is for the Itanium ABI 1374 */ 1375 1376 const tyb = tybasic(ty); 1377 if (tyb == TYvoid || tyb == TYnoreturn) 1378 return 0; 1379 1380 tym_t ty1 = tyb; 1381 tym_t ty2 = TYMAX; // stays TYMAX if only one register is needed 1382 1383 if (ty & mTYxmmgpr) 1384 { 1385 ty1 = TYdouble; 1386 ty2 = TYllong; 1387 } 1388 else if (ty & mTYgprxmm) 1389 { 1390 ty1 = TYllong; 1391 ty2 = TYdouble; 1392 } 1393 1394 if (tyb == TYstruct) 1395 { 1396 assert(t); 1397 ty1 = t.Tty; 1398 } 1399 1400 const tyfb = tybasic(tyf); 1401 switch (tyrelax(ty1)) 1402 { 1403 case TYcent: 1404 if (I32) 1405 return 0; 1406 ty1 = ty2 = TYllong; 1407 break; 1408 1409 case TYcdouble: 1410 if (tyfb == TYjfunc && I32) 1411 break; 1412 if (I32) 1413 return 0; 1414 ty1 = ty2 = TYdouble; 1415 break; 1416 1417 case TYcfloat: 1418 if (tyfb == TYjfunc && I32) 1419 break; 1420 if (I32) 1421 goto case TYllong; 1422 ty1 = TYdouble; 1423 break; 1424 1425 case TYcldouble: 1426 if (tyfb == TYjfunc && I32) 1427 break; 1428 if (I32) 1429 return 0; 1430 break; 1431 1432 case TYllong: 1433 if (I32) 1434 ty1 = ty2 = TYlong; 1435 break; 1436 1437 case TYarray: 1438 type* targ1, targ2; 1439 argtypes(t, targ1, targ2); 1440 if (targ1) 1441 ty1 = targ1.Tty; 1442 else 1443 return 0; 1444 if (targ2) 1445 ty2 = targ2.Tty; 1446 break; 1447 1448 case TYstruct: 1449 assert(t); 1450 if (I64) 1451 { 1452 assert(tybasic(t.Tty) == TYstruct); 1453 if (const targ1 = t.Ttag.Sstruct.Sarg1type) 1454 ty1 = targ1.Tty; 1455 else 1456 return 0; 1457 if (const targ2 = t.Ttag.Sstruct.Sarg2type) 1458 ty2 = targ2.Tty; 1459 break; 1460 } 1461 return 0; 1462 1463 default: 1464 break; 1465 } 1466 1467 /* now we have ty1 and ty2, use that to determine which register 1468 * is used for ty1 and which for ty2 1469 */ 1470 1471 static struct RetRegsAllocator 1472 { 1473 nothrow: 1474 static immutable reg_t[2] gpr_regs = [AX, DX]; 1475 static immutable reg_t[2] xmm_regs = [XMM0, XMM1]; 1476 1477 uint cntgpr = 0, 1478 cntxmm = 0; 1479 1480 reg_t gpr() { return gpr_regs[cntgpr++]; } 1481 reg_t xmm() { return xmm_regs[cntxmm++]; } 1482 } 1483 1484 RetRegsAllocator rralloc; 1485 1486 reg_t 
allocreg(tym_t tym) 1487 { 1488 if (tym == TYMAX) 1489 return NOREG; 1490 switch (tysize(tym)) 1491 { 1492 case 1: 1493 case 2: 1494 case 4: 1495 if (tyfloating(tym)) 1496 return I64 ? rralloc.xmm() : ST0; 1497 else 1498 return rralloc.gpr(); 1499 1500 case 8: 1501 if (tycomplex(tym)) 1502 { 1503 assert(tyfb == TYjfunc && I32); 1504 return ST01; 1505 } 1506 else if (tysimd(tym)) 1507 { 1508 return rralloc.xmm(); 1509 } 1510 assert(I64 || tyfloating(tym)); 1511 goto case 4; 1512 1513 default: 1514 if (tybasic(tym) == TYldouble || tybasic(tym) == TYildouble) 1515 { 1516 return ST0; 1517 } 1518 else if (tybasic(tym) == TYcldouble) 1519 { 1520 return ST01; 1521 } 1522 else if (tycomplex(tym) && tyfb == TYjfunc && I32) 1523 { 1524 return ST01; 1525 } 1526 else if (tysimd(tym)) 1527 { 1528 return rralloc.xmm(); 1529 } 1530 1531 debug printf("%s\n", tym_str(tym)); 1532 assert(0); 1533 } 1534 } 1535 1536 reg1 = allocreg(ty1); 1537 reg2 = allocreg(ty2); 1538 1539 return (mask(reg1) | mask(reg2)) & ~mask(NOREG); 1540 } 1541 1542 /*********************************************** 1543 * Struct necessary for sorting switch cases. 1544 */ 1545 1546 private alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*); 1547 extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar); 1548 1549 extern (C) // qsort cmp functions need to be "C" 1550 { 1551 struct CaseVal 1552 { 1553 targ_ullong val; 1554 block *target; 1555 1556 /* Sort function for qsort() */ 1557 @trusted 1558 extern (C) static nothrow int cmp(scope const(void*) p, scope const(void*) q) 1559 { 1560 const(CaseVal)* c1 = cast(const(CaseVal)*)p; 1561 const(CaseVal)* c2 = cast(const(CaseVal)*)q; 1562 return (c1.val < c2.val) ? -1 : ((c1.val == c2.val) ? 0 : 1); 1563 } 1564 } 1565 } 1566 1567 /*** 1568 * Generate comparison of [reg2,reg] with val 1569 */ 1570 @trusted 1571 private void cmpval(ref CodeBuilder cdb, targ_llong val, uint sz, reg_t reg, reg_t reg2, reg_t sreg) 1572 { 1573 if (I64 && sz == 8) 1574 { 1575 assert(reg2 == NOREG); 1576 if (val == cast(int)val) // if val is a 64 bit value sign-extended from 32 bits 1577 { 1578 cdb.genc2(0x81,modregrmx(3,7,reg),cast(targ_size_t)val); // CMP reg,value32 1579 cdb.last().Irex |= REX_W; // 64 bit operand 1580 } 1581 else 1582 { 1583 assert(sreg != NOREG); 1584 movregconst(cdb,sreg,cast(targ_size_t)val,64); // MOV sreg,val64 1585 genregs(cdb,0x3B,reg,sreg); // CMP reg,sreg 1586 code_orrex(cdb.last(), REX_W); 1587 getregsNoSave(mask(sreg)); // don't remember we loaded this constant 1588 } 1589 } 1590 else if (reg2 == NOREG) 1591 cdb.genc2(0x81,modregrmx(3,7,reg),cast(targ_size_t)val); // CMP reg,casevalue 1592 else 1593 { 1594 cdb.genc2(0x81,modregrm(3,7,reg2),cast(targ_size_t)MSREG(val)); // CMP reg2,MSREG(casevalue) 1595 code *cnext = gennop(null); 1596 genjmp(cdb,JNE,FLcode,cast(block *) cnext); // JNE cnext 1597 cdb.genc2(0x81,modregrm(3,7,reg),cast(targ_size_t)val); // CMP reg,casevalue 1598 cdb.append(cnext); 1599 } 1600 } 1601 1602 @trusted 1603 private void ifthen(ref CodeBuilder cdb, CaseVal *casevals, size_t ncases, 1604 uint sz, reg_t reg, reg_t reg2, reg_t sreg, block *bdefault, bool last) 1605 { 1606 if (ncases >= 4 && config.flags4 & CFG4speed) 1607 { 1608 size_t pivot = ncases >> 1; 1609 1610 // Compares for casevals[0..pivot] 1611 CodeBuilder cdb1; cdb1.ctor(); 1612 ifthen(cdb1, casevals, pivot, sz, reg, reg2, sreg, bdefault, true); 1613 1614 // Compares for casevals[pivot+1..ncases] 1615 CodeBuilder cdb2; cdb2.ctor(); 1616 ifthen(cdb2, 
casevals + pivot + 1, ncases - pivot - 1, sz, reg, reg2, sreg, bdefault, last); 1617 code *c2 = gennop(null); 1618 1619 // Compare for caseval[pivot] 1620 cmpval(cdb, casevals[pivot].val, sz, reg, reg2, sreg); 1621 genjmp(cdb,JE,FLblock,casevals[pivot].target); // JE target 1622 // Note uint jump here, as cases were sorted using uint comparisons 1623 genjmp(cdb,JA,FLcode,cast(block *) c2); // JG c2 1624 1625 cdb.append(cdb1); 1626 cdb.append(c2); 1627 cdb.append(cdb2); 1628 } 1629 else 1630 { // Not worth doing a binary search, just do a sequence of CMP/JE 1631 for (size_t n = 0; n < ncases; n++) 1632 { 1633 targ_llong val = casevals[n].val; 1634 cmpval(cdb, val, sz, reg, reg2, sreg); 1635 code *cnext = null; 1636 if (reg2 != NOREG) 1637 { 1638 cnext = gennop(null); 1639 genjmp(cdb,JNE,FLcode,cast(block *) cnext); // JNE cnext 1640 cdb.genc2(0x81,modregrm(3,7,reg2),cast(targ_size_t)MSREG(val)); // CMP reg2,MSREG(casevalue) 1641 } 1642 genjmp(cdb,JE,FLblock,casevals[n].target); // JE caseaddr 1643 cdb.append(cnext); 1644 } 1645 1646 if (last) // if default is not next block 1647 genjmp(cdb,JMP,FLblock,bdefault); 1648 } 1649 } 1650 1651 /******************************* 1652 * Generate code for blocks ending in a switch statement. 1653 * Take BCswitch and decide on 1654 * BCifthen use if - then code 1655 * BCjmptab index into jump table 1656 * BCswitch search table for match 1657 */ 1658 1659 @trusted 1660 void doswitch(ref CodeBuilder cdb, block *b) 1661 { 1662 targ_ulong msw; 1663 1664 // If switch tables are in code segment and we need a CS: override to get at them 1665 bool csseg = cast(bool)(config.flags & CFGromable); 1666 1667 //printf("doswitch(%d)\n", b.BC); 1668 elem *e = b.Belem; 1669 elem_debug(e); 1670 docommas(cdb,&e); 1671 cgstate.stackclean++; 1672 tym_t tys = tybasic(e.Ety); 1673 int sz = _tysize[tys]; 1674 bool dword = (sz == 2 * REGSIZE); 1675 bool mswsame = true; // assume all msw's are the same 1676 targ_llong *p = b.Bswitch; // pointer to case data 1677 assert(p); 1678 uint ncases = cast(uint)*p++; // number of cases 1679 1680 targ_llong vmax = MINLL; // smallest possible llong 1681 targ_llong vmin = MAXLL; // largest possible llong 1682 for (uint n = 0; n < ncases; n++) // find max and min case values 1683 { 1684 targ_llong val = *p++; 1685 if (val > vmax) vmax = val; 1686 if (val < vmin) vmin = val; 1687 if (REGSIZE == 2) 1688 { 1689 ushort ms = (val >> 16) & 0xFFFF; 1690 if (n == 0) 1691 msw = ms; 1692 else if (msw != ms) 1693 mswsame = 0; 1694 } 1695 else // REGSIZE == 4 1696 { 1697 targ_ulong ms = (val >> 32) & 0xFFFFFFFF; 1698 if (n == 0) 1699 msw = ms; 1700 else if (msw != ms) 1701 mswsame = 0; 1702 } 1703 } 1704 p -= ncases; 1705 //dbg_printf("vmax = x%lx, vmin = x%lx, vmax-vmin = x%lx\n",vmax,vmin,vmax - vmin); 1706 1707 /* Three kinds of switch strategies - pick one 1708 */ 1709 if (ncases <= 3) 1710 goto Lifthen; 1711 else if (I16 && cast(targ_ullong)(vmax - vmin) <= ncases * 2) 1712 goto Ljmptab; // >=50% of the table is case values, rest is default 1713 else if (cast(targ_ullong)(vmax - vmin) <= ncases * 3) 1714 goto Ljmptab; // >= 33% of the table is case values, rest is default 1715 else if (I16) 1716 goto Lswitch; 1717 else 1718 goto Lifthen; 1719 1720 /*************************************************************************/ 1721 { // generate if-then sequence 1722 Lifthen: 1723 regm_t retregs = ALLREGS; 1724 b.BC = BCifthen; 1725 scodelem(cdb,e,&retregs,0,true); 1726 reg_t reg, reg2; 1727 if (dword) 1728 { reg = findreglsw(retregs); 1729 reg2 = 
findregmsw(retregs); 1730 } 1731 else 1732 { 1733 reg = findreg(retregs); // reg that result is in 1734 reg2 = NOREG; 1735 } 1736 list_t bl = b.Bsucc; 1737 block *bdefault = b.nthSucc(0); 1738 if (dword && mswsame) 1739 { 1740 cdb.genc2(0x81,modregrm(3,7,reg2),msw); // CMP reg2,MSW 1741 genjmp(cdb,JNE,FLblock,bdefault); // JNE default 1742 reg2 = NOREG; 1743 } 1744 1745 reg_t sreg = NOREG; // may need a scratch register 1746 1747 // Put into casevals[0..ncases] so we can sort then slice 1748 assert(ncases < size_t.max / (2 * CaseVal.sizeof)); 1749 CaseVal *casevals = cast(CaseVal *)malloc(ncases * CaseVal.sizeof); 1750 assert(casevals); 1751 for (uint n = 0; n < ncases; n++) 1752 { 1753 casevals[n].val = p[n]; 1754 bl = list_next(bl); 1755 casevals[n].target = list_block(bl); 1756 1757 // See if we need a scratch register 1758 if (sreg == NOREG && I64 && sz == 8 && p[n] != cast(int)p[n]) 1759 { regm_t regm = ALLREGS & ~mask(reg); 1760 allocreg(cdb,®m, &sreg, TYint); 1761 } 1762 } 1763 1764 // Sort cases so we can do a runtime binary search 1765 qsort(casevals, ncases, CaseVal.sizeof, &CaseVal.cmp); 1766 1767 //for (uint n = 0; n < ncases; n++) 1768 //printf("casevals[%lld] = x%x\n", n, casevals[n].val); 1769 1770 // Generate binary tree of comparisons 1771 ifthen(cdb, casevals, ncases, sz, reg, reg2, sreg, bdefault, bdefault != b.Bnext); 1772 1773 free(casevals); 1774 1775 cgstate.stackclean--; 1776 return; 1777 } 1778 1779 /*************************************************************************/ 1780 { 1781 // Use switch value to index into jump table 1782 Ljmptab: 1783 //printf("Ljmptab:\n"); 1784 1785 b.BC = BCjmptab; 1786 1787 /* If vmin is small enough, we can just set it to 0 and the jump 1788 * table entries from 0..vmin-1 can be set with the default target. 1789 * This saves the SUB instruction. 1790 * Must be same computation as used in outjmptab(). 
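         * For example, with case values {2, 5, 9} we get vmin == 2; since
         * 2 <= _tysize[TYint], vmin is reset to 0 below and jump table
         * entries 0 and 1 just point at the default block.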
1791 */ 1792 if (vmin > 0 && vmin <= _tysize[TYint]) 1793 vmin = 0; 1794 1795 b.Btablesize = cast(int) (vmax - vmin + 1) * tysize(TYnptr); 1796 regm_t retregs = IDXREGS; 1797 if (dword) 1798 retregs |= mMSW; 1799 if (config.exe & EX_posix && I32 && config.flags3 & CFG3pic) 1800 retregs &= ~mBX; // need EBX for GOT 1801 bool modify = (I16 || I64 || vmin); 1802 scodelem(cdb,e,&retregs,0,!modify); 1803 reg_t reg = findreg(retregs & IDXREGS); // reg that result is in 1804 reg_t reg2; 1805 if (dword) 1806 reg2 = findregmsw(retregs); 1807 if (modify) 1808 { 1809 assert(!(retregs & regcon.mvar)); 1810 getregs(cdb,retregs); 1811 } 1812 if (vmin) // if there is a minimum 1813 { 1814 cdb.genc2(0x81,modregrm(3,5,reg),cast(targ_size_t)vmin); // SUB reg,vmin 1815 if (dword) 1816 { cdb.genc2(0x81,modregrm(3,3,reg2),cast(targ_size_t)MSREG(vmin)); // SBB reg2,vmin 1817 genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default 1818 } 1819 } 1820 else if (dword) 1821 { gentstreg(cdb,reg2); // TEST reg2,reg2 1822 genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default 1823 } 1824 if (vmax - vmin != REGMASK) // if there is a maximum 1825 { // CMP reg,vmax-vmin 1826 cdb.genc2(0x81,modregrm(3,7,reg),cast(targ_size_t)(vmax-vmin)); 1827 if (I64 && sz == 8) 1828 code_orrex(cdb.last(), REX_W); 1829 genjmp(cdb,JA,FLblock,b.nthSucc(0)); // JA default 1830 } 1831 if (I64) 1832 { 1833 if (!vmin) 1834 { // Need to clear out high 32 bits of reg 1835 // Use 8B instead of 89, as 89 will be optimized away as a NOP 1836 genregs(cdb,0x8B,reg,reg); // MOV reg,reg 1837 } 1838 if (config.flags3 & CFG3pic || config.exe == EX_WIN64) 1839 { 1840 /* LEA R1,disp[RIP] 48 8D 05 00 00 00 00 1841 * MOVSXD R2,[reg*4][R1] 48 63 14 B8 1842 * LEA R1,[R1][R2] 48 8D 04 02 1843 * JMP R1 FF E0 1844 */ 1845 reg_t r1; 1846 regm_t scratchm = ALLREGS & ~mask(reg); 1847 allocreg(cdb,&scratchm,&r1,TYint); 1848 reg_t r2; 1849 scratchm = ALLREGS & ~(mask(reg) | mask(r1)); 1850 allocreg(cdb,&scratchm,&r2,TYint); 1851 1852 CodeBuilder cdbe; cdbe.ctor(); 1853 cdbe.genc1(LEA,(REX_W << 16) | modregxrm(0,r1,5),FLswitch,0); // LEA R1,disp[RIP] 1854 cdbe.last().IEV1.Vswitch = b; 1855 cdbe.gen2sib(0x63,(REX_W << 16) | modregxrm(0,r2,4), modregxrmx(2,reg,r1)); // MOVSXD R2,[reg*4][R1] 1856 cdbe.gen2sib(LEA,(REX_W << 16) | modregxrm(0,r1,4),modregxrmx(0,r1,r2)); // LEA R1,[R1][R2] 1857 cdbe.gen2(0xFF,modregrmx(3,4,r1)); // JMP R1 1858 1859 b.Btablesize = cast(int) (vmax - vmin + 1) * 4; 1860 code *ce = cdbe.finish(); 1861 pinholeopt(ce, null); 1862 1863 cdb.append(cdbe); 1864 } 1865 else 1866 { 1867 cdb.genc1(0xFF,modregrm(0,4,4),FLswitch,0); // JMP disp[reg*8] 1868 cdb.last().IEV1.Vswitch = b; 1869 cdb.last().Isib = modregrm(3,reg & 7,5); 1870 if (reg & 8) 1871 cdb.last().Irex |= REX_X; 1872 } 1873 } 1874 else if (I32) 1875 { 1876 static if (JMPJMPTABLE) 1877 { 1878 /* LEA jreg,offset ctable[reg][reg * 4] 1879 JMP jreg 1880 ctable: 1881 JMP case0 1882 JMP case1 1883 ... 
1884 */ 1885 CodeBuilder ctable; ctable.ctor(); 1886 block *bdef = b.nthSucc(0); 1887 targ_llong u; 1888 for (u = vmin; ; u++) 1889 { block *targ = bdef; 1890 for (n = 0; n < ncases; n++) 1891 { 1892 if (p[n] == u) 1893 { targ = b.nthSucc(n + 1); 1894 break; 1895 } 1896 } 1897 genjmp(ctable,JMP,FLblock,targ); 1898 ctable.last().Iflags |= CFjmp5; // don't shrink these 1899 if (u == vmax) 1900 break; 1901 } 1902 1903 // Allocate scratch register jreg 1904 regm_t scratchm = ALLREGS & ~mask(reg); 1905 uint jreg = AX; 1906 allocreg(cdb,&scratchm,&jreg,TYint); 1907 1908 // LEA jreg, offset ctable[reg][reg*4] 1909 cdb.genc1(LEA,modregrm(2,jreg,4),FLcode,6); 1910 cdb.last().Isib = modregrm(2,reg,reg); 1911 cdb.gen2(0xFF,modregrm(3,4,jreg)); // JMP jreg 1912 cdb.append(ctable); 1913 b.Btablesize = 0; 1914 cgstate.stackclean--; 1915 return; 1916 } 1917 else 1918 { 1919 if (config.exe & (EX_OSX | EX_OSX64)) 1920 { 1921 /* CALL L1 1922 * L1: POP R1 1923 * ADD R1,disp[reg*4][R1] 1924 * JMP R1 1925 */ 1926 // Allocate scratch register r1 1927 regm_t scratchm = ALLREGS & ~mask(reg); 1928 reg_t r1; 1929 allocreg(cdb,&scratchm,&r1,TYint); 1930 1931 cdb.genc2(CALL,0,0); // CALL L1 1932 cdb.gen1(0x58 + r1); // L1: POP R1 1933 cdb.genc1(0x03,modregrm(2,r1,4),FLswitch,0); // ADD R1,disp[reg*4][EBX] 1934 cdb.last().IEV1.Vswitch = b; 1935 cdb.last().Isib = modregrm(2,reg,r1); 1936 cdb.gen2(0xFF,modregrm(3,4,r1)); // JMP R1 1937 } 1938 else 1939 { 1940 if (config.flags3 & CFG3pic) 1941 { 1942 /* MOV R1,EBX 1943 * SUB R1,funcsym_p@GOTOFF[offset][reg*4][EBX] 1944 * JMP R1 1945 */ 1946 1947 // Load GOT in EBX 1948 load_localgot(cdb); 1949 1950 // Allocate scratch register r1 1951 regm_t scratchm = ALLREGS & ~(mask(reg) | mBX); 1952 reg_t r1; 1953 allocreg(cdb,&scratchm,&r1,TYint); 1954 1955 genmovreg(cdb,r1,BX); // MOV R1,EBX 1956 cdb.genc1(0x2B,modregxrm(2,r1,4),FLswitch,0); // SUB R1,disp[reg*4][EBX] 1957 cdb.last().IEV1.Vswitch = b; 1958 cdb.last().Isib = modregrm(2,reg,BX); 1959 cdb.gen2(0xFF,modregrmx(3,4,r1)); // JMP R1 1960 } 1961 else 1962 { 1963 cdb.genc1(0xFF,modregrm(0,4,4),FLswitch,0); // JMP disp[idxreg*4] 1964 cdb.last().IEV1.Vswitch = b; 1965 cdb.last().Isib = modregrm(2,reg,5); 1966 } 1967 } 1968 } 1969 } 1970 else if (I16) 1971 { 1972 cdb.gen2(0xD1,modregrm(3,4,reg)); // SHL reg,1 1973 uint rm = getaddrmode(retregs) | modregrm(0,4,0); 1974 cdb.genc1(0xFF,rm,FLswitch,0); // JMP [CS:]disp[idxreg] 1975 cdb.last().IEV1.Vswitch = b; 1976 cdb.last().Iflags |= csseg ? CFcs : 0; // segment override 1977 } 1978 else 1979 assert(0); 1980 cgstate.stackclean--; 1981 return; 1982 } 1983 1984 /*************************************************************************/ 1985 { 1986 /* Scan a table of case values, and jump to corresponding address. 1987 * Since it relies on REPNE SCASW, it has really nothing to recommend it 1988 * over Lifthen for 32 and 64 bit code. 1989 * Note that it has not been tested with MACHOBJ (OSX). 
1990 */ 1991 Lswitch: 1992 regm_t retregs = mAX; // SCASW requires AX 1993 if (dword) 1994 retregs |= mDX; 1995 else if (ncases <= 6 || config.flags4 & CFG4speed) 1996 goto Lifthen; 1997 scodelem(cdb,e,&retregs,0,true); 1998 if (dword && mswsame) 1999 { /* CMP DX,MSW */ 2000 cdb.genc2(0x81,modregrm(3,7,DX),msw); 2001 genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default 2002 } 2003 getregs(cdb,mCX|mDI); 2004 2005 if (config.flags3 & CFG3pic && config.exe & EX_posix) 2006 { // Add in GOT 2007 getregs(cdb,mDX); 2008 cdb.genc2(CALL,0,0); // CALL L1 2009 cdb.gen1(0x58 + DI); // L1: POP EDI 2010 2011 // ADD EDI,_GLOBAL_OFFSET_TABLE_+3 2012 Symbol *gotsym = Obj.getGOTsym(); 2013 cdb.gencs(0x81,modregrm(3,0,DI),FLextern,gotsym); 2014 cdb.last().Iflags = CFoff; 2015 cdb.last().IEV2.Voffset = 3; 2016 2017 makeitextern(gotsym); 2018 2019 genmovreg(cdb, DX, DI); // MOV EDX, EDI 2020 // ADD EDI,offset of switch table 2021 cdb.gencs(0x81,modregrm(3,0,DI),FLswitch,null); 2022 cdb.last().IEV2.Vswitch = b; 2023 } 2024 2025 if (!(config.flags3 & CFG3pic)) 2026 { 2027 // MOV DI,offset of switch table 2028 cdb.gencs(0xC7,modregrm(3,0,DI),FLswitch,null); 2029 cdb.last().IEV2.Vswitch = b; 2030 } 2031 movregconst(cdb,CX,ncases,0); // MOV CX,ncases 2032 2033 /* The switch table will be accessed through ES:DI. 2034 * Therefore, load ES with proper segment value. 2035 */ 2036 if (config.flags3 & CFG3eseqds) 2037 { 2038 assert(!csseg); 2039 getregs(cdb,mCX); // allocate CX 2040 } 2041 else 2042 { 2043 getregs(cdb,mES|mCX); // allocate ES and CX 2044 cdb.gen1(csseg ? 0x0E : 0x1E); // PUSH CS/DS 2045 cdb.gen1(0x07); // POP ES 2046 } 2047 2048 targ_size_t disp = (ncases - 1) * _tysize[TYint]; // displacement to jump table 2049 if (dword && !mswsame) 2050 { 2051 2052 /* Build the following: 2053 L1: SCASW 2054 JNE L2 2055 CMP DX,[CS:]disp[DI] 2056 L2: LOOPNE L1 2057 */ 2058 2059 const int mod = (disp > 127) ? 2 : 1; // displacement size 2060 code *cloop = genc2(null,0xE0,0,-7 - mod - csseg); // LOOPNE scasw 2061 cdb.gen1(0xAF); // SCASW 2062 code_orflag(cdb.last(),CFtarg2); // target of jump 2063 genjmp(cdb,JNE,FLcode,cast(block *) cloop); // JNE loop 2064 // CMP DX,[CS:]disp[DI] 2065 cdb.genc1(0x39,modregrm(mod,DX,5),FLconst,disp); 2066 cdb.last().Iflags |= csseg ? CFcs : 0; // possible seg override 2067 cdb.append(cloop); 2068 disp += ncases * _tysize[TYint]; // skip over msw table 2069 } 2070 else 2071 { 2072 cdb.gen1(0xF2); // REPNE 2073 cdb.gen1(0xAF); // SCASW 2074 } 2075 genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default 2076 const int mod = (disp > 127) ? 2 : 1; // 1 or 2 byte displacement 2077 if (csseg) 2078 cdb.gen1(SEGCS); // table is in code segment 2079 2080 if (config.flags3 & CFG3pic && 2081 config.exe & EX_posix) 2082 { // ADD EDX,(ncases-1)*2[EDI] 2083 cdb.genc1(0x03,modregrm(mod,DX,7),FLconst,disp); 2084 // JMP EDX 2085 cdb.gen2(0xFF,modregrm(3,4,DX)); 2086 } 2087 2088 if (!(config.flags3 & CFG3pic)) 2089 { // JMP (ncases-1)*2[DI] 2090 cdb.genc1(0xFF,modregrm(mod,4,(I32 ? 7 : 5)),FLconst,disp); 2091 cdb.last().Iflags |= csseg ? CFcs : 0; 2092 } 2093 b.Btablesize = disp + _tysize[TYint] + ncases * tysize(TYnptr); 2094 //assert(b.Bcode); 2095 cgstate.stackclean--; 2096 return; 2097 } 2098 } 2099 2100 /****************************** 2101 * Output data block for a jump table (BCjmptab). 2102 * The 'holes' in the table get filled with the 2103 * default label. 
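 * For example, with case values {0, 1, 4} the table covers indices 0 through 4,
 * and the entries for 2 and 3 (the holes) hold the address of the default block.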
2104 */ 2105 2106 @trusted 2107 void outjmptab(block *b) 2108 { 2109 if (JMPJMPTABLE && I32) 2110 return; 2111 2112 targ_llong *p = b.Bswitch; // pointer to case data 2113 size_t ncases = cast(size_t)*p++; // number of cases 2114 2115 /* Find vmin and vmax, the range of the table will be [vmin .. vmax + 1] 2116 * Must be same computation as used in doswitch(). 2117 */ 2118 targ_llong vmax = MINLL; // smallest possible llong 2119 targ_llong vmin = MAXLL; // largest possible llong 2120 for (size_t n = 0; n < ncases; n++) // find min case value 2121 { targ_llong val = p[n]; 2122 if (val > vmax) vmax = val; 2123 if (val < vmin) vmin = val; 2124 } 2125 if (vmin > 0 && vmin <= _tysize[TYint]) 2126 vmin = 0; 2127 assert(vmin <= vmax); 2128 2129 /* Segment and offset into which the jump table will be emitted 2130 */ 2131 int jmpseg = objmod.jmpTableSegment(funcsym_p); 2132 targ_size_t *poffset = &Offset(jmpseg); 2133 2134 /* Align start of jump table 2135 */ 2136 targ_size_t alignbytes = _align(0,*poffset) - *poffset; 2137 objmod.lidata(jmpseg,*poffset,alignbytes); 2138 assert(*poffset == b.Btableoffset); // should match precomputed value 2139 2140 Symbol *gotsym = null; 2141 targ_size_t def = b.nthSucc(0).Boffset; // default address 2142 for (targ_llong u = vmin; ; u++) 2143 { targ_size_t targ = def; // default 2144 for (size_t n = 0; n < ncases; n++) 2145 { if (p[n] == u) 2146 { targ = b.nthSucc(cast(int)(n + 1)).Boffset; 2147 break; 2148 } 2149 } 2150 if (config.exe & (EX_LINUX64 | EX_FREEBSD64 | EX_OPENBSD64 | EX_DRAGONFLYBSD64 | EX_SOLARIS64)) 2151 { 2152 if (config.flags3 & CFG3pic) 2153 { 2154 objmod.reftodatseg(jmpseg,*poffset,cast(targ_size_t)(targ + (u - vmin) * 4),funcsym_p.Sseg,CFswitch); 2155 *poffset += 4; 2156 } 2157 else 2158 { 2159 objmod.reftodatseg(jmpseg,*poffset,targ,funcsym_p.Sxtrnnum,CFoffset64 | CFswitch); 2160 *poffset += 8; 2161 } 2162 } 2163 else if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS)) 2164 { 2165 if (config.flags3 & CFG3pic) 2166 { 2167 assert(config.flags & CFGromable); 2168 // Want a GOTPC fixup to _GLOBAL_OFFSET_TABLE_ 2169 if (!gotsym) 2170 gotsym = Obj.getGOTsym(); 2171 objmod.reftoident(jmpseg,*poffset,gotsym,*poffset - targ,CFswitch); 2172 } 2173 else 2174 objmod.reftocodeseg(jmpseg,*poffset,targ); 2175 *poffset += 4; 2176 } 2177 else if (config.exe & (EX_OSX | EX_OSX64)) 2178 { 2179 targ_size_t val; 2180 if (I64) 2181 val = targ - b.Btableoffset; 2182 else 2183 val = targ - b.Btablebase; 2184 objmod.write_bytes(SegData[jmpseg],4,&val); 2185 } 2186 else 2187 { 2188 if (I64) 2189 { 2190 targ_size_t val = targ - b.Btableoffset; 2191 objmod.write_bytes(SegData[jmpseg],4,&val); 2192 } 2193 else 2194 { 2195 objmod.reftocodeseg(jmpseg,*poffset,targ); 2196 *poffset += tysize(TYnptr); 2197 } 2198 } 2199 2200 if (u == vmax) // for case that (vmax == ~0) 2201 break; 2202 } 2203 } 2204 2205 2206 /****************************** 2207 * Output data block for a switch table. 2208 * Two consecutive tables, the first is the case value table, the 2209 * second is the address table. 
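 * Layout (illustrative):
 *      value[0 .. ncases]      // case values (LSWs)
 *      msw[0 .. ncases]        // MSW table, only when the values are two registers wide
 *      address[0 .. ncases]    // code offsets of the case target blocks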
2210 */ 2211 2212 @trusted 2213 void outswitab(block *b) 2214 { 2215 //printf("outswitab()\n"); 2216 targ_llong *p = b.Bswitch; // pointer to case data 2217 uint ncases = cast(uint)*p++; // number of cases 2218 2219 const int seg = objmod.jmpTableSegment(funcsym_p); 2220 targ_size_t *poffset = &Offset(seg); 2221 targ_size_t offset = *poffset; 2222 targ_size_t alignbytes = _align(0,*poffset) - *poffset; 2223 objmod.lidata(seg,*poffset,alignbytes); // any alignment bytes necessary 2224 assert(*poffset == offset + alignbytes); 2225 2226 uint sz = _tysize[TYint]; 2227 assert(SegData[seg].SDseg == seg); 2228 for (uint n = 0; n < ncases; n++) // send out value table 2229 { 2230 //printf("\tcase %d, offset = x%x\n", n, *poffset); 2231 objmod.write_bytes(SegData[seg],sz,p); 2232 p++; 2233 } 2234 offset += alignbytes + sz * ncases; 2235 assert(*poffset == offset); 2236 2237 if (b.Btablesize == ncases * (REGSIZE * 2 + tysize(TYnptr))) 2238 { 2239 // Send out MSW table 2240 p -= ncases; 2241 for (uint n = 0; n < ncases; n++) 2242 { 2243 targ_size_t val = cast(targ_size_t)MSREG(*p); 2244 p++; 2245 objmod.write_bytes(SegData[seg],REGSIZE,&val); 2246 } 2247 offset += REGSIZE * ncases; 2248 assert(*poffset == offset); 2249 } 2250 2251 list_t bl = b.Bsucc; 2252 for (uint n = 0; n < ncases; n++) // send out address table 2253 { 2254 bl = list_next(bl); 2255 objmod.reftocodeseg(seg,*poffset,list_block(bl).Boffset); 2256 *poffset += tysize(TYnptr); 2257 } 2258 assert(*poffset == offset + ncases * tysize(TYnptr)); 2259 } 2260 2261 /***************************** 2262 * Return a jump opcode relevant to the elem for a JMP true. 2263 */ 2264 2265 @trusted 2266 int jmpopcode(elem *e) 2267 { 2268 //printf("jmpopcode()\n"); elem_print(e); 2269 tym_t tym; 2270 int zero,i,jp,op; 2271 static immutable ubyte[6][2][2] jops = 2272 [ /* <= > < >= == != <=0 >0 <0 >=0 ==0 !=0 */ 2273 [ [JLE,JG ,JL ,JGE,JE ,JNE],[JLE,JG ,JS ,JNS,JE ,JNE] ], /* signed */ 2274 [ [JBE,JA ,JB ,JAE,JE ,JNE],[JE ,JNE,JB ,JAE,JE ,JNE] ], /* uint */ 2275 /+ 2276 [ [JLE,JG ,JL ,JGE,JE ,JNE],[JLE,JG ,JL ,JGE,JE ,JNE] ], /* real */ 2277 [ [JBE,JA ,JB ,JAE,JE ,JNE],[JBE,JA ,JB ,JAE,JE ,JNE] ], /* 8087 */ 2278 [ [JA ,JBE,JAE,JB ,JE ,JNE],[JBE,JA ,JB ,JAE,JE ,JNE] ], /* 8087 R */ 2279 +/ 2280 ]; 2281 2282 enum 2283 { 2284 XP = (JP << 8), 2285 XNP = (JNP << 8), 2286 } 2287 static immutable uint[26][1] jfops = 2288 /* le gt lt ge eqeq ne unord lg leg ule ul uge */ 2289 [ 2290 [ XNP|JBE,JA,XNP|JB,JAE,XNP|JE, XP|JNE,JP, JNE,JNP, JBE,JC,XP|JAE, 2291 2292 /* ug ue ngt nge nlt nle ord nlg nleg nule nul nuge nug nue */ 2293 XP|JA,JE,JBE,JB, XP|JAE,XP|JA, JNP,JE, JP, JA, JNC,XNP|JB, XNP|JBE,JNE ], /* 8087 */ 2294 ]; 2295 2296 assert(e); 2297 while (e.Eoper == OPcomma || 2298 /* The OTleaf(e.EV.E1.Eoper) is to line up with the case in cdeq() where */ 2299 /* we decide if mPSW is passed on when evaluating E2 or not. */ 2300 (e.Eoper == OPeq && OTleaf(e.EV.E1.Eoper))) 2301 { 2302 e = e.EV.E2; /* right operand determines it */ 2303 } 2304 2305 op = e.Eoper; 2306 tym_t tymx = tybasic(e.Ety); 2307 bool needsNanCheck = tyfloating(tymx) && config.inline8087 && 2308 (tymx == TYldouble || tymx == TYildouble || tymx == TYcldouble || 2309 tymx == TYcdouble || tymx == TYcfloat || 2310 (tyxmmreg(tymx) && config.fpxmmregs && e.Ecount != e.Ecomsub) || 2311 op == OPind || 2312 (OTcall(op) && (regmask(tymx, tybasic(e.EV.E1.Eoper)) & (mST0 | XMMREGS)))); 2313 2314 if (!needsNanCheck) 2315 { 2316 /* If e is in an XMM register, need to use XP. 
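 * (an SSE compare of such a value sets PF on an unordered result,
 * so the extra JP is what catches NaNs)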
2317 * Match same test in loaddata() 2318 */ 2319 Symbol* s; 2320 needsNanCheck = e.Eoper == OPvar && 2321 (s = e.EV.Vsym).Sfl == FLreg && 2322 s.Sregm & XMMREGS && 2323 (tymx == TYfloat || tymx == TYifloat || tymx == TYdouble || tymx ==TYidouble); 2324 } 2325 2326 if (e.Ecount != e.Ecomsub) // comsubs just get Z bit set 2327 { 2328 if (needsNanCheck) // except for floating point values that need a NaN check 2329 return XP|JNE; 2330 else 2331 return JNE; 2332 } 2333 if (!OTrel(op)) // not relational operator 2334 { 2335 if (needsNanCheck) 2336 return XP|JNE; 2337 2338 if (op == OPu32_64) { e = e.EV.E1; op = e.Eoper; } 2339 if (op == OPu16_32) { e = e.EV.E1; op = e.Eoper; } 2340 if (op == OPu8_16) op = e.EV.E1.Eoper; 2341 return ((op >= OPbt && op <= OPbts) || op == OPbtst) ? JC : JNE; 2342 } 2343 2344 if (e.EV.E2.Eoper == OPconst) 2345 zero = !boolres(e.EV.E2); 2346 else 2347 zero = 0; 2348 2349 tym = e.EV.E1.Ety; 2350 if (tyfloating(tym)) 2351 { 2352 static if (1) 2353 { 2354 i = 0; 2355 if (config.inline8087) 2356 { i = 1; 2357 2358 static if (1) 2359 { 2360 if (rel_exception(op) || config.flags4 & CFG4fastfloat) 2361 { 2362 const bool NOSAHF = (I64 || config.fpxmmregs); 2363 if (zero) 2364 { 2365 if (NOSAHF) 2366 op = swaprel(op); 2367 } 2368 else if (NOSAHF) 2369 op = swaprel(op); 2370 else if (cmporder87(e.EV.E2)) 2371 op = swaprel(op); 2372 else 2373 { } 2374 } 2375 else 2376 { 2377 if (zero && config.target_cpu < TARGET_80386) 2378 { } 2379 else 2380 op = swaprel(op); 2381 } 2382 } 2383 else 2384 { 2385 if (zero && !rel_exception(op) && config.target_cpu >= TARGET_80386) 2386 op = swaprel(op); 2387 else if (!zero && 2388 (cmporder87(e.EV.E2) || !(rel_exception(op) || config.flags4 & CFG4fastfloat))) 2389 /* compare is reversed */ 2390 op = swaprel(op); 2391 } 2392 } 2393 jp = jfops[0][op - OPle]; 2394 goto L1; 2395 } 2396 else 2397 { 2398 i = (config.inline8087) ? (3 + cmporder87(e.EV.E2)) : 2; 2399 } 2400 } 2401 else if (tyuns(tym) || tyuns(e.EV.E2.Ety)) 2402 i = 1; 2403 else if (tyintegral(tym) || typtr(tym)) 2404 i = 0; 2405 else 2406 { 2407 debug 2408 elem_print(e); 2409 printf("%s\n", tym_str(tym)); 2410 assert(0); 2411 } 2412 2413 jp = jops[i][zero][op - OPle]; /* table starts with OPle */ 2414 2415 /* Try to rewrite uint comparisons so they rely on just the Carry flag 2416 */ 2417 if (i == 1 && (jp == JA || jp == JBE) && 2418 (e.EV.E2.Eoper != OPconst && e.EV.E2.Eoper != OPrelconst)) 2419 { 2420 jp = (jp == JA) ? JC : JNC; 2421 } 2422 2423 L1: 2424 debug 2425 if ((jp & 0xF0) != 0x70) 2426 { 2427 printf("%s i %d zero %d op x%x jp x%x\n",oper_str(op),i,zero,op,jp); 2428 } 2429 2430 assert((jp & 0xF0) == 0x70); 2431 return jp; 2432 } 2433 2434 /********************************** 2435 * Append code to cdb which validates pointer described by 2436 * addressing mode in *pcs. Modify addressing mode in *pcs. 
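 * The generated code pushes the (far) pointer and calls the runtime helper
 * __ptrchk to verify it before the original access is made.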
2437 * Params: 2438 * cdb = append generated code to this 2439 * pcs = original addressing mode to be updated 2440 * keepmsk = mask of registers we must not destroy or use 2441 * if (keepmsk & RMstore), this will be only a store operation 2442 * into the lvalue 2443 */ 2444 2445 @trusted 2446 void cod3_ptrchk(ref CodeBuilder cdb,code *pcs,regm_t keepmsk) 2447 { 2448 ubyte sib; 2449 reg_t reg; 2450 uint flagsave; 2451 2452 assert(!I64); 2453 if (!I16 && pcs.Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs)) 2454 return; // not designed to deal with 48 bit far pointers 2455 2456 ubyte rm = pcs.Irm; 2457 assert(!(rm & 0x40)); // no disp8 or reg addressing modes 2458 2459 // If the addressing mode is already a register 2460 reg = rm & 7; 2461 if (I16) 2462 { static immutable ubyte[8] imode = [ BP,BP,BP,BP,SI,DI,BP,BX ]; 2463 2464 reg = imode[reg]; // convert [SI] to SI, etc. 2465 } 2466 regm_t idxregs = mask(reg); 2467 if ((rm & 0x80 && (pcs.IFL1 != FLoffset || pcs.IEV1.Vuns)) || 2468 !(idxregs & ALLREGS) 2469 ) 2470 { 2471 // Load the offset into a register, so we can push the address 2472 regm_t idxregs2 = (I16 ? IDXREGS : ALLREGS) & ~keepmsk; // only these can be index regs 2473 assert(idxregs2); 2474 allocreg(cdb,&idxregs2,®,TYoffset); 2475 2476 const opsave = pcs.Iop; 2477 flagsave = pcs.Iflags; 2478 pcs.Iop = LEA; 2479 pcs.Irm |= modregrm(0,reg,0); 2480 pcs.Iflags &= ~(CFopsize | CFss | CFes | CFcs); // no prefix bytes needed 2481 cdb.gen(pcs); // LEA reg,EA 2482 2483 pcs.Iflags = flagsave; 2484 pcs.Iop = opsave; 2485 } 2486 2487 // registers destroyed by the function call 2488 //used = (mBP | ALLREGS | mES) & ~fregsaved; 2489 regm_t used = 0; // much less code generated this way 2490 2491 code *cs2 = null; 2492 regm_t tosave = used & (keepmsk | idxregs); 2493 for (int i = 0; tosave; i++) 2494 { 2495 regm_t mi = mask(i); 2496 2497 assert(i < REGMAX); 2498 if (mi & tosave) /* i = register to save */ 2499 { 2500 int push,pop; 2501 2502 stackchanged = 1; 2503 if (i == ES) 2504 { push = 0x06; 2505 pop = 0x07; 2506 } 2507 else 2508 { push = 0x50 + i; 2509 pop = push | 8; 2510 } 2511 cdb.gen1(push); // PUSH i 2512 cs2 = cat(gen1(null,pop),cs2); // POP i 2513 tosave &= ~mi; 2514 } 2515 } 2516 2517 // For 16 bit models, push a far pointer 2518 if (I16) 2519 { 2520 int segreg; 2521 2522 switch (pcs.Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs)) 2523 { case CFes: segreg = 0x06; break; 2524 case CFss: segreg = 0x16; break; 2525 case CFcs: segreg = 0x0E; break; 2526 case 0: segreg = 0x1E; break; // DS 2527 default: 2528 assert(0); 2529 } 2530 2531 // See if we should default to SS: 2532 // (Happens when BP is part of the addressing mode) 2533 if (segreg == 0x1E && (rm & 0xC0) != 0xC0 && 2534 rm & 2 && (rm & 7) != 7) 2535 { 2536 segreg = 0x16; 2537 if (config.wflags & WFssneds) 2538 pcs.Iflags |= CFss; // because BP won't be there anymore 2539 } 2540 cdb.gen1(segreg); // PUSH segreg 2541 } 2542 2543 cdb.gen1(0x50 + reg); // PUSH reg 2544 2545 // Rewrite the addressing mode in *pcs so it is just 0[reg] 2546 setaddrmode(pcs, idxregs); 2547 pcs.IFL1 = FLoffset; 2548 pcs.IEV1.Vuns = 0; 2549 2550 // Call the validation function 2551 { 2552 makeitextern(getRtlsym(RTLSYM.PTRCHK)); 2553 2554 used &= ~(keepmsk | idxregs); // regs destroyed by this exercise 2555 getregs(cdb,used); 2556 // CALL __ptrchk 2557 cdb.gencs((LARGECODE) ? 
0x9A : CALL,0,FLfunc,getRtlsym(RTLSYM.PTRCHK)); 2558 } 2559 2560 cdb.append(cs2); 2561 } 2562 2563 /*********************************** 2564 * Determine if BP can be used as a general purpose register. 2565 * Note parallels between this routine and prolog(). 2566 * Returns: 2567 * 0 can't be used, needed for frame 2568 * mBP can be used 2569 */ 2570 2571 @trusted 2572 regm_t cod3_useBP() 2573 { 2574 tym_t tym; 2575 tym_t tyf; 2576 2577 // Note that DOSX memory model cannot use EBP as a general purpose 2578 // register, as SS != DS. 2579 if (!(config.exe & EX_flat) || config.flags & (CFGalwaysframe | CFGnoebp)) 2580 goto Lcant; 2581 2582 if (anyiasm) 2583 goto Lcant; 2584 2585 tyf = funcsym_p.ty(); 2586 if (tyf & mTYnaked) // if no prolog/epilog for function 2587 goto Lcant; 2588 2589 if (funcsym_p.Sfunc.Fflags3 & Ffakeeh) 2590 { 2591 goto Lcant; // need consistent stack frame 2592 } 2593 2594 tym = tybasic(tyf); 2595 if (tym == TYifunc) 2596 goto Lcant; 2597 2598 stackoffsets(globsym, true); // estimate stack offsets 2599 localsize = Auto.offset + Fast.offset; // an estimate only 2600 // if (localsize) 2601 { 2602 if (!(config.flags4 & CFG4speed) || 2603 config.target_cpu < TARGET_Pentium || 2604 tyfarfunc(tym) || 2605 config.flags & CFGstack || 2606 localsize >= 0x100 || // arbitrary value < 0x1000 2607 (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) || 2608 calledFinally || 2609 Alloca.size 2610 ) 2611 goto Lcant; 2612 } 2613 return mBP; 2614 2615 Lcant: 2616 return 0; 2617 } 2618 2619 /************************************************* 2620 * Generate code segment to be used later to restore a cse 2621 */ 2622 2623 @trusted 2624 bool cse_simple(code *c, elem *e) 2625 { 2626 regm_t regm; 2627 reg_t reg; 2628 int sz = tysize(e.Ety); 2629 2630 if (!I16 && // don't bother with 16 bit code 2631 e.Eoper == OPadd && 2632 sz == REGSIZE && 2633 e.EV.E2.Eoper == OPconst && 2634 e.EV.E1.Eoper == OPvar && 2635 isregvar(e.EV.E1,®m,®) && 2636 !(e.EV.E1.EV.Vsym.Sflags & SFLspill) 2637 ) 2638 { 2639 memset(c,0,(*c).sizeof); 2640 2641 // Make this an LEA instruction 2642 c.Iop = LEA; 2643 buildEA(c,reg,-1,1,e.EV.E2.EV.Vuns); 2644 if (I64) 2645 { if (sz == 8) 2646 c.Irex |= REX_W; 2647 } 2648 2649 return true; 2650 } 2651 else if (e.Eoper == OPind && 2652 sz <= REGSIZE && 2653 e.EV.E1.Eoper == OPvar && 2654 isregvar(e.EV.E1,®m,®) && 2655 (I32 || I64 || regm & IDXREGS) && 2656 !(e.EV.E1.EV.Vsym.Sflags & SFLspill) 2657 ) 2658 { 2659 memset(c,0,(*c).sizeof); 2660 2661 // Make this a MOV instruction 2662 c.Iop = (sz == 1) ? 0x8A : 0x8B; // MOV reg,EA 2663 buildEA(c,reg,-1,1,0); 2664 if (sz == 2 && I32) 2665 c.Iflags |= CFopsize; 2666 else if (I64) 2667 { if (sz == 8) 2668 c.Irex |= REX_W; 2669 } 2670 2671 return true; 2672 } 2673 return false; 2674 } 2675 2676 /************************** 2677 * Store `reg` to the common subexpression save area in index `slot`. 2678 * Params: 2679 * cdb = where to write code to 2680 * tym = type of value that's in `reg` 2681 * reg = register to save 2682 * slot = index into common subexpression save area 2683 */ 2684 @trusted 2685 void gen_storecse(ref CodeBuilder cdb, tym_t tym, reg_t reg, size_t slot) 2686 { 2687 // MOV slot[BP],reg 2688 if (isXMMreg(reg) && config.fpxmmregs) // watch out for ES 2689 { 2690 const aligned = tyvector(tym) ? 
STACKALIGN >= 16 : true; 2691 const op = xmmstore(tym, aligned); 2692 cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLcs,cast(targ_size_t)slot); 2693 return; 2694 } 2695 opcode_t op = STO; // normal mov 2696 if (reg == ES) 2697 { 2698 reg = 0; // the real reg number 2699 op = 0x8C; // segment reg mov 2700 } 2701 cdb.genc1(op,modregxrm(2, reg, BPRM),FLcs,cast(targ_uns)slot); 2702 if (I64) 2703 code_orrex(cdb.last(), REX_W); 2704 } 2705 2706 @trusted 2707 void gen_testcse(ref CodeBuilder cdb, tym_t tym, uint sz, size_t slot) 2708 { 2709 // CMP slot[BP],0 2710 cdb.genc(sz == 1 ? 0x80 : 0x81,modregrm(2,7,BPRM), 2711 FLcs,cast(targ_uns)slot, FLconst,cast(targ_uns) 0); 2712 if ((I64 || I32) && sz == 2) 2713 cdb.last().Iflags |= CFopsize; 2714 if (I64 && sz == 8) 2715 code_orrex(cdb.last(), REX_W); 2716 } 2717 2718 @trusted 2719 void gen_loadcse(ref CodeBuilder cdb, tym_t tym, reg_t reg, size_t slot) 2720 { 2721 // MOV reg,slot[BP] 2722 if (isXMMreg(reg) && config.fpxmmregs) 2723 { 2724 const aligned = tyvector(tym) ? STACKALIGN >= 16 : true; 2725 const op = xmmload(tym, aligned); 2726 cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLcs,cast(targ_size_t)slot); 2727 return; 2728 } 2729 opcode_t op = LOD; 2730 if (reg == ES) 2731 { 2732 op = 0x8E; 2733 reg = 0; 2734 } 2735 cdb.genc1(op,modregxrm(2,reg,BPRM),FLcs,cast(targ_uns)slot); 2736 if (I64) 2737 code_orrex(cdb.last(), REX_W); 2738 } 2739 2740 /*************************************** 2741 * Gen code for OPframeptr 2742 */ 2743 2744 @trusted 2745 void cdframeptr(ref CodeBuilder cdb, elem *e, regm_t *pretregs) 2746 { 2747 regm_t retregs = *pretregs & allregs; 2748 if (!retregs) 2749 retregs = allregs; 2750 reg_t reg; 2751 allocreg(cdb,&retregs, ®, TYint); 2752 2753 code cs; 2754 cs.Iop = ESCAPE | ESCframeptr; 2755 cs.Iflags = 0; 2756 cs.Irex = 0; 2757 cs.Irm = cast(ubyte)reg; 2758 cdb.gen(&cs); 2759 fixresult(cdb,e,retregs,pretregs); 2760 } 2761 2762 /*************************************** 2763 * Gen code for load of _GLOBAL_OFFSET_TABLE_. 2764 * This value gets cached in the local variable 'localgot'. 2765 */ 2766 2767 @trusted 2768 void cdgot(ref CodeBuilder cdb, elem *e, regm_t *pretregs) 2769 { 2770 if (config.exe & (EX_OSX | EX_OSX64)) 2771 { 2772 regm_t retregs = *pretregs & allregs; 2773 if (!retregs) 2774 retregs = allregs; 2775 reg_t reg; 2776 allocreg(cdb,&retregs, ®, TYnptr); 2777 2778 cdb.genc(CALL,0,0,0,FLgot,0); // CALL L1 2779 cdb.gen1(0x58 + reg); // L1: POP reg 2780 2781 fixresult(cdb,e,retregs,pretregs); 2782 } 2783 else if (config.exe & EX_posix) 2784 { 2785 regm_t retregs = *pretregs & allregs; 2786 if (!retregs) 2787 retregs = allregs; 2788 reg_t reg; 2789 allocreg(cdb,&retregs, ®, TYnptr); 2790 2791 cdb.genc2(CALL,0,0); // CALL L1 2792 cdb.gen1(0x58 + reg); // L1: POP reg 2793 2794 // ADD reg,_GLOBAL_OFFSET_TABLE_+3 2795 Symbol *gotsym = Obj.getGOTsym(); 2796 cdb.gencs(0x81,modregrm(3,0,reg),FLextern,gotsym); 2797 /* Because the 2:3 offset from L1: is hardcoded, 2798 * this sequence of instructions must not 2799 * have any instructions in between, 2800 * so set CFvolatile to prevent the scheduler from rearranging it. 2801 */ 2802 code *cgot = cdb.last(); 2803 cgot.Iflags = CFoff | CFvolatile; 2804 cgot.IEV2.Voffset = (reg == AX) ? 2 : 3; 2805 2806 makeitextern(gotsym); 2807 fixresult(cdb,e,retregs,pretregs); 2808 } 2809 else 2810 assert(0); 2811 } 2812 2813 /************************************************** 2814 * Load contents of localgot into EBX. 
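 * EBX is where the 32 bit System V PIC convention expects the GOT address
 * when calling through the PLT, hence the hardcoded mBX below.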
2815 */ 2816 2817 @trusted 2818 void load_localgot(ref CodeBuilder cdb) 2819 { 2820 if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS)) // note: I32 only 2821 { 2822 if (config.flags3 & CFG3pic) 2823 { 2824 if (localgot && !(localgot.Sflags & SFLdead)) 2825 { 2826 localgot.Sflags &= ~GTregcand; // because this hack doesn't work with reg allocator 2827 elem *e = el_var(localgot); 2828 regm_t retregs = mBX; 2829 codelem(cdb,e,&retregs,false); 2830 el_free(e); 2831 } 2832 else 2833 { 2834 elem *e = el_long(TYnptr, 0); 2835 e.Eoper = OPgot; 2836 regm_t retregs = mBX; 2837 codelem(cdb,e,&retregs,false); 2838 el_free(e); 2839 } 2840 } 2841 } 2842 } 2843 2844 /***************************** 2845 * Returns: 2846 * # of bytes stored 2847 */ 2848 2849 2850 @trusted 2851 int obj_namestring(char *p,const(char)* name) 2852 { 2853 size_t len = strlen(name); 2854 if (len > 255) 2855 { 2856 short *ps = cast(short *)p; 2857 p[0] = 0xFF; 2858 p[1] = 0; 2859 ps[1] = cast(short)len; 2860 memcpy(p + 4,name,len); 2861 const int ONS_OHD = 4; // max # of extra bytes added by obj_namestring() 2862 len += ONS_OHD; 2863 } 2864 else 2865 { 2866 p[0] = cast(char)len; 2867 memcpy(p + 1,name,len); 2868 len++; 2869 } 2870 return cast(int)len; 2871 } 2872 2873 void genregs(ref CodeBuilder cdb,opcode_t op,uint dstreg,uint srcreg) 2874 { 2875 return cdb.gen2(op,modregxrmx(3,dstreg,srcreg)); 2876 } 2877 2878 void gentstreg(ref CodeBuilder cdb, uint t) 2879 { 2880 cdb.gen2(0x85,modregxrmx(3,t,t)); // TEST t,t 2881 code_orflag(cdb.last(),CFpsw); 2882 } 2883 2884 void genpush(ref CodeBuilder cdb, reg_t reg) 2885 { 2886 cdb.gen1(0x50 + (reg & 7)); 2887 if (reg & 8) 2888 code_orrex(cdb.last(), REX_B); 2889 } 2890 2891 void genpop(ref CodeBuilder cdb, reg_t reg) 2892 { 2893 cdb.gen1(0x58 + (reg & 7)); 2894 if (reg & 8) 2895 code_orrex(cdb.last(), REX_B); 2896 } 2897 2898 /************************** 2899 * Generate a MOV to,from register instruction. 2900 * Smart enough to dump redundant register moves, and segment 2901 * register moves. 2902 */ 2903 2904 code *genmovreg(uint to,uint from) 2905 { 2906 CodeBuilder cdb; cdb.ctor(); 2907 genmovreg(cdb, to, from); 2908 return cdb.finish(); 2909 } 2910 2911 void genmovreg(ref CodeBuilder cdb,uint to,uint from) 2912 { 2913 genmovreg(cdb, to, from, TYMAX); 2914 } 2915 2916 @trusted 2917 void genmovreg(ref CodeBuilder cdb, uint to, uint from, tym_t tym) 2918 { 2919 // register kind. ex: GPR,XMM,SEG 2920 static uint _K(uint reg) 2921 { 2922 switch (reg) 2923 { 2924 case ES: return ES; 2925 case XMM15: 2926 case XMM0: .. case XMM7: return XMM0; 2927 case AX: .. 
case R15: return AX; 2928 default: return reg; 2929 } 2930 } 2931 2932 // kind combination (order kept) 2933 static uint _X(uint to, uint from) { return (_K(to) << 8) + _K(from); } 2934 2935 if (to != from) 2936 { 2937 if (tym == TYMAX) tym = TYsize_t; // avoid register slicing 2938 switch (_X(to, from)) 2939 { 2940 case _X(AX, AX): 2941 genregs(cdb, 0x89, from, to); // MOV to,from 2942 if (I64 && tysize(tym) >= 8) 2943 code_orrex(cdb.last(), REX_W); 2944 break; 2945 2946 case _X(XMM0, XMM0): // MOVD/Q to,from 2947 genregs(cdb, xmmload(tym), to-XMM0, from-XMM0); 2948 checkSetVex(cdb.last(), tym); 2949 break; 2950 2951 case _X(AX, XMM0): // MOVD/Q to,from 2952 genregs(cdb, STOD, from-XMM0, to); 2953 if (I64 && tysize(tym) >= 8) 2954 code_orrex(cdb.last(), REX_W); 2955 checkSetVex(cdb.last(), tym); 2956 break; 2957 2958 case _X(XMM0, AX): // MOVD/Q to,from 2959 genregs(cdb, LODD, to-XMM0, from); 2960 if (I64 && tysize(tym) >= 8) 2961 code_orrex(cdb.last(), REX_W); 2962 checkSetVex(cdb.last(), tym); 2963 break; 2964 2965 case _X(ES, AX): 2966 assert(tysize(tym) <= REGSIZE); 2967 genregs(cdb, 0x8E, 0, from); 2968 break; 2969 2970 case _X(AX, ES): 2971 assert(tysize(tym) <= REGSIZE); 2972 genregs(cdb, 0x8C, 0, to); 2973 break; 2974 2975 default: 2976 debug printf("genmovreg(to = %s, from = %s)\n" 2977 , regm_str(mask(to)), regm_str(mask(from))); 2978 assert(0); 2979 } 2980 } 2981 } 2982 2983 /*************************************** 2984 * Generate immediate multiply instruction for r1=r2*imm. 2985 * Optimize it into LEA's if we can. 2986 */ 2987 2988 @trusted 2989 void genmulimm(ref CodeBuilder cdb,uint r1,uint r2,targ_int imm) 2990 { 2991 // These optimizations should probably be put into pinholeopt() 2992 switch (imm) 2993 { 2994 case 1: 2995 genmovreg(cdb,r1,r2); 2996 break; 2997 2998 case 5: 2999 { 3000 code cs; 3001 cs.Iop = LEA; 3002 cs.Iflags = 0; 3003 cs.Irex = 0; 3004 buildEA(&cs,r2,r2,4,0); 3005 cs.orReg(r1); 3006 cdb.gen(&cs); 3007 break; 3008 } 3009 3010 default: 3011 cdb.genc2(0x69,modregxrmx(3,r1,r2),imm); // IMUL r1,r2,imm 3012 break; 3013 } 3014 } 3015 3016 /****************************** 3017 * Load CX with the value of _AHSHIFT. 3018 */ 3019 3020 void genshift(ref CodeBuilder cdb) 3021 { 3022 version (SCPP) 3023 { 3024 // Set up ahshift to trick ourselves into giving the right fixup, 3025 // which must be seg-relative, external frame, external target. 3026 cdb.gencs(0xC7,modregrm(3,0,CX),FLfunc,getRtlsym(RTLSYM.AHSHIFT)); 3027 cdb.last().Iflags |= CFoff; 3028 } 3029 else 3030 assert(0); 3031 } 3032 3033 /****************************** 3034 * Move constant value into reg. 3035 * Take advantage of existing values in registers. 
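 * (e.g. reuse another register that already holds the value, or emit
 * XOR/INC/DEC instead of a full MOV immediate when that is cheaper).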
3036 * If flags & mPSW 3037 * set flags based on result 3038 * Else if flags & 8 3039 * do not disturb flags 3040 * Else 3041 * don't care about flags 3042 * If flags & 1 then byte move 3043 * If flags & 2 then short move (for I32 and I64) 3044 * If flags & 4 then don't disturb unused portion of register 3045 * If flags & 16 then reg is a byte register AL..BH 3046 * If flags & 64 (0x40) then 64 bit move (I64 only) 3047 * Returns: 3048 * code (if any) generated 3049 */ 3050 3051 @trusted 3052 void movregconst(ref CodeBuilder cdb,reg_t reg,targ_size_t value,regm_t flags) 3053 { 3054 reg_t r; 3055 regm_t mreg; 3056 3057 //printf("movregconst(reg=%s, value= %lld (%llx), flags=%x)\n", regm_str(mask(reg)), value, value, flags); 3058 3059 regm_t regm = regcon.immed.mval & mask(reg); 3060 targ_size_t regv = regcon.immed.value[reg]; 3061 3062 if (flags & 1) // 8 bits 3063 { 3064 value &= 0xFF; 3065 regm &= BYTEREGS; 3066 3067 // If we already have the right value in the right register 3068 if (regm && (regv & 0xFF) == value) 3069 goto L2; 3070 3071 if (flags & 16 && reg & 4 && // if an H byte register 3072 regcon.immed.mval & mask(reg & 3) && 3073 (((regv = regcon.immed.value[reg & 3]) >> 8) & 0xFF) == value) 3074 goto L2; 3075 3076 /* Avoid byte register loads to avoid dependency stalls. 3077 */ 3078 if ((I32 || I64) && 3079 config.target_cpu >= TARGET_PentiumPro && !(flags & 4)) 3080 goto L3; 3081 3082 // See if another register has the right value 3083 r = 0; 3084 for (mreg = (regcon.immed.mval & BYTEREGS); mreg; mreg >>= 1) 3085 { 3086 if (mreg & 1) 3087 { 3088 if ((regcon.immed.value[r] & 0xFF) == value) 3089 { 3090 genregs(cdb,0x8A,reg,r); // MOV regL,rL 3091 if (I64 && reg >= 4 || r >= 4) 3092 code_orrex(cdb.last(), REX); 3093 goto L2; 3094 } 3095 if (!(I64 && reg >= 4) && 3096 r < 4 && ((regcon.immed.value[r] >> 8) & 0xFF) == value) 3097 { 3098 genregs(cdb,0x8A,reg,r | 4); // MOV regL,rH 3099 goto L2; 3100 } 3101 } 3102 r++; 3103 } 3104 3105 if (value == 0 && !(flags & 8)) 3106 { 3107 if (!(flags & 4) && // if we can set the whole register 3108 !(flags & 16 && reg & 4)) // and reg is not an H register 3109 { 3110 genregs(cdb,0x31,reg,reg); // XOR reg,reg 3111 regimmed_set(reg,value); 3112 regv = 0; 3113 } 3114 else 3115 genregs(cdb,0x30,reg,reg); // XOR regL,regL 3116 flags &= ~mPSW; // flags already set by XOR 3117 } 3118 else 3119 { 3120 cdb.genc2(0xC6,modregrmx(3,0,reg),value); // MOV regL,value 3121 if (reg >= 4 && I64) 3122 { 3123 code_orrex(cdb.last(), REX); 3124 } 3125 } 3126 L2: 3127 if (flags & mPSW) 3128 genregs(cdb,0x84,reg,reg); // TEST regL,regL 3129 3130 if (regm) 3131 // Set just the 'L' part of the register value 3132 regimmed_set(reg,(regv & ~cast(targ_size_t)0xFF) | value); 3133 else if (flags & 16 && reg & 4 && regcon.immed.mval & mask(reg & 3)) 3134 // Set just the 'H' part of the register value 3135 regimmed_set((reg & 3),(regv & ~cast(targ_size_t)0xFF00) | (value << 8)); 3136 return; 3137 } 3138 L3: 3139 if (I16) 3140 value = cast(targ_short) value; // sign-extend MSW 3141 else if (I32) 3142 value = cast(targ_int) value; 3143 3144 if (!I16 && flags & 2) // load 16 bit value 3145 { 3146 value &= 0xFFFF; 3147 if (value && !(flags & mPSW)) 3148 { 3149 cdb.genc2(0xC7,modregrmx(3,0,reg),value); // MOV reg,value 3150 regimmed_set(reg, value); 3151 return; 3152 } 3153 } 3154 3155 // If we already have the right value in the right register 3156 if (regm && (regv & 0xFFFFFFFF) == (value & 0xFFFFFFFF) && !(flags & 64)) 3157 { 3158 if (flags & mPSW) 3159 gentstreg(cdb,reg); 3160 
} 3161 else if (flags & 64 && regm && regv == value) 3162 { // Look at the full 64 bits 3163 if (flags & mPSW) 3164 { 3165 gentstreg(cdb,reg); 3166 code_orrex(cdb.last(), REX_W); 3167 } 3168 } 3169 else 3170 { 3171 if (flags & mPSW) 3172 { 3173 switch (value) 3174 { 3175 case 0: 3176 genregs(cdb,0x31,reg,reg); 3177 break; 3178 3179 case 1: 3180 if (I64) 3181 goto L4; 3182 genregs(cdb,0x31,reg,reg); 3183 goto inc; 3184 3185 case ~cast(targ_size_t)0: 3186 if (I64) 3187 goto L4; 3188 genregs(cdb,0x31,reg,reg); 3189 goto dec; 3190 3191 default: 3192 L4: 3193 if (flags & 64) 3194 { 3195 cdb.genc2(0xB8 + (reg&7),REX_W << 16 | (reg&8) << 13,value); // MOV reg,value64 3196 gentstreg(cdb,reg); 3197 code_orrex(cdb.last(), REX_W); 3198 } 3199 else 3200 { 3201 value &= 0xFFFFFFFF; 3202 cdb.genc2(0xB8 + (reg&7),(reg&8) << 13,value); // MOV reg,value 3203 gentstreg(cdb,reg); 3204 } 3205 break; 3206 } 3207 } 3208 else 3209 { 3210 // Look for single byte conversion 3211 if (regcon.immed.mval & mAX) 3212 { 3213 if (I32) 3214 { 3215 if (reg == AX && value == cast(targ_short) regv) 3216 { 3217 cdb.gen1(0x98); // CWDE 3218 goto done; 3219 } 3220 if (reg == DX && 3221 value == (regcon.immed.value[AX] & 0x80000000 ? 0xFFFFFFFF : 0) && 3222 !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium) 3223 ) 3224 { 3225 cdb.gen1(0x99); // CDQ 3226 goto done; 3227 } 3228 } 3229 else if (I16) 3230 { 3231 if (reg == AX && 3232 cast(targ_short) value == cast(byte) regv) 3233 { 3234 cdb.gen1(0x98); // CBW 3235 goto done; 3236 } 3237 3238 if (reg == DX && 3239 cast(targ_short) value == (regcon.immed.value[AX] & 0x8000 ? cast(targ_short) 0xFFFF : cast(targ_short) 0) && 3240 !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium) 3241 ) 3242 { 3243 cdb.gen1(0x99); // CWD 3244 goto done; 3245 } 3246 } 3247 } 3248 if (value == 0 && !(flags & 8) && config.target_cpu >= TARGET_80486) 3249 { 3250 genregs(cdb,0x31,reg,reg); // XOR reg,reg 3251 goto done; 3252 } 3253 3254 if (!I64 && regm && !(flags & 8)) 3255 { 3256 if (regv + 1 == value || 3257 // Catch case of (0xFFFF+1 == 0) for 16 bit compiles 3258 (I16 && cast(targ_short)(regv + 1) == cast(targ_short)value)) 3259 { 3260 inc: 3261 cdb.gen1(0x40 + reg); // INC reg 3262 goto done; 3263 } 3264 if (regv - 1 == value) 3265 { 3266 dec: 3267 cdb.gen1(0x48 + reg); // DEC reg 3268 goto done; 3269 } 3270 } 3271 3272 // See if another register has the right value 3273 r = 0; 3274 for (mreg = regcon.immed.mval; mreg; mreg >>= 1) 3275 { 3276 debug 3277 assert(!I16 || regcon.immed.value[r] == cast(targ_short)regcon.immed.value[r]); 3278 3279 if (mreg & 1 && regcon.immed.value[r] == value) 3280 { 3281 genmovreg(cdb,reg,r); 3282 goto done; 3283 } 3284 r++; 3285 } 3286 3287 if (value == 0 && !(flags & 8)) 3288 { 3289 genregs(cdb,0x31,reg,reg); // XOR reg,reg 3290 } 3291 else 3292 { // See if we can just load a byte 3293 if (regm & BYTEREGS && 3294 !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_PentiumPro) 3295 ) 3296 { 3297 if ((regv & ~cast(targ_size_t)0xFF) == (value & ~cast(targ_size_t)0xFF)) 3298 { 3299 movregconst(cdb,reg,value,(flags & 8) |4|1); // load regL 3300 return; 3301 } 3302 if (regm & (mAX|mBX|mCX|mDX) && 3303 (regv & ~cast(targ_size_t)0xFF00) == (value & ~cast(targ_size_t)0xFF00) && 3304 !I64) 3305 { 3306 movregconst(cdb,4|reg,value >> 8,(flags & 8) |4|1|16); // load regH 3307 return; 3308 } 3309 } 3310 if (flags & 64) 3311 cdb.genc2(0xB8 + (reg&7),REX_W << 16 | (reg&8) << 13,value); // MOV reg,value64 3312 else 3313 { 3314 value &= 
0xFFFFFFFF; 3315 cdb.genc2(0xB8 + (reg&7),(reg&8) << 13,value); // MOV reg,value 3316 } 3317 } 3318 } 3319 done: 3320 regimmed_set(reg,value); 3321 } 3322 } 3323 3324 /************************** 3325 * Generate a jump instruction. 3326 */ 3327 3328 @trusted 3329 void genjmp(ref CodeBuilder cdb,opcode_t op,uint fltarg,block *targ) 3330 { 3331 code cs; 3332 cs.Iop = op & 0xFF; 3333 cs.Iflags = 0; 3334 cs.Irex = 0; 3335 if (op != JMP && op != 0xE8) // if not already long branch 3336 cs.Iflags = CFjmp16; // assume long branch for op = 0x7x 3337 cs.IFL2 = cast(ubyte)fltarg; // FLblock (or FLcode) 3338 cs.IEV2.Vblock = targ; // target block (or code) 3339 if (fltarg == FLcode) 3340 (cast(code *)targ).Iflags |= CFtarg; 3341 3342 if (config.flags4 & CFG4fastfloat) // if fast floating point 3343 { 3344 cdb.gen(&cs); 3345 return; 3346 } 3347 3348 switch (op & 0xFF00) // look at second jump opcode 3349 { 3350 // The JP and JNP come from floating point comparisons 3351 case JP << 8: 3352 cdb.gen(&cs); 3353 cs.Iop = JP; 3354 cdb.gen(&cs); 3355 break; 3356 3357 case JNP << 8: 3358 { 3359 // Do a JP around the jump instruction 3360 code *cnop = gennop(null); 3361 genjmp(cdb,JP,FLcode,cast(block *) cnop); 3362 cdb.gen(&cs); 3363 cdb.append(cnop); 3364 break; 3365 } 3366 3367 case 1 << 8: // toggled no jump 3368 case 0 << 8: 3369 cdb.gen(&cs); 3370 break; 3371 3372 default: 3373 debug 3374 printf("jop = x%x\n",op); 3375 assert(0); 3376 } 3377 } 3378 3379 /********************************************* 3380 * Generate first part of prolog for interrupt function. 3381 */ 3382 @trusted 3383 void prolog_ifunc(ref CodeBuilder cdb, tym_t* tyf) 3384 { 3385 static immutable ubyte[4] ops2 = [ 0x60,0x1E,0x06,0 ]; 3386 static immutable ubyte[11] ops0 = [ 0x50,0x51,0x52,0x53, 3387 0x54,0x55,0x56,0x57, 3388 0x1E,0x06,0 ]; 3389 3390 immutable(ubyte)* p = (config.target_cpu >= TARGET_80286) ? 
ops2.ptr : ops0.ptr; 3391 do 3392 cdb.gen1(*p); 3393 while (*++p); 3394 3395 genregs(cdb,0x8B,BP,SP); // MOV BP,SP 3396 if (localsize) 3397 cod3_stackadj(cdb, cast(int)localsize); 3398 3399 *tyf |= mTYloadds; 3400 } 3401 3402 @trusted 3403 void prolog_ifunc2(ref CodeBuilder cdb, tym_t tyf, tym_t tym, bool pushds) 3404 { 3405 /* Determine if we need to reload DS */ 3406 if (tyf & mTYloadds) 3407 { 3408 if (!pushds) // if not already pushed 3409 cdb.gen1(0x1E); // PUSH DS 3410 spoff += _tysize[TYint]; 3411 cdb.genc(0xC7,modregrm(3,0,AX),0,0,FLdatseg,cast(targ_uns) 0); // MOV AX,DGROUP 3412 code *c = cdb.last(); 3413 c.IEV2.Vseg = DATA; 3414 c.Iflags ^= CFseg | CFoff; // turn off CFoff, on CFseg 3415 cdb.gen2(0x8E,modregrm(3,3,AX)); // MOV DS,AX 3416 useregs(mAX); 3417 } 3418 3419 if (tym == TYifunc) 3420 cdb.gen1(0xFC); // CLD 3421 } 3422 3423 @trusted 3424 void prolog_16bit_windows_farfunc(ref CodeBuilder cdb, tym_t* tyf, bool* pushds) 3425 { 3426 int wflags = config.wflags; 3427 if (wflags & WFreduced && !(*tyf & mTYexport)) 3428 { // reduced prolog/epilog for non-exported functions 3429 wflags &= ~(WFdgroup | WFds | WFss); 3430 } 3431 3432 getregsNoSave(mAX); // should not have any value in AX 3433 3434 int segreg; 3435 switch (wflags & (WFdgroup | WFds | WFss)) 3436 { 3437 case WFdgroup: // MOV AX,DGROUP 3438 { 3439 if (wflags & WFreduced) 3440 *tyf &= ~mTYloadds; // remove redundancy 3441 cdb.genc(0xC7,modregrm(3,0,AX),0,0,FLdatseg,cast(targ_uns) 0); 3442 code *c = cdb.last(); 3443 c.IEV2.Vseg = DATA; 3444 c.Iflags ^= CFseg | CFoff; // turn off CFoff, on CFseg 3445 break; 3446 } 3447 3448 case WFss: 3449 segreg = 2; // SS 3450 goto Lmovax; 3451 3452 case WFds: 3453 segreg = 3; // DS 3454 Lmovax: 3455 cdb.gen2(0x8C,modregrm(3,segreg,AX)); // MOV AX,segreg 3456 if (wflags & WFds) 3457 cdb.gen1(0x90); // NOP 3458 break; 3459 3460 case 0: 3461 break; 3462 3463 default: 3464 debug 3465 printf("config.wflags = x%x\n",config.wflags); 3466 assert(0); 3467 } 3468 if (wflags & WFincbp) 3469 cdb.gen1(0x40 + BP); // INC BP 3470 cdb.gen1(0x50 + BP); // PUSH BP 3471 genregs(cdb,0x8B,BP,SP); // MOV BP,SP 3472 if (wflags & (WFsaveds | WFds | WFss | WFdgroup)) 3473 { 3474 cdb.gen1(0x1E); // PUSH DS 3475 *pushds = true; 3476 BPoff = -REGSIZE; 3477 } 3478 if (wflags & (WFds | WFss | WFdgroup)) 3479 cdb.gen2(0x8E,modregrm(3,3,AX)); // MOV DS,AX 3480 } 3481 3482 /********************************************** 3483 * Set up frame register. 3484 * Params: 3485 * cdb = write generated code here 3486 * farfunc = true if a far function 3487 * enter = set to true if ENTER instruction can be used, false otherwise 3488 * xlocalsize = amount of local variables, set to amount to be subtracted from stack pointer 3489 * cfa_offset = set to frame pointer's offset from the CFA 3490 * Returns: 3491 * generated code 3492 */ 3493 @trusted 3494 void prolog_frame(ref CodeBuilder cdb, bool farfunc, ref uint xlocalsize, out bool enter, out int cfa_offset) 3495 { 3496 //printf("prolog_frame\n"); 3497 cfa_offset = 0; 3498 3499 if (0 && config.exe == EX_WIN64) 3500 { 3501 // PUSH RBP 3502 // LEA RBP,0[RSP] 3503 cdb. 
gen1(0x50 + BP); 3504 cdb.genc1(LEA,(REX_W<<16) | (modregrm(0,4,SP)<<8) | modregrm(2,BP,4),FLconst,0); 3505 enter = false; 3506 return; 3507 } 3508 3509 if (config.wflags & WFincbp && farfunc) 3510 cdb.gen1(0x40 + BP); // INC BP 3511 if (config.target_cpu < TARGET_80286 || 3512 config.exe & (EX_posix | EX_WIN64) || 3513 !localsize || 3514 config.flags & CFGstack || 3515 (xlocalsize >= 0x1000 && config.exe & EX_flat) || 3516 localsize >= 0x10000 || 3517 (NTEXCEPTIONS == 2 && 3518 (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || config.ehmethod == EHmethod.EH_SEH))) || 3519 (config.target_cpu >= TARGET_80386 && 3520 config.flags4 & CFG4speed) 3521 ) 3522 { 3523 cdb.gen1(0x50 + BP); // PUSH BP 3524 genregs(cdb,0x8B,BP,SP); // MOV BP,SP 3525 if (I64) 3526 code_orrex(cdb.last(), REX_W); // MOV RBP,RSP 3527 if ((config.objfmt & (OBJ_ELF | OBJ_MACH)) && config.fulltypes) 3528 // Don't reorder instructions, as dwarf CFA relies on it 3529 code_orflag(cdb.last(), CFvolatile); 3530 static if (NTEXCEPTIONS == 2) 3531 { 3532 if (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || config.ehmethod == EHmethod.EH_SEH)) 3533 { 3534 nteh_prolog(cdb); 3535 int sz = nteh_contextsym_size(); 3536 assert(sz != 0); // should be 5*4, not 0 3537 xlocalsize -= sz; // sz is already subtracted from ESP 3538 // by nteh_prolog() 3539 } 3540 } 3541 if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D || 3542 config.ehmethod == EHmethod.EH_DWARF) 3543 { 3544 int off = 2 * REGSIZE; // 1 for the return address + 1 for the PUSH EBP 3545 dwarf_CFA_set_loc(1); // address after PUSH EBP 3546 dwarf_CFA_set_reg_offset(SP, off); // CFA is now 8[ESP] 3547 dwarf_CFA_offset(BP, -off); // EBP is at 0[ESP] 3548 dwarf_CFA_set_loc(I64 ? 4 : 3); // address after MOV EBP,ESP 3549 /* Oddly, the CFA is not the same as the frame pointer, 3550 * which is why the offset of BP is set to 8 3551 */ 3552 dwarf_CFA_set_reg_offset(BP, off); // CFA is now 0[EBP] 3553 cfa_offset = off; // remember the difference between the CFA and the frame pointer 3554 } 3555 enter = false; /* do not use ENTER instruction */ 3556 } 3557 else 3558 enter = true; 3559 } 3560 3561 /********************************************** 3562 * Enforce stack alignment. 3563 * Input: 3564 * cdb code builder. 3565 * Returns: 3566 * generated code 3567 */ 3568 @trusted 3569 void prolog_stackalign(ref CodeBuilder cdb) 3570 { 3571 if (!enforcealign) 3572 return; 3573 3574 const offset = (hasframe ? 2 : 1) * REGSIZE; // 1 for the return address + 1 for the PUSH EBP 3575 if (offset & (STACKALIGN - 1) || TARGET_STACKALIGN < STACKALIGN) 3576 cod3_stackalign(cdb, STACKALIGN); 3577 } 3578 3579 @trusted 3580 void prolog_frameadj(ref CodeBuilder cdb, tym_t tyf, uint xlocalsize, bool enter, bool* pushalloc) 3581 { 3582 uint pushallocreg = (tyf == TYmfunc) ? 
CX : AX; 3583 3584 bool check; 3585 if (config.exe & (EX_LINUX | EX_LINUX64)) 3586 check = false; // seems that Linux doesn't need to fault in stack pages 3587 else 3588 check = (config.flags & CFGstack && !(I32 && xlocalsize < 0x1000)) // if stack overflow check 3589 || (config.exe & (EX_windos & EX_flat) && xlocalsize >= 0x1000); 3590 3591 if (check) 3592 { 3593 if (I16) 3594 { 3595 // BUG: Won't work if parameter is passed in AX 3596 movregconst(cdb,AX,xlocalsize,false); // MOV AX,localsize 3597 makeitextern(getRtlsym(RTLSYM.CHKSTK)); 3598 // CALL _chkstk 3599 cdb.gencs((LARGECODE) ? 0x9A : CALL,0,FLfunc,getRtlsym(RTLSYM.CHKSTK)); 3600 useregs((ALLREGS | mBP | mES) & ~getRtlsym(RTLSYM.CHKSTK).Sregsaved); 3601 } 3602 else 3603 { 3604 /* Watch out for 64 bit code where EDX is passed as a register parameter 3605 */ 3606 reg_t reg = I64 ? R11 : DX; // scratch register 3607 3608 /* MOV EDX, xlocalsize/0x1000 3609 * L1: SUB ESP, 0x1000 3610 * TEST [ESP],ESP 3611 * DEC EDX 3612 * JNE L1 3613 * SUB ESP, xlocalsize % 0x1000 3614 */ 3615 movregconst(cdb, reg, xlocalsize / 0x1000, false); 3616 cod3_stackadj(cdb, 0x1000); 3617 code_orflag(cdb.last(), CFtarg2); 3618 cdb.gen2sib(0x85, modregrm(0,SP,4),modregrm(0,4,SP)); 3619 if (I64) 3620 { cdb.gen2(0xFF, modregrmx(3,1,R11)); // DEC R11D 3621 cdb.genc2(JNE,0,cast(targ_uns)-15); 3622 } 3623 else 3624 { cdb.gen1(0x48 + DX); // DEC EDX 3625 cdb.genc2(JNE,0,cast(targ_uns)-12); 3626 } 3627 regimmed_set(reg,0); // reg is now 0 3628 cod3_stackadj(cdb, xlocalsize & 0xFFF); 3629 useregs(mask(reg)); 3630 } 3631 } 3632 else 3633 { 3634 if (enter) 3635 { // ENTER xlocalsize,0 3636 cdb.genc(ENTER,0,FLconst,xlocalsize,FLconst,cast(targ_uns) 0); 3637 assert(!(config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D)); // didn't emit Dwarf data 3638 } 3639 else if (xlocalsize == REGSIZE && config.flags4 & CFG4optimized) 3640 { 3641 cdb. gen1(0x50 + pushallocreg); // PUSH AX 3642 // Do this to prevent an -x[EBP] to be moved in 3643 // front of the push. 3644 code_orflag(cdb.last(),CFvolatile); 3645 *pushalloc = true; 3646 } 3647 else 3648 cod3_stackadj(cdb, xlocalsize); 3649 } 3650 } 3651 3652 void prolog_frameadj2(ref CodeBuilder cdb, tym_t tyf, uint xlocalsize, bool* pushalloc) 3653 { 3654 uint pushallocreg = (tyf == TYmfunc) ? CX : AX; 3655 if (xlocalsize == REGSIZE) 3656 { 3657 cdb.gen1(0x50 + pushallocreg); // PUSH AX 3658 *pushalloc = true; 3659 } 3660 else if (xlocalsize == 2 * REGSIZE) 3661 { 3662 cdb.gen1(0x50 + pushallocreg); // PUSH AX 3663 cdb.gen1(0x50 + pushallocreg); // PUSH AX 3664 *pushalloc = true; 3665 } 3666 else 3667 cod3_stackadj(cdb, xlocalsize); 3668 } 3669 3670 @trusted 3671 void prolog_setupalloca(ref CodeBuilder cdb) 3672 { 3673 //printf("prolog_setupalloca() offset x%x size x%x alignment x%x\n", 3674 //cast(int)Alloca.offset, cast(int)Alloca.size, cast(int)Alloca.alignment); 3675 // Set up magic parameter for alloca() 3676 // MOV -REGSIZE[BP],localsize - BPoff 3677 cdb.genc(0xC7,modregrm(2,0,BPRM), 3678 FLconst,Alloca.offset + BPoff, 3679 FLconst,localsize - BPoff); 3680 if (I64) 3681 code_orrex(cdb.last(), REX_W); 3682 } 3683 3684 /************************************** 3685 * Save registers that the function destroys, 3686 * but that the ABI says should be preserved across 3687 * function calls. 3688 * 3689 * Emit Dwarf info for these saves. 
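 * Depending on pushoffuse, the registers are either stored with MOVs into a
 * preallocated area of the stack frame or pushed onto the stack one by one.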
3690 * Params: 3691 * cdb = append generated instructions to this 3692 * topush = mask of registers to push 3693 * cfa_offset = offset of frame pointer from CFA 3694 */ 3695 3696 @trusted 3697 void prolog_saveregs(ref CodeBuilder cdb, regm_t topush, int cfa_offset) 3698 { 3699 if (pushoffuse) 3700 { 3701 // Save to preallocated section in the stack frame 3702 int xmmtopush = popcnt(topush & XMMREGS); // XMM regs take 16 bytes 3703 int gptopush = popcnt(topush) - xmmtopush; // general purpose registers to save 3704 targ_size_t xmmoffset = pushoff + BPoff; 3705 if (!hasframe || enforcealign) 3706 xmmoffset += EBPtoESP; 3707 targ_size_t gpoffset = xmmoffset + xmmtopush * 16; 3708 while (topush) 3709 { 3710 reg_t reg = findreg(topush); 3711 topush &= ~mask(reg); 3712 if (isXMMreg(reg)) 3713 { 3714 if (hasframe && !enforcealign) 3715 { 3716 // MOVUPD xmmoffset[EBP],xmm 3717 cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,BPRM),FLconst,xmmoffset); 3718 } 3719 else 3720 { 3721 // MOVUPD xmmoffset[ESP],xmm 3722 cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,xmmoffset); 3723 } 3724 xmmoffset += 16; 3725 } 3726 else 3727 { 3728 if (hasframe && !enforcealign) 3729 { 3730 // MOV gpoffset[EBP],reg 3731 cdb.genc1(0x89,modregxrm(2,reg,BPRM),FLconst,gpoffset); 3732 } 3733 else 3734 { 3735 // MOV gpoffset[ESP],reg 3736 cdb.genc1(0x89,modregxrm(2,reg,4) + 256*modregrm(0,4,SP),FLconst,gpoffset); 3737 } 3738 if (I64) 3739 code_orrex(cdb.last(), REX_W); 3740 if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D || 3741 config.ehmethod == EHmethod.EH_DWARF) 3742 { // Emit debug_frame data giving location of saved register 3743 code *c = cdb.finish(); 3744 pinholeopt(c, null); 3745 dwarf_CFA_set_loc(calcblksize(c)); // address after save 3746 dwarf_CFA_offset(reg, cast(int)(gpoffset - cfa_offset)); 3747 cdb.reset(); 3748 cdb.append(c); 3749 } 3750 gpoffset += REGSIZE; 3751 } 3752 } 3753 } 3754 else 3755 { 3756 while (topush) /* while registers to push */ 3757 { 3758 reg_t reg = findreg(topush); 3759 topush &= ~mask(reg); 3760 if (isXMMreg(reg)) 3761 { 3762 // SUB RSP,16 3763 cod3_stackadj(cdb, 16); 3764 // MOVUPD 0[RSP],xmm 3765 cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,0); 3766 EBPtoESP += 16; 3767 spoff += 16; 3768 } 3769 else 3770 { 3771 genpush(cdb, reg); 3772 EBPtoESP += REGSIZE; 3773 spoff += REGSIZE; 3774 if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D || 3775 config.ehmethod == EHmethod.EH_DWARF) 3776 { // Emit debug_frame data giving location of saved register 3777 // relative to 0[EBP] 3778 code *c = cdb.finish(); 3779 pinholeopt(c, null); 3780 dwarf_CFA_set_loc(calcblksize(c)); // address after PUSH reg 3781 dwarf_CFA_offset(reg, -EBPtoESP - cfa_offset); 3782 cdb.reset(); 3783 cdb.append(c); 3784 } 3785 } 3786 } 3787 } 3788 } 3789 3790 /************************************** 3791 * Undo prolog_saveregs() 3792 */ 3793 3794 @trusted 3795 private void epilog_restoreregs(ref CodeBuilder cdb, regm_t topop) 3796 { 3797 debug 3798 if (topop & ~(XMMREGS | 0xFFFF)) 3799 printf("fregsaved = %s, mfuncreg = %s\n",regm_str(fregsaved),regm_str(mfuncreg)); 3800 3801 assert(!(topop & ~(XMMREGS | 0xFFFF))); 3802 if (pushoffuse) 3803 { 3804 // Save to preallocated section in the stack frame 3805 int xmmtopop = popcnt(topop & XMMREGS); // XMM regs take 16 bytes 3806 int gptopop = popcnt(topop) - xmmtopop; // general purpose registers to save 3807 targ_size_t xmmoffset = pushoff + BPoff; 3808 if (!hasframe || enforcealign) 3809 xmmoffset += 
EBPtoESP; 3810 targ_size_t gpoffset = xmmoffset + xmmtopop * 16; 3811 while (topop) 3812 { 3813 reg_t reg = findreg(topop); 3814 topop &= ~mask(reg); 3815 if (isXMMreg(reg)) 3816 { 3817 if (hasframe && !enforcealign) 3818 { 3819 // MOVUPD xmm,xmmoffset[EBP] 3820 cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,BPRM),FLconst,xmmoffset); 3821 } 3822 else 3823 { 3824 // MOVUPD xmm,xmmoffset[ESP] 3825 cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,xmmoffset); 3826 } 3827 xmmoffset += 16; 3828 } 3829 else 3830 { 3831 if (hasframe && !enforcealign) 3832 { 3833 // MOV reg,gpoffset[EBP] 3834 cdb.genc1(0x8B,modregxrm(2,reg,BPRM),FLconst,gpoffset); 3835 } 3836 else 3837 { 3838 // MOV reg,gpoffset[ESP] 3839 cdb.genc1(0x8B,modregxrm(2,reg,4) + 256*modregrm(0,4,SP),FLconst,gpoffset); 3840 } 3841 if (I64) 3842 code_orrex(cdb.last(), REX_W); 3843 gpoffset += REGSIZE; 3844 } 3845 } 3846 } 3847 else 3848 { 3849 reg_t reg = I64 ? XMM7 : DI; 3850 if (!(topop & XMMREGS)) 3851 reg = R15; 3852 regm_t regm = 1 << reg; 3853 3854 while (topop) 3855 { if (topop & regm) 3856 { 3857 if (isXMMreg(reg)) 3858 { 3859 // MOVUPD xmm,0[RSP] 3860 cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,0); 3861 // ADD RSP,16 3862 cod3_stackadj(cdb, -16); 3863 } 3864 else 3865 { 3866 cdb.gen1(0x58 + (reg & 7)); // POP reg 3867 if (reg & 8) 3868 code_orrex(cdb.last(), REX_B); 3869 } 3870 topop &= ~regm; 3871 } 3872 regm >>= 1; 3873 reg--; 3874 } 3875 } 3876 } 3877 3878 version (SCPP) 3879 { 3880 @trusted 3881 void prolog_trace(ref CodeBuilder cdb, bool farfunc, uint* regsaved) 3882 { 3883 Symbol *s = getRtlsym(farfunc ? RTLSYM.TRACE_PRO_F : RTLSYM.TRACE_PRO_N); 3884 makeitextern(s); 3885 cdb.gencs(I16 ? 0x9A : CALL,0,FLfunc,s); // CALL _trace 3886 if (!I16) 3887 code_orflag(cdb.last(),CFoff | CFselfrel); 3888 /* Embedding the function name inline after the call works, but it 3889 * makes disassembling the code annoying. 3890 */ 3891 static if (ELFOBJ || MACHOBJ) 3892 { 3893 // Generate length prefixed name that is recognized by profiler 3894 size_t len = strlen(funcsym_p.Sident); 3895 char *buffer = cast(char *)malloc(len + 4); 3896 assert(buffer); 3897 if (len <= 254) 3898 { 3899 buffer[0] = len; 3900 memcpy(buffer + 1, funcsym_p.Sident, len); 3901 len++; 3902 } 3903 else 3904 { 3905 buffer[0] = 0xFF; 3906 buffer[1] = 0; 3907 buffer[2] = len & 0xFF; 3908 buffer[3] = len >> 8; 3909 memcpy(buffer + 4, funcsym_p.Sident, len); 3910 len += 4; 3911 } 3912 cdb.genasm(buffer, len); // append func name 3913 free(buffer); 3914 } 3915 else 3916 { 3917 char [IDMAX+IDOHD+1] name = void; 3918 size_t len = objmod.mangle(funcsym_p,name.ptr); 3919 assert(len < name.length); 3920 cdb.genasm(name.ptr,len); // append func name 3921 } 3922 *regsaved = s.Sregsaved; 3923 } 3924 } 3925 3926 /****************************** 3927 * Generate special varargs prolog for Posix 64 bit systems. 3928 * Params: 3929 * cdb = sink for generated code 3930 * sv = symbol for __va_argsave 3931 * namedargs = registers that named parameters (not ... arguments) were passed in. 3932 */ 3933 @trusted 3934 void prolog_genvarargs(ref CodeBuilder cdb, Symbol* sv, regm_t namedargs) 3935 { 3936 /* Generate code to move any arguments passed in registers into 3937 * the stack variable __va_argsave, 3938 * so we can reference it via pointers through va_arg(). 
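 * The save area is laid out as: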
3939 * struct __va_argsave_t { 3940 * size_t[6] regs; 3941 * real[8] fpregs; 3942 * uint offset_regs; 3943 * uint offset_fpregs; 3944 * void* stack_args; 3945 * void* reg_args; 3946 * } 3947 * The MOVAPS instructions seg fault if data is not aligned on 3948 * 16 bytes, so this gives us a nice check to ensure no mistakes. 3949 MOV voff+0*8[RBP],EDI 3950 MOV voff+1*8[RBP],ESI 3951 MOV voff+2*8[RBP],RDX 3952 MOV voff+3*8[RBP],RCX 3953 MOV voff+4*8[RBP],R8 3954 MOV voff+5*8[RBP],R9 3955 MOVZX EAX,AL // AL = 0..8, # of XMM registers used 3956 SHL EAX,2 // 4 bytes for each MOVAPS 3957 LEA R11,offset L2[RIP] 3958 SUB R11,RAX 3959 LEA RAX,voff+6*8+0x7F[RBP] 3960 JMP R11d 3961 MOVAPS -0x0F[RAX],XMM7 // only save XMM registers if actually used 3962 MOVAPS -0x1F[RAX],XMM6 3963 MOVAPS -0x2F[RAX],XMM5 3964 MOVAPS -0x3F[RAX],XMM4 3965 MOVAPS -0x4F[RAX],XMM3 3966 MOVAPS -0x5F[RAX],XMM2 3967 MOVAPS -0x6F[RAX],XMM1 3968 MOVAPS -0x7F[RAX],XMM0 3969 L2: 3970 MOV 1[RAX],offset_regs // set __va_argsave.offset_regs 3971 MOV 5[RAX],offset_fpregs // set __va_argsave.offset_fpregs 3972 LEA R11, Para.size+Para.offset[RBP] 3973 MOV 9[RAX],R11 // set __va_argsave.stack_args 3974 SUB RAX,6*8+0x7F // point to start of __va_argsave 3975 MOV 6*8+8*16+4+4+8[RAX],RAX // set __va_argsave.reg_args 3976 * RAX and R11 are destroyed. 3977 */ 3978 3979 /* Save registers into the voff area on the stack 3980 */ 3981 targ_size_t voff = Auto.size + BPoff + sv.Soffset; // EBP offset of start of sv 3982 const int vregnum = 6; 3983 const uint vsize = vregnum * 8 + 8 * 16; 3984 3985 static immutable ubyte[vregnum] regs = [ DI,SI,DX,CX,R8,R9 ]; 3986 3987 if (!hasframe || enforcealign) 3988 voff += EBPtoESP; 3989 3990 for (int i = 0; i < vregnum; i++) 3991 { 3992 uint r = regs[i]; 3993 if (!(mask(r) & namedargs)) // unnamed arguments would be the ... ones 3994 { 3995 uint ea = (REX_W << 16) | modregxrm(2,r,BPRM); 3996 if (!hasframe || enforcealign) 3997 ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,r,4); 3998 cdb.genc1(0x89,ea,FLconst,voff + i*8); 3999 } 4000 } 4001 4002 genregs(cdb,MOVZXb,AX,AX); // MOVZX EAX,AL 4003 cdb.genc2(0xC1,modregrm(3,4,AX),2); // SHL EAX,2 4004 int raxoff = cast(int)(voff+6*8+0x7F); 4005 uint L2offset = (raxoff < -0x7F) ? 
0x2D : 0x2A; 4006 if (!hasframe || enforcealign) 4007 L2offset += 1; // +1 for sib byte 4008 // LEA R11,offset L2[RIP] 4009 cdb.genc1(LEA,(REX_W << 16) | modregxrm(0,R11,5),FLconst,L2offset); 4010 genregs(cdb,0x29,AX,R11); // SUB R11,RAX 4011 code_orrex(cdb.last(), REX_W); 4012 // LEA RAX,voff+vsize-6*8-16+0x7F[RBP] 4013 uint ea = (REX_W << 16) | modregrm(2,AX,BPRM); 4014 if (!hasframe || enforcealign) 4015 // add sib byte for [RSP] addressing 4016 ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,AX,4); 4017 cdb.genc1(LEA,ea,FLconst,raxoff); 4018 cdb.gen2(0xFF,modregrmx(3,4,R11)); // JMP R11d 4019 for (int i = 0; i < 8; i++) 4020 { 4021 // MOVAPS -15-16*i[RAX],XMM7-i 4022 cdb.genc1(0x0F29,modregrm(0,XMM7-i,0),FLconst,-15-16*i); 4023 } 4024 4025 /* Compute offset_regs and offset_fpregs 4026 */ 4027 uint offset_regs = 0; 4028 uint offset_fpregs = vregnum * 8; 4029 for (int i = AX; i <= XMM7; i++) 4030 { 4031 regm_t m = mask(i); 4032 if (m & namedargs) 4033 { 4034 if (m & (mDI|mSI|mDX|mCX|mR8|mR9)) 4035 offset_regs += 8; 4036 else if (m & XMMREGS) 4037 offset_fpregs += 16; 4038 namedargs &= ~m; 4039 if (!namedargs) 4040 break; 4041 } 4042 } 4043 // MOV 1[RAX],offset_regs 4044 cdb.genc(0xC7,modregrm(2,0,AX),FLconst,1,FLconst,offset_regs); 4045 4046 // MOV 5[RAX],offset_fpregs 4047 cdb.genc(0xC7,modregrm(2,0,AX),FLconst,5,FLconst,offset_fpregs); 4048 4049 // LEA R11, Para.size+Para.offset[RBP] 4050 ea = modregxrm(2,R11,BPRM); 4051 if (!hasframe) 4052 ea = (modregrm(0,4,SP) << 8) | modregrm(2,DX,4); 4053 Para.offset = (Para.offset + (REGSIZE - 1)) & ~(REGSIZE - 1); 4054 cdb.genc1(LEA,(REX_W << 16) | ea,FLconst,Para.size + Para.offset); 4055 4056 // MOV 9[RAX],R11 4057 cdb.genc1(0x89,(REX_W << 16) | modregxrm(2,R11,AX),FLconst,9); 4058 4059 // SUB RAX,6*8+0x7F // point to start of __va_argsave 4060 cdb.genc2(0x2D,0,6*8+0x7F); 4061 code_orrex(cdb.last(), REX_W); 4062 4063 // MOV 6*8+8*16+4+4+8[RAX],RAX // set __va_argsave.reg_args 4064 cdb.genc1(0x89,(REX_W << 16) | modregrm(2,AX,AX),FLconst,6*8+8*16+4+4+8); 4065 4066 pinholeopt(cdb.peek(), null); 4067 useregs(mAX|mR11); 4068 } 4069 4070 void prolog_gen_win64_varargs(ref CodeBuilder cdb) 4071 { 4072 /* The Microsoft scheme. 4073 * https://msdn.microsoft.com/en-US/library/dd2wa36c%28v=vs.100%29 4074 * Copy registers onto stack. 4075 mov 8[RSP],RCX 4076 mov 010h[RSP],RDX 4077 mov 018h[RSP],R8 4078 mov 020h[RSP],R9 4079 */ 4080 } 4081 4082 /************************************ 4083 * Params: 4084 * cdb = generated code sink 4085 * tf = what's the type of the function 4086 * pushalloc = use PUSH to allocate on the stack rather than subtracting from SP 4087 * namedargs = set to the registers that named parameters were passed in 4088 */ 4089 @trusted 4090 void prolog_loadparams(ref CodeBuilder cdb, tym_t tyf, bool pushalloc, out regm_t namedargs) 4091 { 4092 //printf("prolog_loadparams() %s\n", funcsym_p.Sident.ptr); 4093 debug 4094 for (SYMIDX si = 0; si < globsym.length; si++) 4095 { 4096 Symbol *s = globsym[si]; 4097 if (debugr && (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg)) 4098 { 4099 printf("symbol '%s' is fastpar in register [l %s, m %s]\n", s.Sident.ptr, 4100 regm_str(mask(s.Spreg)), 4101 (s.Spreg2 == NOREG ? "NOREG" : regm_str(mask(s.Spreg2)))); 4102 if (s.Sfl == FLreg) 4103 printf("\tassigned to register %s\n", regm_str(mask(s.Sreglsw))); 4104 } 4105 } 4106 4107 uint pushallocreg = (tyf == TYmfunc) ? 
CX : AX; 4108 4109 /* Copy SCfastpar and SCshadowreg (parameters passed in registers) that were not assigned 4110 * registers into their stack locations. 4111 */ 4112 regm_t shadowregm = 0; 4113 for (SYMIDX si = 0; si < globsym.length; si++) 4114 { 4115 Symbol *s = globsym[si]; 4116 uint sz = cast(uint)type_size(s.Stype); 4117 4118 if (!((s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg) && s.Sfl != FLreg)) 4119 continue; 4120 // Argument is passed in a register 4121 4122 type *t = s.Stype; 4123 type *t2 = null; 4124 4125 tym_t tyb = tybasic(t.Tty); 4126 4127 // This logic is same as FuncParamRegs_alloc function at src/dmd/backend/cod1.d 4128 // 4129 // Find suitable SROA based on the element type 4130 // (Don't put volatile parameters in registers on Windows) 4131 if (tyb == TYarray && (config.exe != EX_WIN64 || !(t.Tty & mTYvolatile))) 4132 { 4133 type *targ1; 4134 argtypes(t, targ1, t2); 4135 if (targ1) 4136 t = targ1; 4137 } 4138 4139 // If struct just wraps another type 4140 if (tyb == TYstruct) 4141 { 4142 // On windows 64 bits, structs occupy a general purpose register, 4143 // regardless of the struct size or the number & types of its fields. 4144 if (config.exe != EX_WIN64) 4145 { 4146 type *targ1 = t.Ttag.Sstruct.Sarg1type; 4147 t2 = t.Ttag.Sstruct.Sarg2type; 4148 if (targ1) 4149 t = targ1; 4150 } 4151 } 4152 4153 if (Symbol_Sisdead(*s, anyiasm)) 4154 { 4155 // Ignore it, as it is never referenced 4156 continue; 4157 } 4158 4159 targ_size_t offset = Fast.size + BPoff; 4160 if (s.Sclass == SC.shadowreg) 4161 offset = Para.size; 4162 offset += s.Soffset; 4163 if (!hasframe || (enforcealign && s.Sclass != SC.shadowreg)) 4164 offset += EBPtoESP; 4165 4166 reg_t preg = s.Spreg; 4167 foreach (i; 0 .. 2) // twice, once for each possible parameter register 4168 { 4169 static type* type_arrayBase(type* ta) 4170 { 4171 while (tybasic(ta.Tty) == TYarray) 4172 ta = ta.Tnext; 4173 return ta; 4174 } 4175 shadowregm |= mask(preg); 4176 const opcode_t op = isXMMreg(preg) 4177 ? xmmstore(type_arrayBase(t).Tty) 4178 : 0x89; // MOV x[EBP],preg 4179 if (!(pushalloc && preg == pushallocreg) || s.Sclass == SC.shadowreg) 4180 { 4181 if (hasframe && (!enforcealign || s.Sclass == SC.shadowreg)) 4182 { 4183 // MOV x[EBP],preg 4184 cdb.genc1(op,modregxrm(2,preg,BPRM),FLconst,offset); 4185 if (isXMMreg(preg)) 4186 { 4187 checkSetVex(cdb.last(), t.Tty); 4188 } 4189 else 4190 { 4191 //printf("%s Fast.size = %d, BPoff = %d, Soffset = %d, sz = %d\n", 4192 // s.Sident, (int)Fast.size, (int)BPoff, (int)s.Soffset, (int)sz); 4193 if (I64 && sz > 4) 4194 code_orrex(cdb.last(), REX_W); 4195 } 4196 } 4197 else 4198 { 4199 // MOV offset[ESP],preg 4200 // BUG: byte size? 4201 cdb.genc1(op, 4202 (modregrm(0,4,SP) << 8) | 4203 modregxrm(2,preg,4),FLconst,offset); 4204 if (isXMMreg(preg)) 4205 { 4206 checkSetVex(cdb.last(), t.Tty); 4207 } 4208 else 4209 { 4210 if (I64 && sz > 4) 4211 cdb.last().Irex |= REX_W; 4212 } 4213 } 4214 } 4215 preg = s.Spreg2; 4216 if (preg == NOREG) 4217 break; 4218 if (t2) 4219 t = t2; 4220 offset += REGSIZE; 4221 } 4222 } 4223 4224 if (config.exe == EX_WIN64 && variadic(funcsym_p.Stype)) 4225 { 4226 /* The Microsoft scheme. 4227 * https://msdn.microsoft.com/en-US/library/dd2wa36c%28v=vs.100%29 4228 * Copy registers onto stack. 
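 * (only the slots whose register was not already stored by the
 * shadow-register copies above are written)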
4229 mov 8[RSP],RCX or XMM0 4230 mov 010h[RSP],RDX or XMM1 4231 mov 018h[RSP],R8 or XMM2 4232 mov 020h[RSP],R9 or XMM3 4233 */ 4234 static immutable reg_t[4] vregs = [ CX,DX,R8,R9 ]; 4235 for (int i = 0; i < vregs.length; ++i) 4236 { 4237 uint preg = vregs[i]; 4238 uint offset = cast(uint)(Para.size + i * REGSIZE); 4239 if (!(shadowregm & (mask(preg) | mask(XMM0 + i)))) 4240 { 4241 if (hasframe) 4242 { 4243 // MOV x[EBP],preg 4244 cdb.genc1(0x89, 4245 modregxrm(2,preg,BPRM),FLconst, offset); 4246 code_orrex(cdb.last(), REX_W); 4247 } 4248 else 4249 { 4250 // MOV offset[ESP],preg 4251 cdb.genc1(0x89, 4252 (modregrm(0,4,SP) << 8) | 4253 modregxrm(2,preg,4),FLconst,offset + EBPtoESP); 4254 } 4255 cdb.last().Irex |= REX_W; 4256 } 4257 } 4258 } 4259 4260 /* Copy SCfastpar and SCshadowreg (parameters passed in registers) that were assigned registers 4261 * into their assigned registers. 4262 * Note that we have a big problem if Pa is passed in R1 and assigned to R2, 4263 * and Pb is passed in R2 but assigned to R1. Detect it and assert. 4264 */ 4265 regm_t assignregs = 0; 4266 for (SYMIDX si = 0; si < globsym.length; si++) 4267 { 4268 Symbol *s = globsym[si]; 4269 uint sz = cast(uint)type_size(s.Stype); 4270 4271 if (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg) 4272 namedargs |= s.Spregm(); 4273 4274 if (!((s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg) && s.Sfl == FLreg)) 4275 { 4276 // Argument is passed in a register 4277 continue; 4278 } 4279 4280 type *t = s.Stype; 4281 type *t2 = null; 4282 if (tybasic(t.Tty) == TYstruct && config.exe != EX_WIN64) 4283 { type *targ1 = t.Ttag.Sstruct.Sarg1type; 4284 t2 = t.Ttag.Sstruct.Sarg2type; 4285 if (targ1) 4286 t = targ1; 4287 } 4288 4289 reg_t preg = s.Spreg; 4290 reg_t r = s.Sreglsw; 4291 for (int i = 0; i < 2; ++i) 4292 { 4293 if (preg == NOREG) 4294 break; 4295 assert(!(mask(preg) & assignregs)); // not already stepped on 4296 assignregs |= mask(r); 4297 4298 // MOV reg,preg 4299 if (r == preg) 4300 { 4301 } 4302 else if (mask(preg) & XMMREGS) 4303 { 4304 const op = xmmload(t.Tty); // MOVSS/D xreg,preg 4305 uint xreg = r - XMM0; 4306 cdb.gen2(op,modregxrmx(3,xreg,preg - XMM0)); 4307 } 4308 else 4309 { 4310 //printf("test1 mov %s, %s\n", regstring[r], regstring[preg]); 4311 genmovreg(cdb,r,preg); 4312 if (I64 && sz == 8) 4313 code_orrex(cdb.last(), REX_W); 4314 } 4315 preg = s.Spreg2; 4316 r = s.Sregmsw; 4317 if (t2) 4318 t = t2; 4319 } 4320 } 4321 4322 /* For parameters that were passed on the stack, but are enregistered, 4323 * initialize the registers with the parameter stack values. 4324 * Do not use assignaddr(), as it will replace the stack reference with 4325 * the register. 
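 * The loads below read Para.size + Soffset[BP] directly (or the equivalent
 * ESP-relative address when there is no frame pointer) into the assigned register.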
4326 */ 4327 for (SYMIDX si = 0; si < globsym.length; si++) 4328 { 4329 Symbol *s = globsym[si]; 4330 uint sz = cast(uint)type_size(s.Stype); 4331 4332 if (!((s.Sclass == SC.regpar || s.Sclass == SC.parameter) && 4333 s.Sfl == FLreg && 4334 (refparam 4335 // This variable has been reference by a nested function 4336 || MARS && s.Stype.Tty & mTYvolatile 4337 ))) 4338 { 4339 continue; 4340 } 4341 // MOV reg,param[BP] 4342 //assert(refparam); 4343 if (mask(s.Sreglsw) & XMMREGS) 4344 { 4345 const op = xmmload(s.Stype.Tty); // MOVSS/D xreg,mem 4346 uint xreg = s.Sreglsw - XMM0; 4347 cdb.genc1(op,modregxrm(2,xreg,BPRM),FLconst,Para.size + s.Soffset); 4348 if (!hasframe) 4349 { // Convert to ESP relative address rather than EBP 4350 code *c = cdb.last(); 4351 c.Irm = cast(ubyte)modregxrm(2,xreg,4); 4352 c.Isib = modregrm(0,4,SP); 4353 c.IEV1.Vpointer += EBPtoESP; 4354 } 4355 continue; 4356 } 4357 4358 cdb.genc1(sz == 1 ? 0x8A : 0x8B, 4359 modregxrm(2,s.Sreglsw,BPRM),FLconst,Para.size + s.Soffset); 4360 code *c = cdb.last(); 4361 if (!I16 && sz == SHORTSIZE) 4362 c.Iflags |= CFopsize; // operand size 4363 if (I64 && sz >= REGSIZE) 4364 c.Irex |= REX_W; 4365 if (I64 && sz == 1 && s.Sreglsw >= 4) 4366 c.Irex |= REX; 4367 if (!hasframe) 4368 { // Convert to ESP relative address rather than EBP 4369 assert(!I16); 4370 c.Irm = cast(ubyte)modregxrm(2,s.Sreglsw,4); 4371 c.Isib = modregrm(0,4,SP); 4372 c.IEV1.Vpointer += EBPtoESP; 4373 } 4374 if (sz > REGSIZE) 4375 { 4376 cdb.genc1(0x8B, 4377 modregxrm(2,s.Sregmsw,BPRM),FLconst,Para.size + s.Soffset + REGSIZE); 4378 code *cx = cdb.last(); 4379 if (I64) 4380 cx.Irex |= REX_W; 4381 if (!hasframe) 4382 { // Convert to ESP relative address rather than EBP 4383 assert(!I16); 4384 cx.Irm = cast(ubyte)modregxrm(2,s.Sregmsw,4); 4385 cx.Isib = modregrm(0,4,SP); 4386 cx.IEV1.Vpointer += EBPtoESP; 4387 } 4388 } 4389 } 4390 } 4391 4392 /******************************* 4393 * Generate and return function epilog. 4394 * Output: 4395 * retsize Size of function epilog 4396 */ 4397 4398 @trusted 4399 void epilog(block *b) 4400 { 4401 code *cpopds; 4402 reg_t reg; 4403 reg_t regx; // register that's not a return reg 4404 regm_t topop,regm; 4405 targ_size_t xlocalsize = localsize; 4406 4407 CodeBuilder cdbx; cdbx.ctor(); 4408 tym_t tyf = funcsym_p.ty(); 4409 tym_t tym = tybasic(tyf); 4410 bool farfunc = tyfarfunc(tym) != 0; 4411 if (!(b.Bflags & BFLepilog)) // if no epilog code 4412 goto Lret; // just generate RET 4413 regx = (b.BC == BCret) ? AX : CX; 4414 4415 retsize = 0; 4416 4417 if (tyf & mTYnaked) // if no prolog/epilog 4418 return; 4419 4420 if (tym == TYifunc) 4421 { 4422 static immutable ubyte[5] ops2 = [ 0x07,0x1F,0x61,0xCF,0 ]; 4423 static immutable ubyte[12] ops0 = [ 0x07,0x1F,0x5F,0x5E, 4424 0x5D,0x5B,0x5B,0x5A, 4425 0x59,0x58,0xCF,0 ]; 4426 4427 genregs(cdbx,0x8B,SP,BP); // MOV SP,BP 4428 auto p = (config.target_cpu >= TARGET_80286) ? ops2.ptr : ops0.ptr; 4429 do 4430 cdbx.gen1(*p); 4431 while (*++p); 4432 goto Lopt; 4433 } 4434 4435 if (config.flags & CFGtrace && 4436 (!(config.flags4 & CFG4allcomdat) || 4437 funcsym_p.Sclass == SC.comdat || 4438 funcsym_p.Sclass == SC.global || 4439 (config.flags2 & CFG2comdat && SymInline(funcsym_p)) 4440 ) 4441 ) 4442 { 4443 Symbol *s = getRtlsym(farfunc ? RTLSYM.TRACE_EPI_F : RTLSYM.TRACE_EPI_N); 4444 makeitextern(s); 4445 cdbx.gencs(I16 ? 
0x9A : CALL,0,FLfunc,s); // CALLF _trace 4446 if (!I16) 4447 code_orflag(cdbx.last(),CFoff | CFselfrel); 4448 useregs((ALLREGS | mBP | mES) & ~s.Sregsaved); 4449 } 4450 4451 if (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.exe == EX_WIN32 || MARS)) 4452 { 4453 nteh_epilog(cdbx); 4454 } 4455 4456 cpopds = null; 4457 if (tyf & mTYloadds) 4458 { 4459 cdbx.gen1(0x1F); // POP DS 4460 cpopds = cdbx.last(); 4461 } 4462 4463 /* Pop all the general purpose registers saved on the stack 4464 * by the prolog code. Remember to do them in the reverse 4465 * order they were pushed. 4466 */ 4467 topop = fregsaved & ~mfuncreg; 4468 epilog_restoreregs(cdbx, topop); 4469 4470 version (MARS) 4471 { 4472 if (usednteh & NTEHjmonitor) 4473 { 4474 regm_t retregs = 0; 4475 if (b.BC == BCretexp) 4476 retregs = regmask(b.Belem.Ety, tym); 4477 nteh_monitor_epilog(cdbx,retregs); 4478 xlocalsize += 8; 4479 } 4480 } 4481 4482 if (config.wflags & WFwindows && farfunc) 4483 { 4484 int wflags = config.wflags; 4485 if (wflags & WFreduced && !(tyf & mTYexport)) 4486 { // reduced prolog/epilog for non-exported functions 4487 wflags &= ~(WFdgroup | WFds | WFss); 4488 if (!(wflags & WFsaveds)) 4489 goto L4; 4490 } 4491 4492 if (localsize) 4493 { 4494 cdbx.genc1(LEA,modregrm(1,SP,6),FLconst,cast(targ_uns)-2); /* LEA SP,-2[BP] */ 4495 } 4496 if (wflags & (WFsaveds | WFds | WFss | WFdgroup)) 4497 { 4498 if (cpopds) 4499 cpopds.Iop = NOP; // don't need previous one 4500 cdbx.gen1(0x1F); // POP DS 4501 } 4502 cdbx.gen1(0x58 + BP); // POP BP 4503 if (config.wflags & WFincbp) 4504 cdbx.gen1(0x48 + BP); // DEC BP 4505 assert(hasframe); 4506 } 4507 else 4508 { 4509 if (needframe || (xlocalsize && hasframe)) 4510 { 4511 L4: 4512 assert(hasframe); 4513 if (xlocalsize || enforcealign) 4514 { 4515 if (config.flags2 & CFG2stomp) 4516 { /* MOV ECX,0xBEAF 4517 * L1: 4518 * MOV [ESP],ECX 4519 * ADD ESP,4 4520 * CMP EBP,ESP 4521 * JNE L1 4522 * POP EBP 4523 */ 4524 /* Value should be: 4525 * 1. != 0 (code checks for null pointers) 4526 * 2. be odd (to mess up alignment) 4527 * 3. fall in first 64K (likely marked as inaccessible) 4528 * 4. be a value that stands out in the debugger 4529 */ 4530 assert(I32 || I64); 4531 targ_size_t value = 0x0000BEAF; 4532 reg_t regcx = CX; 4533 mfuncreg &= ~mask(regcx); 4534 uint grex = I64 ? 
REX_W << 16 : 0; 4535 cdbx.genc2(0xC7,grex | modregrmx(3,0,regcx),value); // MOV regcx,value 4536 cdbx.gen2sib(0x89,grex | modregrm(0,regcx,4),modregrm(0,4,SP)); // MOV [ESP],regcx 4537 code *c1 = cdbx.last(); 4538 cdbx.genc2(0x81,grex | modregrm(3,0,SP),REGSIZE); // ADD ESP,REGSIZE 4539 genregs(cdbx,0x39,SP,BP); // CMP EBP,ESP 4540 if (I64) 4541 code_orrex(cdbx.last(),REX_W); 4542 genjmp(cdbx,JNE,FLcode,cast(block *)c1); // JNE L1 4543 // explicitly mark as short jump, needed for correct retsize calculation (Bugzilla 15779) 4544 cdbx.last().Iflags &= ~CFjmp16; 4545 cdbx.gen1(0x58 + BP); // POP BP 4546 } 4547 else if (config.exe == EX_WIN64) 4548 { // See https://msdn.microsoft.com/en-us/library/tawsa7cb%28v=vs.100%29.aspx 4549 // LEA RSP,0[RBP] 4550 cdbx.genc1(LEA,(REX_W<<16)|modregrm(2,SP,BPRM),FLconst,0); 4551 cdbx.gen1(0x58 + BP); // POP RBP 4552 } 4553 else if (config.target_cpu >= TARGET_80286 && 4554 !(config.target_cpu >= TARGET_80386 && config.flags4 & CFG4speed) 4555 ) 4556 cdbx.gen1(LEAVE); // LEAVE 4557 else if (0 && xlocalsize == REGSIZE && Alloca.size == 0 && I32) 4558 { // This doesn't work - I should figure out why 4559 mfuncreg &= ~mask(regx); 4560 cdbx.gen1(0x58 + regx); // POP regx 4561 cdbx.gen1(0x58 + BP); // POP BP 4562 } 4563 else 4564 { 4565 genregs(cdbx,0x8B,SP,BP); // MOV SP,BP 4566 if (I64) 4567 code_orrex(cdbx.last(), REX_W); // MOV RSP,RBP 4568 cdbx.gen1(0x58 + BP); // POP BP 4569 } 4570 } 4571 else 4572 cdbx.gen1(0x58 + BP); // POP BP 4573 if (config.wflags & WFincbp && farfunc) 4574 cdbx.gen1(0x48 + BP); // DEC BP 4575 } 4576 else if (xlocalsize == REGSIZE && (!I16 || b.BC == BCret)) 4577 { 4578 mfuncreg &= ~mask(regx); 4579 cdbx.gen1(0x58 + regx); // POP regx 4580 } 4581 else if (xlocalsize) 4582 cod3_stackadj(cdbx, cast(int)-xlocalsize); 4583 } 4584 if (b.BC == BCret || b.BC == BCretexp) 4585 { 4586 Lret: 4587 opcode_t op = tyfarfunc(tym) ? 0xCA : 0xC2; 4588 if (tym == TYhfunc) 4589 { 4590 cdbx.genc2(0xC2,0,4); // RET 4 4591 } 4592 else if (!typfunc(tym) || // if caller cleans the stack 4593 config.exe == EX_WIN64 || 4594 Para.offset == 0) // or nothing pushed on the stack anyway 4595 { 4596 op++; // to a regular RET 4597 cdbx.gen1(op); 4598 } 4599 else 4600 { // Stack is always aligned on register size boundary 4601 Para.offset = (Para.offset + (REGSIZE - 1)) & ~(REGSIZE - 1); 4602 if (Para.offset >= 0x10000) 4603 { 4604 /* 4605 POP REG 4606 ADD ESP, Para.offset 4607 JMP REG 4608 */ 4609 cdbx.gen1(0x58+regx); 4610 cdbx.genc2(0x81, modregrm(3,0,SP), Para.offset); 4611 if (I64) 4612 code_orrex(cdbx.last(), REX_W); 4613 cdbx.genc2(0xFF, modregrm(3,4,regx), 0); 4614 if (I64) 4615 code_orrex(cdbx.last(), REX_W); 4616 } 4617 else 4618 cdbx.genc2(op,0,Para.offset); // RET Para.offset 4619 } 4620 } 4621 4622 Lopt: 4623 // If last instruction in ce is ADD SP,imm, and first instruction 4624 // in c sets SP, we can dump the ADD. 
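    // For example:
    //      ADD SP,imm ; LEAVE        =>  LEAVE               (ADD removed)
    //      ADD SP,imm ; MOV SP,BP    =>  MOV SP,BP           (ADD removed)
    //      ADD SP,imm ; POP BP       =>  MOV SP,BP ; POP BP  (ADD rewritten)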
4625 CodeBuilder cdb; cdb.ctor(); 4626 cdb.append(b.Bcode); 4627 code *cr = cdb.last(); 4628 code *c = cdbx.peek(); 4629 if (cr && c && !I64) 4630 { 4631 if (cr.Iop == 0x81 && cr.Irm == modregrm(3,0,SP)) // if ADD SP,imm 4632 { 4633 if ( 4634 c.Iop == LEAVE || // LEAVE 4635 (c.Iop == 0x8B && c.Irm == modregrm(3,SP,BP)) || // MOV SP,BP 4636 (c.Iop == LEA && c.Irm == modregrm(1,SP,6)) // LEA SP,-imm[BP] 4637 ) 4638 cr.Iop = NOP; 4639 else if (c.Iop == 0x58 + BP) // if POP BP 4640 { 4641 cr.Iop = 0x8B; 4642 cr.Irm = modregrm(3,SP,BP); // MOV SP,BP 4643 } 4644 } 4645 else 4646 { 4647 static if (0) 4648 { 4649 // These optimizations don't work if the called function 4650 // cleans off the stack. 4651 if (c.Iop == 0xC3 && cr.Iop == CALL) // CALL near 4652 { 4653 cr.Iop = 0xE9; // JMP near 4654 c.Iop = NOP; 4655 } 4656 else if (c.Iop == 0xCB && cr.Iop == 0x9A) // CALL far 4657 { 4658 cr.Iop = 0xEA; // JMP far 4659 c.Iop = NOP; 4660 } 4661 } 4662 } 4663 } 4664 4665 pinholeopt(c, null); 4666 retsize += calcblksize(c); // compute size of function epilog 4667 cdb.append(cdbx); 4668 b.Bcode = cdb.finish(); 4669 } 4670 4671 /******************************* 4672 * Return offset of SP from BP. 4673 */ 4674 4675 @trusted 4676 targ_size_t cod3_spoff() 4677 { 4678 //printf("spoff = x%x, localsize = x%x\n", cast(int)spoff, cast(int)localsize); 4679 return spoff + localsize; 4680 } 4681 4682 @trusted 4683 void gen_spill_reg(ref CodeBuilder cdb, Symbol* s, bool toreg) 4684 { 4685 code cs; 4686 const regm_t keepmsk = toreg ? RMload : RMstore; 4687 4688 elem* e = el_var(s); // so we can trick getlvalue() into working for us 4689 4690 if (mask(s.Sreglsw) & XMMREGS) 4691 { // Convert to save/restore of XMM register 4692 if (toreg) 4693 cs.Iop = xmmload(s.Stype.Tty); // MOVSS/D xreg,mem 4694 else 4695 cs.Iop = xmmstore(s.Stype.Tty); // MOVSS/D mem,xreg 4696 getlvalue(cdb,&cs,e,keepmsk); 4697 cs.orReg(s.Sreglsw - XMM0); 4698 cdb.gen(&cs); 4699 } 4700 else 4701 { 4702 const int sz = cast(int)type_size(s.Stype); 4703 cs.Iop = toreg ? 0x8B : 0x89; // MOV reg,mem[ESP] : MOV mem[ESP],reg 4704 cs.Iop ^= (sz == 1); 4705 getlvalue(cdb,&cs,e,keepmsk); 4706 cs.orReg(s.Sreglsw); 4707 if (I64 && sz == 1 && s.Sreglsw >= 4) 4708 cs.Irex |= REX; 4709 if ((cs.Irm & 0xC0) == 0xC0 && // reg,reg 4710 (((cs.Irm >> 3) ^ cs.Irm) & 7) == 0 && // registers match 4711 (((cs.Irex >> 2) ^ cs.Irex) & 1) == 0) // REX_R and REX_B match 4712 { } // skip MOV reg,reg 4713 else 4714 cdb.gen(&cs); 4715 if (sz > REGSIZE) 4716 { 4717 cs.setReg(s.Sregmsw); 4718 getlvalue_msw(&cs); 4719 if ((cs.Irm & 0xC0) == 0xC0 && // reg,reg 4720 (((cs.Irm >> 3) ^ cs.Irm) & 7) == 0 && // registers match 4721 (((cs.Irex >> 2) ^ cs.Irex) & 1) == 0) // REX_R and REX_B match 4722 { } // skip MOV reg,reg 4723 else 4724 cdb.gen(&cs); 4725 } 4726 } 4727 4728 el_free(e); 4729 } 4730 4731 /**************************** 4732 * Generate code for, and output a thunk. 
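 * The thunk adjusts the incoming 'this' pointer by d, then jumps to sfunc,
 * either directly or, for a virtual call, through the vtbl[] entry at offset i.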
4733 * Params: 4734 * sthunk = Symbol of thunk 4735 * sfunc = Symbol of thunk's target function 4736 * thisty = Type of this pointer 4737 * p = ESP parameter offset to this pointer 4738 * d = offset to add to 'this' pointer 4739 * d2 = offset from 'this' to vptr 4740 * i = offset into vtbl[] 4741 */ 4742 4743 @trusted 4744 void cod3_thunk(Symbol *sthunk,Symbol *sfunc,uint p,tym_t thisty, 4745 uint d,int i,uint d2) 4746 { 4747 targ_size_t thunkoffset; 4748 4749 int seg = sthunk.Sseg; 4750 cod3_align(seg); 4751 4752 // Skip over return address 4753 tym_t thunkty = tybasic(sthunk.ty()); 4754 if (tyfarfunc(thunkty)) 4755 p += I32 ? 8 : tysize(TYfptr); // far function 4756 else 4757 p += tysize(TYnptr); 4758 if (tybasic(sfunc.ty()) == TYhfunc) 4759 p += tysize(TYnptr); // skip over hidden pointer 4760 4761 CodeBuilder cdb; cdb.ctor(); 4762 if (!I16) 4763 { 4764 /* 4765 Generate: 4766 ADD p[ESP],d 4767 For direct call: 4768 JMP sfunc 4769 For virtual call: 4770 MOV EAX, p[ESP] EAX = this 4771 MOV EAX, d2[EAX] EAX = this.vptr 4772 JMP i[EAX] jump to virtual function 4773 */ 4774 reg_t reg = 0; 4775 if (cast(int)d < 0) 4776 { 4777 d = -d; 4778 reg = 5; // switch from ADD to SUB 4779 } 4780 if (thunkty == TYmfunc) 4781 { // ADD ECX,d 4782 if (d) 4783 cdb.genc2(0x81,modregrm(3,reg,CX),d); 4784 } 4785 else if (thunkty == TYjfunc || (I64 && thunkty == TYnfunc)) 4786 { // ADD EAX,d 4787 int rm = AX; 4788 if (config.exe == EX_WIN64) 4789 rm = CX; 4790 else if (I64) 4791 rm = (thunkty == TYnfunc && (sfunc.Sfunc.Fflags3 & F3hiddenPtr)) ? SI : DI; 4792 if (d) 4793 cdb.genc2(0x81,modregrm(3,reg,rm),d); 4794 } 4795 else 4796 { 4797 cdb.genc(0x81,modregrm(2,reg,4), 4798 FLconst,p, // to this 4799 FLconst,d); // ADD p[ESP],d 4800 cdb.last().Isib = modregrm(0,4,SP); 4801 } 4802 if (I64 && cdb.peek()) 4803 cdb.last().Irex |= REX_W; 4804 } 4805 else 4806 { 4807 /* 4808 Generate: 4809 MOV BX,SP 4810 ADD [SS:] p[BX],d 4811 For direct call: 4812 JMP sfunc 4813 For virtual call: 4814 MOV BX, p[BX] BX = this 4815 MOV BX, d2[BX] BX = this.vptr 4816 JMP i[BX] jump to virtual function 4817 */ 4818 4819 genregs(cdb,0x89,SP,BX); // MOV BX,SP 4820 cdb.genc(0x81,modregrm(2,0,7), 4821 FLconst,p, // to this 4822 FLconst,d); // ADD p[BX],d 4823 if (config.wflags & WFssneds || 4824 // If DS needs reloading from SS, 4825 // then assume SS != DS on thunk entry 4826 (LARGEDATA && config.wflags & WFss)) 4827 cdb.last().Iflags |= CFss; // SS: 4828 } 4829 4830 if ((i & 0xFFFF) != 0xFFFF) // if virtual call 4831 { 4832 const bool FARTHIS = (tysize(thisty) > REGSIZE); 4833 const bool FARVPTR = FARTHIS; 4834 4835 assert(thisty != TYvptr); // can't handle this case 4836 4837 if (!I16) 4838 { 4839 assert(!FARTHIS && !LARGECODE); 4840 if (thunkty == TYmfunc) // if 'this' is in ECX 4841 { 4842 // MOV EAX,d2[ECX] 4843 cdb.genc1(0x8B,modregrm(2,AX,CX),FLconst,d2); 4844 } 4845 else if (thunkty == TYjfunc) // if 'this' is in EAX 4846 { 4847 // MOV EAX,d2[EAX] 4848 cdb.genc1(0x8B,modregrm(2,AX,AX),FLconst,d2); 4849 } 4850 else 4851 { 4852 // MOV EAX,p[ESP] 4853 cdb.genc1(0x8B,(modregrm(0,4,SP) << 8) | modregrm(2,AX,4),FLconst,cast(targ_uns) p); 4854 if (I64) 4855 cdb.last().Irex |= REX_W; 4856 4857 // MOV EAX,d2[EAX] 4858 cdb.genc1(0x8B,modregrm(2,AX,AX),FLconst,d2); 4859 } 4860 if (I64) 4861 code_orrex(cdb.last(), REX_W); 4862 // JMP i[EAX] 4863 cdb.genc1(0xFF,modregrm(2,4,0),FLconst,cast(targ_uns) i); 4864 } 4865 else 4866 { 4867 // MOV/LES BX,[SS:] p[BX] 4868 cdb.genc1((FARTHIS ? 
0xC4 : 0x8B),modregrm(2,BX,7),FLconst,cast(targ_uns) p); 4869 if (config.wflags & WFssneds || 4870 // If DS needs reloading from SS, 4871 // then assume SS != DS on thunk entry 4872 (LARGEDATA && config.wflags & WFss)) 4873 cdb.last().Iflags |= CFss; // SS: 4874 4875 // MOV/LES BX,[ES:]d2[BX] 4876 cdb.genc1((FARVPTR ? 0xC4 : 0x8B),modregrm(2,BX,7),FLconst,d2); 4877 if (FARTHIS) 4878 cdb.last().Iflags |= CFes; // ES: 4879 4880 // JMP i[BX] 4881 cdb.genc1(0xFF,modregrm(2,(LARGECODE ? 5 : 4),7),FLconst,cast(targ_uns) i); 4882 if (FARVPTR) 4883 cdb.last().Iflags |= CFes; // ES: 4884 } 4885 } 4886 else 4887 { 4888 if (config.flags3 & CFG3pic) 4889 { 4890 localgot = null; // no local variables 4891 CodeBuilder cdbgot; cdbgot.ctor(); 4892 load_localgot(cdbgot); // load GOT in EBX 4893 code *c1 = cdbgot.finish(); 4894 if (c1) 4895 { 4896 assignaddrc(c1); 4897 cdb.append(c1); 4898 } 4899 } 4900 cdb.gencs((LARGECODE ? 0xEA : 0xE9),0,FLfunc,sfunc); // JMP sfunc 4901 cdb.last().Iflags |= LARGECODE ? (CFseg | CFoff) : (CFselfrel | CFoff); 4902 } 4903 4904 thunkoffset = Offset(seg); 4905 code *c = cdb.finish(); 4906 pinholeopt(c,null); 4907 codout(seg,c,null); 4908 code_free(c); 4909 4910 sthunk.Soffset = thunkoffset; 4911 sthunk.Ssize = Offset(seg) - thunkoffset; // size of thunk 4912 sthunk.Sseg = seg; 4913 if (config.exe & EX_posix || 4914 config.objfmt == OBJ_MSCOFF) 4915 { 4916 objmod.pubdef(seg,sthunk,sthunk.Soffset); 4917 } 4918 searchfixlist(sthunk); // resolve forward refs 4919 } 4920 4921 /***************************** 4922 * Assume symbol s is extern. 4923 */ 4924 4925 @trusted 4926 void makeitextern(Symbol *s) 4927 { 4928 if (s.Sxtrnnum == 0) 4929 { 4930 s.Sclass = SC.extern_; /* external */ 4931 /*printf("makeitextern(x%x)\n",s);*/ 4932 objmod.external(s); 4933 } 4934 } 4935 4936 4937 /******************************* 4938 * Replace JMPs in Bgotocode with JMP SHORTs whereever possible. 4939 * This routine depends on FLcode jumps to only be forward 4940 * referenced. 4941 * BFLjmpoptdone is set to true if nothing more can be done 4942 * with this block. 4943 * Input: 4944 * flag !=0 means don't have correct Boffsets yet 4945 * Returns: 4946 * number of bytes saved 4947 */ 4948 4949 @trusted 4950 int branch(block *bl,int flag) 4951 { 4952 int bytesaved; 4953 code* c,cn,ct; 4954 targ_size_t offset,disp; 4955 targ_size_t csize; 4956 4957 if (!flag) 4958 bl.Bflags |= BFLjmpoptdone; // assume this will be all 4959 c = bl.Bcode; 4960 if (!c) 4961 return 0; 4962 bytesaved = 0; 4963 offset = bl.Boffset; /* offset of start of block */ 4964 while (1) 4965 { 4966 ubyte op; 4967 4968 csize = calccodsize(c); 4969 cn = code_next(c); 4970 op = cast(ubyte)c.Iop; 4971 if ((op & ~0x0F) == 0x70 && c.Iflags & CFjmp16 || 4972 (op == JMP && !(c.Iflags & CFjmp5))) 4973 { 4974 L1: 4975 switch (c.IFL2) 4976 { 4977 case FLblock: 4978 if (flag) // no offsets yet, don't optimize 4979 goto L3; 4980 disp = c.IEV2.Vblock.Boffset - offset - csize; 4981 4982 /* If this is a forward branch, and there is an aligned 4983 * block intervening, it is possible that shrinking 4984 * the jump instruction will cause it to be out of 4985 * range of the target. This happens if the alignment 4986 * prevents the target block from moving correspondingly 4987 * closer. 
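                 * So scan forward for an intervening Balign block; if one is
                 * found, leave the jump in its long form.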
4988 */ 4989 if (disp >= 0x7F-4 && c.IEV2.Vblock.Boffset > offset) 4990 { /* Look for intervening alignment 4991 */ 4992 for (block *b = bl.Bnext; b; b = b.Bnext) 4993 { 4994 if (b.Balign) 4995 { 4996 bl.Bflags &= ~BFLjmpoptdone; // some JMPs left 4997 goto L3; 4998 } 4999 if (b == c.IEV2.Vblock) 5000 break; 5001 } 5002 } 5003 5004 break; 5005 5006 case FLcode: 5007 { 5008 code *cr; 5009 5010 disp = 0; 5011 5012 ct = c.IEV2.Vcode; /* target of branch */ 5013 assert(ct.Iflags & (CFtarg | CFtarg2)); 5014 for (cr = cn; cr; cr = code_next(cr)) 5015 { 5016 if (cr == ct) 5017 break; 5018 disp += calccodsize(cr); 5019 } 5020 5021 if (!cr) 5022 { // Didn't find it in forward search. Try backwards jump 5023 int s = 0; 5024 disp = 0; 5025 for (cr = bl.Bcode; cr != cn; cr = code_next(cr)) 5026 { 5027 assert(cr != null); // must have found it 5028 if (cr == ct) 5029 s = 1; 5030 if (s) 5031 disp += calccodsize(cr); 5032 } 5033 } 5034 5035 if (config.flags4 & CFG4optimized && !flag) 5036 { 5037 /* Propagate branch forward past junk */ 5038 while (1) 5039 { 5040 if (ct.Iop == NOP || 5041 ct.Iop == (ESCAPE | ESClinnum)) 5042 { 5043 ct = code_next(ct); 5044 if (!ct) 5045 goto L2; 5046 } 5047 else 5048 { 5049 c.IEV2.Vcode = ct; 5050 ct.Iflags |= CFtarg; 5051 break; 5052 } 5053 } 5054 5055 /* And eliminate jmps to jmps */ 5056 if ((op == ct.Iop || ct.Iop == JMP) && 5057 (op == JMP || c.Iflags & CFjmp16)) 5058 { 5059 c.IFL2 = ct.IFL2; 5060 c.IEV2.Vcode = ct.IEV2.Vcode; 5061 /*printf("eliminating branch\n");*/ 5062 goto L1; 5063 } 5064 L2: 5065 { } 5066 } 5067 } 5068 break; 5069 5070 default: 5071 goto L3; 5072 } 5073 5074 if (disp == 0) // bra to next instruction 5075 { 5076 bytesaved += csize; 5077 c.Iop = NOP; // del branch instruction 5078 c.IEV2.Vcode = null; 5079 c = cn; 5080 if (!c) 5081 break; 5082 continue; 5083 } 5084 else if (cast(targ_size_t)cast(targ_schar)(disp - 2) == (disp - 2) && 5085 cast(targ_size_t)cast(targ_schar)disp == disp) 5086 { 5087 if (op == JMP) 5088 { 5089 c.Iop = JMPS; // JMP SHORT 5090 bytesaved += I16 ? 1 : 3; 5091 } 5092 else // else Jcond 5093 { 5094 c.Iflags &= ~CFjmp16; // a branch is ok 5095 bytesaved += I16 ? 3 : 4; 5096 5097 // Replace a cond jump around a call to a function that 5098 // never returns with a cond jump to that function. 5099 if (config.flags4 & CFG4optimized && 5100 config.target_cpu >= TARGET_80386 && 5101 disp == (I16 ? 3 : 5) && 5102 cn && 5103 cn.Iop == CALL && 5104 cn.IFL2 == FLfunc && 5105 cn.IEV2.Vsym.Sflags & SFLexit && 5106 !(cn.Iflags & (CFtarg | CFtarg2)) 5107 ) 5108 { 5109 cn.Iop = 0x0F00 | ((c.Iop & 0x0F) ^ 0x81); 5110 c.Iop = NOP; 5111 c.IEV2.Vcode = null; 5112 bytesaved++; 5113 5114 // If nobody else points to ct, we can remove the CFtarg 5115 if (flag && ct) 5116 { 5117 code *cx; 5118 for (cx = bl.Bcode; 1; cx = code_next(cx)) 5119 { 5120 if (!cx) 5121 { 5122 ct.Iflags &= ~CFtarg; 5123 break; 5124 } 5125 if (cx.IEV2.Vcode == ct) 5126 break; 5127 } 5128 } 5129 } 5130 } 5131 csize = calccodsize(c); 5132 } 5133 else 5134 bl.Bflags &= ~BFLjmpoptdone; // some JMPs left 5135 } 5136 L3: 5137 if (cn) 5138 { 5139 offset += csize; 5140 c = cn; 5141 } 5142 else 5143 break; 5144 } 5145 //printf("bytesaved = x%x\n",bytesaved); 5146 return bytesaved; 5147 } 5148 5149 5150 /************************************************ 5151 * Adjust all Soffset's of stack variables so they 5152 * are all relative to the frame pointer. 
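 * Parameters get Para.size added, SCfastpar gets Fast.size + BPoff, and
 * SCauto/SCregister get Auto.size + BPoff (Fast.size + BPoff if Sfl == FLfast).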
5153 */ 5154 5155 version (MARS) 5156 { 5157 @trusted 5158 void cod3_adjSymOffsets() 5159 { 5160 SYMIDX si; 5161 5162 //printf("cod3_adjSymOffsets()\n"); 5163 for (si = 0; si < globsym.length; si++) 5164 { 5165 //printf("\tglobsym[%d] = %p\n",si,globsym[si]); 5166 Symbol *s = globsym[si]; 5167 5168 switch (s.Sclass) 5169 { 5170 case SC.parameter: 5171 case SC.regpar: 5172 case SC.shadowreg: 5173 //printf("s = '%s', Soffset = x%x, Para.size = x%x, EBPtoESP = x%x\n", s.Sident, s.Soffset, Para.size, EBPtoESP); 5174 s.Soffset += Para.size; 5175 if (0 && !(funcsym_p.Sfunc.Fflags3 & Fmember)) 5176 { 5177 if (!hasframe) 5178 s.Soffset += EBPtoESP; 5179 if (funcsym_p.Sfunc.Fflags3 & Fnested) 5180 s.Soffset += REGSIZE; 5181 } 5182 break; 5183 5184 case SC.fastpar: 5185 //printf("\tfastpar %s %p Soffset %x Fast.size %x BPoff %x\n", s.Sident, s, cast(int)s.Soffset, cast(int)Fast.size, cast(int)BPoff); 5186 s.Soffset += Fast.size + BPoff; 5187 break; 5188 5189 case SC.auto_: 5190 case SC.register: 5191 if (s.Sfl == FLfast) 5192 s.Soffset += Fast.size + BPoff; 5193 else 5194 //printf("s = '%s', Soffset = x%x, Auto.size = x%x, BPoff = x%x EBPtoESP = x%x\n", s.Sident, cast(int)s.Soffset, cast(int)Auto.size, cast(int)BPoff, cast(int)EBPtoESP); 5195 // if (!(funcsym_p.Sfunc.Fflags3 & Fnested)) 5196 s.Soffset += Auto.size + BPoff; 5197 break; 5198 5199 case SC.bprel: 5200 break; 5201 5202 default: 5203 continue; 5204 } 5205 static if (0) 5206 { 5207 if (!hasframe) 5208 s.Soffset += EBPtoESP; 5209 } 5210 } 5211 } 5212 5213 } 5214 5215 /******************************* 5216 * Take symbol info in union ev and replace it with a real address 5217 * in Vpointer. 5218 */ 5219 5220 @trusted 5221 void assignaddr(block *bl) 5222 { 5223 int EBPtoESPsave = EBPtoESP; 5224 int hasframesave = hasframe; 5225 5226 if (bl.Bflags & BFLoutsideprolog) 5227 { 5228 EBPtoESP = -REGSIZE; 5229 hasframe = 0; 5230 } 5231 assignaddrc(bl.Bcode); 5232 hasframe = hasframesave; 5233 EBPtoESP = EBPtoESPsave; 5234 } 5235 5236 @trusted 5237 void assignaddrc(code *c) 5238 { 5239 int sn; 5240 Symbol *s; 5241 ubyte ins,rm; 5242 targ_size_t soff; 5243 targ_size_t base; 5244 5245 base = EBPtoESP; 5246 for (; c; c = code_next(c)) 5247 { 5248 debug 5249 { 5250 if (0) 5251 { printf("assignaddrc()\n"); 5252 code_print(c); 5253 } 5254 if (code_next(c) && code_next(code_next(c)) == c) 5255 assert(0); 5256 } 5257 5258 if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4) 5259 ins = vex_inssize(c); 5260 else if ((c.Iop & 0xFFFD00) == 0x0F3800) 5261 ins = inssize2[(c.Iop >> 8) & 0xFF]; 5262 else if ((c.Iop & 0xFF00) == 0x0F00) 5263 ins = inssize2[c.Iop & 0xFF]; 5264 else if ((c.Iop & 0xFF) == ESCAPE) 5265 { 5266 if (c.Iop == (ESCAPE | ESCadjesp)) 5267 { 5268 //printf("adjusting EBPtoESP (%d) by %ld\n",EBPtoESP,cast(long)c.IEV1.Vint); 5269 EBPtoESP += c.IEV1.Vint; 5270 c.Iop = NOP; 5271 } 5272 else if (c.Iop == (ESCAPE | ESCfixesp)) 5273 { 5274 //printf("fix ESP\n"); 5275 if (hasframe) 5276 { 5277 // LEA ESP,-EBPtoESP[EBP] 5278 c.Iop = LEA; 5279 if (c.Irm & 8) 5280 c.Irex |= REX_R; 5281 c.Irm = modregrm(2,SP,BP); 5282 c.Iflags = CFoff; 5283 c.IFL1 = FLconst; 5284 c.IEV1.Vuns = -EBPtoESP; 5285 if (enforcealign) 5286 { 5287 // AND ESP, -STACKALIGN 5288 code *cn = code_calloc(); 5289 cn.Iop = 0x81; 5290 cn.Irm = modregrm(3, 4, SP); 5291 cn.Iflags = CFoff; 5292 cn.IFL2 = FLconst; 5293 cn.IEV2.Vsize_t = -STACKALIGN; 5294 if (I64) 5295 c.Irex |= REX_W; 5296 cn.next = c.next; 5297 c.next = cn; 5298 } 5299 } 5300 } 5301 else if (c.Iop == (ESCAPE | ESCframeptr)) 5302 { // 
Convert to load of frame pointer 5303 // c.Irm is the register to use 5304 if (hasframe && !enforcealign) 5305 { // MOV reg,EBP 5306 c.Iop = 0x89; 5307 if (c.Irm & 8) 5308 c.Irex |= REX_B; 5309 c.Irm = modregrm(3,BP,c.Irm & 7); 5310 } 5311 else 5312 { // LEA reg,EBPtoESP[ESP] 5313 c.Iop = LEA; 5314 if (c.Irm & 8) 5315 c.Irex |= REX_R; 5316 c.Irm = modregrm(2,c.Irm & 7,4); 5317 c.Isib = modregrm(0,4,SP); 5318 c.Iflags = CFoff; 5319 c.IFL1 = FLconst; 5320 c.IEV1.Vuns = EBPtoESP; 5321 } 5322 } 5323 if (I64) 5324 c.Irex |= REX_W; 5325 continue; 5326 } 5327 else 5328 ins = inssize[c.Iop & 0xFF]; 5329 if (!(ins & M) || 5330 ((rm = c.Irm) & 0xC0) == 0xC0) 5331 goto do2; /* if no first operand */ 5332 if (is32bitaddr(I32,c.Iflags)) 5333 { 5334 5335 if ( 5336 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5)) 5337 ) 5338 goto do2; /* if no first operand */ 5339 } 5340 else 5341 { 5342 if ( 5343 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) 5344 ) 5345 goto do2; /* if no first operand */ 5346 } 5347 s = c.IEV1.Vsym; 5348 switch (c.IFL1) 5349 { 5350 case FLdata: 5351 if (config.objfmt == OBJ_OMF && s.Sclass != SC.comdat && s.Sclass != SC.extern_) 5352 { 5353 version (MARS) 5354 { 5355 c.IEV1.Vseg = s.Sseg; 5356 } 5357 else 5358 { 5359 c.IEV1.Vseg = DATA; 5360 } 5361 c.IEV1.Vpointer += s.Soffset; 5362 c.IFL1 = FLdatseg; 5363 } 5364 else 5365 c.IFL1 = FLextern; 5366 goto do2; 5367 5368 case FLudata: 5369 if (config.objfmt == OBJ_OMF) 5370 { 5371 version (MARS) 5372 { 5373 c.IEV1.Vseg = s.Sseg; 5374 } 5375 else 5376 { 5377 c.IEV1.Vseg = UDATA; 5378 } 5379 c.IEV1.Vpointer += s.Soffset; 5380 c.IFL1 = FLdatseg; 5381 } 5382 else 5383 c.IFL1 = FLextern; 5384 goto do2; 5385 5386 case FLtlsdata: 5387 if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH) 5388 c.IFL1 = FLextern; 5389 goto do2; 5390 5391 case FLdatseg: 5392 //c.IEV1.Vseg = DATA; 5393 goto do2; 5394 5395 case FLfardata: 5396 case FLcsdata: 5397 case FLpseudo: 5398 goto do2; 5399 5400 case FLstack: 5401 //printf("Soffset = %d, EBPtoESP = %d, base = %d, pointer = %d\n", 5402 //s.Soffset,EBPtoESP,base,c.IEV1.Vpointer); 5403 c.IEV1.Vpointer += s.Soffset + EBPtoESP - base - EEStack.offset; 5404 break; 5405 5406 case FLfast: 5407 soff = Fast.size; 5408 goto L1; 5409 5410 case FLreg: 5411 case FLauto: 5412 soff = Auto.size; 5413 L1: 5414 if (Symbol_Sisdead(*s, anyiasm)) 5415 { 5416 c.Iop = NOP; // remove references to it 5417 continue; 5418 } 5419 if (s.Sfl == FLreg && c.IEV1.Vpointer < 2) 5420 { 5421 reg_t reg = s.Sreglsw; 5422 5423 assert(!(s.Sregm & ~mask(reg))); 5424 if (c.IEV1.Vpointer == 1) 5425 { 5426 assert(reg < 4); /* must be a BYTEREGS */ 5427 reg |= 4; /* convert to high byte reg */ 5428 } 5429 if (reg & 8) 5430 { 5431 assert(I64); 5432 c.Irex |= REX_B; 5433 reg &= 7; 5434 } 5435 c.Irm = (c.Irm & modregrm(0,7,0)) 5436 | modregrm(3,0,reg); 5437 assert(c.Iop != LES && c.Iop != LEA); 5438 goto do2; 5439 } 5440 else 5441 { c.IEV1.Vpointer += s.Soffset + soff + BPoff; 5442 if (s.Sflags & SFLunambig) 5443 c.Iflags |= CFunambig; 5444 L2: 5445 if (!hasframe || (enforcealign && c.IFL1 != FLpara)) 5446 { /* Convert to ESP relative address instead of EBP */ 5447 assert(!I16); 5448 c.IEV1.Vpointer += EBPtoESP; 5449 ubyte crm = c.Irm; 5450 if ((crm & 7) == 4) // if SIB byte 5451 { 5452 assert((c.Isib & 7) == BP); 5453 assert((crm & 0xC0) != 0); 5454 c.Isib = (c.Isib & ~7) | modregrm(0,0,SP); 5455 } 5456 else 5457 { 5458 assert((crm & 7) == 5); 5459 c.Irm = (crm & modregrm(0,7,0)) 5460 | modregrm(2,0,4); 5461 c.Isib = 
modregrm(0,4,SP); 5462 } 5463 } 5464 } 5465 break; 5466 5467 case FLpara: 5468 //printf("s = %s, Soffset = %d, Para.size = %d, BPoff = %d, EBPtoESP = %d, Vpointer = %d\n", 5469 //s.Sident.ptr, cast(int)s.Soffset, cast(int)Para.size, cast(int)BPoff, 5470 //cast(int)EBPtoESP, cast(int)c.IEV1.Vpointer); 5471 soff = Para.size - BPoff; // cancel out add of BPoff 5472 goto L1; 5473 5474 case FLfltreg: 5475 c.IEV1.Vpointer += Foff + BPoff; 5476 c.Iflags |= CFunambig; 5477 goto L2; 5478 5479 case FLallocatmp: 5480 c.IEV1.Vpointer += Alloca.offset + BPoff; 5481 goto L2; 5482 5483 case FLfuncarg: 5484 c.IEV1.Vpointer += cgstate.funcarg.offset + BPoff; 5485 goto L2; 5486 5487 case FLbprel: 5488 c.IEV1.Vpointer += s.Soffset; 5489 break; 5490 5491 case FLcs: 5492 sn = c.IEV1.Vuns; 5493 if (!CSE.loaded(sn)) // if never loaded 5494 { 5495 c.Iop = NOP; 5496 continue; 5497 } 5498 c.IEV1.Vpointer = CSE.offset(sn) + CSoff + BPoff; 5499 c.Iflags |= CFunambig; 5500 goto L2; 5501 5502 case FLregsave: 5503 sn = c.IEV1.Vuns; 5504 c.IEV1.Vpointer = sn + regsave.off + BPoff; 5505 c.Iflags |= CFunambig; 5506 goto L2; 5507 5508 case FLndp: 5509 version (MARS) 5510 { 5511 assert(c.IEV1.Vuns < global87.save.length); 5512 } 5513 c.IEV1.Vpointer = c.IEV1.Vuns * tysize(TYldouble) + NDPoff + BPoff; 5514 c.Iflags |= CFunambig; 5515 goto L2; 5516 5517 case FLoffset: 5518 break; 5519 5520 case FLlocalsize: 5521 c.IEV1.Vpointer += localsize; 5522 break; 5523 5524 case FLconst: 5525 default: 5526 goto do2; 5527 } 5528 c.IFL1 = FLconst; 5529 do2: 5530 /* Ignore TEST (F6 and F7) opcodes */ 5531 if (!(ins & T)) goto done; /* if no second operand */ 5532 s = c.IEV2.Vsym; 5533 switch (c.IFL2) 5534 { 5535 case FLdata: 5536 if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH) 5537 { 5538 c.IFL2 = FLextern; 5539 goto do2; 5540 } 5541 else 5542 { 5543 if (s.Sclass == SC.comdat) 5544 { c.IFL2 = FLextern; 5545 goto do2; 5546 } 5547 c.IEV2.Vseg = MARS ? s.Sseg : DATA; 5548 c.IEV2.Vpointer += s.Soffset; 5549 c.IFL2 = FLdatseg; 5550 goto done; 5551 } 5552 5553 case FLudata: 5554 if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH) 5555 { 5556 c.IFL2 = FLextern; 5557 goto do2; 5558 } 5559 else 5560 { 5561 c.IEV2.Vseg = MARS ? 
s.Sseg : UDATA; 5562 c.IEV2.Vpointer += s.Soffset; 5563 c.IFL2 = FLdatseg; 5564 goto done; 5565 } 5566 5567 case FLtlsdata: 5568 if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH) 5569 { 5570 c.IFL2 = FLextern; 5571 goto do2; 5572 } 5573 goto done; 5574 5575 case FLdatseg: 5576 //c.IEV2.Vseg = DATA; 5577 goto done; 5578 5579 case FLcsdata: 5580 case FLfardata: 5581 goto done; 5582 5583 case FLreg: 5584 case FLpseudo: 5585 assert(0); 5586 /* NOTREACHED */ 5587 5588 case FLfast: 5589 c.IEV2.Vpointer += s.Soffset + Fast.size + BPoff; 5590 break; 5591 5592 case FLauto: 5593 c.IEV2.Vpointer += s.Soffset + Auto.size + BPoff; 5594 L3: 5595 if (!hasframe || (enforcealign && c.IFL2 != FLpara)) 5596 /* Convert to ESP relative address instead of EBP */ 5597 c.IEV2.Vpointer += EBPtoESP; 5598 break; 5599 5600 case FLpara: 5601 c.IEV2.Vpointer += s.Soffset + Para.size; 5602 goto L3; 5603 5604 case FLfltreg: 5605 c.IEV2.Vpointer += Foff + BPoff; 5606 goto L3; 5607 5608 case FLallocatmp: 5609 c.IEV2.Vpointer += Alloca.offset + BPoff; 5610 goto L3; 5611 5612 case FLfuncarg: 5613 c.IEV2.Vpointer += cgstate.funcarg.offset + BPoff; 5614 goto L3; 5615 5616 case FLbprel: 5617 c.IEV2.Vpointer += s.Soffset; 5618 break; 5619 5620 case FLstack: 5621 c.IEV2.Vpointer += s.Soffset + EBPtoESP - base; 5622 break; 5623 5624 case FLcs: 5625 case FLndp: 5626 case FLregsave: 5627 assert(0); 5628 5629 case FLconst: 5630 break; 5631 5632 case FLlocalsize: 5633 c.IEV2.Vpointer += localsize; 5634 break; 5635 5636 default: 5637 goto done; 5638 } 5639 c.IFL2 = FLconst; 5640 done: 5641 { } 5642 } 5643 } 5644 5645 /******************************* 5646 * Return offset from BP of symbol s. 5647 */ 5648 5649 @trusted 5650 targ_size_t cod3_bpoffset(Symbol *s) 5651 { 5652 targ_size_t offset; 5653 5654 symbol_debug(s); 5655 offset = s.Soffset; 5656 switch (s.Sfl) 5657 { 5658 case FLpara: 5659 offset += Para.size; 5660 break; 5661 5662 case FLfast: 5663 offset += Fast.size + BPoff; 5664 break; 5665 5666 case FLauto: 5667 offset += Auto.size + BPoff; 5668 break; 5669 5670 default: 5671 WRFL(cast(FL)s.Sfl); 5672 symbol_print(s); 5673 assert(0); 5674 } 5675 assert(hasframe); 5676 return offset; 5677 } 5678 5679 5680 /******************************* 5681 * Find shorter versions of the same instructions. 5682 * Does these optimizations: 5683 * replaces jmps to the next instruction with NOPs 5684 * sign extension of modregrm displacement 5685 * sign extension of immediate data (can't do it for OR, AND, XOR 5686 * as the opcodes are not defined) 5687 * short versions for AX EA 5688 * short versions for reg EA 5689 * Code is neither removed nor added. 
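 * For example, CMP reg,0 becomes TEST reg,reg and AND reg,0 becomes XOR reg,reg.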
5690 * Params: 5691 * b = block for code (or null) 5692 * c = code list to optimize 5693 */ 5694 5695 @trusted 5696 void pinholeopt(code *c,block *b) 5697 { 5698 targ_size_t a; 5699 uint mod; 5700 ubyte ins; 5701 int usespace; 5702 int useopsize; 5703 int space; 5704 block *bn; 5705 5706 debug 5707 { 5708 __gshared int tested; if (!tested) { tested++; pinholeopt_unittest(); } 5709 } 5710 5711 debug 5712 { 5713 code *cstart = c; 5714 if (debugc) 5715 { 5716 printf("+pinholeopt(%p)\n",c); 5717 } 5718 } 5719 5720 if (b) 5721 { 5722 bn = b.Bnext; 5723 usespace = (config.flags4 & CFG4space && b.BC != BCasm); 5724 useopsize = (I16 || (config.flags4 & CFG4space && b.BC != BCasm)); 5725 } 5726 else 5727 { 5728 bn = null; 5729 usespace = (config.flags4 & CFG4space); 5730 useopsize = (I16 || config.flags4 & CFG4space); 5731 } 5732 for (; c; c = code_next(c)) 5733 { 5734 L1: 5735 opcode_t op = c.Iop; 5736 if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4) 5737 ins = vex_inssize(c); 5738 else if ((op & 0xFFFD00) == 0x0F3800) 5739 ins = inssize2[(op >> 8) & 0xFF]; 5740 else if ((op & 0xFF00) == 0x0F00) 5741 ins = inssize2[op & 0xFF]; 5742 else 5743 ins = inssize[op & 0xFF]; 5744 if (ins & M) // if modregrm byte 5745 { 5746 int shortop = (c.Iflags & CFopsize) ? !I16 : I16; 5747 int local_BPRM = BPRM; 5748 5749 if (c.Iflags & CFaddrsize) 5750 local_BPRM ^= 5 ^ 6; // toggle between 5 and 6 5751 5752 uint rm = c.Irm; 5753 reg_t reg = rm & modregrm(0,7,0); // isolate reg field 5754 reg_t ereg = rm & 7; 5755 //printf("c = %p, op = %02x rm = %02x\n", c, op, rm); 5756 5757 /* If immediate second operand */ 5758 if ((ins & T || 5759 ((op == 0xF6 || op == 0xF7) && (reg < modregrm(0,2,0) || reg > modregrm(0,3,0))) 5760 ) && 5761 c.IFL2 == FLconst) 5762 { 5763 int flags = c.Iflags & CFpsw; /* if want result in flags */ 5764 targ_long u = c.IEV2.Vuns; 5765 if (ins & E) 5766 u = cast(byte) u; 5767 else if (shortop) 5768 u = cast(short) u; 5769 5770 // Replace CMP reg,0 with TEST reg,reg 5771 if ((op & 0xFE) == 0x80 && // 80 is CMP R8,imm8; 81 is CMP reg,imm 5772 rm >= modregrm(3,7,AX) && 5773 u == 0) 5774 { 5775 c.Iop = (op & 1) | 0x84; 5776 c.Irm = modregrm(3,ereg,ereg); 5777 if (c.Irex & REX_B) 5778 c.Irex |= REX_R; 5779 goto L1; 5780 } 5781 5782 /* Optimize ANDs with an immediate constant */ 5783 if ((op == 0x81 || op == 0x80) && reg == modregrm(0,4,0)) 5784 { 5785 if (rm >= modregrm(3,4,AX)) // AND reg,imm 5786 { 5787 if (u == 0) 5788 { 5789 /* Replace with XOR reg,reg */ 5790 c.Iop = 0x30 | (op & 1); 5791 c.Irm = modregrm(3,ereg,ereg); 5792 if (c.Irex & REX_B) 5793 c.Irex |= REX_R; 5794 goto L1; 5795 } 5796 if (u == 0xFFFFFFFF && !flags) 5797 { 5798 c.Iop = NOP; 5799 goto L1; 5800 } 5801 } 5802 if (op == 0x81 && !flags) 5803 { // If we can do the operation in one byte 5804 5805 // If EA is not SI or DI 5806 if ((rm < modregrm(3,4,SP) || I64) && 5807 (config.flags4 & CFG4space || 5808 config.target_cpu < TARGET_PentiumPro) 5809 ) 5810 { 5811 if ((u & 0xFFFFFF00) == 0xFFFFFF00) 5812 goto L2; 5813 else if (rm < modregrm(3,0,0) || (!c.Irex && ereg < 4)) 5814 { 5815 if (!shortop) 5816 { 5817 if ((u & 0xFFFF00FF) == 0xFFFF00FF) 5818 goto L3; 5819 } 5820 else 5821 { 5822 if ((u & 0xFF) == 0xFF) 5823 goto L3; 5824 } 5825 } 5826 } 5827 if (!shortop && useopsize) 5828 { 5829 if ((u & 0xFFFF0000) == 0xFFFF0000) 5830 { 5831 c.Iflags ^= CFopsize; 5832 goto L1; 5833 } 5834 if ((u & 0xFFFF) == 0xFFFF && rm < modregrm(3,4,AX)) 5835 { 5836 c.IEV1.Voffset += 2; /* address MSW */ 5837 c.IEV2.Vuns >>= 16; 5838 c.Iflags ^= CFopsize; 5839 
goto L1; 5840 } 5841 if (rm >= modregrm(3,4,AX)) 5842 { 5843 if (u == 0xFF && (rm <= modregrm(3,4,BX) || I64)) 5844 { 5845 c.Iop = MOVZXb; // MOVZX 5846 c.Irm = modregrm(3,ereg,ereg); 5847 if (c.Irex & REX_B) 5848 c.Irex |= REX_R; 5849 goto L1; 5850 } 5851 if (u == 0xFFFF) 5852 { 5853 c.Iop = MOVZXw; // MOVZX 5854 c.Irm = modregrm(3,ereg,ereg); 5855 if (c.Irex & REX_B) 5856 c.Irex |= REX_R; 5857 goto L1; 5858 } 5859 } 5860 } 5861 } 5862 } 5863 5864 /* Look for ADD,OR,SUB,XOR with u that we can eliminate */ 5865 if (!flags && 5866 (op == 0x81 || op == 0x80) && 5867 (reg == modregrm(0,0,0) || reg == modregrm(0,1,0) || // ADD,OR 5868 reg == modregrm(0,5,0) || reg == modregrm(0,6,0)) // SUB, XOR 5869 ) 5870 { 5871 if (u == 0) 5872 { 5873 c.Iop = NOP; 5874 goto L1; 5875 } 5876 if (u == ~0 && reg == modregrm(0,6,0)) /* XOR */ 5877 { 5878 c.Iop = 0xF6 | (op & 1); /* NOT */ 5879 c.Irm ^= modregrm(0,6^2,0); 5880 goto L1; 5881 } 5882 if (!shortop && 5883 useopsize && 5884 op == 0x81 && 5885 (u & 0xFFFF0000) == 0 && 5886 (reg == modregrm(0,6,0) || reg == modregrm(0,1,0))) 5887 { 5888 c.Iflags ^= CFopsize; 5889 goto L1; 5890 } 5891 } 5892 5893 /* Look for TEST or OR or XOR with an immediate constant */ 5894 /* that we can replace with a byte operation */ 5895 if (op == 0xF7 && reg == modregrm(0,0,0) || 5896 op == 0x81 && reg == modregrm(0,6,0) && !flags || 5897 op == 0x81 && reg == modregrm(0,1,0)) 5898 { 5899 // See if we can replace a dword with a word 5900 // (avoid for 32 bit instructions, because CFopsize 5901 // is too slow) 5902 if (!shortop && useopsize) 5903 { 5904 if ((u & 0xFFFF0000) == 0) 5905 { 5906 c.Iflags ^= CFopsize; 5907 goto L1; 5908 } 5909 /* If memory (not register) addressing mode */ 5910 if ((u & 0xFFFF) == 0 && rm < modregrm(3,0,AX)) 5911 { 5912 c.IEV1.Voffset += 2; /* address MSW */ 5913 c.IEV2.Vuns >>= 16; 5914 c.Iflags ^= CFopsize; 5915 goto L1; 5916 } 5917 } 5918 5919 // If EA is not SI or DI 5920 if (rm < (modregrm(3,0,SP) | reg) && 5921 (usespace || 5922 config.target_cpu < TARGET_PentiumPro) 5923 ) 5924 { 5925 if ((u & 0xFFFFFF00) == 0) 5926 { 5927 L2: c.Iop--; /* to byte instruction */ 5928 c.Iflags &= ~CFopsize; 5929 goto L1; 5930 } 5931 if (((u & 0xFFFF00FF) == 0 || 5932 (shortop && (u & 0xFF) == 0)) && 5933 (rm < modregrm(3,0,0) || (!c.Irex && ereg < 4))) 5934 { 5935 L3: 5936 c.IEV2.Vuns >>= 8; 5937 if (rm >= (modregrm(3,0,AX) | reg)) 5938 c.Irm |= 4; /* AX.AH, BX.BH, etc. */ 5939 else 5940 c.IEV1.Voffset += 1; 5941 goto L2; 5942 } 5943 } 5944 5945 // BUG: which is right? 
5946 //else if ((u & 0xFFFF0000) == 0) 5947 5948 else if (0 && op == 0xF7 && 5949 rm >= modregrm(3,0,SP) && 5950 (u & 0xFFFF0000) == 0) 5951 5952 c.Iflags &= ~CFopsize; 5953 } 5954 5955 // Try to replace TEST reg,-1 with TEST reg,reg 5956 if (op == 0xF6 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7)) // TEST regL,immed8 5957 { 5958 if ((u & 0xFF) == 0xFF) 5959 { 5960 L4: 5961 c.Iop = 0x84; // TEST regL,regL 5962 c.Irm = modregrm(3,ereg,ereg); 5963 if (c.Irex & REX_B) 5964 c.Irex |= REX_R; 5965 c.Iflags &= ~CFopsize; 5966 goto L1; 5967 } 5968 } 5969 if (op == 0xF7 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7) && (I64 || ereg < 4)) 5970 { 5971 if (u == 0xFF) 5972 { 5973 if (ereg & 4) // SIL,DIL,BPL,SPL need REX prefix 5974 c.Irex |= REX; 5975 goto L4; 5976 } 5977 if ((u & 0xFFFF) == 0xFF00 && shortop && !c.Irex && ereg < 4) 5978 { 5979 ereg |= 4; /* to regH */ 5980 goto L4; 5981 } 5982 } 5983 5984 /* Look for sign extended immediate data */ 5985 if (cast(byte) u == u) 5986 { 5987 if (op == 0x81) 5988 { 5989 if (reg != 0x08 && reg != 0x20 && reg != 0x30) 5990 c.Iop = op = 0x83; /* 8 bit sgn ext */ 5991 } 5992 else if (op == 0x69) /* IMUL rw,ew,dw */ 5993 c.Iop = op = 0x6B; /* IMUL rw,ew,db */ 5994 } 5995 5996 // Look for SHIFT EA,imm8 we can replace with short form 5997 if (u == 1 && ((op & 0xFE) == 0xC0)) 5998 c.Iop |= 0xD0; 5999 6000 } /* if immediate second operand */ 6001 6002 /* Look for AX short form */ 6003 if (ins & A) 6004 { 6005 if (rm == modregrm(0,AX,local_BPRM) && 6006 !(c.Irex & REX_R) && // and it's AX, not R8 6007 (op & ~3) == 0x88 && 6008 !I64) 6009 { 6010 op = ((op & 3) + 0xA0) ^ 2; 6011 /* 8A. A0 */ 6012 /* 8B. A1 */ 6013 /* 88. A2 */ 6014 /* 89. A3 */ 6015 c.Iop = op; 6016 c.IFL2 = c.IFL1; 6017 c.IEV2 = c.IEV1; 6018 } 6019 6020 /* Replace MOV REG1,REG2 with MOV EREG1,EREG2 */ 6021 else if (!I16 && 6022 (op == 0x89 || op == 0x8B) && 6023 (rm & 0xC0) == 0xC0 && 6024 (!b || b.BC != BCasm) 6025 ) 6026 c.Iflags &= ~CFopsize; 6027 6028 // If rm is AX 6029 else if ((rm & modregrm(3,0,7)) == modregrm(3,0,AX) && !(c.Irex & (REX_R | REX_B))) 6030 { 6031 switch (op) 6032 { 6033 case 0x80: op = reg | 4; break; 6034 case 0x81: op = reg | 5; break; 6035 case 0x87: op = 0x90 + (reg>>3); break; // XCHG 6036 6037 case 0xF6: 6038 if (reg == 0) 6039 op = 0xA8; /* TEST AL,immed8 */ 6040 break; 6041 6042 case 0xF7: 6043 if (reg == 0) 6044 op = 0xA9; /* TEST AX,immed16 */ 6045 break; 6046 6047 default: 6048 break; 6049 } 6050 c.Iop = op; 6051 } 6052 } 6053 6054 /* Look for reg short form */ 6055 if ((ins & R) && (rm & 0xC0) == 0xC0) 6056 { 6057 switch (op) 6058 { 6059 case 0xC6: op = 0xB0 + ereg; break; 6060 case 0xC7: // if no sign extension 6061 if (!(c.Irex & REX_W && c.IEV2.Vint < 0)) 6062 { 6063 c.Irm = 0; 6064 c.Irex &= ~REX_W; 6065 op = 0xB8 + ereg; 6066 } 6067 break; 6068 6069 case 0xFF: 6070 switch (reg) 6071 { case 6<<3: op = 0x50+ereg; break;/* PUSH*/ 6072 case 0<<3: if (!I64) op = 0x40+ereg; break; /* INC*/ 6073 case 1<<3: if (!I64) op = 0x48+ereg; break; /* DEC*/ 6074 default: break; 6075 } 6076 break; 6077 6078 case 0x8F: op = 0x58 + ereg; break; 6079 case 0x87: 6080 if (reg == 0 && !(c.Irex & (REX_R | REX_B))) // Issue 12968: Needed to ensure it's referencing RAX, not R8 6081 op = 0x90 + ereg; 6082 break; 6083 6084 default: 6085 break; 6086 } 6087 c.Iop = op; 6088 } 6089 6090 // Look to remove redundant REX prefix on XOR 6091 if (c.Irex == REX_W // ignore ops involving R8..R15 6092 && (op == 0x31 || op == 0x33) // XOR 6093 && ((rm & 0xC0) == 0xC0) // register direct 6094 
&& ((reg >> 3) == ereg)) // register with itself 6095 { 6096 c.Irex = 0; 6097 } 6098 6099 // Look to replace SHL reg,1 with ADD reg,reg 6100 if ((op & ~1) == 0xD0 && 6101 (rm & modregrm(3,7,0)) == modregrm(3,4,0) && 6102 config.target_cpu >= TARGET_80486) 6103 { 6104 c.Iop &= 1; 6105 c.Irm = cast(ubyte)((rm & modregrm(3,0,7)) | (ereg << 3)); 6106 if (c.Irex & REX_B) 6107 c.Irex |= REX_R; 6108 if (!(c.Iflags & CFpsw) && !I16) 6109 c.Iflags &= ~CFopsize; 6110 goto L1; 6111 } 6112 6113 /* Look for sign extended modregrm displacement, or 0 6114 * displacement. 6115 */ 6116 6117 if (((rm & 0xC0) == 0x80) && // it's a 16/32 bit disp 6118 c.IFL1 == FLconst) // and it's a constant 6119 { 6120 a = c.IEV1.Vpointer; 6121 if (a == 0 && (rm & 7) != local_BPRM && // if 0[disp] 6122 !(local_BPRM == 5 && (rm & 7) == 4 && (c.Isib & 7) == BP) 6123 ) 6124 c.Irm &= 0x3F; 6125 else if (!I16) 6126 { 6127 if (cast(targ_size_t)cast(targ_schar)a == a) 6128 c.Irm ^= 0xC0; /* do 8 sx */ 6129 } 6130 else if ((cast(targ_size_t)cast(targ_schar)a & 0xFFFF) == (a & 0xFFFF)) 6131 c.Irm ^= 0xC0; /* do 8 sx */ 6132 } 6133 6134 /* Look for LEA reg,[ireg], replace with MOV reg,ireg */ 6135 if (op == LEA) 6136 { 6137 rm = c.Irm & 7; 6138 mod = c.Irm & modregrm(3,0,0); 6139 if (mod == 0) 6140 { 6141 if (!I16) 6142 { 6143 switch (rm) 6144 { 6145 case 4: 6146 case 5: 6147 break; 6148 6149 default: 6150 c.Irm |= modregrm(3,0,0); 6151 c.Iop = 0x8B; 6152 break; 6153 } 6154 } 6155 else 6156 { 6157 switch (rm) 6158 { 6159 case 4: rm = modregrm(3,0,SI); goto L6; 6160 case 5: rm = modregrm(3,0,DI); goto L6; 6161 case 7: rm = modregrm(3,0,BX); goto L6; 6162 L6: c.Irm = cast(ubyte)(rm + reg); 6163 c.Iop = 0x8B; 6164 break; 6165 6166 default: 6167 break; 6168 } 6169 } 6170 } 6171 6172 /* replace LEA reg,0[BP] with MOV reg,BP */ 6173 else if (mod == modregrm(1,0,0) && rm == local_BPRM && 6174 c.IFL1 == FLconst && c.IEV1.Vpointer == 0) 6175 { 6176 c.Iop = 0x8B; /* MOV reg,BP */ 6177 c.Irm = cast(ubyte)(modregrm(3,0,BP) + reg); 6178 } 6179 } 6180 6181 // Replace [R13] with 0[R13] 6182 if (c.Irex & REX_B && ((c.Irm & modregrm(3,0,7)) == modregrm(0,0,BP) || 6183 issib(c.Irm) && (c.Irm & modregrm(3,0,0)) == 0 && (c.Isib & 7) == BP)) 6184 { 6185 c.Irm |= modregrm(1,0,0); 6186 c.IFL1 = FLconst; 6187 c.IEV1.Vpointer = 0; 6188 } 6189 } 6190 else if (!(c.Iflags & CFvex)) 6191 { 6192 switch (op) 6193 { 6194 default: 6195 // Look for MOV r64, immediate 6196 if ((c.Irex & REX_W) && (op & ~7) == 0xB8) 6197 { 6198 /* Look for zero extended immediate data */ 6199 if (c.IEV2.Vsize_t == c.IEV2.Vuns) 6200 { 6201 c.Irex &= ~REX_W; 6202 } 6203 /* Look for sign extended immediate data */ 6204 else if (c.IEV2.Vsize_t == c.IEV2.Vint) 6205 { 6206 c.Irm = modregrm(3,0,op & 7); 6207 c.Iop = op = 0xC7; 6208 c.IEV2.Vsize_t = c.IEV2.Vuns; 6209 } 6210 } 6211 if ((op & ~0x0F) != 0x70) 6212 break; 6213 goto case JMP; 6214 6215 case JMP: 6216 switch (c.IFL2) 6217 { 6218 case FLcode: 6219 if (c.IEV2.Vcode == code_next(c)) 6220 { 6221 c.Iop = NOP; 6222 continue; 6223 } 6224 break; 6225 6226 case FLblock: 6227 if (!code_next(c) && c.IEV2.Vblock == bn) 6228 { 6229 c.Iop = NOP; 6230 continue; 6231 } 6232 break; 6233 6234 case FLconst: 6235 case FLfunc: 6236 case FLextern: 6237 break; 6238 6239 default: 6240 WRFL(cast(FL)c.IFL2); 6241 assert(0); 6242 } 6243 break; 6244 6245 case 0x68: // PUSH immed16 6246 if (c.IFL2 == FLconst) 6247 { 6248 targ_long u = c.IEV2.Vuns; 6249 if (I64 || 6250 ((c.Iflags & CFopsize) ? 
I16 : I32)) 6251 { // PUSH 32/64 bit operand 6252 if (u == cast(byte) u) 6253 c.Iop = 0x6A; // PUSH immed8 6254 } 6255 else // PUSH 16 bit operand 6256 { 6257 if (cast(short)u == cast(byte) u) 6258 c.Iop = 0x6A; // PUSH immed8 6259 } 6260 } 6261 break; 6262 } 6263 } 6264 } 6265 6266 debug 6267 if (debugc) 6268 { 6269 printf("-pinholeopt(%p)\n",cstart); 6270 for (c = cstart; c; c = code_next(c)) 6271 code_print(c); 6272 } 6273 } 6274 6275 6276 debug 6277 { 6278 @trusted 6279 private void pinholeopt_unittest() 6280 { 6281 //printf("pinholeopt_unittest()\n"); 6282 static struct CS 6283 { 6284 uint model,op,ea; 6285 targ_size_t ev1,ev2; 6286 uint flags; 6287 } 6288 __gshared CS[2][22] tests = 6289 [ 6290 // XOR reg,immed NOT regL 6291 [ { 16,0x81,modregrm(3,6,BX),0,0xFF,0 }, { 0,0xF6,modregrm(3,2,BX),0,0xFF } ], 6292 6293 // MOV 0[BX],3 MOV [BX],3 6294 [ { 16,0xC7,modregrm(2,0,7),0,3 }, { 0,0xC7,modregrm(0,0,7),0,3 } ], 6295 6296 /+ // only if config.flags4 & CFG4space 6297 // TEST regL,immed8 6298 [ { 0,0xF6,modregrm(3,0,BX),0,0xFF,0 }, { 0,0x84,modregrm(3,BX,BX),0,0xFF }], 6299 [ { 0,0xF7,modregrm(3,0,BX),0,0xFF,0 }, { 0,0x84,modregrm(3,BX,BX),0,0xFF }], 6300 [ { 64,0xF6,modregrmx(3,0,R8),0,0xFF,0 }, { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }], 6301 [ { 64,0xF7,modregrmx(3,0,R8),0,0xFF,0 }, { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }], 6302 +/ 6303 6304 // PUSH immed => PUSH immed8 6305 [ { 0,0x68,0,0,0 }, { 0,0x6A,0,0,0 }], 6306 [ { 0,0x68,0,0,0x7F }, { 0,0x6A,0,0,0x7F }], 6307 [ { 0,0x68,0,0,0x80 }, { 0,0x68,0,0,0x80 }], 6308 [ { 16,0x68,0,0,0,CFopsize }, { 0,0x6A,0,0,0,CFopsize }], 6309 [ { 16,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }], 6310 [ { 16,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }], 6311 [ { 16,0x68,0,0,0x10000,0 }, { 0,0x6A,0,0,0x10000,0 }], 6312 [ { 16,0x68,0,0,0x10000,CFopsize }, { 0,0x68,0,0,0x10000,CFopsize }], 6313 [ { 32,0x68,0,0,0,CFopsize }, { 0,0x6A,0,0,0,CFopsize }], 6314 [ { 32,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }], 6315 [ { 32,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }], 6316 [ { 32,0x68,0,0,0x10000,CFopsize }, { 0,0x6A,0,0,0x10000,CFopsize }], 6317 [ { 32,0x68,0,0,0x8000,CFopsize }, { 0,0x68,0,0,0x8000,CFopsize }], 6318 6319 // clear r64, for r64 != R8..R15 6320 [ { 64,0x31,0x800C0,0,0,0 }, { 0,0x31,0xC0,0,0,0}], 6321 [ { 64,0x33,0x800C0,0,0,0 }, { 0,0x33,0xC0,0,0,0}], 6322 6323 // MOV r64, immed 6324 [ { 64,0xC7,0x800C0,0,0xFFFFFFFF,0 }, { 0,0xC7,0x800C0,0,0xFFFFFFFF,0}], 6325 [ { 64,0xC7,0x800C0,0,0x7FFFFFFF,0 }, { 0,0xB8,0,0,0x7FFFFFFF,0}], 6326 [ { 64,0xB8,0x80000,0,0xFFFFFFFF,0 }, { 0,0xB8,0,0,0xFFFFFFFF,0 }], 6327 [ { 64,0xB8,0x80000,0,cast(targ_size_t)0x1FFFFFFFF,0 }, { 0,0xB8,0x80000,0,cast(targ_size_t)0x1FFFFFFFF,0 }], 6328 [ { 64,0xB8,0x80000,0,cast(targ_size_t)0xFFFFFFFFFFFFFFFF,0 }, { 0,0xC7,0x800C0,0,cast(targ_size_t)0xFFFFFFFF,0}], 6329 ]; 6330 6331 //config.flags4 |= CFG4space; 6332 for (int i = 0; i < tests.length; i++) 6333 { CS *pin = &tests[i][0]; 6334 CS *pout = &tests[i][1]; 6335 code cs = void; 6336 memset(&cs, 0, cs.sizeof); 6337 if (pin.model) 6338 { 6339 if (I16 && pin.model != 16) 6340 continue; 6341 if (I32 && pin.model != 32) 6342 continue; 6343 if (I64 && pin.model != 64) 6344 continue; 6345 } 6346 //printf("[%d]\n", i); 6347 cs.Iop = pin.op; 6348 cs.Iea = pin.ea; 6349 cs.IFL1 = FLconst; 6350 cs.IFL2 = FLconst; 6351 cs.IEV1.Vsize_t = pin.ev1; 6352 cs.IEV2.Vsize_t = pin.ev2; 6353 cs.Iflags = pin.flags; 6354 pinholeopt(&cs, null); 6355 if (cs.Iop != pout.op) 6356 { printf("[%d] Iop = x%02x, pout 
= x%02x\n", i, cs.Iop, pout.op); 6357 assert(0); 6358 } 6359 assert(cs.Iea == pout.ea); 6360 assert(cs.IEV1.Vsize_t == pout.ev1); 6361 assert(cs.IEV2.Vsize_t == pout.ev2); 6362 assert(cs.Iflags == pout.flags); 6363 } 6364 } 6365 } 6366 6367 @trusted 6368 void simplify_code(code* c) 6369 { 6370 reg_t reg; 6371 if (config.flags4 & CFG4optimized && 6372 (c.Iop == 0x81 || c.Iop == 0x80) && 6373 c.IFL2 == FLconst && 6374 reghasvalue((c.Iop == 0x80) ? BYTEREGS : ALLREGS,I64 ? c.IEV2.Vsize_t : c.IEV2.Vlong,®) && 6375 !(I16 && c.Iflags & CFopsize) 6376 ) 6377 { 6378 // See if we can replace immediate instruction with register instruction 6379 static immutable ubyte[8] regop = 6380 [ 0x00,0x08,0x10,0x18,0x20,0x28,0x30,0x38 ]; 6381 6382 //printf("replacing 0x%02x, val = x%lx\n",c.Iop,c.IEV2.Vlong); 6383 c.Iop = regop[(c.Irm & modregrm(0,7,0)) >> 3] | (c.Iop & 1); 6384 code_newreg(c, reg); 6385 if (I64 && !(c.Iop & 1) && (reg & 4)) 6386 c.Irex |= REX; 6387 } 6388 } 6389 6390 /************************** 6391 * Compute jump addresses for FLcode. 6392 * Note: only works for forward referenced code. 6393 * only direct jumps and branches are detected. 6394 * LOOP instructions only work for backward refs. 6395 */ 6396 6397 @trusted 6398 void jmpaddr(code *c) 6399 { 6400 code* ci,cn,ctarg,cstart; 6401 targ_size_t ad; 6402 6403 //printf("jmpaddr()\n"); 6404 cstart = c; /* remember start of code */ 6405 while (c) 6406 { 6407 const op = c.Iop; 6408 if (op <= 0xEB && 6409 inssize[op] & T && // if second operand 6410 c.IFL2 == FLcode && 6411 ((op & ~0x0F) == 0x70 || op == JMP || op == JMPS || op == JCXZ || op == CALL)) 6412 { 6413 ci = code_next(c); 6414 ctarg = c.IEV2.Vcode; /* target code */ 6415 ad = 0; /* IP displacement */ 6416 while (ci && ci != ctarg) 6417 { 6418 ad += calccodsize(ci); 6419 ci = code_next(ci); 6420 } 6421 if (!ci) 6422 goto Lbackjmp; // couldn't find it 6423 if (!I16 || op == JMP || op == JMPS || op == JCXZ || op == CALL) 6424 c.IEV2.Vpointer = ad; 6425 else /* else conditional */ 6426 { 6427 if (!(c.Iflags & CFjmp16)) /* if branch */ 6428 c.IEV2.Vpointer = ad; 6429 else /* branch around a long jump */ 6430 { 6431 cn = code_next(c); 6432 c.next = code_calloc(); 6433 code_next(c).next = cn; 6434 c.Iop = op ^ 1; /* converse jmp */ 6435 c.Iflags &= ~CFjmp16; 6436 c.IEV2.Vpointer = I16 ? 3 : 5; 6437 cn = code_next(c); 6438 cn.Iop = JMP; /* long jump */ 6439 cn.IFL2 = FLconst; 6440 cn.IEV2.Vpointer = ad; 6441 } 6442 } 6443 c.IFL2 = FLconst; 6444 } 6445 if (op == LOOP && c.IFL2 == FLcode) /* backwards refs */ 6446 { 6447 Lbackjmp: 6448 ctarg = c.IEV2.Vcode; 6449 for (ci = cstart; ci != ctarg; ci = code_next(ci)) 6450 if (!ci || ci == c) 6451 assert(0); 6452 ad = 2; /* - IP displacement */ 6453 while (ci != c) 6454 { 6455 assert(ci); 6456 ad += calccodsize(ci); 6457 ci = code_next(ci); 6458 } 6459 c.IEV2.Vpointer = (-ad) & 0xFF; 6460 c.IFL2 = FLconst; 6461 } 6462 c = code_next(c); 6463 } 6464 } 6465 6466 /******************************* 6467 * Calculate bl.Bsize. 6468 */ 6469 6470 uint calcblksize(code *c) 6471 { 6472 uint size; 6473 for (size = 0; c; c = code_next(c)) 6474 { 6475 uint sz = calccodsize(c); 6476 //printf("off=%02x, sz = %d, code %p: op=%02x\n", size, sz, c, c.Iop); 6477 size += sz; 6478 } 6479 //printf("calcblksize(c = x%x) = %d\n", c, size); 6480 return size; 6481 } 6482 6483 /***************************** 6484 * Calculate and return code size of a code. 6485 * Note that NOPs are sometimes used as markers, but are 6486 * never output. LINNUMs are never output. 
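 * ESCAPE pseudo-instructions likewise contribute 0 bytes.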
6487 * Note: This routine must be fast. Profiling shows it is significant. 6488 */ 6489 6490 @trusted 6491 uint calccodsize(code *c) 6492 { 6493 uint size; 6494 ubyte rm,mod,ins; 6495 uint iflags; 6496 uint i32 = I32 || I64; 6497 uint a32 = i32; 6498 6499 debug 6500 assert((a32 & ~1) == 0); 6501 6502 iflags = c.Iflags; 6503 opcode_t op = c.Iop; 6504 //printf("calccodsize(x%08x), Iflags = x%x\n", op, iflags); 6505 if (iflags & CFvex && c.Ivex.pfx == 0xC4) 6506 { 6507 ins = vex_inssize(c); 6508 size = ins & 7; 6509 goto Lmodrm; 6510 } 6511 else if ((op & 0xFF00) == 0x0F00 || (op & 0xFFFD00) == 0x0F3800) 6512 op = 0x0F; 6513 else 6514 op &= 0xFF; 6515 switch (op) 6516 { 6517 case 0x0F: 6518 if ((c.Iop & 0xFFFD00) == 0x0F3800) 6519 { // 3 byte op ( 0F38-- or 0F3A-- ) 6520 ins = inssize2[(c.Iop >> 8) & 0xFF]; 6521 size = ins & 7; 6522 if (c.Iop & 0xFF000000) 6523 size++; 6524 } 6525 else 6526 { // 2 byte op ( 0F-- ) 6527 ins = inssize2[c.Iop & 0xFF]; 6528 size = ins & 7; 6529 if (c.Iop & 0xFF0000) 6530 size++; 6531 } 6532 break; 6533 6534 case 0x90: 6535 size = (c.Iop == PAUSE) ? 2 : 1; 6536 goto Lret2; 6537 6538 case NOP: 6539 case ESCAPE: 6540 size = 0; // since these won't be output 6541 goto Lret2; 6542 6543 case ASM: 6544 if (c.Iflags == CFaddrsize) // kludge for DA inline asm 6545 size = _tysize[TYnptr]; 6546 else 6547 size = cast(uint)c.IEV1.len; 6548 goto Lret2; 6549 6550 case 0xA1: 6551 case 0xA3: 6552 if (c.Irex) 6553 { 6554 size = 9; // 64 bit immediate value for MOV to/from RAX 6555 goto Lret; 6556 } 6557 goto Ldefault; 6558 6559 case 0xF6: /* TEST mem8,immed8 */ 6560 ins = inssize[op]; 6561 size = ins & 7; 6562 if (i32) 6563 size = inssize32[op]; 6564 if ((c.Irm & (7<<3)) == 0) 6565 size++; /* size of immed8 */ 6566 break; 6567 6568 case 0xF7: 6569 ins = inssize[op]; 6570 size = ins & 7; 6571 if (i32) 6572 size = inssize32[op]; 6573 if ((c.Irm & (7<<3)) == 0) 6574 size += (i32 ^ ((iflags & CFopsize) !=0)) ? 4 : 2; 6575 break; 6576 6577 default: 6578 Ldefault: 6579 ins = inssize[op]; 6580 size = ins & 7; 6581 if (i32) 6582 size = inssize32[op]; 6583 } 6584 6585 if (iflags & (CFwait | CFopsize | CFaddrsize | CFSEG)) 6586 { 6587 if (iflags & CFwait) // if add FWAIT prefix 6588 size++; 6589 if (iflags & CFSEG) // if segment override 6590 size++; 6591 6592 // If the instruction has a second operand that is not an 8 bit, 6593 // and the operand size prefix is present, then fix the size computation 6594 // because the operand size will be different. 6595 // Walter, I had problems with this bit at the end. There can still be 6596 // an ADDRSIZE prefix for these and it does indeed change the operand size. 6597 6598 if (iflags & (CFopsize | CFaddrsize)) 6599 { 6600 if ((ins & (T|E)) == T) 6601 { 6602 if ((op & 0xAC) == 0xA0) 6603 { 6604 if (iflags & CFaddrsize && !I64) 6605 { if (I32) 6606 size -= 2; 6607 else 6608 size += 2; 6609 } 6610 } 6611 else if (iflags & CFopsize) 6612 { if (I16) 6613 size += 2; 6614 else 6615 size -= 2; 6616 } 6617 } 6618 if (iflags & CFaddrsize) 6619 { if (!I64) 6620 a32 ^= 1; 6621 size++; 6622 } 6623 if (iflags & CFopsize) 6624 size++; /* +1 for OPSIZE prefix */ 6625 } 6626 } 6627 6628 Lmodrm: 6629 if ((op & ~0x0F) == 0x70) 6630 { 6631 if (iflags & CFjmp16) // if long branch 6632 size += I16 ? 
3 : 4; // + 3(4) bytes for JMP
6633 }
6634 else if (ins & M) // if modregrm byte
6635 {
6636 rm = c.Irm;
6637 mod = rm & 0xC0;
6638 if (a32 || I64)
6639 { // 32 bit addressing
6640 if (issib(rm))
6641 size++;
6642 switch (mod)
6643 { case 0:
6644 if (issib(rm) && (c.Isib & 7) == 5 ||
6645 (rm & 7) == 5)
6646 size += 4; /* disp32 */
6647 if (c.Irex & REX_B && (rm & 7) == 5)
6648 /* Instead of selecting R13, this mode is an [RIP] relative
6649 * address. Although valid, it's redundant, and should not
6650 * be generated. Instead, generate 0[R13] instead of [R13].
6651 */
6652 assert(0);
6653 break;
6654
6655 case 0x40:
6656 size++; /* disp8 */
6657 break;
6658
6659 case 0x80:
6660 size += 4; /* disp32 */
6661 break;
6662
6663 default:
6664 break;
6665 }
6666 }
6667 else
6668 { // 16 bit addressing
6669 if (mod == 0x40) /* 01: 8 bit displacement */
6670 size++;
6671 else if (mod == 0x80 || (mod == 0 && (rm & 7) == 6))
6672 size += 2;
6673 }
6674 }
6675
6676 Lret:
6677 if (!(iflags & CFvex) && c.Irex)
6678 {
6679 size++;
6680 if (c.Irex & REX_W && (op & ~7) == 0xB8)
6681 size += 4;
6682 }
6683 Lret2:
6684 //printf("op = x%02x, size = %d\n",op,size);
6685 return size;
6686 }
6687
6688 /********************************
6689 * Return !=0 if codes match.
6690 */
6691
6692 static if (0)
6693 {
6694
6695 int code_match(code *c1,code *c2)
6696 {
6697 code cs1,cs2;
6698 ubyte ins;
6699
6700 if (c1 == c2)
6701 goto match;
6702 cs1 = *c1;
6703 cs2 = *c2;
6704 if (cs1.Iop != cs2.Iop)
6705 goto nomatch;
6706 switch (cs1.Iop)
6707 {
6708 case ESCAPE | ESCctor:
6709 case ESCAPE | ESCdtor:
6710 goto nomatch;
6711
6712 case NOP:
6713 goto match;
6714
6715 case ASM:
6716 if (cs1.IEV1.len == cs2.IEV1.len &&
6717 memcmp(cs1.IEV1.bytes,cs2.IEV1.bytes,cs1.IEV1.len) == 0)
6718 goto match;
6719 else
6720 goto nomatch;
6721
6722 default:
6723 if ((cs1.Iop & 0xFF) == ESCAPE)
6724 goto match;
6725 break;
6726 }
6727 if (cs1.Iflags != cs2.Iflags)
6728 goto nomatch;
6729
6730 ins = inssize[cs1.Iop & 0xFF];
6731 if ((cs1.Iop & 0xFFFD00) == 0x0F3800)
6732 {
6733 ins = inssize2[(cs1.Iop >> 8) & 0xFF];
6734 }
6735 else if ((cs1.Iop & 0xFF00) == 0x0F00)
6736 {
6737 ins = inssize2[cs1.Iop & 0xFF];
6738 }
6739
6740 if (ins & M) // if modregrm byte
6741 {
6742 if (cs1.Irm != cs2.Irm)
6743 goto nomatch;
6744 if ((cs1.Irm & 0xC0) == 0xC0)
6745 goto do2;
6746 if (is32bitaddr(I32,cs1.Iflags))
6747 {
6748 if (issib(cs1.Irm) && cs1.Isib != cs2.Isib)
6749 goto nomatch;
6750 if (
6751 ((cs1.Irm & 0xC0) == 0 && !((cs1.Irm & 7) == 4 && (cs1.Isib & 7) == 5 || (cs1.Irm & 7) == 5))
6752 )
6753 goto do2; /* if no first operand */
6754 }
6755 else
6756 {
6757 if (
6758 ((cs1.Irm & 0xC0) == 0 && !((cs1.Irm & 7) == 6))
6759 )
6760 goto do2; /* if no first operand */
6761 }
6762 if (cs1.IFL1 != cs2.IFL1)
6763 goto nomatch;
6764 if (flinsymtab[cs1.IFL1] && cs1.IEV1.Vsym != cs2.IEV1.Vsym)
6765 goto nomatch;
6766 if (cs1.IEV1.Voffset != cs2.IEV1.Voffset)
6767 goto nomatch;
6768 }
6769
6770 do2:
6771 if (!(ins & T)) // if no second operand
6772 goto match;
6773 if (cs1.IFL2 != cs2.IFL2)
6774 goto nomatch;
6775 if (flinsymtab[cs1.IFL2] && cs1.IEV2.Vsym != cs2.IEV2.Vsym)
6776 goto nomatch;
6777 if (cs1.IEV2.Voffset != cs2.IEV2.Voffset)
6778 goto nomatch;
6779
6780 match:
6781 return 1;
6782
6783 nomatch:
6784 return 0;
6785 }
6786
6787 }
6788
6789 /************************
6790 * Little buffer allocated on the stack to accumulate instruction bytes to
6791 * later be sent along to objmod
6792 */
6793 private struct MiniCodeBuf
6794 {
6795 nothrow:
6796 uint index;
6797 uint offset;
6798 int seg;
6799 Barray!ubyte* disasmBuf;
6800 ubyte[256] bytes; // = void;
6801
6802 @trusted
6803 this(int seg)
6804 {
6805 index = 0;
6806 this.offset = cast(uint)Offset(seg);
6807 this.seg = seg;
6808 }
6809
6810 @trusted
6811 void flushx()
6812 {
6813 // Emit accumulated bytes to code segment
6814 debug assert(index < bytes.length);
6815
6816 if (disasmBuf) // write to buffer for disassembly
6817 {
6818 foreach (c; bytes[0 .. index]) // not efficient, but for verbose output anyway
6819 disasmBuf.push(c);
6820 }
6821
6822 offset += objmod.bytes(seg, offset, index, bytes.ptr);
6823 index = 0;
6824 }
6825
6826 @trusted
6827 void gen(ubyte c) { bytes[index++] = c; }
6828
6829 @trusted
6830 void genp(uint n, void *p) { memcpy(&bytes[index], p, n); index += n; }
6831
6832 @trusted
6833 void flush() { if (index) flushx(); }
6834
6835 @trusted
6836 uint getOffset() { return offset + index; }
6837
6838 @trusted
6839 uint available() { return cast(uint)bytes.length - index; }
6840
6841 /******************************
6842 * write64/write32/write16 write `value` to `disasmBuf`
6843 */
6844 @trusted
6845 void write64(ulong value)
6846 {
6847 if (disasmBuf)
6848 {
6849 disasmBuf.push(cast(ubyte)value);
6850 disasmBuf.push(cast(ubyte)(value >> 8));
6851 disasmBuf.push(cast(ubyte)(value >> 16));
6852 disasmBuf.push(cast(ubyte)(value >> 24));
6853 disasmBuf.push(cast(ubyte)(value >> 32));
6854 disasmBuf.push(cast(ubyte)(value >> 40));
6855 disasmBuf.push(cast(ubyte)(value >> 48));
6856 disasmBuf.push(cast(ubyte)(value >> 56));
6857 }
6858 }
6859
6860 pragma(inline, true)
6861 @trusted
6862 void write32(uint value)
6863 {
6864 if (disasmBuf)
6865 {
6866 disasmBuf.push(cast(ubyte)value);
6867 disasmBuf.push(cast(ubyte)(value >> 8));
6868 disasmBuf.push(cast(ubyte)(value >> 16));
6869 disasmBuf.push(cast(ubyte)(value >> 24));
6870 }
6871 }
6872
6873 pragma(inline, true)
6874 @trusted
6875 void write16(uint value)
6876 {
6877 if (disasmBuf)
6878 {
6879 disasmBuf.push(cast(ubyte)value);
6880 disasmBuf.push(cast(ubyte)(value >> 8));
6881 }
6882 }
6883 }
6884
6885 /**************************
6886 * Convert instructions to object code and write them to objmod.
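 * A typical call looks like the hedged sketch below, where `cseg` and
 * `codelist` stand in for a function's code segment and generated
 * instruction list:
 * ---
 * const uint end = codout(cseg, codelist, null); // null: no disassembly wanted
 * assert(end == Offset(cseg));                   // codout leaves Offset(seg) at the end
 * ---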
6887 * Params: 6888 * seg = code segment to write to, code starts at Offset(seg) 6889 * c = list of instructions to write 6890 * disasmBuf = if not null, then also write object code here 6891 * Returns: 6892 * offset of end of code emitted 6893 */ 6894 6895 @trusted 6896 uint codout(int seg, code *c, Barray!ubyte* disasmBuf) 6897 { 6898 ubyte rm,mod; 6899 ubyte ins; 6900 code *cn; 6901 uint flags; 6902 Symbol *s; 6903 6904 debug 6905 if (debugc) printf("codout(%p), Coffset = x%llx\n",c,cast(ulong)Offset(seg)); 6906 6907 MiniCodeBuf ggen = void; 6908 ggen.index = 0; 6909 ggen.offset = cast(uint)Offset(seg); 6910 ggen.seg = seg; 6911 ggen.disasmBuf = disasmBuf; 6912 6913 for (; c; c = code_next(c)) 6914 { 6915 debug 6916 { 6917 if (debugc) { printf("off=%02x, sz=%d, ",cast(int)ggen.getOffset(),cast(int)calccodsize(c)); code_print(c); } 6918 uint startoffset = ggen.getOffset(); 6919 } 6920 6921 opcode_t op = c.Iop; 6922 ins = inssize[op & 0xFF]; 6923 switch (op & 0xFF) 6924 { 6925 case ESCAPE: 6926 /* Check for SSE4 opcode v/pmaxuw xmm1,xmm2/m128 */ 6927 if(op == 0x660F383E || c.Iflags & CFvex) break; 6928 6929 switch (op & 0xFFFF00) 6930 { case ESClinnum: 6931 /* put out line number stuff */ 6932 objmod.linnum(c.IEV1.Vsrcpos,seg,ggen.getOffset()); 6933 break; 6934 version (SCPP) 6935 { 6936 static if (1) 6937 { 6938 case ESCctor: 6939 case ESCdtor: 6940 case ESCoffset: 6941 if (config.exe != EX_WIN32) 6942 except_pair_setoffset(c,ggen.getOffset() - funcoffset); 6943 break; 6944 6945 case ESCmark: 6946 case ESCrelease: 6947 case ESCmark2: 6948 case ESCrelease2: 6949 break; 6950 } 6951 else 6952 { 6953 case ESCctor: 6954 except_push(ggen.getOffset() - funcoffset,c.IEV1.Vtor,null); 6955 break; 6956 6957 case ESCdtor: 6958 except_pop(ggen.getOffset() - funcoffset,c.IEV1.Vtor,null); 6959 break; 6960 6961 case ESCmark: 6962 except_mark(); 6963 break; 6964 6965 case ESCrelease: 6966 except_release(); 6967 break; 6968 } 6969 } 6970 case ESCadjesp: 6971 //printf("adjust ESP %ld\n", cast(long)c.IEV1.Vint); 6972 break; 6973 6974 default: 6975 break; 6976 } 6977 6978 debug 6979 assert(calccodsize(c) == 0); 6980 6981 continue; 6982 6983 case NOP: /* don't send them out */ 6984 if (op != NOP) 6985 break; 6986 debug 6987 assert(calccodsize(c) == 0); 6988 6989 continue; 6990 6991 case ASM: 6992 if (op != ASM) 6993 break; 6994 ggen.flush(); 6995 if (c.Iflags == CFaddrsize) // kludge for DA inline asm 6996 { 6997 do32bit(&ggen, FLblockoff,&c.IEV1,0,0); 6998 } 6999 else 7000 { 7001 ggen.offset += objmod.bytes(seg,ggen.offset,cast(uint)c.IEV1.len,c.IEV1.bytes); 7002 } 7003 debug 7004 assert(calccodsize(c) == c.IEV1.len); 7005 7006 continue; 7007 7008 default: 7009 break; 7010 } 7011 flags = c.Iflags; 7012 7013 // See if we need to flush (don't have room for largest code sequence) 7014 if (ggen.available() < (1+4+4+8+8)) 7015 ggen.flush(); 7016 7017 // see if we need to put out prefix bytes 7018 if (flags & (CFwait | CFPREFIX | CFjmp16)) 7019 { 7020 int override_; 7021 7022 if (flags & CFwait) 7023 ggen.gen(0x9B); // FWAIT 7024 /* ? 
SEGES : SEGSS */ 7025 switch (flags & CFSEG) 7026 { case CFes: override_ = SEGES; goto segover; 7027 case CFss: override_ = SEGSS; goto segover; 7028 case CFcs: override_ = SEGCS; goto segover; 7029 case CFds: override_ = SEGDS; goto segover; 7030 case CFfs: override_ = SEGFS; goto segover; 7031 case CFgs: override_ = SEGGS; goto segover; 7032 segover: ggen.gen(cast(ubyte)override_); 7033 break; 7034 7035 default: break; 7036 } 7037 7038 if (flags & CFaddrsize) 7039 ggen.gen(0x67); 7040 7041 // Do this last because of instructions like ADDPD 7042 if (flags & CFopsize) 7043 ggen.gen(0x66); /* operand size */ 7044 7045 if ((op & ~0x0F) == 0x70 && flags & CFjmp16) /* long condit jmp */ 7046 { 7047 if (!I16) 7048 { // Put out 16 bit conditional jump 7049 c.Iop = op = 0x0F00 | (0x80 | (op & 0x0F)); 7050 } 7051 else 7052 { 7053 cn = code_calloc(); 7054 /*cxcalloc++;*/ 7055 cn.next = code_next(c); 7056 c.next= cn; // link into code 7057 cn.Iop = JMP; // JMP block 7058 cn.IFL2 = c.IFL2; 7059 cn.IEV2.Vblock = c.IEV2.Vblock; 7060 c.Iop = op ^= 1; // toggle condition 7061 c.IFL2 = FLconst; 7062 c.IEV2.Vpointer = I16 ? 3 : 5; // skip over JMP block 7063 c.Iflags &= ~CFjmp16; 7064 } 7065 } 7066 } 7067 7068 if (flags & CFvex) 7069 { 7070 if (flags & CFvex3) 7071 { 7072 ggen.gen(0xC4); 7073 ggen.gen(cast(ubyte)VEX3_B1(c.Ivex)); 7074 ggen.gen(cast(ubyte)VEX3_B2(c.Ivex)); 7075 ggen.gen(c.Ivex.op); 7076 } 7077 else 7078 { 7079 ggen.gen(0xC5); 7080 ggen.gen(cast(ubyte)VEX2_B1(c.Ivex)); 7081 ggen.gen(c.Ivex.op); 7082 } 7083 ins = vex_inssize(c); 7084 goto Lmodrm; 7085 } 7086 7087 if (op > 0xFF) 7088 { 7089 if ((op & 0xFFFD00) == 0x0F3800) 7090 ins = inssize2[(op >> 8) & 0xFF]; 7091 else if ((op & 0xFF00) == 0x0F00) 7092 ins = inssize2[op & 0xFF]; 7093 7094 if (op & 0xFF000000) 7095 { 7096 ubyte op1 = op >> 24; 7097 if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66) 7098 { 7099 ggen.gen(op1); 7100 if (c.Irex) 7101 ggen.gen(c.Irex | REX); 7102 } 7103 else 7104 { 7105 if (c.Irex) 7106 ggen.gen(c.Irex | REX); 7107 ggen.gen(op1); 7108 } 7109 ggen.gen((op >> 16) & 0xFF); 7110 ggen.gen((op >> 8) & 0xFF); 7111 ggen.gen(op & 0xFF); 7112 } 7113 else if (op & 0xFF0000) 7114 { 7115 ubyte op1 = cast(ubyte)(op >> 16); 7116 if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66) 7117 { 7118 ggen.gen(op1); 7119 if (c.Irex) 7120 ggen.gen(c.Irex | REX); 7121 } 7122 else 7123 { 7124 if (c.Irex) 7125 ggen.gen(c.Irex | REX); 7126 ggen.gen(op1); 7127 } 7128 ggen.gen((op >> 8) & 0xFF); 7129 ggen.gen(op & 0xFF); 7130 } 7131 else 7132 { 7133 if (c.Irex) 7134 ggen.gen(c.Irex | REX); 7135 ggen.gen((op >> 8) & 0xFF); 7136 ggen.gen(op & 0xFF); 7137 } 7138 } 7139 else 7140 { 7141 if (c.Irex) 7142 ggen.gen(c.Irex | REX); 7143 ggen.gen(cast(ubyte)op); 7144 } 7145 Lmodrm: 7146 if (ins & M) /* if modregrm byte */ 7147 { 7148 rm = c.Irm; 7149 ggen.gen(rm); 7150 7151 // Look for an address size override when working with the 7152 // MOD R/M and SIB bytes 7153 7154 if (is32bitaddr( I32, flags)) 7155 { 7156 if (issib(rm)) 7157 ggen.gen(c.Isib); 7158 switch (rm & 0xC0) 7159 { 7160 case 0x40: 7161 do8bit(&ggen, cast(FL) c.IFL1,&c.IEV1); // 8 bit 7162 break; 7163 7164 case 0: 7165 if (!(issib(rm) && (c.Isib & 7) == 5 || 7166 (rm & 7) == 5)) 7167 break; 7168 goto case 0x80; 7169 7170 case 0x80: 7171 { 7172 int cfflags = CFoff; 7173 targ_size_t val = 0; 7174 if (I64) 7175 { 7176 if ((rm & modregrm(3,0,7)) == modregrm(0,0,5)) // if disp32[RIP] 7177 { 7178 cfflags |= CFpc32; 7179 val = -4; 7180 reg_t reg = rm & modregrm(0,7,0); 7181 if (ins & T || 7182 ((op == 0xF6 
|| op == 0xF7) && (reg == modregrm(0,0,0) || reg == modregrm(0,1,0)))) 7183 { if (ins & E || op == 0xF6) 7184 val = -5; 7185 else if (c.Iflags & CFopsize) 7186 val = -6; 7187 else 7188 val = -8; 7189 } 7190 7191 if (config.exe & (EX_OSX64 | EX_WIN64)) 7192 /* Mach-O and Win64 fixups already take the 4 byte size 7193 * into account, so bias by 4 7194 */ 7195 val += 4; 7196 } 7197 } 7198 do32bit(&ggen, cast(FL)c.IFL1,&c.IEV1,cfflags,cast(int)val); 7199 break; 7200 } 7201 7202 default: 7203 break; 7204 } 7205 } 7206 else 7207 { 7208 switch (rm & 0xC0) 7209 { case 0x40: 7210 do8bit(&ggen, cast(FL) c.IFL1,&c.IEV1); // 8 bit 7211 break; 7212 7213 case 0: 7214 if ((rm & 7) != 6) 7215 break; 7216 goto case 0x80; 7217 7218 case 0x80: 7219 do16bit(&ggen, cast(FL)c.IFL1,&c.IEV1,CFoff); 7220 break; 7221 7222 default: 7223 break; 7224 } 7225 } 7226 } 7227 else 7228 { 7229 if (op == ENTER) 7230 do16bit(&ggen, cast(FL)c.IFL1,&c.IEV1,0); 7231 } 7232 flags &= CFseg | CFoff | CFselfrel; 7233 if (ins & T) /* if second operand */ 7234 { 7235 if (ins & E) /* if data-8 */ 7236 do8bit(&ggen, cast(FL) c.IFL2,&c.IEV2); 7237 else if (!I16) 7238 { 7239 switch (op) 7240 { 7241 case 0xC2: /* RETN imm16 */ 7242 case 0xCA: /* RETF imm16 */ 7243 do16: 7244 do16bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags); 7245 break; 7246 7247 case 0xA1: 7248 case 0xA3: 7249 if (I64 && c.Irex) 7250 { 7251 do64: 7252 do64bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags); 7253 break; 7254 } 7255 goto case 0xA0; 7256 7257 case 0xA0: /* MOV AL,byte ptr [] */ 7258 case 0xA2: 7259 if (c.Iflags & CFaddrsize && !I64) 7260 goto do16; 7261 else 7262 do32: 7263 do32bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags,0); 7264 break; 7265 7266 case 0x9A: 7267 case 0xEA: 7268 if (c.Iflags & CFopsize) 7269 goto ptr1616; 7270 else 7271 goto ptr1632; 7272 7273 case 0x68: // PUSH immed32 7274 if (cast(FL)c.IFL2 == FLblock) 7275 { 7276 c.IFL2 = FLblockoff; 7277 goto do32; 7278 } 7279 else 7280 goto case_default; 7281 7282 case CALL: // CALL rel 7283 case JMP: // JMP rel 7284 flags |= CFselfrel; 7285 goto case_default; 7286 7287 default: 7288 if ((op|0xF) == 0x0F8F) // Jcc rel16 rel32 7289 flags |= CFselfrel; 7290 if (I64 && (op & ~7) == 0xB8 && c.Irex & REX_W) 7291 goto do64; 7292 case_default: 7293 if (c.Iflags & CFopsize) 7294 goto do16; 7295 else 7296 goto do32; 7297 } 7298 } 7299 else 7300 { 7301 switch (op) 7302 { 7303 case 0xC2: 7304 case 0xCA: 7305 goto do16; 7306 7307 case 0xA0: 7308 case 0xA1: 7309 case 0xA2: 7310 case 0xA3: 7311 if (c.Iflags & CFaddrsize) 7312 goto do32; 7313 else 7314 goto do16; 7315 7316 case 0x9A: 7317 case 0xEA: 7318 if (c.Iflags & CFopsize) 7319 goto ptr1632; 7320 else 7321 goto ptr1616; 7322 7323 ptr1616: 7324 ptr1632: 7325 //assert(c.IFL2 == FLfunc); 7326 ggen.flush(); 7327 if (c.IFL2 == FLdatseg) 7328 { 7329 objmod.reftodatseg(seg,ggen.offset,c.IEV2.Vpointer, 7330 c.IEV2.Vseg,flags); 7331 ggen.offset += 4; 7332 } 7333 else 7334 { 7335 s = c.IEV2.Vsym; 7336 ggen.offset += objmod.reftoident(seg,ggen.offset,s,0,flags); 7337 } 7338 break; 7339 7340 case 0x68: // PUSH immed16 7341 if (cast(FL)c.IFL2 == FLblock) 7342 { c.IFL2 = FLblockoff; 7343 goto do16; 7344 } 7345 else 7346 goto case_default16; 7347 7348 case CALL: 7349 case JMP: 7350 flags |= CFselfrel; 7351 goto default; 7352 7353 default: 7354 case_default16: 7355 if (c.Iflags & CFopsize) 7356 goto do32; 7357 else 7358 goto do16; 7359 } 7360 } 7361 } 7362 else if (op == 0xF6) /* TEST mem8,immed8 */ 7363 { 7364 if ((rm & (7<<3)) == 0) 7365 do8bit(&ggen, cast(FL)c.IFL2,&c.IEV2); 7366 } 7367 else if 
(op == 0xF7) 7368 { 7369 if ((rm & (7<<3)) == 0) /* TEST mem16/32,immed16/32 */ 7370 { 7371 if ((I32 || I64) ^ ((c.Iflags & CFopsize) != 0)) 7372 do32bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags,0); 7373 else 7374 do16bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags); 7375 } 7376 } 7377 7378 debug 7379 if (ggen.getOffset() - startoffset != calccodsize(c)) 7380 { 7381 printf("actual: %d, calc: %d\n", cast(int)(ggen.getOffset() - startoffset), cast(int)calccodsize(c)); 7382 code_print(c); 7383 assert(0); 7384 } 7385 } 7386 ggen.flush(); 7387 Offset(seg) = ggen.offset; 7388 //printf("-codout(), Coffset = x%x\n", Offset(seg)); 7389 return cast(uint)ggen.offset; /* ending address */ 7390 } 7391 7392 7393 @trusted 7394 private void do64bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags) 7395 { 7396 char *p; 7397 Symbol *s; 7398 targ_size_t ad; 7399 7400 assert(I64); 7401 switch (fl) 7402 { 7403 case FLconst: 7404 ad = *cast(targ_size_t *) uev; 7405 L1: 7406 pbuf.genp(8,&ad); 7407 return; 7408 7409 case FLdatseg: 7410 pbuf.flush(); 7411 pbuf.write64(uev.Vpointer); 7412 objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,CFoffset64 | flags); 7413 break; 7414 7415 case FLframehandler: 7416 framehandleroffset = pbuf.getOffset(); 7417 ad = 0; 7418 goto L1; 7419 7420 case FLswitch: 7421 pbuf.flush(); 7422 ad = uev.Vswitch.Btableoffset; 7423 pbuf.write64(ad); 7424 if (config.flags & CFGromable) 7425 objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad); 7426 else 7427 objmod.reftodatseg(pbuf.seg,pbuf.offset,ad,objmod.jmpTableSegment(funcsym_p),CFoff); 7428 break; 7429 7430 case FLcsdata: 7431 case FLfardata: 7432 //symbol_print(uev.Vsym); 7433 // NOTE: In ELFOBJ all symbol refs have been tagged FLextern 7434 // strings and statics are treated like offsets from a 7435 // un-named external with is the start of .rodata or .data 7436 case FLextern: /* external data symbol */ 7437 case FLtlsdata: 7438 pbuf.flush(); 7439 s = uev.Vsym; /* symbol pointer */ 7440 pbuf.write64(uev.Voffset); 7441 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,CFoffset64 | flags); 7442 break; 7443 7444 case FLgotoff: 7445 if (config.exe & (EX_OSX | EX_OSX64)) 7446 { 7447 assert(0); 7448 } 7449 else if (config.exe & EX_posix) 7450 { 7451 pbuf.flush(); 7452 s = uev.Vsym; /* symbol pointer */ 7453 pbuf.write64(uev.Voffset); 7454 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,CFoffset64 | flags); 7455 break; 7456 } 7457 else 7458 assert(0); 7459 7460 case FLgot: 7461 if (config.exe & (EX_OSX | EX_OSX64)) 7462 { 7463 funcsym_p.Slocalgotoffset = pbuf.getOffset(); 7464 ad = 0; 7465 goto L1; 7466 } 7467 else if (config.exe & EX_posix) 7468 { 7469 pbuf.flush(); 7470 s = uev.Vsym; /* symbol pointer */ 7471 pbuf.write64(uev.Voffset); 7472 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,CFoffset64 | flags); 7473 break; 7474 } 7475 else 7476 assert(0); 7477 7478 case FLfunc: /* function call */ 7479 s = uev.Vsym; /* symbol pointer */ 7480 assert(TARGET_SEGMENTED || !tyfarfunc(s.ty())); 7481 pbuf.flush(); 7482 pbuf.write64(0); 7483 objmod.reftoident(pbuf.seg,pbuf.offset,s,0,CFoffset64 | flags); 7484 break; 7485 7486 case FLblock: /* displacement to another block */ 7487 ad = uev.Vblock.Boffset - pbuf.getOffset() - 4; 7488 //printf("FLblock: funcoffset = %x, pbuf.getOffset = %x, Boffset = %x, ad = %x\n", funcoffset, pbuf.getOffset(), uev.Vblock.Boffset, ad); 7489 goto L1; 7490 7491 case FLblockoff: 7492 pbuf.flush(); 7493 assert(uev.Vblock); 7494 //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", pbuf.offset, 
uev.Vblock.Boffset, funcoffset); 7495 pbuf.write64(uev.Vblock.Boffset); 7496 objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset); 7497 break; 7498 7499 default: 7500 WRFL(fl); 7501 assert(0); 7502 } 7503 pbuf.offset += 8; 7504 } 7505 7506 7507 @trusted 7508 private void do32bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags, int val) 7509 { 7510 char *p; 7511 Symbol *s; 7512 targ_size_t ad; 7513 7514 //printf("do32bit(flags = x%x)\n", flags); 7515 switch (fl) 7516 { 7517 case FLconst: 7518 assert(targ_size_t.sizeof == 4 || targ_size_t.sizeof == 8); 7519 ad = * cast(targ_size_t *) uev; 7520 L1: 7521 pbuf.genp(4,&ad); 7522 return; 7523 7524 case FLdatseg: 7525 pbuf.flush(); 7526 objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,flags); 7527 pbuf.write32(cast(uint)uev.Vpointer); 7528 break; 7529 7530 case FLframehandler: 7531 framehandleroffset = pbuf.getOffset(); 7532 ad = 0; 7533 goto L1; 7534 7535 case FLswitch: 7536 pbuf.flush(); 7537 ad = uev.Vswitch.Btableoffset; 7538 if (config.flags & CFGromable) 7539 { 7540 if (config.exe & (EX_OSX | EX_OSX64)) 7541 { 7542 // These are magic values based on the exact code generated for the switch jump 7543 if (I64) 7544 uev.Vswitch.Btablebase = pbuf.getOffset() + 4; 7545 else 7546 uev.Vswitch.Btablebase = pbuf.getOffset() + 4 - 8; 7547 ad -= uev.Vswitch.Btablebase; 7548 goto L1; 7549 } 7550 else if (config.exe & EX_windos) 7551 { 7552 if (I64) 7553 { 7554 uev.Vswitch.Btablebase = pbuf.getOffset() + 4; 7555 ad -= uev.Vswitch.Btablebase; 7556 goto L1; 7557 } 7558 else 7559 objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad); 7560 } 7561 else 7562 { 7563 objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad); 7564 } 7565 } 7566 else 7567 objmod.reftodatseg(pbuf.seg,pbuf.offset,ad,objmod.jmpTableSegment(funcsym_p),CFoff); 7568 pbuf.write32(cast(uint)ad); 7569 break; 7570 7571 case FLcode: 7572 //assert(JMPJMPTABLE); // the only use case 7573 pbuf.flush(); 7574 ad = *cast(targ_size_t *) uev + pbuf.getOffset(); 7575 objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad); 7576 pbuf.write32(cast(uint)ad); 7577 break; 7578 7579 case FLcsdata: 7580 case FLfardata: 7581 //symbol_print(uev.Vsym); 7582 7583 // NOTE: In ELFOBJ all symbol refs have been tagged FLextern 7584 // strings and statics are treated like offsets from a 7585 // un-named external with is the start of .rodata or .data 7586 case FLextern: /* external data symbol */ 7587 case FLtlsdata: 7588 pbuf.flush(); 7589 s = uev.Vsym; /* symbol pointer */ 7590 if (config.exe & EX_windos && I64 && (flags & CFpc32)) 7591 { 7592 /* This is for those funky fixups where the location to be fixed up 7593 * is a 'val' amount back from the current RIP, biased by adding 4. 
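 * For example, val == -4 stores 4 in the CFREL bits ((-val & 7) << 24),
 * while val == 0 leaves them clear; the assert below keeps val in the
 * representable -5 .. 0 range.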
7594 */ 7595 assert(val >= -5 && val <= 0); 7596 flags |= (-val & 7) << 24; // set CFREL value 7597 assert(CFREL == (7 << 24)); 7598 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,flags); 7599 pbuf.write32(cast(uint)uev.Voffset); 7600 } 7601 else 7602 { 7603 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset + val,flags); 7604 pbuf.write32(cast(uint)(uev.Voffset + val)); 7605 } 7606 break; 7607 7608 case FLgotoff: 7609 if (config.exe & (EX_OSX | EX_OSX64)) 7610 { 7611 assert(0); 7612 } 7613 else if (config.exe & EX_posix) 7614 { 7615 pbuf.flush(); 7616 s = uev.Vsym; /* symbol pointer */ 7617 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset + val,flags); 7618 pbuf.write32(cast(uint)(uev.Voffset + val)); 7619 break; 7620 } 7621 else 7622 assert(0); 7623 7624 case FLgot: 7625 if (config.exe & (EX_OSX | EX_OSX64)) 7626 { 7627 funcsym_p.Slocalgotoffset = pbuf.getOffset(); 7628 ad = 0; 7629 goto L1; 7630 } 7631 else if (config.exe & EX_posix) 7632 { 7633 pbuf.flush(); 7634 s = uev.Vsym; /* symbol pointer */ 7635 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset + val,flags); 7636 pbuf.write32(cast(uint)(uev.Voffset + val)); 7637 break; 7638 } 7639 else 7640 assert(0); 7641 7642 case FLfunc: /* function call */ 7643 s = uev.Vsym; /* symbol pointer */ 7644 if (tyfarfunc(s.ty())) 7645 { /* Large code references are always absolute */ 7646 pbuf.flush(); 7647 pbuf.offset += objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags) - 4; 7648 pbuf.write32(0); 7649 } 7650 else if (s.Sseg == pbuf.seg && 7651 (s.Sclass == SC.static_ || s.Sclass == SC.global) && 7652 s.Sxtrnnum == 0 && flags & CFselfrel) 7653 { /* if we know it's relative address */ 7654 ad = s.Soffset - pbuf.getOffset() - 4; 7655 goto L1; 7656 } 7657 else 7658 { 7659 assert(TARGET_SEGMENTED || !tyfarfunc(s.ty())); 7660 pbuf.flush(); 7661 objmod.reftoident(pbuf.seg,pbuf.offset,s,val,flags); 7662 pbuf.write32(cast(uint)(val)); 7663 } 7664 break; 7665 7666 case FLblock: /* displacement to another block */ 7667 ad = uev.Vblock.Boffset - pbuf.getOffset() - 4; 7668 //printf("FLblock: funcoffset = %x, pbuf.getOffset = %x, Boffset = %x, ad = %x\n", funcoffset, pbuf.getOffset(), uev.Vblock.Boffset, ad); 7669 goto L1; 7670 7671 case FLblockoff: 7672 pbuf.flush(); 7673 assert(uev.Vblock); 7674 //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", pbuf.offset, uev.Vblock.Boffset, funcoffset); 7675 objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset); 7676 pbuf.write32(cast(uint)(uev.Vblock.Boffset)); 7677 break; 7678 7679 default: 7680 WRFL(fl); 7681 assert(0); 7682 } 7683 pbuf.offset += 4; 7684 } 7685 7686 7687 @trusted 7688 private void do16bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags) 7689 { 7690 char *p; 7691 Symbol *s; 7692 targ_size_t ad; 7693 7694 switch (fl) 7695 { 7696 case FLconst: 7697 pbuf.genp(2,cast(char *) uev); 7698 return; 7699 7700 case FLdatseg: 7701 pbuf.flush(); 7702 objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,flags); 7703 pbuf.write16(cast(uint)uev.Vpointer); 7704 break; 7705 7706 case FLswitch: 7707 pbuf.flush(); 7708 ad = uev.Vswitch.Btableoffset; 7709 if (config.flags & CFGromable) 7710 objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad); 7711 else 7712 objmod.reftodatseg(pbuf.seg,pbuf.offset,ad,objmod.jmpTableSegment(funcsym_p),CFoff); 7713 pbuf.write16(cast(uint)ad); 7714 break; 7715 7716 case FLcsdata: 7717 case FLfardata: 7718 case FLextern: /* external data symbol */ 7719 case FLtlsdata: 7720 //assert(SIXTEENBIT || TARGET_SEGMENTED); 7721 pbuf.flush(); 7722 s = uev.Vsym; /* 
symbol pointer */ 7723 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,flags); 7724 pbuf.write16(cast(uint)uev.Voffset); 7725 break; 7726 7727 case FLfunc: /* function call */ 7728 //assert(SIXTEENBIT || TARGET_SEGMENTED); 7729 s = uev.Vsym; /* symbol pointer */ 7730 if (tyfarfunc(s.ty())) 7731 { /* Large code references are always absolute */ 7732 pbuf.flush(); 7733 pbuf.offset += objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags) - 2; 7734 } 7735 else if (s.Sseg == pbuf.seg && 7736 (s.Sclass == SC.static_ || s.Sclass == SC.global) && 7737 s.Sxtrnnum == 0 && flags & CFselfrel) 7738 { /* if we know it's relative address */ 7739 ad = s.Soffset - pbuf.getOffset() - 2; 7740 goto L1; 7741 } 7742 else 7743 { 7744 pbuf.flush(); 7745 objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags); 7746 } 7747 pbuf.write16(0); 7748 break; 7749 7750 case FLblock: /* displacement to another block */ 7751 ad = uev.Vblock.Boffset - pbuf.getOffset() - 2; 7752 debug 7753 { 7754 targ_ptrdiff_t delta = uev.Vblock.Boffset - pbuf.getOffset() - 2; 7755 assert(cast(short)delta == delta); 7756 } 7757 L1: 7758 pbuf.genp(2,&ad); // displacement 7759 return; 7760 7761 case FLblockoff: 7762 pbuf.flush(); 7763 objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset); 7764 pbuf.write16(cast(uint)uev.Vblock.Boffset); 7765 break; 7766 7767 default: 7768 WRFL(fl); 7769 assert(0); 7770 } 7771 pbuf.offset += 2; 7772 } 7773 7774 7775 @trusted 7776 private void do8bit(MiniCodeBuf *pbuf, FL fl, evc *uev) 7777 { 7778 char c; 7779 targ_ptrdiff_t delta; 7780 7781 switch (fl) 7782 { 7783 case FLconst: 7784 c = cast(char)uev.Vuns; 7785 break; 7786 7787 case FLblock: 7788 delta = uev.Vblock.Boffset - pbuf.getOffset() - 1; 7789 if (cast(byte)delta != delta) 7790 { 7791 version (MARS) 7792 { 7793 if (uev.Vblock.Bsrcpos.Slinnum) 7794 printf("%s(%d): ", uev.Vblock.Bsrcpos.Sfilename, uev.Vblock.Bsrcpos.Slinnum); 7795 } 7796 printf("block displacement of %lld exceeds the maximum offset of -128 to 127.\n", cast(long)delta); 7797 err_exit(); 7798 } 7799 c = cast(char)delta; 7800 debug assert(uev.Vblock.Boffset > pbuf.getOffset() || c != 0x7F); 7801 break; 7802 7803 default: 7804 debug printf("fl = %d\n",fl); 7805 assert(0); 7806 } 7807 pbuf.gen(c); 7808 } 7809 7810 7811 /********************************** 7812 */ 7813 7814 version (SCPP) 7815 { 7816 static if (HYDRATE) 7817 { 7818 @trusted 7819 void code_hydrate(code **pc) 7820 { 7821 code *c; 7822 ubyte ins,rm; 7823 FL fl; 7824 7825 assert(pc); 7826 while (*pc) 7827 { 7828 c = cast(code *) ph_hydrate(cast(void**)pc); 7829 if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4) 7830 ins = vex_inssize(c); 7831 else if ((c.Iop & 0xFFFD00) == 0x0F3800) 7832 ins = inssize2[(c.Iop >> 8) & 0xFF]; 7833 else if ((c.Iop & 0xFF00) == 0x0F00) 7834 ins = inssize2[c.Iop & 0xFF]; 7835 else 7836 ins = inssize[c.Iop & 0xFF]; 7837 switch (c.Iop) 7838 { 7839 default: 7840 break; 7841 7842 case ESCAPE | ESClinnum: 7843 srcpos_hydrate(&c.IEV1.Vsrcpos); 7844 goto done; 7845 7846 case ESCAPE | ESCctor: 7847 case ESCAPE | ESCdtor: 7848 el_hydrate(&c.IEV1.Vtor); 7849 goto done; 7850 7851 case ASM: 7852 ph_hydrate(cast(void**)&c.IEV1.bytes); 7853 goto done; 7854 } 7855 if (!(ins & M) || 7856 ((rm = c.Irm) & 0xC0) == 0xC0) 7857 goto do2; /* if no first operand */ 7858 if (is32bitaddr(I32,c.Iflags)) 7859 { 7860 7861 if ( 7862 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5)) 7863 ) 7864 goto do2; /* if no first operand */ 7865 } 7866 else 7867 { 7868 if ( 7869 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) 7870 ) 
7871 goto do2; /* if no first operand */ 7872 } 7873 fl = cast(FL) c.IFL1; 7874 switch (fl) 7875 { 7876 case FLudata: 7877 case FLdata: 7878 case FLreg: 7879 case FLauto: 7880 case FLfast: 7881 case FLbprel: 7882 case FLpara: 7883 case FLcsdata: 7884 case FLfardata: 7885 case FLtlsdata: 7886 case FLfunc: 7887 case FLpseudo: 7888 case FLextern: 7889 assert(flinsymtab[fl]); 7890 symbol_hydrate(&c.IEV1.Vsym); 7891 symbol_debug(c.IEV1.Vsym); 7892 break; 7893 7894 case FLdatseg: 7895 case FLfltreg: 7896 case FLallocatmp: 7897 case FLcs: 7898 case FLndp: 7899 case FLoffset: 7900 case FLlocalsize: 7901 case FLconst: 7902 case FLframehandler: 7903 assert(!flinsymtab[fl]); 7904 break; 7905 7906 case FLcode: 7907 ph_hydrate(cast(void**)&c.IEV1.Vcode); 7908 break; 7909 7910 case FLblock: 7911 case FLblockoff: 7912 ph_hydrate(cast(void**)&c.IEV1.Vblock); 7913 break; 7914 version (SCPP) 7915 { 7916 case FLctor: 7917 case FLdtor: 7918 el_hydrate(cast(elem**)&c.IEV1.Vtor); 7919 break; 7920 } 7921 case FLasm: 7922 ph_hydrate(cast(void**)&c.IEV1.bytes); 7923 break; 7924 7925 default: 7926 WRFL(fl); 7927 assert(0); 7928 } 7929 do2: 7930 /* Ignore TEST (F6 and F7) opcodes */ 7931 if (!(ins & T)) 7932 goto done; /* if no second operand */ 7933 7934 fl = cast(FL) c.IFL2; 7935 switch (fl) 7936 { 7937 case FLudata: 7938 case FLdata: 7939 case FLreg: 7940 case FLauto: 7941 case FLfast: 7942 case FLbprel: 7943 case FLpara: 7944 case FLcsdata: 7945 case FLfardata: 7946 case FLtlsdata: 7947 case FLfunc: 7948 case FLpseudo: 7949 case FLextern: 7950 assert(flinsymtab[fl]); 7951 symbol_hydrate(&c.IEV2.Vsym); 7952 symbol_debug(c.IEV2.Vsym); 7953 break; 7954 7955 case FLdatseg: 7956 case FLfltreg: 7957 case FLallocatmp: 7958 case FLcs: 7959 case FLndp: 7960 case FLoffset: 7961 case FLlocalsize: 7962 case FLconst: 7963 case FLframehandler: 7964 assert(!flinsymtab[fl]); 7965 break; 7966 7967 case FLcode: 7968 ph_hydrate(cast(void**)&c.IEV2.Vcode); 7969 break; 7970 7971 case FLblock: 7972 case FLblockoff: 7973 ph_hydrate(cast(void**)&c.IEV2.Vblock); 7974 break; 7975 7976 default: 7977 WRFL(fl); 7978 assert(0); 7979 } 7980 done: 7981 { } 7982 7983 pc = &c.next; 7984 } 7985 } 7986 } 7987 7988 /********************************** 7989 */ 7990 7991 static if (DEHYDRATE) 7992 { 7993 @trusted 7994 void code_dehydrate(code **pc) 7995 { 7996 code *c; 7997 ubyte ins,rm; 7998 FL fl; 7999 8000 while ((c = *pc) != null) 8001 { 8002 ph_dehydrate(pc); 8003 8004 if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4) 8005 ins = vex_inssize(c); 8006 else if ((c.Iop & 0xFFFD00) == 0x0F3800) 8007 ins = inssize2[(c.Iop >> 8) & 0xFF]; 8008 else if ((c.Iop & 0xFF00) == 0x0F00) 8009 ins = inssize2[c.Iop & 0xFF]; 8010 else 8011 ins = inssize[c.Iop & 0xFF]; 8012 switch (c.Iop) 8013 { 8014 default: 8015 break; 8016 8017 case ESCAPE | ESClinnum: 8018 srcpos_dehydrate(&c.IEV1.Vsrcpos); 8019 goto done; 8020 8021 case ESCAPE | ESCctor: 8022 case ESCAPE | ESCdtor: 8023 el_dehydrate(&c.IEV1.Vtor); 8024 goto done; 8025 8026 case ASM: 8027 ph_dehydrate(&c.IEV1.bytes); 8028 goto done; 8029 } 8030 8031 if (!(ins & M) || 8032 ((rm = c.Irm) & 0xC0) == 0xC0) 8033 goto do2; /* if no first operand */ 8034 if (is32bitaddr(I32,c.Iflags)) 8035 { 8036 8037 if ( 8038 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5)) 8039 ) 8040 goto do2; /* if no first operand */ 8041 } 8042 else 8043 { 8044 if ( 8045 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) 8046 ) 8047 goto do2; /* if no first operand */ 8048 } 8049 fl = cast(FL) c.IFL1; 8050 switch (fl) 8051 { 8052 case 
FLudata: 8053 case FLdata: 8054 case FLreg: 8055 case FLauto: 8056 case FLfast: 8057 case FLbprel: 8058 case FLpara: 8059 case FLcsdata: 8060 case FLfardata: 8061 case FLtlsdata: 8062 case FLfunc: 8063 case FLpseudo: 8064 case FLextern: 8065 assert(flinsymtab[fl]); 8066 symbol_dehydrate(&c.IEV1.Vsym); 8067 break; 8068 8069 case FLdatseg: 8070 case FLfltreg: 8071 case FLallocatmp: 8072 case FLcs: 8073 case FLndp: 8074 case FLoffset: 8075 case FLlocalsize: 8076 case FLconst: 8077 case FLframehandler: 8078 assert(!flinsymtab[fl]); 8079 break; 8080 8081 case FLcode: 8082 ph_dehydrate(&c.IEV1.Vcode); 8083 break; 8084 8085 case FLblock: 8086 case FLblockoff: 8087 ph_dehydrate(&c.IEV1.Vblock); 8088 break; 8089 version (SCPP) 8090 { 8091 case FLctor: 8092 case FLdtor: 8093 el_dehydrate(&c.IEV1.Vtor); 8094 break; 8095 } 8096 case FLasm: 8097 ph_dehydrate(&c.IEV1.bytes); 8098 break; 8099 8100 default: 8101 WRFL(fl); 8102 assert(0); 8103 break; 8104 } 8105 do2: 8106 /* Ignore TEST (F6 and F7) opcodes */ 8107 if (!(ins & T)) 8108 goto done; /* if no second operand */ 8109 8110 fl = cast(FL) c.IFL2; 8111 switch (fl) 8112 { 8113 case FLudata: 8114 case FLdata: 8115 case FLreg: 8116 case FLauto: 8117 case FLfast: 8118 case FLbprel: 8119 case FLpara: 8120 case FLcsdata: 8121 case FLfardata: 8122 case FLtlsdata: 8123 case FLfunc: 8124 case FLpseudo: 8125 case FLextern: 8126 assert(flinsymtab[fl]); 8127 symbol_dehydrate(&c.IEV2.Vsym); 8128 break; 8129 8130 case FLdatseg: 8131 case FLfltreg: 8132 case FLallocatmp: 8133 case FLcs: 8134 case FLndp: 8135 case FLoffset: 8136 case FLlocalsize: 8137 case FLconst: 8138 case FLframehandler: 8139 assert(!flinsymtab[fl]); 8140 break; 8141 8142 case FLcode: 8143 ph_dehydrate(&c.IEV2.Vcode); 8144 break; 8145 8146 case FLblock: 8147 case FLblockoff: 8148 ph_dehydrate(&c.IEV2.Vblock); 8149 break; 8150 8151 default: 8152 WRFL(fl); 8153 assert(0); 8154 break; 8155 } 8156 done: 8157 pc = &code_next(c); 8158 } 8159 } 8160 } 8161 } 8162 8163 /*************************** 8164 * Debug code to dump code structure. 8165 */ 8166 8167 void WRcodlst(code *c) 8168 { 8169 for (; c; c = code_next(c)) 8170 code_print(c); 8171 } 8172 8173 @trusted 8174 extern (C) void code_print(scope code* c) 8175 { 8176 ubyte ins; 8177 ubyte rexb; 8178 8179 if (c == null) 8180 { 8181 printf("code 0\n"); 8182 return; 8183 } 8184 8185 const op = c.Iop; 8186 if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4) 8187 ins = vex_inssize(c); 8188 else if ((c.Iop & 0xFFFD00) == 0x0F3800) 8189 ins = inssize2[(op >> 8) & 0xFF]; 8190 else if ((c.Iop & 0xFF00) == 0x0F00) 8191 ins = inssize2[op & 0xFF]; 8192 else 8193 ins = inssize[op & 0xFF]; 8194 8195 printf("code %p: nxt=%p ",c,code_next(c)); 8196 8197 if (c.Iflags & CFvex) 8198 { 8199 if (c.Iflags & CFvex3) 8200 { 8201 printf("vex=0xC4"); 8202 printf(" 0x%02X", VEX3_B1(c.Ivex)); 8203 printf(" 0x%02X", VEX3_B2(c.Ivex)); 8204 rexb = 8205 ( c.Ivex.w ? REX_W : 0) | 8206 (!c.Ivex.r ? REX_R : 0) | 8207 (!c.Ivex.x ? REX_X : 0) | 8208 (!c.Ivex.b ? REX_B : 0); 8209 } 8210 else 8211 { 8212 printf("vex=0xC5"); 8213 printf(" 0x%02X", VEX2_B1(c.Ivex)); 8214 rexb = !c.Ivex.r ? 
REX_R : 0; 8215 } 8216 printf(" "); 8217 } 8218 else 8219 rexb = c.Irex; 8220 8221 if (rexb) 8222 { 8223 printf("rex=0x%02X ", c.Irex); 8224 if (rexb & REX_W) 8225 printf("W"); 8226 if (rexb & REX_R) 8227 printf("R"); 8228 if (rexb & REX_X) 8229 printf("X"); 8230 if (rexb & REX_B) 8231 printf("B"); 8232 printf(" "); 8233 } 8234 printf("op=0x%02X",op); 8235 8236 if ((op & 0xFF) == ESCAPE) 8237 { 8238 if ((op & 0xFF00) == ESClinnum) 8239 { 8240 printf(" linnum = %d\n",c.IEV1.Vsrcpos.Slinnum); 8241 return; 8242 } 8243 printf(" ESCAPE %d",c.Iop >> 8); 8244 } 8245 if (c.Iflags) 8246 printf(" flg=%x",c.Iflags); 8247 if (ins & M) 8248 { 8249 uint rm = c.Irm; 8250 printf(" rm=0x%02X=%d,%d,%d",rm,(rm>>6)&3,(rm>>3)&7,rm&7); 8251 if (!I16 && issib(rm)) 8252 { 8253 ubyte sib = c.Isib; 8254 printf(" sib=%02x=%d,%d,%d",sib,(sib>>6)&3,(sib>>3)&7,sib&7); 8255 } 8256 if ((rm & 0xC7) == BPRM || (rm & 0xC0) == 0x80 || (rm & 0xC0) == 0x40) 8257 { 8258 switch (c.IFL1) 8259 { 8260 case FLconst: 8261 case FLoffset: 8262 printf(" int = %4d",c.IEV1.Vuns); 8263 break; 8264 8265 case FLblock: 8266 printf(" block = %p",c.IEV1.Vblock); 8267 break; 8268 8269 case FLswitch: 8270 case FLblockoff: 8271 case FLlocalsize: 8272 case FLframehandler: 8273 case 0: 8274 break; 8275 8276 case FLdatseg: 8277 printf(" FLdatseg %d.%llx",c.IEV1.Vseg,cast(ulong)c.IEV1.Vpointer); 8278 break; 8279 8280 case FLauto: 8281 case FLfast: 8282 case FLreg: 8283 case FLdata: 8284 case FLudata: 8285 case FLpara: 8286 case FLbprel: 8287 case FLtlsdata: 8288 case FLextern: 8289 printf(" "); 8290 WRFL(cast(FL)c.IFL1); 8291 printf(" sym='%s'",c.IEV1.Vsym.Sident.ptr); 8292 if (c.IEV1.Voffset) 8293 printf(".%d", cast(int)c.IEV1.Voffset); 8294 break; 8295 8296 default: 8297 WRFL(cast(FL)c.IFL1); 8298 break; 8299 } 8300 } 8301 } 8302 if (ins & T) 8303 { 8304 printf(" "); 8305 WRFL(cast(FL)c.IFL2); 8306 switch (c.IFL2) 8307 { 8308 case FLconst: 8309 printf(" int = %4d",c.IEV2.Vuns); 8310 break; 8311 8312 case FLblock: 8313 printf(" block = %p",c.IEV2.Vblock); 8314 break; 8315 8316 case FLswitch: 8317 case FLblockoff: 8318 case 0: 8319 case FLlocalsize: 8320 case FLframehandler: 8321 break; 8322 8323 case FLdatseg: 8324 printf(" %d.%llx",c.IEV2.Vseg,cast(ulong)c.IEV2.Vpointer); 8325 break; 8326 8327 case FLauto: 8328 case FLfast: 8329 case FLreg: 8330 case FLpara: 8331 case FLbprel: 8332 case FLfunc: 8333 case FLdata: 8334 case FLudata: 8335 case FLtlsdata: 8336 printf(" sym='%s'",c.IEV2.Vsym.Sident.ptr); 8337 break; 8338 8339 case FLcode: 8340 printf(" code = %p",c.IEV2.Vcode); 8341 break; 8342 8343 default: 8344 WRFL(cast(FL)c.IFL2); 8345 break; 8346 } 8347 } 8348 printf("\n"); 8349 } 8350 8351 /************************************** 8352 * Pretty-print a CF mask. 
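 * Set bits are printed in the fixed order of the checks below, separated
 * by '|'. A small sketch:
 * ---
 * CF_print(CFopsize | CFselfrel);   // prints "CFselfrel|CFopsize"
 * ---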
8353 * Params: 8354 * cf = CF mask 8355 */ 8356 @trusted 8357 extern (C) void CF_print(uint cf) 8358 { 8359 void print(uint mask, const(char)* string) 8360 { 8361 if (cf & mask) 8362 { 8363 printf(string); 8364 cf &= ~mask; 8365 if (cf) 8366 printf("|"); 8367 } 8368 } 8369 8370 print(CFindirect, "CFindirect"); 8371 print(CFswitch, "CFswitch"); 8372 print(CFjmp5, "CFjmp5"); 8373 print(CFvex3, "CFvex3"); 8374 print(CFvex, "CFvex"); 8375 print(CFpc32, "CFpc32"); 8376 print(CFoffset64, "CFoffset64"); 8377 print(CFclassinit, "CFclassinit"); 8378 print(CFvolatile, "CFvolatile"); 8379 print(CFtarg2, "CFtarg2"); 8380 print(CFunambig, "CFunambig"); 8381 print(CFselfrel, "CFselfrel"); 8382 print(CFwait, "CFwait"); 8383 print(CFfs, "CFfs"); 8384 print(CFcs, "CFcs"); 8385 print(CFds, "CFds"); 8386 print(CFss, "CFss"); 8387 print(CFes, "CFes"); 8388 print(CFaddrsize, "CFaddrsize"); 8389 print(CFopsize, "CFopsize"); 8390 print(CFpsw, "CFpsw"); 8391 print(CFoff, "CFoff"); 8392 print(CFseg, "CFseg"); 8393 print(CFtarg, "CFtarg"); 8394 print(CFjmp16, "CFjmp16"); 8395 printf("\n"); 8396 } 8397 8398 }
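
/* A usage sketch for the debug helpers above; `fcode` is a hypothetical
 * pointer to the first node of a function's generated code list.
 * Guarded with version (none) so it is only parsed, never compiled.
 */
version (none)
{
    void dumpFunctionCode(code* fcode)
    {
        WRcodlst(fcode);                                // code_print() each instruction
        printf("total code size = %u\n", calcblksize(fcode));
    }
}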