1 /** 2 * Code generation 3 3 * 4 * Includes: 5 * - generating a function prolog (pushing return address, loading paramters) 6 * - generating a function epilog (restoring registers, returning) 7 * - generation / peephole optimizations of jump / branch instructions 8 * 9 * Compiler implementation of the 10 * $(LINK2 https://www.dlang.org, D programming language). 11 * 12 * Copyright: Copyright (C) 1994-1998 by Symantec 13 * Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved 14 * Authors: $(LINK2 https://www.digitalmars.com, Walter Bright) 15 * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 16 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod3.d, backend/cod3.d) 17 * Documentation: https://dlang.org/phobos/dmd_backend_cod3.html 18 * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod3.d 19 */ 20 21 module dmd.backend.cod3; 22 23 import core.bitop; 24 import core.stdc.stdio; 25 import core.stdc.stdlib; 26 import core.stdc.string; 27 28 import dmd.backend.backend; 29 import dmd.backend.barray; 30 import dmd.backend.cc; 31 import dmd.backend.cdef; 32 import dmd.backend.cgcse; 33 import dmd.backend.code; 34 import dmd.backend.code_x86; 35 import dmd.backend.codebuilder; 36 import dmd.backend.dlist; 37 import dmd.backend.dvec; 38 import dmd.backend.melf; 39 import dmd.backend.mem; 40 import dmd.backend.el; 41 import dmd.backend.global; 42 import dmd.backend.obj; 43 import dmd.backend.oper; 44 import dmd.backend.rtlsym; 45 import dmd.backend.symtab; 46 import dmd.backend.ty; 47 import dmd.backend.type; 48 import dmd.backend.xmm; 49 50 51 nothrow: 52 @safe: 53 54 enum MARS = true; 55 56 //private void genorreg(ref CodeBuilder c, uint t, uint f) { genregs(c, 0x09, f, t); } 57 58 enum JMPJMPTABLE = false; // benchmarking shows it's slower 59 60 /************* 61 * Size in bytes of each instruction. 62 * 0 means illegal instruction. 
63 * bit M: if there is a modregrm field (EV1 is reserved for modregrm) 64 * bit T: if there is a second operand (EV2) 65 * bit E: if second operand is only 8 bits 66 * bit A: a short version exists for the AX reg 67 * bit R: a short version exists for regs 68 * bits 2..0: size of instruction (excluding optional bytes) 69 */ 70 71 enum 72 { 73 M = 0x80, 74 T = 0x40, 75 E = 0x20, 76 A = 0x10, 77 R = 0x08, 78 W = 0, 79 } 80 81 private __gshared ubyte[256] inssize = 82 [ M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 00 */ 83 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 08 */ 84 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 10 */ 85 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 18 */ 86 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 20 */ 87 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 28 */ 88 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 30 */ 89 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 38 */ 90 1,1,1,1, 1,1,1,1, /* 40 */ 91 1,1,1,1, 1,1,1,1, /* 48 */ 92 1,1,1,1, 1,1,1,1, /* 50 */ 93 1,1,1,1, 1,1,1,1, /* 58 */ 94 1,1,M|2,M|2, 1,1,1,1, /* 60 */ 95 T|3,M|T|4,T|E|2,M|T|E|3, 1,1,1,1, /* 68 */ 96 T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* 70 */ 97 T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* 78 */ 98 M|T|E|A|3,M|T|A|4,M|T|E|3,M|T|E|3, M|2,M|2,M|2,M|A|R|2, /* 80 */ 99 M|A|2,M|A|2,M|A|2,M|A|2, M|2,M|2,M|2,M|R|2, /* 88 */ 100 1,1,1,1, 1,1,1,1, /* 90 */ 101 1,1,T|5,1, 1,1,1,1, /* 98 */ 102 103 // cod3_set32() patches this 104 // T|5,T|5,T|5,T|5, 1,1,1,1, /* A0 */ 105 T|3,T|3,T|3,T|3, 1,1,1,1, /* A0 */ 106 107 T|E|2,T|3,1,1, 1,1,1,1, /* A8 */ 108 T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* B0 */ 109 T|3,T|3,T|3,T|3, T|3,T|3,T|3,T|3, /* B8 */ 110 M|T|E|3,M|T|E|3,T|3,1, M|2,M|2,M|T|E|R|3,M|T|R|4, /* C0 */ 111 T|E|4,1,T|3,1, 1,T|E|2,1,1, /* C8 */ 112 M|2,M|2,M|2,M|2, T|E|2,T|E|2,0,1, /* D0 */ 113 /* For the floating instructions, allow room for the FWAIT */ 114 M|2,M|2,M|2,M|2, M|2,M|2,M|2,M|2, /* D8 */ 115 T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* E0 */ 116 T|3,T|3,T|5,T|E|2, 1,1,1,1, /* E8 */ 117 1,0,1,1, 
1,1,M|A|2,M|A|2, /* F0 */ 118 1,1,1,1, 1,1,M|2,M|R|2 /* F8 */ 119 ]; 120 121 private __gshared const ubyte[256] inssize32 = 122 [ 2,2,2,2, 2,5,1,1, /* 00 */ 123 2,2,2,2, 2,5,1,1, /* 08 */ 124 2,2,2,2, 2,5,1,1, /* 10 */ 125 2,2,2,2, 2,5,1,1, /* 18 */ 126 2,2,2,2, 2,5,1,1, /* 20 */ 127 2,2,2,2, 2,5,1,1, /* 28 */ 128 2,2,2,2, 2,5,1,1, /* 30 */ 129 2,2,2,2, 2,5,1,1, /* 38 */ 130 1,1,1,1, 1,1,1,1, /* 40 */ 131 1,1,1,1, 1,1,1,1, /* 48 */ 132 1,1,1,1, 1,1,1,1, /* 50 */ 133 1,1,1,1, 1,1,1,1, /* 58 */ 134 1,1,2,2, 1,1,1,1, /* 60 */ 135 5,6,2,3, 1,1,1,1, /* 68 */ 136 2,2,2,2, 2,2,2,2, /* 70 */ 137 2,2,2,2, 2,2,2,2, /* 78 */ 138 3,6,3,3, 2,2,2,2, /* 80 */ 139 2,2,2,2, 2,2,2,2, /* 88 */ 140 1,1,1,1, 1,1,1,1, /* 90 */ 141 1,1,7,1, 1,1,1,1, /* 98 */ 142 5,5,5,5, 1,1,1,1, /* A0 */ 143 2,5,1,1, 1,1,1,1, /* A8 */ 144 2,2,2,2, 2,2,2,2, /* B0 */ 145 5,5,5,5, 5,5,5,5, /* B8 */ 146 3,3,3,1, 2,2,3,6, /* C0 */ 147 4,1,3,1, 1,2,1,1, /* C8 */ 148 2,2,2,2, 2,2,0,1, /* D0 */ 149 /* For the floating instructions, don't need room for the FWAIT */ 150 2,2,2,2, 2,2,2,2, /* D8 */ 151 152 2,2,2,2, 2,2,2,2, /* E0 */ 153 5,5,7,2, 1,1,1,1, /* E8 */ 154 1,0,1,1, 1,1,2,2, /* F0 */ 155 1,1,1,1, 1,1,2,2 /* F8 */ 156 ]; 157 158 /* For 2 byte opcodes starting with 0x0F */ 159 private __gshared ubyte[256] inssize2 = 160 [ M|3,M|3,M|3,M|3, 2,2,2,2, // 00 161 2,2,M|3,2, 2,M|3,2,M|T|E|4, // 08 162 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 10 163 M|3,2,2,2, 2,2,2,2, // 18 164 M|3,M|3,M|3,M|3, M|3,2,M|3,2, // 20 165 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 28 166 2,2,2,2, 2,2,2,2, // 30 167 M|4,2,M|T|E|5,2, 2,2,2,2, // 38 168 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 40 169 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 48 170 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 50 171 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 58 172 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 60 173 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 68 174 M|T|E|4,M|T|E|4,M|T|E|4,M|T|E|4, M|3,M|3,M|3,2, // 70 175 2,2,2,2, M|3,M|3,M|3,M|3, // 78 176 W|T|4,W|T|4,W|T|4,W|T|4, 
W|T|4,W|T|4,W|T|4,W|T|4, // 80
  W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4, // 88
  M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 90
  M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 98
  2,2,2,M|3, M|T|E|4,M|3,2,2, // A0
  2,2,2,M|3, M|T|E|4,M|3,M|3,M|3, // A8
  M|E|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // B0
  M|3,2,M|T|E|4,M|3, M|3,M|3,M|3,M|3, // B8
  M|3,M|3,M|T|E|4,M|3, M|T|E|4,M|T|E|4,M|T|E|4,M|3, // C0
  2,2,2,2, 2,2,2,2, // C8
  M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // D0
  M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // D8
  M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // E0
  M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // E8
  M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // F0
  M|3,M|3,M|3,M|3, M|3,M|3,M|3,2 // F8
];

/*************************************************
 * Generate code to save `reg` in `regsave` stack area.
 * Params:
 *      regsave = register save area on stack
 *      cdb = where to write generated code
 *      reg = register to save
 *      idx = set to location in regsave for use in REGSAVE_restore()
 */

@trusted
void REGSAVE_save(ref REGSAVE regsave, ref CodeBuilder cdb, reg_t reg, out uint idx)
{
    if (isXMMreg(reg))
    {
        // XMM registers take a 16-byte, 16-aligned slot
        regsave.alignment = 16;
        regsave.idx = (regsave.idx + 15) & ~15;   // round slot offset up to 16
        idx = regsave.idx;
        regsave.idx += 16;
        // MOVD idx[RBP],xmm
        opcode_t op = STOAPD;
        if (TARGET_LINUX && I32)
            // Haven't yet figured out why stack is not aligned to 16;
            // use the unaligned store form to be safe
            op = STOUPD;
        cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLregsave,cast(targ_uns) idx);
    }
    else
    {
        // General-purpose register: one REGSIZE slot
        if (!regsave.alignment)
            regsave.alignment = REGSIZE;
        idx = regsave.idx;
        regsave.idx += REGSIZE;
        // MOV idx[RBP],reg
        cdb.genc1(0x89,modregxrm(2, reg, BPRM),FLregsave,cast(targ_uns) idx);
        if (I64)
            code_orrex(cdb.last(), REX_W);        // promote store to 64 bits
    }
    reflocal = true;
    if (regsave.idx > regsave.top)
        regsave.top = regsave.idx;              // keep high water mark
}

/*******************************
 * Restore `reg` from `regsave` area at offset `idx`.
 * Complement of REGSAVE_save().
 */

@trusted
void REGSAVE_restore(const ref REGSAVE regsave, ref CodeBuilder cdb, reg_t reg, uint idx)
{
    if (isXMMreg(reg))
    {
        assert(regsave.alignment == 16);
        // MOVD xmm,idx[RBP]
        opcode_t op = LODAPD;
        if (TARGET_LINUX && I32)
            // Haven't yet figured out why stack is not aligned to 16
            op = LODUPD;
        cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLregsave,cast(targ_uns) idx);
    }
    else
    {   // MOV reg,idx[RBP]
        cdb.genc1(0x8B,modregxrm(2, reg, BPRM),FLregsave,cast(targ_uns) idx);
        if (I64)
            code_orrex(cdb.last(), REX_W);        // 64-bit load
    }
}

/************************************
 * Size in bytes for a VEX (0xC4) encoded instruction.
 * Looks up the base size in inssize2[] and adds the VEX prefix length
 * (2 bytes for 3-byte VEX, 1 byte for 2-byte VEX).
 */

@trusted
ubyte vex_inssize(code *c)
{
    assert(c.Iflags & CFvex && c.Ivex.pfx == 0xC4);
    ubyte ins;
    if (c.Iflags & CFvex3)
    {
        switch (c.Ivex.mmmm)
        {
            case 0: // no prefix
            case 1: // 0F
                ins = cast(ubyte)(inssize2[c.Ivex.op] + 2);
                break;
            case 2: // 0F 38
                ins = cast(ubyte)(inssize2[0x38] + 1);
                break;
            case 3: // 0F 3A
                ins = cast(ubyte)(inssize2[0x3A] + 1);
                break;
            default:
                printf("Iop = %x mmmm = %x\n", c.Iop, c.Ivex.mmmm);
                assert(0);
        }
    }
    else
    {
        ins = cast(ubyte)(inssize2[c.Ivex.op] + 1);
    }
    return ins;
}

/************************************
 * Determine if there is a modregrm byte for instruction.
 * Params:
 *      c = instruction
 * Returns:
 *      true if has modregrm byte
 */

@trusted
bool hasModregrm(scope const code* c)
{
    uint ins;
    opcode_t op1 = c.Iop & 0xFF;
    if (op1 == ESCAPE)
        ins = 0;                                // pseudo-op, no modregrm
    else if ((c.Iop & 0xFFFD00) == 0x0F3800)
        ins = inssize2[(c.Iop >> 8) & 0xFF];    // 0F 38 / 0F 3A three-byte opcode
    else if ((c.Iop & 0xFF00) == 0x0F00)
        ins = inssize2[op1];                    // 0F two-byte opcode
    else
        ins = inssize[op1];                     // one-byte opcode
    return (ins & M) != 0;
}

/********************************
 * setup ALLREGS and BYTEREGS
 * called by: codgen
 */

@trusted
void cod3_initregs()
{
    if (I64)
    {
        ALLREGS = mAX|mBX|mCX|mDX|mSI|mDI| mR8|mR9|mR10|mR11|mR12|mR13|mR14|mR15;
        BYTEREGS = ALLREGS;
    }
    else
    {
        ALLREGS = ALLREGS_INIT;
        BYTEREGS = BYTEREGS_INIT;
    }
}

/********************************
 * set initial global variable values
 */

@trusted
void cod3_setdefault()
{
    fregsaved = mBP | mSI | mDI;
}

/********************************
 * Fix global variables for 386.
 */
@trusted
void cod3_set32()
{
    // MOV AL/EAX,mem forms take a 32-bit offset in 32-bit mode
    inssize[0xA0] = T|5;
    inssize[0xA1] = T|5;
    inssize[0xA2] = T|5;
    inssize[0xA3] = T|5;
    BPRM = 5;                                   /* [EBP] addressing mode */
    fregsaved = mBP | mBX | mSI | mDI;          // saved across function calls
    FLOATREGS = FLOATREGS_32;
    FLOATREGS2 = FLOATREGS2_32;
    DOUBLEREGS = DOUBLEREGS_32;
    if (config.flags3 & CFG3eseqds)
        fregsaved |= mES;

    // Jcc rel32 (0F 8x) instructions are 6 bytes with 32-bit displacement
    foreach (ref v; inssize2[0x80 .. 0x90])
        v = W|T|6;

    TARGET_STACKALIGN = config.fpxmmregs ? 16 : 4;
}

/********************************
 * Fix global variables for I64.
 */

@trusted
void cod3_set64()
{
    inssize[0xA0] = T|5;                        // MOV AL,mem
    inssize[0xA1] = T|5;                        // MOV RAX,mem
    inssize[0xA2] = T|5;                        // MOV mem,AL
    inssize[0xA3] = T|5;                        // MOV mem,RAX
    BPRM = 5;                                   // [RBP] addressing mode

    // Win64 and Posix ABIs preserve different register sets across calls
    fregsaved = (config.exe & EX_windos)
        ? mBP | mBX | mDI | mSI | mR12 | mR13 | mR14 | mR15 | mES | mXMM6 | mXMM7 // also XMM8..15;
        : mBP | mBX | mR12 | mR13 | mR14 | mR15 | mES; // saved across function calls

    FLOATREGS = FLOATREGS_64;
    FLOATREGS2 = FLOATREGS2_64;
    DOUBLEREGS = DOUBLEREGS_64;

    ALLREGS = mAX|mBX|mCX|mDX|mSI|mDI| mR8|mR9|mR10|mR11|mR12|mR13|mR14|mR15;
    BYTEREGS = ALLREGS;

    // Jcc rel32 (0F 8x) instructions are 6 bytes with 32-bit displacement
    foreach (ref v; inssize2[0x80 .. 0x90])
        v = W|T|6;

    TARGET_STACKALIGN = config.fpxmmregs ? 16 : 8;
}

/*********************************
 * Word or dword align start of function.
 * Params:
 *      seg = segment to write alignment bytes to
 *      nbytes = number of alignment bytes to write
 */
@trusted
void cod3_align_bytes(int seg, size_t nbytes)
{
    /* Table 4-2 from Intel Instruction Set Reference M-Z
     * 1 bytes NOP                                        90
     * 2 bytes 66 NOP                                     66 90
     * 3 bytes NOP DWORD ptr [EAX]                        0F 1F 00
     * 4 bytes NOP DWORD ptr [EAX + 00H]                  0F 1F 40 00
     * 5 bytes NOP DWORD ptr [EAX + EAX*1 + 00H]          0F 1F 44 00 00
     * 6 bytes 66 NOP DWORD ptr [EAX + EAX*1 + 00H]       66 0F 1F 44 00 00
     * 7 bytes NOP DWORD ptr [EAX + 00000000H]            0F 1F 80 00 00 00 00
     * 8 bytes NOP DWORD ptr [EAX + EAX*1 + 00000000H]    0F 1F 84 00 00 00 00 00
     * 9 bytes 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] 66 0F 1F 84 00 00 00 00 00
     *  only for CPUs: CPUID.01H.EAX[Bytes 11:8] = 0110B or 1111B
     */

    assert(SegData[seg].SDseg == seg);

    // Emit multi-byte NOPs until nbytes is consumed
    while (nbytes)
    {   size_t n = nbytes;
        const(char)* p;

        if (nbytes > 1 && (I64 || config.fpxmmregs))
        {
            // CPU is modern enough for multi-byte NOP encodings
            switch (n)
            {
                case 2:  p = "\x66\x90"; break;
                case 3:  p = "\x0F\x1F\x00"; break;
                case 4:  p = "\x0F\x1F\x40\x00"; break;
                case 5:  p = "\x0F\x1F\x44\x00\x00"; break;
                case 6:  p = "\x66\x0F\x1F\x44\x00\x00"; break;
                case 7:  p = "\x0F\x1F\x80\x00\x00\x00\x00"; break;
                case 8:  p = "\x0F\x1F\x84\x00\x00\x00\x00\x00"; break;
                default: p = "\x66\x0F\x1F\x84\x00\x00\x00\x00\x00"; n = 9; break;
            }
        }
        else
        {
            // Fall back to a run of single-byte NOPs
            static immutable ubyte[15] nops = [
                0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
            ]; // XCHG AX,AX
            if (n > nops.length)
                n = nops.length;
            p = cast(char*)nops;
        }
        objmod.write_bytes(SegData[seg],p[0 .. n]);
        nbytes -= n;
    }
}

/****************************
 * Align start of function.
 * Params:
 *      seg = segment of function
 */
@trusted
void cod3_align(int seg)
{
    if (config.exe & EX_windos)
    {
        if (config.flags4 & CFG4speed)      // if optimized for speed
        {
            // Pick alignment based on CPU target
            if (config.target_cpu == TARGET_80486 ||
                config.target_cpu >= TARGET_PentiumPro)
            {   // 486 does reads on 16 byte boundaries, so if we are near
                // such a boundary, align us to it

                const nbytes = -Offset(seg) & 15;
                if (nbytes < 8)
                    cod3_align_bytes(seg, nbytes);
            }
        }
    }
    else
    {
        // Posix targets: 8-byte align function starts
        const nbytes = -Offset(seg) & 7;
        cod3_align_bytes(seg, nbytes);
    }
}


/**********************************
 * Generate code to adjust the stack pointer by `nbytes`
 * Params:
 *      cdb = code builder
 *      nbytes = number of bytes to adjust stack pointer
 *               (positive subtracts from ESP, negative adds)
 */
void cod3_stackadj(ref CodeBuilder cdb, int nbytes)
{
    //printf("cod3_stackadj(%d)\n", nbytes);
    uint grex = I64 ? REX_W << 16 : 0;
    uint rm;
    if (nbytes > 0)
        rm = modregrm(3,5,SP); // SUB ESP,nbytes
    else
    {
        nbytes = -nbytes;
        rm = modregrm(3,0,SP); // ADD ESP,nbytes
    }
    cdb.genc2(0x81, grex | rm, nbytes);
}

/**********************************
 * Generate code to align the stack pointer at `nbytes`
 * Params:
 *      cdb = code builder
 *      nbytes = number of bytes to align stack pointer
 */
void cod3_stackalign(ref CodeBuilder cdb, int nbytes)
{
    //printf("cod3_stackalign(%d)\n", nbytes);
    const grex = I64 ?
REX_W << 16 : 0;
    const rm = modregrm(3, 4, SP);             // AND ESP,-nbytes
    cdb.genc2(0x81, grex | rm, -nbytes);
}

/* Constructor that links the ModuleReference to the head of
 * the list pointed to by _Dmoduleref
 *
 * For ELF object files.
 */
static if (0)
{
void cod3_buildmodulector(OutBuffer* buf, int codeOffset, int refOffset)
{
    /*      ret
     * codeOffset:
     *      pushad
     *      mov     EAX,&ModuleReference
     *      mov     ECX,_DmoduleRef
     *      mov     EDX,[ECX]
     *      mov     [EAX],EDX
     *      mov     [ECX],EAX
     *      popad
     *      ret
     */

    const int seg = CODE;

    if (I64 && config.flags3 & CFG3pic)
    {   // PIC: address ModuleReference RIP-relative
        // LEA RAX,ModuleReference[RIP]
        buf.writeByte(REX | REX_W);
        buf.writeByte(LEA);
        buf.writeByte(modregrm(0,AX,5));
        codeOffset += 3;
        codeOffset += Obj.writerel(seg, codeOffset, R_X86_64_PC32, 3 /*STI_DATA*/, refOffset - 4);

        // MOV RCX,_DmoduleRef@GOTPCREL[RIP]
        buf.writeByte(REX | REX_W);
        buf.writeByte(0x8B);
        buf.writeByte(modregrm(0,CX,5));
        codeOffset += 3;
        codeOffset += Obj.writerel(seg, codeOffset, R_X86_64_GOTPCREL, Obj.external_def("_Dmodule_ref"), -4);
    }
    else
    {
        /* movl ModuleReference*, %eax */
        buf.writeByte(0xB8);
        codeOffset += 1;
        const uint reltype = I64 ? R_X86_64_32 : R_386_32;
        codeOffset += Obj.writerel(seg, codeOffset, reltype, 3 /*STI_DATA*/, refOffset);

        /* movl _Dmodule_ref, %ecx */
        buf.writeByte(0xB9);
        codeOffset += 1;
        codeOffset += Obj.writerel(seg, codeOffset, reltype, Obj.external_def("_Dmodule_ref"), 0);
    }

    if (I64)
        buf.writeByte(REX | REX_W);
    buf.writeByte(0x8B); buf.writeByte(0x11);   /* movl (%ecx), %edx */
    if (I64)
        buf.writeByte(REX | REX_W);
    buf.writeByte(0x89); buf.writeByte(0x10);   /* movl %edx, (%eax) */
    if (I64)
        buf.writeByte(REX | REX_W);
    buf.writeByte(0x89); buf.writeByte(0x01);   /* movl %eax, (%ecx) */

    buf.writeByte(0xC3);                        /* ret */
}
}

/*****************************
 * Given a type, return a mask of
 * registers to hold that type.
 * Input:
 *      tym     type of value
 *      tyf     function type
 */

@trusted
regm_t regmask(tym_t tym, tym_t tyf)
{
    switch (tybasic(tym))
    {
        case TYvoid:
        case TYnoreturn:
        case TYstruct:
        case TYarray:
            return 0;                           // returned on stack or nothing

        case TYbool:
        case TYwchar_t:
        case TYchar16:
        case TYchar:
        case TYschar:
        case TYuchar:
        case TYshort:
        case TYushort:
        case TYint:
        case TYuint:
        case TYnullptr:
        case TYnptr:
        case TYnref:
        case TYsptr:
        case TYcptr:
        case TYimmutPtr:
        case TYsharePtr:
        case TYrestrictPtr:
        case TYfgPtr:
            return mAX;                         // fits in one GP register

        case TYfloat:
        case TYifloat:
            if (I64)
                return mXMM0;
            if (config.exe & EX_flat)
                return mST0;
            goto case TYlong;

        case TYlong:
        case TYulong:
        case TYdchar:
            if (!I16)
                return mAX;
            goto case TYfptr;                   // 16-bit: needs a register pair

        case TYfptr:
        case TYhptr:
            return mDX | mAX;

        case TYcent:
        case TYucent:
            assert(I64);
            return mDX | mAX;

        case TYvptr:
            return mDX | mBX;

        case TYdouble:
        case TYdouble_alias:
        case TYidouble:
            if (I64)
                return mXMM0;
            if (config.exe & EX_flat)
                return mST0;
            return DOUBLEREGS;

        case TYllong:
        case TYullong:
            return I64 ? cast(regm_t) mAX : (I32 ? mDX | mAX : DOUBLEREGS);

        case TYldouble:
        case TYildouble:
            return mST0;                        // x87 only

        case TYcfloat:
            if (config.exe & EX_posix && I32 && tybasic(tyf) == TYnfunc)
                return mDX | mAX;
            goto case TYcdouble;

        case TYcdouble:
            if (I64)
                return mXMM0 | mXMM1;           // (re, im) pair
            goto case TYcldouble;

        case TYcldouble:
            return mST01;

        // SIMD vector types
        case TYfloat4:
        case TYdouble2:
        case TYschar16:
        case TYuchar16:
        case TYshort8:
        case TYushort8:
        case TYlong4:
        case TYulong4:
        case TYllong2:
        case TYullong2:

        case TYfloat8:
        case TYdouble4:
        case TYschar32:
        case TYuchar32:
        case TYshort16:
        case TYushort16:
        case TYlong8:
        case TYulong8:
        case TYllong4:
        case TYullong4:
            if (!config.fpxmmregs)
            {   printf("SIMD operations not supported on this platform\n");
                exit(1);
            }
            return mXMM0;

        default:
            debug printf("%s\n", tym_str(tym));
            assert(0);
    }
}

/*******************************
 * setup register allocator parameters with platform specific data
 */
void cgreg_dst_regs(reg_t* dst_integer_reg, reg_t* dst_float_reg)
{
    *dst_integer_reg = AX;
    *dst_float_reg   = XMM0;
}

/*******************************
 * Set preferred register allocation orderings for type `ty`.
 * Params:
 *      ty = type needing registers
 *      pseq = set to NOREG-terminated sequence for (least significant) word
 *      pseqmsw = set to sequence for most significant word of a pair
 */
@trusted
void cgreg_set_priorities(tym_t ty, const(reg_t)** pseq, const(reg_t)** pseqmsw)
{
    //printf("cgreg_set_priorities %x\n", ty);
    const sz = tysize(ty);

    if (tyxmmreg(ty))
    {
        static immutable ubyte[9] sequence = [XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,NOREG];
        *pseq = sequence.ptr;
    }
    else if (I64)
    {
        if (sz == REGSIZE * 2)
        {
            static immutable ubyte[3] seqmsw1 = [CX,DX,NOREG];
            static immutable ubyte[5] seqlsw1 = [AX,BX,SI,DI,NOREG];
            *pseq = seqlsw1.ptr;
            *pseqmsw = seqmsw1.ptr;
        }
        else
        {   // R10 is reserved for the static link
            static immutable ubyte[15]
sequence2 = [AX,CX,DX,SI,DI,R8,R9,R11,BX,R12,R13,R14,R15,BP,NOREG];
            *pseq = cast(ubyte*)sequence2.ptr;
        }
    }
    else if (I32)
    {
        if (sz == REGSIZE * 2)
        {
            static immutable ubyte[5] seqlsw3 = [AX,BX,SI,DI,NOREG];
            static immutable ubyte[3] seqmsw3 = [CX,DX,NOREG];
            *pseq = seqlsw3.ptr;
            *pseqmsw = seqmsw3.ptr;
        }
        else
        {
            static immutable ubyte[8] sequence4 = [AX,CX,DX,BX,SI,DI,BP,NOREG];
            *pseq = sequence4.ptr;
        }
    }
    else
    {   assert(I16);
        if (typtr(ty))
        {
            // For pointer types, try to pick index register first
            static immutable ubyte[8] seqidx5 = [BX,SI,DI,AX,CX,DX,BP,NOREG];
            *pseq = seqidx5.ptr;
        }
        else
        {
            // Otherwise, try to pick index registers last
            static immutable ubyte[8] sequence6 = [AX,CX,DX,BX,SI,DI,BP,NOREG];
            *pseq = sequence6.ptr;
        }
    }
}

/*******************************************
 * Call finally block.
 * Params:
 *      bf = block to call
 *      retregs = registers to preserve across call
 * Returns:
 *      code generated
 */
@trusted
private code *callFinallyBlock(block *bf, regm_t retregs)
{
    CodeBuilder cdbs; cdbs.ctor();
    CodeBuilder cdbr; cdbr.ctor();
    int nalign = 0;

    calledFinally = true;
    // Push/pop code for the registers that must survive the call
    uint npush = gensaverestore(retregs,cdbs,cdbr);

    if (STACKALIGN >= 16)
    {   npush += REGSIZE;                       // account for return address push
        if (npush & (STACKALIGN - 1))
        {   nalign = STACKALIGN - (npush & (STACKALIGN - 1));
            cod3_stackadj(cdbs, nalign);        // pad stack to alignment
        }
    }
    cdbs.genc(0xE8,0,0,0,FLblock,cast(targ_size_t)bf);  // CALL bf
    regcon.immed.mval = 0;                      // call clobbers known immediates
    if (nalign)
        cod3_stackadj(cdbs, -nalign);           // undo the padding
    cdbs.append(cdbr);                          // restore saved registers
    return cdbs.finish();
}

/*******************************
 * Generate block exit code
 */
@trusted
void outblkexitcode(ref CodeBuilder cdb, block *bl, ref int anyspill, const(char)* sflsave, Symbol** retsym, const regm_t mfuncregsave)
{
    CodeBuilder cdb2; cdb2.ctor();
    elem *e =
bl.Belem; 831 block *nextb; 832 regm_t retregs = 0; 833 834 if (bl.BC != BCasm) 835 assert(bl.Bcode == null); 836 837 switch (bl.BC) /* block exit condition */ 838 { 839 case BCiftrue: 840 { 841 bool jcond = true; 842 block *bs1 = bl.nthSucc(0); 843 block *bs2 = bl.nthSucc(1); 844 if (bs1 == bl.Bnext) 845 { // Swap bs1 and bs2 846 block *btmp; 847 848 jcond ^= 1; 849 btmp = bs1; 850 bs1 = bs2; 851 bs2 = btmp; 852 } 853 logexp(cdb,e,jcond,FLblock,cast(code *) bs1); 854 nextb = bs2; 855 } 856 L5: 857 if (configv.addlinenumbers && bl.Bsrcpos.Slinnum && 858 !(funcsym_p.ty() & mTYnaked)) 859 { 860 //printf("BCiftrue: %s(%u)\n", bl.Bsrcpos.Sfilename ? bl.Bsrcpos.Sfilename : "", bl.Bsrcpos.Slinnum); 861 cdb.genlinnum(bl.Bsrcpos); 862 } 863 if (nextb != bl.Bnext) 864 { 865 assert(!(bl.Bflags & BFLepilog)); 866 genjmp(cdb,JMP,FLblock,nextb); 867 } 868 break; 869 870 case BCjmptab: 871 case BCifthen: 872 case BCswitch: 873 { 874 assert(!(bl.Bflags & BFLepilog)); 875 doswitch(cdb,bl); // hide messy details 876 break; 877 } 878 case BCjcatch: // D catch clause of try-catch 879 assert(ehmethod(funcsym_p) != EHmethod.EH_NONE); 880 // Mark all registers as destroyed. This will prevent 881 // register assignments to variables used in catch blocks. 882 getregs(cdb,lpadregs()); 883 884 if (config.ehmethod == EHmethod.EH_DWARF) 885 { 886 /* Each block must have ESP set to the same value it was at the end 887 * of the prolog. But the unwinder calls catch blocks with ESP set 888 * at the value it was when the throwing function was called, which 889 * may have arguments pushed on the stack. 890 * This instruction will reset ESP to the correct offset from EBP. 
891 */ 892 cdb.gen1(ESCAPE | ESCfixesp); 893 } 894 goto case_goto; 895 case BCgoto: 896 nextb = bl.nthSucc(0); 897 if ((MARS || 898 funcsym_p.Sfunc.Fflags3 & Fnteh) && 899 ehmethod(funcsym_p) != EHmethod.EH_DWARF && 900 bl.Btry != nextb.Btry && 901 nextb.BC != BC_finally) 902 { 903 regm_t retregsx = 0; 904 gencodelem(cdb,e,&retregsx,true); 905 int toindex = nextb.Btry ? nextb.Btry.Bscope_index : -1; 906 assert(bl.Btry); 907 int fromindex = bl.Btry.Bscope_index; 908 if (toindex + 1 == fromindex) 909 { // Simply call __finally 910 if (bl.Btry && 911 bl.Btry.nthSucc(1).BC == BCjcatch) 912 { 913 goto L5; // it's a try-catch, not a try-finally 914 } 915 } 916 if (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || 917 config.ehmethod == EHmethod.EH_SEH) 918 { 919 nteh_unwind(cdb,0,toindex); 920 } 921 else 922 { 923 if (toindex + 1 <= fromindex) 924 { 925 //c = cat(c, linux_unwind(0, toindex)); 926 block *bt; 927 928 //printf("B%d: fromindex = %d, toindex = %d\n", bl.Bdfoidx, fromindex, toindex); 929 bt = bl; 930 while ((bt = bt.Btry) != null && bt.Bscope_index != toindex) 931 { block *bf; 932 933 //printf("\tbt.Bscope_index = %d, bt.Blast_index = %d\n", bt.Bscope_index, bt.Blast_index); 934 bf = bt.nthSucc(1); 935 // Only look at try-finally blocks 936 if (bf.BC == BCjcatch) 937 continue; 938 939 if (bf == nextb) 940 continue; 941 //printf("\tbf = B%d, nextb = B%d\n", bf.Bdfoidx, nextb.Bdfoidx); 942 if (nextb.BC == BCgoto && 943 !nextb.Belem && 944 bf == nextb.nthSucc(0)) 945 continue; 946 947 // call __finally 948 cdb.append(callFinallyBlock(bf.nthSucc(0), retregsx)); 949 } 950 } 951 } 952 goto L5; 953 } 954 case_goto: 955 { 956 regm_t retregsx = 0; 957 gencodelem(cdb,e,&retregsx,true); 958 if (anyspill) 959 { // Add in the epilog code 960 CodeBuilder cdbstore; cdbstore.ctor(); 961 CodeBuilder cdbload; cdbload.ctor(); 962 963 for (int i = 0; i < anyspill; i++) 964 { Symbol *s = globsym[i]; 965 966 if (s.Sflags & SFLspill && 967 
vec_testbit(dfoidx,s.Srange)) 968 { 969 s.Sfl = sflsave[i]; // undo block register assignments 970 cgreg_spillreg_epilog(bl,s,cdbstore,cdbload); 971 } 972 } 973 cdb.append(cdbstore); 974 cdb.append(cdbload); 975 } 976 nextb = bl.nthSucc(0); 977 goto L5; 978 } 979 980 case BC_try: 981 if (config.ehmethod == EHmethod.EH_NONE || funcsym_p.Sfunc.Fflags3 & Feh_none) 982 { 983 /* Need to use frame pointer to access locals, not the stack pointer, 984 * because we'll be calling the BC_finally blocks and the stack will be off. 985 */ 986 needframe = 1; 987 } 988 else if (config.ehmethod == EHmethod.EH_SEH || config.ehmethod == EHmethod.EH_WIN32) 989 { 990 usednteh |= NTEH_try; 991 nteh_usevars(); 992 } 993 else 994 usednteh |= EHtry; 995 goto case_goto; 996 997 case BC_finally: 998 if (ehmethod(funcsym_p) == EHmethod.EH_DWARF) 999 { 1000 // Mark scratch registers as destroyed. 1001 getregsNoSave(lpadregs()); 1002 1003 regm_t retregsx = 0; 1004 gencodelem(cdb,bl.Belem,&retregsx,true); 1005 1006 // JMP bl.nthSucc(1) 1007 nextb = bl.nthSucc(1); 1008 1009 goto L5; 1010 } 1011 else 1012 { 1013 if (config.ehmethod == EHmethod.EH_SEH || 1014 config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none)) 1015 { 1016 // Mark all registers as destroyed. This will prevent 1017 // register assignments to variables used in finally blocks. 1018 getregsNoSave(lpadregs()); 1019 } 1020 1021 assert(!e); 1022 // Generate CALL to finalizer code 1023 cdb.append(callFinallyBlock(bl.nthSucc(0), 0)); 1024 1025 // JMP bl.nthSucc(1) 1026 nextb = bl.nthSucc(1); 1027 1028 goto L5; 1029 } 1030 1031 case BC_lpad: 1032 { 1033 assert(ehmethod(funcsym_p) == EHmethod.EH_DWARF); 1034 // Mark all registers as destroyed. This will prevent 1035 // register assignments to variables used in finally blocks. 
1036 getregsNoSave(lpadregs()); 1037 1038 regm_t retregsx = 0; 1039 gencodelem(cdb,bl.Belem,&retregsx,true); 1040 1041 // JMP bl.nthSucc(0) 1042 nextb = bl.nthSucc(0); 1043 goto L5; 1044 } 1045 1046 case BC_ret: 1047 { 1048 regm_t retregsx = 0; 1049 gencodelem(cdb,e,&retregsx,true); 1050 if (ehmethod(funcsym_p) == EHmethod.EH_DWARF) 1051 { 1052 } 1053 else 1054 cdb.gen1(0xC3); // RET 1055 break; 1056 } 1057 1058 static if (NTEXCEPTIONS) 1059 { 1060 case BC_except: 1061 { 1062 assert(!e); 1063 usednteh |= NTEH_except; 1064 nteh_setsp(cdb,0x8B); 1065 getregsNoSave(allregs); 1066 nextb = bl.nthSucc(0); 1067 goto L5; 1068 } 1069 case BC_filter: 1070 { 1071 nteh_filter(cdb, bl); 1072 // Mark all registers as destroyed. This will prevent 1073 // register assignments to variables used in filter blocks. 1074 getregsNoSave(allregs); 1075 regm_t retregsx = regmask(e.Ety, TYnfunc); 1076 gencodelem(cdb,e,&retregsx,true); 1077 cdb.gen1(0xC3); // RET 1078 break; 1079 } 1080 } 1081 1082 case BCretexp: 1083 reg_t reg1, reg2, lreg, mreg; 1084 retregs = allocretregs(e.Ety, e.ET, funcsym_p.ty(), reg1, reg2); 1085 //printf("allocretregs returns %s\n", regm_str(mask(reg1) | mask(reg2))); 1086 1087 lreg = mreg = NOREG; 1088 if (reg1 == NOREG) 1089 {} 1090 else if (tybasic(e.Ety) == TYcfloat) 1091 lreg = ST01; 1092 else if (mask(reg1) & (mST0 | mST01)) 1093 lreg = reg1; 1094 else if (reg2 == NOREG) 1095 lreg = reg1; 1096 else if (mask(reg1) & XMMREGS) 1097 { 1098 lreg = XMM0; 1099 mreg = XMM1; 1100 } 1101 else 1102 { 1103 lreg = mask(reg1) & mLSW ? reg1 : AX; 1104 mreg = mask(reg2) & mMSW ? reg2 : DX; 1105 } 1106 if (reg1 != NOREG) 1107 retregs = (mask(lreg) | mask(mreg)) & ~mask(NOREG); 1108 1109 // For the final load into the return regs, don't set regcon.used, 1110 // so that the optimizer can potentially use retregs for register 1111 // variable assignments. 
1112 1113 if (config.flags4 & CFG4optimized) 1114 { regm_t usedsave; 1115 1116 docommas(cdb,e); 1117 usedsave = regcon.used; 1118 if (!OTleaf(e.Eoper)) 1119 gencodelem(cdb,e,&retregs,true); 1120 else 1121 { 1122 if (e.Eoper == OPconst) 1123 regcon.mvar = 0; 1124 gencodelem(cdb,e,&retregs,true); 1125 regcon.used = usedsave; 1126 if (e.Eoper == OPvar) 1127 { Symbol *s = e.EV.Vsym; 1128 1129 if (s.Sfl == FLreg && s.Sregm != mAX) 1130 *retsym = s; 1131 } 1132 } 1133 } 1134 else 1135 { 1136 gencodelem(cdb,e,&retregs,true); 1137 } 1138 1139 if (reg1 == NOREG) 1140 { 1141 } 1142 else if ((mask(reg1) | mask(reg2)) & (mST0 | mST01)) 1143 { 1144 assert(reg1 == lreg && reg2 == NOREG); 1145 regm_t pretregs = mask(reg1) | mask(reg2); 1146 fixresult87(cdb, e, retregs, &pretregs, true); 1147 } 1148 // fix return registers 1149 else if (tybasic(e.Ety) == TYcfloat) 1150 { 1151 assert(lreg == ST01); 1152 if (I64) 1153 { 1154 assert(reg2 == NOREG); 1155 // spill 1156 pop87(); 1157 pop87(); 1158 cdb.genfltreg(0xD9, 3, tysize(TYfloat)); 1159 genfwait(cdb); 1160 cdb.genfltreg(0xD9, 3, 0); 1161 genfwait(cdb); 1162 // reload 1163 if (config.exe == EX_WIN64) 1164 { 1165 assert(reg1 == AX); 1166 cdb.genfltreg(LOD, reg1, 0); 1167 code_orrex(cdb.last(), REX_W); 1168 } 1169 else 1170 { 1171 assert(reg1 == XMM0); 1172 cdb.genxmmreg(xmmload(TYdouble), reg1, 0, TYdouble); 1173 } 1174 } 1175 else 1176 { 1177 assert(reg1 == AX && reg2 == DX); 1178 regm_t pretregs = mask(reg1) | mask(reg2); 1179 fixresult_complex87(cdb, e, retregs, &pretregs, true); 1180 } 1181 } 1182 else if (reg2 == NOREG) 1183 assert(lreg == reg1); 1184 else for (int v = 0; v < 2; v++) 1185 { 1186 if (v ^ (reg1 != mreg)) 1187 genmovreg(cdb, reg1, lreg); 1188 else 1189 genmovreg(cdb, reg2, mreg); 1190 } 1191 if (reg1 != NOREG) 1192 retregs = (mask(reg1) | mask(reg2)) & ~mask(NOREG); 1193 goto L4; 1194 1195 case BCret: 1196 retregs = 0; 1197 gencodelem(cdb,e,&retregs,true); 1198 L4: 1199 if (retregs == mST0) 1200 { 
assert(global87.stackused == 1); 1201 pop87(); // account for return value 1202 } 1203 else if (retregs == mST01) 1204 { assert(global87.stackused == 2); 1205 pop87(); 1206 pop87(); // account for return value 1207 } 1208 1209 if (MARS || usednteh & NTEH_try) 1210 { 1211 block *bt = bl; 1212 while ((bt = bt.Btry) != null) 1213 { 1214 block *bf = bt.nthSucc(1); 1215 // Only look at try-finally blocks 1216 if (bf.BC == BCjcatch) 1217 { 1218 continue; 1219 } 1220 if (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || 1221 config.ehmethod == EHmethod.EH_SEH) 1222 { 1223 if (bt.Bscope_index == 0) 1224 { 1225 // call __finally 1226 CodeBuilder cdbs; cdbs.ctor(); 1227 CodeBuilder cdbr; cdbr.ctor(); 1228 1229 nteh_gensindex(cdb,-1); 1230 gensaverestore(retregs,cdbs,cdbr); 1231 cdb.append(cdbs); 1232 cdb.genc(0xE8,0,0,0,FLblock,cast(targ_size_t)bf.nthSucc(0)); 1233 regcon.immed.mval = 0; 1234 cdb.append(cdbr); 1235 } 1236 else 1237 { 1238 nteh_unwind(cdb,retregs,~0); 1239 } 1240 break; 1241 } 1242 else 1243 { 1244 // call __finally 1245 cdb.append(callFinallyBlock(bf.nthSucc(0), retregs)); 1246 } 1247 } 1248 } 1249 break; 1250 1251 case BCexit: 1252 retregs = 0; 1253 gencodelem(cdb,e,&retregs,true); 1254 if (config.flags4 & CFG4optimized) 1255 mfuncreg = mfuncregsave; 1256 break; 1257 1258 case BCasm: 1259 { 1260 assert(!e); 1261 // Mark destroyed registers 1262 CodeBuilder cdbx; cdbx.ctor(); 1263 getregs(cdbx,iasm_regs(bl)); // mark destroyed registers 1264 code *c = cdbx.finish(); 1265 if (bl.Bsucc) 1266 { nextb = bl.nthSucc(0); 1267 if (!bl.Bnext) 1268 { 1269 cdb.append(bl.Bcode); 1270 cdb.append(c); 1271 goto L5; 1272 } 1273 if (nextb != bl.Bnext && 1274 bl.Bnext && 1275 !(bl.Bnext.BC == BCgoto && 1276 !bl.Bnext.Belem && 1277 nextb == bl.Bnext.nthSucc(0))) 1278 { 1279 // See if already have JMP at end of block 1280 code *cl = code_last(bl.Bcode); 1281 if (!cl || cl.Iop != JMP) 1282 { 1283 cdb.append(bl.Bcode); 1284 cdb.append(c); 1285 goto 
L5: // add JMP at end of block
                }
            }
        }
        cdb.append(bl.Bcode);
        break;
    }

        default:
            debug
            printf("bl.BC = %d\n",bl.BC);
            assert(0);
    }
}

/***************************
 * Allocate registers for function return values.
 *
 * Params:
 *    ty = return type
 *    t = return type extended info
 *    tyf = function type
 *    reg1 = set to the first part register, else NOREG
 *    reg2 = set to the second part register, else NOREG
 *
 * Returns:
 *    a bit mask of return registers.
 *    0 if function returns on the stack or returns void.
 */
@trusted
regm_t allocretregs(const tym_t ty, type* t, const tym_t tyf, out reg_t reg1, out reg_t reg2)
{
    //printf("allocretregs() ty: %s\n", tym_str(ty));
    reg1 = reg2 = NOREG;

    if (!(config.exe & EX_posix))
        return regmask(ty, tyf);    // for non-Posix ABI

    /* The rest is for the Itanium ABI
     */

    const tyb = tybasic(ty);
    if (tyb == TYvoid || tyb == TYnoreturn)
        return 0;

    tym_t ty1 = tyb;
    tym_t ty2 = TYMAX;  // stays TYMAX if only one register is needed

    if (ty & mTYxmmgpr)
    {
        ty1 = TYdouble;
        ty2 = TYllong;
    }
    else if (ty & mTYgprxmm)
    {
        ty1 = TYllong;
        ty2 = TYdouble;
    }

    if (tyb == TYstruct)
    {
        assert(t);
        ty1 = t.Tty;
    }

    const tyfb = tybasic(tyf);
    switch (tyrelax(ty1))
    {
        case TYcent:
            if (I32)
                return 0;
            ty1 = ty2 = TYllong;    // a cent comes back in a register pair
            break;

        case TYcdouble:
            if (tyfb == TYjfunc && I32)
                break;
            if (I32)
                return 0;
            ty1 = ty2 = TYdouble;
            break;

        case TYcfloat:
            if (tyfb == TYjfunc && I32)
                break;
            if (I32)
                goto case TYllong;
            ty1 = TYdouble;         // both floats are packed into one double-sized register
            break;

        case TYcldouble:
            if (tyfb == TYjfunc && I32)
                break;
            if (I32)
                return 0;
            break;

        case TYllong:
            if (I32)
                ty1 = ty2 = TYlong; // long long needs a register pair on 32 bit
            break;

        case TYarray:
            // static arrays are classified like the equivalent struct
            type* targ1, targ2;
            argtypes(t, targ1, targ2);
            if (targ1)
                ty1 = targ1.Tty;
            else
                return 0;
            if (targ2)
                ty2 = targ2.Tty;
            break;

        case TYstruct:
            assert(t);
            if (I64)
            {
                assert(tybasic(t.Tty) == TYstruct);
                if (const targ1 = t.Ttag.Sstruct.Sarg1type)
                    ty1 = targ1.Tty;
                else
                    return 0;
                if (const targ2 = t.Ttag.Sstruct.Sarg2type)
                    ty2 = targ2.Tty;
                break;
            }
            return 0;               // 32 bit structs are returned on the stack

        default:
            break;
    }

    /* now we have ty1 and ty2, use that to determine which register
     * is used for ty1 and which for ty2
     */

    // Hands out AX/DX for integer parts and XMM0/XMM1 for float parts,
    // in order, one per call.
    static struct RetRegsAllocator
    {
    nothrow:
        static immutable reg_t[2] gpr_regs = [AX, DX];
        static immutable reg_t[2] xmm_regs = [XMM0, XMM1];

        uint cntgpr = 0,
             cntxmm = 0;

        reg_t gpr() { return gpr_regs[cntgpr++]; }
        reg_t xmm() { return xmm_regs[cntxmm++]; }
    }

    RetRegsAllocator rralloc;

    // Map one part's type to the register that returns it (NOREG for TYMAX).
    reg_t allocreg(tym_t tym)
    {
        if (tym == TYMAX)
            return NOREG;
        switch (tysize(tym))
        {
            case 1:
            case 2:
            case 4:
                if (tyfloating(tym))
                    return I64 ? rralloc.xmm() : ST0;
                else
                    return rralloc.gpr();

            case 8:
                if (tycomplex(tym))
                {
                    assert(tyfb == TYjfunc && I32);
                    return ST01;
                }
                else if (tysimd(tym))
                {
                    return rralloc.xmm();
                }
                assert(I64 || tyfloating(tym));
                goto case 4;

            default:
                if (tybasic(tym) == TYldouble || tybasic(tym) == TYildouble)
                {
                    return ST0;
                }
                else if (tybasic(tym) == TYcldouble)
                {
                    return ST01;
                }
                else if (tycomplex(tym) && tyfb == TYjfunc && I32)
                {
                    return ST01;
                }
                else if (tysimd(tym))
                {
                    return rralloc.xmm();
                }

                debug printf("%s\n", tym_str(tym));
                assert(0);
        }
    }

    reg1 = allocreg(ty1);
    reg2 = allocreg(ty2);

    return (mask(reg1) | mask(reg2)) & ~mask(NOREG);
}

/***********************************************
 * Struct necessary for sorting switch cases.
 */

private alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*);
extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar);

extern (C) // qsort cmp functions need to be "C"
{
    struct CaseVal
    {
        targ_ullong val;        // case value (compared as unsigned)
        block *target;          // block to jump to on a match

        /* Sort function for qsort().
         * Unsigned ordering; must stay in sync with the unsigned JA
         * emitted by ifthen() for the binary search. */
        @trusted
        extern (C) static nothrow pure @nogc int cmp(scope const(void*) p, scope const(void*) q)
        {
            const(CaseVal)* c1 = cast(const(CaseVal)*)p;
            const(CaseVal)* c2 = cast(const(CaseVal)*)q;
            return (c1.val < c2.val) ? -1 : ((c1.val == c2.val) ? 0 : 1);
        }
    }
}

/***
 * Generate comparison of [reg2,reg] with val
 */
@trusted
private void cmpval(ref CodeBuilder cdb, targ_llong val, uint sz, reg_t reg, reg_t reg2, reg_t sreg)
{
    if (I64 && sz == 8)
    {
        assert(reg2 == NOREG);
        if (val == cast(int)val)    // if val is a 64 bit value sign-extended from 32 bits
        {
            cdb.genc2(0x81,modregrmx(3,7,reg),cast(targ_size_t)val);     // CMP reg,value32
            cdb.last().Irex |= REX_W;                                    // 64 bit operand
        }
        else
        {
            // need a scratch register to hold the full 64 bit immediate
            assert(sreg != NOREG);
            movregconst(cdb,sreg,cast(targ_size_t)val,64);  // MOV sreg,val64
            genregs(cdb,0x3B,reg,sreg);                     // CMP reg,sreg
            code_orrex(cdb.last(), REX_W);
            getregsNoSave(mask(sreg));                      // don't remember we loaded this constant
        }
    }
    else if (reg2 == NOREG)
        cdb.genc2(0x81,modregrmx(3,7,reg),cast(targ_size_t)val);        // CMP reg,casevalue
    else
    {
        // register pair: compare msw first, then lsw
        cdb.genc2(0x81,modregrm(3,7,reg2),cast(targ_size_t)MSREG(val)); // CMP reg2,MSREG(casevalue)
        code *cnext = gennop(null);
        genjmp(cdb,JNE,FLcode,cast(block *) cnext);                     // JNE cnext
        cdb.genc2(0x81,modregrm(3,7,reg),cast(targ_size_t)val);         // CMP reg,casevalue
        cdb.append(cnext);
    }
}

/******************************
 * Generate a compare/jump sequence for the sorted case values casevals[],
 * either as a straight run of CMP/JE's or (for speed) as a recursively built
 * binary search tree of compares.
 * Params:
 *      cdb = generated code sink
 *      casevals = case values, sorted in increasing unsigned order
 *      sz = size of the switch value in bytes
 *      reg = register holding the switch value (lsw for register pairs)
 *      reg2 = msw register for a register pair, else NOREG
 *      sreg = scratch register for 64 bit immediates, else NOREG
 *      bdefault = default target block
 *      last = if true, emit a final JMP to the default block
 */
@trusted extern (D)
private void ifthen(ref CodeBuilder cdb, scope CaseVal[] casevals,
        uint sz, reg_t reg, reg_t reg2, reg_t sreg, block *bdefault, bool last)
{
    const ncases = casevals.length;
    if (ncases >= 4 && config.flags4 & CFG4speed)
    {
        size_t pivot = ncases >> 1;

        // Compares for casevals[0..pivot]
        CodeBuilder cdb1; cdb1.ctor();
        ifthen(cdb1, casevals[0 .. pivot], sz, reg, reg2, sreg, bdefault, true);

        // Compares for casevals[pivot+1..ncases]
        CodeBuilder cdb2; cdb2.ctor();
        ifthen(cdb2, casevals[pivot + 1 .. $], sz, reg, reg2, sreg, bdefault, last);
        code *c2 = gennop(null);

        // Compare for caseval[pivot]
        cmpval(cdb, casevals[pivot].val, sz, reg, reg2, sreg);
        genjmp(cdb,JE,FLblock,casevals[pivot].target);  // JE target
        // Note uint jump here, as cases were sorted using uint comparisons
        genjmp(cdb,JA,FLcode,cast(block *) c2);         // JA c2

        cdb.append(cdb1);
        cdb.append(c2);
        cdb.append(cdb2);
    }
    else
    {   // Not worth doing a binary search, just do a sequence of CMP/JE
        foreach (size_t n; 0 .. ncases)
        {
            targ_llong val = casevals[n].val;
            cmpval(cdb, val, sz, reg, reg2, sreg);
            code *cnext = null;
            if (reg2 != NOREG)
            {
                cnext = gennop(null);
                genjmp(cdb,JNE,FLcode,cast(block *) cnext);                     // JNE cnext
                cdb.genc2(0x81,modregrm(3,7,reg2),cast(targ_size_t)MSREG(val)); // CMP reg2,MSREG(casevalue)
            }
            genjmp(cdb,JE,FLblock,casevals[n].target);  // JE caseaddr
            cdb.append(cnext);
        }

        if (last)       // if default is not next block
            genjmp(cdb,JMP,FLblock,bdefault);
    }
}

/*******************************
 * Generate code for blocks ending in a switch statement.
 * Take BCswitch and decide on
 *      BCifthen        use if - then code
 *      BCjmptab        index into jump table
 *      BCswitch        search table for match
 *
 * Params:
 *      cdb = generated code sink
 *      b = switch block (b.Belem is the switch expression,
 *          b.Bswitch the case values, b.Bsucc the targets with
 *          the default block first)
 */

@trusted
void doswitch(ref CodeBuilder cdb, block *b)
{
    // If switch tables are in code segment and we need a CS: override to get at them
    bool csseg = cast(bool)(config.flags & CFGromable);

    //printf("doswitch(%d)\n", b.BC);
    elem *e = b.Belem;
    elem_debug(e);
    docommas(cdb,e);                    // evaluate and discard leading comma operands
    cgstate.stackclean++;
    tym_t tys = tybasic(e.Ety);
    int sz = _tysize[tys];
    bool dword = (sz == 2 * REGSIZE);   // switch value needs a register pair
    targ_ulong msw;
    bool mswsame = true;                // assume all msw's are the same

    targ_llong vmax = long.min;         // smallest possible llong
    targ_llong vmin = long.max;         // largest possible llong
    foreach (n, val; b.Bswitch)         // find max and min case values
    {
        if (val > vmax) vmax = val;
        if (val < vmin) vmin = val;
        if (REGSIZE == 2)
        {
            ushort ms = (val >> 16) & 0xFFFF;
            if (n == 0)
                msw = ms;
            else if (msw != ms)
                mswsame = false;
        }
        else // REGSIZE == 4
        {
            targ_ulong ms = (val >> 32) & 0xFFFFFFFF;
            if (n == 0)
                msw = ms;
            else if (msw != ms)
                mswsame = false;
        }
    }
    //dbg_printf("vmax = x%lx, vmin = x%lx, vmax-vmin = x%lx\n",vmax,vmin,vmax - vmin);

    /* Three kinds of switch strategies - pick one
     */
    const ncases = b.Bswitch.length;
    if (ncases <= 3)
        goto Lifthen;
    else if (I16 && cast(targ_ullong)(vmax - vmin) <= ncases * 2)
        goto Ljmptab;           // >=50% of the table is case values, rest is default
    else if (config.flags3 & CFG3ibt)
        goto Lifthen;           // no jump table for ENDBR
    else if (cast(targ_ullong)(vmax - vmin) <= ncases * 3)
        goto Ljmptab;           // >= 33% of the table is case values, rest is default
    else if (I16)
        goto Lswitch;
    else
        goto Lifthen;

    /*************************************************************************/
    {   // generate if-then sequence
    Lifthen:
        regm_t retregs = ALLREGS;
        b.BC = BCifthen;
        scodelem(cdb,e,&retregs,0,true);
        reg_t reg, reg2;
        if (dword)
        {   reg = findreglsw(retregs);
            reg2 = findregmsw(retregs);
        }
        else
        {
            reg = findreg(retregs);     // reg that result is in
            reg2 = NOREG;
        }
        list_t bl = b.Bsucc;
        block *bdefault = b.nthSucc(0);
        if (dword && mswsame)
        {
            // all cases share one msw: check it once up front
            cdb.genc2(0x81,modregrm(3,7,reg2),msw);   // CMP reg2,MSW
            genjmp(cdb,JNE,FLblock,bdefault);         // JNE default
            reg2 = NOREG;
        }

        reg_t sreg = NOREG;                          // may need a scratch register

        // Put into casevals[0..ncases] so we can sort then slice

        import dmd.common.string : SmallBuffer;
        CaseVal[10] tmp = void;
        auto sb = SmallBuffer!(CaseVal)(ncases, tmp[]);
        CaseVal[] casevals = sb[];

        foreach (n, val; b.Bswitch)
        {
            casevals[n].val = val;
            bl = list_next(bl);
            casevals[n].target = list_block(bl);

            // See if we need a scratch register
            if (sreg == NOREG && I64 && sz == 8 && val != cast(int)val)
            {   regm_t regm = ALLREGS & ~mask(reg);
                allocreg(cdb,&regm, &sreg, TYint);
            }
        }

        // Sort cases so we can do a runtime binary search
        qsort(casevals.ptr, casevals.length, CaseVal.sizeof, &CaseVal.cmp);

        //for (uint n = 0; n < ncases; n++)
            //printf("casevals[%lld] = x%x\n", n, casevals[n].val);

        // Generate binary tree of comparisons
        ifthen(cdb, casevals, sz, reg, reg2, sreg, bdefault, bdefault != b.Bnext);

        cgstate.stackclean--;
        return;
    }

    /*************************************************************************/
    {
        // Use switch value to index into jump table
    Ljmptab:
        //printf("Ljmptab:\n");

        b.BC = BCjmptab;

        /* If vmin is small enough, we can just set it to 0 and the jump
         * table entries from 0..vmin-1 can be set with the default target.
         * This saves the SUB instruction.
         * Must be same computation as used in outjmptab().
         */
        if (vmin > 0 && vmin <= _tysize[TYint])
            vmin = 0;

        b.Btablesize = cast(int) (vmax - vmin + 1) * tysize(TYnptr);
        regm_t retregs = IDXREGS;
        if (dword)
            retregs |= mMSW;
        if (config.exe & EX_posix && I32 && config.flags3 & CFG3pic)
            retregs &= ~mBX;                            // need EBX for GOT
        // must modify the value register when subtracting vmin or widening
        bool modify = (I16 || I64 || vmin);
        scodelem(cdb,e,&retregs,0,!modify);
        reg_t reg = findreg(retregs & IDXREGS); // reg that result is in
        reg_t reg2;
        if (dword)
            reg2 = findregmsw(retregs);
        if (modify)
        {
            assert(!(retregs & regcon.mvar));
            getregs(cdb,retregs);
        }
        if (vmin)                       // if there is a minimum
        {
            cdb.genc2(0x81,modregrm(3,5,reg),cast(targ_size_t)vmin);    // SUB reg,vmin
            if (dword)
            {   cdb.genc2(0x81,modregrm(3,3,reg2),cast(targ_size_t)MSREG(vmin)); // SBB reg2,vmin
                genjmp(cdb,JNE,FLblock,b.nthSucc(0));                   // JNE default
            }
        }
        else if (dword)
        {   gentstreg(cdb,reg2);                    // TEST reg2,reg2
            genjmp(cdb,JNE,FLblock,b.nthSucc(0));   // JNE default
        }
        if (vmax - vmin != REGMASK)     // if there is a maximum
        {   // CMP reg,vmax-vmin
            cdb.genc2(0x81,modregrm(3,7,reg),cast(targ_size_t)(vmax-vmin));
            if (I64 && sz == 8)
                code_orrex(cdb.last(), REX_W);
            genjmp(cdb,JA,FLblock,b.nthSucc(0));    // JA default
        }
        if (I64)
        {
            if (!vmin)
            {   // Need to clear out high 32 bits of reg
                // Use 8B instead of 89, as 89 will be optimized away as a NOP
                genregs(cdb,0x8B,reg,reg);          // MOV reg,reg
            }
            if (config.flags3 & CFG3pic || config.exe == EX_WIN64)
            {
                /* LEA R1,disp[RIP]          48 8D 05 00 00 00 00
                 * MOVSXD R2,[reg*4][R1]     48 63 14 B8
                 * LEA R1,[R1][R2]           48 8D 04 02
                 * JMP R1                    FF E0
                 */
                reg_t r1;
                regm_t scratchm = ALLREGS & ~mask(reg);
                allocreg(cdb,&scratchm,&r1,TYint);
                reg_t r2;
                scratchm = ALLREGS & ~(mask(reg) | mask(r1));
                allocreg(cdb,&scratchm,&r2,TYint);

                CodeBuilder cdbe; cdbe.ctor();
                cdbe.genc1(LEA,(REX_W << 16) | modregxrm(0,r1,5),FLswitch,0);               // LEA R1,disp[RIP]
                cdbe.last().IEV1.Vswitch = b;
                cdbe.gen2sib(0x63,(REX_W << 16) | modregxrm(0,r2,4), modregxrmx(2,reg,r1)); // MOVSXD R2,[reg*4][R1]
                cdbe.gen2sib(LEA,(REX_W << 16) | modregxrm(0,r1,4),modregxrmx(0,r1,r2));    // LEA R1,[R1][R2]
                cdbe.gen2(0xFF,modregrmx(3,4,r1));                                          // JMP R1

                b.Btablesize = cast(int) (vmax - vmin + 1) * 4;
                code *ce = cdbe.finish();
                pinholeopt(ce, null);

                cdb.append(cdbe);
            }
            else
            {
                cdb.genc1(0xFF,modregrm(0,4,4),FLswitch,0);     // JMP disp[reg*8]
                cdb.last().IEV1.Vswitch = b;
                cdb.last().Isib = modregrm(3,reg & 7,5);
                if (reg & 8)
                    cdb.last().Irex |= REX_X;
            }
        }
        else if (I32)
        {
            static if (JMPJMPTABLE)
            {
                /* LEA jreg,offset ctable[reg][reg * 4]
                   JMP jreg
                  ctable:
                   JMP case0
                   JMP case1
                   ...
                 */
                CodeBuilder ctable; ctable.ctor();
                block *bdef = b.nthSucc(0);
                targ_llong u;
                for (u = vmin; ; u++)
                {   block *targ = bdef;
                    foreach (n, val; b.Bswitch)
                    {
                        if (val == u)
                        {   targ = b.nthSucc(n + 1);
                            break;
                        }
                    }
                    genjmp(ctable,JMP,FLblock,targ);
                    ctable.last().Iflags |= CFjmp5;     // don't shrink these
                    if (u == vmax)
                        break;
                }

                // Allocate scratch register jreg
                regm_t scratchm = ALLREGS & ~mask(reg);
                uint jreg = AX;
                allocreg(cdb,&scratchm,&jreg,TYint);

                // LEA jreg, offset ctable[reg][reg*4]
                cdb.genc1(LEA,modregrm(2,jreg,4),FLcode,6);
                cdb.last().Isib = modregrm(2,reg,reg);
                cdb.gen2(0xFF,modregrm(3,4,jreg));      // JMP jreg
                cdb.append(ctable);
                b.Btablesize = 0;
                cgstate.stackclean--;
                return;
            }
            else
            {
                if (config.exe & (EX_OSX | EX_OSX64))
                {
                    /*   CALL L1
                     * L1: POP R1
                     *   ADD R1,disp[reg*4][R1]
                     *   JMP R1
                     */
                    // Allocate scratch register r1
                    regm_t scratchm = ALLREGS & ~mask(reg);
                    reg_t r1;
                    allocreg(cdb,&scratchm,&r1,TYint);

                    cdb.genc2(CALL,0,0);                            //     CALL L1
                    cdb.gen1(0x58 + r1);                            // L1: POP R1
                    cdb.genc1(0x03,modregrm(2,r1,4),FLswitch,0);    // ADD R1,disp[reg*4][EBX]
                    cdb.last().IEV1.Vswitch = b;
                    cdb.last().Isib = modregrm(2,reg,r1);
                    cdb.gen2(0xFF,modregrm(3,4,r1));                // JMP R1
                }
                else
                {
                    if (config.flags3 & CFG3pic)
                    {
                        /* MOV  R1,EBX
                         * SUB  R1,funcsym_p@GOTOFF[offset][reg*4][EBX]
                         * JMP  R1
                         */

                        // Load GOT in EBX
                        load_localgot(cdb);

                        // Allocate scratch register r1
                        regm_t scratchm = ALLREGS & ~(mask(reg) | mBX);
                        reg_t r1;
                        allocreg(cdb,&scratchm,&r1,TYint);

                        genmovreg(cdb,r1,BX);                           // MOV R1,EBX
                        cdb.genc1(0x2B,modregxrm(2,r1,4),FLswitch,0);   // SUB R1,disp[reg*4][EBX]
                        cdb.last().IEV1.Vswitch = b;
                        cdb.last().Isib = modregrm(2,reg,BX);
                        cdb.gen2(0xFF,modregrmx(3,4,r1));               // JMP R1
                    }
                    else
                    {
                        cdb.genc1(0xFF,modregrm(0,4,4),FLswitch,0);     // JMP disp[idxreg*4]
                        cdb.last().IEV1.Vswitch = b;
                        cdb.last().Isib = modregrm(2,reg,5);
                    }
                }
            }
        }
        else if (I16)
        {
            cdb.gen2(0xD1,modregrm(3,4,reg));               // SHL reg,1
            uint rm = getaddrmode(retregs) | modregrm(0,4,0);
            cdb.genc1(0xFF,rm,FLswitch,0);                  // JMP [CS:]disp[idxreg]
            cdb.last().IEV1.Vswitch = b;
            cdb.last().Iflags |= csseg ? CFcs : 0;          // segment override
        }
        else
            assert(0);
        cgstate.stackclean--;
        return;
    }

    /*************************************************************************/
    {
        /* Scan a table of case values, and jump to corresponding address.
         * Since it relies on REPNE SCASW, it has really nothing to recommend it
         * over Lifthen for 32 and 64 bit code.
         * Note that it has not been tested with MACHOBJ (OSX).
         */
    Lswitch:
        regm_t retregs = mAX;                  // SCASW requires AX
        if (dword)
            retregs |= mDX;
        else if (ncases <= 6 || config.flags4 & CFG4speed)
            goto Lifthen;
        scodelem(cdb,e,&retregs,0,true);
        if (dword && mswsame)
        {   /* CMP DX,MSW       */
            cdb.genc2(0x81,modregrm(3,7,DX),msw);
            genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default
        }
        getregs(cdb,mCX|mDI);

        if (config.flags3 & CFG3pic && config.exe & EX_posix)
        {   // Add in GOT
            getregs(cdb,mDX);
            cdb.genc2(CALL,0,0);        //     CALL L1
            cdb.gen1(0x58 + DI);        // L1: POP EDI

            //     ADD EDI,_GLOBAL_OFFSET_TABLE_+3
            Symbol *gotsym = Obj.getGOTsym();
            cdb.gencs(0x81,modregrm(3,0,DI),FLextern,gotsym);
            cdb.last().Iflags = CFoff;
            cdb.last().IEV2.Voffset = 3;

            makeitextern(gotsym);

            genmovreg(cdb, DX, DI);             // MOV EDX, EDI
            // ADD EDI,offset of switch table
            cdb.gencs(0x81,modregrm(3,0,DI),FLswitch,null);
            cdb.last().IEV2.Vswitch = b;
        }

        if (!(config.flags3 & CFG3pic))
        {
            // MOV DI,offset of switch table
            cdb.gencs(0xC7,modregrm(3,0,DI),FLswitch,null);
            cdb.last().IEV2.Vswitch = b;
        }
        movregconst(cdb,CX,ncases,0);           // MOV CX,ncases

        /* The switch table will be accessed through ES:DI.
         * Therefore, load ES with proper segment value.
         */
        if (config.flags3 & CFG3eseqds)
        {
            assert(!csseg);
            getregs(cdb,mCX);           // allocate CX
        }
        else
        {
            getregs(cdb,mES|mCX);       // allocate ES and CX
            cdb.gen1(csseg ? 0x0E : 0x1E);      // PUSH CS/DS
            cdb.gen1(0x07);                     // POP  ES
        }

        targ_size_t disp = (ncases - 1) * _tysize[TYint];  // displacement to jump table
        if (dword && !mswsame)
        {

            /* Build the following:
                L1:     SCASW
                        JNE     L2
                        CMP     DX,[CS:]disp[DI]
                L2:     LOOPNE  L1
             */

            const int mod = (disp > 127) ? 2 : 1;                   // displacement size
            code *cloop = genc2(null,0xE0,0,-7 - mod - csseg);      // LOOPNE scasw
            cdb.gen1(0xAF);                                         // SCASW
            code_orflag(cdb.last(),CFtarg2);                        // target of jump
            genjmp(cdb,JNE,FLcode,cast(block *) cloop);             // JNE loop
            // CMP DX,[CS:]disp[DI]
            cdb.genc1(0x39,modregrm(mod,DX,5),FLconst,disp);
            cdb.last().Iflags |= csseg ? CFcs : 0;                  // possible seg override
            cdb.append(cloop);
            disp += ncases * _tysize[TYint];                        // skip over msw table
        }
        else
        {
            cdb.gen1(0xF2);              // REPNE
            cdb.gen1(0xAF);              // SCASW
        }
        genjmp(cdb,JNE,FLblock,b.nthSucc(0));   // JNE default
        const int mod = (disp > 127) ? 2 : 1;   // 1 or 2 byte displacement
        if (csseg)
            cdb.gen1(SEGCS);                    // table is in code segment

        if (config.flags3 & CFG3pic &&
            config.exe & EX_posix)
        {   // ADD EDX,(ncases-1)*2[EDI]
            cdb.genc1(0x03,modregrm(mod,DX,7),FLconst,disp);
            // JMP EDX
            cdb.gen2(0xFF,modregrm(3,4,DX));
        }

        if (!(config.flags3 & CFG3pic))
        {   // JMP (ncases-1)*2[DI]
            cdb.genc1(0xFF,modregrm(mod,4,(I32 ? 7 : 5)),FLconst,disp);
            cdb.last().Iflags |= csseg ? CFcs : 0;
        }
        b.Btablesize = disp + _tysize[TYint] + ncases * tysize(TYnptr);
        //assert(b.Bcode);
        cgstate.stackclean--;
        return;
    }
}

/******************************
 * Output data block for a jump table (BCjmptab).
 * The 'holes' in the table get filled with the
 * default label.
 */

@trusted
void outjmptab(block *b)
{
    if (JMPJMPTABLE && I32)
        return;

    const ncases = b.Bswitch.length;        // number of cases

    /* Find vmin and vmax, the range of the table will be [vmin .. vmax + 1]
     * Must be same computation as used in doswitch().
     */
    targ_llong vmax = long.min;             // smallest possible llong
    targ_llong vmin = long.max;             // largest possible llong
    foreach (val; b.Bswitch)                // find min case value
    {
        if (val > vmax) vmax = val;
        if (val < vmin) vmin = val;
    }
    if (vmin > 0 && vmin <= _tysize[TYint])
        vmin = 0;
    assert(vmin <= vmax);

    /* Segment and offset into which the jump table will be emitted
     */
    int jmpseg = objmod.jmpTableSegment(funcsym_p);
    targ_size_t *poffset = &Offset(jmpseg);

    /* Align start of jump table
     */
    targ_size_t alignbytes = _align(0,*poffset) - *poffset;
    objmod.lidata(jmpseg,*poffset,alignbytes);
    assert(*poffset == b.Btableoffset);     // should match precomputed value

    Symbol *gotsym = null;
    targ_size_t def = b.nthSucc(0).Boffset; // default address
    for (targ_llong u = vmin; ; u++)
    {   targ_size_t targ = def;             // default
        foreach (n; 0 .. ncases)
        {
            if (b.Bswitch[n] == u)
            {
                targ = b.nthSucc(cast(int)(n + 1)).Boffset;
                break;
            }
        }
        if (config.exe & (EX_LINUX64 | EX_FREEBSD64 | EX_OPENBSD64 | EX_DRAGONFLYBSD64 | EX_SOLARIS64))
        {
            if (config.flags3 & CFG3pic)
            {
                objmod.reftodatseg(jmpseg,*poffset,cast(targ_size_t)(targ + (u - vmin) * 4),funcsym_p.Sseg,CFswitch);
                *poffset += 4;
            }
            else
            {
                objmod.reftodatseg(jmpseg,*poffset,targ,funcsym_p.Sxtrnnum,CFoffset64 | CFswitch);
                *poffset += 8;
            }
        }
        else if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS))
        {
            if (config.flags3 & CFG3pic)
            {
                assert(config.flags & CFGromable);
                // Want a GOTPC fixup to _GLOBAL_OFFSET_TABLE_
                if (!gotsym)
                    gotsym = Obj.getGOTsym();
                objmod.reftoident(jmpseg,*poffset,gotsym,*poffset - targ,CFswitch);
            }
            else
                objmod.reftocodeseg(jmpseg,*poffset,targ);
            *poffset += 4;
        }
        else if (config.exe & (EX_OSX | EX_OSX64) || I64)
        {
            // NOTE(review): *poffset is not advanced in this branch —
            // presumably write_bytes() advances the segment offset itself; confirm.
            const val = cast(uint)(targ - (I64 ? b.Btableoffset : b.Btablebase));
            objmod.write_bytes(SegData[jmpseg],(&val)[0 .. 1]);
        }
        else
        {
            objmod.reftocodeseg(jmpseg,*poffset,targ);
            *poffset += tysize(TYnptr);
        }

        if (u == vmax)                  // for case that (vmax == ~0)
            break;
    }
}


/******************************
 * Output data block for a switch table.
 * Two consecutive tables, the first is the case value table, the
 * second is the address table.
 */

@trusted
void outswitab(block *b)
{
    //printf("outswitab()\n");
    const ncases = b.Bswitch.length;        // number of cases

    const int seg = objmod.jmpTableSegment(funcsym_p);
    targ_size_t *poffset = &Offset(seg);
    targ_size_t offset = *poffset;          // remember start, to verify emission below
    targ_size_t alignbytes = _align(0,*poffset) - *poffset;
    objmod.lidata(seg,*poffset,alignbytes); // any alignment bytes necessary
    assert(*poffset == offset + alignbytes);

    uint sz = _tysize[TYint];
    assert(SegData[seg].SDseg == seg);
    foreach (val; b.Bswitch)                // send out value table
    {
        //printf("\tcase %d, offset = x%x\n", n, *poffset);
        objmod.write_bytes(SegData[seg],(cast(void*)&val)[0 .. sz]);
    }
    offset += alignbytes + sz * ncases;
    assert(*poffset == offset);

    if (b.Btablesize == ncases * (REGSIZE * 2 + tysize(TYnptr)))
    {
        // Send out MSW table
        foreach (val; b.Bswitch)
        {
            auto msval = cast(targ_size_t)MSREG(val);
            objmod.write_bytes(SegData[seg],(cast(void*)&msval)[0 .. REGSIZE]);
        }
        offset += REGSIZE * ncases;
        assert(*poffset == offset);
    }

    list_t bl = b.Bsucc;
    foreach (n; 0 .. ncases)                // send out address table
    {
        bl = list_next(bl);                 // skip default (first successor)
        objmod.reftocodeseg(seg,*poffset,list_block(bl).Boffset);
        *poffset += tysize(TYnptr);
    }
    assert(*poffset == offset + ncases * tysize(TYnptr));
}

/*****************************
 * Return a jump opcode relevant to the elem for a JMP true.
 */

@trusted
int jmpopcode(elem *e)
{
    //printf("jmpopcode()\n"); elem_print(e);
    tym_t tym;
    int zero,i,jp,op;
    /* jops[i][zero][op - OPle]:
     *    i = 0 for signed compares, 1 for unsigned
     *    zero = 1 when comparing against a constant 0
     */
    static immutable ubyte[6][2][2] jops =
    [   /* <=  >   <   >=  ==  !=     <=0 >0  <0  >=0 ==0 !=0   */
        [ [JLE,JG ,JL ,JGE,JE ,JNE],[JLE,JG ,JS ,JNS,JE ,JNE] ], /* signed   */
        [ [JBE,JA ,JB ,JAE,JE ,JNE],[JE ,JNE,JB ,JAE,JE ,JNE] ], /* uint     */
/+
        [ [JLE,JG ,JL ,JGE,JE ,JNE],[JLE,JG ,JL ,JGE,JE ,JNE] ], /* real     */
        [ [JBE,JA ,JB ,JAE,JE ,JNE],[JBE,JA ,JB ,JAE,JE ,JNE] ], /* 8087     */
        [ [JA ,JBE,JAE,JB ,JE ,JNE],[JBE,JA ,JB ,JAE,JE ,JNE] ], /* 8087 R   */
+/
    ];

    // high byte carries an extra JP/JNP jump for unordered (NaN) operands
    enum
    {
        XP  = (JP << 8),
        XNP = (JNP << 8),
    }
    static immutable uint[26][1] jfops =
    /*  le      gt lt     ge  eqeq    ne     unord lg  leg  ule ul uge  */
    [
      [ XNP|JBE,JA,XNP|JB,JAE,XNP|JE, XP|JNE,JP,   JNE,JNP, JBE,JC,XP|JAE,

    /*  ug    ue ngt nge nlt    nle    ord nlg nleg nule nul nuge    nug     nue */
        XP|JA,JE,JBE,JB, XP|JAE,XP|JA, JNP,JE, JP,  JA,  JNC,XNP|JB, XNP|JBE,JNE ], /* 8087 */
    ];

    assert(e);
    while (e.Eoper == OPcomma ||
        /* The OTleaf(e.EV.E1.Eoper) is to line up with the case in cdeq() where  */
        /* we decide if mPSW is passed on when evaluating E2 or not.              */
         (e.Eoper == OPeq && OTleaf(e.EV.E1.Eoper)))
    {
        e = e.EV.E2;                    /* right operand determines it  */
    }

    op = e.Eoper;
    tym_t tymx = tybasic(e.Ety);
    bool needsNanCheck = tyfloating(tymx) && config.inline8087 &&
        (tymx == TYldouble || tymx == TYildouble || tymx == TYcldouble ||
         tymx == TYcdouble || tymx == TYcfloat ||
         (tyxmmreg(tymx) && config.fpxmmregs && e.Ecount != e.Ecomsub) ||
         op == OPind ||
         (OTcall(op) && (regmask(tymx, tybasic(e.EV.E1.Eoper)) & (mST0 | XMMREGS))));

    if (!needsNanCheck)
    {
        /* If e is in an XMM register, need to use XP.
         * Match same test in loaddata()
         */
        Symbol* s;
        needsNanCheck = e.Eoper == OPvar &&
            (s = e.EV.Vsym).Sfl == FLreg &&
             s.Sregm & XMMREGS &&
             (tymx == TYfloat || tymx == TYifloat || tymx == TYdouble || tymx ==TYidouble);
    }

    if (e.Ecount != e.Ecomsub)          // comsubs just get Z bit set
    {
        if (needsNanCheck)              // except for floating point values that need a NaN check
            return XP|JNE;
        else
            return JNE;
    }
    if (!OTrel(op))                     // not relational operator
    {
        if (needsNanCheck)
            return XP|JNE;

        // look through widening conversions to the underlying operation
        if (op == OPu32_64) { e = e.EV.E1; op = e.Eoper; }
        if (op == OPu16_32) { e = e.EV.E1; op = e.Eoper; }
        if (op == OPu8_16) op = e.EV.E1.Eoper;
        return ((op >= OPbt && op <= OPbts) || op == OPbtst) ? JC : JNE;
    }

    if (e.EV.E2.Eoper == OPconst)
        zero = !boolres(e.EV.E2);
    else
        zero = 0;

    tym = e.EV.E1.Ety;
    if (tyfloating(tym))
    {
        static if (1)
        {
            i = 0;
            if (config.inline8087)
            {   i = 1;

                static if (1)
                {
                    if (rel_exception(op) || config.flags4 & CFG4fastfloat)
                    {
                        const bool NOSAHF = (I64 || config.fpxmmregs);
                        if (zero)
                        {
                            if (NOSAHF)
                                op = swaprel(op);
                        }
                        else if (NOSAHF)
                            op = swaprel(op);
                        else if (cmporder87(e.EV.E2))
                            op = swaprel(op);
                        else
                        { }
                    }
                    else
                    {
                        if (zero && config.target_cpu < TARGET_80386)
                        { }
                        else
                            op = swaprel(op);
                    }
                }
                else
                {
                    if (zero && !rel_exception(op) && config.target_cpu >= TARGET_80386)
                        op = swaprel(op);
                    else if (!zero &&
                        (cmporder87(e.EV.E2) || !(rel_exception(op) || config.flags4 & CFG4fastfloat)))
                        /* compare is reversed */
                        op = swaprel(op);
                }
            }
            jp = jfops[0][op - OPle];
            goto L1;
        }
        else
        {
            i = (config.inline8087) ? (3 + cmporder87(e.EV.E2)) : 2;
        }
    }
    else if (tyuns(tym) || tyuns(e.EV.E2.Ety))
        i = 1;
    else if (tyintegral(tym) || typtr(tym))
        i = 0;
    else
    {
        debug
        elem_print(e);
        printf("%s\n", tym_str(tym));
        assert(0);
    }

    jp = jops[i][zero][op - OPle];      /* table starts with OPle */

    /* Try to rewrite uint comparisons so they rely on just the Carry flag
     */
    if (i == 1 && (jp == JA || jp == JBE) &&
        (e.EV.E2.Eoper != OPconst && e.EV.E2.Eoper != OPrelconst))
    {
        jp = (jp == JA) ? JC : JNC;
    }

L1:
    debug
    if ((jp & 0xF0) != 0x70)
    {
        printf("%s i %d zero %d op x%x jp x%x\n",oper_str(op),i,zero,op,jp);
    }

    assert((jp & 0xF0) == 0x70);        // all Jcc opcodes are 0x70..0x7F
    return jp;
}

/**********************************
 * Append code to cdb which validates pointer described by
 * addressing mode in *pcs. Modify addressing mode in *pcs.
 * Params:
 *    cdb = append generated code to this
 *    pcs = original addressing mode to be updated
 *    keepmsk = mask of registers we must not destroy or use
 *              if (keepmsk & RMstore), this will be only a store operation
 *              into the lvalue
 */

@trusted
void cod3_ptrchk(ref CodeBuilder cdb,code *pcs,regm_t keepmsk)
{
    ubyte sib;          // (unused)
    reg_t reg;
    uint flagsave;

    assert(!I64);
    if (!I16 && pcs.Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs))
        return;         // not designed to deal with 48 bit far pointers

    ubyte rm = pcs.Irm;
    assert(!(rm & 0x40));       // no disp8 or reg addressing modes

    // If the addressing mode is already a register
    reg = rm & 7;
    if (I16)
    {   static immutable ubyte[8] imode = [ BP,BP,BP,BP,SI,DI,BP,BX ];

        reg = imode[reg];               // convert [SI] to SI, etc.
    }
    regm_t idxregs = mask(reg);
    if ((rm & 0x80 && (pcs.IFL1 != FLoffset || pcs.IEV1.Vuns)) ||
        !(idxregs & ALLREGS)
       )
    {
        // Load the offset into a register, so we can push the address
        regm_t idxregs2 = (I16 ? IDXREGS : ALLREGS) & ~keepmsk; // only these can be index regs
        assert(idxregs2);
        allocreg(cdb,&idxregs2,&reg,TYoffset);

        const opsave = pcs.Iop;
        flagsave = pcs.Iflags;
        pcs.Iop = LEA;
        pcs.Irm |= modregrm(0,reg,0);
        pcs.Iflags &= ~(CFopsize | CFss | CFes | CFcs);         // no prefix bytes needed
        cdb.gen(pcs);                   // LEA reg,EA

        pcs.Iflags = flagsave;
        pcs.Iop = opsave;
    }

    // registers destroyed by the function call
    //used = (mBP | ALLREGS | mES) & ~fregsaved;
    regm_t used = 0;                    // much less code generated this way

    code *cs2 = null;
    regm_t tosave = used & (keepmsk | idxregs);
    for (int i = 0; tosave; i++)
    {
        regm_t mi = mask(i);

        assert(i < REGMAX);
        if (mi & tosave)        /* i = register to save         */
        {
            int push,pop;

            stackchanged = 1;
            if (i == ES)
            {   push = 0x06;
                pop = 0x07;
            }
            else
            {   push = 0x50 + i;
                pop = push | 8;
            }
            cdb.gen1(push);                     // PUSH i
            cs2 = cat(gen1(null,pop),cs2);      // POP i
            tosave &= ~mi;
        }
    }

    // For 16 bit models, push a far pointer
    if (I16)
    {
        int segreg;

        switch (pcs.Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs))
        {   case CFes:  segreg = 0x06;  break;
            case CFss:  segreg = 0x16;  break;
            case CFcs:  segreg = 0x0E;  break;
            case 0:     segreg = 0x1E;  break;  // DS
            default:
                assert(0);
        }

        // See if we should default to SS:
        // (Happens when BP is part of the addressing mode)
        if (segreg == 0x1E && (rm & 0xC0) != 0xC0 &&
            rm & 2 && (rm & 7) != 7)
        {
            segreg = 0x16;
            if (config.wflags & WFssneds)
                pcs.Iflags |= CFss;     // because BP won't be there anymore
        }
        cdb.gen1(segreg);               // PUSH segreg
    }

    cdb.gen1(0x50 + reg);               // PUSH reg

    // Rewrite the addressing mode in *pcs so it is just 0[reg]
    setaddrmode(pcs, idxregs);
    pcs.IFL1 = FLoffset;
    pcs.IEV1.Vuns = 0;

    // Call the validation function
    {
        makeitextern(getRtlsym(RTLSYM.PTRCHK));

        used &= ~(keepmsk | idxregs);   // regs destroyed by this exercise
        getregs(cdb,used);
        // CALL __ptrchk
        cdb.gencs((LARGECODE) ? 0x9A : CALL,0,FLfunc,getRtlsym(RTLSYM.PTRCHK));
    }

    cdb.append(cs2);
}

/***********************************
 * Determine if BP can be used as a general purpose register.
 * Note parallels between this routine and prolog().
 * Returns:
 *      0       can't be used, needed for frame
 *      mBP     can be used
 */

@trusted
regm_t cod3_useBP()
{
    tym_t tym;
    tym_t tyf;

    // Note that DOSX memory model cannot use EBP as a general purpose
    // register, as SS != DS.
    if (!(config.exe & EX_flat) || config.flags & (CFGalwaysframe | CFGnoebp))
        goto Lcant;

    if (anyiasm)                        // inline asm may reference [EBP] frame
        goto Lcant;

    tyf = funcsym_p.ty();
    if (tyf & mTYnaked)                 // if no prolog/epilog for function
        goto Lcant;

    if (funcsym_p.Sfunc.Fflags3 & Ffakeeh)
    {
        goto Lcant;                     // need consistent stack frame
    }

    tym = tybasic(tyf);
    if (tym == TYifunc)
        goto Lcant;

    stackoffsets(globsym, true);                // estimate stack offsets
    localsize = Auto.offset + Fast.offset;      // an estimate only
//    if (localsize)
    {
        if (!(config.flags4 & CFG4speed) ||
            config.target_cpu < TARGET_Pentium ||
            tyfarfunc(tym) ||
            config.flags & CFGstack ||
            localsize >= 0x100 ||       // arbitrary value < 0x1000
            (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) ||
            calledFinally ||
            Alloca.size
           )
            goto Lcant;
    }
    return mBP;

Lcant:
    return 0;
}

/*************************************************
 * Generate code segment to be used later to restore a cse
 */

@trusted
bool cse_simple(code *c, elem *e)
{
    regm_t regm;
    reg_t reg;
    int sz = tysize(e.Ety);

    if (!I16 &&                         // don't bother with 16 bit code
        e.Eoper == OPadd &&
        sz == REGSIZE &&
        e.EV.E2.Eoper == OPconst &&
        e.EV.E1.Eoper == OPvar &&
        isregvar(e.EV.E1,regm,reg) &&
        !(e.EV.E1.EV.Vsym.Sflags & SFLspill)
       )
    {
        memset(c,0,(*c).sizeof);

        // Make this an LEA instruction
        c.Iop = LEA;
        buildEA(c,reg,-1,1,e.EV.E2.EV.Vuns);
        if (I64)
        {   if (sz == 8)
                c.Irex |= REX_W;
        }

        return true;
    }
    else if (e.Eoper == OPind &&
        sz <= REGSIZE &&
        e.EV.E1.Eoper == OPvar &&
        isregvar(e.EV.E1,regm,reg) &&
        (I32 || I64 || regm & IDXREGS) &&
        !(e.EV.E1.EV.Vsym.Sflags & SFLspill)
        )
    {
        memset(c,0,(*c).sizeof);

// Make this a MOV instruction 2596 c.Iop = (sz == 1) ? 0x8A : 0x8B; // MOV reg,EA 2597 buildEA(c,reg,-1,1,0); 2598 if (sz == 2 && I32) 2599 c.Iflags |= CFopsize; 2600 else if (I64) 2601 { if (sz == 8) 2602 c.Irex |= REX_W; 2603 } 2604 2605 return true; 2606 } 2607 return false; 2608 } 2609 2610 /************************** 2611 * Store `reg` to the common subexpression save area in index `slot`. 2612 * Params: 2613 * cdb = where to write code to 2614 * tym = type of value that's in `reg` 2615 * reg = register to save 2616 * slot = index into common subexpression save area 2617 */ 2618 @trusted 2619 void gen_storecse(ref CodeBuilder cdb, tym_t tym, reg_t reg, size_t slot) 2620 { 2621 // MOV slot[BP],reg 2622 if (isXMMreg(reg) && config.fpxmmregs) // watch out for ES 2623 { 2624 const aligned = tyvector(tym) ? STACKALIGN >= 16 : true; 2625 const op = xmmstore(tym, aligned); 2626 cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLcs,cast(targ_size_t)slot); 2627 return; 2628 } 2629 opcode_t op = STO; // normal mov 2630 if (reg == ES) 2631 { 2632 reg = 0; // the real reg number 2633 op = 0x8C; // segment reg mov 2634 } 2635 cdb.genc1(op,modregxrm(2, reg, BPRM),FLcs,cast(targ_uns)slot); 2636 if (I64) 2637 code_orrex(cdb.last(), REX_W); 2638 } 2639 2640 @trusted 2641 void gen_testcse(ref CodeBuilder cdb, tym_t tym, uint sz, size_t slot) 2642 { 2643 // CMP slot[BP],0 2644 cdb.genc(sz == 1 ? 0x80 : 0x81,modregrm(2,7,BPRM), 2645 FLcs,cast(targ_uns)slot, FLconst,cast(targ_uns) 0); 2646 if ((I64 || I32) && sz == 2) 2647 cdb.last().Iflags |= CFopsize; 2648 if (I64 && sz == 8) 2649 code_orrex(cdb.last(), REX_W); 2650 } 2651 2652 @trusted 2653 void gen_loadcse(ref CodeBuilder cdb, tym_t tym, reg_t reg, size_t slot) 2654 { 2655 // MOV reg,slot[BP] 2656 if (isXMMreg(reg) && config.fpxmmregs) 2657 { 2658 const aligned = tyvector(tym) ? 
STACKALIGN >= 16 : true; 2659 const op = xmmload(tym, aligned); 2660 cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLcs,cast(targ_size_t)slot); 2661 return; 2662 } 2663 opcode_t op = LOD; 2664 if (reg == ES) 2665 { 2666 op = 0x8E; 2667 reg = 0; 2668 } 2669 cdb.genc1(op,modregxrm(2,reg,BPRM),FLcs,cast(targ_uns)slot); 2670 if (I64) 2671 code_orrex(cdb.last(), REX_W); 2672 } 2673 2674 /*************************************** 2675 * Gen code for OPframeptr 2676 */ 2677 2678 @trusted 2679 void cdframeptr(ref CodeBuilder cdb, elem *e, regm_t *pretregs) 2680 { 2681 regm_t retregs = *pretregs & allregs; 2682 if (!retregs) 2683 retregs = allregs; 2684 reg_t reg; 2685 allocreg(cdb,&retregs, ®, TYint); 2686 2687 code cs; 2688 cs.Iop = ESCAPE | ESCframeptr; 2689 cs.Iflags = 0; 2690 cs.Irex = 0; 2691 cs.Irm = cast(ubyte)reg; 2692 cdb.gen(&cs); 2693 fixresult(cdb,e,retregs,pretregs); 2694 } 2695 2696 /*************************************** 2697 * Gen code for load of _GLOBAL_OFFSET_TABLE_. 2698 * This value gets cached in the local variable 'localgot'. 
 */

@trusted
void cdgot(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    if (config.exe & (EX_OSX | EX_OSX64))
    {
        regm_t retregs = *pretregs & allregs;
        if (!retregs)
            retregs = allregs;
        reg_t reg;
        allocreg(cdb,&retregs, &reg, TYnptr);

        // CALL to the next instruction leaves the PC on the stack;
        // POP it to get the current address (classic PIC idiom).
        cdb.genc(CALL,0,0,0,FLgot,0);     //     CALL L1
        cdb.gen1(0x58 + reg);             // L1: POP reg

        fixresult(cdb,e,retregs,pretregs);
    }
    else if (config.exe & EX_posix)
    {
        regm_t retregs = *pretregs & allregs;
        if (!retregs)
            retregs = allregs;
        reg_t reg;
        allocreg(cdb,&retregs, &reg, TYnptr);

        cdb.genc2(CALL,0,0);        //     CALL L1
        cdb.gen1(0x58 + reg);       // L1: POP reg

        // ADD reg,_GLOBAL_OFFSET_TABLE_+3
        Symbol *gotsym = Obj.getGOTsym();
        cdb.gencs(0x81,modregrm(3,0,reg),FLextern,gotsym);
        /* Because the 2:3 offset from L1: is hardcoded,
         * this sequence of instructions must not
         * have any instructions in between,
         * so set CFvolatile to prevent the scheduler from rearranging it.
         */
        code *cgot = cdb.last();
        cgot.Iflags = CFoff | CFvolatile;
        // NOTE(review): offset is 2 for AX presumably because the ADD gets
        // the shorter AX-specific encoding (0x05 imm32) — confirm in pinholeopt
        cgot.IEV2.Voffset = (reg == AX) ? 2 : 3;

        makeitextern(gotsym);
        fixresult(cdb,e,retregs,pretregs);
    }
    else
        assert(0);
}

/**************************************************
 * Load contents of localgot into EBX.
2749 */ 2750 2751 @trusted 2752 void load_localgot(ref CodeBuilder cdb) 2753 { 2754 if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS)) // note: I32 only 2755 { 2756 if (config.flags3 & CFG3pic) 2757 { 2758 if (localgot && !(localgot.Sflags & SFLdead)) 2759 { 2760 localgot.Sflags &= ~GTregcand; // because this hack doesn't work with reg allocator 2761 elem *e = el_var(localgot); 2762 regm_t retregs = mBX; 2763 codelem(cdb,e,&retregs,false); 2764 el_free(e); 2765 } 2766 else 2767 { 2768 elem *e = el_long(TYnptr, 0); 2769 e.Eoper = OPgot; 2770 regm_t retregs = mBX; 2771 codelem(cdb,e,&retregs,false); 2772 el_free(e); 2773 } 2774 } 2775 } 2776 } 2777 2778 /***************************** 2779 * Returns: 2780 * # of bytes stored 2781 */ 2782 2783 2784 @trusted 2785 int obj_namestring(char *p,const(char)* name) 2786 { 2787 size_t len = strlen(name); 2788 if (len > 255) 2789 { 2790 short *ps = cast(short *)p; 2791 p[0] = 0xFF; 2792 p[1] = 0; 2793 ps[1] = cast(short)len; 2794 memcpy(p + 4,name,len); 2795 const int ONS_OHD = 4; // max # of extra bytes added by obj_namestring() 2796 len += ONS_OHD; 2797 } 2798 else 2799 { 2800 p[0] = cast(char)len; 2801 memcpy(p + 1,name,len); 2802 len++; 2803 } 2804 return cast(int)len; 2805 } 2806 2807 void genregs(ref CodeBuilder cdb,opcode_t op,uint dstreg,uint srcreg) 2808 { 2809 return cdb.gen2(op,modregxrmx(3,dstreg,srcreg)); 2810 } 2811 2812 void gentstreg(ref CodeBuilder cdb, uint t) 2813 { 2814 cdb.gen2(0x85,modregxrmx(3,t,t)); // TEST t,t 2815 code_orflag(cdb.last(),CFpsw); 2816 } 2817 2818 void genpush(ref CodeBuilder cdb, reg_t reg) 2819 { 2820 cdb.gen1(0x50 + (reg & 7)); 2821 if (reg & 8) 2822 code_orrex(cdb.last(), REX_B); 2823 } 2824 2825 void genpop(ref CodeBuilder cdb, reg_t reg) 2826 { 2827 cdb.gen1(0x58 + (reg & 7)); 2828 if (reg & 8) 2829 code_orrex(cdb.last(), REX_B); 2830 } 2831 2832 /************************** 2833 * Generate a MOV to,from register instruction. 
 * Smart enough to dump redundant register moves, and segment
 * register moves.
 */

code *genmovreg(uint to,uint from)
{
    CodeBuilder cdb; cdb.ctor();
    genmovreg(cdb, to, from);
    return cdb.finish();
}

void genmovreg(ref CodeBuilder cdb,uint to,uint from)
{
    // TYMAX means "no type given"; the worker picks TYsize_t to avoid slicing
    genmovreg(cdb, to, from, TYMAX);
}

@trusted
void genmovreg(ref CodeBuilder cdb, uint to, uint from, tym_t tym)
{
    // register kind. ex: GPR,XMM,SEG
    static uint _K(uint reg)
    {
        switch (reg)
        {
            case ES:                   return ES;
            case XMM15:
            case XMM0: .. case XMM7:   return XMM0;
            case AX:   .. case R15:    return AX;
            default:                   return reg;
        }
    }

    // kind combination (order kept)
    static uint _X(uint to, uint from) { return (_K(to) << 8) + _K(from); }

    if (to != from)
    {
        if (tym == TYMAX) tym = TYsize_t; // avoid register slicing
        switch (_X(to, from))
        {
            case _X(AX, AX):                        // GPR <- GPR
                genregs(cdb, 0x89, from, to);        // MOV to,from
                if (I64 && tysize(tym) >= 8)
                    code_orrex(cdb.last(), REX_W);
                break;

            case _X(XMM0, XMM0):                    // MOVD/Q to,from
                genregs(cdb, xmmload(tym), to-XMM0, from-XMM0);
                checkSetVex(cdb.last(), tym);
                break;

            case _X(AX, XMM0):                      // MOVD/Q to,from
                genregs(cdb, STOD, from-XMM0, to);
                if (I64 && tysize(tym) >= 8)
                    code_orrex(cdb.last(), REX_W);
                checkSetVex(cdb.last(), tym);
                break;

            case _X(XMM0, AX):                      // MOVD/Q to,from
                genregs(cdb, LODD, to-XMM0, from);
                if (I64 && tysize(tym) >= 8)
                    code_orrex(cdb.last(), REX_W);
                checkSetVex(cdb.last(), tym);
                break;

            case _X(ES, AX):                        // MOV ES,GPR
                assert(tysize(tym) <= REGSIZE);
                genregs(cdb, 0x8E, 0, from);
                break;

            case _X(AX, ES):                        // MOV GPR,ES
                assert(tysize(tym) <= REGSIZE);
                genregs(cdb, 0x8C, 0, to);
                break;

            default:
                debug printf("genmovreg(to = %s, from = %s)\n"
                    , regm_str(mask(to)), regm_str(mask(from)));
                assert(0);
        }
    }
}

/***************************************
 * Generate immediate multiply instruction for r1=r2*imm.
 * Optimize it into LEA's if we can.
 */

@trusted
void genmulimm(ref CodeBuilder cdb,uint r1,uint r2,targ_int imm)
{
    // These optimizations should probably be put into pinholeopt()
    switch (imm)
    {
        case 1:                         // r1 = r2 * 1  =>  MOV r1,r2
            genmovreg(cdb,r1,r2);
            break;

        case 5:                         // r1 = r2 * 5  =>  LEA r1,[r2+r2*4]
        {
            code cs;
            cs.Iop = LEA;
            cs.Iflags = 0;
            cs.Irex = 0;
            buildEA(&cs,r2,r2,4,0);
            cs.orReg(r1);
            cdb.gen(&cs);
            break;
        }

        default:
            cdb.genc2(0x69,modregxrmx(3,r1,r2),imm);    // IMUL r1,r2,imm
            break;
    }
}

/******************************
 * Load CX with the value of _AHSHIFT.
 */

void genshift(ref CodeBuilder cdb)
{
    assert(0);          // not used for this target
}

/******************************
 * Move constant value into reg.
 * Take advantage of existing values in registers.
 * If flags & mPSW
 *      set flags based on result
 * Else if flags & 8
 *      do not disturb flags
 * Else
 *      don't care about flags
 * If flags & 1 then byte move
 * If flags & 2 then short move (for I32 and I64)
 * If flags & 4 then don't disturb unused portion of register
 * If flags & 16 then reg is a byte register AL..BH
 * If flags & 64 (0x40) then 64 bit move (I64 only)
 * Returns:
 *      code (if any) generated
 */

@trusted
void movregconst(ref CodeBuilder cdb,reg_t reg,targ_size_t value,regm_t flags)
{
    reg_t r;
    regm_t mreg;

    //printf("movregconst(reg=%s, value= %lld (%llx), flags=%x)\n", regm_str(mask(reg)), value, value, flags);

    // regcon.immed tracks which registers are known to hold which constants
    regm_t regm = regcon.immed.mval & mask(reg);
    targ_size_t regv = regcon.immed.value[reg];

    if (flags & 1)      // 8 bits
    {
        value &= 0xFF;
        regm &= BYTEREGS;

        // If we already have the right value in the right register
        if (regm && (regv & 0xFF) == value)
            goto L2;

        if (flags & 16 && reg & 4 &&    // if an H byte register
            regcon.immed.mval & mask(reg & 3) &&
            (((regv = regcon.immed.value[reg & 3]) >> 8) & 0xFF) == value)
            goto L2;

        /* Avoid byte register loads to avoid dependency stalls.
         */
        if ((I32 || I64) &&
            config.target_cpu >= TARGET_PentiumPro && !(flags & 4))
            goto L3;

        // See if another register has the right value
        r = 0;
        for (mreg = (regcon.immed.mval & BYTEREGS); mreg; mreg >>= 1)
        {
            if (mreg & 1)
            {
                if ((regcon.immed.value[r] & 0xFF) == value)
                {
                    genregs(cdb,0x8A,reg,r);          // MOV regL,rL
                    if (I64 && reg >= 4 || r >= 4)
                        code_orrex(cdb.last(), REX);
                    goto L2;
                }
                if (!(I64 && reg >= 4) &&
                    r < 4 && ((regcon.immed.value[r] >> 8) & 0xFF) == value)
                {
                    genregs(cdb,0x8A,reg,r | 4);      // MOV regL,rH
                    goto L2;
                }
            }
            r++;
        }

        if (value == 0 && !(flags & 8))
        {
            if (!(flags & 4) &&                 // if we can set the whole register
                !(flags & 16 && reg & 4))       // and reg is not an H register
            {
                genregs(cdb,0x31,reg,reg);      // XOR reg,reg
                regimmed_set(reg,value);
                regv = 0;
            }
            else
                genregs(cdb,0x30,reg,reg);      // XOR regL,regL
            flags &= ~mPSW;                     // flags already set by XOR
        }
        else
        {
            cdb.genc2(0xC6,modregrmx(3,0,reg),value);  // MOV regL,value
            if (reg >= 4 && I64)
            {
                code_orrex(cdb.last(), REX);
            }
        }
    L2:
        if (flags & mPSW)
            genregs(cdb,0x84,reg,reg);          // TEST regL,regL

        if (regm)
            // Set just the 'L' part of the register value
            regimmed_set(reg,(regv & ~cast(targ_size_t)0xFF) | value);
        else if (flags & 16 && reg & 4 && regcon.immed.mval & mask(reg & 3))
            // Set just the 'H' part of the register value
            regimmed_set((reg & 3),(regv & ~cast(targ_size_t)0xFF00) | (value << 8));
        return;
    }
L3:
    if (I16)
        value = cast(targ_short) value;         // sign-extend MSW
    else if (I32)
        value = cast(targ_int) value;

    if (!I16 && flags & 2)                      // load 16 bit value
    {
        value &= 0xFFFF;
        if (value && !(flags & mPSW))
        {
            cdb.genc2(0xC7,modregrmx(3,0,reg),value); // MOV reg,value
            regimmed_set(reg, value);
            return;
        }
    }

    // If we already have the right value in the right register
    if (regm && (regv & 0xFFFFFFFF) == (value & 0xFFFFFFFF) && !(flags & 64))
    {
        if (flags & mPSW)
            gentstreg(cdb,reg);
    }
    else if (flags & 64 && regm && regv == value)
    {   // Look at the full 64 bits
        if (flags & mPSW)
        {
            gentstreg(cdb,reg);
            code_orrex(cdb.last(), REX_W);
        }
    }
    else
    {
        if (flags & mPSW)
        {
            switch (value)
            {
                case 0:
                    genregs(cdb,0x31,reg,reg);  // XOR reg,reg (sets flags)
                    break;

                case 1:
                    if (I64)
                        goto L4;
                    genregs(cdb,0x31,reg,reg);  // XOR reg,reg then INC
                    goto inc;

                case ~cast(targ_size_t)0:
                    if (I64)
                        goto L4;
                    genregs(cdb,0x31,reg,reg);  // XOR reg,reg then DEC
                    goto dec;

                default:
                L4:
                    if (flags & 64)
                    {
                        cdb.genc2(0xB8 + (reg&7),REX_W << 16 | (reg&8) << 13,value); // MOV reg,value64
                        gentstreg(cdb,reg);
                        code_orrex(cdb.last(), REX_W);
                    }
                    else
                    {
                        value &= 0xFFFFFFFF;
                        cdb.genc2(0xB8 + (reg&7),(reg&8) << 13,value); // MOV reg,value
                        gentstreg(cdb,reg);
                    }
                    break;
            }
        }
        else
        {
            // Look for single byte conversion
            if (regcon.immed.mval & mAX)
            {
                if (I32)
                {
                    if (reg == AX && value == cast(targ_short) regv)
                    {
                        cdb.gen1(0x98);               // CWDE
                        goto done;
                    }
                    if (reg == DX &&
                        value == (regcon.immed.value[AX] & 0x80000000 ? 0xFFFFFFFF : 0) &&
                        !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
                       )
                    {
                        cdb.gen1(0x99);               // CDQ
                        goto done;
                    }
                }
                else if (I16)
                {
                    if (reg == AX &&
                        cast(targ_short) value == cast(byte) regv)
                    {
                        cdb.gen1(0x98);               // CBW
                        goto done;
                    }

                    if (reg == DX &&
                        cast(targ_short) value == (regcon.immed.value[AX] & 0x8000 ? cast(targ_short) 0xFFFF : cast(targ_short) 0) &&
                        !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
                       )
                    {
                        cdb.gen1(0x99);               // CWD
                        goto done;
                    }
                }
            }
            if (value == 0 && !(flags & 8) && config.target_cpu >= TARGET_80486)
            {
                genregs(cdb,0x31,reg,reg);            // XOR reg,reg
                goto done;
            }

            if (!I64 && regm && !(flags & 8))
            {
                if (regv + 1 == value ||
                    // Catch case of (0xFFFF+1 == 0) for 16 bit compiles
                    (I16 && cast(targ_short)(regv + 1) == cast(targ_short)value))
                {
                inc:
                    cdb.gen1(0x40 + reg);     // INC reg
                    goto done;
                }
                if (regv - 1 == value)
                {
                dec:
                    cdb.gen1(0x48 + reg);     // DEC reg
                    goto done;
                }
            }

            // See if another register has the right value
            r = 0;
            for (mreg = regcon.immed.mval; mreg; mreg >>= 1)
            {
                debug
                assert(!I16 || regcon.immed.value[r] == cast(targ_short)regcon.immed.value[r]);

                if (mreg & 1 && regcon.immed.value[r] == value)
                {
                    genmovreg(cdb,reg,r);
                    goto done;
                }
                r++;
            }

            if (value == 0 && !(flags & 8))
            {
                genregs(cdb,0x31,reg,reg);            // XOR reg,reg
            }
            else
            {   // See if we can just load a byte
                if (regm & BYTEREGS &&
                    !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_PentiumPro)
                   )
                {
                    if ((regv & ~cast(targ_size_t)0xFF) == (value & ~cast(targ_size_t)0xFF))
                    {
                        movregconst(cdb,reg,value,(flags & 8) |4|1);  // load regL
                        return;
                    }
                    if (regm & (mAX|mBX|mCX|mDX) &&
                        (regv & ~cast(targ_size_t)0xFF00) == (value & ~cast(targ_size_t)0xFF00) &&
                        !I64)
                    {
                        movregconst(cdb,4|reg,value >> 8,(flags & 8) |4|1|16); // load regH
                        return;
                    }
                }
                if (flags & 64)
                    cdb.genc2(0xB8 + (reg&7),REX_W << 16 | (reg&8) << 13,value); // MOV reg,value64
                else
                {
                    value &= 0xFFFFFFFF;
                    cdb.genc2(0xB8 + (reg&7),(reg&8) << 13,value); // MOV reg,value
                }
            }
        }
    done:
        regimmed_set(reg,value);        // remember reg now holds value
    }
}

/**************************
 * Generate a jump instruction.
 */

@trusted
void genjmp(ref CodeBuilder cdb,opcode_t op,uint fltarg,block *targ)
{
    code cs;
    cs.Iop = op & 0xFF;
    cs.Iflags = 0;
    cs.Irex = 0;
    if (op != JMP && op != 0xE8)        // if not already long branch
        cs.Iflags = CFjmp16;            // assume long branch for op = 0x7x
    cs.IFL2 = cast(ubyte)fltarg;        // FLblock (or FLcode)
    cs.IEV2.Vblock = targ;              // target block (or code)
    if (fltarg == FLcode)
        (cast(code *)targ).Iflags |= CFtarg;

    if (config.flags4 & CFG4fastfloat)  // if fast floating point
    {
        cdb.gen(&cs);
        return;
    }

    // The second (high) byte of op encodes a companion jump needed to
    // handle the unordered result of floating point comparisons.
    switch (op & 0xFF00)                // look at second jump opcode
    {
        // The JP and JNP come from floating point comparisons
        case JP << 8:
            cdb.gen(&cs);
            cs.Iop = JP;
            cdb.gen(&cs);
            break;

        case JNP << 8:
        {
            // Do a JP around the jump instruction
            code *cnop = gennop(null);
            genjmp(cdb,JP,FLcode,cast(block *) cnop);
            cdb.gen(&cs);
            cdb.append(cnop);
            break;
        }

        case 1 << 8:                    // toggled no jump
        case 0 << 8:
            cdb.gen(&cs);
            break;

        default:
            debug
            printf("jop = x%x\n",op);
            assert(0);
    }
}

/*********************************************
 * Generate first part of prolog for interrupt function.
 */
@trusted
void prolog_ifunc(ref CodeBuilder cdb, tym_t* tyf)
{
    // PUSHA (286+) or the equivalent individual pushes, then PUSH DS, PUSH ES
    static immutable ubyte[4] ops2 = [ 0x60,0x1E,0x06,0 ];
    static immutable ubyte[11] ops0 = [ 0x50,0x51,0x52,0x53,
                                        0x54,0x55,0x56,0x57,
                                        0x1E,0x06,0 ];

    immutable(ubyte)* p = (config.target_cpu >= TARGET_80286) ? ops2.ptr : ops0.ptr;
    do
        cdb.gen1(*p);
    while (*++p);

    genregs(cdb,0x8B,BP,SP);                            // MOV BP,SP
    if (localsize)
        cod3_stackadj(cdb, cast(int)localsize);

    *tyf |= mTYloadds;          // interrupt functions must reload DS
}

@trusted
void prolog_ifunc2(ref CodeBuilder cdb, tym_t tyf, tym_t tym, bool pushds)
{
    /* Determine if we need to reload DS        */
    if (tyf & mTYloadds)
    {
        if (!pushds)                            // if not already pushed
            cdb.gen1(0x1E);                     // PUSH DS
        spoff += _tysize[TYint];
        cdb.genc(0xC7,modregrm(3,0,AX),0,0,FLdatseg,cast(targ_uns) 0); // MOV AX,DGROUP
        code *c = cdb.last();
        c.IEV2.Vseg = DATA;
        c.Iflags ^= CFseg | CFoff;              // turn off CFoff, on CFseg
        cdb.gen2(0x8E,modregrm(3,3,AX));        // MOV DS,AX
        useregs(mAX);
    }

    if (tym == TYifunc)
        cdb.gen1(0xFC);                         // CLD
}

@trusted
void prolog_16bit_windows_farfunc(ref CodeBuilder cdb, tym_t* tyf, bool* pushds)
{
    int wflags = config.wflags;
    if (wflags & WFreduced && !(*tyf & mTYexport))
    {   // reduced prolog/epilog for non-exported functions
        wflags &= ~(WFdgroup | WFds | WFss);
    }

    getregsNoSave(mAX);                 // should not have any value in AX

    int segreg;
    switch (wflags & (WFdgroup | WFds | WFss))
    {
        case WFdgroup:                  // MOV AX,DGROUP
        {
            if (wflags & WFreduced)
                *tyf &= ~mTYloadds;     // remove redundancy
            cdb.genc(0xC7,modregrm(3,0,AX),0,0,FLdatseg,cast(targ_uns) 0);
            code *c = cdb.last();
            c.IEV2.Vseg = DATA;
            c.Iflags ^= CFseg | CFoff;  // turn off CFoff, on CFseg
            break;
        }

        case WFss:
            segreg = 2;                 // SS
            goto Lmovax;

        case WFds:
            segreg = 3;                 // DS
        Lmovax:
            cdb.gen2(0x8C,modregrm(3,segreg,AX));       // MOV AX,segreg
            if (wflags & WFds)
                cdb.gen1(0x90);                         // NOP
            break;

        case 0:
            break;

        default:
            debug
            printf("config.wflags = x%x\n",config.wflags);
            assert(0);
    }
    if (wflags & WFincbp)
        cdb.gen1(0x40 + BP);            // INC BP (marks far frame for Windows)
    cdb.gen1(0x50 + BP);                // PUSH BP
    genregs(cdb,0x8B,BP,SP);            // MOV BP,SP
    if (wflags & (WFsaveds | WFds | WFss | WFdgroup))
    {
        cdb.gen1(0x1E);                 // PUSH DS
        *pushds = true;
        BPoff = -REGSIZE;
    }
    if (wflags & (WFds | WFss | WFdgroup))
        cdb.gen2(0x8E,modregrm(3,3,AX));        // MOV DS,AX
}

/**********************************************
 * Set up frame register.
 * Params:
 *      cdb = write generated code here
 *      farfunc = true if a far function
 *      enter = set to true if ENTER instruction can be used, false otherwise
 *      xlocalsize = amount of local variables, set to amount to be subtracted from stack pointer
 *      cfa_offset = set to frame pointer's offset from the CFA
 * Returns:
 *      generated code
 */
@trusted
void prolog_frame(ref CodeBuilder cdb, bool farfunc, ref uint xlocalsize, out bool enter, out int cfa_offset)
{
    //printf("prolog_frame\n");
    cfa_offset = 0;

    if (0 && config.exe == EX_WIN64)    // deliberately disabled code path
    {
        // PUSH RBP
        // LEA RBP,0[RSP]
        cdb.gen1(0x50 + BP);
        cdb.genc1(LEA,(REX_W<<16) | (modregrm(0,4,SP)<<8) | modregrm(2,BP,4),FLconst,0);
        enter = false;
        return;
    }

    if (config.wflags & WFincbp && farfunc)
        cdb.gen1(0x40 + BP);            // INC BP
    // Use the explicit PUSH BP / MOV BP,SP sequence rather than ENTER
    // whenever ENTER is unavailable, slower, or incompatible with EH/dwarf
    if (config.target_cpu < TARGET_80286 ||
        config.exe & (EX_posix | EX_WIN64) ||
        !localsize ||
        config.flags & CFGstack ||
        (xlocalsize >= 0x1000 && config.exe & EX_flat) ||
        localsize >= 0x10000 ||
        (NTEXCEPTIONS == 2 &&
         (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || config.ehmethod == EHmethod.EH_SEH))) ||
        (config.target_cpu >= TARGET_80386 &&
         config.flags4 & CFG4speed)
       )
    {
        cdb.gen1(0x50 + BP);      // PUSH BP
        genregs(cdb,0x8B,BP,SP);      // MOV BP,SP
        if (I64)
            code_orrex(cdb.last(), REX_W);   // MOV RBP,RSP
        if ((config.objfmt & (OBJ_ELF | OBJ_MACH)) && config.fulltypes)
            // Don't reorder instructions, as dwarf CFA relies on it
            code_orflag(cdb.last(), CFvolatile);
        static if (NTEXCEPTIONS == 2)
        {
            if (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || config.ehmethod == EHmethod.EH_SEH))
            {
                nteh_prolog(cdb);
                int sz = nteh_contextsym_size();
                assert(sz != 0);        // should be 5*4, not 0
                xlocalsize -= sz;       // sz is already subtracted from ESP
                                        // by nteh_prolog()
            }
        }
        if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D ||
            config.ehmethod == EHmethod.EH_DWARF)
        {
            int off = 2 * REGSIZE;      // 1 for the return address + 1 for the PUSH EBP
            dwarf_CFA_set_loc(1);           // address after PUSH EBP
            dwarf_CFA_set_reg_offset(SP, off); // CFA is now 8[ESP]
            dwarf_CFA_offset(BP, -off);       // EBP is at 0[ESP]
            dwarf_CFA_set_loc(I64 ? 4 : 3);   // address after MOV EBP,ESP
            /* Oddly, the CFA is not the same as the frame pointer,
             * which is why the offset of BP is set to 8
             */
            dwarf_CFA_set_reg_offset(BP, off);        // CFA is now 0[EBP]
            cfa_offset = off;  // remember the difference between the CFA and the frame pointer
        }
        enter = false;                  /* do not use ENTER instruction */
    }
    else
        enter = true;
}

/**********************************************
 * Enforce stack alignment.
 * Input:
 *      cdb     code builder.
 * Returns:
 *      generated code
 */
@trusted
void prolog_stackalign(ref CodeBuilder cdb)
{
    if (!enforcealign)
        return;

    const offset = (hasframe ? 2 : 1) * REGSIZE;   // 1 for the return address + 1 for the PUSH EBP
    if (offset & (STACKALIGN - 1) || TARGET_STACKALIGN < STACKALIGN)
        cod3_stackalign(cdb, STACKALIGN);
}

@trusted
void prolog_frameadj(ref CodeBuilder cdb, tym_t tyf, uint xlocalsize, bool enter, bool* pushalloc)
{
    uint pushallocreg = (tyf == TYmfunc) ? CX : AX;

    // Decide whether to probe (fault in) stack pages while allocating the frame
    bool check;
    if (config.exe & (EX_LINUX | EX_LINUX64))
        check = false;               // seems that Linux doesn't need to fault in stack pages
    else
        check = (config.flags & CFGstack && !(I32 && xlocalsize < 0x1000)) // if stack overflow check
            || (config.exe & (EX_windos & EX_flat) && xlocalsize >= 0x1000);

    if (check)
    {
        if (I16)
        {
            // BUG: Won't work if parameter is passed in AX
            movregconst(cdb,AX,xlocalsize,false); // MOV AX,localsize
            makeitextern(getRtlsym(RTLSYM.CHKSTK));
                                                  // CALL _chkstk
            cdb.gencs((LARGECODE) ? 0x9A : CALL,0,FLfunc,getRtlsym(RTLSYM.CHKSTK));
            useregs((ALLREGS | mBP | mES) & ~getRtlsym(RTLSYM.CHKSTK).Sregsaved);
        }
        else
        {
            /* Watch out for 64 bit code where EDX is passed as a register parameter
             */
            reg_t reg = I64 ? R11 : DX;  // scratch register

            /*      MOV     EDX, xlocalsize/0x1000
             *  L1: SUB     ESP, 0x1000
             *      TEST    [ESP],ESP
             *      DEC     EDX
             *      JNE     L1
             *      SUB     ESP, xlocalsize % 0x1000
             */
            movregconst(cdb, reg, xlocalsize / 0x1000, false);
            cod3_stackadj(cdb, 0x1000);
            code_orflag(cdb.last(), CFtarg2);
            cdb.gen2sib(0x85, modregrm(0,SP,4),modregrm(0,4,SP));
            if (I64)
            {   cdb.gen2(0xFF, modregrmx(3,1,R11));   // DEC R11D
                cdb.genc2(JNE,0,cast(targ_uns)-15);
            }
            else
            {   cdb.gen1(0x48 + DX);                  // DEC EDX
                cdb.genc2(JNE,0,cast(targ_uns)-12);
            }
            regimmed_set(reg,0);             // reg is now 0
            cod3_stackadj(cdb, xlocalsize & 0xFFF);
            useregs(mask(reg));
        }
    }
    else
    {
        if (enter)
        {   // ENTER xlocalsize,0
            cdb.genc(ENTER,0,FLconst,xlocalsize,FLconst,cast(targ_uns) 0);
            assert(!(config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D)); // didn't emit Dwarf data
        }
        else if (xlocalsize == REGSIZE && config.flags4 & CFG4optimized)
        {
            cdb.gen1(0x50 + pushallocreg);    // PUSH AX
            // Do this to prevent an -x[EBP] to be moved in
            // front of the push.
            code_orflag(cdb.last(),CFvolatile);
            *pushalloc = true;
        }
        else
            cod3_stackadj(cdb, xlocalsize);
    }
}

void prolog_frameadj2(ref CodeBuilder cdb, tym_t tyf, uint xlocalsize, bool* pushalloc)
{
    // A one- or two-slot frame is cheaper to allocate with PUSHes
    uint pushallocreg = (tyf == TYmfunc) ? CX : AX;
    if (xlocalsize == REGSIZE)
    {
        cdb.gen1(0x50 + pushallocreg);      // PUSH AX
        *pushalloc = true;
    }
    else if (xlocalsize == 2 * REGSIZE)
    {
        cdb.gen1(0x50 + pushallocreg);      // PUSH AX
        cdb.gen1(0x50 + pushallocreg);      // PUSH AX
        *pushalloc = true;
    }
    else
        cod3_stackadj(cdb, xlocalsize);
}

@trusted
void prolog_setupalloca(ref CodeBuilder cdb)
{
    //printf("prolog_setupalloca() offset x%x size x%x alignment x%x\n",
        //cast(int)Alloca.offset, cast(int)Alloca.size, cast(int)Alloca.alignment);
    // Set up magic parameter for alloca()
    // MOV -REGSIZE[BP],localsize - BPoff
    cdb.genc(0xC7,modregrm(2,0,BPRM),
            FLconst,Alloca.offset + BPoff,
            FLconst,localsize - BPoff);
    if (I64)
        code_orrex(cdb.last(), REX_W);
}

/**************************************
 * Save registers that the function destroys,
 * but that the ABI says should be preserved across
 * function calls.
 *
 * Emit Dwarf info for these saves.
 * Params:
 *      cdb = append generated instructions to this
 *      topush = mask of registers to push
 *      cfa_offset = offset of frame pointer from CFA
 */

@trusted
void prolog_saveregs(ref CodeBuilder cdb, regm_t topush, int cfa_offset)
{
    if (pushoffuse)
    {
        // Save to preallocated section in the stack frame.
        // Layout: XMM saves first (16 bytes each), then general purpose
        // registers (gpoffset starts right after the XMM block).
        int xmmtopush = popcnt(topush & XMMREGS);   // XMM regs take 16 bytes
        int gptopush = popcnt(topush) - xmmtopush;  // general purpose registers to save
        targ_size_t xmmoffset = pushoff + BPoff;
        if (!hasframe || enforcealign)
            xmmoffset += EBPtoESP;                  // no frame pointer: address relative to ESP
        targ_size_t gpoffset = xmmoffset + xmmtopush * 16;
        while (topush)
        {
            reg_t reg = findreg(topush);
            topush &= ~mask(reg);
            if (isXMMreg(reg))
            {
                if (hasframe && !enforcealign)
                {
                    // MOVUPD xmmoffset[EBP],xmm
                    cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,BPRM),FLconst,xmmoffset);
                }
                else
                {
                    // MOVUPD xmmoffset[ESP],xmm
                    cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,xmmoffset);
                }
                xmmoffset += 16;
            }
            else
            {
                if (hasframe && !enforcealign)
                {
                    // MOV gpoffset[EBP],reg
                    cdb.genc1(0x89,modregxrm(2,reg,BPRM),FLconst,gpoffset);
                }
                else
                {
                    // MOV gpoffset[ESP],reg
                    cdb.genc1(0x89,modregxrm(2,reg,4) + 256*modregrm(0,4,SP),FLconst,gpoffset);
                }
                if (I64)
                    code_orrex(cdb.last(), REX_W);
                if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D ||
                    config.ehmethod == EHmethod.EH_DWARF)
                {   // Emit debug_frame data giving location of saved register
                    code *c = cdb.finish();
                    pinholeopt(c, null);
                    dwarf_CFA_set_loc(calcblksize(c));  // address after save
                    dwarf_CFA_offset(reg, cast(int)(gpoffset - cfa_offset));
                    cdb.reset();
                    cdb.append(c);
                }
                gpoffset += REGSIZE;
            }
        }
    }
    else
    {
        // No preallocated save area: push registers, adjusting the
        // EBPtoESP/spoff bookkeeping for each one.
        while (topush)                      /* while registers to push      */
        {
            reg_t reg = findreg(topush);
            topush &= ~mask(reg);
            if (isXMMreg(reg))
            {
                // SUB RSP,16
                cod3_stackadj(cdb, 16);
                // MOVUPD 0[RSP],xmm
                cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,0);
                EBPtoESP += 16;
                spoff += 16;
            }
            else
            {
                genpush(cdb, reg);
                EBPtoESP += REGSIZE;
                spoff += REGSIZE;
                if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D ||
                    config.ehmethod == EHmethod.EH_DWARF)
                {   // Emit debug_frame data giving location of saved register
                    // relative to 0[EBP]
                    code *c = cdb.finish();
                    pinholeopt(c, null);
                    dwarf_CFA_set_loc(calcblksize(c));  // address after PUSH reg
                    dwarf_CFA_offset(reg, -EBPtoESP - cfa_offset);
                    cdb.reset();
                    cdb.append(c);
                }
            }
        }
    }
}

/**************************************
 * Undo prolog_saveregs()
 * Params:
 *      cdb = append generated instructions to this
 *      topop = mask of registers to restore (must mirror the prolog's topush)
 */

@trusted
private void epilog_restoreregs(ref CodeBuilder cdb, regm_t topop)
{
    debug
    if (topop & ~(XMMREGS | 0xFFFF))
        printf("fregsaved = %s, mfuncreg = %s\n",regm_str(fregsaved),regm_str(mfuncreg));

    assert(!(topop & ~(XMMREGS | 0xFFFF)));
    if (pushoffuse)
    {
        // Restore from the preallocated section in the stack frame
        // (same layout as prolog_saveregs: XMM block first, then GP block)
        int xmmtopop = popcnt(topop & XMMREGS);   // XMM regs take 16 bytes
        int gptopop = popcnt(topop) - xmmtopop;   // general purpose registers to save
        targ_size_t xmmoffset = pushoff + BPoff;
        if (!hasframe || enforcealign)
            xmmoffset += EBPtoESP;
        targ_size_t gpoffset = xmmoffset + xmmtopop * 16;
        while (topop)
        {
            reg_t reg = findreg(topop);
            topop &= ~mask(reg);
            if (isXMMreg(reg))
            {
                if (hasframe && !enforcealign)
                {
                    // MOVUPD xmm,xmmoffset[EBP]
                    cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,BPRM),FLconst,xmmoffset);
                }
                else
                {
                    // MOVUPD xmm,xmmoffset[ESP]
                    cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,4) +
 256*modregrm(0,4,SP),FLconst,xmmoffset);
                }
                xmmoffset += 16;
            }
            else
            {
                if (hasframe && !enforcealign)
                {
                    // MOV reg,gpoffset[EBP]
                    cdb.genc1(0x8B,modregxrm(2,reg,BPRM),FLconst,gpoffset);
                }
                else
                {
                    // MOV reg,gpoffset[ESP]
                    cdb.genc1(0x8B,modregxrm(2,reg,4) + 256*modregrm(0,4,SP),FLconst,gpoffset);
                }
                if (I64)
                    code_orrex(cdb.last(), REX_W);
                gpoffset += REGSIZE;
            }
        }
    }
    else
    {
        // Registers were PUSHed; POP them in reverse order by scanning the
        // register numbers downward from the highest possible saved register.
        reg_t reg = I64 ? XMM7 : DI;
        if (!(topop & XMMREGS))
            reg = R15;                      // no XMM saves: start scan at top GP register
        regm_t regm = 1 << reg;

        while (topop)
        {   if (topop & regm)
            {
                if (isXMMreg(reg))
                {
                    // MOVUPD xmm,0[RSP]
                    cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,0);
                    // ADD RSP,16
                    cod3_stackadj(cdb, -16);
                }
                else
                {
                    cdb.gen1(0x58 + (reg & 7));         // POP reg
                    if (reg & 8)
                        code_orrex(cdb.last(), REX_B);  // REX prefix for R8..R15
                }
                topop &= ~regm;
            }
            regm >>= 1;
            reg--;
        }
    }
}

/******************************
 * Generate special varargs prolog for Posix 64 bit systems.
 * Params:
 *      cdb = sink for generated code
 *      sv = symbol for __va_argsave
 */
@trusted
void prolog_genvarargs(ref CodeBuilder cdb, Symbol* sv)
{
    /* Generate code to move any arguments passed in registers into
     * the stack variable __va_argsave,
     * so we can reference it via pointers through va_arg().
     *   struct __va_argsave_t {
     *     size_t[6] regs;
     *     real[8] fpregs;
     *     uint offset_regs;
     *     uint offset_fpregs;
     *     void* stack_args;
     *     void* reg_args;
     *   }
     * The MOVAPS instructions seg fault if data is not aligned on
     * 16 bytes, so this gives us a nice check to ensure no mistakes.
        MOV     voff+0*8[RBP],EDI
        MOV     voff+1*8[RBP],ESI
        MOV     voff+2*8[RBP],RDX
        MOV     voff+3*8[RBP],RCX
        MOV     voff+4*8[RBP],R8
        MOV     voff+5*8[RBP],R9
        TEST    AL,AL
        LEA     RAX,voff+6*8+0x7F[RBP]
        JE      L2

        MOVAPS  -0x0F[RAX],XMM7            // only save XMM registers if actually used
        MOVAPS  -0x1F[RAX],XMM6
        MOVAPS  -0x2F[RAX],XMM5
        MOVAPS  -0x3F[RAX],XMM4
        MOVAPS  -0x4F[RAX],XMM3
        MOVAPS  -0x5F[RAX],XMM2
        MOVAPS  -0x6F[RAX],XMM1
        MOVAPS  -0x7F[RAX],XMM0

      L2:
        LEA     R11, Para.size+Para.offset[RBP]
        MOV     9+16[RAX],R11              // set __va_argsave.stack_args
    * RAX and R11 are destroyed.
    */

    /* Save registers into the voff area on the stack
     */
    targ_size_t voff = Auto.size + BPoff + sv.Soffset;  // EBP offset of start of sv
    const int vregnum = 6;
    const uint vsize = vregnum * 8 + 8 * 16;

    static immutable reg_t[vregnum] regs = [ DI,SI,DX,CX,R8,R9 ];

    if (!hasframe || enforcealign)
        voff += EBPtoESP;

    regm_t namedargs = prolog_namedArgs();
    foreach (i, r; regs)
    {
        if (!(mask(r) & namedargs))  // unnamed arguments would be the ... ones
        {
            uint ea = (REX_W << 16) | modregxrm(2,r,BPRM);
            if (!hasframe || enforcealign)
                ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,r,4);
            cdb.genc1(0x89,ea,FLconst,voff + i*8);  // MOV voff+i*8[RBP],r
        }
    }

    code* cnop = gennop(null);
    genregs(cdb,0x84,AX,AX);                        // TEST AL,AL

    uint ea = (REX_W << 16) | modregrm(2,AX,BPRM);
    if (!hasframe || enforcealign)
        // add sib byte for [RSP] addressing
        ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,AX,4);
    int raxoff = cast(int)(voff+6*8+0x7F);
    cdb.genc1(LEA,ea,FLconst,raxoff);               // LEA RAX,voff+vsize-6*8-16+0x7F[RBP]

    genjmp(cdb,JE,FLcode, cast(block *)cnop);       // JE L2

    foreach (i; 0 .. 8)
    {
        // MOVAPS -15-16*i[RAX],XMM7-i
        cdb.genc1(0x0F29,modregrm(0,XMM7-i,0),FLconst,-15-16*i);
    }
    cdb.append(cnop);

    // LEA R11, Para.size+Para.offset[RBP]
    uint ea2 = modregxrm(2,R11,BPRM);
    // NOTE(review): unlike the stores above, this checks only !hasframe (not
    // enforcealign), and the ESP form encodes the reg field as modregrm(2,DX,4)
    // (DX == R11 & 7, without the REX_R extension modregxrm would set) —
    // verify both are intended.
    if (!hasframe)
        ea2 = (modregrm(0,4,SP) << 8) | modregrm(2,DX,4);
    Para.offset = (Para.offset + (REGSIZE - 1)) & ~(REGSIZE - 1);
    cdb.genc1(LEA,(REX_W << 16) | ea2,FLconst,Para.size + Para.offset);

    // MOV 9+16[RAX],R11
    cdb.genc1(0x89,(REX_W << 16) | modregxrm(2,R11,AX),FLconst,9 + 16);  // into stack_args_save

    pinholeopt(cdb.peek(), null);
    useregs(mAX|mR11);
}

/********************************
 * Generate elems for va_start()
 * Params:
 *      sv = symbol for __va_argsave
 *      parmn = last named parameter
 */
@trusted
elem* prolog_genva_start(Symbol* sv, Symbol* parmn)
{
    enum Vregnum = 6;

    /* the stack variable __va_argsave points to an instance of:
     *   struct __va_argsave_t {
     *     size_t[Vregnum] regs;
     *     real[8] fpregs;
     *     struct __va_list_tag {
     *         uint offset_regs;
     *         uint offset_fpregs;
     *         void* stack_args;
     *         void* reg_args;
     *     }
     *     void* stack_args_save;
     *   }
     */

    enum OFF // offsets into __va_argsave_t
    {
        Offset_regs   = Vregnum*8 + 8*16,
        Offset_fpregs = Offset_regs + 4,
        Stack_args    = Offset_fpregs + 4,
        Reg_args      = Stack_args + 8,
        Stack_args_save = Reg_args + 8,
    }

    /* Compute offset_regs and offset_fpregs
     */
    regm_t namedargs = prolog_namedArgs();
    uint offset_regs = 0;
    uint offset_fpregs = Vregnum * 8;
    for (int i = AX; i <= XMM7; i++)
    {
        regm_t m = mask(i);
        if (m & namedargs)
        {
            if (m & (mDI|mSI|mDX|mCX|mR8|mR9))
                offset_regs += 8;
            else if (m & XMMREGS)
                offset_fpregs += 16;
            namedargs &= ~m;
            if (!namedargs)
                break;
        }
    }

    // set offset_regs
    elem* e1
 = el_bin(OPeq, TYint, el_var(sv), el_long(TYint, offset_regs));
    e1.EV.E1.Ety = TYint;
    e1.EV.E1.EV.Voffset = OFF.Offset_regs;

    // set offset_fpregs
    elem* e2 = el_bin(OPeq, TYint, el_var(sv), el_long(TYint, offset_fpregs));
    e2.EV.E1.Ety = TYint;
    e2.EV.E1.EV.Voffset = OFF.Offset_fpregs;

    // set reg_args
    elem* e4 = el_bin(OPeq, TYnptr, el_var(sv), el_ptr(sv));
    e4.EV.E1.Ety = TYnptr;
    e4.EV.E1.EV.Voffset = OFF.Reg_args;

    // set stack_args
    /* which is a pointer to the first variadic argument on the stack.
     * Normally, we could set it by taking the address of the last named parameter
     * (parmn) and then skipping past it. The trouble, though, is it fails
     * when all the named parameters get passed in a register.
     *    elem* e3 = el_bin(OPeq, TYnptr, el_var(sv), el_ptr(parmn));
     *    e3.EV.E1.Ety = TYnptr;
     *    e3.EV.E1.EV.Voffset = OFF.Stack_args;
     *    auto sz = type_size(parmn.Stype);
     *    sz = (sz + (REGSIZE - 1)) & ~(REGSIZE - 1);
     *    e3.EV.E2.EV.Voffset += sz;
     * The next possibility is to do it the way prolog_genvarargs() does:
     *    LEA R11, Para.size+Para.offset[RBP]
     * The trouble there is Para.size and Para.offset is not available when
     * this function is called. It might be possible to compute this earlier.(1)
     * Another possibility is creating a special operand type that gets filled
     * in after the prolog_genvarargs() is called.
     * Or do it this simpler way - compute the needed value in prolog_genvarargs(),
     * and save it in a slot just after va_argsave, called `stack_args_save`.
     * Then, just copy from `stack_args_save` to `stack_args`.
     * Although, doing (1) might be optimal.
     */
    elem* e3 = el_bin(OPeq, TYnptr, el_var(sv), el_var(sv));
    e3.EV.E1.Ety = TYnptr;
    e3.EV.E1.EV.Voffset = OFF.Stack_args;
    e3.EV.E2.Ety = TYnptr;
    e3.EV.E2.EV.Voffset = OFF.Stack_args_save;

    elem* e = el_combine(e1, el_combine(e2, el_combine(e3, e4)));
    return e;
}

/**********************************
 * Generate varargs prolog for Win64 (currently a no-op; shadow register
 * stores are handled by prolog_loadparams()).
 * Params:
 *      cdb = sink for generated code
 */
void prolog_gen_win64_varargs(ref CodeBuilder cdb)
{
    /* The Microsoft scheme.
     * https://msdn.microsoft.com/en-US/library/dd2wa36c%28v=vs.100%29
     * Copy registers onto stack.
         mov     8[RSP],RCX
         mov     010h[RSP],RDX
         mov     018h[RSP],R8
         mov     020h[RSP],R9
     */
}

/************************************
 * Get mask of registers that named parameters (not ... variadic arguments) were passed in.
 * Returns:
 *      the mask
 */
@trusted regm_t prolog_namedArgs()
{
    regm_t namedargs;
    foreach (s; globsym[])
    {
        if (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg)
            namedargs |= s.Spregm();
    }
    return namedargs;
}

/************************************
 * Copy parameters passed in registers to their stack homes, and load
 * enregistered parameters into their assigned registers.
 * Params:
 *      cdb = generated code sink
 *      tyf = what's the type of the function
 *      pushalloc = use PUSH to allocate on the stack rather than subtracting from SP
 */
@trusted
void prolog_loadparams(ref CodeBuilder cdb, tym_t tyf, bool pushalloc)
{
    //printf("prolog_loadparams() %s\n", funcsym_p.Sident.ptr);
    debug
    for (SYMIDX si = 0; si < globsym.length; si++)
    {
        Symbol *s = globsym[si];
        if (debugr && (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg))
        {
            printf("symbol '%s' is fastpar in register [l %s, m %s]\n", s.Sident.ptr,
                regm_str(mask(s.Spreg)),
                (s.Spreg2 == NOREG ? "NOREG" : regm_str(mask(s.Spreg2))));
            if (s.Sfl == FLreg)
                printf("\tassigned to register %s\n", regm_str(mask(s.Sreglsw)));
        }
    }

    uint pushallocreg = (tyf == TYmfunc) ?
 CX : AX;

    /* Copy SCfastpar and SCshadowreg (parameters passed in registers) that were not assigned
     * registers into their stack locations.
     */
    regm_t shadowregm = 0;
    for (SYMIDX si = 0; si < globsym.length; si++)
    {
        Symbol *s = globsym[si];
        uint sz = cast(uint)type_size(s.Stype);

        if (!((s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg) && s.Sfl != FLreg))
            continue;
        // Argument is passed in a register

        type *t = s.Stype;
        type *t2 = null;

        tym_t tyb = tybasic(t.Tty);

        // This logic is same as FuncParamRegs_alloc function at src/dmd/backend/cod1.d
        //
        // Find suitable SROA based on the element type
        // (Don't put volatile parameters in registers on Windows)
        if (tyb == TYarray && (config.exe != EX_WIN64 || !(t.Tty & mTYvolatile)))
        {
            type *targ1;
            argtypes(t, targ1, t2);
            if (targ1)
                t = targ1;
        }

        // If struct just wraps another type
        if (tyb == TYstruct)
        {
            // On windows 64 bits, structs occupy a general purpose register,
            // regardless of the struct size or the number & types of its fields.
            if (config.exe != EX_WIN64)
            {
                type *targ1 = t.Ttag.Sstruct.Sarg1type;
                t2 = t.Ttag.Sstruct.Sarg2type;
                if (targ1)
                    t = targ1;
            }
        }

        if (Symbol_Sisdead(*s, anyiasm))
        {
            // Ignore it, as it is never referenced
            continue;
        }

        targ_size_t offset = Fast.size + BPoff;
        if (s.Sclass == SC.shadowreg)
            offset = Para.size;             // shadow regs live in the caller's parameter area
        offset += s.Soffset;
        if (!hasframe || (enforcealign && s.Sclass != SC.shadowreg))
            offset += EBPtoESP;

        reg_t preg = s.Spreg;
        foreach (i; 0 .. 2)     // twice, once for each possible parameter register
        {
            // strip array dimensions to get the element type for the store opcode
            static type* type_arrayBase(type* ta)
            {
                while (tybasic(ta.Tty) == TYarray)
                    ta = ta.Tnext;
                return ta;
            }
            shadowregm |= mask(preg);
            const opcode_t op = isXMMreg(preg)
                ? xmmstore(type_arrayBase(t).Tty)
                : 0x89;    // MOV x[EBP],preg
            if (!(pushalloc && preg == pushallocreg) || s.Sclass == SC.shadowreg)
            {
                if (hasframe && (!enforcealign || s.Sclass == SC.shadowreg))
                {
                    // MOV x[EBP],preg
                    cdb.genc1(op,modregxrm(2,preg,BPRM),FLconst,offset);
                    if (isXMMreg(preg))
                    {
                        checkSetVex(cdb.last(), t.Tty);
                    }
                    else
                    {
                        //printf("%s Fast.size = %d, BPoff = %d, Soffset = %d, sz = %d\n",
                        //    s.Sident, (int)Fast.size, (int)BPoff, (int)s.Soffset, (int)sz);
                        if (I64 && sz > 4)
                            code_orrex(cdb.last(), REX_W);
                    }
                }
                else
                {
                    // MOV offset[ESP],preg
                    // BUG: byte size?
                    cdb.genc1(op,
                        (modregrm(0,4,SP) << 8) |
                        modregxrm(2,preg,4),FLconst,offset);
                    if (isXMMreg(preg))
                    {
                        checkSetVex(cdb.last(), t.Tty);
                    }
                    else
                    {
                        if (I64 && sz > 4)
                            cdb.last().Irex |= REX_W;
                    }
                }
            }
            preg = s.Spreg2;
            if (preg == NOREG)
                break;
            if (t2)
                t = t2;
            offset += REGSIZE;
        }
    }

    if (config.exe == EX_WIN64 && variadic(funcsym_p.Stype))
    {
        /* The Microsoft scheme.
         * https://msdn.microsoft.com/en-US/library/dd2wa36c%28v=vs.100%29
         * Copy registers onto stack.
             mov     8[RSP],RCX or XMM0
             mov     010h[RSP],RDX or XMM1
             mov     018h[RSP],R8 or XMM2
             mov     020h[RSP],R9 or XMM3
         */
        static immutable reg_t[4] vregs = [ CX,DX,R8,R9 ];
        for (int i = 0; i < vregs.length; ++i)
        {
            uint preg = vregs[i];
            uint offset = cast(uint)(Para.size + i * REGSIZE);
            // only spill registers not already stored by the fastpar loop above
            if (!(shadowregm & (mask(preg) | mask(XMM0 + i))))
            {
                if (hasframe)
                {
                    // MOV x[EBP],preg
                    cdb.genc1(0x89,
                        modregxrm(2,preg,BPRM),FLconst, offset);
                    code_orrex(cdb.last(), REX_W);
                }
                else
                {
                    // MOV offset[ESP],preg
                    cdb.genc1(0x89,
                        (modregrm(0,4,SP) << 8) |
                        modregxrm(2,preg,4),FLconst,offset + EBPtoESP);
                }
                cdb.last().Irex |= REX_W;
            }
        }
    }

    /* Copy SCfastpar and SCshadowreg (parameters passed in registers) that were assigned registers
     * into their assigned registers.
     * Note that we have a big problem if Pa is passed in R1 and assigned to R2,
     * and Pb is passed in R2 but assigned to R1. Detect it and assert.
     */
    regm_t assignregs = 0;
    for (SYMIDX si = 0; si < globsym.length; si++)
    {
        Symbol *s = globsym[si];
        uint sz = cast(uint)type_size(s.Stype);

        if (!((s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg) && s.Sfl == FLreg))
        {
            // Argument is passed in a register
            continue;
        }

        type *t = s.Stype;
        type *t2 = null;
        if (tybasic(t.Tty) == TYstruct && config.exe != EX_WIN64)
        {   type *targ1 = t.Ttag.Sstruct.Sarg1type;
            t2 = t.Ttag.Sstruct.Sarg2type;
            if (targ1)
                t = targ1;
        }

        reg_t preg = s.Spreg;
        reg_t r = s.Sreglsw;
        for (int i = 0; i < 2; ++i)
        {
            if (preg == NOREG)
                break;
            assert(!(mask(preg) & assignregs));         // not already stepped on
            assignregs |= mask(r);

            // MOV reg,preg
            if (r == preg)
            {
            }
            else if (mask(preg) & XMMREGS)
            {
                const op = xmmload(t.Tty);      // MOVSS/D xreg,preg
                uint xreg = r - XMM0;
                cdb.gen2(op,modregxrmx(3,xreg,preg - XMM0));
            }
            else
            {
                //printf("test1 mov %s, %s\n", regstring[r], regstring[preg]);
                genmovreg(cdb,r,preg);
                if (I64 && sz == 8)
                    code_orrex(cdb.last(), REX_W);
            }
            preg = s.Spreg2;
            r = s.Sregmsw;
            if (t2)
                t = t2;
        }
    }

    /* For parameters that were passed on the stack, but are enregistered,
     * initialize the registers with the parameter stack values.
     * Do not use assignaddr(), as it will replace the stack reference with
     * the register.
     */
    for (SYMIDX si = 0; si < globsym.length; si++)
    {
        Symbol *s = globsym[si];
        uint sz = cast(uint)type_size(s.Stype);

        if (!((s.Sclass == SC.regpar || s.Sclass == SC.parameter) &&
            s.Sfl == FLreg &&
            (refparam
                // This variable has been reference by a nested function
                || MARS && s.Stype.Tty & mTYvolatile
            )))
        {
            continue;
        }
        // MOV reg,param[BP]
        //assert(refparam);
        if (mask(s.Sreglsw) & XMMREGS)
        {
            const op = xmmload(s.Stype.Tty);    // MOVSS/D xreg,mem
            uint xreg = s.Sreglsw - XMM0;
            cdb.genc1(op,modregxrm(2,xreg,BPRM),FLconst,Para.size + s.Soffset);
            if (!hasframe)
            {   // Convert to ESP relative address rather than EBP
                code *c = cdb.last();
                c.Irm = cast(ubyte)modregxrm(2,xreg,4);
                c.Isib = modregrm(0,4,SP);
                c.IEV1.Vpointer += EBPtoESP;
            }
            continue;
        }

        cdb.genc1(sz == 1 ? 0x8A : 0x8B,
            modregxrm(2,s.Sreglsw,BPRM),FLconst,Para.size + s.Soffset);
        code *c = cdb.last();
        if (!I16 && sz == SHORTSIZE)
            c.Iflags |= CFopsize;               // operand size
        if (I64 && sz >= REGSIZE)
            c.Irex |= REX_W;
        if (I64 && sz == 1 && s.Sreglsw >= 4)
            c.Irex |= REX;                      // access SPL/BPL/SIL/DIL instead of AH..BH
        if (!hasframe)
        {   // Convert to ESP relative address rather than EBP
            assert(!I16);
            c.Irm = cast(ubyte)modregxrm(2,s.Sreglsw,4);
            c.Isib = modregrm(0,4,SP);
            c.IEV1.Vpointer += EBPtoESP;
        }
        if (sz > REGSIZE)
        {
            // load the most significant word of a register pair
            cdb.genc1(0x8B,
                modregxrm(2,s.Sregmsw,BPRM),FLconst,Para.size + s.Soffset + REGSIZE);
            code *cx = cdb.last();
            if (I64)
                cx.Irex |= REX_W;
            if (!hasframe)
            {   // Convert to ESP relative address rather than EBP
                assert(!I16);
                cx.Irm = cast(ubyte)modregxrm(2,s.Sregmsw,4);
                cx.Isib = modregrm(0,4,SP);
                cx.IEV1.Vpointer += EBPtoESP;
            }
        }
    }
}

/*******************************
 * Generate and return function epilog.
 * Output:
 *      retsize         Size of function epilog
 */

@trusted
void epilog(block *b)
{
    code *cpopds;
    reg_t reg;
    reg_t regx;                      // register that's not a return reg
    regm_t topop,regm;
    targ_size_t xlocalsize = localsize;

    CodeBuilder cdbx; cdbx.ctor();
    tym_t tyf = funcsym_p.ty();
    tym_t tym = tybasic(tyf);
    bool farfunc = tyfarfunc(tym) != 0;
    if (!(b.Bflags & BFLepilog))        // if no epilog code
        goto Lret;                      // just generate RET
    regx = (b.BC == BCret) ? AX : CX;

    retsize = 0;

    if (tyf & mTYnaked)                 // if no prolog/epilog
        return;

    if (tym == TYifunc)
    {
        // interrupt function: restore segment regs and all GP regs, then IRET
        static immutable ubyte[5] ops2 = [ 0x07,0x1F,0x61,0xCF,0 ];
        static immutable ubyte[12] ops0 = [ 0x07,0x1F,0x5F,0x5E,
                                            0x5D,0x5B,0x5B,0x5A,
                                            0x59,0x58,0xCF,0 ];

        genregs(cdbx,0x8B,SP,BP);              // MOV SP,BP
        auto p = (config.target_cpu >= TARGET_80286) ? ops2.ptr : ops0.ptr;
        do
            cdbx.gen1(*p);
        while (*++p);
        goto Lopt;
    }

    if (config.flags & CFGtrace &&
        (!(config.flags4 & CFG4allcomdat) ||
         funcsym_p.Sclass == SC.comdat ||
         funcsym_p.Sclass == SC.global ||
         (config.flags2 & CFG2comdat && SymInline(funcsym_p))
        )
       )
    {
        Symbol *s = getRtlsym(farfunc ? RTLSYM.TRACE_EPI_F : RTLSYM.TRACE_EPI_N);
        makeitextern(s);
        cdbx.gencs(I16 ? 0x9A : CALL,0,FLfunc,s);      // CALLF _trace
        if (!I16)
            code_orflag(cdbx.last(),CFoff | CFselfrel);
        useregs((ALLREGS | mBP | mES) & ~s.Sregsaved);
    }

    if (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.exe == EX_WIN32 || MARS))
    {
        nteh_epilog(cdbx);
    }

    cpopds = null;
    if (tyf & mTYloadds)
    {
        cdbx.gen1(0x1F);             // POP DS
        cpopds = cdbx.last();
    }

    /* Pop all the general purpose registers saved on the stack
     * by the prolog code. Remember to do them in the reverse
     * order they were pushed.
     */
    topop = fregsaved & ~mfuncreg;
    epilog_restoreregs(cdbx, topop);

    if (usednteh & NTEHjmonitor)
    {
        regm_t retregs = 0;
        if (b.BC == BCretexp)
            retregs = regmask(b.Belem.Ety, tym);
        nteh_monitor_epilog(cdbx,retregs);
        xlocalsize += 8;
    }

    if (config.wflags & WFwindows && farfunc)
    {
        int wflags = config.wflags;
        if (wflags & WFreduced && !(tyf & mTYexport))
        {   // reduced prolog/epilog for non-exported functions
            wflags &= ~(WFdgroup | WFds | WFss);
            if (!(wflags & WFsaveds))
                goto L4;                // share the standard frame teardown below
        }

        if (localsize)
        {
            cdbx.genc1(LEA,modregrm(1,SP,6),FLconst,cast(targ_uns)-2); /* LEA SP,-2[BP] */
        }
        if (wflags & (WFsaveds | WFds | WFss | WFdgroup))
        {
            if (cpopds)
                cpopds.Iop = NOP;              // don't need previous one
            cdbx.gen1(0x1F);                   // POP DS
        }
        cdbx.gen1(0x58 + BP);                  // POP BP
        if (config.wflags & WFincbp)
            cdbx.gen1(0x48 + BP);              // DEC BP
        assert(hasframe);
    }
    else
    {
        if (needframe || (xlocalsize && hasframe))
        {
        L4:
            assert(hasframe);
            if (xlocalsize || enforcealign)
            {
                if (config.flags2 & CFG2stomp)
                {   /*   MOV  ECX,0xBEAF
                     * L1:
                     *   MOV  [ESP],ECX
                     *   ADD  ESP,4
                     *   CMP  EBP,ESP
                     *   JNE  L1
                     *   POP  EBP
                     */
                    /* Value should be:
                     * 1. != 0 (code checks for null pointers)
                     * 2. be odd (to mess up alignment)
                     * 3. fall in first 64K (likely marked as inaccessible)
                     * 4. be a value that stands out in the debugger
                     */
                    assert(I32 || I64);
                    targ_size_t value = 0x0000BEAF;
                    reg_t regcx = CX;
                    mfuncreg &= ~mask(regcx);
                    uint grex = I64 ?
 REX_W << 16 : 0;
                    cdbx.genc2(0xC7,grex | modregrmx(3,0,regcx),value);             // MOV regcx,value
                    cdbx.gen2sib(0x89,grex | modregrm(0,regcx,4),modregrm(0,4,SP)); // MOV [ESP],regcx
                    code *c1 = cdbx.last();
                    cdbx.genc2(0x81,grex | modregrm(3,0,SP),REGSIZE);               // ADD ESP,REGSIZE
                    genregs(cdbx,0x39,SP,BP);                                       // CMP EBP,ESP
                    if (I64)
                        code_orrex(cdbx.last(),REX_W);
                    genjmp(cdbx,JNE,FLcode,cast(block *)c1);                        // JNE L1
                    // explicitly mark as short jump, needed for correct retsize calculation (Bugzilla 15779)
                    cdbx.last().Iflags &= ~CFjmp16;
                    cdbx.gen1(0x58 + BP);                                           // POP BP
                }
                else if (config.exe == EX_WIN64)
                {   // See https://msdn.microsoft.com/en-us/library/tawsa7cb%28v=vs.100%29.aspx
                    // LEA RSP,0[RBP]
                    cdbx.genc1(LEA,(REX_W<<16)|modregrm(2,SP,BPRM),FLconst,0);
                    cdbx.gen1(0x58 + BP);      // POP RBP
                }
                else if (config.target_cpu >= TARGET_80286 &&
                    !(config.target_cpu >= TARGET_80386 && config.flags4 & CFG4speed)
                   )
                    cdbx.gen1(LEAVE);          // LEAVE
                else if (0 && xlocalsize == REGSIZE && Alloca.size == 0 && I32)
                {   // This doesn't work - I should figure out why
                    mfuncreg &= ~mask(regx);
                    cdbx.gen1(0x58 + regx);    // POP regx
                    cdbx.gen1(0x58 + BP);      // POP BP
                }
                else
                {
                    genregs(cdbx,0x8B,SP,BP);  // MOV SP,BP
                    if (I64)
                        code_orrex(cdbx.last(), REX_W);   // MOV RSP,RBP
                    cdbx.gen1(0x58 + BP);      // POP BP
                }
            }
            else
                cdbx.gen1(0x58 + BP);          // POP BP
            if (config.wflags & WFincbp && farfunc)
                cdbx.gen1(0x48 + BP);          // DEC BP
        }
        else if (xlocalsize == REGSIZE && (!I16 || b.BC == BCret))
        {
            mfuncreg &= ~mask(regx);
            cdbx.gen1(0x58 + regx);            // POP regx
        }
        else if (xlocalsize)
            cod3_stackadj(cdbx, cast(int)-xlocalsize);
    }
    if (b.BC == BCret || b.BC == BCretexp)
    {
    Lret:
        opcode_t op = tyfarfunc(tym) ? 0xCA : 0xC2;     // RETF imm16 : RET imm16
        if (tym == TYhfunc)
        {
            cdbx.genc2(0xC2,0,4);                       // RET 4
        }
        else if (!typfunc(tym) ||                       // if caller cleans the stack
                 config.exe == EX_WIN64 ||
                 Para.offset == 0)                      // or nothing pushed on the stack anyway
        {
            op++;                                       // to a regular RET
            cdbx.gen1(op);
        }
        else
        {   // Stack is always aligned on register size boundary
            Para.offset = (Para.offset + (REGSIZE - 1)) & ~(REGSIZE - 1);
            if (Para.offset >= 0x10000)
            {
                /*
                    POP REG
                    ADD ESP, Para.offset
                    JMP REG
                */
                cdbx.gen1(0x58+regx);
                cdbx.genc2(0x81, modregrm(3,0,SP), Para.offset);
                if (I64)
                    code_orrex(cdbx.last(), REX_W);
                cdbx.genc2(0xFF, modregrm(3,4,regx), 0);
                if (I64)
                    code_orrex(cdbx.last(), REX_W);
            }
            else
                cdbx.genc2(op,0,Para.offset);          // RET Para.offset
        }
    }

Lopt:
    // If last instruction in ce is ADD SP,imm, and first instruction
    // in c sets SP, we can dump the ADD.
    CodeBuilder cdb; cdb.ctor();
    cdb.append(b.Bcode);
    code *cr = cdb.last();
    code *c = cdbx.peek();
    if (cr && c && !I64)
    {
        if (cr.Iop == 0x81 && cr.Irm == modregrm(3,0,SP))     // if ADD SP,imm
        {
            if (
                c.Iop == LEAVE ||                                // LEAVE
                (c.Iop == 0x8B && c.Irm == modregrm(3,SP,BP)) || // MOV SP,BP
                (c.Iop == LEA && c.Irm == modregrm(1,SP,6))      // LEA SP,-imm[BP]
               )
                cr.Iop = NOP;
            else if (c.Iop == 0x58 + BP)                         // if POP BP
            {
                cr.Iop = 0x8B;
                cr.Irm = modregrm(3,SP,BP);                      // MOV SP,BP
            }
        }
        else
        {
            static if (0)
            {
                // These optimizations don't work if the called function
                // cleans off the stack.
                if (c.Iop == 0xC3 && cr.Iop == CALL)       // CALL near
                {
                    cr.Iop = 0xE9;                         // JMP near
                    c.Iop = NOP;
                }
                else if (c.Iop == 0xCB && cr.Iop == 0x9A)  // CALL far
                {
                    cr.Iop = 0xEA;                         // JMP far
                    c.Iop = NOP;
                }
            }
        }
    }

    pinholeopt(c, null);
    retsize += calcblksize(c);          // compute size of function epilog
    cdb.append(cdbx);
    b.Bcode = cdb.finish();
}

/*******************************
 * Return offset of SP from BP.
 */

@trusted
targ_size_t cod3_spoff()
{
    //printf("spoff = x%x, localsize = x%x\n", cast(int)spoff, cast(int)localsize);
    return spoff + localsize;
}

/****************************************
 * Generate code to save or restore a spilled register variable to/from
 * its memory home.
 * Params:
 *      cdb = sink for generated code
 *      s = the register variable being spilled or reloaded
 *      toreg = true: load memory copy into the register;
 *              false: store the register to its memory copy
 */
@trusted
void gen_spill_reg(ref CodeBuilder cdb, Symbol* s, bool toreg)
{
    code cs;
    const regm_t keepmsk = toreg ? RMload : RMstore;

    elem* e = el_var(s); // so we can trick getlvalue() into working for us

    if (mask(s.Sreglsw) & XMMREGS)
    {   // Convert to save/restore of XMM register
        if (toreg)
            cs.Iop = xmmload(s.Stype.Tty);      // MOVSS/D xreg,mem
        else
            cs.Iop = xmmstore(s.Stype.Tty);     // MOVSS/D mem,xreg
        getlvalue(cdb,&cs,e,keepmsk);
        cs.orReg(s.Sreglsw - XMM0);
        cdb.gen(&cs);
    }
    else
    {
        const int sz = cast(int)type_size(s.Stype);
        cs.Iop = toreg ?
 0x8B : 0x89;   // MOV reg,mem[ESP] : MOV mem[ESP],reg
        cs.Iop ^= (sz == 1);                            // flip to the byte form for 1-byte values
        getlvalue(cdb,&cs,e,keepmsk);
        cs.orReg(s.Sreglsw);
        if (I64 && sz == 1 && s.Sreglsw >= 4)
            cs.Irex |= REX;
        if ((cs.Irm & 0xC0) == 0xC0 &&                  // reg,reg
            (((cs.Irm >> 3) ^ cs.Irm) & 7) == 0 &&      // registers match
            (((cs.Irex >> 2) ^ cs.Irex) & 1) == 0)      // REX_R and REX_B match
        { }                                             // skip MOV reg,reg
        else
            cdb.gen(&cs);
        if (sz > REGSIZE)
        {
            // second transfer for the most significant register of the pair
            cs.setReg(s.Sregmsw);
            getlvalue_msw(&cs);
            if ((cs.Irm & 0xC0) == 0xC0 &&              // reg,reg
                (((cs.Irm >> 3) ^ cs.Irm) & 7) == 0 &&  // registers match
                (((cs.Irex >> 2) ^ cs.Irex) & 1) == 0)  // REX_R and REX_B match
            { }                                         // skip MOV reg,reg
            else
                cdb.gen(&cs);
        }
    }

    el_free(e);
}

/****************************
 * Generate code for, and output a thunk.
 * Params:
 *      sthunk = Symbol of thunk
 *      sfunc = Symbol of thunk's target function
 *      thisty = Type of this pointer
 *      p = ESP parameter offset to this pointer
 *      d = offset to add to 'this' pointer
 *      d2 = offset from 'this' to vptr
 *      i = offset into vtbl[]
 */

@trusted
void cod3_thunk(Symbol *sthunk,Symbol *sfunc,uint p,tym_t thisty,
        uint d,int i,uint d2)
{
    targ_size_t thunkoffset;

    int seg = sthunk.Sseg;
    cod3_align(seg);

    // Skip over return address
    tym_t thunkty = tybasic(sthunk.ty());
    if (tyfarfunc(thunkty))
        p += I32 ? 8 : tysize(TYfptr);          // far function
    else
        p += tysize(TYnptr);
    if (tybasic(sfunc.ty()) == TYhfunc)
        p += tysize(TYnptr);                    // skip over hidden pointer

    CodeBuilder cdb; cdb.ctor();
    if (!I16)
    {
        /*
           Generate:
            ADD p[ESP],d
           For direct call:
            JMP sfunc
           For virtual call:
            MOV EAX, p[ESP]                     EAX = this
            MOV EAX, d2[EAX]                    EAX = this.vptr
            JMP i[EAX]                          jump to virtual function
         */
        if (config.flags3 & CFG3ibt)
            cdb.gen1(I32 ? ENDBR32 : ENDBR64);

        reg_t reg = 0;
        if (cast(int)d < 0)
        {
            d = -d;
            reg = 5;                            // switch from ADD to SUB
        }
        if (thunkty == TYmfunc)
        {                                       // ADD ECX,d
            if (d)
                cdb.genc2(0x81,modregrm(3,reg,CX),d);
        }
        else if (thunkty == TYjfunc || (I64 && thunkty == TYnfunc))
        {                                       // ADD EAX,d
            int rm = AX;
            if (config.exe == EX_WIN64)
                rm = CX;
            else if (I64)
                rm = (thunkty == TYnfunc && (sfunc.Sfunc.Fflags3 & F3hiddenPtr)) ? SI : DI;
            if (d)
                cdb.genc2(0x81,modregrm(3,reg,rm),d);
        }
        else
        {
            cdb.genc(0x81,modregrm(2,reg,4),
                FLconst,p,                      // to this
                FLconst,d);                     // ADD p[ESP],d
            cdb.last().Isib = modregrm(0,4,SP);
        }
        if (I64 && cdb.peek())
            cdb.last().Irex |= REX_W;
    }
    else
    {
        /*
           Generate:
            MOV BX,SP
            ADD [SS:] p[BX],d
           For direct call:
            JMP sfunc
           For virtual call:
            MOV BX, p[BX]                       BX = this
            MOV BX, d2[BX]                      BX = this.vptr
            JMP i[BX]                           jump to virtual function
         */

        genregs(cdb,0x89,SP,BX);                // MOV BX,SP
        cdb.genc(0x81,modregrm(2,0,7),
            FLconst,p,                          // to this
            FLconst,d);                         // ADD p[BX],d
        if (config.wflags & WFssneds ||
            // If DS needs reloading from SS,
            // then assume SS != DS on thunk entry
            (LARGEDATA && config.wflags & WFss))
            cdb.last().Iflags |= CFss;          // SS:
    }

    if ((i & 0xFFFF) != 0xFFFF)                 // if virtual call
    {
        const bool FARTHIS = (tysize(thisty) >
 REGSIZE);
        const bool FARVPTR = FARTHIS;

        assert(thisty != TYvptr);               // can't handle this case

        if (!I16)
        {
            assert(!FARTHIS && !LARGECODE);
            if (thunkty == TYmfunc)             // if 'this' is in ECX
            {
                // MOV EAX,d2[ECX]
                cdb.genc1(0x8B,modregrm(2,AX,CX),FLconst,d2);
            }
            else if (thunkty == TYjfunc)        // if 'this' is in EAX
            {
                // MOV EAX,d2[EAX]
                cdb.genc1(0x8B,modregrm(2,AX,AX),FLconst,d2);
            }
            else
            {
                // MOV EAX,p[ESP]
                cdb.genc1(0x8B,(modregrm(0,4,SP) << 8) | modregrm(2,AX,4),FLconst,cast(targ_uns) p);
                if (I64)
                    cdb.last().Irex |= REX_W;

                // MOV EAX,d2[EAX]
                cdb.genc1(0x8B,modregrm(2,AX,AX),FLconst,d2);
            }
            if (I64)
                code_orrex(cdb.last(), REX_W);
            // JMP i[EAX]
            cdb.genc1(0xFF,modregrm(2,4,0),FLconst,cast(targ_uns) i);
        }
        else
        {
            // MOV/LES BX,[SS:] p[BX]
            cdb.genc1((FARTHIS ? 0xC4 : 0x8B),modregrm(2,BX,7),FLconst,cast(targ_uns) p);
            if (config.wflags & WFssneds ||
                // If DS needs reloading from SS,
                // then assume SS != DS on thunk entry
                (LARGEDATA && config.wflags & WFss))
                cdb.last().Iflags |= CFss;      // SS:

            // MOV/LES BX,[ES:]d2[BX]
            cdb.genc1((FARVPTR ? 0xC4 : 0x8B),modregrm(2,BX,7),FLconst,d2);
            if (FARTHIS)
                cdb.last().Iflags |= CFes;      // ES:

            // JMP i[BX]
            cdb.genc1(0xFF,modregrm(2,(LARGECODE ? 5 : 4),7),FLconst,cast(targ_uns) i);
            if (FARVPTR)
                cdb.last().Iflags |= CFes;      // ES:
        }
    }
    else
    {
        // direct call: just jump straight to the target function
        if (config.flags3 & CFG3pic)
        {
            localgot = null;                // no local variables
            CodeBuilder cdbgot; cdbgot.ctor();
            load_localgot(cdbgot);          // load GOT in EBX
            code *c1 = cdbgot.finish();
            if (c1)
            {
                assignaddrc(c1);
                cdb.append(c1);
            }
        }
        cdb.gencs((LARGECODE ? 0xEA : 0xE9),0,FLfunc,sfunc);    // JMP sfunc
        cdb.last().Iflags |= LARGECODE ? (CFseg | CFoff) : (CFselfrel | CFoff);
    }

    thunkoffset = Offset(seg);
    code *c = cdb.finish();
    pinholeopt(c,null);
    codout(seg,c,null);
    code_free(c);

    sthunk.Soffset = thunkoffset;
    sthunk.Ssize = Offset(seg) - thunkoffset;   // size of thunk
    sthunk.Sseg = seg;
    if (config.exe & EX_posix ||
        config.objfmt == OBJ_MSCOFF)
    {
        objmod.pubdef(seg,sthunk,sthunk.Soffset);
    }
}

/*****************************
 * Assume symbol s is extern.
 */

@trusted
void makeitextern(Symbol *s)
{
    if (s.Sxtrnnum == 0)
    {
        s.Sclass = SC.extern_;              /* external             */
        /*printf("makeitextern(x%x)\n",s);*/
        objmod.external(s);
    }
}


/*******************************
 * Replace JMPs in Bgotocode with JMP SHORTs wherever possible.
 * This routine depends on FLcode jumps to only be forward
 * referenced.
 * BFLjmpoptdone is set to true if nothing more can be done
 * with this block.
 * Input:
 *      flag    !=0 means don't have correct Boffsets yet
 * Returns:
 *      number of bytes saved
 */

@trusted
int branch(block *bl,int flag)
{
    int bytesaved;
    code* c,cn,ct;
    targ_size_t offset,disp;
    targ_size_t csize;

    if (!flag)
        bl.Bflags |= BFLjmpoptdone;      // assume this will be all
    c = bl.Bcode;
    if (!c)
        return 0;
    bytesaved = 0;
    offset = bl.Boffset;                 /* offset of start of block     */
    // Walk the instruction list, tracking the running offset of each
    // instruction so displacements to FLblock targets can be computed.
    while (1)
    {
        ubyte op;

        csize = calccodsize(c);
        cn = code_next(c);
        op = cast(ubyte)c.Iop;
        // Candidates: a Jcc (0x70..0x7F) still flagged as needing a long
        // displacement, or a JMP not pinned long via CFjmp5.
        if ((op & ~0x0F) == 0x70 && c.Iflags & CFjmp16 ||
            (op == JMP && !(c.Iflags & CFjmp5)))
        {
          L1:
            switch (c.IFL2)
            {
                case FLblock:
                    if (flag)           // no offsets yet, don't optimize
                        goto L3;
                    // NOTE(review): disp is unsigned and wraps for backward
                    // branches; the signed-byte round-trip test below is what
                    // makes that safe — confirm before changing types here.
                    disp = c.IEV2.Vblock.Boffset - offset - csize;

                    /* If this is a forward branch, and there is an aligned
                     * block intervening, it is possible that shrinking
                     * the jump instruction will cause it to be out of
                     * range of the target. This happens if the alignment
                     * prevents the target block from moving correspondingly
                     * closer.
                     */
                    if (disp >= 0x7F-4 && c.IEV2.Vblock.Boffset > offset)
                    {   /* Look for intervening alignment
                         */
                        for (block *b = bl.Bnext; b; b = b.Bnext)
                        {
                            if (b.Balign)
                            {
                                bl.Bflags &= ~BFLjmpoptdone;   // some JMPs left
                                goto L3;
                            }
                            if (b == c.IEV2.Vblock)
                                break;
                        }
                    }

                    break;

                case FLcode:
                {
                    code *cr;

                    disp = 0;

                    ct = c.IEV2.Vcode;         /* target of branch     */
                    assert(ct.Iflags & (CFtarg | CFtarg2));
                    // First try a forward scan from the next instruction.
                    for (cr = cn; cr; cr = code_next(cr))
                    {
                        if (cr == ct)
                            break;
                        disp += calccodsize(cr);
                    }

                    if (!cr)
                    {   // Didn't find it in forward search. Try backwards jump
                        int s = 0;
                        disp = 0;
                        for (cr = bl.Bcode; cr != cn; cr = code_next(cr))
                        {
                            assert(cr != null); // must have found it
                            if (cr == ct)
                                s = 1;
                            if (s)
                                disp += calccodsize(cr);
                        }
                    }

                    if (config.flags4 & CFG4optimized && !flag)
                    {
                        /* Propagate branch forward past junk   */
                        while (1)
                        {
                            if (ct.Iop == NOP ||
                                ct.Iop == (ESCAPE | ESClinnum))
                            {
                                ct = code_next(ct);
                                if (!ct)
                                    goto L2;
                            }
                            else
                            {
                                c.IEV2.Vcode = ct;
                                ct.Iflags |= CFtarg;
                                break;
                            }
                        }

                        /* And eliminate jmps to jmps   */
                        if ((op == ct.Iop || ct.Iop == JMP) &&
                            (op == JMP || c.Iflags & CFjmp16))
                        {
                            c.IFL2 = ct.IFL2;
                            c.IEV2.Vcode = ct.IEV2.Vcode;
                            /*printf("eliminating branch\n");*/
                            goto L1;    // re-examine the retargeted branch
                        }
                     L2:
                        { }
                    }
                }
                    break;

                default:
                    goto L3;
            }

            if (disp == 0)                      // bra to next instruction
            {
                bytesaved += csize;
                c.Iop = NOP;                   // del branch instruction
                c.IEV2.Vcode = null;
                c = cn;
                if (!c)
                    break;
                continue;
            }
            // Displacement fits a signed byte both before and after the
            // instruction itself shrinks (shrinking moves the target closer
            // by up to the size difference, hence the disp - 2 check too).
            else if (cast(targ_size_t)cast(targ_schar)(disp - 2) == (disp - 2) &&
                     cast(targ_size_t)cast(targ_schar)disp == disp)
            {
                if (op == JMP)
                {
                    c.Iop = JMPS;              // JMP SHORT
                    bytesaved += I16 ? 1 : 3;
                }
                else                            // else Jcond
                {
                    c.Iflags &= ~CFjmp16;      // a branch is ok
                    bytesaved += I16 ? 3 : 4;

                    // Replace a cond jump around a call to a function that
                    // never returns with a cond jump to that function.
                    if (config.flags4 & CFG4optimized &&
                        config.target_cpu >= TARGET_80386 &&
                        disp == (I16 ? 3 : 5) &&
                        cn &&
                        cn.Iop == CALL &&
                        cn.IFL2 == FLfunc &&
                        cn.IEV2.Vsym.Sflags & SFLexit &&
                        !(cn.Iflags & (CFtarg | CFtarg2))
                       )
                    {
                        // Build the long-form Jcc (0F 8x) with inverted
                        // condition (the ^ 0x81 flips the condition bit and
                        // sets the 0x80 of the two-byte opcode).
                        cn.Iop = 0x0F00 | ((c.Iop & 0x0F) ^ 0x81);
                        c.Iop = NOP;
                        c.IEV2.Vcode = null;
                        bytesaved++;

                        // If nobody else points to ct, we can remove the CFtarg
                        if (flag && ct)
                        {
                            code *cx;
                            for (cx = bl.Bcode; 1; cx = code_next(cx))
                            {
                                if (!cx)
                                {
                                    ct.Iflags &= ~CFtarg;
                                    break;
                                }
                                if (cx.IEV2.Vcode == ct)
                                    break;
                            }
                        }
                    }
                }
                csize = calccodsize(c);
            }
            else
                bl.Bflags &= ~BFLjmpoptdone;   // some JMPs left
        }
    L3:
        if (cn)
        {
            offset += csize;
            c = cn;
        }
        else
            break;
    }
    //printf("bytesaved = x%x\n",bytesaved);
    return bytesaved;
}


/************************************************
 * Adjust all Soffset's of stack variables so they
 * are all relative to the frame pointer.
5102 */ 5103 5104 @trusted 5105 void cod3_adjSymOffsets() 5106 { 5107 SYMIDX si; 5108 5109 //printf("cod3_adjSymOffsets()\n"); 5110 for (si = 0; si < globsym.length; si++) 5111 { 5112 //printf("\tglobsym[%d] = %p\n",si,globsym[si]); 5113 Symbol *s = globsym[si]; 5114 5115 switch (s.Sclass) 5116 { 5117 case SC.parameter: 5118 case SC.regpar: 5119 case SC.shadowreg: 5120 //printf("s = '%s', Soffset = x%x, Para.size = x%x, EBPtoESP = x%x\n", s.Sident, s.Soffset, Para.size, EBPtoESP); 5121 s.Soffset += Para.size; 5122 if (0 && !(funcsym_p.Sfunc.Fflags3 & Fmember)) 5123 { 5124 if (!hasframe) 5125 s.Soffset += EBPtoESP; 5126 if (funcsym_p.Sfunc.Fflags3 & Fnested) 5127 s.Soffset += REGSIZE; 5128 } 5129 break; 5130 5131 case SC.fastpar: 5132 //printf("\tfastpar %s %p Soffset %x Fast.size %x BPoff %x\n", s.Sident, s, cast(int)s.Soffset, cast(int)Fast.size, cast(int)BPoff); 5133 s.Soffset += Fast.size + BPoff; 5134 break; 5135 5136 case SC.auto_: 5137 case SC.register: 5138 if (s.Sfl == FLfast) 5139 s.Soffset += Fast.size + BPoff; 5140 else 5141 //printf("s = '%s', Soffset = x%x, Auto.size = x%x, BPoff = x%x EBPtoESP = x%x\n", s.Sident, cast(int)s.Soffset, cast(int)Auto.size, cast(int)BPoff, cast(int)EBPtoESP); 5142 // if (!(funcsym_p.Sfunc.Fflags3 & Fnested)) 5143 s.Soffset += Auto.size + BPoff; 5144 break; 5145 5146 case SC.bprel: 5147 break; 5148 5149 default: 5150 continue; 5151 } 5152 static if (0) 5153 { 5154 if (!hasframe) 5155 s.Soffset += EBPtoESP; 5156 } 5157 } 5158 } 5159 5160 /******************************* 5161 * Take symbol info in union ev and replace it with a real address 5162 * in Vpointer. 
 */

@trusted
void assignaddr(block *bl)
{
    int EBPtoESPsave = EBPtoESP;
    int hasframesave = hasframe;

    // Code outside the prolog/epilog runs with no frame set up yet:
    // ESP is just past the return address (hence -REGSIZE) and there is
    // no EBP frame. Temporarily reflect that while resolving addresses.
    if (bl.Bflags & BFLoutsideprolog)
    {
        EBPtoESP = -REGSIZE;
        hasframe = 0;
    }
    assignaddrc(bl.Bcode);
    hasframe = hasframesave;
    EBPtoESP = EBPtoESPsave;
}

@trusted
void assignaddrc(code *c)
{
    int sn;
    Symbol *s;
    ubyte ins,rm;
    targ_size_t soff;
    targ_size_t base;

    base = EBPtoESP;    // EBPtoESP at entry; tracks ESCadjesp deltas below
    for (; c; c = code_next(c))
    {
        debug
        {
        if (0)
        {       printf("assignaddrc()\n");
                code_print(c);
        }
        // Cheap cycle check on the code list.
        if (code_next(c) && code_next(code_next(c)) == c)
            assert(0);
        }

        // Classify the opcode to find its modregrm/immediate layout (ins).
        if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
            ins = vex_inssize(c);
        else if ((c.Iop & 0xFFFD00) == 0x0F3800)
            ins = inssize2[(c.Iop >> 8) & 0xFF];
        else if ((c.Iop & 0xFF00) == 0x0F00)
            ins = inssize2[c.Iop & 0xFF];
        else if ((c.Iop & 0xFF) == ESCAPE)
        {
            // Pseudo-ops the code generator left behind; resolve them now.
            if (c.Iop == (ESCAPE | ESCadjesp))
            {
                //printf("adjusting EBPtoESP (%d) by %ld\n",EBPtoESP,cast(long)c.IEV1.Vint);
                EBPtoESP += c.IEV1.Vint;
                c.Iop = NOP;
            }
            else if (c.Iop == (ESCAPE | ESCfixesp))
            {
                //printf("fix ESP\n");
                if (hasframe)
                {
                    // LEA ESP,-EBPtoESP[EBP]
                    c.Iop = LEA;
                    if (c.Irm & 8)
                        c.Irex |= REX_R;
                    c.Irm = modregrm(2,SP,BP);
                    c.Iflags = CFoff;
                    c.IFL1 = FLconst;
                    c.IEV1.Vuns = -EBPtoESP;
                    if (enforcealign)
                    {
                        // AND ESP, -STACKALIGN
                        code *cn = code_calloc();
                        cn.Iop = 0x81;
                        cn.Irm = modregrm(3, 4, SP);
                        cn.Iflags = CFoff;
                        cn.IFL2 = FLconst;
                        cn.IEV2.Vsize_t = -STACKALIGN;
                        if (I64)
                            c.Irex |= REX_W;
                        cn.next = c.next;
                        c.next = cn;
                    }
                }
            }
            else if (c.Iop == (ESCAPE | ESCframeptr))
            {   // Convert to load of frame pointer
                // c.Irm is the register to use
                if (hasframe && !enforcealign)
                {   // MOV reg,EBP
                    c.Iop = 0x89;
                    if (c.Irm & 8)
                        c.Irex |= REX_B;
                    c.Irm = modregrm(3,BP,c.Irm & 7);
                }
                else
                {   // LEA reg,EBPtoESP[ESP]
                    c.Iop = LEA;
                    if (c.Irm & 8)
                        c.Irex |= REX_R;
                    c.Irm = modregrm(2,c.Irm & 7,4);
                    c.Isib = modregrm(0,4,SP);
                    c.Iflags = CFoff;
                    c.IFL1 = FLconst;
                    c.IEV1.Vuns = EBPtoESP;
                }
            }
            if (I64)
                c.Irex |= REX_W;
            continue;
        }
        else
            ins = inssize[c.Iop & 0xFF];
        // Skip to the second operand when there is no memory first operand.
        if (!(ins & M) ||
            ((rm = c.Irm) & 0xC0) == 0xC0)
            goto do2;           /* if no first operand          */
        if (is32bitaddr(I32,c.Iflags))
        {
            // mod==0 has no displacement unless rm==5 (or SIB with base 5).
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5))
               )
                goto do2;       /* if no first operand  */
        }
        else
        {
            // 16 bit addressing: mod==0 has no displacement unless rm==6.
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 6))
               )
                goto do2;       /* if no first operand  */
        }
        s = c.IEV1.Vsym;
        switch (c.IFL1)
        {
            case FLdata:
                if (config.objfmt == OBJ_OMF && s.Sclass != SC.comdat && s.Sclass != SC.extern_)
                {
                    c.IEV1.Vseg = s.Sseg;
                    c.IEV1.Vpointer += s.Soffset;
                    c.IFL1 = FLdatseg;
                }
                else
                    c.IFL1 = FLextern;
                goto do2;

            case FLudata:
                if (config.objfmt == OBJ_OMF)
                {
                    c.IEV1.Vseg = s.Sseg;
                    c.IEV1.Vpointer += s.Soffset;
                    c.IFL1 = FLdatseg;
                }
                else
                    c.IFL1 = FLextern;
                goto do2;

            case FLtlsdata:
                if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
                    c.IFL1 = FLextern;
                goto do2;

            case FLdatseg:
                //c.IEV1.Vseg = DATA;
                goto do2;

            case FLfardata:
            case FLcsdata:
            case FLpseudo:
                goto do2;

            case FLstack:
                //printf("Soffset = %d, EBPtoESP = %d, base = %d, pointer = %d\n",
                //s.Soffset,EBPtoESP,base,c.IEV1.Vpointer);
                c.IEV1.Vpointer += s.Soffset + EBPtoESP - base - EEStack.offset;
                break;

            case FLfast:
                soff = Fast.size;
                goto L1;

            case FLreg:
            case FLauto:
                soff = Auto.size;
            L1: // common path: symbol lives in the stack frame at soff
                if (Symbol_Sisdead(*s, anyiasm))
                {
                    c.Iop = NOP;               // remove references to it
                    continue;
                }
                // Symbol was enregistered: rewrite the EA as a register
                // operand (Vpointer 0 = low byte/word, 1 = high byte).
                if (s.Sfl == FLreg && c.IEV1.Vpointer < 2)
                {
                    reg_t reg = s.Sreglsw;

                    assert(!(s.Sregm & ~mask(reg)));
                    if (c.IEV1.Vpointer == 1)
                    {
                        assert(reg < 4);    /* must be a BYTEREGS   */
                        reg |= 4;           /* convert to high byte reg */
                    }
                    if (reg & 8)
                    {
                        assert(I64);
                        c.Irex |= REX_B;
                        reg &= 7;
                    }
                    c.Irm = (c.Irm & modregrm(0,7,0))
                            | modregrm(3,0,reg);
                    assert(c.Iop != LES && c.Iop != LEA);
                    goto do2;
                }
                else
                {   c.IEV1.Vpointer += s.Soffset + soff + BPoff;
                    if (s.Sflags & SFLunambig)
                        c.Iflags |= CFunambig;
            L2: // rewrite EBP-relative EA as ESP-relative when frameless
                    if (!hasframe || (enforcealign && c.IFL1 != FLpara))
                    {   /* Convert to ESP relative address instead of EBP */
                        assert(!I16);
                        c.IEV1.Vpointer += EBPtoESP;
                        ubyte crm = c.Irm;
                        if ((crm & 7) == 4)              // if SIB byte
                        {
                            assert((c.Isib & 7) == BP);
                            assert((crm & 0xC0) != 0);
                            c.Isib = (c.Isib & ~7) | modregrm(0,0,SP);
                        }
                        else
                        {
                            assert((crm & 7) == 5);
                            c.Irm = (crm & modregrm(0,7,0))
                                    | modregrm(2,0,4);
                            c.Isib = modregrm(0,4,SP);
                        }
                    }
                }
                break;

            case FLpara:
                //printf("s = %s, Soffset = %d, Para.size = %d, BPoff = %d, EBPtoESP = %d, Vpointer = %d\n",
                //s.Sident.ptr, cast(int)s.Soffset, cast(int)Para.size, cast(int)BPoff,
                //cast(int)EBPtoESP, cast(int)c.IEV1.Vpointer);
                soff = Para.size - BPoff;    // cancel out add of BPoff
                goto L1;

            case FLfltreg:
                c.IEV1.Vpointer += Foff + BPoff;
                c.Iflags |= CFunambig;
                goto L2;

            case FLallocatmp:
                c.IEV1.Vpointer += Alloca.offset + BPoff;
                goto L2;

            case FLfuncarg:
                c.IEV1.Vpointer += cgstate.funcarg.offset + BPoff;
                goto L2;

            case FLbprel:
                c.IEV1.Vpointer += s.Soffset;
                break;

            case FLcs:
                sn = c.IEV1.Vuns;
                if (!CSE.loaded(sn))            // if never loaded
                {
                    c.Iop = NOP;
                    continue;
                }
                c.IEV1.Vpointer = CSE.offset(sn) + CSoff + BPoff;
                c.Iflags |= CFunambig;
                goto L2;

            case FLregsave:
                sn = c.IEV1.Vuns;
                c.IEV1.Vpointer = sn + regsave.off + BPoff;
                c.Iflags |= CFunambig;
                goto L2;

            case FLndp:
                assert(c.IEV1.Vuns < global87.save.length);
                c.IEV1.Vpointer = c.IEV1.Vuns * tysize(TYldouble) + NDPoff + BPoff;
                c.Iflags |= CFunambig;
                goto L2;

            case FLoffset:
                break;

            case FLlocalsize:
                c.IEV1.Vpointer += localsize;
                break;

            case FLconst:
            default:
                goto do2;
        }
        c.IFL1 = FLconst;
    do2:
        /* Ignore TEST (F6 and F7) opcodes      */
        if (!(ins & T)) goto done;              /* if no second operand */
        s = c.IEV2.Vsym;
        switch (c.IFL2)
        {
            case FLdata:
                if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
                {
                    c.IFL2 = FLextern;
                    goto do2;
                }
                else
                {
                    if (s.Sclass == SC.comdat)
                    {   c.IFL2 = FLextern;
                        goto do2;
                    }
                    c.IEV2.Vseg = MARS ? s.Sseg : DATA;
                    c.IEV2.Vpointer += s.Soffset;
                    c.IFL2 = FLdatseg;
                    goto done;
                }

            case FLudata:
                if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
                {
                    c.IFL2 = FLextern;
                    goto do2;
                }
                else
                {
                    c.IEV2.Vseg = MARS ? s.Sseg : UDATA;
                    c.IEV2.Vpointer += s.Soffset;
                    c.IFL2 = FLdatseg;
                    goto done;
                }

            case FLtlsdata:
                if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
                {
                    c.IFL2 = FLextern;
                    goto do2;
                }
                goto done;

            case FLdatseg:
                //c.IEV2.Vseg = DATA;
                goto done;

            case FLcsdata:
            case FLfardata:
                goto done;

            case FLreg:
            case FLpseudo:
                assert(0);
                /* NOTREACHED */

            case FLfast:
                c.IEV2.Vpointer += s.Soffset + Fast.size + BPoff;
                break;

            case FLauto:
                c.IEV2.Vpointer += s.Soffset + Auto.size + BPoff;
            L3: // second-operand analogue of L2: ESP-relative conversion
                if (!hasframe || (enforcealign && c.IFL2 != FLpara))
                    /* Convert to ESP relative address instead of EBP */
                    c.IEV2.Vpointer += EBPtoESP;
                break;

            case FLpara:
                c.IEV2.Vpointer += s.Soffset + Para.size;
                goto L3;

            case FLfltreg:
                c.IEV2.Vpointer += Foff + BPoff;
                goto L3;

            case FLallocatmp:
                c.IEV2.Vpointer += Alloca.offset + BPoff;
                goto L3;

            case FLfuncarg:
                c.IEV2.Vpointer += cgstate.funcarg.offset + BPoff;
                goto L3;

            case FLbprel:
                c.IEV2.Vpointer += s.Soffset;
                break;

            case FLstack:
                c.IEV2.Vpointer += s.Soffset + EBPtoESP - base;
                break;

            case FLcs:
            case FLndp:
            case FLregsave:
                assert(0);

            case FLconst:
                break;

            case FLlocalsize:
                c.IEV2.Vpointer += localsize;
                break;

            default:
                goto done;
        }
        c.IFL2 = FLconst;
  done:
        { }
    }
}

/*******************************
 * Return offset from BP of symbol s.
 */

@trusted
targ_size_t cod3_bpoffset(Symbol *s)
{
    targ_size_t offset;

    symbol_debug(s);
    offset = s.Soffset;
    // Add the frame-area base for the symbol's location class.
    switch (s.Sfl)
    {
        case FLpara:
            offset += Para.size;
            break;

        case FLfast:
            offset += Fast.size + BPoff;
            break;

        case FLauto:
            offset += Auto.size + BPoff;
            break;

        default:
            WRFL(s.Sfl);
            symbol_print(s);
            assert(0);
    }
    assert(hasframe);
    return offset;
}


/*******************************
 * Find shorter versions of the same instructions.
 * Does these optimizations:
 *      replaces jmps to the next instruction with NOPs
 *      sign extension of modregrm displacement
 *      sign extension of immediate data (can't do it for OR, AND, XOR
 *              as the opcodes are not defined)
 *      short versions for AX EA
 *      short versions for reg EA
 * Code is neither removed nor added.
 * Params:
 *      b = block for code (or null)
 *      c = code list to optimize
 */

@trusted
void pinholeopt(code *c,block *b)
{
    targ_size_t a;
    uint mod;
    ubyte ins;
    int usespace;
    int useopsize;
    int space;
    block *bn;

    debug
    {
        // Run the self-test once per process (debug builds only).
        __gshared int tested; if (!tested) { tested++; pinholeopt_unittest(); }
    }

    debug
    {
        code *cstart = c;
        if (debugc)
        {
            printf("+pinholeopt(%p)\n",c);
        }
    }

    if (b)
    {
        bn = b.Bnext;
        usespace = (config.flags4 & CFG4space && b.BC != BCasm);
        useopsize = (I16 || (config.flags4 & CFG4space && b.BC != BCasm));
    }
    else
    {
        bn = null;
        usespace = (config.flags4 & CFG4space);
        useopsize = (I16 || config.flags4 & CFG4space);
    }
    for (; c; c = code_next(c))
    {
      L1:   // rewrites jump back here so the new form is re-examined
        opcode_t op = c.Iop;
        if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
            ins = vex_inssize(c);
        else if ((op & 0xFFFD00) == 0x0F3800)
            ins = inssize2[(op >> 8) & 0xFF];
        else if ((op & 0xFF00) == 0x0F00)
            ins = inssize2[op & 0xFF];
        else
            ins = inssize[op & 0xFF];
        if (ins & M)            // if modregrm byte
        {
            int shortop = (c.Iflags & CFopsize) ? !I16 : I16;
            int local_BPRM = BPRM;

            if (c.Iflags & CFaddrsize)
                local_BPRM ^= 5 ^ 6;    // toggle between 5 and 6

            uint rm = c.Irm;
            reg_t reg = rm & modregrm(0,7,0);          // isolate reg field
            reg_t ereg = rm & 7;
            //printf("c = %p, op = %02x rm = %02x\n", c, op, rm);

            /* If immediate second operand      */
            if ((ins & T ||
                 ((op == 0xF6 || op == 0xF7) && (reg < modregrm(0,2,0) || reg > modregrm(0,3,0)))
                ) &&
                c.IFL2 == FLconst)
            {
                int flags = c.Iflags & CFpsw;      /* if want result in flags */
                targ_long u = c.IEV2.Vuns;
                if (ins & E)
                    u = cast(byte) u;
                else if (shortop)
                    u = cast(short) u;

                // Replace CMP reg,0 with TEST reg,reg
                if ((op & 0xFE) == 0x80 &&              // 80 is CMP R8,imm8; 81 is CMP reg,imm
                    rm >= modregrm(3,7,AX) &&
                    u == 0)
                {
                    c.Iop = (op & 1) | 0x84;
                    c.Irm = modregrm(3,ereg,ereg);
                    if (c.Irex & REX_B)
                        c.Irex |= REX_R;
                    goto L1;
                }

                /* Optimize ANDs with an immediate constant             */
                if ((op == 0x81 || op == 0x80) && reg == modregrm(0,4,0))
                {
                    if (rm >= modregrm(3,4,AX))         // AND reg,imm
                    {
                        if (u == 0)
                        {
                            /* Replace with XOR reg,reg     */
                            c.Iop = 0x30 | (op & 1);
                            c.Irm = modregrm(3,ereg,ereg);
                            if (c.Irex & REX_B)
                                c.Irex |= REX_R;
                            goto L1;
                        }
                        if (u == 0xFFFFFFFF && !flags)
                        {
                            c.Iop = NOP;        // AND reg,-1 is a no-op
                            goto L1;
                        }
                    }
                    if (op == 0x81 && !flags)
                    {   // If we can do the operation in one byte

                        // If EA is not SI or DI
                        if ((rm < modregrm(3,4,SP) || I64) &&
                            (config.flags4 & CFG4space ||
                             config.target_cpu < TARGET_PentiumPro)
                           )
                        {
                            if ((u & 0xFFFFFF00) == 0xFFFFFF00)
                                goto L2;
                            else if (rm < modregrm(3,0,0) || (!c.Irex && ereg < 4))
                            {
                                if (!shortop)
                                {
                                    if ((u & 0xFFFF00FF) == 0xFFFF00FF)
                                        goto L3;
                                }
                                else
                                {
                                    if ((u & 0xFF) == 0xFF)
                                        goto L3;
                                }
                            }
                        }
                        if (!shortop && useopsize)
                        {
                            if ((u & 0xFFFF0000) == 0xFFFF0000)
                            {
                                c.Iflags ^= CFopsize;
                                goto L1;
                            }
                            if ((u & 0xFFFF) == 0xFFFF && rm < modregrm(3,4,AX))
                            {
                                c.IEV1.Voffset += 2; /* address MSW      */
                                c.IEV2.Vuns >>= 16;
                                c.Iflags ^= CFopsize;
                                goto L1;
                            }
                            if (rm >= modregrm(3,4,AX))
                            {
                                if (u == 0xFF && (rm <= modregrm(3,4,BX) || I64))
                                {
                                    c.Iop = MOVZXb;     // MOVZX
                                    c.Irm = modregrm(3,ereg,ereg);
                                    if (c.Irex & REX_B)
                                        c.Irex |= REX_R;
                                    goto L1;
                                }
                                if (u == 0xFFFF)
                                {
                                    c.Iop = MOVZXw;     // MOVZX
                                    c.Irm = modregrm(3,ereg,ereg);
                                    if (c.Irex & REX_B)
                                        c.Irex |= REX_R;
                                    goto L1;
                                }
                            }
                        }
                    }
                }

                /* Look for ADD,OR,SUB,XOR with u that we can eliminate */
                if (!flags &&
                    (op == 0x81 || op == 0x80) &&
                    (reg == modregrm(0,0,0) || reg == modregrm(0,1,0) ||  // ADD,OR
                     reg == modregrm(0,5,0) || reg == modregrm(0,6,0))    // SUB, XOR
                   )
                {
                    if (u == 0)
                    {
                        c.Iop = NOP;
                        goto L1;
                    }
                    if (u == ~0 && reg == modregrm(0,6,0))  /* XOR  */
                    {
                        c.Iop = 0xF6 | (op & 1);       /* NOT  */
                        c.Irm ^= modregrm(0,6^2,0);
                        goto L1;
                    }
                    if (!shortop &&
                        useopsize &&
                        op == 0x81 &&
                        (u & 0xFFFF0000) == 0 &&
                        (reg == modregrm(0,6,0) || reg == modregrm(0,1,0)))
                    {
                        c.Iflags ^= CFopsize;
                        goto L1;
                    }
                }

                /* Look for TEST or OR or XOR with an immediate constant */
                /* that we can replace with a byte operation            */
                if (op == 0xF7 && reg == modregrm(0,0,0) ||
                    op == 0x81 && reg == modregrm(0,6,0) && !flags ||
                    op == 0x81 && reg == modregrm(0,1,0))
                {
                    // See if we can replace a dword with a word
                    // (avoid for 32 bit instructions, because CFopsize
                    //  is too slow)
                    if (!shortop && useopsize)
                    {
                        if ((u & 0xFFFF0000) == 0)
                        {
                            c.Iflags ^= CFopsize;
                            goto L1;
                        }
                        /* If memory (not register) addressing mode     */
                        if ((u & 0xFFFF) == 0 && rm < modregrm(3,0,AX))
                        {
                            c.IEV1.Voffset += 2; /* address MSW  */
                            c.IEV2.Vuns >>= 16;
                            c.Iflags ^= CFopsize;
                            goto L1;
                        }
                    }

                    // If EA is not SI or DI
                    if (rm < (modregrm(3,0,SP) | reg) &&
                        (usespace ||
                         config.target_cpu < TARGET_PentiumPro)
                       )
                    {
                        if ((u & 0xFFFFFF00) == 0)
                        {
                        L2: c.Iop--;           /* to byte instruction  */
                            c.Iflags &= ~CFopsize;
                            goto L1;
                        }
                        if (((u & 0xFFFF00FF) == 0 ||
                             (shortop && (u & 0xFF) == 0)) &&
                            (rm < modregrm(3,0,0) || (!c.Irex && ereg < 4)))
                        {
                        L3: // shift immediate down a byte and address the next byte up
                            c.IEV2.Vuns >>= 8;
                            if (rm >= (modregrm(3,0,AX) | reg))
                                c.Irm |= 4;    /* AX.AH, BX.BH, etc. */
                            else
                                c.IEV1.Voffset += 1;
                            goto L2;
                        }
                    }

                    // BUG: which is right?
                    //else if ((u & 0xFFFF0000) == 0)

                    else if (0 && op == 0xF7 &&
                             rm >= modregrm(3,0,SP) &&
                             (u & 0xFFFF0000) == 0)

                        c.Iflags &= ~CFopsize;
                }

                // Try to replace TEST reg,-1 with TEST reg,reg
                if (op == 0xF6 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7)) // TEST regL,immed8
                {
                    if ((u & 0xFF) == 0xFF)
                    {
                    L4:
                        c.Iop = 0x84;          // TEST regL,regL
                        c.Irm = modregrm(3,ereg,ereg);
                        if (c.Irex & REX_B)
                            c.Irex |= REX_R;
                        c.Iflags &= ~CFopsize;
                        goto L1;
                    }
                }
                if (op == 0xF7 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7) && (I64 || ereg < 4))
                {
                    if (u == 0xFF)
                    {
                        if (ereg & 4) // SIL,DIL,BPL,SPL need REX prefix
                            c.Irex |= REX;
                        goto L4;
                    }
                    if ((u & 0xFFFF) == 0xFF00 && shortop && !c.Irex && ereg < 4)
                    {
                        ereg |= 4;                /* to regH      */
                        goto L4;
                    }
                }

                /* Look for sign extended immediate data */
                if (cast(byte) u == u)
                {
                    if (op == 0x81)
                    {
                        // not for OR (0x08), AND (0x20), XOR (0x30): no
                        // sign-extended forms defined for those
                        if (reg != 0x08 && reg != 0x20 && reg != 0x30)
                            c.Iop = op = 0x83;         /* 8 bit sgn ext */
                    }
                    else if (op == 0x69)               /* IMUL rw,ew,dw */
                        c.Iop = op = 0x6B;             /* IMUL rw,ew,db */
                }

                // Look for SHIFT EA,imm8 we can replace with short form
                if (u == 1 && ((op & 0xFE) == 0xC0))
                    c.Iop |= 0xD0;

            } /* if immediate second operand */

            /* Look for AX short form */
            if (ins & A)
            {
                if (rm == modregrm(0,AX,local_BPRM) &&
                    !(c.Irex & REX_R) &&               // and it's AX, not R8
                    (op & ~3) == 0x88 &&
                    !I64)
                {
                    op = ((op & 3) + 0xA0) ^ 2;
                    /* 8A. A0 */
                    /* 8B. A1 */
                    /* 88. A2 */
                    /* 89. A3 */
                    c.Iop = op;
                    c.IFL2 = c.IFL1;
                    c.IEV2 = c.IEV1;
                }

                /* Replace MOV REG1,REG2 with MOV EREG1,EREG2   */
                else if (!I16 &&
                         (op == 0x89 || op == 0x8B) &&
                         (rm & 0xC0) == 0xC0 &&
                         (!b || b.BC != BCasm)
                        )
                    c.Iflags &= ~CFopsize;

                // If rm is AX
                else if ((rm & modregrm(3,0,7)) == modregrm(3,0,AX) && !(c.Irex & (REX_R | REX_B)))
                {
                    switch (op)
                    {
                        case 0x80:  op = reg | 4; break;
                        case 0x81:  op = reg | 5; break;
                        case 0x87:  op = 0x90 + (reg>>3); break;    // XCHG

                        case 0xF6:
                            if (reg == 0)
                                op = 0xA8;  /* TEST AL,immed8       */
                            break;

                        case 0xF7:
                            if (reg == 0)
                                op = 0xA9;  /* TEST AX,immed16      */
                            break;

                        default:
                            break;
                    }
                    c.Iop = op;
                }
            }

            /* Look for reg short form */
            if ((ins & R) && (rm & 0xC0) == 0xC0)
            {
                switch (op)
                {
                    case 0xC6:  op = 0xB0 + ereg; break;
                    case 0xC7: // if no sign extension
                        if (!(c.Irex & REX_W && c.IEV2.Vint < 0))
                        {
                            c.Irm = 0;
                            c.Irex &= ~REX_W;
                            op = 0xB8 + ereg;
                        }
                        break;

                    case 0xFF:
                        switch (reg)
                        {   case 6<<3: op = 0x50+ereg; break;/* PUSH*/
                            case 0<<3: if (!I64) op = 0x40+ereg; break; /* INC*/
                            case 1<<3: if (!I64) op = 0x48+ereg; break; /* DEC*/
                            default: break;
                        }
                        break;

                    case 0x8F:  op = 0x58 + ereg; break;
                    case 0x87:
                        if (reg == 0 && !(c.Irex & (REX_R | REX_B))) // Issue 12968: Needed to ensure it's referencing RAX, not R8
                            op = 0x90 + ereg;
                        break;

                    default:
                        break;
                }
                c.Iop = op;
            }

            // Look to remove redundant REX prefix on XOR
            if (c.Irex == REX_W // ignore ops involving R8..R15
                && (op == 0x31 || op == 0x33) // XOR
                && ((rm & 0xC0) == 0xC0) // register direct
                && ((reg >> 3) == ereg)) // register with itself
            {
                c.Irex = 0;
            }

            // Look to replace SHL reg,1 with ADD reg,reg
            if ((op & ~1) == 0xD0 &&
                     (rm & modregrm(3,7,0)) == modregrm(3,4,0) &&
                     config.target_cpu >= TARGET_80486)
            {
                c.Iop &= 1;
                c.Irm = cast(ubyte)((rm & modregrm(3,0,7)) | (ereg << 3));
                if (c.Irex & REX_B)
                    c.Irex |= REX_R;
                if (!(c.Iflags & CFpsw) && !I16)
                    c.Iflags &= ~CFopsize;
                goto L1;
            }

            /* Look for sign extended modregrm displacement, or 0
             * displacement.
             */

            if (((rm & 0xC0) == 0x80) &&        // it's a 16/32 bit disp
                c.IFL1 == FLconst)              // and it's a constant
            {
                a = c.IEV1.Vpointer;
                if (a == 0 && (rm & 7) != local_BPRM &&         // if 0[disp]
                    !(local_BPRM == 5 && (rm & 7) == 4 && (c.Isib & 7) == BP)
                   )
                    c.Irm &= 0x3F;
                else if (!I16)
                {
                    if (cast(targ_size_t)cast(targ_schar)a == a)
                        c.Irm ^= 0xC0;                  /* do 8 sx      */
                }
                else if ((cast(targ_size_t)cast(targ_schar)a & 0xFFFF) == (a & 0xFFFF))
                    c.Irm ^= 0xC0;                      /* do 8 sx      */
            }

            /* Look for LEA reg,[ireg], replace with MOV reg,ireg       */
            if (op == LEA)
            {
                rm = c.Irm & 7;
                mod = c.Irm & modregrm(3,0,0);
                if (mod == 0)
                {
                    if (!I16)
                    {
                        switch (rm)
                        {
                            case 4:
                            case 5:
                                break;      // SIB / disp32 forms: leave alone

                            default:
                                c.Irm |= modregrm(3,0,0);
                                c.Iop = 0x8B;
                                break;
                        }
                    }
                    else
                    {
                        switch (rm)
                        {
                            case 4:     rm = modregrm(3,0,SI);  goto L6;
                            case 5:     rm = modregrm(3,0,DI);  goto L6;
                            case 7:     rm = modregrm(3,0,BX);  goto L6;
                            L6:     c.Irm = cast(ubyte)(rm + reg);
                                    c.Iop = 0x8B;
                                    break;

                            default:
                                    break;
                        }
                    }
                }

                /* replace LEA reg,0[BP] with MOV reg,BP        */
                else if (mod == modregrm(1,0,0) && rm == local_BPRM &&
                        c.IFL1 == FLconst && c.IEV1.Vpointer == 0)
                {   c.Iop = 0x8B;          /* MOV reg,BP   */
                    c.Irm = cast(ubyte)(modregrm(3,0,BP) + reg);
                }
            }

            // Replace [R13] with 0[R13]
            if (c.Irex & REX_B && ((c.Irm & modregrm(3,0,7)) == modregrm(0,0,BP) ||
                                    issib(c.Irm) && (c.Irm & modregrm(3,0,0)) == 0 && (c.Isib & 7) == BP))
            {
                c.Irm |= modregrm(1,0,0);
                c.IFL1 = FLconst;
                c.IEV1.Vpointer = 0;
            }
        }
        else if (!(c.Iflags & CFvex))
        {
            switch (op)
            {
                default:
                    // Look for MOV r64, immediate
                    if ((c.Irex & REX_W) && (op & ~7) == 0xB8)
                    {
                        /* Look for zero extended immediate data */
                        if (c.IEV2.Vsize_t == c.IEV2.Vuns)
                        {
                            c.Irex &= ~REX_W;
                        }
                        /* Look for sign extended immediate data */
                        else if (c.IEV2.Vsize_t == c.IEV2.Vint)
                        {
                            c.Irm = modregrm(3,0,op & 7);
                            c.Iop = op = 0xC7;
                            c.IEV2.Vsize_t = c.IEV2.Vuns;
                        }
                    }
                    if ((op & ~0x0F) != 0x70)
                        break;
                    goto case JMP;      // Jcc handled like JMP below

                case JMP:
                    switch (c.IFL2)
                    {
                        case FLcode:
                            if (c.IEV2.Vcode == code_next(c))
                            {
                                c.Iop = NOP;    // jump to next instruction
                                continue;
                            }
                            break;

                        case FLblock:
                            if (!code_next(c) && c.IEV2.Vblock == bn)
                            {
                                c.Iop = NOP;    // fall through to next block
                                continue;
                            }
                            break;

                        case FLconst:
                        case FLfunc:
                        case FLextern:
                            break;

                        default:
                            WRFL(c.IFL2);
                            assert(0);
                    }
                    break;

                case 0x68:                      // PUSH immed16
                    if (c.IFL2 == FLconst)
                    {
                        targ_long u = c.IEV2.Vuns;
                        if (I64 ||
                            ((c.Iflags & CFopsize) ? I16 : I32))
                        {   // PUSH 32/64 bit operand
                            if (u == cast(byte) u)
                                c.Iop = 0x6A;          // PUSH immed8
                        }
                        else // PUSH 16 bit operand
                        {
                            if (cast(short)u == cast(byte) u)
                                c.Iop = 0x6A;          // PUSH immed8
                        }
                    }
                    break;
            }
        }
    }

    debug
    if (debugc)
    {
        printf("-pinholeopt(%p)\n",cstart);
        for (c = cstart; c; c = code_next(c))
            code_print(c);
    }
}


debug
{
@trusted
private void pinholeopt_unittest()
{
    //printf("pinholeopt_unittest()\n");
    // Each test is a pair: input instruction, expected instruction after
    // pinholeopt(). model selects 16/32/64 bit mode (0 = any).
    static struct CS
    {
        uint model,op,ea;
        targ_size_t ev1,ev2;
        uint flags;
    }
    __gshared CS[2][22] tests =
    [
        // XOR reg,immed                            NOT regL
        [ { 16,0x81,modregrm(3,6,BX),0,0xFF,0 },    { 0,0xF6,modregrm(3,2,BX),0,0xFF } ],

        // MOV 0[BX],3                              MOV [BX],3
        [ { 16,0xC7,modregrm(2,0,7),0,3 },          { 0,0xC7,modregrm(0,0,7),0,3 } ],

      /+ // only if config.flags4 & CFG4space
        // TEST regL,immed8
        [ { 0,0xF6,modregrm(3,0,BX),0,0xFF,0 },    { 0,0x84,modregrm(3,BX,BX),0,0xFF }],
        [ { 0,0xF7,modregrm(3,0,BX),0,0xFF,0 },    { 0,0x84,modregrm(3,BX,BX),0,0xFF }],
        [ { 64,0xF6,modregrmx(3,0,R8),0,0xFF,0 },  { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }],
        [ { 64,0xF7,modregrmx(3,0,R8),0,0xFF,0 },  { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }],
      +/

        // PUSH immed => PUSH immed8
        [ { 0,0x68,0,0,0 },    { 0,0x6A,0,0,0 }],
        [ { 0,0x68,0,0,0x7F }, { 0,0x6A,0,0,0x7F }],
        [ { 0,0x68,0,0,0x80 }, { 0,0x68,0,0,0x80 }],
        [ { 16,0x68,0,0,0,CFopsize },    { 0,0x6A,0,0,0,CFopsize }],
        [ { 16,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }],
        [ { 16,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }],
        [ { 16,0x68,0,0,0x10000,0 },     { 0,0x6A,0,0,0x10000,0 }],
        [ { 16,0x68,0,0,0x10000,CFopsize }, { 0,0x68,0,0,0x10000,CFopsize }],
        [ { 32,0x68,0,0,0,CFopsize },    { 0,0x6A,0,0,0,CFopsize }],
        [ { 32,0x68,0,0,0x7F,CFopsize }, {
0,0x6A,0,0,0x7F,CFopsize }], 6243 [ { 32,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }], 6244 [ { 32,0x68,0,0,0x10000,CFopsize }, { 0,0x6A,0,0,0x10000,CFopsize }], 6245 [ { 32,0x68,0,0,0x8000,CFopsize }, { 0,0x68,0,0,0x8000,CFopsize }], 6246 6247 // clear r64, for r64 != R8..R15 6248 [ { 64,0x31,0x800C0,0,0,0 }, { 0,0x31,0xC0,0,0,0}], 6249 [ { 64,0x33,0x800C0,0,0,0 }, { 0,0x33,0xC0,0,0,0}], 6250 6251 // MOV r64, immed 6252 [ { 64,0xC7,0x800C0,0,0xFFFFFFFF,0 }, { 0,0xC7,0x800C0,0,0xFFFFFFFF,0}], 6253 [ { 64,0xC7,0x800C0,0,0x7FFFFFFF,0 }, { 0,0xB8,0,0,0x7FFFFFFF,0}], 6254 [ { 64,0xB8,0x80000,0,0xFFFFFFFF,0 }, { 0,0xB8,0,0,0xFFFFFFFF,0 }], 6255 [ { 64,0xB8,0x80000,0,cast(targ_size_t)0x1FFFFFFFF,0 }, { 0,0xB8,0x80000,0,cast(targ_size_t)0x1FFFFFFFF,0 }], 6256 [ { 64,0xB8,0x80000,0,cast(targ_size_t)0xFFFFFFFFFFFFFFFF,0 }, { 0,0xC7,0x800C0,0,cast(targ_size_t)0xFFFFFFFF,0}], 6257 ]; 6258 6259 //config.flags4 |= CFG4space; 6260 for (int i = 0; i < tests.length; i++) 6261 { CS *pin = &tests[i][0]; 6262 CS *pout = &tests[i][1]; 6263 code cs = void; 6264 memset(&cs, 0, cs.sizeof); 6265 if (pin.model) 6266 { 6267 if (I16 && pin.model != 16) 6268 continue; 6269 if (I32 && pin.model != 32) 6270 continue; 6271 if (I64 && pin.model != 64) 6272 continue; 6273 } 6274 //printf("[%d]\n", i); 6275 cs.Iop = pin.op; 6276 cs.Iea = pin.ea; 6277 cs.IFL1 = FLconst; 6278 cs.IFL2 = FLconst; 6279 cs.IEV1.Vsize_t = pin.ev1; 6280 cs.IEV2.Vsize_t = pin.ev2; 6281 cs.Iflags = pin.flags; 6282 pinholeopt(&cs, null); 6283 if (cs.Iop != pout.op) 6284 { printf("[%d] Iop = x%02x, pout = x%02x\n", i, cs.Iop, pout.op); 6285 assert(0); 6286 } 6287 assert(cs.Iea == pout.ea); 6288 assert(cs.IEV1.Vsize_t == pout.ev1); 6289 assert(cs.IEV2.Vsize_t == pout.ev2); 6290 assert(cs.Iflags == pout.flags); 6291 } 6292 } 6293 } 6294 6295 @trusted 6296 void simplify_code(code* c) 6297 { 6298 reg_t reg; 6299 if (config.flags4 & CFG4optimized && 6300 (c.Iop == 0x81 || c.Iop == 0x80) && 6301 c.IFL2 == FLconst && 6302 
           reghasvalue((c.Iop == 0x80) ? BYTEREGS : ALLREGS,I64 ? c.IEV2.Vsize_t : c.IEV2.Vlong,reg) &&
        !(I16 && c.Iflags & CFopsize)
       )
    {
        // See if we can replace immediate instruction with register instruction
        // Map the /reg field of the group-1 opcode (ADD/OR/ADC/SBB/AND/SUB/XOR/CMP)
        // back to the corresponding reg,reg opcode byte.
        static immutable ubyte[8] regop =
        [ 0x00,0x08,0x10,0x18,0x20,0x28,0x30,0x38 ];

        //printf("replacing 0x%02x, val = x%lx\n",c.Iop,c.IEV2.Vlong);
        c.Iop = regop[(c.Irm & modregrm(0,7,0)) >> 3] | (c.Iop & 1);
        code_newreg(c, reg);
        // SPL/BPL/SIL/DIL as byte registers need a REX prefix in 64 bit mode
        if (I64 && !(c.Iop & 1) && (reg & 4))
            c.Irex |= REX;
    }
}

/**************************
 * Compute jump addresses for FLcode.
 * Note: only works for forward referenced code.
 *       only direct jumps and branches are detected.
 *       LOOP instructions only work for backward refs.
 * Params:
 *      c = list of instructions to fix up
 */

@trusted
void jmpaddr(code *c)
{
    code* ci,cn,ctarg,cstart;
    targ_size_t ad;

    //printf("jmpaddr()\n");
    cstart = c;                           /* remember start of code       */
    while (c)
    {
        const op = c.Iop;
        // Only direct jumps/branches/calls with an FLcode target are resolved here
        if (op <= 0xEB &&
            inssize[op] & T &&   // if second operand (EV2)
            c.IFL2 == FLcode &&
            ((op & ~0x0F) == 0x70 || op == JMP || op == JMPS || op == JCXZ || op == CALL))
        {
            ci = code_next(c);
            ctarg = c.IEV2.Vcode;         /* target code                  */
            ad = 0;                       /* IP displacement              */
            // Walk forward summing instruction sizes until the target is found
            while (ci && ci != ctarg)
            {
                ad += calccodsize(ci);
                ci = code_next(ci);
            }
            if (!ci)
                goto Lbackjmp;            // couldn't find it; must be a backward ref
            if (!I16 || op == JMP || op == JMPS || op == JCXZ || op == CALL)
                c.IEV2.Vpointer = ad;
            else                          /* else conditional             */
            {
                if (!(c.Iflags & CFjmp16)) /* if branch (short form fits) */
                    c.IEV2.Vpointer = ad;
                else                      /* branch around a long jump    */
                {
                    // Displacement too big for Jcc rel8 in 16 bit code:
                    // rewrite as an inverted short Jcc that hops over a
                    // newly inserted long JMP to the real target.
                    cn = code_next(c);
                    c.next = code_calloc();
                    code_next(c).next = cn;
                    c.Iop = op ^ 1;       /* converse jmp                 */
                    c.Iflags &= ~CFjmp16;
                    c.IEV2.Vpointer = I16 ?
                          3 : 5;   // skip over JMP: 3 bytes (op + rel16) or 5 (op + rel32)
                    cn = code_next(c);
                    cn.Iop = JMP;              /* long jump            */
                    cn.IFL2 = FLconst;
                    cn.IEV2.Vpointer = ad;
                }
            }
            c.IFL2 = FLconst;
        }
        if (op == LOOP && c.IFL2 == FLcode)     /* backwards refs       */
        {
          Lbackjmp:
            // Backward reference: scan from the start of the code list to the
            // target, then sum sizes from the target up to this instruction.
            ctarg = c.IEV2.Vcode;
            for (ci = cstart; ci != ctarg; ci = code_next(ci))
                if (!ci || ci == c)
                    assert(0);
            ad = 2;                             /* - IP displacement    */
            while (ci != c)
            {
                assert(ci);
                ad += calccodsize(ci);
                ci = code_next(ci);
            }
            // Negative 8 bit displacement for the LOOP instruction
            c.IEV2.Vpointer = (-ad) & 0xFF;
            c.IFL2 = FLconst;
        }
        c = code_next(c);
    }
}

/*******************************
 * Calculate bl.Bsize, the total size in bytes of a block's code.
 * Params:
 *      c = head of the block's instruction list
 * Returns:
 *      sum of calccodsize() over every instruction in the list
 */

uint calcblksize(code *c)
{
    uint size;
    for (size = 0; c; c = code_next(c))
    {
        uint sz = calccodsize(c);
        //printf("off=%02x, sz = %d, code %p: op=%02x\n", size, sz, c, c.Iop);
        size += sz;
    }
    //printf("calcblksize(c = x%x) = %d\n", c, size);
    return size;
}

/*****************************
 * Calculate and return code size of a code.
 * Note that NOPs are sometimes used as markers, but are
 * never output. LINNUMs are never output.
 * Note: This routine must be fast. Profiling shows it is significant.
6416 */ 6417 6418 @trusted 6419 uint calccodsize(code *c) 6420 { 6421 uint size; 6422 ubyte rm,mod,ins; 6423 uint iflags; 6424 uint i32 = I32 || I64; 6425 uint a32 = i32; 6426 6427 debug 6428 assert((a32 & ~1) == 0); 6429 6430 iflags = c.Iflags; 6431 opcode_t op = c.Iop; 6432 //printf("calccodsize(x%08x), Iflags = x%x\n", op, iflags); 6433 if (iflags & CFvex && c.Ivex.pfx == 0xC4) 6434 { 6435 ins = vex_inssize(c); 6436 size = ins & 7; 6437 goto Lmodrm; 6438 } 6439 else if ((op & 0xFF00) == 0x0F00 || (op & 0xFFFD00) == 0x0F3800) 6440 op = 0x0F; 6441 else 6442 op &= 0xFF; 6443 switch (op) 6444 { 6445 case 0x0F: 6446 if ((c.Iop & 0xFFFD00) == 0x0F3800) 6447 { // 3 byte op ( 0F38-- or 0F3A-- ) 6448 ins = inssize2[(c.Iop >> 8) & 0xFF]; 6449 size = ins & 7; 6450 if (c.Iop & 0xFF000000) 6451 size++; 6452 } 6453 else 6454 { // 2 byte op ( 0F-- ) 6455 ins = inssize2[c.Iop & 0xFF]; 6456 size = ins & 7; 6457 if (c.Iop & 0xFF0000) 6458 size++; 6459 } 6460 break; 6461 6462 case 0x90: 6463 size = (c.Iop == PAUSE) ? 2 : 1; 6464 goto Lret2; 6465 6466 case NOP: 6467 case ESCAPE: 6468 size = 0; // since these won't be output 6469 goto Lret2; 6470 6471 case ASM: 6472 if (c.Iflags == CFaddrsize) // kludge for DA inline asm 6473 size = _tysize[TYnptr]; 6474 else 6475 size = cast(uint)c.IEV1.len; 6476 goto Lret2; 6477 6478 case 0xA1: 6479 case 0xA3: 6480 if (c.Irex) 6481 { 6482 size = 9; // 64 bit immediate value for MOV to/from RAX 6483 goto Lret; 6484 } 6485 goto default; 6486 6487 case 0xF6: /* TEST mem8,immed8 */ 6488 ins = inssize[op]; 6489 size = ins & 7; 6490 if (i32) 6491 size = inssize32[op]; 6492 if ((c.Irm & (7<<3)) == 0) 6493 size++; /* size of immed8 */ 6494 break; 6495 6496 case 0xF7: 6497 ins = inssize[op]; 6498 size = ins & 7; 6499 if (i32) 6500 size = inssize32[op]; 6501 if ((c.Irm & (7<<3)) == 0) 6502 size += (i32 ^ ((iflags & CFopsize) !=0)) ? 
4 : 2; 6503 break; 6504 6505 case 0xFA: 6506 case 0xFB: 6507 if (c.Iop == ENDBR32 || c.Iop == ENDBR64) 6508 { 6509 size = 4; 6510 break; 6511 } 6512 goto default; 6513 6514 default: 6515 ins = inssize[op]; 6516 size = ins & 7; 6517 if (i32) 6518 size = inssize32[op]; 6519 } 6520 6521 if (iflags & (CFwait | CFopsize | CFaddrsize | CFSEG)) 6522 { 6523 if (iflags & CFwait) // if add FWAIT prefix 6524 size++; 6525 if (iflags & CFSEG) // if segment override 6526 size++; 6527 6528 // If the instruction has a second operand that is not an 8 bit, 6529 // and the operand size prefix is present, then fix the size computation 6530 // because the operand size will be different. 6531 // Walter, I had problems with this bit at the end. There can still be 6532 // an ADDRSIZE prefix for these and it does indeed change the operand size. 6533 6534 if (iflags & (CFopsize | CFaddrsize)) 6535 { 6536 if ((ins & (T|E)) == T) 6537 { 6538 if ((op & 0xAC) == 0xA0) 6539 { 6540 if (iflags & CFaddrsize && !I64) 6541 { if (I32) 6542 size -= 2; 6543 else 6544 size += 2; 6545 } 6546 } 6547 else if (iflags & CFopsize) 6548 { if (I16) 6549 size += 2; 6550 else 6551 size -= 2; 6552 } 6553 } 6554 if (iflags & CFaddrsize) 6555 { if (!I64) 6556 a32 ^= 1; 6557 size++; 6558 } 6559 if (iflags & CFopsize) 6560 size++; /* +1 for OPSIZE prefix */ 6561 } 6562 } 6563 6564 Lmodrm: 6565 if ((op & ~0x0F) == 0x70) 6566 { 6567 if (iflags & CFjmp16) // if long branch 6568 size += I16 ? 3 : 4; // + 3(4) bytes for JMP 6569 } 6570 else if (ins & M) // if modregrm byte 6571 { 6572 rm = c.Irm; 6573 mod = rm & 0xC0; 6574 if (a32 || I64) 6575 { // 32 bit addressing 6576 if (issib(rm)) 6577 size++; 6578 switch (mod) 6579 { case 0: 6580 if (issib(rm) && (c.Isib & 7) == 5 || 6581 (rm & 7) == 5) 6582 size += 4; /* disp32 */ 6583 if (c.Irex & REX_B && (rm & 7) == 5) 6584 /* Instead of selecting R13, this mode is an [RIP] relative 6585 * address. Although valid, it's redundant, and should not 6586 * be generated. 
Instead, generate 0[R13] instead of [R13]. 6587 */ 6588 assert(0); 6589 break; 6590 6591 case 0x40: 6592 size++; /* disp8 */ 6593 break; 6594 6595 case 0x80: 6596 size += 4; /* disp32 */ 6597 break; 6598 6599 default: 6600 break; 6601 } 6602 } 6603 else 6604 { // 16 bit addressing 6605 if (mod == 0x40) /* 01: 8 bit displacement */ 6606 size++; 6607 else if (mod == 0x80 || (mod == 0 && (rm & 7) == 6)) 6608 size += 2; 6609 } 6610 } 6611 6612 Lret: 6613 if (!(iflags & CFvex) && c.Irex) 6614 { 6615 size++; 6616 if (c.Irex & REX_W && (op & ~7) == 0xB8) 6617 size += 4; 6618 } 6619 Lret2: 6620 //printf("op = x%02x, size = %d\n",op,size); 6621 return size; 6622 } 6623 6624 /******************************** 6625 * Return !=0 if codes match. 6626 */ 6627 6628 static if (0) 6629 { 6630 6631 int code_match(code *c1,code *c2) 6632 { 6633 code cs1,cs2; 6634 ubyte ins; 6635 6636 if (c1 == c2) 6637 goto match; 6638 cs1 = *c1; 6639 cs2 = *c2; 6640 if (cs1.Iop != cs2.Iop) 6641 goto nomatch; 6642 switch (cs1.Iop) 6643 { 6644 case ESCAPE | ESCctor: 6645 case ESCAPE | ESCdtor: 6646 goto nomatch; 6647 6648 case NOP: 6649 goto match; 6650 6651 case ASM: 6652 if (cs1.IEV1.len == cs2.IEV1.len && 6653 memcmp(cs1.IEV1.bytes,cs2.IEV1.bytes,cs1.EV1.len) == 0) 6654 goto match; 6655 else 6656 goto nomatch; 6657 6658 default: 6659 if ((cs1.Iop & 0xFF) == ESCAPE) 6660 goto match; 6661 break; 6662 } 6663 if (cs1.Iflags != cs2.Iflags) 6664 goto nomatch; 6665 6666 ins = inssize[cs1.Iop & 0xFF]; 6667 if ((cs1.Iop & 0xFFFD00) == 0x0F3800) 6668 { 6669 ins = inssize2[(cs1.Iop >> 8) & 0xFF]; 6670 } 6671 else if ((cs1.Iop & 0xFF00) == 0x0F00) 6672 { 6673 ins = inssize2[cs1.Iop & 0xFF]; 6674 } 6675 6676 if (ins & M) // if modregrm byte 6677 { 6678 if (cs1.Irm != cs2.Irm) 6679 goto nomatch; 6680 if ((cs1.Irm & 0xC0) == 0xC0) 6681 goto do2; 6682 if (is32bitaddr(I32,cs1.Iflags)) 6683 { 6684 if (issib(cs1.Irm) && cs1.Isib != cs2.Isib) 6685 goto nomatch; 6686 if ( 6687 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && 
(c.Isib & 7) == 5 || (rm & 7) == 5)) 6688 ) 6689 goto do2; /* if no first operand */ 6690 } 6691 else 6692 { 6693 if ( 6694 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) 6695 ) 6696 goto do2; /* if no first operand */ 6697 } 6698 if (cs1.IFL1 != cs2.IFL1) 6699 goto nomatch; 6700 if (flinsymtab[cs1.IFL1] && cs1.IEV1.Vsym != cs2.IEV1.Vsym) 6701 goto nomatch; 6702 if (cs1.IEV1.Voffset != cs2.IEV1.Voffset) 6703 goto nomatch; 6704 } 6705 6706 do2: 6707 if (!(ins & T)) // if no second operand 6708 goto match; 6709 if (cs1.IFL2 != cs2.IFL2) 6710 goto nomatch; 6711 if (flinsymtab[cs1.IFL2] && cs1.IEV2.Vsym != cs2.IEV2.Vsym) 6712 goto nomatch; 6713 if (cs1.IEV2.Voffset != cs2.IEV2.Voffset) 6714 goto nomatch; 6715 6716 match: 6717 return 1; 6718 6719 nomatch: 6720 return 0; 6721 } 6722 6723 } 6724 6725 /************************ 6726 * Little buffer allocated on the stack to accumulate instruction bytes to 6727 * later be sent along to objmod 6728 */ 6729 private struct MiniCodeBuf 6730 { 6731 nothrow: 6732 uint index; 6733 uint offset; 6734 int seg; 6735 Barray!ubyte* disasmBuf; 6736 ubyte[256] bytes; // = void; 6737 6738 @trusted 6739 this(int seg) 6740 { 6741 index = 0; 6742 this.offset = cast(uint)Offset(seg); 6743 this.seg = seg; 6744 } 6745 6746 @trusted 6747 void flushx() 6748 { 6749 // Emit accumulated bytes to code segment 6750 debug assert(index < bytes.length); 6751 6752 if (disasmBuf) // write to buffer for disassembly 6753 { 6754 foreach (c; bytes[0 .. 
index])   // not efficient, but for verbose output anyway
                disasmBuf.push(c);
        }

        offset += objmod.bytes(seg, offset, index, bytes.ptr);
        index = 0;
    }

    /// Append one byte to the buffer; caller must ensure available() > 0
    @trusted
    void gen(ubyte c) { bytes[index++] = c; }

    /// Append n raw bytes starting at p to the buffer
    @trusted
    void genp(uint n, void *p) { memcpy(&bytes[index], p, n); index += n; }

    /// Send any accumulated bytes along to objmod
    @trusted
    void flush() { if (index) flushx(); }

    /// Current offset in the segment, including not-yet-flushed bytes
    @trusted
    uint getOffset() { return offset + index; }

    /// Bytes of room remaining in the accumulation buffer
    @trusted
    uint available() { return cast(uint)bytes.length - index; }

    /******************************
     * write64/write32/write16 write `value` to `disasmBuf`
     * in little-endian byte order (least significant byte first).
     */
    @trusted
    void write64(ulong value)
    {
        if (disasmBuf)
        {
            disasmBuf.push(cast(ubyte)value);
            disasmBuf.push(cast(ubyte)(value >> 8));
            disasmBuf.push(cast(ubyte)(value >> 16));
            disasmBuf.push(cast(ubyte)(value >> 24));
            disasmBuf.push(cast(ubyte)(value >> 32));
            disasmBuf.push(cast(ubyte)(value >> 40));   // was `>> 36`: bytes 5..8 were emitted
            disasmBuf.push(cast(ubyte)(value >> 48));   // was `>> 40`: with wrong shift amounts,
            disasmBuf.push(cast(ubyte)(value >> 56));   // was `>> 44`: cf. the 8-bit steps in write32
        }
    }

    pragma(inline, true)
    @trusted
    void write32(uint value)
    {
        if (disasmBuf)
        {
            disasmBuf.push(cast(ubyte)value);
            disasmBuf.push(cast(ubyte)(value >> 8));
            disasmBuf.push(cast(ubyte)(value >> 16));
            disasmBuf.push(cast(ubyte)(value >> 24));
        }
    }

    pragma(inline, true)
    @trusted
    void write16(uint value)
    {
        if (disasmBuf)
        {
            disasmBuf.push(cast(ubyte)value);
            disasmBuf.push(cast(ubyte)(value >> 8));
        }
    }
}

/**************************
 * Convert instructions to object code and write them to objmod.
6823 * Params: 6824 * seg = code segment to write to, code starts at Offset(seg) 6825 * c = list of instructions to write 6826 * disasmBuf = if not null, then also write object code here 6827 * Returns: 6828 * offset of end of code emitted 6829 */ 6830 6831 @trusted 6832 uint codout(int seg, code *c, Barray!ubyte* disasmBuf) 6833 { 6834 ubyte rm,mod; 6835 ubyte ins; 6836 code *cn; 6837 uint flags; 6838 Symbol *s; 6839 6840 debug 6841 if (debugc) printf("codout(%p), Coffset = x%llx\n",c,cast(ulong)Offset(seg)); 6842 6843 MiniCodeBuf ggen = void; 6844 ggen.index = 0; 6845 ggen.offset = cast(uint)Offset(seg); 6846 ggen.seg = seg; 6847 ggen.disasmBuf = disasmBuf; 6848 6849 for (; c; c = code_next(c)) 6850 { 6851 debug 6852 { 6853 if (debugc) { printf("off=%02x, sz=%d, ",cast(int)ggen.getOffset(),cast(int)calccodsize(c)); code_print(c); } 6854 uint startoffset = ggen.getOffset(); 6855 } 6856 6857 opcode_t op = c.Iop; 6858 ins = inssize[op & 0xFF]; 6859 switch (op & 0xFF) 6860 { 6861 case ESCAPE: 6862 /* Check for SSE4 opcode v/pmaxuw xmm1,xmm2/m128 */ 6863 if(op == 0x660F383E || c.Iflags & CFvex) break; 6864 6865 switch (op & 0xFFFF00) 6866 { case ESClinnum: 6867 /* put out line number stuff */ 6868 objmod.linnum(c.IEV1.Vsrcpos,seg,ggen.getOffset()); 6869 break; 6870 case ESCadjesp: 6871 //printf("adjust ESP %ld\n", cast(long)c.IEV1.Vint); 6872 break; 6873 6874 default: 6875 break; 6876 } 6877 6878 debug 6879 assert(calccodsize(c) == 0); 6880 6881 continue; 6882 6883 case NOP: /* don't send them out */ 6884 if (op != NOP) 6885 break; 6886 debug 6887 assert(calccodsize(c) == 0); 6888 6889 continue; 6890 6891 case ASM: 6892 if (op != ASM) 6893 break; 6894 ggen.flush(); 6895 if (c.Iflags == CFaddrsize) // kludge for DA inline asm 6896 { 6897 do32bit(ggen, FLblockoff,c.IEV1,0,0); 6898 } 6899 else 6900 { 6901 ggen.offset += objmod.bytes(seg,ggen.offset,cast(uint)c.IEV1.len,c.IEV1.bytes); 6902 } 6903 debug 6904 assert(calccodsize(c) == c.IEV1.len); 6905 6906 continue; 6907 
6908 default: 6909 break; 6910 } 6911 flags = c.Iflags; 6912 6913 // See if we need to flush (don't have room for largest code sequence) 6914 if (ggen.available() < (1+4+4+8+8)) 6915 ggen.flush(); 6916 6917 // see if we need to put out prefix bytes 6918 if (flags & (CFwait | CFPREFIX | CFjmp16)) 6919 { 6920 int override_; 6921 6922 if (flags & CFwait) 6923 ggen.gen(0x9B); // FWAIT 6924 /* ? SEGES : SEGSS */ 6925 switch (flags & CFSEG) 6926 { case CFes: override_ = SEGES; goto segover; 6927 case CFss: override_ = SEGSS; goto segover; 6928 case CFcs: override_ = SEGCS; goto segover; 6929 case CFds: override_ = SEGDS; goto segover; 6930 case CFfs: override_ = SEGFS; goto segover; 6931 case CFgs: override_ = SEGGS; goto segover; 6932 segover: ggen.gen(cast(ubyte)override_); 6933 break; 6934 6935 default: break; 6936 } 6937 6938 if (flags & CFaddrsize) 6939 ggen.gen(0x67); 6940 6941 // Do this last because of instructions like ADDPD 6942 if (flags & CFopsize) 6943 ggen.gen(0x66); /* operand size */ 6944 6945 if ((op & ~0x0F) == 0x70 && flags & CFjmp16) /* long condit jmp */ 6946 { 6947 if (!I16) 6948 { // Put out 16 bit conditional jump 6949 c.Iop = op = 0x0F00 | (0x80 | (op & 0x0F)); 6950 } 6951 else 6952 { 6953 cn = code_calloc(); 6954 /*cxcalloc++;*/ 6955 cn.next = code_next(c); 6956 c.next= cn; // link into code 6957 cn.Iop = JMP; // JMP block 6958 cn.IFL2 = c.IFL2; 6959 cn.IEV2.Vblock = c.IEV2.Vblock; 6960 c.Iop = op ^= 1; // toggle condition 6961 c.IFL2 = FLconst; 6962 c.IEV2.Vpointer = I16 ? 
3 : 5; // skip over JMP block 6963 c.Iflags &= ~CFjmp16; 6964 } 6965 } 6966 } 6967 6968 if (flags & CFvex) 6969 { 6970 if (flags & CFvex3) 6971 { 6972 ggen.gen(0xC4); 6973 ggen.gen(cast(ubyte)VEX3_B1(c.Ivex)); 6974 ggen.gen(cast(ubyte)VEX3_B2(c.Ivex)); 6975 ggen.gen(c.Ivex.op); 6976 } 6977 else 6978 { 6979 ggen.gen(0xC5); 6980 ggen.gen(cast(ubyte)VEX2_B1(c.Ivex)); 6981 ggen.gen(c.Ivex.op); 6982 } 6983 ins = vex_inssize(c); 6984 goto Lmodrm; 6985 } 6986 6987 if (op > 0xFF) 6988 { 6989 if ((op & 0xFFFD00) == 0x0F3800) 6990 ins = inssize2[(op >> 8) & 0xFF]; 6991 else if ((op & 0xFF00) == 0x0F00) 6992 ins = inssize2[op & 0xFF]; 6993 6994 if (op & 0xFF_00_00_00) 6995 { 6996 ubyte op1 = op >> 24; 6997 if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66) 6998 { 6999 ggen.gen(op1); 7000 if (c.Irex) 7001 ggen.gen(c.Irex | REX); 7002 } 7003 else 7004 { 7005 if (c.Irex) 7006 ggen.gen(c.Irex | REX); 7007 ggen.gen(op1); 7008 } 7009 ggen.gen((op >> 16) & 0xFF); 7010 ggen.gen((op >> 8) & 0xFF); 7011 ggen.gen(op & 0xFF); 7012 } 7013 else if (op & 0xFF0000) 7014 { 7015 ubyte op1 = cast(ubyte)(op >> 16); 7016 if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66) 7017 { 7018 ggen.gen(op1); 7019 if (c.Irex) 7020 ggen.gen(c.Irex | REX); 7021 } 7022 else 7023 { 7024 if (c.Irex) 7025 ggen.gen(c.Irex | REX); 7026 ggen.gen(op1); 7027 } 7028 ggen.gen((op >> 8) & 0xFF); 7029 ggen.gen(op & 0xFF); 7030 } 7031 else 7032 { 7033 if (c.Irex) 7034 ggen.gen(c.Irex | REX); 7035 ggen.gen((op >> 8) & 0xFF); 7036 ggen.gen(op & 0xFF); 7037 } 7038 } 7039 else 7040 { 7041 if (c.Irex) 7042 ggen.gen(c.Irex | REX); 7043 ggen.gen(cast(ubyte)op); 7044 } 7045 Lmodrm: 7046 if (ins & M) /* if modregrm byte */ 7047 { 7048 rm = c.Irm; 7049 ggen.gen(rm); 7050 7051 // Look for an address size override when working with the 7052 // MOD R/M and SIB bytes 7053 7054 if (is32bitaddr( I32, flags)) 7055 { 7056 if (issib(rm)) 7057 ggen.gen(c.Isib); 7058 switch (rm & 0xC0) 7059 { 7060 case 0x40: 7061 do8bit(ggen, cast(FL) c.IFL1,c.IEV1); 
// 8 bit 7062 break; 7063 7064 case 0: 7065 if (!(issib(rm) && (c.Isib & 7) == 5 || 7066 (rm & 7) == 5)) 7067 break; 7068 goto case 0x80; 7069 7070 case 0x80: 7071 { 7072 int cfflags = CFoff; 7073 targ_size_t val = 0; 7074 if (I64) 7075 { 7076 if ((rm & modregrm(3,0,7)) == modregrm(0,0,5)) // if disp32[RIP] 7077 { 7078 cfflags |= CFpc32; 7079 val = -4; 7080 reg_t reg = rm & modregrm(0,7,0); 7081 if (ins & T || 7082 ((op == 0xF6 || op == 0xF7) && (reg == modregrm(0,0,0) || reg == modregrm(0,1,0)))) 7083 { if (ins & E || op == 0xF6) 7084 val = -5; 7085 else if (c.Iflags & CFopsize) 7086 val = -6; 7087 else 7088 val = -8; 7089 } 7090 7091 if (config.exe & (EX_OSX64 | EX_WIN64)) 7092 /* Mach-O and Win64 fixups already take the 4 byte size 7093 * into account, so bias by 4 7094 */ 7095 val += 4; 7096 } 7097 } 7098 do32bit(ggen, cast(FL)c.IFL1,c.IEV1,cfflags,cast(int)val); 7099 break; 7100 } 7101 7102 default: 7103 break; 7104 } 7105 } 7106 else 7107 { 7108 switch (rm & 0xC0) 7109 { case 0x40: 7110 do8bit(ggen, cast(FL) c.IFL1,c.IEV1); // 8 bit 7111 break; 7112 7113 case 0: 7114 if ((rm & 7) != 6) 7115 break; 7116 goto case 0x80; 7117 7118 case 0x80: 7119 do16bit(ggen, cast(FL)c.IFL1,c.IEV1,CFoff); 7120 break; 7121 7122 default: 7123 break; 7124 } 7125 } 7126 } 7127 else 7128 { 7129 if (op == ENTER) 7130 do16bit(ggen, cast(FL)c.IFL1,c.IEV1,0); 7131 } 7132 flags &= CFseg | CFoff | CFselfrel; 7133 if (ins & T) /* if second operand */ 7134 { 7135 if (ins & E) /* if data-8 */ 7136 do8bit(ggen, cast(FL) c.IFL2,c.IEV2); 7137 else if (!I16) 7138 { 7139 switch (op) 7140 { 7141 case 0xC2: /* RETN imm16 */ 7142 case 0xCA: /* RETF imm16 */ 7143 do16: 7144 do16bit(ggen, cast(FL)c.IFL2,c.IEV2,flags); 7145 break; 7146 7147 case 0xA1: 7148 case 0xA3: 7149 if (I64 && c.Irex) 7150 { 7151 do64: 7152 do64bit(ggen, cast(FL)c.IFL2,c.IEV2,flags); 7153 break; 7154 } 7155 goto case 0xA0; 7156 7157 case 0xA0: /* MOV AL,byte ptr [] */ 7158 case 0xA2: 7159 if (c.Iflags & CFaddrsize && !I64) 7160 
goto do16; 7161 else 7162 do32: 7163 do32bit(ggen, cast(FL)c.IFL2,c.IEV2,flags,0); 7164 break; 7165 7166 case 0x9A: 7167 case 0xEA: 7168 if (c.Iflags & CFopsize) 7169 goto ptr1616; 7170 else 7171 goto ptr1632; 7172 7173 case 0x68: // PUSH immed32 7174 if (c.IFL2 == FLblock) 7175 { 7176 c.IFL2 = FLblockoff; 7177 goto do32; 7178 } 7179 else 7180 goto case_default; 7181 7182 case CALL: // CALL rel 7183 case JMP: // JMP rel 7184 flags |= CFselfrel; 7185 goto case_default; 7186 7187 default: 7188 if ((op|0xF) == 0x0F8F) // Jcc rel16 rel32 7189 flags |= CFselfrel; 7190 if (I64 && (op & ~7) == 0xB8 && c.Irex & REX_W) 7191 goto do64; 7192 case_default: 7193 if (c.Iflags & CFopsize) 7194 goto do16; 7195 else 7196 goto do32; 7197 } 7198 } 7199 else 7200 { 7201 switch (op) 7202 { 7203 case 0xC2: 7204 case 0xCA: 7205 goto do16; 7206 7207 case 0xA0: 7208 case 0xA1: 7209 case 0xA2: 7210 case 0xA3: 7211 if (c.Iflags & CFaddrsize) 7212 goto do32; 7213 else 7214 goto do16; 7215 7216 case 0x9A: 7217 case 0xEA: 7218 if (c.Iflags & CFopsize) 7219 goto ptr1632; 7220 else 7221 goto ptr1616; 7222 7223 ptr1616: 7224 ptr1632: 7225 //assert(c.IFL2 == FLfunc); 7226 ggen.flush(); 7227 if (c.IFL2 == FLdatseg) 7228 { 7229 objmod.reftodatseg(seg,ggen.offset,c.IEV2.Vpointer, 7230 c.IEV2.Vseg,flags); 7231 ggen.offset += 4; 7232 } 7233 else 7234 { 7235 s = c.IEV2.Vsym; 7236 ggen.offset += objmod.reftoident(seg,ggen.offset,s,0,flags); 7237 } 7238 break; 7239 7240 case 0x68: // PUSH immed16 7241 if (c.IFL2 == FLblock) 7242 { c.IFL2 = FLblockoff; 7243 goto do16; 7244 } 7245 else 7246 goto case_default16; 7247 7248 case CALL: 7249 case JMP: 7250 flags |= CFselfrel; 7251 goto default; 7252 7253 default: 7254 case_default16: 7255 if (c.Iflags & CFopsize) 7256 goto do32; 7257 else 7258 goto do16; 7259 } 7260 } 7261 } 7262 else if (op == 0xF6) /* TEST mem8,immed8 */ 7263 { 7264 if ((rm & (7<<3)) == 0) 7265 do8bit(ggen, cast(FL)c.IFL2,c.IEV2); 7266 } 7267 else if (op == 0xF7) 7268 { 7269 if ((rm & (7<<3)) 
== 0) /* TEST mem16/32,immed16/32 */ 7270 { 7271 if ((I32 || I64) ^ ((c.Iflags & CFopsize) != 0)) 7272 do32bit(ggen, cast(FL)c.IFL2,c.IEV2,flags,0); 7273 else 7274 do16bit(ggen, cast(FL)c.IFL2,c.IEV2,flags); 7275 } 7276 } 7277 7278 debug 7279 if (ggen.getOffset() - startoffset != calccodsize(c)) 7280 { 7281 printf("actual: %d, calc: %d\n", cast(int)(ggen.getOffset() - startoffset), cast(int)calccodsize(c)); 7282 code_print(c); 7283 assert(0); 7284 } 7285 } 7286 ggen.flush(); 7287 Offset(seg) = ggen.offset; 7288 //printf("-codout(), Coffset = x%x\n", Offset(seg)); 7289 return cast(uint)ggen.offset; /* ending address */ 7290 } 7291 7292 7293 @trusted 7294 private void do64bit(ref MiniCodeBuf pbuf, FL fl, ref evc uev,int flags) 7295 { 7296 char *p; 7297 Symbol *s; 7298 targ_size_t ad; 7299 7300 assert(I64); 7301 switch (fl) 7302 { 7303 case FLconst: 7304 ad = *cast(targ_size_t *) &uev; 7305 L1: 7306 pbuf.genp(8,&ad); 7307 return; 7308 7309 case FLdatseg: 7310 pbuf.flush(); 7311 pbuf.write64(uev.Vpointer); 7312 objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,CFoffset64 | flags); 7313 break; 7314 7315 case FLframehandler: 7316 framehandleroffset = pbuf.getOffset(); 7317 ad = 0; 7318 goto L1; 7319 7320 case FLswitch: 7321 pbuf.flush(); 7322 ad = uev.Vswitch.Btableoffset; 7323 pbuf.write64(ad); 7324 if (config.flags & CFGromable) 7325 objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad); 7326 else 7327 objmod.reftodatseg(pbuf.seg,pbuf.offset,ad,objmod.jmpTableSegment(funcsym_p),CFoff); 7328 break; 7329 7330 case FLcsdata: 7331 case FLfardata: 7332 //symbol_print(uev.Vsym); 7333 // NOTE: In ELFOBJ all symbol refs have been tagged FLextern 7334 // strings and statics are treated like offsets from a 7335 // un-named external with is the start of .rodata or .data 7336 case FLextern: /* external data symbol */ 7337 case FLtlsdata: 7338 pbuf.flush(); 7339 s = uev.Vsym; /* symbol pointer */ 7340 pbuf.write64(uev.Voffset); 7341 
objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,CFoffset64 | flags); 7342 break; 7343 7344 case FLgotoff: 7345 if (config.exe & (EX_OSX | EX_OSX64)) 7346 { 7347 assert(0); 7348 } 7349 else if (config.exe & EX_posix) 7350 { 7351 pbuf.flush(); 7352 s = uev.Vsym; /* symbol pointer */ 7353 pbuf.write64(uev.Voffset); 7354 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,CFoffset64 | flags); 7355 break; 7356 } 7357 else 7358 assert(0); 7359 7360 case FLgot: 7361 if (config.exe & (EX_OSX | EX_OSX64)) 7362 { 7363 funcsym_p.Slocalgotoffset = pbuf.getOffset(); 7364 ad = 0; 7365 goto L1; 7366 } 7367 else if (config.exe & EX_posix) 7368 { 7369 pbuf.flush(); 7370 s = uev.Vsym; /* symbol pointer */ 7371 pbuf.write64(uev.Voffset); 7372 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,CFoffset64 | flags); 7373 break; 7374 } 7375 else 7376 assert(0); 7377 7378 case FLfunc: /* function call */ 7379 s = uev.Vsym; /* symbol pointer */ 7380 assert(TARGET_SEGMENTED || !tyfarfunc(s.ty())); 7381 pbuf.flush(); 7382 pbuf.write64(0); 7383 objmod.reftoident(pbuf.seg,pbuf.offset,s,0,CFoffset64 | flags); 7384 break; 7385 7386 case FLblock: /* displacement to another block */ 7387 ad = uev.Vblock.Boffset - pbuf.getOffset() - 4; 7388 //printf("FLblock: funcoffset = %x, pbuf.getOffset = %x, Boffset = %x, ad = %x\n", funcoffset, pbuf.getOffset(), uev.Vblock.Boffset, ad); 7389 goto L1; 7390 7391 case FLblockoff: 7392 pbuf.flush(); 7393 assert(uev.Vblock); 7394 //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", pbuf.offset, uev.Vblock.Boffset, funcoffset); 7395 pbuf.write64(uev.Vblock.Boffset); 7396 objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset); 7397 break; 7398 7399 default: 7400 WRFL(fl); 7401 assert(0); 7402 } 7403 pbuf.offset += 8; 7404 } 7405 7406 7407 @trusted 7408 private void do32bit(ref MiniCodeBuf pbuf, FL fl, ref evc uev,int flags, int val) 7409 { 7410 char *p; 7411 Symbol *s; 7412 targ_size_t ad; 7413 7414 //printf("do32bit(flags = x%x)\n", 
flags); 7415 switch (fl) 7416 { 7417 case FLconst: 7418 assert(targ_size_t.sizeof == 4 || targ_size_t.sizeof == 8); 7419 ad = * cast(targ_size_t *) &uev; 7420 L1: 7421 pbuf.genp(4,&ad); 7422 return; 7423 7424 case FLdatseg: 7425 pbuf.flush(); 7426 objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,flags); 7427 pbuf.write32(cast(uint)uev.Vpointer); 7428 break; 7429 7430 case FLframehandler: 7431 framehandleroffset = pbuf.getOffset(); 7432 ad = 0; 7433 goto L1; 7434 7435 case FLswitch: 7436 pbuf.flush(); 7437 ad = uev.Vswitch.Btableoffset; 7438 if (config.flags & CFGromable) 7439 { 7440 if (config.exe & (EX_OSX | EX_OSX64)) 7441 { 7442 // These are magic values based on the exact code generated for the switch jump 7443 if (I64) 7444 uev.Vswitch.Btablebase = pbuf.getOffset() + 4; 7445 else 7446 uev.Vswitch.Btablebase = pbuf.getOffset() + 4 - 8; 7447 ad -= uev.Vswitch.Btablebase; 7448 goto L1; 7449 } 7450 else if (config.exe & EX_windos) 7451 { 7452 if (I64) 7453 { 7454 uev.Vswitch.Btablebase = pbuf.getOffset() + 4; 7455 ad -= uev.Vswitch.Btablebase; 7456 goto L1; 7457 } 7458 else 7459 objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad); 7460 } 7461 else 7462 { 7463 objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad); 7464 } 7465 } 7466 else 7467 objmod.reftodatseg(pbuf.seg,pbuf.offset,ad,objmod.jmpTableSegment(funcsym_p),CFoff); 7468 pbuf.write32(cast(uint)ad); 7469 break; 7470 7471 case FLcode: 7472 //assert(JMPJMPTABLE); // the only use case 7473 pbuf.flush(); 7474 ad = *cast(targ_size_t *) &uev + pbuf.getOffset(); 7475 objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad); 7476 pbuf.write32(cast(uint)ad); 7477 break; 7478 7479 case FLcsdata: 7480 case FLfardata: 7481 //symbol_print(uev.Vsym); 7482 7483 // NOTE: In ELFOBJ all symbol refs have been tagged FLextern 7484 // strings and statics are treated like offsets from a 7485 // un-named external with is the start of .rodata or .data 7486 case FLextern: /* external data symbol */ 7487 case FLtlsdata: 7488 pbuf.flush(); 7489 s 
= uev.Vsym;               /* symbol pointer       */
            if (config.exe & EX_windos && I64 && (flags & CFpc32))
            {
                /* This is for those funky fixups where the location to be fixed up
                 * is a 'val' amount back from the current RIP, biased by adding 4.
                 */
                assert(val >= -5 && val <= 0);
                flags |= (-val & 7) << 24;          // set CFREL value
                assert(CFREL == (7 << 24));
                objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,flags);
                pbuf.write32(cast(uint)uev.Voffset);
            }
            else
            {
                objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset + val,flags);
                pbuf.write32(cast(uint)(uev.Voffset + val));
            }
            break;

        case FLgotoff:
            if (config.exe & (EX_OSX | EX_OSX64))
            {
                assert(0);
            }
            else if (config.exe & EX_posix)
            {
                pbuf.flush();
                s = uev.Vsym;               /* symbol pointer */
                objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset + val,flags);
                pbuf.write32(cast(uint)(uev.Voffset + val));
                break;
            }
            else
                assert(0);

        case FLgot:
            if (config.exe & (EX_OSX | EX_OSX64))
            {
                // Remember where the GOT reference within this function is,
                // then emit the displacement via the common path at L1.
                funcsym_p.Slocalgotoffset = pbuf.getOffset();
                ad = 0;
                goto L1;
            }
            else if (config.exe & EX_posix)
            {
                pbuf.flush();
                s = uev.Vsym;               /* symbol pointer */
                objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset + val,flags);
                pbuf.write32(cast(uint)(uev.Voffset + val));
                break;
            }
            else
                assert(0);

        case FLfunc:                        /* function call        */
            s = uev.Vsym;                   /* symbol pointer       */
            if (tyfarfunc(s.ty()))
            {   /* Large code references are always absolute    */
                pbuf.flush();
                pbuf.offset += objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags) - 4;
                pbuf.write32(0);
            }
            else if (s.Sseg == pbuf.seg &&
                     (s.Sclass == SC.static_ || s.Sclass == SC.global) &&
                     s.Sxtrnnum == 0 && flags & CFselfrel)
            {   /* if we know it's relative address     */
                // -4 accounts for the 4-byte displacement field itself
                ad = s.Soffset - pbuf.getOffset() - 4;
                goto L1;
            }
            else
            {
                assert(TARGET_SEGMENTED || !tyfarfunc(s.ty()));
                pbuf.flush();
                objmod.reftoident(pbuf.seg,pbuf.offset,s,val,flags);
                pbuf.write32(cast(uint)(val));
            }
            break;

        case FLblock:                       /* displacement to another block */
            // pc-relative: measured from the end of the 4-byte field
            ad = uev.Vblock.Boffset - pbuf.getOffset() - 4;
            //printf("FLblock: funcoffset = %x, pbuf.getOffset = %x, Boffset = %x, ad = %x\n", funcoffset, pbuf.getOffset(), uev.Vblock.Boffset, ad);
            goto L1;

        case FLblockoff:
            pbuf.flush();
            assert(uev.Vblock);
            //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", pbuf.offset, uev.Vblock.Boffset, funcoffset);
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset);
            pbuf.write32(cast(uint)(uev.Vblock.Boffset));
            break;

        default:
            WRFL(fl);
            assert(0);
    }
    pbuf.offset += 4;
}


/***********************************
 * Append a 16-bit operand to the code buffer, emitting a relocation
 * (fixup) through `objmod` when the operand refers to a symbol, data
 * segment, switch table, block, or function rather than a constant.
 * Params:
 *      pbuf = output code buffer
 *      fl = kind of operand (FLconst, FLdatseg, FLfunc, ...)
 *      uev = operand value (interpretation selected by fl)
 *      flags = CFxxx relocation flags
 */
@trusted
private void do16bit(ref MiniCodeBuf pbuf, FL fl, ref evc uev,int flags)
{
    char *p;                    // NOTE(review): unused — candidate for removal
    Symbol *s;
    targ_size_t ad;

    switch (fl)
    {
        case FLconst:
            // Plain 16-bit immediate, no relocation needed
            pbuf.genp(2,cast(char *) &uev);
            return;

        case FLdatseg:
            pbuf.flush();
            objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,flags);
            pbuf.write16(cast(uint)uev.Vpointer);
            break;

        case FLswitch:
            pbuf.flush();
            ad = uev.Vswitch.Btableoffset;
            if (config.flags & CFGromable)
                objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad);
            else
                objmod.reftodatseg(pbuf.seg,pbuf.offset,ad,objmod.jmpTableSegment(funcsym_p),CFoff);
            pbuf.write16(cast(uint)ad);
            break;

        case FLcsdata:
        case FLfardata:
        case FLextern:                      /* external data symbol */
        case FLtlsdata:
            //assert(SIXTEENBIT || TARGET_SEGMENTED);
            pbuf.flush();
            s = uev.Vsym;                   /* symbol pointer       */
            objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,flags);
            pbuf.write16(cast(uint)uev.Voffset);
            break;

        case FLfunc:                        /* function call        */
            //assert(SIXTEENBIT || TARGET_SEGMENTED);
            s = uev.Vsym;                   /* symbol pointer       */
            if (tyfarfunc(s.ty()))
            {   /* Large code references are always absolute    */
                pbuf.flush();
                pbuf.offset += objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags) - 2;
            }
            else if (s.Sseg == pbuf.seg &&
                     (s.Sclass == SC.static_ || s.Sclass == SC.global) &&
                     s.Sxtrnnum == 0 && flags & CFselfrel)
            {   /* if we know it's relative address     */
                // -2 accounts for the 2-byte displacement field itself
                ad = s.Soffset - pbuf.getOffset() - 2;
                goto L1;
            }
            else
            {
                pbuf.flush();
                objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags);
            }
            pbuf.write16(0);
            break;

        case FLblock:                       /* displacement to another block */
            ad = uev.Vblock.Boffset - pbuf.getOffset() - 2;
            debug
            {
                // Verify the displacement actually fits in 16 bits
                targ_ptrdiff_t delta = uev.Vblock.Boffset - pbuf.getOffset() - 2;
                assert(cast(short)delta == delta);
            }
        L1:
            pbuf.genp(2,&ad);                   // displacement
            return;

        case FLblockoff:
            pbuf.flush();
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset);
            pbuf.write16(cast(uint)uev.Vblock.Boffset);
            break;

        default:
            WRFL(fl);
            assert(0);
    }
    pbuf.offset += 2;
}


/***********************************
 * Append an 8-bit operand to the code buffer.
 * Only constants and short branch displacements to blocks are valid;
 * a block displacement that does not fit in a signed byte is a fatal
 * error reported with source location when available.
 * Params:
 *      pbuf = output code buffer
 *      fl = FLconst or FLblock
 *      uev = operand value
 */
@trusted
private void do8bit(ref MiniCodeBuf pbuf, FL fl, ref evc uev)
{
    ubyte c;

    switch (fl)
    {
        case FLconst:
            c = cast(ubyte)uev.Vuns;
            break;

        case FLblock:
            // pc-relative: measured from the end of the 1-byte field
            targ_ptrdiff_t delta = uev.Vblock.Boffset - pbuf.getOffset() - 1;
            if (cast(byte)delta != delta)
            {
                if (uev.Vblock.Bsrcpos.Slinnum)
                    printf("%s(%d): ", uev.Vblock.Bsrcpos.Sfilename, uev.Vblock.Bsrcpos.Slinnum);
                printf("block displacement of %lld exceeds the maximum offset of -128 to 127.\n", cast(long)delta);
                err_exit();
            }
            c = cast(ubyte)delta;
            // NOTE(review): forward displacement of exactly 0x7F appears to be
            // disallowed here — presumably reserved by the branch-shortening
            // logic elsewhere in this file; confirm before relying on it.
            debug assert(uev.Vblock.Boffset > pbuf.getOffset() || c != 0x7F);
            break;

        default:
            debug printf("fl = %d\n",fl);
            assert(0);
    }
    pbuf.gen(c);
}


/***************************
 * Debug code to dump code structure.
 */

void WRcodlst(code *c)
{
    // Walk the singly-linked instruction list, printing each node
    for (; c; c = code_next(c))
        code_print(c);
}

/***************************
 * Debug dump of a single code node: opcode, REX/VEX prefixes, flags,
 * modregrm/sib bytes, and both operand fields (IEV1/IEV2) decoded
 * according to their FL kinds.
 */
@trusted
extern (C) void code_print(scope code* c)
{
    ubyte ins;
    ubyte rexb;

    if (c == null)
    {
        printf("code 0\n");
        return;
    }

    const op = c.Iop;
    // Select the instruction-size table entry: VEX-encoded, 0F38xx map,
    // 0Fxx map, or the single-byte opcode map.
    if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
        ins = vex_inssize(c);
    else if ((c.Iop & 0xFFFD00) == 0x0F3800)
        ins = inssize2[(op >> 8) & 0xFF];
    else if ((c.Iop & 0xFF00) == 0x0F00)
        ins = inssize2[op & 0xFF];
    else
        ins = inssize[op & 0xFF];

    printf("code %p: nxt=%p ",c,code_next(c));

    if (c.Iflags & CFvex)
    {
        if (c.Iflags & CFvex3)
        {
            // 3-byte VEX prefix; recover equivalent REX bits (R/X/B are
            // stored inverted in the VEX encoding)
            printf("vex=0xC4");
            printf(" 0x%02X", VEX3_B1(c.Ivex));
            printf(" 0x%02X", VEX3_B2(c.Ivex));
            rexb =
                ( c.Ivex.w ? REX_W : 0) |
                (!c.Ivex.r ? REX_R : 0) |
                (!c.Ivex.x ? REX_X : 0) |
                (!c.Ivex.b ? REX_B : 0);
        }
        else
        {
            // 2-byte VEX prefix carries only the R bit
            printf("vex=0xC5");
            printf(" 0x%02X", VEX2_B1(c.Ivex));
            rexb = !c.Ivex.r ? REX_R : 0;
        }
        printf(" ");
    }
    else
        rexb = c.Irex;

    if (rexb)
    {
        // NOTE(review): prints c.Irex even when rexb was derived from the
        // VEX prefix above — confirm that is intended.
        printf("rex=0x%02X ", c.Irex);
        if (rexb & REX_W)
            printf("W");
        if (rexb & REX_R)
            printf("R");
        if (rexb & REX_X)
            printf("X");
        if (rexb & REX_B)
            printf("B");
        printf(" ");
    }
    printf("op=0x%02X",op);

    if ((op & 0xFF) == ESCAPE)
    {
        if ((op & 0xFF00) == ESClinnum)
        {
            printf(" linnum = %d\n",c.IEV1.Vsrcpos.Slinnum);
            return;
        }
        printf(" ESCAPE %d",c.Iop >> 8);
    }
    if (c.Iflags)
        printf(" flg=%x",c.Iflags);
    if (ins & M)                // instruction has a modregrm byte
    {
        uint rm = c.Irm;
        printf(" rm=0x%02X=%d,%d,%d",rm,(rm>>6)&3,(rm>>3)&7,rm&7);
        if (!I16 && issib(rm))
        {
            ubyte sib = c.Isib;
            printf(" sib=%02x=%d,%d,%d",sib,(sib>>6)&3,(sib>>3)&7,sib&7);
        }
        // Dump the displacement operand when the addressing mode has one
        if ((rm & 0xC7) == BPRM || (rm & 0xC0) == 0x80 || (rm & 0xC0) == 0x40)
        {
            switch (c.IFL1)
            {
                case FLconst:
                case FLoffset:
                    printf(" int = %4d",c.IEV1.Vuns);
                    break;

                case FLblock:
                    printf(" block = %p",c.IEV1.Vblock);
                    break;

                case FLswitch:
                case FLblockoff:
                case FLlocalsize:
                case FLframehandler:
                case 0:
                    break;

                case FLdatseg:
                    printf(" FLdatseg %d.%llx",c.IEV1.Vseg,cast(ulong)c.IEV1.Vpointer);
                    break;

                case FLauto:
                case FLfast:
                case FLreg:
                case FLdata:
                case FLudata:
                case FLpara:
                case FLbprel:
                case FLtlsdata:
                case FLextern:
                    printf(" ");
                    WRFL(c.IFL1);
                    printf(" sym='%s'",c.IEV1.Vsym.Sident.ptr);
                    if (c.IEV1.Voffset)
                        printf(".%d", cast(int)c.IEV1.Voffset);
                    break;

                default:
                    WRFL(c.IFL1);
                    break;
            }
        }
    }
    if (ins & T)                // instruction has a second operand (EV2)
    {
        printf(" ");
        WRFL(c.IFL2);
        switch (c.IFL2)
        {
            case FLconst:
                printf(" int = %4d",c.IEV2.Vuns);
                break;

            case FLblock:
                printf(" block = %p",c.IEV2.Vblock);
                break;

            case FLswitch:
            case FLblockoff:
            case 0:
            case FLlocalsize:
            case FLframehandler:
                break;

            case FLdatseg:
                printf(" %d.%llx",c.IEV2.Vseg,cast(ulong)c.IEV2.Vpointer);
                break;

            case FLauto:
            case FLfast:
            case FLreg:
            case FLpara:
            case FLbprel:
            case FLfunc:
            case FLdata:
            case FLudata:
            case FLtlsdata:
                printf(" sym='%s'",c.IEV2.Vsym.Sident.ptr);
                break;

            case FLcode:
                printf(" code = %p",c.IEV2.Vcode);
                break;

            default:
                WRFL(c.IFL2);
                break;
        }
    }
    printf("\n");
}

/**************************************
 * Pretty-print a CF mask.
 * Params:
 *      cf = CF mask
 */
@trusted
extern (C) void CF_print(uint cf)
{
    // Print the name for each set bit, clearing it from cf so a '|'
    // separator is emitted only when more set bits remain.
    void print(uint mask, const(char)* string)
    {
        if (cf & mask)
        {
            printf(string);
            cf &= ~mask;
            if (cf)
                printf("|");
        }
    }

    print(CFindirect, "CFindirect");
    print(CFswitch, "CFswitch");
    print(CFjmp5, "CFjmp5");
    print(CFvex3, "CFvex3");
    print(CFvex, "CFvex");
    print(CFpc32, "CFpc32");
    print(CFoffset64, "CFoffset64");
    print(CFclassinit, "CFclassinit");
    print(CFvolatile, "CFvolatile");
    print(CFtarg2, "CFtarg2");
    print(CFunambig, "CFunambig");
    print(CFselfrel, "CFselfrel");
    print(CFwait, "CFwait");
    print(CFfs, "CFfs");
    print(CFcs, "CFcs");
    print(CFds, "CFds");
    print(CFss, "CFss");
    print(CFes, "CFes");
    print(CFaddrsize, "CFaddrsize");
    print(CFopsize, "CFopsize");
    print(CFpsw, "CFpsw");
    print(CFoff, "CFoff");
    print(CFseg, "CFseg");
    print(CFtarg, "CFtarg");
    print(CFjmp16, "CFjmp16");
    printf("\n");
}