1 /** 2 * SROA structured replacement of aggregate optimization 3 * 4 * This 'slices' a two register wide aggregate into two separate register-sized variables, 5 * enabling much better enregistering. 6 * SROA (Scalar Replacement Of Aggregates) is the common term for this. 7 * 8 * Compiler implementation of the 9 * $(LINK2 https://www.dlang.org, D programming language). 10 * 11 * Copyright: Copyright (C) 2016-2023 by The D Language Foundation, All Rights Reserved 12 * Authors: $(LINK2 https://www.digitalmars.com, Walter Bright) 13 * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 14 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/gsroa.c, backend/gsroa.d) 15 */ 16 17 module dmd.backend.gsroa; 18 19 version (SCPP) 20 version = COMPILE; 21 version (MARS) 22 version = COMPILE; 23 24 version (COMPILE) 25 { 26 27 import core.stdc.stdio; 28 import core.stdc.stdlib; 29 import core.stdc.string; 30 import core.stdc.time; 31 32 import dmd.backend.cc; 33 import dmd.backend.cdef; 34 import dmd.backend.code_x86; 35 import dmd.backend.oper; 36 import dmd.backend.global; 37 import dmd.backend.el; 38 import dmd.backend.symtab; 39 import dmd.backend.ty; 40 import dmd.backend.type; 41 42 import dmd.backend.dlist; 43 import dmd.backend.dvec; 44 45 extern (C++): 46 47 nothrow: 48 @safe: 49 50 private enum log = false; // print logging info 51 private enum enable = true; // enable SROA 52 53 int REGSIZE(); 54 55 alias SLICESIZE = REGSIZE; // slices are all register-sized 56 enum MAXSLICES = 2; // max # of pieces we can slice an aggregate into 57 58 struct SymInfo 59 { 60 bool canSlice; 61 bool accessSlice; // if Symbol was accessed as a slice 62 tym_t[MAXSLICES] ty; // type of each slice 63 SYMIDX si0; // index of first slice, the rest follow sequentially 64 } 65 66 /******************************** 67 * Gather information about slice-able variables by scanning e. 68 * Params: 69 * symtab = symbol table 70 * e = expression to scan 71 * sia = where to put gathered information 72 */ 73 @trusted 74 extern (D) private void sliceStructs_Gather(ref const symtab_t symtab, SymInfo[] sia, const(elem)* e) 75 { 76 while (1) 77 { 78 switch (e.Eoper) 79 { 80 case OPvar: 81 { 82 const si = e.EV.Vsym.Ssymnum; 83 if (si != SYMIDX.max && sia[si].canSlice) 84 { 85 assert(si < symtab.length); 86 const n = nthSlice(e); 87 const sz = getSize(e); 88 if (sz == 2 * SLICESIZE && !tyfv(e.Ety) && 89 tybasic(e.Ety) != TYldouble && tybasic(e.Ety) != TYildouble) 90 { 91 // Rewritten as OPpair later 92 } 93 else if (n != NOTSLICE) 94 { 95 if (!sia[si].accessSlice) 96 { 97 /* [1] default as pointer type 98 */ 99 foreach (ref ty; sia[si].ty) 100 ty = TYnptr; 101 102 const s = e.EV.Vsym; 103 const t = s.Stype; 104 if (tybasic(t.Tty) == TYstruct) 105 { 106 if (t.Ttag.Sstruct.Sflags & STRbitfields) 107 { 108 // Can get "used before set" errors from slicing this 109 // Would be workable if the symbol was flagged instead of the type 110 sia[si].canSlice = false; 111 return; 112 } 113 114 if (const targ1 = t.Ttag.Sstruct.Sarg1type) 115 if (const targ2 = t.Ttag.Sstruct.Sarg2type) 116 { 117 sia[si].ty[0] = targ1.Tty; 118 sia[si].ty[1] = targ2.Tty; 119 120 if (config.fpxmmregs && 121 tyxmmreg(targ1.Tty) && !tyxmmreg(targ2.Tty) || 122 !tyxmmreg(targ1.Tty) && tyxmmreg(targ2.Tty)) 123 124 { 125 /* https://issues.dlang.org/show_bug.cgi?22438 126 * disable till fixed 127 */ 128 if (log) printf(" [%d] can't because xmmgpr or gprxmm\n", cast(int)si); 129 sia[si].canSlice = false; 130 return; 131 } 132 } 133 } 134 else if (tybasic(t.Tty) == TYarray) 135 { 136 // could be an array of floats, deal with this later 137 if (log) printf(" [%d] can't because array of floats\n", cast(int)si); 138 sia[si].canSlice = false; 139 return; 140 } 141 } 142 if (sz == SLICESIZE) 143 { 144 sia[si].ty[n] = tybasic(e.Ety); 145 if (SLICESIZE == 4 && config.fpxmmregs && tyxmmreg(e.Ety)) 146 { 147 /* for 32 bits, OPstreq is converted to a TYllong. 148 * It needs to be converted to cfloat, otherwise XMM 149 * registers cannot be handled. This fails: 150 * struct F { float x, y; } 151 * void foo(F p1, ref F rfp) { rfp = F(p.x, p.y); } 152 */ 153 if (log) printf(" [%d] can't because 32 bit XMM\n", cast(int)si); 154 sia[si].canSlice = false; 155 return; 156 } 157 if (config.fpxmmregs && tyxmmreg(e.Ety)) 158 { 159 /* Too many issues with mixing XMM with non-XMM 160 * One problem is an OPpair with one operand a long, the other XMM. 161 * Giving it up for now. 162 */ 163 if (log) printf(" [%d] can't because XMM\n", cast(int)si); 164 sia[si].canSlice = false; 165 return; 166 } 167 } 168 sia[si].accessSlice = true; 169 } 170 else 171 { 172 if (log) printf(" [%d] can't because NOTSLICE 1\n", cast(int)si); 173 sia[si].canSlice = false; 174 } 175 } 176 return; 177 } 178 179 default: 180 if (OTassign(e.Eoper)) 181 { 182 if (OTbinary(e.Eoper)) 183 sliceStructs_Gather(symtab, sia, e.EV.E2); 184 185 // Assignment to a whole var will disallow SROA 186 if (e.EV.E1.Eoper == OPvar) 187 { 188 const e1 = e.EV.E1; 189 const si = e1.EV.Vsym.Ssymnum; 190 if (si != SYMIDX.max && sia[si].canSlice) 191 { 192 assert(si < symtab.length); 193 if (nthSlice(e1) == NOTSLICE) 194 { 195 if (log) 196 { 197 printf(" [%d] can't because NOTSLICE 2\n", cast(int)si); 198 elem_print(e); 199 } 200 sia[si].canSlice = false; 201 } 202 // Disable SROA on OSX32 (because XMM registers?) 203 // https://issues.dlang.org/show_bug.cgi?id=15206 204 // https://github.com/dlang/dmd/pull/8034 205 else if (!(config.exe & EX_OSX)) 206 { 207 sliceStructs_Gather(symtab, sia, e.EV.E1); 208 } 209 } 210 return; 211 } 212 e = e.EV.E1; 213 break; 214 } 215 if (OTunary(e.Eoper)) 216 { 217 e = e.EV.E1; 218 break; 219 } 220 if (OTbinary(e.Eoper)) 221 { 222 sliceStructs_Gather(symtab, sia, e.EV.E2); 223 e = e.EV.E1; 224 break; 225 } 226 return; 227 } 228 } 229 } 230 231 /*********************************** 232 * Rewrite expression tree e based on info in sia[]. 233 * Params: 234 * symtab = symbol table 235 * sia = slicing info 236 * e = expression tree to rewrite in place 237 */ 238 @trusted 239 extern (D) private void sliceStructs_Replace(ref symtab_t symtab, const SymInfo[] sia, elem *e) 240 { 241 while (1) 242 { 243 switch (e.Eoper) 244 { 245 case OPvar: 246 { 247 Symbol *s = e.EV.Vsym; 248 const si = s.Ssymnum; 249 //printf("e: %d %d\n", si, sia[si].canSlice); 250 //elem_print(e); 251 if (si != SYMIDX.max && sia[si].canSlice) 252 { 253 const n = nthSlice(e); 254 if (getSize(e) == 2 * SLICESIZE) 255 { 256 if (log) { printf("slicing struct before "); elem_print(e); } 257 // Rewrite e as (si0 OPpair si0+1) 258 elem *e1 = el_calloc(); 259 el_copy(e1, e); 260 e1.Ety = sia[si].ty[0]; 261 262 elem *e2 = el_calloc(); 263 el_copy(e2, e); 264 Symbol *s1 = symtab[sia[si].si0 + 1]; // +1 for second slice 265 e2.Ety = sia[si].ty[1]; 266 e2.EV.Vsym = s1; 267 e2.EV.Voffset = 0; 268 269 e.Eoper = OPpair; 270 e.EV.E1 = e1; 271 e.EV.E2 = e2; 272 273 if (tycomplex(e.Ety)) 274 { 275 /* Ensure complex OPpair operands are floating point types 276 * because [1] may have defaulted them to a pointer type. 277 * https://issues.dlang.org/show_bug.cgi?id=18936 278 */ 279 tym_t tyop; 280 switch (tybasic(e.Ety)) 281 { 282 case TYcfloat: tyop = TYfloat; break; 283 case TYcdouble: tyop = TYdouble; break; 284 case TYcldouble: tyop = TYldouble; break; 285 default: 286 assert(0); 287 } 288 if (!tyfloating(e1.Ety)) 289 e1.Ety = tyop; 290 if (!tyfloating(e2.Ety)) 291 e2.Ety = tyop; 292 } 293 if (log) { printf("slicing struct after\n"); elem_print(e); } 294 } 295 else if (n == 0) // the first slice of the symbol is the same as the original 296 { 297 if (log) { printf("slicing slice 0 "); elem_print(e); } 298 } 299 else // the nth slice 300 { 301 if (log) { printf("slicing slice %d ", n); elem_print(e); } 302 e.EV.Vsym = symtab[sia[si].si0 + n]; 303 e.EV.Voffset -= n * SLICESIZE; 304 //printf("replaced with:\n"); 305 //elem_print(e); 306 } 307 } 308 return; 309 } 310 311 case OPrelconst: 312 { 313 Symbol *s = e.EV.Vsym; 314 const si = s.Ssymnum; 315 //printf("e: %d %d\n", si, sia[si].canSlice); 316 //elem_print(e); 317 if (si != SYMIDX.max && sia[si].canSlice) 318 { 319 printf("shouldn't be slicing %s\n", s.Sident.ptr); 320 assert(0); 321 } 322 return; 323 } 324 325 default: 326 if (OTunary(e.Eoper)) 327 { 328 e = e.EV.E1; 329 break; 330 } 331 if (OTbinary(e.Eoper)) 332 { 333 sliceStructs_Replace(symtab, sia, e.EV.E2); 334 e = e.EV.E1; 335 break; 336 } 337 return; 338 } 339 } 340 } 341 342 @trusted 343 void sliceStructs(ref symtab_t symtab, block* startblock) 344 { 345 if (enable) // disable while we test the inliner 346 { 347 if (log) printf("\n************ sliceStructs() %s *******************\n", funcsym_p.Sident.ptr); 348 const sia_length = symtab.length; 349 /* 3 is because it is used for two arrays, sia[] and sia2[]. 350 * sia2[] can grow to twice the size of sia[], as symbols can get split into two. 351 */ 352 debug 353 enum tmp_length = 3; 354 else 355 enum tmp_length = 6; 356 SymInfo[tmp_length] tmp = void; 357 358 import dmd.common.string : SmallBuffer; 359 auto sb = SmallBuffer!(SymInfo)(3 * sia_length, tmp[]); 360 SymInfo* sip = sb.ptr; 361 memset(sip, 0, 3 * sia_length * SymInfo.sizeof); 362 SymInfo[] sia = sip[0 .. sia_length]; 363 SymInfo[] sia2 = sip[sia_length .. sia_length * 3]; 364 365 if (log) foreach (si; 0 .. symtab.length) 366 { 367 Symbol *s = symtab[si]; 368 printf("[%d]: %p %d %s %s\n", cast(int)si, s, cast(int)type_size(s.Stype), s.Sident.ptr, tym_str(s.Stype.Tty)); 369 } 370 371 bool anySlice = false; 372 foreach (si; 0 .. symtab.length) 373 { 374 Symbol *s = symtab[si]; 375 if (log) printf("slice1 [%d]: %s\n", cast(int)si, s.Sident.ptr); 376 377 //if (strcmp(s.Sident.ptr, "__inlineretval3".ptr) == 0) { printf("can't\n"); sia[si].canSlice = false; continue; } 378 if (!(s.Sflags & SFLunambig)) // if somebody took the address of s 379 { 380 if (log) printf(" can't because SFLunambig\n"); 381 sia[si].canSlice = false; 382 continue; 383 } 384 385 const sz = type_size(s.Stype); 386 if (sz != 2 * SLICESIZE || 387 tyvector(s.Stype.Tty) || // SIMD types 388 tyfv(s.Stype.Tty) || tybasic(s.Stype.Tty) == TYhptr) // because there is no TYseg 389 { 390 if (log) printf(" can't because size or pointer type\n"); 391 sia[si].canSlice = false; 392 continue; 393 } 394 395 switch (s.Sclass) 396 { 397 case SC.fastpar: 398 case SC.register: 399 case SC.auto_: 400 case SC.shadowreg: 401 case SC.parameter: 402 anySlice = true; 403 sia[si].canSlice = true; 404 sia[si].accessSlice = false; 405 // We can't slice whole XMM registers 406 if (tyxmmreg(s.Stype.Tty) && 407 isXMMreg(s.Spreg) && s.Spreg2 == NOREG) 408 { 409 if (log) printf(" can't because XMM reg\n"); 410 sia[si].canSlice = false; 411 } 412 break; 413 414 case SC.stack: 415 case SC.pseudo: 416 case SC.static_: 417 case SC.bprel: 418 if (log) printf(" can't because Sclass\n"); 419 sia[si].canSlice = false; 420 break; 421 422 default: 423 symbol_print(s); 424 assert(0); 425 } 426 } 427 428 if (!anySlice) 429 return; 430 431 foreach (b; BlockRange(startblock)) 432 { 433 if (b.BC == BCasm) 434 return; 435 if (b.Belem) 436 sliceStructs_Gather(symtab, sia, b.Belem); 437 } 438 439 { // scope needed because of goto skipping declarations 440 bool any = false; 441 int n = 0; // the number of symbols added 442 foreach (si; 0 .. sia_length) 443 { 444 sia2[si + n].canSlice = false; 445 if (sia[si].canSlice) 446 { 447 // If never did access it as a slice, don't slice 448 if (!sia[si].accessSlice) 449 { 450 if (log) printf(" can't slice %s because no accessSlice\n", symtab[si].Sident.ptr); 451 sia[si].canSlice = false; 452 continue; 453 } 454 455 /* Split slice-able symbol sold into two symbols, 456 * (sold,snew) in adjacent slots in the symbol table. 457 */ 458 Symbol *sold = symtab[si + n]; 459 460 const idlen = 2 + strlen(sold.Sident.ptr) + 2; 461 char *id = cast(char *)malloc(idlen + 1); 462 if (!id) 463 err_nomem(); 464 const len = snprintf(id, idlen + 1, "__%s_%d", sold.Sident.ptr, SLICESIZE); 465 assert(len == idlen); 466 if (log) printf("retyping slice symbol %s %s\n", sold.Sident.ptr, tym_str(sia[si].ty[0])); 467 if (log) printf("creating slice symbol %s %s\n", id, tym_str(sia[si].ty[1])); 468 Symbol *snew = symbol_calloc(id[0 .. idlen]); 469 free(id); 470 snew.Sclass = sold.Sclass; 471 snew.Sfl = sold.Sfl; 472 snew.Sflags = sold.Sflags; 473 if (snew.Sclass == SC.fastpar || snew.Sclass == SC.shadowreg) 474 { 475 snew.Spreg = sold.Spreg2; 476 snew.Spreg2 = NOREG; 477 sold.Spreg2 = NOREG; 478 } 479 type_free(sold.Stype); 480 sold.Stype = type_fake(sia[si].ty[0]); 481 sold.Stype.Tcount++; 482 snew.Stype = type_fake(sia[si].ty[1]); 483 snew.Stype.Tcount++; 484 485 // insert snew into symtab[si + n + 1] 486 symbol_insert(symtab, snew, si + n + 1); 487 488 sia2[si + n].canSlice = true; 489 sia2[si + n].si0 = si + n; 490 sia2[si + n].ty[] = sia[si].ty[]; 491 ++n; 492 any = true; 493 } 494 } 495 if (!any) 496 return; 497 } 498 499 foreach (si; 0 .. symtab.length) 500 { 501 Symbol *s = symtab[si]; 502 assert(s.Ssymnum == si); 503 } 504 505 foreach (b; BlockRange(startblock)) 506 { 507 if (b.Belem) 508 sliceStructs_Replace(symtab, sia2, b.Belem); 509 } 510 511 static if (0) 512 { 513 printf("after slicing:\n"); 514 foreach (b; BlockRange(startblock)) 515 { 516 if (b.Belem) 517 elem_print(b.Belem); 518 } 519 printf("after slicing done\n"); 520 } 521 522 } 523 } 524 525 526 /************************************* 527 * Determine if `e` is a slice. 528 * Params: 529 * e = elem that may be a slice 530 * Returns: 531 * slice number if it is, NOTSLICE if not 532 */ 533 enum NOTSLICE = -1; 534 int nthSlice(const(elem)* e) 535 { 536 const sz = tysize(e.Ety); // not getSize(e) because type_fake(TYstruct) doesn't work 537 if (sz == -1) 538 return NOTSLICE; 539 const sliceSize = SLICESIZE; 540 541 /* if sz is less than sliceSize, this causes problems because if, say, 542 * sz is 4 while sliceSize is 8, and sz gets enregistered, then assigning to 543 * the lower 4 bytes of sz will zero out the upper 4 bytes. 544 * https://github.com/dlang/dmd/pull/13220 545 */ 546 if (sz != sliceSize) 547 return NOTSLICE; 548 549 /* See if e fits in a slice 550 */ 551 const lwr = e.EV.Voffset; 552 const upr = lwr + sz; 553 if (0 <= lwr && upr <= sliceSize) 554 return 0; 555 if (sliceSize <= lwr && upr <= sliceSize * 2) 556 return 1; 557 558 return NOTSLICE; 559 } 560 561 /****************************************** 562 * Get size of an elem e. 563 */ 564 private int getSize(const(elem)* e) 565 { 566 int sz = tysize(e.Ety); 567 if (sz == -1 && e.ET && (tybasic(e.Ety) == TYstruct || tybasic(e.Ety) == TYarray)) 568 sz = cast(int)type_size(e.ET); 569 return sz; 570 } 571 572 }