1 /** 2 * SROA structured replacement of aggregate optimization 3 * 4 * This 'slices' a two register wide aggregate into two separate register-sized variables, 5 * enabling much better enregistering. 6 * SROA (Scalar Replacement Of Aggregates) is the common term for this. 7 * 8 * Compiler implementation of the 9 * $(LINK2 https://www.dlang.org, D programming language). 10 * 11 * Copyright: Copyright (C) 2016-2023 by The D Language Foundation, All Rights Reserved 12 * Authors: $(LINK2 https://www.digitalmars.com, Walter Bright) 13 * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 14 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/gsroa.c, backend/gsroa.d) 15 */ 16 17 module dmd.backend.gsroa; 18 19 import core.stdc.stdio; 20 import core.stdc.stdlib; 21 import core.stdc.string; 22 import core.stdc.time; 23 24 import dmd.backend.cc; 25 import dmd.backend.cdef; 26 import dmd.backend.code_x86; 27 import dmd.backend.oper; 28 import dmd.backend.global; 29 import dmd.backend.el; 30 import dmd.backend.symtab; 31 import dmd.backend.ty; 32 import dmd.backend.type; 33 34 import dmd.backend.dlist; 35 import dmd.backend.dvec; 36 37 38 nothrow: 39 @safe: 40 41 private enum log = false; // print logging info 42 private enum enable = true; // enable SROA 43 44 alias SLICESIZE = REGSIZE; // slices are all register-sized 45 enum MAXSLICES = 2; // max # of pieces we can slice an aggregate into 46 47 struct SymInfo 48 { 49 bool canSlice; 50 bool accessSlice; // if Symbol was accessed as a slice 51 tym_t[MAXSLICES] ty; // type of each slice 52 SYMIDX si0; // index of first slice, the rest follow sequentially 53 } 54 55 /******************************** 56 * Gather information about slice-able variables by scanning e. 57 * Params: 58 * symtab = symbol table 59 * e = expression to scan 60 * sia = where to put gathered information 61 */ 62 @trusted 63 extern (D) private void sliceStructs_Gather(ref const symtab_t symtab, SymInfo[] sia, const(elem)* e) 64 { 65 while (1) 66 { 67 switch (e.Eoper) 68 { 69 case OPvar: 70 { 71 const si = e.EV.Vsym.Ssymnum; 72 if (si != SYMIDX.max && sia[si].canSlice) 73 { 74 assert(si < symtab.length); 75 const n = nthSlice(e); 76 const sz = getSize(e); 77 if (sz == 2 * SLICESIZE && !tyfv(e.Ety) && 78 tybasic(e.Ety) != TYldouble && tybasic(e.Ety) != TYildouble) 79 { 80 // Rewritten as OPpair later 81 } 82 else if (n != NOTSLICE) 83 { 84 if (!sia[si].accessSlice) 85 { 86 /* [1] default as pointer type 87 */ 88 foreach (ref ty; sia[si].ty) 89 ty = TYnptr; 90 91 const s = e.EV.Vsym; 92 const t = s.Stype; 93 if (tybasic(t.Tty) == TYstruct) 94 { 95 if (t.Ttag.Sstruct.Sflags & STRbitfields) 96 { 97 // Can get "used before set" errors from slicing this 98 // Would be workable if the symbol was flagged instead of the type 99 sia[si].canSlice = false; 100 return; 101 } 102 103 if (const targ1 = t.Ttag.Sstruct.Sarg1type) 104 if (const targ2 = t.Ttag.Sstruct.Sarg2type) 105 { 106 sia[si].ty[0] = targ1.Tty; 107 sia[si].ty[1] = targ2.Tty; 108 109 if (config.fpxmmregs && 110 tyxmmreg(targ1.Tty) && !tyxmmreg(targ2.Tty) || 111 !tyxmmreg(targ1.Tty) && tyxmmreg(targ2.Tty)) 112 113 { 114 /* https://issues.dlang.org/show_bug.cgi?22438 115 * disable till fixed 116 */ 117 if (log) printf(" [%d] can't because xmmgpr or gprxmm\n", cast(int)si); 118 sia[si].canSlice = false; 119 return; 120 } 121 } 122 } 123 else if (tybasic(t.Tty) == TYarray) 124 { 125 // could be an array of floats, deal with this later 126 if (log) printf(" [%d] can't because array of floats\n", cast(int)si); 127 sia[si].canSlice = false; 128 return; 129 } 130 } 131 if (sz == SLICESIZE) 132 { 133 sia[si].ty[n] = tybasic(e.Ety); 134 if (SLICESIZE == 4 && config.fpxmmregs && tyxmmreg(e.Ety)) 135 { 136 /* for 32 bits, OPstreq is converted to a TYllong. 137 * It needs to be converted to cfloat, otherwise XMM 138 * registers cannot be handled. This fails: 139 * struct F { float x, y; } 140 * void foo(F p1, ref F rfp) { rfp = F(p.x, p.y); } 141 */ 142 if (log) printf(" [%d] can't because 32 bit XMM\n", cast(int)si); 143 sia[si].canSlice = false; 144 return; 145 } 146 if (config.fpxmmregs && tyxmmreg(e.Ety)) 147 { 148 /* Too many issues with mixing XMM with non-XMM 149 * One problem is an OPpair with one operand a long, the other XMM. 150 * Giving it up for now. 151 */ 152 if (log) printf(" [%d] can't because XMM\n", cast(int)si); 153 sia[si].canSlice = false; 154 return; 155 } 156 } 157 sia[si].accessSlice = true; 158 } 159 else 160 { 161 if (log) printf(" [%d] can't because NOTSLICE 1\n", cast(int)si); 162 sia[si].canSlice = false; 163 } 164 } 165 return; 166 } 167 168 default: 169 if (OTassign(e.Eoper)) 170 { 171 if (OTbinary(e.Eoper)) 172 sliceStructs_Gather(symtab, sia, e.EV.E2); 173 174 // Assignment to a whole var will disallow SROA 175 if (e.EV.E1.Eoper == OPvar) 176 { 177 const e1 = e.EV.E1; 178 const si = e1.EV.Vsym.Ssymnum; 179 if (si != SYMIDX.max && sia[si].canSlice) 180 { 181 assert(si < symtab.length); 182 if (nthSlice(e1) == NOTSLICE) 183 { 184 if (log) 185 { 186 printf(" [%d] can't because NOTSLICE 2\n", cast(int)si); 187 elem_print(e); 188 } 189 sia[si].canSlice = false; 190 } 191 // Disable SROA on OSX32 (because XMM registers?) 192 // https://issues.dlang.org/show_bug.cgi?id=15206 193 // https://github.com/dlang/dmd/pull/8034 194 else if (!(config.exe & EX_OSX)) 195 { 196 sliceStructs_Gather(symtab, sia, e.EV.E1); 197 } 198 } 199 return; 200 } 201 e = e.EV.E1; 202 break; 203 } 204 if (OTunary(e.Eoper)) 205 { 206 e = e.EV.E1; 207 break; 208 } 209 if (OTbinary(e.Eoper)) 210 { 211 sliceStructs_Gather(symtab, sia, e.EV.E2); 212 e = e.EV.E1; 213 break; 214 } 215 return; 216 } 217 } 218 } 219 220 /*********************************** 221 * Rewrite expression tree e based on info in sia[]. 222 * Params: 223 * symtab = symbol table 224 * sia = slicing info 225 * e = expression tree to rewrite in place 226 */ 227 @trusted 228 extern (D) private void sliceStructs_Replace(ref symtab_t symtab, const SymInfo[] sia, elem *e) 229 { 230 while (1) 231 { 232 switch (e.Eoper) 233 { 234 case OPvar: 235 { 236 Symbol *s = e.EV.Vsym; 237 const si = s.Ssymnum; 238 //printf("e: %d %d\n", si, sia[si].canSlice); 239 //elem_print(e); 240 if (si != SYMIDX.max && sia[si].canSlice) 241 { 242 const n = nthSlice(e); 243 if (getSize(e) == 2 * SLICESIZE) 244 { 245 if (log) { printf("slicing struct before "); elem_print(e); } 246 // Rewrite e as (si0 OPpair si0+1) 247 elem *e1 = el_calloc(); 248 el_copy(e1, e); 249 e1.Ety = sia[si].ty[0]; 250 251 elem *e2 = el_calloc(); 252 el_copy(e2, e); 253 Symbol *s1 = symtab[sia[si].si0 + 1]; // +1 for second slice 254 e2.Ety = sia[si].ty[1]; 255 e2.EV.Vsym = s1; 256 e2.EV.Voffset = 0; 257 258 e.Eoper = OPpair; 259 e.EV.E1 = e1; 260 e.EV.E2 = e2; 261 262 if (tycomplex(e.Ety)) 263 { 264 /* Ensure complex OPpair operands are floating point types 265 * because [1] may have defaulted them to a pointer type. 266 * https://issues.dlang.org/show_bug.cgi?id=18936 267 */ 268 tym_t tyop; 269 switch (tybasic(e.Ety)) 270 { 271 case TYcfloat: tyop = TYfloat; break; 272 case TYcdouble: tyop = TYdouble; break; 273 case TYcldouble: tyop = TYldouble; break; 274 default: 275 assert(0); 276 } 277 if (!tyfloating(e1.Ety)) 278 e1.Ety = tyop; 279 if (!tyfloating(e2.Ety)) 280 e2.Ety = tyop; 281 } 282 if (log) { printf("slicing struct after\n"); elem_print(e); } 283 } 284 else if (n == 0) // the first slice of the symbol is the same as the original 285 { 286 if (log) { printf("slicing slice 0 "); elem_print(e); } 287 } 288 else // the nth slice 289 { 290 if (log) { printf("slicing slice %d ", n); elem_print(e); } 291 e.EV.Vsym = symtab[sia[si].si0 + n]; 292 e.EV.Voffset -= n * SLICESIZE; 293 //printf("replaced with:\n"); 294 //elem_print(e); 295 } 296 } 297 return; 298 } 299 300 case OPrelconst: 301 { 302 Symbol *s = e.EV.Vsym; 303 const si = s.Ssymnum; 304 //printf("e: %d %d\n", si, sia[si].canSlice); 305 //elem_print(e); 306 if (si != SYMIDX.max && sia[si].canSlice) 307 { 308 printf("shouldn't be slicing %s\n", s.Sident.ptr); 309 assert(0); 310 } 311 return; 312 } 313 314 default: 315 if (OTunary(e.Eoper)) 316 { 317 e = e.EV.E1; 318 break; 319 } 320 if (OTbinary(e.Eoper)) 321 { 322 sliceStructs_Replace(symtab, sia, e.EV.E2); 323 e = e.EV.E1; 324 break; 325 } 326 return; 327 } 328 } 329 } 330 331 @trusted 332 void sliceStructs(ref symtab_t symtab, block* startblock) 333 { 334 if (enable) // disable while we test the inliner 335 { 336 if (log) printf("\n************ sliceStructs() %s *******************\n", funcsym_p.Sident.ptr); 337 const sia_length = symtab.length; 338 /* 3 is because it is used for two arrays, sia[] and sia2[]. 339 * sia2[] can grow to twice the size of sia[], as symbols can get split into two. 340 */ 341 debug 342 enum tmp_length = 3; 343 else 344 enum tmp_length = 6; 345 SymInfo[tmp_length] tmp = void; 346 347 import dmd.common.string : SmallBuffer; 348 auto sb = SmallBuffer!(SymInfo)(3 * sia_length, tmp[]); 349 SymInfo* sip = sb.ptr; 350 memset(sip, 0, 3 * sia_length * SymInfo.sizeof); 351 SymInfo[] sia = sip[0 .. sia_length]; 352 SymInfo[] sia2 = sip[sia_length .. sia_length * 3]; 353 354 if (log) foreach (si; 0 .. symtab.length) 355 { 356 Symbol *s = symtab[si]; 357 printf("[%d]: %p %d %s %s\n", cast(int)si, s, cast(int)type_size(s.Stype), s.Sident.ptr, tym_str(s.Stype.Tty)); 358 } 359 360 bool anySlice = false; 361 foreach (si; 0 .. symtab.length) 362 { 363 Symbol *s = symtab[si]; 364 if (log) printf("slice1 [%d]: %s\n", cast(int)si, s.Sident.ptr); 365 366 //if (strcmp(s.Sident.ptr, "__inlineretval3".ptr) == 0) { printf("can't\n"); sia[si].canSlice = false; continue; } 367 if (!(s.Sflags & SFLunambig)) // if somebody took the address of s 368 { 369 if (log) printf(" can't because SFLunambig\n"); 370 sia[si].canSlice = false; 371 continue; 372 } 373 374 const sz = type_size(s.Stype); 375 if (sz != 2 * SLICESIZE || 376 tyvector(s.Stype.Tty) || // SIMD types 377 tyfv(s.Stype.Tty) || tybasic(s.Stype.Tty) == TYhptr) // because there is no TYseg 378 { 379 if (log) printf(" can't because size or pointer type\n"); 380 sia[si].canSlice = false; 381 continue; 382 } 383 384 switch (s.Sclass) 385 { 386 case SC.fastpar: 387 case SC.register: 388 case SC.auto_: 389 case SC.shadowreg: 390 case SC.parameter: 391 anySlice = true; 392 sia[si].canSlice = true; 393 sia[si].accessSlice = false; 394 // We can't slice whole XMM registers 395 if (tyxmmreg(s.Stype.Tty) && 396 isXMMreg(s.Spreg) && s.Spreg2 == NOREG) 397 { 398 if (log) printf(" can't because XMM reg\n"); 399 sia[si].canSlice = false; 400 } 401 break; 402 403 case SC.stack: 404 case SC.pseudo: 405 case SC.static_: 406 case SC.bprel: 407 if (log) printf(" can't because Sclass\n"); 408 sia[si].canSlice = false; 409 break; 410 411 default: 412 symbol_print(s); 413 assert(0); 414 } 415 } 416 417 if (!anySlice) 418 return; 419 420 foreach (b; BlockRange(startblock)) 421 { 422 if (b.BC == BCasm) 423 return; 424 if (b.Belem) 425 sliceStructs_Gather(symtab, sia, b.Belem); 426 } 427 428 { // scope needed because of goto skipping declarations 429 bool any = false; 430 int n = 0; // the number of symbols added 431 foreach (si; 0 .. sia_length) 432 { 433 sia2[si + n].canSlice = false; 434 if (sia[si].canSlice) 435 { 436 // If never did access it as a slice, don't slice 437 if (!sia[si].accessSlice) 438 { 439 if (log) printf(" can't slice %s because no accessSlice\n", symtab[si].Sident.ptr); 440 sia[si].canSlice = false; 441 continue; 442 } 443 444 /* Split slice-able symbol sold into two symbols, 445 * (sold,snew) in adjacent slots in the symbol table. 446 */ 447 Symbol *sold = symtab[si + n]; 448 449 const idlen = 2 + strlen(sold.Sident.ptr) + 2; 450 char *id = cast(char *)malloc(idlen + 1); 451 if (!id) 452 err_nomem(); 453 const len = snprintf(id, idlen + 1, "__%s_%d", sold.Sident.ptr, SLICESIZE); 454 assert(len == idlen); 455 if (log) printf("retyping slice symbol %s %s\n", sold.Sident.ptr, tym_str(sia[si].ty[0])); 456 if (log) printf("creating slice symbol %s %s\n", id, tym_str(sia[si].ty[1])); 457 Symbol *snew = symbol_calloc(id[0 .. idlen]); 458 free(id); 459 snew.Sclass = sold.Sclass; 460 snew.Sfl = sold.Sfl; 461 snew.Sflags = sold.Sflags; 462 if (snew.Sclass == SC.fastpar || snew.Sclass == SC.shadowreg) 463 { 464 snew.Spreg = sold.Spreg2; 465 snew.Spreg2 = NOREG; 466 sold.Spreg2 = NOREG; 467 } 468 type_free(sold.Stype); 469 sold.Stype = type_fake(sia[si].ty[0]); 470 sold.Stype.Tcount++; 471 snew.Stype = type_fake(sia[si].ty[1]); 472 snew.Stype.Tcount++; 473 474 // insert snew into symtab[si + n + 1] 475 symbol_insert(symtab, snew, si + n + 1); 476 477 sia2[si + n].canSlice = true; 478 sia2[si + n].si0 = si + n; 479 sia2[si + n].ty[] = sia[si].ty[]; 480 ++n; 481 any = true; 482 } 483 } 484 if (!any) 485 return; 486 } 487 488 foreach (si; 0 .. symtab.length) 489 { 490 Symbol *s = symtab[si]; 491 assert(s.Ssymnum == si); 492 } 493 494 foreach (b; BlockRange(startblock)) 495 { 496 if (b.Belem) 497 sliceStructs_Replace(symtab, sia2, b.Belem); 498 } 499 500 static if (0) 501 { 502 printf("after slicing:\n"); 503 foreach (b; BlockRange(startblock)) 504 { 505 if (b.Belem) 506 elem_print(b.Belem); 507 } 508 printf("after slicing done\n"); 509 } 510 511 } 512 } 513 514 515 /************************************* 516 * Determine if `e` is a slice. 517 * Params: 518 * e = elem that may be a slice 519 * Returns: 520 * slice number if it is, NOTSLICE if not 521 */ 522 enum NOTSLICE = -1; 523 int nthSlice(const(elem)* e) 524 { 525 const sz = tysize(e.Ety); // not getSize(e) because type_fake(TYstruct) doesn't work 526 if (sz == -1) 527 return NOTSLICE; 528 const sliceSize = SLICESIZE; 529 530 /* if sz is less than sliceSize, this causes problems because if, say, 531 * sz is 4 while sliceSize is 8, and sz gets enregistered, then assigning to 532 * the lower 4 bytes of sz will zero out the upper 4 bytes. 533 * https://github.com/dlang/dmd/pull/13220 534 */ 535 if (sz != sliceSize) 536 return NOTSLICE; 537 538 /* See if e fits in a slice 539 */ 540 const lwr = e.EV.Voffset; 541 const upr = lwr + sz; 542 if (0 <= lwr && upr <= sliceSize) 543 return 0; 544 if (sliceSize <= lwr && upr <= sliceSize * 2) 545 return 1; 546 547 return NOTSLICE; 548 } 549 550 /****************************************** 551 * Get size of an elem e. 552 */ 553 private int getSize(const(elem)* e) 554 { 555 int sz = tysize(e.Ety); 556 if (sz == -1 && e.ET && (tybasic(e.Ety) == TYstruct || tybasic(e.Ety) == TYarray)) 557 sz = cast(int)type_size(e.ET); 558 return sz; 559 }