1 /**
2  * SROA structured replacement of aggregate optimization
3  *
4  * This 'slices' a two register wide aggregate into two separate register-sized variables,
5  * enabling much better enregistering.
6  * SROA (Scalar Replacement Of Aggregates) is the common term for this.
7  *
8  * Compiler implementation of the
9  * $(LINK2 https://www.dlang.org, D programming language).
10  *
11  * Copyright:   Copyright (C) 2016-2023 by The D Language Foundation, All Rights Reserved
12  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
13  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
14  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/gsroa.c, backend/gsroa.d)
15  */
16 
17 module dmd.backend.gsroa;
18 
19 version (SCPP)
20     version = COMPILE;
21 version (MARS)
22     version = COMPILE;
23 
24 version (COMPILE)
25 {
26 
27 import core.stdc.stdio;
28 import core.stdc.stdlib;
29 import core.stdc.string;
30 import core.stdc.time;
31 
32 import dmd.backend.cc;
33 import dmd.backend.cdef;
34 import dmd.backend.code_x86;
35 import dmd.backend.oper;
36 import dmd.backend.global;
37 import dmd.backend.el;
38 import dmd.backend.symtab;
39 import dmd.backend.ty;
40 import dmd.backend.type;
41 
42 import dmd.backend.dlist;
43 import dmd.backend.dvec;
44 
45 extern (C++):
46 
47 nothrow:
48 @safe:
49 
50 private enum log = false;       // print logging info
51 private enum enable = true;     // enable SROA
52 
53 int REGSIZE();
54 
55 alias SLICESIZE = REGSIZE;  // slices are all register-sized
56 enum MAXSLICES = 2;         // max # of pieces we can slice an aggregate into
57 
58 struct SymInfo
59 {
60     bool canSlice;
61     bool accessSlice;   // if Symbol was accessed as a slice
62     tym_t[MAXSLICES] ty; // type of each slice
63     SYMIDX si0;          // index of first slice, the rest follow sequentially
64 }
65 
66 /********************************
67  * Gather information about slice-able variables by scanning e.
68  * Params:
69  *      symtab = symbol table
70  *      e = expression to scan
71  *      sia = where to put gathered information
72  */
73 @trusted
74 extern (D) private void sliceStructs_Gather(ref const symtab_t symtab, SymInfo[] sia, const(elem)* e)
75 {
76     while (1)
77     {
78         switch (e.Eoper)
79         {
80             case OPvar:
81             {
82                 const si = e.EV.Vsym.Ssymnum;
83                 if (si != SYMIDX.max && sia[si].canSlice)
84                 {
85                     assert(si < symtab.length);
86                     const n = nthSlice(e);
87                     const sz = getSize(e);
88                     if (sz == 2 * SLICESIZE && !tyfv(e.Ety) &&
89                         tybasic(e.Ety) != TYldouble && tybasic(e.Ety) != TYildouble)
90                     {
91                         // Rewritten as OPpair later
92                     }
93                     else if (n != NOTSLICE)
94                     {
95                         if (!sia[si].accessSlice)
96                         {
97                             /* [1] default as pointer type
98                              */
99                             foreach (ref ty; sia[si].ty)
100                                 ty = TYnptr;
101 
102                             const s = e.EV.Vsym;
103                             const t = s.Stype;
104                             if (tybasic(t.Tty) == TYstruct)
105                             {
106                                 if (t.Ttag.Sstruct.Sflags & STRbitfields)
107                                 {
108                                     // Can get "used before set" errors from slicing this
109                                     // Would be workable if the symbol was flagged instead of the type
110                                     sia[si].canSlice = false;
111                                     return;
112                                 }
113 
114                                 if (const targ1 = t.Ttag.Sstruct.Sarg1type)
115                                     if (const targ2 = t.Ttag.Sstruct.Sarg2type)
116                                     {
117                                         sia[si].ty[0] = targ1.Tty;
118                                         sia[si].ty[1] = targ2.Tty;
119 
120                                         if (config.fpxmmregs &&
121                                              tyxmmreg(targ1.Tty) && !tyxmmreg(targ2.Tty) ||
122                                             !tyxmmreg(targ1.Tty) &&  tyxmmreg(targ2.Tty))
123 
124                                         {
125                                             /* https://issues.dlang.org/show_bug.cgi?22438
126                                              * disable till fixed
127                                              */
128                                             if (log) printf(" [%d] can't because xmmgpr or gprxmm\n", cast(int)si);
129                                             sia[si].canSlice = false;
130                                             return;
131                                         }
132                                     }
133                             }
134                             else if (tybasic(t.Tty) == TYarray)
135                             {
136                                 // could be an array of floats, deal with this later
137                                 if (log) printf(" [%d] can't because array of floats\n", cast(int)si);
138                                 sia[si].canSlice = false;
139                                 return;
140                             }
141                         }
142                         if (sz == SLICESIZE)
143                         {
144                             sia[si].ty[n] = tybasic(e.Ety);
145                             if (SLICESIZE == 4 && config.fpxmmregs && tyxmmreg(e.Ety))
146                             {
147                                 /* for 32 bits, OPstreq is converted to a TYllong.
148                                  * It needs to be converted to cfloat, otherwise XMM
149                                  * registers cannot be handled. This fails:
150                                  *   struct F { float x, y; }
151                                  *   void foo(F p1, ref F rfp) { rfp = F(p.x, p.y); }
152                                  */
153                                 if (log) printf(" [%d] can't because 32 bit XMM\n", cast(int)si);
154                                 sia[si].canSlice = false;
155                                 return;
156                             }
157                             if (config.fpxmmregs && tyxmmreg(e.Ety))
158                             {
159                                 /* Too many issues with mixing XMM with non-XMM
160                                  * One problem is an OPpair with one operand a long, the other XMM.
161                                  * Giving it up for now.
162                                  */
163                                 if (log) printf(" [%d] can't because XMM\n", cast(int)si);
164                                 sia[si].canSlice = false;
165                                 return;
166                             }
167                         }
168                         sia[si].accessSlice = true;
169                     }
170                     else
171                     {
172                         if (log) printf(" [%d] can't because NOTSLICE 1\n", cast(int)si);
173                         sia[si].canSlice = false;
174                     }
175                 }
176                 return;
177             }
178 
179             default:
180                 if (OTassign(e.Eoper))
181                 {
182                     if (OTbinary(e.Eoper))
183                         sliceStructs_Gather(symtab, sia, e.EV.E2);
184 
185                     // Assignment to a whole var will disallow SROA
186                     if (e.EV.E1.Eoper == OPvar)
187                     {
188                         const e1 = e.EV.E1;
189                         const si = e1.EV.Vsym.Ssymnum;
190                         if (si != SYMIDX.max && sia[si].canSlice)
191                         {
192                             assert(si < symtab.length);
193                             if (nthSlice(e1) == NOTSLICE)
194                             {
195                                 if (log)
196                                 {
197                                     printf(" [%d] can't because NOTSLICE 2\n", cast(int)si);
198                                     elem_print(e);
199                                 }
200                                 sia[si].canSlice = false;
201                             }
202                             // Disable SROA on OSX32 (because XMM registers?)
203                             // https://issues.dlang.org/show_bug.cgi?id=15206
204                             // https://github.com/dlang/dmd/pull/8034
205                             else if (!(config.exe & EX_OSX))
206                             {
207                                 sliceStructs_Gather(symtab, sia, e.EV.E1);
208                             }
209                         }
210                         return;
211                     }
212                     e = e.EV.E1;
213                     break;
214                 }
215                 if (OTunary(e.Eoper))
216                 {
217                     e = e.EV.E1;
218                     break;
219                 }
220                 if (OTbinary(e.Eoper))
221                 {
222                     sliceStructs_Gather(symtab, sia, e.EV.E2);
223                     e = e.EV.E1;
224                     break;
225                 }
226                 return;
227         }
228     }
229 }
230 
231 /***********************************
232  * Rewrite expression tree e based on info in sia[].
233  * Params:
234  *      symtab = symbol table
235  *      sia = slicing info
236  *      e = expression tree to rewrite in place
237  */
238 @trusted
239 extern (D) private void sliceStructs_Replace(ref symtab_t symtab, const SymInfo[] sia, elem *e)
240 {
241     while (1)
242     {
243         switch (e.Eoper)
244         {
245             case OPvar:
246             {
247                 Symbol *s = e.EV.Vsym;
248                 const si = s.Ssymnum;
249                 //printf("e: %d %d\n", si, sia[si].canSlice);
250                 //elem_print(e);
251                 if (si != SYMIDX.max && sia[si].canSlice)
252                 {
253                     const n = nthSlice(e);
254                     if (getSize(e) == 2 * SLICESIZE)
255                     {
256                         if (log) { printf("slicing struct before "); elem_print(e); }
257                         // Rewrite e as (si0 OPpair si0+1)
258                         elem *e1 = el_calloc();
259                         el_copy(e1, e);
260                         e1.Ety = sia[si].ty[0];
261 
262                         elem *e2 = el_calloc();
263                         el_copy(e2, e);
264                         Symbol *s1 = symtab[sia[si].si0 + 1]; // +1 for second slice
265                         e2.Ety = sia[si].ty[1];
266                         e2.EV.Vsym = s1;
267                         e2.EV.Voffset = 0;
268 
269                         e.Eoper = OPpair;
270                         e.EV.E1 = e1;
271                         e.EV.E2 = e2;
272 
273                         if (tycomplex(e.Ety))
274                         {
275                             /* Ensure complex OPpair operands are floating point types
276                              * because [1] may have defaulted them to a pointer type.
277                              * https://issues.dlang.org/show_bug.cgi?id=18936
278                              */
279                             tym_t tyop;
280                             switch (tybasic(e.Ety))
281                             {
282                                 case TYcfloat:   tyop = TYfloat;   break;
283                                 case TYcdouble:  tyop = TYdouble;  break;
284                                 case TYcldouble: tyop = TYldouble; break;
285                                 default:
286                                     assert(0);
287                             }
288                             if (!tyfloating(e1.Ety))
289                                 e1.Ety = tyop;
290                             if (!tyfloating(e2.Ety))
291                                 e2.Ety = tyop;
292                         }
293                         if (log) { printf("slicing struct after\n"); elem_print(e); }
294                     }
295                     else if (n == 0)  // the first slice of the symbol is the same as the original
296                     {
297                         if (log) { printf("slicing slice 0 "); elem_print(e); }
298                     }
299                     else // the nth slice
300                     {
301                         if (log) { printf("slicing slice %d ", n); elem_print(e); }
302                         e.EV.Vsym = symtab[sia[si].si0 + n];
303                         e.EV.Voffset -= n * SLICESIZE;
304                         //printf("replaced with:\n");
305                         //elem_print(e);
306                     }
307                 }
308                 return;
309             }
310 
311             case OPrelconst:
312             {
313                 Symbol *s = e.EV.Vsym;
314                 const si = s.Ssymnum;
315                 //printf("e: %d %d\n", si, sia[si].canSlice);
316                 //elem_print(e);
317                 if (si != SYMIDX.max && sia[si].canSlice)
318                 {
319                     printf("shouldn't be slicing %s\n", s.Sident.ptr);
320                     assert(0);
321                 }
322                 return;
323             }
324 
325             default:
326                 if (OTunary(e.Eoper))
327                 {
328                     e = e.EV.E1;
329                     break;
330                 }
331                 if (OTbinary(e.Eoper))
332                 {
333                     sliceStructs_Replace(symtab, sia, e.EV.E2);
334                     e = e.EV.E1;
335                     break;
336                 }
337                 return;
338         }
339     }
340 }
341 
342 @trusted
343 void sliceStructs(ref symtab_t symtab, block* startblock)
344 {
345 if (enable) // disable while we test the inliner
346 {
347     if (log) printf("\n************ sliceStructs() %s *******************\n", funcsym_p.Sident.ptr);
348     const sia_length = symtab.length;
349     /* 3 is because it is used for two arrays, sia[] and sia2[].
350      * sia2[] can grow to twice the size of sia[], as symbols can get split into two.
351      */
352     debug
353         enum tmp_length = 3;
354     else
355         enum tmp_length = 6;
356     SymInfo[tmp_length] tmp = void;
357 
358     import dmd.common.string : SmallBuffer;
359     auto sb = SmallBuffer!(SymInfo)(3 * sia_length, tmp[]);
360     SymInfo* sip = sb.ptr;
361     memset(sip, 0, 3 * sia_length * SymInfo.sizeof);
362     SymInfo[] sia = sip[0 .. sia_length];
363     SymInfo[] sia2 = sip[sia_length .. sia_length * 3];
364 
365     if (log) foreach (si; 0 .. symtab.length)
366     {
367         Symbol *s = symtab[si];
368         printf("[%d]: %p %d %s %s\n", cast(int)si, s, cast(int)type_size(s.Stype), s.Sident.ptr, tym_str(s.Stype.Tty));
369     }
370 
371     bool anySlice = false;
372     foreach (si; 0 .. symtab.length)
373     {
374         Symbol *s = symtab[si];
375         if (log) printf("slice1 [%d]: %s\n", cast(int)si, s.Sident.ptr);
376 
377         //if (strcmp(s.Sident.ptr, "__inlineretval3".ptr) == 0) { printf("can't\n"); sia[si].canSlice = false; continue; }
378         if (!(s.Sflags & SFLunambig))   // if somebody took the address of s
379         {
380             if (log) printf(" can't because SFLunambig\n");
381             sia[si].canSlice = false;
382             continue;
383         }
384 
385         const sz = type_size(s.Stype);
386         if (sz != 2 * SLICESIZE ||
387             tyvector(s.Stype.Tty) ||            // SIMD types
388             tyfv(s.Stype.Tty) || tybasic(s.Stype.Tty) == TYhptr)    // because there is no TYseg
389         {
390             if (log) printf(" can't because size or pointer type\n");
391             sia[si].canSlice = false;
392             continue;
393         }
394 
395         switch (s.Sclass)
396         {
397             case SC.fastpar:
398             case SC.register:
399             case SC.auto_:
400             case SC.shadowreg:
401             case SC.parameter:
402                 anySlice = true;
403                 sia[si].canSlice = true;
404                 sia[si].accessSlice = false;
405                 // We can't slice whole XMM registers
406                 if (tyxmmreg(s.Stype.Tty) &&
407                     isXMMreg(s.Spreg) && s.Spreg2 == NOREG)
408                 {
409                     if (log) printf(" can't because XMM reg\n");
410                     sia[si].canSlice = false;
411                 }
412                 break;
413 
414             case SC.stack:
415             case SC.pseudo:
416             case SC.static_:
417             case SC.bprel:
418                 if (log) printf(" can't because Sclass\n");
419                 sia[si].canSlice = false;
420                 break;
421 
422             default:
423                 symbol_print(s);
424                 assert(0);
425         }
426     }
427 
428     if (!anySlice)
429         return;
430 
431     foreach (b; BlockRange(startblock))
432     {
433         if (b.BC == BCasm)
434             return;
435         if (b.Belem)
436             sliceStructs_Gather(symtab, sia, b.Belem);
437     }
438 
439     {   // scope needed because of goto skipping declarations
440         bool any = false;
441         int n = 0;              // the number of symbols added
442         foreach (si; 0 .. sia_length)
443         {
444             sia2[si + n].canSlice = false;
445             if (sia[si].canSlice)
446             {
447                 // If never did access it as a slice, don't slice
448                 if (!sia[si].accessSlice)
449                 {
450                     if (log) printf(" can't slice %s because no accessSlice\n", symtab[si].Sident.ptr);
451                     sia[si].canSlice = false;
452                     continue;
453                 }
454 
455                 /* Split slice-able symbol sold into two symbols,
456                  * (sold,snew) in adjacent slots in the symbol table.
457                  */
458                 Symbol *sold = symtab[si + n];
459 
460                 const idlen = 2 + strlen(sold.Sident.ptr) + 2;
461                 char *id = cast(char *)malloc(idlen + 1);
462                 if (!id)
463                     err_nomem();
464                 const len = snprintf(id, idlen + 1, "__%s_%d", sold.Sident.ptr, SLICESIZE);
465                 assert(len == idlen);
466                 if (log) printf("retyping slice symbol %s %s\n", sold.Sident.ptr, tym_str(sia[si].ty[0]));
467                 if (log) printf("creating slice symbol %s %s\n", id, tym_str(sia[si].ty[1]));
468                 Symbol *snew = symbol_calloc(id[0 .. idlen]);
469                 free(id);
470                 snew.Sclass = sold.Sclass;
471                 snew.Sfl = sold.Sfl;
472                 snew.Sflags = sold.Sflags;
473                 if (snew.Sclass == SC.fastpar || snew.Sclass == SC.shadowreg)
474                 {
475                     snew.Spreg = sold.Spreg2;
476                     snew.Spreg2 = NOREG;
477                     sold.Spreg2 = NOREG;
478                 }
479                 type_free(sold.Stype);
480                 sold.Stype = type_fake(sia[si].ty[0]);
481                 sold.Stype.Tcount++;
482                 snew.Stype = type_fake(sia[si].ty[1]);
483                 snew.Stype.Tcount++;
484 
485                 // insert snew into symtab[si + n + 1]
486                 symbol_insert(symtab, snew, si + n + 1);
487 
488                 sia2[si + n].canSlice = true;
489                 sia2[si + n].si0 = si + n;
490                 sia2[si + n].ty[] = sia[si].ty[];
491                 ++n;
492                 any = true;
493             }
494         }
495         if (!any)
496             return;
497     }
498 
499     foreach (si; 0 .. symtab.length)
500     {
501         Symbol *s = symtab[si];
502         assert(s.Ssymnum == si);
503     }
504 
505     foreach (b; BlockRange(startblock))
506     {
507         if (b.Belem)
508             sliceStructs_Replace(symtab, sia2, b.Belem);
509     }
510 
511     static if (0)
512     {
513         printf("after slicing:\n");
514         foreach (b; BlockRange(startblock))
515         {
516             if (b.Belem)
517                 elem_print(b.Belem);
518         }
519         printf("after slicing done\n");
520     }
521 
522 }
523 }
524 
525 
526 /*************************************
527  * Determine if `e` is a slice.
528  * Params:
529  *      e = elem that may be a slice
530  * Returns:
531  *      slice number if it is, NOTSLICE if not
532  */
533 enum NOTSLICE = -1;
534 int nthSlice(const(elem)* e)
535 {
536     const sz = tysize(e.Ety); // not getSize(e) because type_fake(TYstruct) doesn't work
537     if (sz == -1)
538         return NOTSLICE;
539     const sliceSize = SLICESIZE;
540 
541     /* if sz is less than sliceSize, this causes problems because if, say,
542      * sz is 4 while sliceSize is 8, and sz gets enregistered, then assigning to
543      * the lower 4 bytes of sz will zero out the upper 4 bytes.
544      * https://github.com/dlang/dmd/pull/13220
545      */
546     if (sz != sliceSize)
547         return NOTSLICE;
548 
549     /* See if e fits in a slice
550      */
551     const lwr = e.EV.Voffset;
552     const upr = lwr + sz;
553     if (0 <= lwr && upr <= sliceSize)
554         return 0;
555     if (sliceSize <= lwr && upr <= sliceSize * 2)
556         return 1;
557 
558     return NOTSLICE;
559 }
560 
561 /******************************************
562  * Get size of an elem e.
563  */
564 private int getSize(const(elem)* e)
565 {
566     int sz = tysize(e.Ety);
567     if (sz == -1 && e.ET && (tybasic(e.Ety) == TYstruct || tybasic(e.Ety) == TYarray))
568         sz = cast(int)type_size(e.ET);
569     return sz;
570 }
571 
572 }