1 /**
2  * Support for lexical scope of local variables
3  *
4  * Compiler implementation of the
5  * $(LINK2 https://www.dlang.org, D programming language).
6  *
7  * Copyright:   Copyright (C) 2015-2023 by The D Language Foundation, All Rights Reserved
8  * Authors:     Rainer Schuetze
9  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
10  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/dvarstats.d, backend/dvarstats.d)
11  */
12 
13 module dmd.backend.dvarstats;
14 
15 import core.stdc.string;
16 import core.stdc.stdlib;
17 
18 import dmd.backend.cc;
19 import dmd.backend.cdef;
20 import dmd.backend.global;
21 import dmd.backend.code;
22 import dmd.backend.symtab;
23 import dmd.backend.barray;
24 
25 extern (C++):
26 
27 nothrow:
28 
29 alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*);
30 extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar);
31 
32 version (all) // free function version
33 {
34     import dmd.backend.dvarstats;
35 
36     void varStats_writeSymbolTable(ref symtab_t symtab,
37             void function(Symbol*) nothrow fnWriteVar, void function() nothrow fnEndArgs,
38             void function(int off,int len) nothrow fnBeginBlock, void function() nothrow fnEndBlock)
39     {
40         varStats.writeSymbolTable(symtab, fnWriteVar, fnEndArgs, fnBeginBlock, fnEndBlock);
41     }
42 
43     void varStats_startFunction()
44     {
45         varStats.startFunction();
46     }
47 
48     void varStats_recordLineOffset(Srcpos src, targ_size_t off)
49     {
50         varStats.recordLineOffset(src, off);
51     }
52 
53     __gshared VarStatistics varStats;
54 }
55 
56 
57 // estimate of variable life time
58 struct LifeTime
59 {
60     Symbol* sym;
61     int offCreate;  // variable created before this code offset
62     int offDestroy; // variable destroyed after this code offset
63 }
64 
65 struct LineOffset
66 {
67     targ_size_t offset;
68     uint linnum;
69     uint diffNextOffset;
70 }
71 
72 struct VarStatistics
73 {
74 private:
75 nothrow:
76     Barray!LifeTime lifeTimes;
77 
78     // symbol table sorted by offset of variable creation
79     symtab_t sortedSymtab;
80     Barray!SYMIDX nextSym;      // next symbol with identifier with same hash, same size as sortedSymtab
81     int uniquecnt;        // number of variables that have unique name and don't need lexical scope
82 
83     // line number records for the current function
84     Barray!LineOffset lineOffsets;
85     const(char)* srcfile;  // only one file supported, no inline
86 
87 public void startFunction()
88 {
89     lineOffsets.setLength(0);
90     srcfile = null;
91 }
92 
93 // figure if can we can add a lexical scope for the variable
94 // (this should exclude variables from inlined functions as there is
95 //  no support for gathering stats from different files)
96 private bool isLexicalScopeVar(Symbol* sa)
97 {
98     if (sa.lposscopestart.Slinnum <= 0 || sa.lposscopestart.Slinnum > sa.lnoscopeend)
99         return false;
100 
101     // is it inside the function? Unfortunately we cannot verify the source file in case of inlining
102     if (sa.lposscopestart.Slinnum < funcsym_p.Sfunc.Fstartline.Slinnum)
103         return false;
104     if (sa.lnoscopeend > funcsym_p.Sfunc.Fendline.Slinnum)
105         return false;
106 
107     return true;
108 }
109 
110 // compare function to sort symbols by line offsets of their creation
111 private extern (C) static int cmpLifeTime(scope const void* p1, scope const void* p2)
112 {
113     const LifeTime* lt1 = cast(const(LifeTime)*)p1;
114     const LifeTime* lt2 = cast(const(LifeTime)*)p2;
115 
116     return lt1.offCreate - lt2.offCreate;
117 }
118 
119 // a parent scope contains the creation offset of the child scope
120 private static extern(D) SYMIDX isParentScope(ref Barray!LifeTime lifetimes, SYMIDX parent, SYMIDX si)
121 {
122     if (parent == SYMIDX.max) // full function
123         return true;
124     return lifetimes[parent].offCreate <= lifetimes[si].offCreate &&
125            lifetimes[parent].offDestroy > lifetimes[si].offCreate;
126 }
127 
128 // find a symbol that includes the creation of the given symbol as part of its life time
129 private static extern(D) SYMIDX findParentScope(ref Barray!LifeTime lifetimes, SYMIDX si)
130 {
131     if (si != SYMIDX.max)
132     {
133         for(SYMIDX sj = si; sj; --sj)
134             if(isParentScope(lifetimes, sj - 1, si))
135                 return sj;
136     }
137     return SYMIDX.max;
138 }
139 
140 private static int getHash(const(char)* s)
141 {
142     int hash = 0;
143     for (; *s; s++)
144         hash = hash * 11 + *s;
145     return hash;
146 }
147 
148 private bool hashSymbolIdentifiers(ref symtab_t symtab)
149 {
150     // build circular-linked lists of symbols with same identifier hash
151     bool hashCollisions = false;
152     SYMIDX[256] firstSym = SYMIDX.max;
153     for (SYMIDX si = 0; si < symtab.length; si++)
154     {
155         Symbol* sa = symtab[si];
156         int hash = getHash(sa.Sident.ptr) & 255;
157         SYMIDX first = firstSym[hash];
158         if (first == SYMIDX.max)
159         {
160             // connect full circle, so we don't have to recalculate the hash
161             nextSym[si] = si;
162             firstSym[hash] = si;
163         }
164         else
165         {
166             // insert after first entry
167             nextSym[si] = nextSym[first];
168             nextSym[first] = si;
169             hashCollisions = true;
170         }
171     }
172     return hashCollisions;
173 }
174 
175 private bool hasUniqueIdentifier(ref symtab_t symtab, SYMIDX si)
176 {
177     Symbol* sa = symtab[si];
178     for (SYMIDX sj = nextSym[si]; sj != si; sj = nextSym[sj])
179         if (strcmp(sa.Sident.ptr, symtab[sj].Sident.ptr) == 0)
180             return false;
181     return true;
182 }
183 
184 // gather statistics about creation and destructions of variables that are
185 //  used by the current function
186 private symtab_t* calcLexicalScope(return ref symtab_t symtab) return
187 {
188     // make a copy of the symbol table
189     // - arguments should be kept at the very beginning
190     // - variables with unique name come first (will be emitted with full function scope)
191     // - variables with duplicate names are added with ascending code offset
192     nextSym.setLength(symtab.length);
193     if (sortedSymtab.symmax < symtab.length)
194     {
195         sortedSymtab.tab = cast(Symbol**) util_realloc(sortedSymtab.tab, symtab.length, (Symbol*).sizeof);
196         sortedSymtab.symmax = symtab.length;
197     }
198     sortedSymtab.length = symtab.length;
199 
200     if (!hashSymbolIdentifiers(symtab))
201     {
202         // without any collisions, there are no duplicate symbol names, so bail out early
203         uniquecnt = cast(int)symtab.length;
204         return &symtab;
205     }
206 
207     SYMIDX argcnt;
208     for (argcnt = 0; argcnt < symtab.length; argcnt++)
209     {
210         Symbol* sa = symtab[argcnt];
211         if (sa.Sclass != SC.parameter && sa.Sclass != SC.regpar && sa.Sclass != SC.fastpar && sa.Sclass != SC.shadowreg)
212             break;
213         sortedSymtab[argcnt] = sa;
214     }
215 
216     // find symbols with identical names, only these need lexical scope
217     uniquecnt = cast(int)argcnt;
218     SYMIDX dupcnt = 0;
219     for (SYMIDX sj, si = argcnt; si < symtab.length; si++)
220     {
221         Symbol* sa = symtab[si];
222         if (!isLexicalScopeVar(sa) || hasUniqueIdentifier(symtab, si))
223             sortedSymtab[uniquecnt++] = sa;
224         else
225             sortedSymtab[symtab.length - 1 - dupcnt++] = sa; // fill from the top
226     }
227     sortedSymtab.length = symtab.length;
228     if(dupcnt == 0)
229         return &symtab;
230 
231     sortLineOffsets();
232 
233     // precalc the lexical blocks to emit so that identically named symbols don't overlap
234     lifeTimes.setLength(dupcnt);
235 
236     for (SYMIDX si = 0; si < dupcnt; si++)
237     {
238         lifeTimes[si].sym = sortedSymtab[uniquecnt + si];
239         lifeTimes[si].offCreate = cast(int)getLineOffset(lifeTimes[si].sym.lposscopestart.Slinnum);
240         lifeTimes[si].offDestroy = cast(int)getLineOffset(lifeTimes[si].sym.lnoscopeend);
241     }
242     qsort(lifeTimes[].ptr, dupcnt, (lifeTimes[0]).sizeof, &cmpLifeTime);
243 
244     // ensure that an inner block does not extend beyond the end of a parent block
245     for (SYMIDX si = 0; si < dupcnt; si++)
246     {
247         SYMIDX sj = findParentScope(lifeTimes, si);
248         if(sj != SYMIDX.max && lifeTimes[si].offDestroy > lifeTimes[sj].offDestroy)
249             lifeTimes[si].offDestroy = lifeTimes[sj].offDestroy;
250     }
251 
252     // extend life time to the creation of the next symbol that is not contained in the parent scope
253     // or that has the same name
254     for (SYMIDX sj, si = 0; si < dupcnt; si++)
255     {
256         SYMIDX parent = findParentScope(lifeTimes, si);
257 
258         for (sj = si + 1; sj < dupcnt; sj++)
259             if(!isParentScope(lifeTimes, parent, sj))
260                 break;
261             else if (strcmp(lifeTimes[si].sym.Sident.ptr, lifeTimes[sj].sym.Sident.ptr) == 0)
262                 break;
263 
264         lifeTimes[si].offDestroy = cast(int)(sj < dupcnt ? lifeTimes[sj].offCreate : retoffset + retsize); // function length
265     }
266 
267     // store duplicate symbols back with new ordering
268     for (SYMIDX si = 0; si < dupcnt; si++)
269         sortedSymtab[uniquecnt + si] = lifeTimes[si].sym;
270 
271     return &sortedSymtab;
272 }
273 
274 public void writeSymbolTable(ref symtab_t symtab,
275             void function(Symbol*) nothrow fnWriteVar, void function() nothrow fnEndArgs,
276             void function(int off,int len) nothrow fnBeginBlock, void function() nothrow fnEndBlock)
277 {
278     auto symtab2 = calcLexicalScope(symtab);
279 
280     int openBlocks = 0;
281     int lastOffset = 0;
282 
283     // Write local symbol table
284     bool endarg = false;
285     for (SYMIDX si = 0; si < symtab2.length; si++)
286     {
287         Symbol *sa = (*symtab2)[si];
288         if (endarg == false &&
289             sa.Sclass != SC.parameter &&
290             sa.Sclass != SC.fastpar &&
291             sa.Sclass != SC.regpar &&
292             sa.Sclass != SC.shadowreg)
293         {
294             if(fnEndArgs)
295                 (*fnEndArgs)();
296             endarg = true;
297         }
298         if (si >= uniquecnt)
299         {
300             int off = lifeTimes[si - uniquecnt].offCreate;
301             // close scopes that end before the creation of this symbol
302             for (SYMIDX sj = si; sj > uniquecnt; --sj)
303             {
304                 if (lastOffset < lifeTimes[sj - 1 - uniquecnt].offDestroy && lifeTimes[sj - 1 - uniquecnt].offDestroy <= off)
305                 {
306                     assert(openBlocks > 0);
307                     if(fnEndBlock)
308                         (*fnEndBlock)();
309                     openBlocks--;
310                 }
311             }
312             int len = lifeTimes[si - uniquecnt].offDestroy - off;
313             // don't emit a block for length 0, it isn't captured by the close condition above
314             if (len > 0)
315             {
316                 if(fnBeginBlock)
317                     (*fnBeginBlock)(off, len);
318                 openBlocks++;
319             }
320             lastOffset = off;
321         }
322         (*fnWriteVar)(sa);
323     }
324 
325     while (openBlocks > 0)
326     {
327         if(fnEndBlock)
328             (*fnEndBlock)();
329         openBlocks--;
330     }
331 }
332 
333 // compare function to sort line offsets ascending by line (and offset on identical line)
334 private extern (C) static int cmpLineOffsets(scope const void* off1, scope const void* off2)
335 {
336     const LineOffset* loff1 = cast(const(LineOffset)*)off1;
337     const LineOffset* loff2 = cast(const(LineOffset)*)off2;
338 
339     if (loff1.linnum == loff2.linnum)
340         return cast(int)(loff1.offset - loff2.offset);
341     return loff1.linnum - loff2.linnum;
342 }
343 
344 private void sortLineOffsets()
345 {
346     if (lineOffsets.length == 0)
347         return;
348 
349     // remember the offset to the next recorded offset on another line
350     for (int i = 1; i < lineOffsets.length; i++)
351         lineOffsets[i-1].diffNextOffset = cast(uint)(lineOffsets[i].offset - lineOffsets[i-1].offset);
352     lineOffsets[lineOffsets.length - 1].diffNextOffset = cast(uint)(retoffset + retsize - lineOffsets[lineOffsets.length - 1].offset);
353 
354     // sort line records and remove duplicate lines preferring smaller offsets
355     qsort(lineOffsets[].ptr, lineOffsets.length, (lineOffsets[0]).sizeof, &cmpLineOffsets);
356     int j = 0;
357     for (int i = 1; i < lineOffsets.length; i++)
358         if (lineOffsets[i].linnum > lineOffsets[j].linnum)
359             lineOffsets[++j] = lineOffsets[i];
360     lineOffsets.setLength(j + 1);
361 }
362 
363 private targ_size_t getLineOffset(int linnum)
364 {
365     int idx = findLineIndex(linnum);
366     if (idx >= lineOffsets.length || lineOffsets[idx].linnum < linnum)
367         return retoffset + retsize; // function length
368     if (idx > 0 && lineOffsets[idx].linnum != linnum)
369         // for inexact line numbers, use the offset following the previous line
370         return lineOffsets[idx-1].offset + lineOffsets[idx-1].diffNextOffset;
371     return lineOffsets[idx].offset;
372 }
373 
374 // return the first record index in the lineOffsets array with linnum >= line
375 private int findLineIndex(uint line)
376 {
377     int low = 0;
378     int high = cast(int)lineOffsets.length;
379     while (low < high)
380     {
381         int mid = low + ((high - low) >> 1);
382         int ln = lineOffsets[mid].linnum;
383         if (line < ln)
384             high = mid;
385         else if (line > ln)
386             low = mid + 1;
387         else
388             return mid;
389     }
390     return low;
391 }
392 
393 public void recordLineOffset(Srcpos src, targ_size_t off)
394 {
395     version (MARS)
396         const sfilename = src.Sfilename;
397     else
398         const sfilename = srcpos_name(src);
399 
400     // only record line numbers from one file, symbol info does not include source file
401     if (!sfilename || !src.Slinnum)
402         return;
403     if (!srcfile)
404         srcfile = sfilename;
405     if (srcfile != sfilename && strcmp(srcfile, sfilename) != 0)
406         return;
407 
408     // assume ascending code offsets generated during codegen, ignore any other
409     //  (e.g. there is an additional line number emitted at the end of the function
410     //   or multiple line numbers at the same offset)
411     if (lineOffsets.length > 0 && lineOffsets[lineOffsets.length-1].offset >= off)
412         return;
413 
414     if (lineOffsets.length > 0 && lineOffsets[lineOffsets.length-1].linnum == src.Slinnum)
415     {
416         // optimize common case: new offset on same line
417         return;
418     }
419     // don't care for lineOffsets being ordered now, that is taken care of later (calcLexicalScope)
420     LineOffset* linoff = lineOffsets.push();
421     linoff.linnum = src.Slinnum;
422     linoff.offset = off;
423     linoff.diffNextOffset = 0;
424 }
425 
426 }