1 /**
2  * A specialized associative array with string keys stored in a variable length structure.
3  *
4  * Copyright: Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
5  * Authors:   Walter Bright, https://www.digitalmars.com
6  * License:   $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:    $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/root/stringtable.d, root/_stringtable.d)
8  * Documentation:  https://dlang.org/phobos/dmd_root_stringtable.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/root/stringtable.d
10  */
11 
12 module dmd.root.stringtable;
13 
14 import core.stdc.string;
15 import dmd.root.rmem, dmd.root.hash;
16 
17 private enum POOL_BITS = 12;
18 private enum POOL_SIZE = (1U << POOL_BITS);
19 
20 /*
21 Returns the smallest integer power of 2 larger than val.
22 if val > 2^^63 on 64-bit targets or val > 2^^31 on 32-bit targets it enters an
23 endless loop because of overflow.
24 */
25 private size_t nextpow2(size_t val) @nogc nothrow pure @safe
26 {
27     size_t res = 1;
28     while (res < val)
29         res <<= 1;
30     return res;
31 }
32 
33 unittest
34 {
35     assert(nextpow2(0) == 1);
36     assert(nextpow2(0xFFFF) == (1 << 16));
37     assert(nextpow2(size_t.max / 2) == size_t.max / 2 + 1);
38     // note: nextpow2((1UL << 63) + 1) results in an endless loop
39 }
40 
41 private enum loadFactorNumerator = 8;
42 private enum loadFactorDenominator = 10;        // for a load factor of 0.8
43 
44 private struct StringEntry
45 {
46     uint hash;
47     uint vptr;
48 }
49 
50 /********************************
51  * StringValue is a variable-length structure. It has neither proper c'tors nor a
52  * factory method because the only thing which should be creating these is StringTable.
53  * The string characters are stored in memory immediately after the StringValue struct.
54  */
55 struct StringValue(T)
56 {
57     T value; //T is/should typically be a pointer or a slice
58     private size_t length;
59     /+
60     char[length] chars;  // the string characters are stored here
61      +/
62 
63     char* lstring() @nogc nothrow pure return
64     {
65         return cast(char*)(&this + 1);
66     }
67 
68     size_t len() const @nogc nothrow pure @safe
69     {
70         return length;
71     }
72 
73     const(char)* toDchars() const @nogc nothrow pure return
74     {
75         return cast(const(char)*)(&this + 1);
76     }
77 
78     /// Returns: The content of this entry as a D slice
79     const(char)[] toString() const @nogc nothrow pure
80     {
81         return (cast(inout(char)*)(&this + 1))[0 .. length];
82     }
83 }
84 
85 struct StringTable(T)
86 {
87 private:
88     StringEntry[] table;
89     ubyte*[] pools;
90     size_t nfill;
91     size_t count;
92     size_t countTrigger;   // amount which will trigger growing the table
93 
94 public:
95     void _init(size_t size = 0) nothrow pure
96     {
97         size = nextpow2((size * loadFactorDenominator) / loadFactorNumerator);
98         if (size < 32)
99             size = 32;
100         table = (cast(StringEntry*)mem.xcalloc(size, (table[0]).sizeof))[0 .. size];
101         countTrigger = (table.length * loadFactorNumerator) / loadFactorDenominator;
102         pools = null;
103         nfill = 0;
104         count = 0;
105     }
106 
107     void reset(size_t size = 0) nothrow pure
108     {
109         freeMem();
110         _init(size);
111     }
112 
113     ~this() nothrow pure
114     {
115         freeMem();
116     }
117 
118     /**
119     Looks up the given string in the string table and returns its associated
120     value.
121 
122     Params:
123      s = the string to look up
124      length = the length of $(D_PARAM s)
125      str = the string to look up
126 
127     Returns: the string's associated value, or `null` if the string doesn't
128      exist in the string table
129     */
130     inout(StringValue!T)* lookup(scope const(char)[] str) inout @nogc nothrow pure
131     {
132         const(size_t) hash = calcHash(str);
133         const(size_t) i = findSlot(hash, str);
134         // printf("lookup %.*s %p\n", cast(int)str.length, str.ptr, table[i].value ?: null);
135         return getValue(table[i].vptr);
136     }
137 
138     /// ditto
139     inout(StringValue!T)* lookup(scope const(char)* s, size_t length) inout @nogc nothrow pure
140     {
141         return lookup(s[0 .. length]);
142     }
143 
144     /**
145     Inserts the given string and the given associated value into the string
146     table.
147 
148     Params:
149      s = the string to insert
150      length = the length of $(D_PARAM s)
151      ptrvalue = the value to associate with the inserted string
152      str = the string to insert
153      value = the value to associate with the inserted string
154 
155     Returns: the newly inserted value, or `null` if the string table already
156      contains the string
157     */
158     StringValue!(T)* insert(scope const(char)[] str, T value) nothrow pure
159     {
160         const(size_t) hash = calcHash(str);
161         size_t i = findSlot(hash, str);
162         if (table[i].vptr)
163             return null; // already in table
164         if (++count > countTrigger)
165         {
166             grow();
167             i = findSlot(hash, str);
168         }
169         table[i].hash = hash;
170         table[i].vptr = allocValue(str, value);
171         // printf("insert %.*s %p\n", cast(int)str.length, str.ptr, table[i].value ?: NULL);
172         return getValue(table[i].vptr);
173     }
174 
175     /// ditto
176     StringValue!(T)* insert(scope const(char)* s, size_t length, T value) nothrow pure
177     {
178         return insert(s[0 .. length], value);
179     }
180 
181     StringValue!(T)* update(scope const(char)[] str) nothrow pure
182     {
183         const(size_t) hash = calcHash(str);
184         size_t i = findSlot(hash, str);
185         if (!table[i].vptr)
186         {
187             if (++count > countTrigger)
188             {
189                 grow();
190                 i = findSlot(hash, str);
191             }
192             table[i].hash = hash;
193             table[i].vptr = allocValue(str, T.init);
194         }
195         // printf("update %.*s %p\n", cast(int)str.length, str.ptr, table[i].value ?: NULL);
196         return getValue(table[i].vptr);
197     }
198 
199     StringValue!(T)* update(scope const(char)* s, size_t length) nothrow pure
200     {
201         return update(s[0 .. length]);
202     }
203 
204     /********************************
205      * Walk the contents of the string table,
206      * calling fp for each entry.
207      * Params:
208      *      fp = function to call. Returns !=0 to stop
209      * Returns:
210      *      last return value of fp call
211      */
212     int apply(int function(const(StringValue!T)*) nothrow fp) nothrow
213     {
214         foreach (const se; table)
215         {
216             if (!se.vptr)
217                 continue;
218             const sv = getValue(se.vptr);
219             int result = (*fp)(sv);
220             if (result)
221                 return result;
222         }
223         return 0;
224     }
225 
226     /// ditto
227     int opApply(scope int delegate(const(StringValue!T)*) nothrow dg) nothrow
228     {
229         foreach (const se; table)
230         {
231             if (!se.vptr)
232                 continue;
233             const sv = getValue(se.vptr);
234             int result = dg(sv);
235             if (result)
236                 return result;
237         }
238         return 0;
239     }
240 
241 private:
242     /// Free all memory in use by this StringTable
243     void freeMem() nothrow pure
244     {
245         foreach (pool; pools)
246             mem.xfree(pool);
247         mem.xfree(table.ptr);
248         mem.xfree(pools.ptr);
249         table = null;
250         pools = null;
251     }
252 
253     // Note that a copy is made of str
254     uint allocValue(scope const(char)[] str, T value) nothrow pure
255     {
256         const(size_t) nbytes = (StringValue!T).sizeof + str.length + 1;
257         if (!pools.length || nfill + nbytes > POOL_SIZE)
258         {
259             pools = (cast(ubyte**) mem.xrealloc(pools.ptr, (pools.length + 1) * (pools[0]).sizeof))[0 .. pools.length + 1];
260             pools[$-1] = cast(ubyte*) mem.xmalloc(nbytes > POOL_SIZE ? nbytes : POOL_SIZE);
261             if (mem.isGCEnabled)
262                 memset(pools[$ - 1], 0xff, POOL_SIZE); // 0xff less likely to produce GC pointer
263             nfill = 0;
264         }
265         StringValue!(T)* sv = cast(StringValue!(T)*)&pools[$ - 1][nfill];
266         sv.value = value;
267         sv.length = str.length;
268         .memcpy(sv.lstring(), str.ptr, str.length);
269         sv.lstring()[str.length] = 0;
270         const(uint) vptr = cast(uint)(pools.length << POOL_BITS | nfill);
271         nfill += nbytes + (-nbytes & 7); // align to 8 bytes
272         return vptr;
273     }
274 
275     inout(StringValue!T)* getValue(uint vptr) inout @nogc nothrow pure
276     {
277         if (!vptr)
278             return null;
279         const(size_t) idx = (vptr >> POOL_BITS) - 1;
280         const(size_t) off = vptr & POOL_SIZE - 1;
281         return cast(inout(StringValue!T)*)&pools[idx][off];
282     }
283 
284     size_t findSlot(hash_t hash, scope const(char)[] str) const @nogc nothrow pure
285     {
286         // quadratic probing using triangular numbers
287         // https://stackoverflow.com/questions/2348187/moving-from-linear-probing-to-quadratic-probing-hash-collisons/2349774#2349774
288         for (size_t i = hash & (table.length - 1), j = 1;; ++j)
289         {
290             const(StringValue!T)* sv;
291             auto vptr = table[i].vptr;
292             if (!vptr || table[i].hash == hash && (sv = getValue(vptr)).length == str.length && .memcmp(str.ptr, sv.toDchars(), str.length) == 0)
293                 return i;
294             i = (i + j) & (table.length - 1);
295         }
296     }
297 
298     void grow() nothrow pure
299     {
300         const odim = table.length;
301         auto otab = table;
302         const ndim = table.length * 2;
303         countTrigger = (ndim * loadFactorNumerator) / loadFactorDenominator;
304         table = (cast(StringEntry*)mem.xcalloc_noscan(ndim, (table[0]).sizeof))[0 .. ndim];
305         foreach (const se; otab[0 .. odim])
306         {
307             if (!se.vptr)
308                 continue;
309             const sv = getValue(se.vptr);
310             table[findSlot(se.hash, sv.toString())] = se;
311         }
312         mem.xfree(otab.ptr);
313     }
314 }
315 
316 nothrow unittest
317 {
318     StringTable!(const(char)*) tab;
319     tab._init(10);
320 
321     // construct two strings with the same text, but a different pointer
322     const(char)[6] fooBuffer = "foofoo";
323     const(char)[] foo = fooBuffer[0 .. 3];
324     const(char)[] fooAltPtr = fooBuffer[3 .. 6];
325 
326     assert(foo.ptr != fooAltPtr.ptr);
327 
328     // first insertion returns value
329     assert(tab.insert(foo, foo.ptr).value == foo.ptr);
330 
331     // subsequent insertion of same string return null
332     assert(tab.insert(foo.ptr, foo.length, foo.ptr) == null);
333     assert(tab.insert(fooAltPtr, foo.ptr) == null);
334 
335     const lookup = tab.lookup("foo");
336     assert(lookup.value == foo.ptr);
337     assert(lookup.len == 3);
338     assert(lookup.toString() == "foo");
339 
340     assert(tab.lookup("bar") == null);
341     tab.update("bar".ptr, "bar".length);
342     assert(tab.lookup("bar").value == null);
343 
344     tab.reset(0);
345     assert(tab.lookup("foo".ptr, "foo".length) == null);
346     //tab.insert("bar");
347 }
348 
349 nothrow unittest
350 {
351     StringTable!(void*) tab;
352     tab._init(100);
353 
354     enum testCount = 2000;
355 
356     char[2 * testCount] buf;
357 
358     foreach(i; 0 .. testCount)
359     {
360         buf[i * 2 + 0] = cast(char) (i % 256);
361         buf[i * 2 + 1] = cast(char) (i / 256);
362         auto toInsert = cast(const(char)[]) buf[i * 2 .. i * 2 + 2];
363         tab.insert(toInsert, cast(void*) i);
364     }
365 
366     foreach(i; 0 .. testCount)
367     {
368         auto toLookup = cast(const(char)[]) buf[i * 2 .. i * 2 + 2];
369         assert(tab.lookup(toLookup).value == cast(void*) i);
370     }
371 }
372 
373 nothrow unittest
374 {
375     StringTable!(int) tab;
376     tab._init(10);
377     tab.insert("foo",  4);
378     tab.insert("bar",  6);
379 
380     static int resultFp = 0;
381     int resultDg = 0;
382     static bool returnImmediately = false;
383 
384     int function(const(StringValue!int)*) nothrow applyFunc = (const(StringValue!int)* s)
385     {
386         resultFp += s.value;
387         return returnImmediately;
388     };
389 
390     scope int delegate(const(StringValue!int)*) nothrow applyDeleg = (const(StringValue!int)* s)
391     {
392         resultDg += s.value;
393         return returnImmediately;
394     };
395 
396     tab.apply(applyFunc);
397     tab.opApply(applyDeleg);
398 
399     assert(resultDg == 10);
400     assert(resultFp == 10);
401 
402     returnImmediately = true;
403 
404     tab.apply(applyFunc);
405     tab.opApply(applyDeleg);
406 
407     // Order of string table iteration is not specified, either foo or bar could
408     // have been visited first.
409     assert(resultDg == 14 || resultDg == 16);
410     assert(resultFp == 14 || resultFp == 16);
411 }