1 /** 2 * A specialized associative array with string keys stored in a variable length structure. 3 * 4 * Copyright: Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved 5 * Authors: Walter Bright, https://www.digitalmars.com 6 * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 7 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/root/stringtable.d, root/_stringtable.d) 8 * Documentation: https://dlang.org/phobos/dmd_root_stringtable.html 9 * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/root/stringtable.d 10 */ 11 12 module dmd.root.stringtable; 13 14 import core.stdc.string; 15 import dmd.root.rmem, dmd.root.hash; 16 17 private enum POOL_BITS = 12; 18 private enum POOL_SIZE = (1U << POOL_BITS); 19 20 /* 21 Returns the smallest integer power of 2 larger than val. 22 if val > 2^^63 on 64-bit targets or val > 2^^31 on 32-bit targets it enters an 23 endless loop because of overflow. 24 */ 25 private size_t nextpow2(size_t val) @nogc nothrow pure @safe 26 { 27 size_t res = 1; 28 while (res < val) 29 res <<= 1; 30 return res; 31 } 32 33 unittest 34 { 35 assert(nextpow2(0) == 1); 36 assert(nextpow2(0xFFFF) == (1 << 16)); 37 assert(nextpow2(size_t.max / 2) == size_t.max / 2 + 1); 38 // note: nextpow2((1UL << 63) + 1) results in an endless loop 39 } 40 41 private enum loadFactorNumerator = 8; 42 private enum loadFactorDenominator = 10; // for a load factor of 0.8 43 44 private struct StringEntry 45 { 46 uint hash; 47 uint vptr; 48 } 49 50 /******************************** 51 * StringValue is a variable-length structure. It has neither proper c'tors nor a 52 * factory method because the only thing which should be creating these is StringTable. 53 * The string characters are stored in memory immediately after the StringValue struct. 54 */ 55 struct StringValue(T) 56 { 57 T value; //T is/should typically be a pointer or a slice 58 private size_t length; 59 /+ 60 char[length] chars; // the string characters are stored here 61 +/ 62 63 char* lstring() @nogc nothrow pure return 64 { 65 return cast(char*)(&this + 1); 66 } 67 68 size_t len() const @nogc nothrow pure @safe 69 { 70 return length; 71 } 72 73 const(char)* toDchars() const @nogc nothrow pure return 74 { 75 return cast(const(char)*)(&this + 1); 76 } 77 78 /// Returns: The content of this entry as a D slice 79 const(char)[] toString() const @nogc nothrow pure 80 { 81 return (cast(inout(char)*)(&this + 1))[0 .. length]; 82 } 83 } 84 85 struct StringTable(T) 86 { 87 private: 88 StringEntry[] table; 89 ubyte*[] pools; 90 size_t nfill; 91 size_t count; 92 size_t countTrigger; // amount which will trigger growing the table 93 94 public: 95 void _init(size_t size = 0) nothrow pure 96 { 97 size = nextpow2((size * loadFactorDenominator) / loadFactorNumerator); 98 if (size < 32) 99 size = 32; 100 table = (cast(StringEntry*)mem.xcalloc(size, (table[0]).sizeof))[0 .. size]; 101 countTrigger = (table.length * loadFactorNumerator) / loadFactorDenominator; 102 pools = null; 103 nfill = 0; 104 count = 0; 105 } 106 107 void reset(size_t size = 0) nothrow pure 108 { 109 freeMem(); 110 _init(size); 111 } 112 113 ~this() nothrow pure 114 { 115 freeMem(); 116 } 117 118 /** 119 Looks up the given string in the string table and returns its associated 120 value. 121 122 Params: 123 s = the string to look up 124 length = the length of $(D_PARAM s) 125 str = the string to look up 126 127 Returns: the string's associated value, or `null` if the string doesn't 128 exist in the string table 129 */ 130 inout(StringValue!T)* lookup(scope const(char)[] str) inout @nogc nothrow pure 131 { 132 const(size_t) hash = calcHash(str); 133 const(size_t) i = findSlot(hash, str); 134 // printf("lookup %.*s %p\n", cast(int)str.length, str.ptr, table[i].value ?: null); 135 return getValue(table[i].vptr); 136 } 137 138 /// ditto 139 inout(StringValue!T)* lookup(scope const(char)* s, size_t length) inout @nogc nothrow pure 140 { 141 return lookup(s[0 .. length]); 142 } 143 144 /** 145 Inserts the given string and the given associated value into the string 146 table. 147 148 Params: 149 s = the string to insert 150 length = the length of $(D_PARAM s) 151 ptrvalue = the value to associate with the inserted string 152 str = the string to insert 153 value = the value to associate with the inserted string 154 155 Returns: the newly inserted value, or `null` if the string table already 156 contains the string 157 */ 158 StringValue!(T)* insert(scope const(char)[] str, T value) nothrow pure 159 { 160 const(size_t) hash = calcHash(str); 161 size_t i = findSlot(hash, str); 162 if (table[i].vptr) 163 return null; // already in table 164 if (++count > countTrigger) 165 { 166 grow(); 167 i = findSlot(hash, str); 168 } 169 table[i].hash = hash; 170 table[i].vptr = allocValue(str, value); 171 // printf("insert %.*s %p\n", cast(int)str.length, str.ptr, table[i].value ?: NULL); 172 return getValue(table[i].vptr); 173 } 174 175 /// ditto 176 StringValue!(T)* insert(scope const(char)* s, size_t length, T value) nothrow pure 177 { 178 return insert(s[0 .. length], value); 179 } 180 181 StringValue!(T)* update(scope const(char)[] str) nothrow pure 182 { 183 const(size_t) hash = calcHash(str); 184 size_t i = findSlot(hash, str); 185 if (!table[i].vptr) 186 { 187 if (++count > countTrigger) 188 { 189 grow(); 190 i = findSlot(hash, str); 191 } 192 table[i].hash = hash; 193 table[i].vptr = allocValue(str, T.init); 194 } 195 // printf("update %.*s %p\n", cast(int)str.length, str.ptr, table[i].value ?: NULL); 196 return getValue(table[i].vptr); 197 } 198 199 StringValue!(T)* update(scope const(char)* s, size_t length) nothrow pure 200 { 201 return update(s[0 .. length]); 202 } 203 204 /******************************** 205 * Walk the contents of the string table, 206 * calling fp for each entry. 207 * Params: 208 * fp = function to call. Returns !=0 to stop 209 * Returns: 210 * last return value of fp call 211 */ 212 int apply(int function(const(StringValue!T)*) nothrow fp) nothrow 213 { 214 foreach (const se; table) 215 { 216 if (!se.vptr) 217 continue; 218 const sv = getValue(se.vptr); 219 int result = (*fp)(sv); 220 if (result) 221 return result; 222 } 223 return 0; 224 } 225 226 /// ditto 227 int opApply(scope int delegate(const(StringValue!T)*) nothrow dg) nothrow 228 { 229 foreach (const se; table) 230 { 231 if (!se.vptr) 232 continue; 233 const sv = getValue(se.vptr); 234 int result = dg(sv); 235 if (result) 236 return result; 237 } 238 return 0; 239 } 240 241 private: 242 /// Free all memory in use by this StringTable 243 void freeMem() nothrow pure 244 { 245 foreach (pool; pools) 246 mem.xfree(pool); 247 mem.xfree(table.ptr); 248 mem.xfree(pools.ptr); 249 table = null; 250 pools = null; 251 } 252 253 // Note that a copy is made of str 254 uint allocValue(scope const(char)[] str, T value) nothrow pure 255 { 256 const(size_t) nbytes = (StringValue!T).sizeof + str.length + 1; 257 if (!pools.length || nfill + nbytes > POOL_SIZE) 258 { 259 pools = (cast(ubyte**) mem.xrealloc(pools.ptr, (pools.length + 1) * (pools[0]).sizeof))[0 .. pools.length + 1]; 260 pools[$-1] = cast(ubyte*) mem.xmalloc(nbytes > POOL_SIZE ? nbytes : POOL_SIZE); 261 if (mem.isGCEnabled) 262 memset(pools[$ - 1], 0xff, POOL_SIZE); // 0xff less likely to produce GC pointer 263 nfill = 0; 264 } 265 StringValue!(T)* sv = cast(StringValue!(T)*)&pools[$ - 1][nfill]; 266 sv.value = value; 267 sv.length = str.length; 268 .memcpy(sv.lstring(), str.ptr, str.length); 269 sv.lstring()[str.length] = 0; 270 const(uint) vptr = cast(uint)(pools.length << POOL_BITS | nfill); 271 nfill += nbytes + (-nbytes & 7); // align to 8 bytes 272 return vptr; 273 } 274 275 inout(StringValue!T)* getValue(uint vptr) inout @nogc nothrow pure 276 { 277 if (!vptr) 278 return null; 279 const(size_t) idx = (vptr >> POOL_BITS) - 1; 280 const(size_t) off = vptr & POOL_SIZE - 1; 281 return cast(inout(StringValue!T)*)&pools[idx][off]; 282 } 283 284 size_t findSlot(hash_t hash, scope const(char)[] str) const @nogc nothrow pure 285 { 286 // quadratic probing using triangular numbers 287 // https://stackoverflow.com/questions/2348187/moving-from-linear-probing-to-quadratic-probing-hash-collisons/2349774#2349774 288 for (size_t i = hash & (table.length - 1), j = 1;; ++j) 289 { 290 const(StringValue!T)* sv; 291 auto vptr = table[i].vptr; 292 if (!vptr || table[i].hash == hash && (sv = getValue(vptr)).length == str.length && .memcmp(str.ptr, sv.toDchars(), str.length) == 0) 293 return i; 294 i = (i + j) & (table.length - 1); 295 } 296 } 297 298 void grow() nothrow pure 299 { 300 const odim = table.length; 301 auto otab = table; 302 const ndim = table.length * 2; 303 countTrigger = (ndim * loadFactorNumerator) / loadFactorDenominator; 304 table = (cast(StringEntry*)mem.xcalloc_noscan(ndim, (table[0]).sizeof))[0 .. ndim]; 305 foreach (const se; otab[0 .. odim]) 306 { 307 if (!se.vptr) 308 continue; 309 const sv = getValue(se.vptr); 310 table[findSlot(se.hash, sv.toString())] = se; 311 } 312 mem.xfree(otab.ptr); 313 } 314 } 315 316 nothrow unittest 317 { 318 StringTable!(const(char)*) tab; 319 tab._init(10); 320 321 // construct two strings with the same text, but a different pointer 322 const(char)[6] fooBuffer = "foofoo"; 323 const(char)[] foo = fooBuffer[0 .. 3]; 324 const(char)[] fooAltPtr = fooBuffer[3 .. 6]; 325 326 assert(foo.ptr != fooAltPtr.ptr); 327 328 // first insertion returns value 329 assert(tab.insert(foo, foo.ptr).value == foo.ptr); 330 331 // subsequent insertion of same string return null 332 assert(tab.insert(foo.ptr, foo.length, foo.ptr) == null); 333 assert(tab.insert(fooAltPtr, foo.ptr) == null); 334 335 const lookup = tab.lookup("foo"); 336 assert(lookup.value == foo.ptr); 337 assert(lookup.len == 3); 338 assert(lookup.toString() == "foo"); 339 340 assert(tab.lookup("bar") == null); 341 tab.update("bar".ptr, "bar".length); 342 assert(tab.lookup("bar").value == null); 343 344 tab.reset(0); 345 assert(tab.lookup("foo".ptr, "foo".length) == null); 346 //tab.insert("bar"); 347 } 348 349 nothrow unittest 350 { 351 StringTable!(void*) tab; 352 tab._init(100); 353 354 enum testCount = 2000; 355 356 char[2 * testCount] buf; 357 358 foreach(i; 0 .. testCount) 359 { 360 buf[i * 2 + 0] = cast(char) (i % 256); 361 buf[i * 2 + 1] = cast(char) (i / 256); 362 auto toInsert = cast(const(char)[]) buf[i * 2 .. i * 2 + 2]; 363 tab.insert(toInsert, cast(void*) i); 364 } 365 366 foreach(i; 0 .. testCount) 367 { 368 auto toLookup = cast(const(char)[]) buf[i * 2 .. i * 2 + 2]; 369 assert(tab.lookup(toLookup).value == cast(void*) i); 370 } 371 } 372 373 nothrow unittest 374 { 375 StringTable!(int) tab; 376 tab._init(10); 377 tab.insert("foo", 4); 378 tab.insert("bar", 6); 379 380 static int resultFp = 0; 381 int resultDg = 0; 382 static bool returnImmediately = false; 383 384 int function(const(StringValue!int)*) nothrow applyFunc = (const(StringValue!int)* s) 385 { 386 resultFp += s.value; 387 return returnImmediately; 388 }; 389 390 scope int delegate(const(StringValue!int)*) nothrow applyDeleg = (const(StringValue!int)* s) 391 { 392 resultDg += s.value; 393 return returnImmediately; 394 }; 395 396 tab.apply(applyFunc); 397 tab.opApply(applyDeleg); 398 399 assert(resultDg == 10); 400 assert(resultFp == 10); 401 402 returnImmediately = true; 403 404 tab.apply(applyFunc); 405 tab.opApply(applyDeleg); 406 407 // Order of string table iteration is not specified, either foo or bar could 408 // have been visited first. 409 assert(resultDg == 14 || resultDg == 16); 410 assert(resultFp == 14 || resultFp == 16); 411 }