1 /** 2 * Spell checker 3 * 4 * Does not have any dependencies on the rest of DMD. 5 * 6 * Copyright: Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved 7 * Authors: Walter Bright, https://www.digitalmars.com 8 * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 9 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/root/speller.d, root/_speller.d) 10 * Documentation: https://dlang.org/phobos/dmd_root_speller.html 11 * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/root/speller.d 12 */ 13 14 module dmd.root.speller; 15 16 /************************************************** 17 * Looks for correct spelling. 18 * Looks a distance of up to two. 19 * This does an exhaustive search, so can potentially be very slow. 20 * Params: 21 * seed = wrongly spelled word 22 * dg = search delegate of the form `T delegate(const(char)[] p, out int cost)` 23 * Returns: 24 * T.init = no correct spellings found, 25 * otherwise the value returned by dg() for first possible correct spelling 26 */ 27 auto speller(alias dg)(const(char)[] seed) 28 if (isSearchFunction!dg) 29 { 30 const size_t maxdist = seed.length < 4 ? seed.length / 2 : 2; 31 foreach (distance; 0 .. maxdist) 32 { 33 if (auto p = spellerX!dg(seed, distance != 0)) 34 return p; 35 // if (seedlen > 10) 36 // break; 37 } 38 return null; // didn't find it 39 } 40 41 private: 42 43 import core.stdc.stdlib; 44 import core.stdc.string; 45 import dmd.common.string : SmallBuffer; 46 47 enum isSearchFunction(alias fun) = is(searchFunctionType!fun); 48 alias searchFunctionType(alias fun) = typeof(() {int x; return fun("", x);}()); 49 50 /************************************* 51 * Spell check level 1. 52 * Params: 53 * dg = delegate that looks up string in dictionary AA and returns value found 54 * seed = starting string 55 * flag = if true, do 2 level lookup otherwise 1 level 56 * Returns: 57 * whatever dg returns, null if no match 58 */ 59 auto spellerX(alias dg)(const(char)[] seed, bool flag) 60 { 61 if (!seed.length) 62 return null; 63 64 /* Need buffer to store trial strings in 65 */ 66 char[30] tmp = void; 67 auto sb = SmallBuffer!char(seed.length + 1, tmp[]); 68 char[] buf = sb[]; 69 70 int cost = int.max; 71 searchFunctionType!dg p = null; 72 73 /* Deletions */ 74 buf[0 .. seed.length - 1] = seed[1 .. $]; 75 foreach (i; 0 .. seed.length) 76 { 77 //printf("del buf = '%s'\n", buf); 78 int ncost; 79 auto np = flag ? spellerY!dg(buf[0 .. seed.length - 1], i, ncost) 80 : dg(buf[0 .. seed.length - 1], ncost); 81 if (combineSpellerResult(p, cost, np, ncost)) 82 return p; 83 buf[i] = seed[i]; 84 } 85 86 /* Transpositions */ 87 if (!flag) 88 { 89 buf[0 .. seed.length] = seed; 90 for (size_t i = 0; i + 1 < seed.length; i++) 91 { 92 // swap [i] and [i + 1] 93 buf[i] = seed[i + 1]; 94 buf[i + 1] = seed[i]; 95 //printf("tra buf = '%s'\n", buf); 96 int ncost; 97 auto np = dg(buf[0 .. seed.length], ncost); 98 if (combineSpellerResult(p, cost, np, ncost)) 99 return p; 100 buf[i] = seed[i]; 101 } 102 } 103 104 /* Substitutions */ 105 buf[0 .. seed.length] = seed; 106 foreach (i; 0 .. seed.length) 107 { 108 foreach (s; idchars) 109 { 110 buf[i] = s; 111 //printf("sub buf = '%s'\n", buf); 112 int ncost; 113 auto np = flag ? spellerY!dg(buf[0 .. seed.length], i + 1, ncost) 114 : dg(buf[0 .. seed.length], ncost); 115 if (combineSpellerResult(p, cost, np, ncost)) 116 return p; 117 } 118 buf[i] = seed[i]; 119 } 120 121 /* Insertions */ 122 buf[1 .. seed.length + 1] = seed; 123 foreach (i; 0 .. seed.length + 1) // yes, do seed.length+1 iterations 124 { 125 foreach (s; idchars) 126 { 127 buf[i] = s; 128 //printf("ins buf = '%s'\n", buf); 129 int ncost; 130 auto np = flag ? spellerY!dg(buf[0 .. seed.length + 1], i + 1, ncost) 131 : dg(buf[0 .. seed.length + 1], ncost); 132 if (combineSpellerResult(p, cost, np, ncost)) 133 return p; 134 } 135 if (i < seed.length) 136 buf[i] = seed[i]; 137 } 138 139 return p; // return "best" result 140 } 141 142 /********************************************** 143 * Do second level of spell matching. 144 * Params: 145 * dg = delegate that looks up string in dictionary AA and returns value found 146 * seed = starting string 147 * index = index into seed[] that is where we will mutate it 148 * cost = set to cost of match 149 * Returns: 150 * whatever dg returns, null if no match 151 */ 152 auto spellerY(alias dg)(const(char)[] seed, size_t index, out int cost) 153 { 154 if (!seed.length) 155 return null; 156 157 /* Allocate a buf to store the new string to play with, needs 158 * space for an extra char for insertions 159 */ 160 char[30] tmp = void; // stack allocations are fastest 161 auto sb = SmallBuffer!char(seed.length + 1, tmp[]); 162 char[] buf = sb[]; 163 buf[0 .. index] = seed[0 .. index]; 164 165 cost = int.max; // start with worst possible match 166 searchFunctionType!dg p = null; 167 168 /* Delete character at seed[index] */ 169 if (index < seed.length) 170 { 171 buf[index .. seed.length - 1] = seed[index + 1 .. $]; // seed[] with deleted character 172 int ncost; 173 auto np = dg(buf[0 .. seed.length - 1], ncost); // look it up 174 if (combineSpellerResult(p, cost, np, ncost)) // compare with prev match 175 return p; // cannot get any better 176 } 177 178 /* Substitute character at seed[index] */ 179 if (index < seed.length) 180 { 181 buf[0 .. seed.length] = seed; 182 foreach (s; idchars) 183 { 184 buf[index] = s; // seed[] with substituted character 185 //printf("sub buf = '%s'\n", buf); 186 int ncost; 187 auto np = dg(buf[0 .. seed.length], ncost); 188 if (combineSpellerResult(p, cost, np, ncost)) 189 return p; 190 } 191 } 192 193 /* Insert character at seed[index] */ 194 buf[index + 1 .. seed.length + 1] = seed[index .. $]; 195 foreach (s; idchars) 196 { 197 buf[index] = s; 198 //printf("ins buf = '%s'\n", buf); 199 int ncost; 200 auto np = dg(buf[0 .. seed.length + 1], ncost); 201 if (combineSpellerResult(p, cost, np, ncost)) 202 return p; 203 } 204 return p; // return "best" result 205 } 206 207 208 /* Characters used to substitute ones in the string we're checking 209 * the spelling on. 210 */ 211 immutable string idchars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; 212 213 /************************************************** 214 * Combine a new result from the spell checker to 215 * find the one with the closest symbol with 216 * respect to the cost defined by the search function 217 * Params: 218 * p = best found spelling so far, T.init if none found yet. 219 * If np is better, p is replaced with np 220 * cost = cost of p (int.max if none found yet). 221 * If np is better, cost is replaced with ncost 222 * np = current spelling to check against p, T.init if none 223 * ncost = cost of np if np is not T.init 224 * Returns: 225 * true if the cost is less or equal 0, meaning we can stop looking 226 * false otherwise 227 */ 228 bool combineSpellerResult(T)(ref T p, ref int cost, T np, int ncost) 229 { 230 if (np && ncost < cost) // if np is better 231 { 232 p = np; // np is new best match 233 cost = ncost; 234 if (cost <= 0) 235 return true; // meaning we can stop looking 236 } 237 return false; 238 } 239 240 /************************************* Tests ***********************/ 241 242 unittest 243 { 244 static immutable string[][] cases = 245 [ 246 ["hello", "hell", "y"], 247 ["hello", "hel", "y"], 248 ["hello", "ello", "y"], 249 ["hello", "llo", "y"], 250 ["hello", "hellox", "y"], 251 ["hello", "helloxy", "y"], 252 ["hello", "xhello", "y"], 253 ["hello", "xyhello", "y"], 254 ["hello", "ehllo", "y"], 255 ["hello", "helol", "y"], 256 ["hello", "abcd", "n"], 257 ["hello", "helxxlo", "y"], 258 ["hello", "ehlxxlo", "n"], 259 ["hello", "heaao", "y"], 260 ["_123456789_123456789_123456789_123456789", "_123456789_123456789_123456789_12345678", "y"], 261 ]; 262 //printf("unittest_speller()\n"); 263 264 string dgarg; 265 266 string speller_test(const(char)[] s, ref int cost) 267 { 268 assert(s[$-1] != '\0'); 269 //printf("speller_test(%s, %s)\n", dgarg, s); 270 cost = 0; 271 if (dgarg == s) 272 return dgarg; 273 return null; 274 } 275 276 dgarg = "hell"; 277 auto p = speller!speller_test("hello"); 278 assert(p !is null); 279 foreach (testCase; cases) 280 { 281 //printf("case [%d]\n", i); 282 dgarg = testCase[1]; 283 auto p2 = speller!speller_test(testCase[0]); 284 if (p2) 285 assert(testCase[2][0] == 'y'); 286 else 287 assert(testCase[2][0] == 'n'); 288 } 289 //printf("unittest_speller() success\n"); 290 }