1 /** 2 * Algorithms from "Division by Invariant Integers using Multiplication" 3 * by Torbjoern Granlund and Peter L. Montgomery 4 * 5 * Compiler implementation of the 6 * $(LINK2 https://www.dlang.org, D programming language). 7 * 8 * Copyright: Copyright (C) 2013-2023 by The D Language Foundation, All Rights Reserved 9 * Authors: $(LINK2 https://www.digitalmars.com, Walter Bright) 10 * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 11 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/divcoeff.d, backend/divcoeff.d) 12 */ 13 module dmd.backend.divcoeff; 14 15 import core.stdc.stdio; 16 17 18 nothrow: 19 @safe: 20 21 alias ullong = ulong; 22 23 /* unsigned 128 bit math 24 */ 25 26 bool SIGN64(ullong x) 27 { 28 return cast(long)x < 0; 29 } 30 31 void SHL128(out ullong dh, out ullong dl, ullong xh,ullong xl) 32 { 33 dh = (xh << 1) | SIGN64(xl); 34 dl = xl << 1; 35 } 36 37 void SHR128(out ullong dh, out ullong dl, ullong xh,ullong xl) 38 { 39 dl = (xl >> 1) | ((xh & 1) << 63); 40 dh = xh >> 1; 41 } 42 43 bool XltY128(ullong xh, ullong xl, ullong yh, ullong yl) 44 { 45 return xh < yh || (xh == yh && xl < yl); 46 } 47 48 void u128Div(ullong xh, ullong xl, ullong yh, ullong yl, ullong *pqh, ullong *pql) 49 { 50 /* Use auld skool shift & subtract algorithm. 51 * Not very efficient. 52 */ 53 54 //ullong xxh = xh, xxl = xl, yyh = yh, yyl = yl; 55 56 assert(yh || yl); // no div-by-0 bugs 57 58 // left justify y 59 uint shiftcount = 1; 60 if (!yh) 61 { yh = yl; 62 yl = 0; 63 shiftcount += 64; 64 } 65 while (!SIGN64(yh)) 66 { 67 SHL128(yh,yl, yh,yl); 68 shiftcount += 1; 69 } 70 71 ullong qh = 0; 72 ullong ql = 0; 73 do 74 { 75 SHL128(qh,ql, qh,ql); 76 if (XltY128(yh,yl,xh,xl)) 77 { 78 // x -= y; 79 if (xl < yl) 80 { xl -= yl; 81 xh -= yh + 1; 82 } 83 else 84 { xl -= yl; 85 xh -= yh; 86 } 87 88 ql |= 1; 89 } 90 SHR128(yh,yl, yh,yl); 91 } while (--shiftcount); 92 93 *pqh = qh; 94 *pql = ql; 95 96 // Remainder is xh,xl 97 98 version (none) 99 { 100 printf("%016llx_%016llx / %016llx_%016llx = %016llx_%016llx\n", xxh,xxl,yyh,yyl,qh,ql); 101 if (xxh == 0 && yyh == 0) 102 printf("should be %llx\n", xxl / yyl); 103 } 104 } 105 106 /************************************ 107 * Implement Algorithm 6.2: Selection of multiplier and shift count 108 * Params: 109 * N = 32 or 64 110 * d = divisor (must not be 0 or a power of 2) 111 * prec = bits of precision desired 112 * Output: 113 * *pm = factor 114 * *pshpost = post shift 115 * Returns: 116 * true m >= 2**N 117 */ 118 119 @trusted 120 extern (C) bool choose_multiplier(int N, ullong d, int prec, ullong *pm, int *pshpost) 121 { 122 assert(N == 32 || N == 64); 123 assert(prec <= N); 124 assert(d > 1 && (d & (d - 1))); 125 126 // Compute b such that 2**(b-1) < d <= 2**b 127 // which is the number of significant bits in d 128 int b = 0; 129 ullong d1 = d; 130 while (d1) 131 { 132 ++b; 133 d1 >>= 1; 134 } 135 136 int shpost = b; 137 138 bool mhighbit = false; 139 if (N == 32) 140 { 141 // mlow = (2**(N + b)) / d 142 ullong mlow = (1UL << (N + b)) / d; 143 144 // uhigh = (2**(N + b) + 2**(N + b - prec)) / d 145 ullong mhigh = ((1UL << (N + b)) + (1UL << (N + b - prec))) / d; 146 147 while (mlow/2 < mhigh/2 && shpost) 148 { 149 mlow /= 2; 150 mhigh /= 2; 151 --shpost; 152 } 153 154 *pm = mhigh & 0xFFFFFFFF; 155 mhighbit = (mhigh >> N) != 0; 156 } 157 else if (N == 64) 158 { 159 // Same as for N==32, but use 128 bit unsigned arithmetic 160 161 // mlow = (2**(N + b)) / d 162 ullong mlowl = 0; 163 ullong mlowh = 1UL << b; 164 165 // mlow /= d 166 u128Div(mlowh, mlowl, 0, d, &mlowh, &mlowl); 167 168 // mhigh = (2**(N + b) + 2**(N + b - prec)) / d 169 ullong mhighl = 0; 170 ullong mhighh = 1UL << b; 171 int e = N + b - prec; 172 if (e < 64) 173 mhighl = 1UL << e; 174 else 175 mhighh |= 1UL << (e - 64); 176 177 // mhigh /= d 178 u128Div(mhighh, mhighl, 0, d, &mhighh, &mhighl); 179 180 while (1) 181 { 182 // mlowb = mlow / 2 183 ullong mlowbh,mlowbl; 184 SHR128(mlowbh,mlowbl, mlowh,mlowl); 185 186 // mhighb = mhigh / 2 187 ullong mhighbh,mhighbl; 188 SHR128(mhighbh,mhighbl, mhighh,mhighl); 189 190 // if (mlowb < mhighb && shpost) 191 if (XltY128(mlowbh,mlowbl, mhighbh,mhighbl) && shpost) 192 { 193 // mlow = mlowb 194 mlowl = mlowbl; 195 mlowh = mlowbh; 196 197 // mhigh = mhighb 198 mhighl = mhighbl; 199 mhighh = mhighbh; 200 201 --shpost; 202 } 203 else 204 break; 205 } 206 207 *pm = mhighl; 208 mhighbit = mhighh & 1; 209 } 210 else 211 assert(0); 212 213 *pshpost = shpost; 214 return mhighbit; 215 } 216 217 /************************************* 218 * Find coefficients for Algorithm 4.2: 219 * Optimized code generation of unsigned q=n/d for constant nonzero d 220 * Input: 221 * N 32 or 64 (width of divide) 222 * d divisor (not a power of 2) 223 * Output: 224 * *pshpre pre-shift 225 * *pm factor 226 * *pshpost post-shift 227 * Returns: 228 * true Use algorithm: 229 * t1 = MULUH(m, n) 230 * q = SRL(t1 + SRL(n - t1, 1), shpost - 1) 231 * 232 * false Use algorithm: 233 * q = SRL(MULUH(m, SRL(n, shpre)), shpost) 234 */ 235 236 extern (C) bool udiv_coefficients(int N, ullong d, int *pshpre, ullong *pm, int *pshpost) 237 { 238 bool mhighbit = choose_multiplier(N, d, N, pm, pshpost); 239 if (mhighbit && (d & 1) == 0) 240 { 241 int e = 0; 242 while ((d & 1) == 0) 243 { ++e; 244 d >>= 1; 245 } 246 *pshpre = e; 247 mhighbit = choose_multiplier(N, d, N - e, pm, pshpost); 248 assert(mhighbit == false); 249 } 250 else 251 *pshpre = 0; 252 return mhighbit; 253 } 254 255 @trusted 256 unittest 257 { 258 struct S 259 { 260 int N; 261 ullong d; 262 int shpre; 263 int highbit; 264 ullong m; 265 int shpost; 266 } 267 268 static immutable S[14] table = 269 [ 270 { 32, 10, 0, 0, 0xCCCCCCCD, 3 }, 271 { 32, 13, 0, 0, 0x4EC4EC4F, 2 }, 272 { 32, 14, 1, 0, 0x92492493, 2 }, 273 { 32, 15, 0, 0, 0x88888889, 3 }, 274 { 32, 17, 0, 0, 0xF0F0F0F1, 4 }, 275 { 32, 14_007, 0, 1, 0x2B71840D, 14 }, 276 277 { 64, 7, 0, 1, 0x2492492492492493, 3 }, 278 { 64, 10, 0, 0, 0xCCCCCCCCCCCCCCCD, 3 }, 279 { 64, 13, 0, 0, 0x4EC4EC4EC4EC4EC5, 2 }, 280 { 64, 14, 1, 0, 0x4924924924924925, 1 }, 281 { 64, 15, 0, 0, 0x8888888888888889, 3 }, 282 { 64, 17, 0, 0, 0xF0F0F0F0F0F0F0F1, 4 }, 283 { 64, 100 , 2, 0, 0x28F5C28F5C28F5C3, 2 }, 284 { 64, 14_007, 0, 1, 0x2B71840C5ADF02C3, 14 }, 285 ]; 286 287 for (int i = 0; i < table.length; i++) 288 { const ps = &table[i]; 289 290 ullong m; 291 int shpre; 292 int shpost; 293 bool mhighbit = udiv_coefficients(ps.N, ps.d, &shpre, &m, &shpost); 294 295 //printf("[%d] %d %d %llx %d\n", i, shpre, mhighbit, m, shpost); 296 assert(shpre == ps.shpre); 297 assert(mhighbit == ps.highbit); 298 assert(m == ps.m); 299 assert(shpost == ps.shpost); 300 } 301 } 302 303 version (none) 304 { 305 import core.stdc.stdlib; 306 307 extern (D) int main(string[] args) 308 { 309 if (args.length == 2) 310 { 311 ullong d = atoi(args[1].ptr); 312 ullong m; 313 int shpre; 314 int shpost; 315 bool mhighbit = udiv_coefficients(64, d, &shpre, &m, &shpost); 316 317 printf("%d %d %llx, %d\n", shpre, mhighbit, m, shpost); 318 } 319 return 0; 320 } 321 }