1 /** 2 * Support for 64-bit longs. 3 * 4 * Copyright: Copyright Digital Mars 2000 - 2012. 5 * License: Distributed under the 6 * $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0). 7 * (See accompanying file LICENSE) 8 * Authors: Walter Bright, Sean Kelly 9 * Source: $(DRUNTIMESRC rt/_llmath.d) 10 */ 11 12 module rt.llmath; 13 14 extern (C): 15 16 17 /*************************************** 18 * Unsigned long divide. 19 * Input: 20 * [EDX,EAX],[ECX,EBX] 21 * Output: 22 * [EDX,EAX] = [EDX,EAX] / [ECX,EBX] 23 * [ECX,EBX] = [EDX,EAX] % [ECX,EBX] 24 */ 25 26 void __ULDIV2__() 27 { 28 version (D_InlineAsm_X86) 29 { 30 asm 31 { 32 naked ; 33 mov EBX,4[ESP] ; // the only difference between this and __ULDIV__ 34 test ECX,ECX ; 35 jz uldiv ; 36 37 // if ECX > EDX, then quotient is 0 and remainder is [EDX,EAX] 38 cmp ECX,EDX ; 39 ja quo0 ; 40 41 test ECX,ECX ; 42 js Lleft ; 43 44 /* We have n>d, and know that n/d will fit in 32 bits. 45 * d will be left justified if we shift it left s bits. 46 * [d1,d0] <<= s 47 * [n2,n1,n0] = [n1,n0] << s 48 * 49 * Use one divide, by this reasoning: 50 * ([n2,n1]<<32 + n0)/(d1<<32 + d0) 51 * becomes: 52 * ([n2,n1]<<32)/(d1<<32 + d0) + n0/(d1<<32 + d0) 53 * The second divide is always 0. 54 * Ignore the d0 in the first divide, which will yield a quotient 55 * that might be too high by 1 (because d1 is left justified). 56 * We can tell if it's too big if: 57 * q*[d1,d0] > [n2,n1,n0] 58 * which is: 59 * q*[d1,d0] > [[q*[d1,0]+q%[d1,0],n1,n0] 60 * If we subtract q*[d1,0] from both sides, we get: 61 * q*d0 > [[n2,n1]%d1,n0] 62 * So if it is too big by one, reduce q by one to q'=q-one. 63 * Compute remainder as: 64 * r = ([n1,n0] - q'*[d1,d0]) >> s 65 * Again, we can subtract q*[d1,0]: 66 * r = ([n1,n0] - q*[d1,0] - (q'*[d1,d0] - q*[d1,0])) >> s 67 * r = ([[n2,n1]%d1,n0] + (q*[d1,0] - (q - one)*[d1,d0])) >> s 68 * r = ([[n2,n1]%d1,n0] + (q*[d1,0] - [d1 *(q-one),d0*(1-q)])) >> s 69 * r = ([[n2,n1]%d1,n0] + [d1 *one,d0*(one-q)])) >> s 70 */ 71 72 push EBP ; 73 push ESI ; 74 push EDI ; 75 76 mov ESI,EDX ; 77 mov EDI,EAX ; 78 mov EBP,ECX ; 79 80 bsr EAX,ECX ; // EAX is now 30..0 81 xor EAX,0x1F ; // EAX is now 1..31 82 mov CH,AL ; 83 neg EAX ; 84 add EAX,32 ; 85 mov CL,AL ; 86 87 mov EAX,EBX ; 88 shr EAX,CL ; 89 xchg CH,CL ; 90 shl EBP,CL ; 91 or EBP,EAX ; 92 shl EBX,CL ; 93 94 mov EDX,ESI ; 95 xchg CH,CL ; 96 shr EDX,CL ; 97 98 mov EAX,EDI ; 99 shr EAX,CL ; 100 xchg CH,CL ; 101 shl EDI,CL ; 102 shl ESI,CL ; 103 or EAX,ESI ; 104 105 div EBP ; 106 push EBP ; 107 mov EBP,EAX ; 108 mov ESI,EDX ; 109 110 mul EBX ; 111 cmp EDX,ESI ; 112 ja L1 ; 113 jb L2 ; 114 cmp EAX,EDI ; 115 jbe L2 ; 116 L1: dec EBP ; 117 sub EAX,EBX ; 118 sbb EDX,0[ESP] ; 119 L2: 120 add ESP,4 ; 121 sub EDI,EAX ; 122 sbb ESI,EDX ; 123 mov EAX,ESI ; 124 xchg CH,CL ; 125 shl EAX,CL ; 126 xchg CH,CL ; 127 shr EDI,CL ; 128 or EDI,EAX ; 129 shr ESI,CL ; 130 mov EBX,EDI ; 131 mov ECX,ESI ; 132 mov EAX,EBP ; 133 xor EDX,EDX ; 134 135 pop EDI ; 136 pop ESI ; 137 pop EBP ; 138 ret ; 139 140 uldiv: test EDX,EDX ; 141 jnz D3 ; 142 // Both high words are 0, we can use the DIV instruction 143 div EBX ; 144 mov EBX,EDX ; 145 mov EDX,ECX ; // EDX = ECX = 0 146 ret ; 147 148 even ; 149 D3: // Divide [EDX,EAX] by EBX 150 mov ECX,EAX ; 151 mov EAX,EDX ; 152 xor EDX,EDX ; 153 div EBX ; 154 xchg ECX,EAX ; 155 div EBX ; 156 // ECX,EAX = result 157 // EDX = remainder 158 mov EBX,EDX ; 159 mov EDX,ECX ; 160 xor ECX,ECX ; 161 ret ; 162 163 quo0: // Quotient is 0 164 // Remainder is [EDX,EAX] 165 mov EBX,EAX ; 166 mov ECX,EDX ; 167 xor EAX,EAX ; 168 xor EDX,EDX ; 169 ret ; 170 171 Lleft: // The quotient is 0 or 1 and EDX >= ECX 172 cmp EDX,ECX ; 173 ja quo1 ; // [EDX,EAX] > [ECX,EBX] 174 // EDX == ECX 175 cmp EAX,EBX ; 176 jb quo0 ; 177 178 quo1: // Quotient is 1 179 // Remainder is [EDX,EAX] - [ECX,EBX] 180 sub EAX,EBX ; 181 sbb EDX,ECX ; 182 mov EBX,EAX ; 183 mov ECX,EDX ; 184 mov EAX,1 ; 185 xor EDX,EDX ; 186 ret ; 187 } 188 } 189 else version (D_InlineAsm_X86_64) 190 assert(0); 191 else 192 static assert(0); 193 } 194 195 void __ULDIV__() 196 { 197 version (D_InlineAsm_X86) 198 { 199 asm 200 { 201 naked ; 202 test ECX,ECX ; 203 jz uldiv ; 204 205 // if ECX > EDX, then quotient is 0 and remainder is [EDX,EAX] 206 cmp ECX,EDX ; 207 ja quo0 ; 208 209 test ECX,ECX ; 210 js Lleft ; 211 212 /* We have n>d, and know that n/d will fit in 32 bits. 213 * d will be left justified if we shift it left s bits. 214 * [d1,d0] <<= s 215 * [n2,n1,n0] = [n1,n0] << s 216 * 217 * Use one divide, by this reasoning: 218 * ([n2,n1]<<32 + n0)/(d1<<32 + d0) 219 * becomes: 220 * ([n2,n1]<<32)/(d1<<32 + d0) + n0/(d1<<32 + d0) 221 * The second divide is always 0. 222 * Ignore the d0 in the first divide, which will yield a quotient 223 * that might be too high by 1 (because d1 is left justified). 224 * We can tell if it's too big if: 225 * q*[d1,d0] > [n2,n1,n0] 226 * which is: 227 * q*[d1,d0] > [[q*[d1,0]+q%[d1,0],n1,n0] 228 * If we subtract q*[d1,0] from both sides, we get: 229 * q*d0 > [[n2,n1]%d1,n0] 230 * So if it is too big by one, reduce q by one to q'=q-one. 231 * Compute remainder as: 232 * r = ([n1,n0] - q'*[d1,d0]) >> s 233 * Again, we can subtract q*[d1,0]: 234 * r = ([n1,n0] - q*[d1,0] - (q'*[d1,d0] - q*[d1,0])) >> s 235 * r = ([[n2,n1]%d1,n0] + (q*[d1,0] - (q - one)*[d1,d0])) >> s 236 * r = ([[n2,n1]%d1,n0] + (q*[d1,0] - [d1 *(q-one),d0*(1-q)])) >> s 237 * r = ([[n2,n1]%d1,n0] + [d1 *one,d0*(one-q)])) >> s 238 */ 239 240 push EBP ; 241 push ESI ; 242 push EDI ; 243 244 mov ESI,EDX ; 245 mov EDI,EAX ; 246 mov EBP,ECX ; 247 248 bsr EAX,ECX ; // EAX is now 30..0 249 xor EAX,0x1F ; // EAX is now 1..31 250 mov CH,AL ; 251 neg EAX ; 252 add EAX,32 ; 253 mov CL,AL ; 254 255 mov EAX,EBX ; 256 shr EAX,CL ; 257 xchg CH,CL ; 258 shl EBP,CL ; 259 or EBP,EAX ; 260 shl EBX,CL ; 261 262 mov EDX,ESI ; 263 xchg CH,CL ; 264 shr EDX,CL ; 265 266 mov EAX,EDI ; 267 shr EAX,CL ; 268 xchg CH,CL ; 269 shl EDI,CL ; 270 shl ESI,CL ; 271 or EAX,ESI ; 272 273 div EBP ; 274 push EBP ; 275 mov EBP,EAX ; 276 mov ESI,EDX ; 277 278 mul EBX ; 279 cmp EDX,ESI ; 280 ja L1 ; 281 jb L2 ; 282 cmp EAX,EDI ; 283 jbe L2 ; 284 L1: dec EBP ; 285 sub EAX,EBX ; 286 sbb EDX,0[ESP] ; 287 L2: 288 add ESP,4 ; 289 sub EDI,EAX ; 290 sbb ESI,EDX ; 291 mov EAX,ESI ; 292 xchg CH,CL ; 293 shl EAX,CL ; 294 xchg CH,CL ; 295 shr EDI,CL ; 296 or EDI,EAX ; 297 shr ESI,CL ; 298 mov EBX,EDI ; 299 mov ECX,ESI ; 300 mov EAX,EBP ; 301 xor EDX,EDX ; 302 303 pop EDI ; 304 pop ESI ; 305 pop EBP ; 306 ret ; 307 308 uldiv: test EDX,EDX ; 309 jnz D3 ; 310 // Both high words are 0, we can use the DIV instruction 311 div EBX ; 312 mov EBX,EDX ; 313 mov EDX,ECX ; // EDX = ECX = 0 314 ret ; 315 316 even ; 317 D3: // Divide [EDX,EAX] by EBX 318 mov ECX,EAX ; 319 mov EAX,EDX ; 320 xor EDX,EDX ; 321 div EBX ; 322 xchg ECX,EAX ; 323 div EBX ; 324 // ECX,EAX = result 325 // EDX = remainder 326 mov EBX,EDX ; 327 mov EDX,ECX ; 328 xor ECX,ECX ; 329 ret ; 330 331 quo0: // Quotient is 0 332 // Remainder is [EDX,EAX] 333 mov EBX,EAX ; 334 mov ECX,EDX ; 335 xor EAX,EAX ; 336 xor EDX,EDX ; 337 ret ; 338 339 Lleft: // The quotient is 0 or 1 and EDX >= ECX 340 cmp EDX,ECX ; 341 ja quo1 ; // [EDX,EAX] > [ECX,EBX] 342 // EDX == ECX 343 cmp EAX,EBX ; 344 jb quo0 ; 345 346 quo1: // Quotient is 1 347 // Remainder is [EDX,EAX] - [ECX,EBX] 348 sub EAX,EBX ; 349 sbb EDX,ECX ; 350 mov EBX,EAX ; 351 mov ECX,EDX ; 352 mov EAX,1 ; 353 xor EDX,EDX ; 354 ret ; 355 } 356 } 357 else version (D_InlineAsm_X86_64) 358 assert(0); 359 else 360 static assert(0); 361 } 362 363 364 /*************************************** 365 * Signed long divide. 366 * Input: 367 * [EDX,EAX],[ECX,EBX] 368 * Output: 369 * [EDX,EAX] = [EDX,EAX] / [ECX,EBX] 370 * [ECX,EBX] = [EDX,EAX] % [ECX,EBX] 371 * ESI,EDI destroyed 372 */ 373 374 void __LDIV2__() 375 { 376 version (D_InlineAsm_X86) 377 { 378 asm 379 { 380 naked ; 381 test EDX,EDX ; // [EDX,EAX] negative? 382 jns L10 ; // no 383 //neg64 EDX,EAX ; // [EDX,EAX] = -[EDX,EAX] 384 neg EDX ; 385 neg EAX ; 386 sbb EDX,0 ; 387 test ECX,ECX ; // [ECX,EBX] negative? 388 jns L11 ; // no 389 //neg64 ECX,EBX ; 390 neg ECX ; 391 neg dword ptr 4[ESP] ; 392 sbb ECX,0 ; 393 push dword ptr 4[ESP] ; 394 call __ULDIV2__ ; 395 add ESP,4 ; 396 //neg64 ECX,EBX ; // remainder same sign as dividend 397 neg ECX ; 398 neg EBX ; 399 sbb ECX,0 ; 400 ret ; 401 402 L11: 403 push dword ptr 4[ESP] ; 404 call __ULDIV2__ ; 405 add ESP,4 ; 406 //neg64 ECX,EBX ; // remainder same sign as dividend 407 neg ECX ; 408 neg EBX ; 409 sbb ECX,0 ; 410 //neg64 EDX,EAX ; // quotient is negative 411 neg EDX ; 412 neg EAX ; 413 sbb EDX,0 ; 414 ret ; 415 416 L10: test ECX,ECX ; // [ECX,EBX] negative? 417 jns L12 ; // no (all is positive) 418 //neg64 ECX,EBX ; 419 neg ECX ; 420 neg dword ptr 4[ESP] ; 421 sbb ECX,0 ; 422 push dword ptr 4[ESP] ; 423 call __ULDIV2__ ; 424 add ESP,4 ; 425 //neg64 EDX,EAX ; // quotient is negative 426 neg EDX ; 427 neg EAX ; 428 sbb EDX,0 ; 429 ret ; 430 431 L12: jmp __ULDIV2__ ; 432 } 433 } 434 else version (D_InlineAsm_X86_64) 435 assert(0); 436 else 437 static assert(0); 438 } 439 440 void __LDIV__() 441 { 442 version (D_InlineAsm_X86) 443 { 444 asm 445 { 446 naked ; 447 test EDX,EDX ; // [EDX,EAX] negative? 448 jns L10 ; // no 449 //neg64 EDX,EAX ; // [EDX,EAX] = -[EDX,EAX] 450 neg EDX ; 451 neg EAX ; 452 sbb EDX,0 ; 453 test ECX,ECX ; // [ECX,EBX] negative? 454 jns L11 ; // no 455 //neg64 ECX,EBX ; 456 neg ECX ; 457 neg EBX ; 458 sbb ECX,0 ; 459 call __ULDIV__ ; 460 //neg64 ECX,EBX ; // remainder same sign as dividend 461 neg ECX ; 462 neg EBX ; 463 sbb ECX,0 ; 464 ret ; 465 466 L11: call __ULDIV__ ; 467 //neg64 ECX,EBX ; // remainder same sign as dividend 468 neg ECX ; 469 neg EBX ; 470 sbb ECX,0 ; 471 //neg64 EDX,EAX ; // quotient is negative 472 neg EDX ; 473 neg EAX ; 474 sbb EDX,0 ; 475 ret ; 476 477 L10: test ECX,ECX ; // [ECX,EBX] negative? 478 jns L12 ; // no (all is positive) 479 //neg64 ECX,EBX ; 480 neg ECX ; 481 neg EBX ; 482 sbb ECX,0 ; 483 call __ULDIV__ ; 484 //neg64 EDX,EAX ; // quotient is negative 485 neg EDX ; 486 neg EAX ; 487 sbb EDX,0 ; 488 ret ; 489 490 L12: jmp __ULDIV__ ; 491 } 492 } 493 else version (D_InlineAsm_X86_64) 494 assert(0); 495 else 496 static assert(0); 497 } 498 499 500 version (Win32) version (CRuntime_Microsoft) 501 { 502 extern(C) void _alldiv(); 503 extern(C) void _aulldiv(); 504 extern(C) void _allrem(); 505 extern(C) void _aullrem(); 506 507 void _ms_alldiv() 508 { 509 asm 510 { 511 naked ; 512 push ECX ; 513 push EBX ; 514 push EDX ; 515 push EAX ; 516 call _alldiv ; 517 ret ; 518 } 519 } 520 521 void _ms_aulldiv() 522 { 523 asm 524 { 525 naked ; 526 push ECX ; 527 push EBX ; 528 push EDX ; 529 push EAX ; 530 call _aulldiv ; 531 ret ; 532 } 533 } 534 535 void _ms_allrem() 536 { 537 asm 538 { 539 naked ; 540 push ECX ; 541 push EBX ; 542 push EDX ; 543 push EAX ; 544 call _allrem ; 545 mov EBX,EAX ; 546 mov ECX,EDX ; 547 ret ; 548 } 549 } 550 551 void _ms_aullrem() 552 { 553 asm 554 { 555 naked ; 556 push ECX ; 557 push EBX ; 558 push EDX ; 559 push EAX ; 560 call _aullrem ; 561 mov EBX,EAX ; 562 mov ECX,EDX ; 563 ret ; 564 } 565 } 566 }