1 /** 2 * Global loop optimizations 3 * 4 * Compiler implementation of the 5 * $(LINK2 https://www.dlang.org, D programming language). 6 * 7 * Copyright: Copyright (C) 1985-1998 by Symantec 8 * Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved 9 * Authors: $(LINK2 https://www.digitalmars.com, Walter Bright) 10 * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 11 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/gloop.d, backend/gloop.d) 12 * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/gloop.d 13 */ 14 15 module dmd.backend.gloop; 16 17 import core.stdc.stdio; 18 import core.stdc.stdlib; 19 import core.stdc.string; 20 21 import dmd.backend.cc; 22 import dmd.backend.cdef; 23 import dmd.backend.code_x86; 24 import dmd.backend.evalu8 : el_toldoubled; 25 import dmd.backend.oper; 26 import dmd.backend.global; 27 import dmd.backend.goh; 28 import dmd.backend.el; 29 import dmd.backend.symtab; 30 import dmd.backend.ty; 31 import dmd.backend.type; 32 33 import dmd.backend.barray; 34 import dmd.backend.dlist; 35 import dmd.backend.dvec; 36 import dmd.backend.mem; 37 38 nothrow: 39 @safe: 40 41 @trusted 42 char symbol_isintab(const Symbol *s) { return sytab[s.Sclass] & SCSS; } 43 44 45 import dmd.backend.gother : findloopparameters; 46 47 alias Loops = Rarray!Loop; 48 49 50 /********************************* 51 * Loop data structure. 52 */ 53 54 struct Loop 55 { 56 nothrow: 57 vec_t Lloop; // Vector of blocks in this loop 58 vec_t Lexit; // Vector of exit blocks of loop 59 block *Lhead; // Pointer to header of loop 60 block *Ltail; // Pointer to tail 61 block *Lpreheader; // Pointer to preheader (if any) 62 Barray!(elem*) Llis; // loop invariant elems moved to Lpreheader, so 63 // redundant temporaries aren't created 64 Rarray!Iv Livlist; // basic induction variables 65 Rarray!Iv Lopeqlist; // list of other op= variables 66 67 /************************* 68 * Reset memory so this allocation can be re-used. 69 */ 70 @trusted 71 void reset() 72 { 73 vec_free(Lloop); 74 vec_free(Lexit); 75 76 foreach (ref iv; Livlist) 77 iv.reset(); 78 foreach (ref iv; Lopeqlist) 79 iv.reset(); 80 81 Llis.reset(); 82 Livlist.reset(); 83 Lopeqlist.reset(); 84 } 85 86 /*********************** 87 * Write loop. 88 */ 89 90 @trusted 91 void print() 92 { 93 debug 94 { 95 Loop *l = &this; 96 printf("loop %p\n", l); 97 printf("\thead: B%d, tail: B%d, prehead: B%d\n",l.Lhead.Bdfoidx, 98 l.Ltail.Bdfoidx,(l.Lpreheader ) ? l.Lpreheader.Bdfoidx 99 : cast(uint)-1); 100 printf("\tLloop "); vec_println(l.Lloop); 101 printf("\tLexit "); vec_println(l.Lexit); 102 } 103 } 104 } 105 106 struct famlist 107 { 108 nothrow: 109 elem **FLpelem; /* parent of elem in the family */ 110 elem *c1; 111 elem *c2; // c1*(basic IV) + c2 112 Symbol *FLtemp; // symbol index of temporary (FLELIM if */ 113 /* this entry has no temporary) */ 114 tym_t FLty; /* type of this induction variable */ 115 tym_t FLivty; /* type of the basic IV elem (which is */ 116 /* not necessarilly the type of the IV */ 117 /* elem!) */ 118 119 void reset() 120 { 121 el_free(c1); 122 el_free(c2); 123 } 124 125 @trusted 126 void print() const 127 { 128 debug 129 { 130 printf("famlist:\n"); 131 printf("*FLpelem:\n"); 132 elem_print(*FLpelem); 133 printf("c1:"); 134 elem_print(c1); 135 printf("c2:"); 136 elem_print(c2); 137 printf("FLty = %s\n", tym_str(FLty)); 138 printf("FLivty = %s\n", tym_str(FLivty)); 139 } 140 } 141 } 142 143 @system 144 enum FLELIM = cast(Symbol *)-1; 145 146 struct Iv 147 { 148 nothrow: 149 Symbol *IVbasic; // symbol of basic IV 150 elem **IVincr; // pointer to parent of IV increment elem 151 Barray!famlist IVfamily; // variables in this family 152 153 @trusted 154 void reset() 155 { 156 foreach (ref fl; IVfamily) 157 { 158 fl.reset(); 159 } 160 IVfamily.reset(); 161 } 162 163 @trusted 164 void print() const 165 { 166 debug 167 { 168 printf("IV: '%s'\n",IVbasic.Sident.ptr); 169 printf("*IVincr:\n"); 170 elem_print(*IVincr); 171 } 172 } 173 } 174 175 176 private __gshared bool addblk; /* if TRUE, then we added a block */ 177 178 /* is elem loop invariant? */ 179 int isLI(const elem* n) { return n.Nflags & NFLli; } 180 181 /* make elem loop invariant */ 182 void makeLI(elem* n) { n.Nflags |= NFLli; } 183 184 /****************************** 185 * Only variables that could only be unambiguously defined 186 * are candidates for loop invariant removal and induction 187 * variables. 188 * This means only variables that have the SFLunambig flag 189 * set for them. 190 * Doing this will still cover 90% (I hope) of the cases, and 191 * is a lot faster to compute. 192 */ 193 194 /************* 195 * Free loops. 196 */ 197 198 private void freeloop(ref Loops loops) 199 { 200 foreach (ref loop; loops) 201 loop.reset(); 202 loops.reset(); 203 } 204 205 206 /********************************** 207 * Initialize block information. 208 * Returns: 209 * true if there is a BCasm block 210 */ 211 212 @trusted 213 bool blockinit() 214 { 215 bool hasasm = false; 216 217 assert(dfo); 218 foreach (b; BlockRange(startblock)) 219 { 220 debug /* check integrity of Bpred and Bsucc */ 221 L1: 222 foreach (blp; ListRange(b.Bpred)) 223 { 224 foreach (bls; ListRange(list_block(blp).Bsucc)) 225 if (list_block(bls) == b) 226 continue L1; 227 assert(0); 228 } 229 230 hasasm |= (b.BC == BCasm); 231 } 232 foreach (j, b; dfo[]) 233 { 234 assert(b.Bdfoidx == j); 235 b.Bdom = vec_realloc(b.Bdom, dfo.length); /* alloc Bdom vectors */ 236 vec_clear(b.Bdom); 237 } 238 return hasasm; 239 } 240 241 /**************************************** 242 * Compute dominators (Bdom) for each block. 243 * See Aho & Ullman Fig. 13.5. 244 * Note that flow graph is reducible if there is only one 245 * pass through the loop. 246 * Params: 247 * dfo = depth first order array of blocks, 248 * Bdom vector is filled in for each block 249 */ 250 251 @trusted 252 void compdom() 253 { 254 compdom(dfo[]); 255 } 256 257 @trusted 258 private extern (D) void compdom(block*[] dfo) 259 { 260 assert(dfo.length); 261 block* sb = dfo[0]; // starting block 262 263 vec_clear(sb.Bdom); 264 vec_setbit(0,sb.Bdom); // starting block only doms itself 265 foreach (b; dfo) // for all except startblock 266 { 267 if (b != sb) 268 vec_set(b.Bdom); // dominate all blocks 269 } 270 271 vec_t t1 = vec_calloc(vec_numbits(sb.Bdom)); // allocate a temporary 272 uint counter = 0; // # of times thru loop 273 bool changes; 274 do 275 { 276 changes = false; 277 foreach (i, b; dfo) // for each block in dfo[] 278 { 279 if (i == 0) 280 continue; // except startblock 281 if (b.Bpred) // if there are predecessors 282 { 283 vec_set(t1); 284 foreach (bl; ListRange(b.Bpred)) 285 { 286 vec_andass(t1,list_block(bl).Bdom); 287 } 288 } 289 else 290 vec_clear(t1); // no predecessors to dominate 291 vec_setbit(i,t1); // each block doms itself 292 if (changes) 293 vec_copy(b.Bdom,t1); 294 else if (!vec_equal(b.Bdom,t1)) // if any changes 295 { 296 vec_copy(b.Bdom,t1); 297 changes = true; 298 } 299 } 300 counter++; 301 assert(counter < 50); // should have converged by now 302 } while (changes); 303 vec_free(t1); 304 305 debug if (debugc) 306 { 307 printf("Flow graph is%s reducible\n", counter <= 2 ? "".ptr : " not".ptr); 308 } 309 } 310 311 /*************************** 312 * Test if block A dominates block B. 313 * Params: 314 * A = block 315 * B = another block 316 * Returns: 317 * true if A dominates B. 318 */ 319 320 @trusted 321 bool dom(const block* A, const block* B) 322 { 323 assert(A && B && dfo && dfo[A.Bdfoidx] == A); 324 return vec_testbit(A.Bdfoidx,B.Bdom) != 0; 325 } 326 327 /********************** 328 * Find all the loops. 329 */ 330 331 private extern (D) void findloops(block*[] dfo, ref Loops loops) 332 { 333 freeloop(loops); 334 335 //printf("findloops()\n"); 336 foreach (b; dfo) 337 b.Bweight = 1; // reset Bweights 338 foreach_reverse (b; dfo) // for each block (note reverse 339 // dfo order, so most nested 340 // loops are found first) 341 { 342 assert(b); 343 foreach (bl; ListRange(b.Bsucc)) 344 { 345 block *s = list_block(bl); // each successor s to b 346 assert(s); 347 if (dom(s,b)) // if s dominates b 348 buildloop(loops, s, b); // we found a loop 349 } 350 } 351 352 debug if (debugc) 353 { 354 foreach (ref l; loops) 355 l.print(); 356 } 357 } 358 359 /******************************** 360 * Increase weight of a variable by a factor. 361 * Params: 362 * weight = how important a variable is 363 * factor = loops scale the importance 364 * Returns: 365 * new weight 366 */ 367 368 private uint loop_weight(uint weight, int factor) pure 369 { 370 // Be careful not to overflow 371 if (weight < 0x1_0000) 372 weight *= 10 * factor; 373 else if (weight < 0x10_0000) 374 weight *= 2 * factor; 375 else 376 weight += factor; 377 assert(cast(int)weight > 0); // overflow check 378 return weight; 379 } 380 381 /***************************** 382 * Construct natural loop. 383 * Algorithm 13.1 from Aho & Ullman. 384 * Note that head dom tail. 385 * Params: 386 * ploops = collection of existing loops to add to 387 * head = head of loop; head dominates tail 388 * tail = tail of loop 389 */ 390 391 @trusted 392 private void buildloop(ref Loops ploops,block *head,block *tail) 393 { 394 //printf("buildloop()\n"); 395 396 /* Add a block b and all its predecessors to vector v. 397 */ 398 static void insert(block *b, vec_t v) 399 { 400 assert(b && v); 401 if (!vec_testbit(b.Bdfoidx,v)) // if block is not in loop 402 { 403 vec_setbit(b.Bdfoidx,v); // add block to loop 404 b.Bweight = loop_weight(b.Bweight,1); // *10 usage count 405 foreach (bl; ListRange(b.Bpred)) 406 insert(list_block(bl), v); // insert all its predecessors 407 } 408 } 409 410 Loop* l; 411 412 /* See if this is part of an existing loop. If so, merge the two. */ 413 foreach (ref lp; ploops) 414 if (lp.Lhead == head) /* two loops with same header */ 415 { 416 // Calculate loop contents separately so we get the Bweights 417 // done accurately. 418 419 vec_t v = vec_calloc(dfo.length); 420 vec_setbit(head.Bdfoidx,v); 421 head.Bweight = loop_weight(head.Bweight, 1); 422 insert(tail,v); 423 424 vec_orass(lp.Lloop,v); // merge into existing loop 425 vec_free(v); 426 427 vec_clear(lp.Lexit); // recompute exit blocks 428 l = &lp; 429 goto L1; 430 } 431 432 /* Allocate loop entry */ 433 l = ploops.push(); 434 435 l.Lloop = vec_calloc(dfo.length); // allocate loop bit vector 436 l.Lexit = vec_calloc(dfo.length); // bit vector for exit blocks 437 l.Lhead = head; 438 l.Ltail = tail; 439 l.Lpreheader = null; 440 441 vec_setbit(head.Bdfoidx,l.Lloop); /* add head to the loop */ 442 head.Bweight = loop_weight(head.Bweight, 2); // *20 usage for loop header 443 444 insert(tail,l.Lloop); /* insert tail in loop */ 445 446 L1: 447 /* Find all the exit blocks (those blocks with 448 * successors outside the loop). 449 */ 450 451 // for each block in this loop 452 foreach (i; VecRange(l.Lloop)) 453 { 454 if (dfo[i].BC == BCret || dfo[i].BC == BCretexp || dfo[i].BC == BCexit) 455 vec_setbit(i,l.Lexit); /* ret blocks are exit blocks */ 456 else 457 { 458 foreach (bl; ListRange(dfo[i].Bsucc)) 459 if (!vec_testbit(list_block(bl).Bdfoidx,l.Lloop)) 460 { 461 vec_setbit(i,l.Lexit); 462 break; 463 } 464 } 465 } 466 467 /* Find preheader, if any, to the loop. 468 The preheader is a block that has only the head as a successor. 469 All other predecessors of head must be inside the loop. 470 */ 471 l.Lpreheader = null; 472 foreach (bl; ListRange(head.Bpred)) 473 { 474 block *b = list_block(bl); 475 476 if (!vec_testbit(b.Bdfoidx,l.Lloop)) /* if not in loop */ 477 { 478 if (l.Lpreheader) /* if already one */ 479 { 480 l.Lpreheader = null; /* can only be one */ 481 break; 482 } 483 else 484 { 485 if (list_next(b.Bsucc)) // if more than 1 successor 486 break; // b can't be a preheader 487 l.Lpreheader = b; 488 } 489 } 490 } 491 } 492 493 /************************************** 494 * Perform loop rotations. 495 * Loop starts as: 496 * 497 * prehead 498 * | 499 * v 500 * +->head----> 501 * | | 502 * | v 503 * | body 504 * | | 505 * | v 506 * +--tail 507 * 508 * Two types are done: 509 * 1) Header is moved to be past the tail. 510 * 511 * prehead 512 * | 513 * +---+ 514 * | 515 * | body<-+ 516 * | | | 517 * | v | 518 * | tail | 519 * | | | 520 * | v | 521 * +->head--+ 522 * | 523 * v 524 * 525 * 2) Header is copied past the tail (done only if MFtime is set). 526 * 527 * prehead 528 * | 529 * v 530 * head1-----+ 531 * | | 532 * v | 533 * body<--+ | 534 * | | | 535 * v | | 536 * tail | | 537 * | | | 538 * v | | 539 * head2--+ | 540 * | | 541 * +--------+ 542 * v 543 * 544 * Input: 545 * Loop information (do not depend on the preheader information) 546 * Output: 547 * Revised list of blocks, a new dfo and new loop information 548 * Returns: 549 * true need to recompute loop data 550 */ 551 552 @trusted 553 private bool looprotate(ref Loop l) 554 { 555 block *tail = l.Ltail; 556 block *head = l.Lhead; 557 558 //printf("looprotate(%p)\n",l); 559 560 // Do not rotate loop if: 561 if (head == tail || // loop is only one block big 562 !vec_testbit(head.Bdfoidx,l.Lexit)) // header is not an exit block 563 goto Lret; 564 565 if (//iter != 1 && 566 vec_testbit(tail.Bdfoidx,l.Lexit)) // tail is an exit block 567 goto Lret; 568 569 // Do not rotate if already rotated 570 foreach (b; BlockRange(tail.Bnext)) 571 if (b == head) // if loop already rotated 572 goto Lret; 573 574 if (head.BC == BCtry) 575 goto Lret; 576 if (head.BC == BC_try) 577 goto Lret; 578 579 //if (debugc) { printf("looprotate: "); l.print(); } 580 581 if ((go.mfoptim & MFtime) && head.BC != BCswitch && head.BC != BCasm) 582 { // Duplicate the header past the tail (but doing 583 // switches would be too expensive in terms of code 584 // generated). 585 586 auto head2 = block_calloc(); // create new head block 587 head2.Btry = head.Btry; 588 head2.Bflags = head.Bflags; 589 head.Bflags = BFLnomerg; // move flags over to head2 590 head2.Bflags |= BFLnomerg; 591 head2.BC = head.BC; 592 assert(head2.BC != BCswitch); 593 if (head.Belem) // copy expression tree 594 head2.Belem = el_copytree(head.Belem); 595 head2.Bnext = tail.Bnext; 596 tail.Bnext = head2; 597 598 // pred(head1) = pred(head) outside loop 599 // pred(head2) = pred(head) inside loop 600 list_t *pbln; 601 auto pbl2 = &(head2.Bpred); 602 for (list_t *pbl = &(head.Bpred); *pbl; pbl = pbln) 603 { 604 if (vec_testbit(list_block(*pbl).Bdfoidx, l.Lloop)) 605 { // if this predecessor is inside the loop 606 607 *pbl2 = *pbl; 608 *pbl = list_next(*pbl); 609 pbln = pbl; // don't skip this next one 610 (*pbl2).next = null; 611 auto bsucc = list_block(*pbl2).Bsucc; 612 pbl2 = &((*pbl2).next); 613 foreach (bl; ListRange(bsucc)) 614 if (list_block(bl) == head) 615 { 616 bl.ptr = cast(void *)head2; 617 goto L2; 618 } 619 assert(0); 620 L2: 621 } 622 else 623 pbln = &((*pbl).next); // next predecessor in list 624 } // for each pred(head) 625 626 // succ(head2) = succ(head) 627 foreach (bl; ListRange(head.Bsucc)) 628 { 629 list_append(&(head2.Bsucc),list_block(bl)); 630 list_append(&(list_block(bl).Bpred),head2); 631 } 632 if (debugc) printf("1Rotated loop %p\n", &l); 633 go.changes++; 634 return true; 635 } 636 else if (startblock != head 637 /* This screws up the OPctor/OPdtor sequence for: 638 * struct CString 639 * { CString(); 640 * ~CString(); 641 * int GetLength(); 642 * }; 643 * 644 * void f(void) 645 * { for(;;) 646 * { CString s ; 647 * if(s.GetLength()!=0) 648 * break ; 649 * } 650 * } 651 */ 652 && !(config.flags3 & CFG3eh) 653 ) 654 { // optimize for space 655 // Simply position the header past the tail 656 foreach (b; BlockRange(startblock)) 657 { 658 if (b.Bnext == head) 659 { // found parent b of head 660 b.Bnext = head.Bnext; 661 head.Bnext = tail.Bnext; 662 tail.Bnext = head; 663 if (debugc) printf("2Rotated loop %p\n", &l); 664 go.changes++; 665 return false; 666 } 667 } 668 assert(0); 669 } 670 Lret: 671 return false; 672 } 673 674 private __gshared 675 { 676 bool doflow; // true if flow analysis has to be redone 677 } 678 679 /********************************* 680 * Loop invariant and induction variable elimination. 681 * Input: 682 * iter which optimization iteration we are on 683 */ 684 685 @trusted 686 void loopopt() 687 { 688 __gshared Loops startloop_cache; 689 690 Loops startloop = startloop_cache; 691 692 if (debugc) printf("loopopt()\n"); 693 restart: 694 if (blockinit()) // init block data 695 { 696 findloops(dfo[], startloop); // Compute Bweights 697 freeloop(startloop); // free existing loops 698 startloop_cache = startloop; 699 return; // can't handle ASM blocks 700 } 701 compdom(); // compute dominators 702 findloops(dfo[], startloop); // find the loops 703 704 L3: 705 while (1) 706 { 707 foreach (ref l; startloop) 708 { 709 if (looprotate(l)) // rotate the loop 710 { 711 compdfo(dfo, startblock); 712 blockinit(); 713 compdom(); 714 findloops(dfo[], startloop); 715 continue L3; 716 } 717 } 718 break; 719 } 720 721 // Make sure there is a preheader for each loop. 722 723 addblk = false; /* assume no blocks added */ 724 foreach (ref l; startloop) 725 { 726 //if (debugc) l.print(); 727 728 if (!l.Lpreheader) /* if no preheader */ 729 { 730 if (debugc) printf("Generating preheader for loop\n"); 731 addblk = true; // add one 732 block* p = block_calloc(); // the preheader 733 block* h = l.Lhead; // loop header 734 735 /* Find parent of h */ 736 if (h == startblock) 737 startblock = p; 738 else 739 { 740 for (auto ph = startblock; 1; ph = ph.Bnext) 741 { 742 assert(ph); /* should have found it */ 743 if (ph.Bnext == h) 744 { 745 // Link p into block list between ph and h 746 ph.Bnext = p; 747 break; 748 } 749 } 750 } 751 p.Bnext = h; 752 753 l.Lpreheader = p; 754 p.BC = BCgoto; 755 assert(p.Bsucc == null); 756 list_append(&(p.Bsucc),h); /* only successor is h */ 757 p.Btry = h.Btry; 758 759 if (debugc) printf("Adding preheader %p to loop %p\n",p,&l); 760 761 // Move preds of h that aren't in the loop to preds of p 762 for (list_t bl = h.Bpred; bl;) 763 { 764 block *b = list_block(bl); 765 766 if (!vec_testbit (b.Bdfoidx, l.Lloop)) 767 { 768 list_append(&(p.Bpred), b); 769 list_subtract(&(h.Bpred), b); 770 bl = h.Bpred; /* dunno what subtract did */ 771 772 /* Fix up successors of predecessors */ 773 foreach (bls; ListRange(b.Bsucc)) 774 if (list_block(bls) == h) 775 bls.ptr = cast(void *)p; 776 } 777 else 778 bl = list_next(bl); 779 } 780 list_append(&(h.Bpred),p); /* p is a predecessor to h */ 781 } 782 } /* for */ 783 if (addblk) /* if any blocks were added */ 784 { 785 compdfo(dfo, startblock); /* compute depth-first order */ 786 blockinit(); 787 compdom(); 788 findloops(dfo[], startloop); // recompute block info 789 addblk = false; 790 } 791 792 /* Do the loop optimizations. 793 */ 794 795 doflow = true; /* do flow analysis */ 796 797 if (go.mfoptim & MFtime) 798 { 799 if (debugc) printf("Starting loop unrolling\n"); 800 L2: 801 while (1) 802 { 803 foreach (ref l; startloop) 804 { 805 if (loopunroll(l)) 806 { 807 compdfo(dfo, startblock); // compute depth-first order 808 blockinit(); 809 compdom(); 810 findloops(dfo[], startloop); // recompute block info 811 doflow = true; 812 continue L2; 813 } 814 } 815 break; 816 } 817 } 818 819 /* Note that accessing the loops 820 * starting from startloop will access them in least nested 821 * one first, thus moving LIs out as far as possible 822 */ 823 if (debugc) printf("Starting loop invariants\n"); 824 825 foreach_reverse (ref l; startloop) 826 { 827 //if (debugc) l.print(); 828 829 assert(l.Lpreheader); 830 if (doflow) 831 { 832 flowrd(); /* compute reaching definitions */ 833 flowlv(); /* compute live variables */ 834 flowae(); // compute available expressions 835 doflow = false; /* no need to redo it */ 836 if (go.defnod.length == 0) /* if no definition elems */ 837 break; /* no need to optimize */ 838 } 839 vec_t lv = l.Lloop; 840 if (debugc) printf("...Loop %p start...\n",&l); 841 842 /* Unmark all elems in this loop */ 843 for (uint i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i) 844 if (dfo[i].Belem) 845 unmarkall(dfo[i].Belem); /* unmark all elems */ 846 847 /* Find & mark all LIs */ 848 vec_t gin = vec_clone(l.Lpreheader.Bout); 849 vec_t rd = vec_calloc(go.defnod.length); /* allocate our running RD vector */ 850 for (uint i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i) // for each block in loop 851 { 852 block *b = dfo[i]; 853 854 if (debugc) printf("B%d\n",i); 855 if (b.Belem) 856 { 857 vec_copy(rd, b.Binrd); // IN reaching defs 858 static if (0) 859 { 860 printf("i = %d\n",i); 861 { 862 for (int j = 0; j < go.defnod.length; j++) 863 elem_print(go.defnod[j].DNelem); 864 } 865 printf("rd : "); vec_println(rd); 866 } 867 markInvariants(b != l.Lhead, b, lv, gin, b.Belem, rd); 868 static if (0) 869 { 870 printf("B%d\n", i); 871 { 872 foreach (j; 0 .. go.defnod.length) 873 { 874 printf(" [%2d] ", j); 875 WReqn(go.defnod[j].DNelem); 876 printf("\n"); 877 } 878 } 879 printf("rd : "); vec_println(rd); 880 printf("Boutrd: "); vec_println(b.Boutrd); 881 } 882 assert(vec_equal(rd, b.Boutrd)); 883 } 884 else 885 assert(vec_equal(b.Binrd, b.Boutrd)); 886 } 887 vec_free(rd); 888 vec_free(gin); 889 890 /* Move loop invariants */ 891 for (uint i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i) 892 { 893 uint domexit; // true if this block dominates all 894 // exit blocks of the loop 895 896 for (uint j = 0; (j = cast(uint) vec_index(j, l.Lexit)) < dfo.length; ++j) // for each exit block 897 { 898 if (!vec_testbit (i, dfo[j].Bdom)) 899 { 900 domexit = 0; 901 goto L1; // break if !(i dom j) 902 } 903 } 904 // if i dom (all exit blocks) 905 domexit = 1; 906 L1: 907 if (dfo[i].Belem) 908 { // If there is any hope of making an improvement 909 if (domexit || l.Llis.length) 910 { 911 //if (dfo[i] != l.Lhead) 912 //domexit |= 2; 913 movelis(dfo[i].Belem, dfo[i], l, domexit); 914 } 915 } 916 } 917 if (debugc) printf("...Loop %p done...\n",&l); 918 919 if (go.mfoptim & MFliv) 920 { 921 loopiv(l); /* induction variables */ 922 if (addblk) /* if we added a block */ 923 { 924 compdfo(dfo, startblock); 925 goto restart; /* play it safe and start over */ 926 } 927 } 928 } /* for */ 929 freeloop(startloop); 930 startloop_cache = startloop; 931 } 932 933 /***************************** 934 * If elem is loop invariant, mark it. 935 * Input: 936 * lv = vector of all the blocks in this loop. 937 * rd = vector of loop invariants for this elem. This must be 938 * continually updated. 939 * Note that we do not iterate until no more LIs are found. The only 940 * thing this would buy us is stuff that depends on LI assignments. 941 */ 942 943 @trusted 944 private void markInvariants(int gref, block* gblock, vec_t lv, vec_t gin, elem *n, vec_t rd) 945 { 946 947 void markinvar(elem *n,vec_t rd) 948 { 949 vec_t tmp; 950 uint i; 951 Symbol *v; 952 elem *n1; 953 954 assert(n && rd); 955 assert(vec_numbits(rd) == go.defnod.length); 956 switch (n.Eoper) 957 { 958 case OPaddass: case OPminass: case OPmulass: case OPandass: 959 case OPorass: case OPxorass: case OPdivass: case OPmodass: 960 case OPshlass: case OPshrass: case OPashrass: 961 case OPpostinc: case OPpostdec: 962 case OPcall: 963 case OPvecsto: 964 case OPcmpxchg: 965 markinvar(n.EV.E2,rd); 966 goto case OPnegass; 967 968 case OPnegass: 969 n1 = n.EV.E1; 970 if (n1.Eoper == OPind) 971 markinvar(n1.EV.E1,rd); 972 else if (OTbinary(n1.Eoper)) 973 { markinvar(n1.EV.E1,rd); 974 markinvar(n1.EV.E2,rd); 975 } 976 L2: 977 if (n.Eoper == OPcall || 978 gblock.Btry || 979 !(n1.Eoper == OPvar && 980 symbol_isintab(n1.EV.Vsym))) 981 { 982 gref = 1; 983 } 984 985 updaterd(n,rd,null); 986 break; 987 988 case OPcallns: 989 markinvar(n.EV.E2,rd); 990 markinvar(n.EV.E1,rd); 991 break; 992 993 case OPstrcpy: 994 case OPstrcat: 995 case OPmemcpy: 996 case OPmemset: 997 markinvar(n.EV.E2,rd); 998 markinvar(n.EV.E1,rd); 999 updaterd(n,rd,null); 1000 break; 1001 1002 case OPbtc: 1003 case OPbtr: 1004 case OPbts: 1005 markinvar(n.EV.E1,rd); 1006 markinvar(n.EV.E2,rd); 1007 updaterd(n,rd,null); 1008 break; 1009 1010 case OPucall: 1011 markinvar(n.EV.E1,rd); 1012 goto case OPasm; 1013 1014 case OPasm: 1015 gref = 1; 1016 updaterd(n,rd,null); 1017 break; 1018 1019 case OPucallns: 1020 case OPstrpar: 1021 case OPstrctor: 1022 case OPvector: 1023 case OPvoid: 1024 case OPstrlen: 1025 case OPddtor: 1026 case OPinp: 1027 case OPprefetch: // don't mark E2 1028 markinvar(n.EV.E1,rd); 1029 break; 1030 1031 case OPcond: 1032 case OPparam: 1033 case OPstrcmp: 1034 case OPmemcmp: 1035 case OPbt: // OPbt is like OPind, assume not LI 1036 case OPoutp: 1037 markinvar(n.EV.E1,rd); 1038 markinvar(n.EV.E2,rd); 1039 break; 1040 1041 case OPandand: 1042 case OPoror: 1043 markinvar(n.EV.E1,rd); 1044 tmp = vec_clone(rd); 1045 markinvar(n.EV.E2,tmp); 1046 if (el_returns(n.EV.E2)) 1047 vec_orass(rd,tmp); // rd |= tmp 1048 vec_free(tmp); 1049 break; 1050 1051 case OPcolon: 1052 case OPcolon2: 1053 tmp = vec_clone(rd); 1054 switch (el_returns(n.EV.E1) * 2 | int(el_returns(n.EV.E2))) 1055 { 1056 case 3: // E1 and E2 return 1057 markinvar(n.EV.E1,rd); 1058 markinvar(n.EV.E2,tmp); 1059 vec_orass(rd,tmp); // rd |= tmp 1060 break; 1061 case 2: // E1 returns 1062 markinvar(n.EV.E1,rd); 1063 markinvar(n.EV.E2,tmp); 1064 break; 1065 case 1: // E2 returns 1066 markinvar(n.EV.E1,tmp); 1067 markinvar(n.EV.E2,rd); 1068 break; 1069 case 0: // neither returns 1070 markinvar(n.EV.E1,tmp); 1071 vec_copy(tmp,rd); 1072 markinvar(n.EV.E2,tmp); 1073 break; 1074 default: 1075 assert(0); 1076 } 1077 vec_free(tmp); 1078 break; 1079 1080 case OPaddr: // mark addresses of OPvars as LI 1081 markinvar(n.EV.E1,rd); 1082 if (n.EV.E1.Eoper == OPvar || isLI(n.EV.E1)) 1083 makeLI(n); 1084 break; 1085 1086 case OPmsw: 1087 case OPneg: case OPbool: case OPnot: case OPcom: 1088 case OPs16_32: case OPd_s32: case OPs32_d: 1089 case OPd_s16: case OPs16_d: case OPd_f: case OPf_d: 1090 case OP32_16: case OPu8_16: 1091 case OPld_d: case OPd_ld: 1092 case OPld_u64: 1093 case OPc_r: case OPc_i: 1094 case OPu16_32: 1095 case OPu16_d: case OPd_u16: 1096 case OPs8_16: case OP16_8: 1097 case OPd_u32: case OPu32_d: 1098 1099 case OPs32_64: case OPu32_64: 1100 case OP64_32: 1101 case OPd_s64: case OPd_u64: 1102 case OPs64_d: 1103 case OPu64_d: 1104 case OP128_64: 1105 case OPs64_128: 1106 case OPu64_128: 1107 1108 case OPabs: 1109 case OPtoprec: 1110 case OPrndtol: 1111 case OPrint: 1112 case OPsetjmp: 1113 case OPbsf: 1114 case OPbsr: 1115 case OPbswap: 1116 case OPpopcnt: 1117 case OPsqrt: 1118 case OPsin: 1119 case OPcos: 1120 case OPvp_fp: /* BUG for MacHandles */ 1121 case OPnp_f16p: case OPf16p_np: case OPoffset: case OPnp_fp: 1122 case OPcvp_fp: 1123 case OPvecfill: 1124 markinvar(n.EV.E1,rd); 1125 if (isLI(n.EV.E1)) /* if child is LI */ 1126 makeLI(n); 1127 break; 1128 1129 case OPeq: 1130 case OPstreq: 1131 markinvar(n.EV.E2,rd); 1132 n1 = n.EV.E1; 1133 markinvar(n1,rd); 1134 1135 /* Determine if assignment is LI. Conditions are: */ 1136 /* 1) Rvalue is LI */ 1137 /* 2) Lvalue is a variable (simplifies things a lot) */ 1138 /* 3) Lvalue can only be affected by unambiguous defs */ 1139 /* 4) No rd's of lvalue that are within the loop (other */ 1140 /* than the current def) */ 1141 if (isLI(n.EV.E2) && n1.Eoper == OPvar) /* 1 & 2 */ 1142 { 1143 v = n1.EV.Vsym; 1144 if (v.Sflags & SFLunambig) 1145 { 1146 tmp = vec_calloc(go.defnod.length); 1147 //filterrd(tmp,rd,v); 1148 listrds(rd,n1,tmp,null); 1149 for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i) 1150 if (go.defnod[i].DNelem != n && 1151 vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv)) 1152 goto L3; 1153 makeLI(n); // then the def is LI 1154 L3: vec_free(tmp); 1155 } 1156 } 1157 goto L2; 1158 1159 case OPadd: case OPmin: case OPmul: case OPand: 1160 case OPor: case OPxor: case OPdiv: case OPmod: 1161 case OPshl: case OPshr: case OPeqeq: case OPne: 1162 case OPlt: case OPle: case OPgt: case OPge: 1163 case OPashr: 1164 case OPror: case OProl: 1165 case OPbtst: 1166 1167 case OPunord: case OPlg: case OPleg: case OPule: 1168 case OPul: case OPuge: case OPug: case OPue: 1169 case OPngt: case OPnge: case OPnlt: case OPnle: 1170 case OPord: case OPnlg: case OPnleg: case OPnule: 1171 case OPnul: case OPnuge: case OPnug: case OPnue: 1172 1173 case OPcomma: 1174 case OPpair: 1175 case OPrpair: 1176 case OPremquo: 1177 case OPscale: 1178 case OPyl2x: 1179 case OPyl2xp1: 1180 markinvar(n.EV.E1,rd); 1181 markinvar(n.EV.E2,rd); 1182 if (isLI(n.EV.E2) && isLI(n.EV.E1)) 1183 makeLI(n); 1184 break; 1185 1186 case OPind: /* must assume this is not LI */ 1187 markinvar(n.EV.E1,rd); 1188 if (isLI(n.EV.E1)) 1189 { 1190 static if (0) 1191 { 1192 // This doesn't work with C++, because exp2_ptrtocomtype() will 1193 // transfer const to where it doesn't belong. 1194 if (n.Ety & mTYconst) 1195 { 1196 makeLI(n); 1197 } 1198 1199 // This was disabled because it was marking as LI 1200 // the loop dimension for the [i] array if 1201 // a[j][i] was in a loop. This meant the a[j] array bounds 1202 // check for the a[j].length was skipped. 1203 else if (n.Ejty) 1204 { 1205 tmp = vec_calloc(go.defnod.length); 1206 filterrdind(tmp,rd,n); // only the RDs pertaining to n 1207 1208 // if (no RDs within loop) 1209 // then it's loop invariant 1210 1211 for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i) // for each RD 1212 if (vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv)) 1213 goto L10; // found a RD in the loop 1214 1215 // If gref has occurred, this can still be LI 1216 // if n is an AE that was also an AE at the 1217 // point of gref. 1218 // We can catch a subset of these cases by looking 1219 // at the AEs at the start of the loop. 1220 if (gref) 1221 { 1222 int j; 1223 1224 //printf("\tn is: "); WReqn(n); printf("\n"); 1225 for (j = 0; (j = cast(uint) vec_index(j, gin)) < go.exptop; ++j) 1226 { 1227 elem *e = go.expnod[j]; 1228 1229 //printf("\t\tgo.expnod[%d] = %p\n",j,e); 1230 //printf("\t\tAE is: "); WReqn(e); printf("\n"); 1231 if (el_match2(n,e)) 1232 { 1233 makeLI(n); 1234 //printf("Ind LI: "); WReqn(n); printf("\n"); 1235 break; 1236 } 1237 } 1238 } 1239 else 1240 makeLI(n); 1241 L10: 1242 vec_free(tmp); 1243 break; 1244 } 1245 } 1246 } 1247 break; 1248 1249 case OPvar: 1250 v = n.EV.Vsym; 1251 if (v.Sflags & SFLunambig) // must be unambiguous to be LI 1252 { 1253 tmp = vec_calloc(go.defnod.length); 1254 //filterrd(tmp,rd,v); // only the RDs pertaining to v 1255 listrds(rd,n,tmp,null); // only the RDs pertaining to v 1256 1257 // if (no RDs within loop) 1258 // then it's loop invariant 1259 1260 for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i) // for each RD 1261 if (vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv)) 1262 goto L1; // found a RD in the loop 1263 makeLI(n); 1264 1265 L1: vec_free(tmp); 1266 } 1267 break; 1268 1269 case OPstring: 1270 case OPrelconst: 1271 case OPconst: /* constants are always LI */ 1272 case OPframeptr: 1273 makeLI(n); 1274 break; 1275 1276 case OPinfo: 1277 markinvar(n.EV.E2,rd); 1278 break; 1279 1280 case OPstrthis: 1281 case OPmark: 1282 case OPctor: 1283 case OPdtor: 1284 case OPdctor: 1285 case OPhalt: 1286 case OPgot: // shouldn't OPgot be makeLI ? 1287 break; 1288 1289 default: 1290 //printf("n.Eoper = %s, OPconst = %d\n", oper_str(n.Eoper), OPconst); 1291 assert(0); 1292 } 1293 1294 debug 1295 { 1296 if (debugc && isLI(n)) 1297 { 1298 printf(" LI elem: "); 1299 WReqn(n); 1300 printf("\n"); 1301 } 1302 } 1303 } 1304 1305 markinvar(n, rd); 1306 } 1307 1308 /******************** 1309 * Update rd vector. 1310 * Input: 1311 * n assignment elem or function call elem or OPasm elem 1312 * rd reaching def vector to update 1313 * (clear bits for defs we kill, set bit for n (which is the 1314 * def we are genning)) 1315 * vecdim go.defnod.length 1316 */ 1317 1318 extern (C) { 1319 @trusted 1320 void updaterd(elem *n,vec_t GEN,vec_t KILL) 1321 { 1322 const op = n.Eoper; 1323 elem *t; 1324 1325 assert(OTdef(op)); 1326 assert(GEN); 1327 elem_debug(n); 1328 1329 uint ni = n.Edef; 1330 assert(ni != -1); 1331 1332 // If unambiguous def 1333 if (OTassign(op) && (t = n.EV.E1).Eoper == OPvar) 1334 { 1335 vec_t v = go.defnod[ni].DNunambig; 1336 assert(v); 1337 if (KILL) 1338 vec_orass(KILL, v); 1339 vec_subass(GEN, v); 1340 } 1341 else 1342 { 1343 static if (0) 1344 { 1345 if (OTassign(op) && t.Eoper != OPvar && t.Ejty) 1346 { 1347 // for all unambig defs in go.defnod[] 1348 foreach (uint i; 0 .. go.defnod.length) 1349 { 1350 elem *tn = go.defnod[i].DNelem; 1351 elem *tn1; 1352 1353 if (tn == n) 1354 ni = i; 1355 1356 if (!OTassign(tn.Eoper)) 1357 continue; 1358 1359 // If def of same variable, kill that def 1360 tn1 = tn.EV.E1; 1361 if (tn1.Eoper != OPind || t.Ejty != tn1.Ejty) 1362 continue; 1363 1364 if (KILL) 1365 vec_setbit(i,KILL); 1366 vec_clearbit(i,GEN); 1367 } 1368 } 1369 } 1370 } 1371 1372 vec_setbit(ni,GEN); // set bit in GEN for this def 1373 } 1374 } 1375 1376 /*************************** 1377 * Mark all elems as not being loop invariant. 1378 */ 1379 1380 @trusted 1381 private void unmarkall(elem *e) 1382 { 1383 for (; 1; e = e.EV.E1) 1384 { 1385 assert(e); 1386 e.Nflags &= ~NFLli; /* unmark this elem */ 1387 if (OTunary(e.Eoper)) 1388 continue; 1389 else if (OTbinary(e.Eoper)) 1390 { 1391 unmarkall(e.EV.E2); 1392 continue; 1393 } 1394 return; 1395 } 1396 } 1397 1398 1399 /******************************** 1400 * Search for references to v in tree n before nstop is encountered. 1401 * Params: 1402 * v = symbol to search for 1403 * n = tree to search 1404 * nstop = stop searching tree when reaching this elem 1405 * Returns: 1406 * true if there are any refs of v in n before nstop is encountered 1407 */ 1408 1409 @trusted 1410 private bool refs(Symbol *v,elem *n,elem *nstop) 1411 { 1412 symbol_debug(v); 1413 assert(symbol_isintab(v)); 1414 assert(v.Ssymnum < globsym.length); 1415 bool stop = false; 1416 1417 // Walk tree in evaluation order 1418 bool walk(elem* n) 1419 { 1420 elem_debug(n); 1421 assert(n); 1422 1423 if (stop) 1424 return false; 1425 bool f = false; 1426 const op = n.Eoper; 1427 if (OTunary(op)) 1428 f = walk(n.EV.E1); 1429 else if (OTbinary(op)) 1430 { 1431 if (ERTOL(n)) /* watch order of evaluation */ 1432 { 1433 /* Note that (OPvar = e) is not a ref of OPvar, whereas */ 1434 /* ((OPbit OPvar) = e) is a ref of OPvar, and (OPvar op= e) is */ 1435 /* a ref of OPvar, etc. */ 1436 f = walk(n.EV.E2); 1437 if (!f) 1438 { 1439 if (op == OPeq) 1440 { 1441 if (n.EV.E1.Eoper != OPvar) 1442 f = walk(n.EV.E1.EV.E1); 1443 } 1444 else 1445 f = walk(n.EV.E1); 1446 } 1447 } 1448 else 1449 f = walk(n.EV.E1) || walk(n.EV.E2); 1450 } 1451 1452 if (n == nstop) 1453 stop = true; 1454 else if (n.Eoper == OPvar) /* if variable reference */ 1455 return v == n.EV.Vsym; 1456 else if (op == OPasm) /* everything is referenced */ 1457 return true; 1458 return f; 1459 } 1460 1461 return walk(n); 1462 } 1463 1464 /************************* 1465 * Move LIs to preheader. 1466 * Conditions to be satisfied for code motion are: 1467 * 1) All exit blocks are dominated (true before this is called). 1468 * -- OR -- 1469 * 2) Variable assigned by a statement is not live on entering 1470 * any successor outside the loop of any exit block of the 1471 * loop. 1472 * 1473 * 3) Cannot move assignment to variable if there are any other 1474 * assignments to that variable within the loop (true or 1475 * assignment would not have been marked LI). 1476 * 4) Cannot move assignments to a variable if there is a use 1477 * of that variable in this loop that is reached by any other 1478 * def of it. 1479 * 5) Cannot move expressions that have side effects. 1480 * 6) Do not move assignments to variables that could be affected 1481 * by ambiguous defs. 1482 * 7) It is not worth it to move expressions of the form: 1483 * (var == const) 1484 * Input: 1485 * n the elem we're considering moving 1486 * b the block this elem is in 1487 * l the loop we're in 1488 * domexit flags 1489 * bit 0: 1 this branch is always executed 1490 * 0 this branch is only sometimes executed 1491 * bit 1: 1 do not move LIs that could throw exceptions 1492 * or cannot be moved past possibly thrown exceptions 1493 * Returns: 1494 * revised domexit 1495 */ 1496 1497 @trusted 1498 private void movelis(elem* n, block* b, ref Loop l, ref uint pdomexit) 1499 { 1500 vec_t tmp; 1501 elem *ne; 1502 elem *t; 1503 elem *n2; 1504 Symbol *v; 1505 tym_t ty; 1506 1507 Lnextlis: 1508 //if (isLI(n)) { printf("movelis(B%d, ", b.Bdfoidx); WReqn(n); printf(")\n"); } 1509 assert(n); 1510 elem_debug(n); 1511 const op = n.Eoper; 1512 switch (op) 1513 { 1514 case OPvar: 1515 case OPconst: 1516 case OPrelconst: 1517 goto Lret; 1518 1519 case OPandand: 1520 case OPoror: 1521 case OPcond: 1522 { 1523 uint domexit; 1524 1525 movelis(n.EV.E1,b,l,pdomexit); // always executed 1526 domexit = pdomexit & ~1; // sometimes executed 1527 movelis(n.EV.E2,b,l,domexit); 1528 pdomexit |= domexit & 2; 1529 goto Lret; 1530 } 1531 1532 case OPeq: 1533 // Do loop invariant assignments 1534 if (isLI(n) && n.EV.E1.Eoper == OPvar) 1535 { 1536 v = n.EV.E1.EV.Vsym; // variable index number 1537 1538 if (!(v.Sflags & SFLunambig)) goto L3; // case 6 1539 1540 // If case 4 is not satisfied, return 1541 1542 // Function parameters have an implied definition prior to the 1543 // first block of the function. Unfortunately, the rd vector 1544 // does not take this into account. Therefore, we assume the 1545 // worst and reject assignments to function parameters. 1546 if (v.Sclass == SC.parameter || v.Sclass == SC.regpar || 1547 v.Sclass == SC.fastpar || v.Sclass == SC.shadowreg) 1548 goto L3; 1549 1550 if (el_sideeffect(n.EV.E2)) goto L3; // case 5 1551 1552 // If case 1 or case 2 is not satisfied, return 1553 1554 if (!(pdomexit & 1)) // if not case 1 1555 { 1556 uint i; 1557 for (i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i) // for each exit block 1558 { 1559 foreach (bl; ListRange(dfo[i].Bsucc)) 1560 { 1561 block *s; // successor to exit block 1562 1563 s = list_block(bl); 1564 if (!vec_testbit(s.Bdfoidx,l.Lloop) && 1565 (!symbol_isintab(v) || 1566 vec_testbit(v.Ssymnum,s.Binlv))) // if v is live on exit 1567 goto L3; 1568 } 1569 } 1570 } 1571 1572 tmp = vec_calloc(go.defnod.length); 1573 uint i; 1574 for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) // for each block in loop 1575 { 1576 if (dfo[i] == b) // except this one 1577 continue; 1578 1579 //<if there are any RDs of v in Binrd other than n> 1580 // <if there are any refs of v in that block> 1581 // return; 1582 1583 //filterrd(tmp,dfo[i].Binrd,v); 1584 listrds(dfo[i].Binrd,n.EV.E1,tmp,null); 1585 uint j; 1586 for (j = 0; (j = cast(uint) vec_index(j, tmp)) < go.defnod.length; ++j) // for each RD of v in Binrd 1587 { 1588 if (go.defnod[j].DNelem == n) 1589 continue; 1590 if (dfo[i].Belem && 1591 refs(v,dfo[i].Belem,cast(elem *)null)) //if refs of v 1592 { 1593 vec_free(tmp); 1594 goto L3; 1595 } 1596 break; 1597 } 1598 } // foreach 1599 1600 // <if there are any RDs of v in b.Binrd other than n> 1601 // <if there are any references to v before the 1602 // assignment to v> 1603 // <can't move this assignment> 1604 1605 //filterrd(tmp,b.Binrd,v); 1606 listrds(b.Binrd,n.EV.E1,tmp,null); 1607 uint j; 1608 for (j = 0; (j = cast(uint) vec_index(j, tmp)) < go.defnod.length; ++j) // for each RD of v in Binrd 1609 { 1610 if (go.defnod[j].DNelem == n) 1611 continue; 1612 if (b.Belem && refs(v,b.Belem,n)) 1613 { 1614 vec_free(tmp); 1615 goto L3; // can't move it 1616 } 1617 break; // avoid redundant looping 1618 } 1619 vec_free(tmp); 1620 1621 // We have an LI assignment, n. 1622 // Check to see if the rvalue is already in the preheader. 1623 foreach (e; l.Llis) 1624 { 1625 if (el_match(n.EV.E2, e.EV.E2)) 1626 { 1627 el_free(n.EV.E2); 1628 n.EV.E2 = el_calloc(); 1629 el_copy(n.EV.E2, e.EV.E1); 1630 if (debugc) printf("LI assignment rvalue was replaced\n"); 1631 doflow = true; 1632 go.changes++; 1633 break; 1634 } 1635 } 1636 1637 // move assignment elem to preheader 1638 if (debugc) printf("Moved LI assignment "); 1639 1640 debug 1641 if (debugc) 1642 { 1643 WReqn(n); 1644 printf(";\n"); 1645 } 1646 1647 go.changes++; 1648 doflow = true; // redo flow analysis 1649 ne = el_calloc(); 1650 el_copy(ne,n); // create assignment elem 1651 assert(l.Lpreheader); // make sure there is one 1652 appendelem(ne,&(l.Lpreheader.Belem)); // append ne to preheader 1653 l.Llis.push(ne); 1654 1655 el_copy(n,ne.EV.E1); // replace n with just a reference to v 1656 goto Lret; 1657 } // if 1658 break; 1659 1660 case OPcall: 1661 case OPucall: 1662 pdomexit |= 2; 1663 break; 1664 1665 case OPpair: 1666 case OPrpair: // don't move these, as they do not do computation 1667 movelis(n.EV.E1,b,l,pdomexit); 1668 n = n.EV.E2; 1669 goto Lnextlis; 1670 1671 default: 1672 break; 1673 } 1674 1675 L3: 1676 // Do leaves of non-LI expressions, leaves of = elems that didn't 1677 // meet the invariant assignment removal criteria, and don't do leaves 1678 if (OTleaf(op)) 1679 goto Lret; 1680 if (!isLI(n) || op == OPeq || op == OPcomma || OTrel(op) || op == OPnot || 1681 // These are usually addressing modes, so moving them is a net loss 1682 (I32 && op == OPshl && n.EV.E2.Eoper == OPconst && el_tolong(n.EV.E2) <= 3UL) 1683 ) 1684 { 1685 if (OTassign(op)) 1686 { 1687 elem *n1 = n.EV.E1; 1688 elem *n11; 1689 1690 if (OTbinary(op)) 1691 movelis(n.EV.E2,b,l,pdomexit); 1692 1693 // Do lvalue only if it is an expression 1694 if (n1.Eoper == OPvar) 1695 goto Lret; 1696 n11 = n1.EV.E1; 1697 if (OTbinary(n1.Eoper)) 1698 { 1699 movelis(n11,b,l,pdomexit); 1700 n = n1.EV.E2; 1701 } 1702 // If *(x + c), just make x the LI, not the (x + c). 1703 // The +c comes free with the addressing mode. 1704 else if (n1.Eoper == OPind && 1705 isLI(n11) && 1706 n11.Eoper == OPadd && 1707 n11.EV.E2.Eoper == OPconst 1708 ) 1709 { 1710 n = n11.EV.E1; 1711 } 1712 else 1713 n = n11; 1714 movelis(n,b,l,pdomexit); 1715 if (b.Btry || !(n1.Eoper == OPvar && symbol_isintab(n1.EV.Vsym))) 1716 { 1717 //printf("assign to global => domexit |= 2\n"); 1718 pdomexit |= 2; 1719 } 1720 } 1721 else if (OTunary(op)) 1722 { 1723 elem *e1 = n.EV.E1; 1724 1725 // If *(x + c), just make x the LI, not the (x + c). 1726 // The +c comes free with the addressing mode. 1727 if (op == OPind && 1728 isLI(e1) && 1729 e1.Eoper == OPadd && 1730 e1.EV.E2.Eoper == OPconst 1731 ) 1732 { 1733 n = e1.EV.E1; 1734 } 1735 else 1736 n = e1; 1737 } 1738 else if (OTbinary(op)) 1739 { 1740 movelis(n.EV.E1,b,l,pdomexit); 1741 n = n.EV.E2; 1742 } 1743 goto Lnextlis; 1744 } 1745 1746 if (el_sideeffect(n)) 1747 goto Lret; 1748 1749 static if (0) 1750 { 1751 printf("*pdomexit = %u\n", pdomexit); 1752 if (pdomexit & 2) 1753 { 1754 // If any indirections, can't LI it 1755 1756 // If this operand has already been indirected, we can let 1757 // it pass. 1758 Symbol *s; 1759 1760 printf("looking at:\n"); 1761 elem_print(n); 1762 s = el_basesym(n.EV.E1); 1763 if (s) 1764 { 1765 foreach (el; l.Llis) 1766 { 1767 el = el.EV.E2; 1768 elem_print(el); 1769 if (el.Eoper == OPind && el_basesym(el.EV.E1) == s) 1770 { 1771 printf(" pass!\n"); 1772 goto Lpass; 1773 } 1774 } 1775 } 1776 printf(" skip!\n"); 1777 goto Lret; 1778 1779 Lpass: 1780 } 1781 } 1782 1783 // Move the LI expression to the preheader 1784 if (debugc) printf("Moved LI expression "); 1785 1786 debug 1787 if (debugc) 1788 { 1789 WReqn(n); 1790 printf(";\n"); 1791 } 1792 1793 // See if it's already been moved 1794 ty = n.Ety; 1795 foreach (el; l.Llis) 1796 { 1797 tym_t ty2; 1798 1799 //printf("existing LI: "); WReqn(el); printf("\n"); 1800 ty2 = el.EV.E2.Ety; 1801 if (tysize(ty) == tysize(ty2)) 1802 { 1803 el.EV.E2.Ety = ty; 1804 if (el_match(n,el.EV.E2)) 1805 { 1806 el.EV.E2.Ety = ty2; 1807 if (!OTleaf(n.Eoper)) 1808 { el_free(n.EV.E1); 1809 if (OTbinary(n.Eoper)) 1810 el_free(n.EV.E2); 1811 } 1812 el_copy(n,el.EV.E1); // make copy of temp 1813 n.Ety = ty; 1814 1815 debug 1816 if (debugc) 1817 { printf("Already moved: LI expression replaced with "); 1818 WReqn(n); 1819 printf("\nPreheader %d expression %p ", 1820 l.Lpreheader.Bdfoidx,l.Lpreheader.Belem); 1821 WReqn(l.Lpreheader.Belem); 1822 printf("\n"); 1823 } 1824 1825 go.changes++; 1826 doflow = true; // redo flow analysis 1827 goto Lret; 1828 } 1829 el.EV.E2.Ety = ty2; 1830 } 1831 } 1832 1833 if (!(pdomexit & 1)) // if only sometimes executed 1834 { 1835 if (debugc) printf(" doesn't dominate exit\n"); 1836 goto Lret; // don't move LI 1837 } 1838 1839 if (tyaggregate(n.Ety)) 1840 goto Lret; 1841 1842 go.changes++; 1843 doflow = true; // redo flow analysis 1844 1845 t = el_alloctmp(n.Ety); /* allocate temporary t */ 1846 1847 debug 1848 { 1849 if (debugc) printf("movelis() introduced new variable '%s' of type ",t.EV.Vsym.Sident.ptr); 1850 if (debugc) printf("%s\n", tym_str(t.Ety)); 1851 if (debugc) printf("\n"); 1852 } 1853 1854 n2 = el_calloc(); 1855 el_copy(n2,n); /* create copy n2 of n */ 1856 ne = el_bin(OPeq,t.Ety,t,n2); /* create elem t=n2 */ 1857 assert(l.Lpreheader); /* make sure there is one */ 1858 appendelem(ne,&(l.Lpreheader.Belem)); /* append ne to preheader */ 1859 1860 debug 1861 if (debugc) 1862 { 1863 printf("Preheader %d expression %p\n\t", 1864 l.Lpreheader.Bdfoidx,l.Lpreheader.Belem); 1865 WReqn(l.Lpreheader.Belem); 1866 printf("\nLI expression replaced with "); WReqn(t); 1867 printf("\n"); 1868 } 1869 1870 el_copy(n,t); /* replace this elem with t */ 1871 1872 // Remember LI expression in elem list 1873 l.Llis.push(ne); 1874 1875 Lret: 1876 1877 } 1878 1879 /*************************** 1880 * Append elem to existing elem using an OPcomma elem. 1881 * Input: 1882 * n elem to append 1883 * *pn elem to append to 1884 */ 1885 1886 @trusted 1887 private void appendelem(elem *n,elem **pn) 1888 { 1889 assert(n && pn); 1890 if (*pn) /* if this elem exists */ 1891 { 1892 while ((*pn).Eoper == OPcomma) /* while we see OPcomma elems */ 1893 { 1894 (*pn).Ety = n.Ety; 1895 pn = &((*pn).EV.E2); /* cruise down right side */ 1896 } 1897 *pn = el_bin(OPcomma,n.Ety,*pn,n); 1898 } 1899 else 1900 *pn = n; /* else create a elem */ 1901 } 1902 1903 /************** LOOP INDUCTION VARIABLES **********************/ 1904 1905 /**************************** 1906 * Create a new famlist entry. 1907 */ 1908 1909 @trusted 1910 private void newfamlist(famlist* fl, tym_t ty) 1911 { 1912 eve c = void; 1913 memset(&c,0,c.sizeof); 1914 1915 fl.FLty = ty; 1916 switch (tybasic(ty)) 1917 { case TYfloat: 1918 c.Vfloat = 1; 1919 break; 1920 1921 case TYdouble: 1922 case TYdouble_alias: 1923 c.Vdouble = 1; 1924 break; 1925 1926 case TYldouble: 1927 c.Vldouble = 1; 1928 break; 1929 1930 case TYbool: 1931 case TYchar: 1932 case TYschar: 1933 case TYuchar: 1934 c.Vchar = 1; 1935 break; 1936 1937 case TYshort: 1938 case TYushort: 1939 case TYchar16: 1940 case TYwchar_t: // BUG: what about 4 byte wchar_t's? 1941 c.Vshort = 1; 1942 break; 1943 1944 case TYsptr: 1945 case TYcptr: 1946 case TYfptr: 1947 case TYvptr: 1948 case TYnptr: 1949 case TYnullptr: 1950 case TYimmutPtr: 1951 case TYsharePtr: 1952 case TYrestrictPtr: 1953 case TYfgPtr: 1954 ty = TYint; 1955 if (I64) 1956 ty = TYllong; 1957 goto case TYuint; 1958 1959 case TYint: 1960 case TYuint: 1961 c.Vint = 1; 1962 break; 1963 1964 case TYhptr: 1965 ty = TYlong; 1966 goto case TYlong; 1967 1968 case TYlong: 1969 case TYulong: 1970 case TYdchar: 1971 default: 1972 c.Vlong = 1; 1973 break; 1974 } 1975 fl.c1 = el_const(ty,&c); /* c1 = 1 */ 1976 c.Vldouble = 0; 1977 if (typtr(ty)) 1978 { 1979 ty = TYint; 1980 if (tybasic(ty) == TYhptr) 1981 ty = TYlong; 1982 if (I64) 1983 ty = TYllong; 1984 } 1985 fl.c2 = el_const(ty,&c); /* c2 = 0 */ 1986 } 1987 1988 /*************************** 1989 * Remove induction variables from loop l. 1990 * Loop invariant removal should have been done just previously. 1991 */ 1992 1993 @trusted 1994 private void loopiv(ref Loop l) 1995 { 1996 if (debugc) printf("loopiv(%p)\n", &l); 1997 assert(l.Livlist.length == 0 && l.Lopeqlist.length == 0); 1998 elimspec(l, dfo); 1999 if (doflow) 2000 { 2001 flowrd(); /* compute reaching defs */ 2002 flowlv(); /* compute live variables */ 2003 flowae(); // compute available expressions 2004 doflow = false; 2005 } 2006 findbasivs(l); /* find basic induction variables */ 2007 findopeqs(l); // find op= variables 2008 findivfams(l); /* find IV families */ 2009 elimfrivivs(l); /* eliminate less useful family IVs */ 2010 intronvars(l); /* introduce new variables */ 2011 elimbasivs(l); /* eliminate basic IVs */ 2012 if (!addblk) // adding a block changes the Binlv 2013 elimopeqs(l); // eliminate op= variables 2014 2015 foreach (ref iv; l.Livlist) 2016 iv.reset(); 2017 l.Livlist.reset(); // reset for reuse 2018 2019 foreach (ref iv; l.Lopeqlist) 2020 iv.reset(); 2021 l.Lopeqlist.reset(); // reset for reuse 2022 2023 /* Do copy propagation and dead assignment elimination */ 2024 /* upon return to optfunc() */ 2025 } 2026 2027 /************************************* 2028 * Find basic IVs of loop l. 2029 * A basic IV x of loop l is a variable x which has 2030 * exactly one assignment within l of the form: 2031 * x += c or x -= c, where c is either a constant 2032 * or a LI. 2033 * Input: 2034 * go.defnod[] loaded with all the definition elems of the loop 2035 */ 2036 2037 @trusted 2038 private void findbasivs(ref Loop l) 2039 { 2040 vec_t poss,notposs; 2041 elem *n; 2042 bool ambdone; 2043 2044 ambdone = false; 2045 poss = vec_calloc(globsym.length); 2046 notposs = vec_calloc(globsym.length); /* vector of all variables */ 2047 /* (initially all unmarked) */ 2048 2049 /* for each def in go.defnod[] that is within loop l */ 2050 2051 foreach (const i; 0 .. go.defnod.length) 2052 { 2053 if (!vec_testbit(go.defnod[i].DNblock.Bdfoidx,l.Lloop)) 2054 continue; /* def is not in the loop */ 2055 2056 n = go.defnod[i].DNelem; 2057 elem_debug(n); 2058 if (OTassign(n.Eoper) && n.EV.E1.Eoper == OPvar) 2059 { 2060 Symbol *s; /* if unambiguous def */ 2061 2062 s = n.EV.E1.EV.Vsym; 2063 if (symbol_isintab(s)) 2064 { 2065 SYMIDX v; 2066 2067 v = n.EV.E1.EV.Vsym.Ssymnum; 2068 if ((n.Eoper == OPaddass || n.Eoper == OPminass || 2069 n.Eoper == OPpostinc || n.Eoper == OPpostdec) && 2070 (n.EV.E2.Eoper == OPconst || /* if x += c or x -= c */ 2071 n.EV.E2.Eoper == OPvar && isLI(n.EV.E2))) 2072 { 2073 if (vec_testbit(v,poss)) 2074 /* We've already seen this def elem, */ 2075 /* therefore there is more than one */ 2076 /* def of v within the loop, therefore */ 2077 /* v is not a basic IV. */ 2078 vec_setbit(v,notposs); 2079 else 2080 vec_setbit(v,poss); 2081 } 2082 else /* else mark as not possible */ 2083 vec_setbit(v,notposs); 2084 } 2085 } 2086 else /* else ambiguous def */ 2087 { 2088 /* mark any vars that could be affected by */ 2089 /* this def as not possible */ 2090 2091 if (!ambdone) /* avoid redundant loops */ 2092 { 2093 foreach (j; 0 .. globsym.length) 2094 { 2095 if (!(globsym[j].Sflags & SFLunambig)) 2096 vec_setbit(j,notposs); 2097 } 2098 ambdone = true; 2099 } 2100 } 2101 } 2102 static if (0) 2103 { 2104 printf("poss "); vec_println(poss); 2105 printf("notposs "); vec_println(notposs); 2106 } 2107 vec_subass(poss,notposs); /* poss = poss - notposs */ 2108 2109 /* create list of IVs */ 2110 uint i; 2111 for (i = 0; (i = cast(uint) vec_index(i, poss)) < globsym.length; ++i) // for each basic IV 2112 { 2113 Symbol *s; 2114 2115 /* Skip if we don't want it to be a basic IV (see funcprev()) */ 2116 s = globsym[i]; 2117 assert(symbol_isintab(s)); 2118 if (s.Sflags & SFLnotbasiciv) 2119 continue; 2120 2121 // Do not use aggregates as basic IVs. This is because the other loop 2122 // code doesn't check offsets into symbols, (assuming everything 2123 // is at offset 0). We could perhaps amend this by allowing basic IVs 2124 // if the struct consists of only one data member. 2125 if (tyaggregate(s.ty())) 2126 continue; 2127 2128 // Do not use complex types as basic IVs, as the code gen isn't up to it 2129 if (tycomplex(s.ty())) 2130 continue; 2131 2132 auto biv = l.Livlist.push(); 2133 biv.IVbasic = s; // symbol of basic IV 2134 biv.IVincr = null; 2135 2136 if (debugc) printf("Symbol '%s' (%d) is a basic IV, ", s.Sident.ptr, i); 2137 2138 /* We have the sym idx of the basic IV. We need to find */ 2139 /* the parent of the increment elem for it. */ 2140 2141 /* First find the go.defnod[] */ 2142 foreach (j; 0 .. go.defnod.length) 2143 { 2144 /* If go.defnod is a def of i and it is in the loop */ 2145 if (go.defnod[j].DNelem.EV.E1 && /* OPasm are def nodes */ 2146 go.defnod[j].DNelem.EV.E1.EV.Vsym == s && 2147 vec_testbit(go.defnod[j].DNblock.Bdfoidx,l.Lloop)) 2148 { 2149 biv.IVincr = el_parent(go.defnod[j].DNelem,&(go.defnod[j].DNblock.Belem)); 2150 assert(s == (*biv.IVincr).EV.E1.EV.Vsym); 2151 2152 debug if (debugc) 2153 { 2154 printf("Increment elem is: "); WReqn(*biv.IVincr); printf("\n"); 2155 } 2156 goto L1; 2157 } 2158 } 2159 assert(0); /* should have found it */ 2160 /* NOTREACHED */ 2161 2162 L1: 2163 } 2164 2165 vec_free(poss); 2166 vec_free(notposs); 2167 } 2168 2169 /************************************* 2170 * Find op= elems of loop l. 2171 * Analogous to findbasivs(). 2172 * Used to eliminate useless loop code normally found in benchmark programs. 2173 * Input: 2174 * go.defnod[] loaded with all the definition elems of the loop 2175 */ 2176 2177 @trusted 2178 private void findopeqs(ref Loop l) 2179 { 2180 vec_t poss,notposs; 2181 elem *n; 2182 bool ambdone; 2183 2184 ambdone = false; 2185 poss = vec_calloc(globsym.length); 2186 notposs = vec_calloc(globsym.length); // vector of all variables 2187 // (initially all unmarked) 2188 2189 // for each def in go.defnod[] that is within loop l 2190 2191 foreach (i; 0 .. go.defnod.length) 2192 { 2193 if (!vec_testbit(go.defnod[i].DNblock.Bdfoidx,l.Lloop)) 2194 continue; // def is not in the loop 2195 2196 n = go.defnod[i].DNelem; 2197 elem_debug(n); 2198 if (OTopeq(n.Eoper) && n.EV.E1.Eoper == OPvar) 2199 { 2200 Symbol *s; // if unambiguous def 2201 2202 s = n.EV.E1.EV.Vsym; 2203 if (symbol_isintab(s)) 2204 { 2205 SYMIDX v; 2206 2207 v = n.EV.E1.EV.Vsym.Ssymnum; 2208 { 2209 if (vec_testbit(v,poss)) 2210 // We've already seen this def elem, 2211 // therefore there is more than one 2212 // def of v within the loop, therefore 2213 // v is not a basic IV. 2214 vec_setbit(v,notposs); 2215 else 2216 vec_setbit(v,poss); 2217 } 2218 } 2219 } 2220 else // else ambiguous def 2221 { 2222 // mark any vars that could be affected by 2223 // this def as not possible 2224 2225 if (!ambdone) // avoid redundant loops 2226 { 2227 foreach (j; 0 .. globsym.length) 2228 { 2229 if (!(globsym[j].Sflags & SFLunambig)) 2230 vec_setbit(j,notposs); 2231 } 2232 ambdone = true; 2233 } 2234 } 2235 } 2236 2237 // Don't use symbols already in Livlist 2238 foreach (ref iv; l.Livlist) 2239 { 2240 vec_setbit(iv.IVbasic.Ssymnum,notposs); 2241 } 2242 2243 2244 static if (0) 2245 { 2246 printf("poss "); vec_println(poss); 2247 printf("notposs "); vec_println(notposs); 2248 } 2249 2250 vec_subass(poss,notposs); // poss = poss - notposs 2251 2252 // create list of IVs 2253 uint i; 2254 for (i = 0; (i = cast(uint) vec_index(i, poss)) < globsym.length; ++i) // for each opeq IV 2255 { 2256 Symbol *s; 2257 2258 s = globsym[i]; 2259 assert(symbol_isintab(s)); 2260 2261 // Do not use aggregates as basic IVs. This is because the other loop 2262 // code doesn't check offsets into symbols, (assuming everything 2263 // is at offset 0). We could perhaps amend this by allowing basic IVs 2264 // if the struct consists of only one data member. 2265 if (tyaggregate(s.ty())) 2266 continue; 2267 2268 auto biv = l.Lopeqlist.push(); 2269 biv.IVbasic = s; // symbol of basic IV 2270 biv.IVincr = null; 2271 2272 if (debugc) printf("Symbol '%s' (%d) is an opeq IV, ",s.Sident.ptr,i); 2273 2274 // We have the sym idx of the basic IV. We need to find 2275 // the parent of the increment elem for it. 2276 2277 // First find the go.defnod[] 2278 foreach (j; 0 .. go.defnod.length) 2279 { 2280 // If go.defnod is a def of i and it is in the loop 2281 if (go.defnod[j].DNelem.EV.E1 && // OPasm are def nodes 2282 go.defnod[j].DNelem.EV.E1.EV.Vsym == s && 2283 vec_testbit(go.defnod[j].DNblock.Bdfoidx,l.Lloop)) 2284 { 2285 biv.IVincr = el_parent(go.defnod[j].DNelem,&(go.defnod[j].DNblock.Belem)); 2286 assert(s == (*biv.IVincr).EV.E1.EV.Vsym); 2287 2288 debug if (debugc) 2289 { 2290 printf("Opeq elem is: "); WReqn(*biv.IVincr); printf("\n"); 2291 } 2292 goto Lcont; 2293 } 2294 } 2295 assert(0); // should have found it 2296 // NOTREACHED 2297 2298 Lcont: 2299 } 2300 2301 vec_free(poss); 2302 vec_free(notposs); 2303 } 2304 2305 /***************************** 2306 * Find families for each basic IV. 2307 * An IV family is a list of elems of the form 2308 * c1*X+c2, where X is a basic induction variable. 2309 * Note that we do not do divides, because of roundoff error problems. 2310 */ 2311 2312 @trusted 2313 private void findivfams(ref Loop l) 2314 { 2315 if (debugc) printf("findivfams(%p)\n", &l); 2316 foreach (ref biv; l.Livlist) 2317 { 2318 for (uint i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) // for each block in loop 2319 if (dfo[i].Belem) 2320 ivfamelems(&biv,&(dfo[i].Belem)); 2321 /* Fold all the constant expressions in c1 and c2. */ 2322 foreach (ref fl; biv.IVfamily) 2323 { 2324 fl.c1 = doptelem(fl.c1,GOALvalue | GOALagain); 2325 fl.c2 = doptelem(fl.c2,GOALvalue | GOALagain); 2326 } 2327 } 2328 } 2329 2330 /************************* 2331 * Tree walking support routine for findivfams(). 2332 * biv = basic induction variable pointer 2333 * pn pointer to elem 2334 */ 2335 2336 @trusted 2337 private void ivfamelems(Iv *biv,elem **pn) 2338 { 2339 tym_t ty,c2ty; 2340 elem *n1; 2341 elem *n2; 2342 2343 assert(pn); 2344 elem* n = *pn; 2345 assert(biv && n); 2346 const op = n.Eoper; 2347 if (OTunary(op)) 2348 { 2349 ivfamelems(biv,&n.EV.E1); 2350 n1 = n.EV.E1; 2351 n2 = null; 2352 } 2353 else if (OTbinary(op)) 2354 { 2355 ivfamelems(biv,&n.EV.E1); 2356 ivfamelems(biv,&n.EV.E2); /* LTOR or RTOL order is unimportant */ 2357 n1 = n.EV.E1; 2358 n2 = n.EV.E2; 2359 } 2360 else /* else leaf elem */ 2361 return; /* which can't be in the family */ 2362 2363 if (tycomplex(n.Ety)) 2364 return; 2365 2366 if (op == OPmul || op == OPadd || op == OPmin || 2367 op == OPneg || op == OPshl) 2368 { /* Note that we are wimping out and not considering */ 2369 /* LI variables as part of c1 and c2, but only constants. */ 2370 2371 ty = n.Ety; 2372 2373 /* Since trees are canonicalized, basic induction variables */ 2374 /* will only appear on the left. */ 2375 2376 /* Improvement: */ 2377 /* We wish to pick up the cases (biv + li), (biv - li) and */ 2378 /* (li + biv). OPmul and LS with bivs are out, since if we */ 2379 /* try to eliminate the biv, and the loop test is a >, >=, */ 2380 /* <, <=, we have a problem since we don't know if the li */ 2381 /* is negative. (Would have to call swaprel() on it.) */ 2382 2383 /* If we have (li + var), swap the leaves. */ 2384 if (op == OPadd && isLI(n1) && n1.Eoper == OPvar && n2.Eoper == OPvar) 2385 { 2386 n.EV.E1 = n2; 2387 n2 = n.EV.E2 = n1; 2388 n1 = n.EV.E1; 2389 } 2390 2391 // Get rid of case where we painted a far pointer to a long 2392 if (op == OPadd || op == OPmin) 2393 { 2394 int sz; 2395 2396 sz = tysize(ty); 2397 if (sz == tysize(TYfptr) && !tyfv(ty) && 2398 (sz != tysize(n1.Ety) || sz != tysize(n2.Ety))) 2399 return; 2400 } 2401 2402 /* Look for function of basic IV (-biv or biv op const) */ 2403 if (n1.Eoper == OPvar && n1.EV.Vsym == biv.IVbasic) 2404 { 2405 if (op == OPneg) 2406 { 2407 if (debugc) printf("found (-biv), elem %p\n",n); 2408 auto fl = biv.IVfamily.push(); 2409 newfamlist(fl, ty); 2410 fl.FLivty = n1.Ety; 2411 fl.FLpelem = pn; 2412 fl.c1 = el_una(op,ty,fl.c1); /* c1 = -1 */ 2413 } 2414 else if (n2.Eoper == OPconst || 2415 isLI(n2) && (op == OPadd || op == OPmin)) 2416 { 2417 debug 2418 if (debugc) 2419 { printf("found (biv op const), elem ("); 2420 WReqn(n); 2421 printf(");\n"); 2422 printf("Types: n1=%s", tym_str(n1.Ety)); 2423 printf(" ty=%s", tym_str(ty)); 2424 printf(" n2=%s\n", tym_str(n2.Ety)); 2425 } 2426 2427 auto fl = biv.IVfamily.push(); 2428 newfamlist(fl, ty); 2429 fl.FLivty = n1.Ety; 2430 fl.FLpelem = pn; 2431 switch (op) 2432 { 2433 case OPadd: /* c2 = right */ 2434 c2ty = n2.Ety; 2435 if (typtr(fl.c2.Ety)) 2436 c2ty = fl.c2.Ety; 2437 goto L1; 2438 2439 case OPmin: /* c2 = -right */ 2440 c2ty = fl.c2.Ety; 2441 /* Check for subtracting two pointers */ 2442 if (typtr(c2ty) && typtr(n2.Ety)) 2443 { 2444 if (tybasic(c2ty) == TYhptr) 2445 c2ty = TYlong; 2446 else 2447 c2ty = I64 ? TYllong : TYint; 2448 } 2449 L1: 2450 fl.c2 = el_bin(op,c2ty,fl.c2,el_copytree(n2)); 2451 break; 2452 2453 case OPmul: /* c1 = right */ 2454 case OPshl: /* c1 = 1 << right */ 2455 fl.c1 = el_bin(op,ty,fl.c1,el_copytree(n2)); 2456 break; 2457 2458 default: 2459 assert(0); 2460 } 2461 } 2462 } 2463 2464 /* Look for function of existing IV */ 2465 2466 foreach (ref fl; biv.IVfamily) 2467 { 2468 if (*fl.FLpelem != n1) /* not it */ 2469 continue; 2470 2471 /* Look for (fl op constant) */ 2472 if (op == OPneg) 2473 { 2474 if (debugc) printf("found (-fl), elem %p\n",n); 2475 /* c1 = -c1; c2 = -c2; */ 2476 fl.c1 = el_una(OPneg,ty,fl.c1); 2477 fl.c2 = el_una(OPneg,ty,fl.c2); 2478 fl.FLty = ty; 2479 fl.FLpelem = pn; /* replace with new IV */ 2480 } 2481 else if (n2.Eoper == OPconst || 2482 isLI(n2) && (op == OPadd || op == OPmin)) 2483 { 2484 debug 2485 if (debugc) 2486 { 2487 printf("found (fl op const), elem ("); 2488 WReqn(n); 2489 assert(*pn == n); 2490 printf(");\n"); 2491 elem_print(n); 2492 } 2493 2494 switch (op) 2495 { 2496 case OPmul: 2497 case OPshl: 2498 fl.c1 = el_bin(op,ty,fl.c1,el_copytree(n2)); 2499 break; 2500 2501 case OPadd: 2502 case OPmin: 2503 break; 2504 2505 default: 2506 assert(0); 2507 } 2508 fl.c2 = el_bin(op,ty,fl.c2,el_copytree(n2)); 2509 fl.FLty = ty; 2510 fl.FLpelem = pn; /* replace with new IV */ 2511 } /* else if */ 2512 } /* for */ 2513 } /* if */ 2514 } 2515 2516 /********************************* 2517 * Eliminate frivolous family ivs, that is, 2518 * if we can't eliminate the BIV, then eliminate family ivs that 2519 * differ from it only by a constant. 2520 */ 2521 2522 @trusted 2523 private void elimfrivivs(ref Loop l) 2524 { 2525 foreach (ref biv; l.Livlist) 2526 { 2527 int nrefs; 2528 2529 if (debugc) printf("elimfrivivs()\n"); 2530 /* Compute number of family ivs for biv */ 2531 const nfams = biv.IVfamily.length; 2532 if (debugc) printf("nfams = %d\n", cast(int)nfams); 2533 2534 /* Compute number of references to biv */ 2535 if (onlyref(biv.IVbasic,l,*biv.IVincr,nrefs)) 2536 nrefs--; 2537 if (debugc) printf("nrefs = %d\n",nrefs); 2538 assert(nrefs + 1 >= nfams); 2539 if (nrefs > nfams || // if we won't eliminate the biv 2540 (!I16 && nrefs == nfams)) 2541 { /* Eliminate any family ivs that only differ by a constant */ 2542 /* from biv */ 2543 foreach (ref fl; biv.IVfamily) 2544 { 2545 elem *ec1 = fl.c1; 2546 targ_llong c; 2547 2548 if (elemisone(ec1) || 2549 // Eliminate fl's that can be represented by 2550 // an addressing mode 2551 (!I16 && ec1.Eoper == OPconst && tyintegral(ec1.Ety) && 2552 ((c = el_tolong(ec1)) == 2 || c == 4 || c == 8) 2553 ) 2554 ) 2555 { 2556 fl.FLtemp = FLELIM; 2557 2558 debug 2559 if (debugc) 2560 { 2561 printf("Eliminating frivolous IV "); 2562 WReqn(*fl.FLpelem); 2563 printf("\n"); 2564 } 2565 } 2566 } 2567 } 2568 } 2569 } 2570 2571 2572 /****************************** 2573 * Introduce new variables. 2574 */ 2575 2576 @trusted 2577 private void intronvars(ref Loop l) 2578 { 2579 elem *T; 2580 elem *ne; 2581 elem *t2; 2582 elem *C2; 2583 elem *cmul; 2584 tym_t ty,tyr; 2585 2586 if (debugc) printf("intronvars(%p)\n", &l); 2587 foreach (ref biv; l.Livlist) 2588 { 2589 elem *bivinc = *biv.IVincr; /* ptr to increment elem */ 2590 2591 foreach (ref fl; biv.IVfamily) 2592 { /* for each IV in family of biv */ 2593 if (fl.FLtemp == FLELIM) /* if already eliminated */ 2594 continue; 2595 2596 /* If induction variable can be written as a simple function */ 2597 /* of a previous induction variable, skip it. */ 2598 if (funcprev(biv,fl)) 2599 continue; 2600 2601 ty = fl.FLty; 2602 T = el_alloctmp(ty); /* allocate temporary T */ 2603 fl.FLtemp = T.EV.Vsym; 2604 2605 debug 2606 { 2607 if (debugc) printf("intronvars() introduced new variable '%s' of type ",T.EV.Vsym.Sident.ptr); 2608 if (debugc) printf("%s\n", tym_str(ty)); 2609 if (debugc) printf("\n"); 2610 } 2611 2612 /* append elem T=biv*C1+C2 to preheader */ 2613 /* ne = biv*C1 */ 2614 tyr = fl.FLivty; /* type of biv */ 2615 ne = el_var(biv.IVbasic); 2616 ne.Ety = tyr; 2617 if (!elemisone(fl.c1)) /* don't multiply ptrs by 1 */ 2618 ne = el_bin(OPmul,tyr,ne,el_copytree(fl.c1)); 2619 if (tyfv(tyr) && tysize(ty) == SHORTSIZE) 2620 ne = el_una(OP32_16,ty,ne); 2621 C2 = el_copytree(fl.c2); 2622 t2 = el_bin(OPadd,ty,ne,C2); /* t2 = ne + C2 */ 2623 ne = el_bin(OPeq,ty,el_copytree(T),t2); 2624 appendelem(ne, &(l.Lpreheader.Belem)); 2625 2626 /* prefix T+=C1*C to elem biv+=C */ 2627 /* Must prefix in case the value of the expression (biv+=C) */ 2628 /* is used by somebody up the tree. */ 2629 cmul = el_bin(OPmul,fl.c1.Ety,el_copytree(fl.c1), 2630 el_copytree(bivinc.EV.E2)); 2631 t2 = el_bin(bivinc.Eoper,ty,el_copytree(T),cmul); 2632 t2 = doptelem(t2,GOALvalue | GOALagain); 2633 *biv.IVincr = el_bin(OPcomma,bivinc.Ety,t2,bivinc); 2634 biv.IVincr = &((*biv.IVincr).EV.E2); 2635 2636 debug 2637 if (debugc) 2638 { 2639 printf("Replacing elem ("); 2640 WReqn(*fl.FLpelem); 2641 printf(") with '%s'\n",T.EV.Vsym.Sident.ptr); 2642 printf("The init elem is ("); 2643 WReqn(ne); 2644 printf(");\nThe increment elem is ("); 2645 WReqn(t2); 2646 printf(")\n"); 2647 } 2648 2649 el_free(*fl.FLpelem); 2650 *fl.FLpelem = T; /* replace elem n with ref to T */ 2651 doflow = true; /* redo flow analysis */ 2652 go.changes++; 2653 } /* for */ 2654 } /* for */ 2655 } 2656 2657 /******************************* 2658 * Determine if induction variable can be rewritten as a simple 2659 * function of a previously generated temporary. 2660 * This can frequently 2661 * generate less code than that of an all-new temporary (especially 2662 * if it is the same as a previous temporary!). 2663 * Input: 2664 * biv Basic induction variable 2665 * fl Item in biv's family list we are looking at 2666 * Returns: 2667 * false Caller should create a new induction variable. 2668 * true *FLpelem is replaced with function of a previous 2669 * induction variable. FLtemp is set to FLELIM to 2670 * indicate this. 2671 */ 2672 2673 @trusted 2674 private bool funcprev(ref Iv biv, ref famlist fl) 2675 { 2676 tym_t tymin; 2677 int sz; 2678 elem *e1; 2679 elem *e2; 2680 elem *flse1; 2681 2682 debug if (debugc) 2683 printf("funcprev\n"); 2684 2685 foreach (ref fls; biv.IVfamily) 2686 { 2687 if (!fls.FLtemp) // haven't generated a temporary yet 2688 continue; 2689 if (fls.FLtemp == FLELIM) /* no iv we can use here */ 2690 continue; 2691 2692 /* The multipliers must match */ 2693 if (!el_match(fls.c1,fl.c1)) 2694 continue; 2695 2696 /* If the c2's match also, we got it easy */ 2697 if (el_match(fls.c2,fl.c2)) 2698 { 2699 if (tysize(fl.FLty) > tysize(fls.FLtemp.ty())) 2700 continue; /* can't increase size of var */ 2701 flse1 = el_var(fls.FLtemp); 2702 flse1.Ety = fl.FLty; 2703 goto L2; 2704 } 2705 2706 /* The difference is only in the addition. Therefore, replace 2707 *fl.FLpelem with: 2708 case 1: (fl.c2 + (fls.FLtemp - fls.c2)) 2709 case 2: (fls.FLtemp + (fl.c2 - fls.c2)) 2710 */ 2711 e1 = fl.c2; 2712 /* Subtracting relocatables usually generates slow code for */ 2713 /* linkers that can't handle arithmetic on relocatables. */ 2714 if (typtr(fls.c2.Ety)) 2715 { 2716 if (fls.c2.Eoper == OPrelconst && 2717 !(fl.c2.Eoper == OPrelconst && 2718 fl.c2.EV.Vsym == fls.c2.EV.Vsym) 2719 ) 2720 continue; 2721 } 2722 flse1 = el_var(fls.FLtemp); 2723 e2 = flse1; /* assume case 1 */ 2724 tymin = e2.Ety; 2725 if (typtr(fls.c2.Ety)) 2726 { 2727 if (!typtr(tymin)) 2728 { 2729 if (typtr(e1.Ety)) 2730 { 2731 e1 = e2; 2732 e2 = fl.c2; /* case 2 */ 2733 } 2734 else /* can't subtract fptr */ 2735 goto L1; 2736 } 2737 if (tybasic(fls.c2.Ety) == TYhptr) 2738 tymin = TYlong; 2739 else 2740 tymin = I64 ? TYllong : TYint; /* type of (ptr - ptr) */ 2741 } 2742 2743 /* If e1 and fls.c2 are fptrs, and are not from the same */ 2744 /* segment, we cannot subtract them. */ 2745 if (tyfv(e1.Ety) && tyfv(fls.c2.Ety)) 2746 { 2747 if (e1.Eoper != OPrelconst || fls.c2.Eoper != OPrelconst) 2748 goto L1; /* assume expressions have diff segs */ 2749 if (e1.EV.Vsym.Sclass != fls.c2.EV.Vsym.Sclass) 2750 { 2751 L1: 2752 el_free(flse1); 2753 continue; 2754 } 2755 } 2756 2757 /* Some more type checking... */ 2758 sz = tysize(fl.FLty); 2759 if (sz != tysize(e1.Ety) && 2760 sz != tysize(tymin)) 2761 goto L1; 2762 2763 /* Do some type checking (can't add pointers and get a pointer!) */ 2764 //if (typtr(fl.FLty) && typtr(e1.Ety) && typtr(tymin)) 2765 //goto L1; 2766 /* Construct (e1 + (e2 - fls.c2)) */ 2767 flse1 = el_bin(OPadd,fl.FLty, 2768 e1, 2769 el_bin(OPmin,tymin, 2770 e2, 2771 el_copytree(fls.c2))); 2772 if (sz < tysize(tymin) && sz == tysize(e1.Ety)) 2773 { 2774 assert(I16); 2775 flse1.EV.E2 = el_una(OPoffset,fl.FLty,flse1.EV.E2); 2776 } 2777 2778 flse1 = doptelem(flse1,GOALvalue | GOALagain); 2779 fl.c2 = null; 2780 L2: 2781 debug if (debugc) 2782 { 2783 printf("Replacing "); 2784 WReqn(*fl.FLpelem); 2785 printf(" with "); 2786 WReqn(flse1); 2787 printf("\n"); 2788 } 2789 2790 el_free(*fl.FLpelem); 2791 *fl.FLpelem = flse1; 2792 2793 /* Fix the iv so when we do loops again, we won't create */ 2794 /* yet another iv, which is just what funcprev() is supposed */ 2795 /* to prevent. */ 2796 fls.FLtemp.Sflags |= SFLnotbasiciv; 2797 2798 fl.FLtemp = FLELIM; /* mark iv as being gone */ 2799 go.changes++; 2800 doflow = true; 2801 return true; /* it was replaced */ 2802 } 2803 return false; /* need to create a new variable */ 2804 } 2805 2806 /*********************** 2807 * Eliminate basic IVs. 2808 */ 2809 2810 @trusted 2811 private void elimbasivs(ref Loop l) 2812 { 2813 if (debugc) printf("elimbasivs(%p)\n", &l); 2814 foreach (ref biv; l.Livlist) 2815 { 2816 /* Can't eliminate this basic IV if we have a goal for the */ 2817 /* increment elem. */ 2818 // Be careful about Nflags being in a union... 2819 elem* einc = *biv.IVincr; 2820 if (!(einc.Nflags & NFLnogoal)) 2821 continue; 2822 2823 Symbol* X = biv.IVbasic; 2824 assert(symbol_isintab(X)); 2825 tym_t ty = X.ty(); 2826 int refcount; 2827 elem** pref = onlyref(X,l,einc,refcount); 2828 2829 /* if only ref of X is of the form (X) or (X relop e) or (e relop X) */ 2830 if (pref != null && refcount <= 1) 2831 { 2832 if (!biv.IVfamily.length) 2833 continue; 2834 2835 if (catchRef(X, l)) 2836 continue; 2837 2838 elem* ref_ = *pref; 2839 2840 /* Replace (X) with (X != 0) */ 2841 if (ref_.Eoper == OPvar) 2842 ref_ = *pref = el_bin(OPne,TYint,ref_,el_long(ref_.Ety,0L)); 2843 2844 const fi = simfl(biv.IVfamily, ty); // find simplest elem in family 2845 if (fi == biv.IVfamily.length) 2846 continue; 2847 famlist* fl = &biv.IVfamily[fi]; 2848 2849 // Don't do the replacement if we would replace a 2850 // signed comparison with an unsigned one 2851 tym_t flty = fl.FLty; 2852 if (tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety)) 2853 flty = touns(flty); 2854 2855 if (ref_.Eoper >= OPle && ref_.Eoper <= OPge && 2856 !(tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety)) && 2857 tyuns(flty)) 2858 continue; 2859 2860 /* if we have (e relop X), replace it with (X relop e) */ 2861 if (ref_.EV.E2.Eoper == OPvar && ref_.EV.E2.EV.Vsym == X) 2862 { 2863 elem* tmp = ref_.EV.E2; 2864 ref_.EV.E2 = ref_.EV.E1; 2865 ref_.EV.E1 = tmp; 2866 ref_.Eoper = cast(ubyte)swaprel(ref_.Eoper); 2867 } 2868 2869 // If e*c1+c2 would result in a sign change or an overflow 2870 // then we can't do it 2871 if (fl.c1.Eoper == OPconst) 2872 { 2873 targ_llong c1 = el_tolong(fl.c1); 2874 const int sz = tysize(ty); 2875 if (sz == SHORTSIZE && 2876 ((ref_.EV.E2.Eoper == OPconst && 2877 c1 * el_tolong(ref_.EV.E2) & ~0x7FFFL) || 2878 c1 & ~0x7FFFL) 2879 ) 2880 continue; 2881 2882 if (sz == LONGSIZE && 2883 ((ref_.EV.E2.Eoper == OPconst && 2884 c1 * el_tolong(ref_.EV.E2) & ~0x7FFFFFFFL) || 2885 c1 & ~0x7FFFFFFFL) 2886 ) 2887 continue; 2888 if (sz == LLONGSIZE && 2889 ((ref_.EV.E2.Eoper == OPconst && 2890 c1 * el_tolong(ref_.EV.E2) & ~0x7FFFFFFFFFFFFFFFL) || 2891 c1 & ~0x7FFFFFFFFFFFFFFFL) 2892 ) 2893 continue; 2894 } 2895 2896 /* If the incr is a decrement, and the relational is < or <=, 2897 * and its unsigned, then don't do it because it could drop below 0. 2898 * https://issues.dlang.org/show_bug.cgi?id=16189 2899 */ 2900 if ((einc.Eoper == OPminass || einc.EV.E2.Eoper == OPconst && el_tolong(einc.EV.E2) < 0) && 2901 (ref_.Eoper == OPlt || ref_.Eoper == OPle) && 2902 (tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety))) 2903 continue; 2904 2905 /* If loop started out with a signed conditional that was 2906 * replaced with an unsigned one, don't do it if c2 2907 * is less than 0. 2908 */ 2909 if (ref_.Nflags & NFLtouns && fl.c2.Eoper == OPconst) 2910 { 2911 targ_llong c2 = el_tolong(fl.c2); 2912 if (c2 < 0) 2913 continue; 2914 } 2915 2916 elem *refE2 = el_copytree(ref_.EV.E2); 2917 int refEoper = ref_.Eoper; 2918 2919 /* if c1 < 0 and relop is < <= > >= 2920 then adjust relop as if both sides were multiplied 2921 by -1 2922 */ 2923 if (!tyuns(ty) && 2924 (tyintegral(ty) && el_tolong(fl.c1) < 0 || 2925 tyfloating(ty) && el_toldoubled(fl.c1) < 0.0)) 2926 refEoper = swaprel(refEoper); 2927 2928 /* Replace (X relop e) with (X relop (short)e) 2929 if T is 1 word but e is 2 2930 */ 2931 if (tysize(flty) == SHORTSIZE && 2932 tysize(refE2.Ety) == LONGSIZE) 2933 refE2 = el_una(OP32_16,flty,refE2); 2934 2935 /* replace e with e*c1 + c2 */ 2936 elem* C2 = el_copytree(fl.c2); 2937 elem* fofe = el_bin(OPadd,flty, 2938 el_bin(OPmul,refE2.Ety, 2939 refE2, 2940 el_copytree(fl.c1)), 2941 C2); 2942 fofe = doptelem(fofe,GOALvalue | GOALagain); // fold any constants 2943 2944 if (tyuns(flty) && refEoper == OPge && 2945 fofe.Eoper == OPconst && el_allbits(fofe, 0) && 2946 fl.c2.Eoper == OPconst && !el_allbits(fl.c2, 0)) 2947 { 2948 /* Don't do it if replacement will result in 2949 * an unsigned T>=0 which will be an infinite loop. 2950 */ 2951 el_free(fofe); 2952 continue; 2953 } 2954 2955 if (debugc) printf("Eliminating basic IV '%s'\n",X.Sident.ptr); 2956 2957 debug if (debugc) 2958 { 2959 printf("Comparison replaced: "); 2960 WReqn(ref_); 2961 printf(" with "); 2962 } 2963 2964 el_free(ref_.EV.E2); 2965 ref_.EV.E2 = refE2; 2966 ref_.Eoper = cast(ubyte)refEoper; 2967 2968 elimass(einc); // dump the increment elem 2969 2970 // replace X with T 2971 assert(ref_.EV.E1.EV.Voffset == 0); 2972 ref_.EV.E1.EV.Vsym = fl.FLtemp; 2973 ref_.EV.E1.Ety = flty; 2974 ref_.EV.E2 = fofe; 2975 2976 /* If sizes of expression worked out wrong... 2977 Which can happen if we have (int)ptr==e 2978 */ 2979 if (OTbinary(fofe.Eoper)) // if didn't optimize it away 2980 { 2981 const tym_t fofety = fofe.Ety; 2982 const int sz = tysize(fofety); 2983 tym_t ty1 = fofe.EV.E1.Ety; 2984 const tym_t ty2 = fofe.EV.E2.Ety; 2985 /* Sizes of + expression must all be the same */ 2986 if (sz != tysize(ty1) && 2987 sz != tysize(ty2) 2988 ) 2989 { 2990 if (tyuns(fofety)) // if unsigned comparison 2991 ty1 = touns(ty1); /* to unsigned type */ 2992 fofe.Ety = ty1; 2993 ref_.EV.E1.Ety = ty1; 2994 } 2995 } 2996 2997 /* Fix if leaves of compare are TYfptrs and the compare */ 2998 /* operator is < <= > >=. */ 2999 if (ref_.Eoper >= OPle && ref_.Eoper <= OPge && tyfv(ref_.EV.E1.Ety)) 3000 { 3001 assert(tyfv(ref_.EV.E2.Ety)); 3002 ref_.EV.E1 = el_una(OPoffset,TYuint,ref_.EV.E1); 3003 ref_.EV.E2 = el_una(OPoffset,TYuint,fofe); 3004 } 3005 3006 debug if (debugc) 3007 { 3008 WReqn(ref_); 3009 printf("\n"); 3010 } 3011 3012 go.changes++; 3013 doflow = true; /* redo flow analysis */ 3014 3015 /* if X is live on entry to any successor S outside loop */ 3016 /* prepend elem X=(T-c2)/c1 to S.Belem */ 3017 3018 for (uint i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i) // for each exit block 3019 { 3020 elem *ne; 3021 block *b; 3022 3023 foreach (bl; ListRange(dfo[i].Bsucc)) 3024 { /* for each successor */ 3025 b = list_block(bl); 3026 if (vec_testbit(b.Bdfoidx,l.Lloop)) 3027 continue; /* inside loop */ 3028 if (!vec_testbit(X.Ssymnum,b.Binlv)) 3029 continue; /* not live */ 3030 3031 C2 = el_copytree(fl.c2); 3032 ne = el_bin(OPmin,ty, 3033 el_var(fl.FLtemp), 3034 C2); 3035 if (tybasic(ne.EV.E1.Ety) == TYfptr && 3036 tybasic(ne.EV.E2.Ety) == TYfptr) 3037 { 3038 ne.Ety = I64 ? TYllong : TYint; 3039 if (tylong(ty) && _tysize[TYint] == 2) 3040 ne = el_una(OPs16_32,ty,ne); 3041 } 3042 3043 ne = el_bin(OPeq,X.ty(), 3044 el_var(X), 3045 el_bin(OPdiv,ne.Ety, 3046 ne, 3047 el_copytree(fl.c1))); 3048 3049 debug if (debugc) 3050 { 3051 printf("Adding ("); 3052 WReqn(ne); 3053 printf(") to exit block B%d\n",b.Bdfoidx); 3054 //elem_print(ne); 3055 } 3056 3057 /* We have to add a new block if there is */ 3058 /* more than one predecessor to b. */ 3059 if (list_next(b.Bpred)) 3060 { 3061 block *bn = block_calloc(); 3062 bn.Btry = b.Btry; 3063 bn.BC = BCgoto; 3064 bn.Bnext = dfo[i].Bnext; 3065 dfo[i].Bnext = bn; 3066 list_append(&(bn.Bsucc),b); 3067 list_append(&(bn.Bpred),dfo[i]); 3068 bl.ptr = cast(void *)bn; 3069 foreach (bl2; ListRange(b.Bpred)) 3070 if (list_block(bl2) == dfo[i]) 3071 { 3072 bl2.ptr = cast(void *)bn; 3073 goto L2; 3074 } 3075 assert(0); 3076 L2: 3077 b = bn; 3078 addblk = true; 3079 } 3080 3081 if (b.Belem) 3082 b.Belem = 3083 el_bin(OPcomma,b.Belem.Ety, 3084 ne,b.Belem); 3085 else 3086 b.Belem = ne; 3087 go.changes++; 3088 doflow = true; /* redo flow analysis */ 3089 } /* for each successor */ 3090 } /* foreach exit block */ 3091 if (addblk) 3092 return; 3093 } 3094 else if (refcount == 0) /* if no uses of IV in loop */ 3095 { 3096 /* Eliminate the basic IV if it is not live on any successor */ 3097 for (uint i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i) // for each exit block 3098 { 3099 foreach (bl; ListRange(dfo[i].Bsucc)) 3100 { /* for each successor */ 3101 block *b = list_block(bl); 3102 if (vec_testbit(b.Bdfoidx,l.Lloop)) 3103 continue; /* inside loop */ 3104 if (vec_testbit(X.Ssymnum,b.Binlv)) 3105 goto L1; /* live */ 3106 } 3107 } 3108 3109 if (debugc) printf("No uses, eliminating basic IV '%s' (%p)\n",X.Sident.ptr,X); 3110 3111 /* Remove the (s op= e2) by replacing it with (1 , e2) 3112 * and let later passes remove the (1 ,) nodes. 3113 * Don't remove those nodes here because other biv's may refer 3114 * to them. 3115 */ 3116 { 3117 elem* ei = *biv.IVincr; 3118 ei.Eoper = OPcomma; 3119 ei.EV.E1.Eoper = OPconst; 3120 ei.EV.E1.Ety = TYint; 3121 } 3122 3123 go.changes++; 3124 doflow = true; /* redo flow analysis */ 3125 L1: 3126 } 3127 } /* for */ 3128 } 3129 3130 /*********************** 3131 * Eliminate opeq IVs that are not used outside the loop. 3132 */ 3133 3134 @trusted 3135 private void elimopeqs(ref Loop l) 3136 { 3137 elem **pref; 3138 Symbol *X; 3139 int refcount; 3140 3141 if (debugc) printf("elimopeqs(%p)\n", &l); 3142 //foreach (ref biv; l.Lopeqlist) elem_print(*(biv.IVincr)); 3143 3144 foreach (ref biv; l.Lopeqlist) 3145 { 3146 // Can't eliminate this basic IV if we have a goal for the 3147 // increment elem. 3148 // Be careful about Nflags being in a union... 3149 if (!((*biv.IVincr).Nflags & NFLnogoal)) 3150 continue; 3151 3152 X = biv.IVbasic; 3153 assert(symbol_isintab(X)); 3154 pref = onlyref(X,l,*biv.IVincr,refcount); 3155 3156 // if only ref of X is of the form (X) or (X relop e) or (e relop X) 3157 if (pref != null && refcount <= 1) 3158 { } 3159 else if (refcount == 0) // if no uses of IV in loop 3160 { // Eliminate the basic IV if it is not live on any successor 3161 uint i; 3162 for (i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i) // for each exit block 3163 { 3164 foreach (bl; ListRange(dfo[i].Bsucc)) 3165 { // for each successor 3166 block *b = list_block(bl); 3167 if (vec_testbit(b.Bdfoidx,l.Lloop)) 3168 continue; // inside loop 3169 if (vec_testbit(X.Ssymnum,b.Binlv)) 3170 goto L1; // live 3171 } 3172 } 3173 3174 if (debugc) printf("No uses, eliminating opeq IV '%s' (%p)\n",X.Sident.ptr,X); 3175 3176 /* Remove the (s op= e2) by replacing it with (1 , e2) 3177 * and let later passes remove the (1 ,) nodes. 3178 * Don't remove those nodes here because other biv's may refer 3179 * to them, for nodes like (sum += i++) 3180 */ 3181 { 3182 elem* einc = *(biv.IVincr); 3183 einc.Eoper = OPcomma; 3184 einc.EV.E1.Eoper = OPconst; 3185 einc.EV.E1.Ety = TYint; 3186 } 3187 3188 go.changes++; 3189 doflow = true; // redo flow analysis 3190 L1: 3191 } 3192 } 3193 } 3194 3195 /************************** 3196 * Find simplest elem in family. 3197 * Params: 3198 * fams = array of famlist's 3199 * tym = type of basic IV 3200 * Returns: index into fams[] of simplest; fams.length if none found. 3201 */ 3202 3203 @trusted 3204 extern (D) 3205 private size_t simfl(famlist[] fams, tym_t tym) 3206 { 3207 size_t sofar = fams.length; 3208 3209 foreach (i, ref fl; fams) 3210 { 3211 if (fl.FLtemp == FLELIM) /* no variable, so skip it */ 3212 continue; 3213 /* If size of replacement is less than size of biv, we could */ 3214 /* be in trouble due to loss of precision. */ 3215 if (size(fl.FLtemp.ty()) < size(tym)) 3216 continue; 3217 3218 // pick simplest 3219 sofar = sofar == fams.length ? i 3220 : (flcmp(fams[sofar], fams[i]) ? sofar : i); 3221 } 3222 return sofar; 3223 } 3224 3225 /************************** 3226 * Return simpler of two family elems. 3227 * There is room for improvement, namely if 3228 * f1.c1 = 2, f2.c1 = 27 3229 * then pick f1 because it is a shift. 3230 * Returns: 3231 * true for f1 is simpler, false for f2 is simpler 3232 */ 3233 3234 @trusted 3235 private bool flcmp(const ref famlist f1, const ref famlist f2) 3236 { 3237 auto t1 = &(f1.c1.EV); 3238 auto t2 = &(f2.c1.EV); 3239 auto ty = (*f1.FLpelem).Ety; // type of elem 3240 3241 static if (0) 3242 { 3243 printf("f1: c1 = %d, c2 = %d\n",t1.Vshort,f1.c2.EV.Vshort); 3244 printf("f2: c1 = %d, c2 = %d\n",t2.Vshort,f2.c2.EV.Vshort); 3245 printf("%s %s\n", tym_str((*f1.FLpelem).Ety), tym_str((*f2.FLpelem).Ety)); 3246 } 3247 3248 /* Wimp out and just pick f1 if the types don't match */ 3249 if (tysize(ty) == tysize((*f2.FLpelem).Ety)) 3250 { 3251 switch (tybasic(ty)) 3252 { case TYbool: 3253 case TYchar: 3254 case TYschar: 3255 case TYuchar: 3256 if (t2.Vuchar == 1 || 3257 t1.Vuchar != 1 && f2.c2.EV.Vuchar == 0) 3258 goto Lf2; 3259 break; 3260 3261 case TYshort: 3262 case TYushort: 3263 case TYchar16: 3264 case TYwchar_t: // BUG: what about 4 byte wchar_t's? 3265 case_short: 3266 if (t2.Vshort == 1 || 3267 t1.Vshort != 1 && f2.c2.EV.Vshort == 0) 3268 goto Lf2; 3269 break; 3270 3271 case TYsptr: 3272 case TYcptr: 3273 case TYnptr: // BUG: 64 bit pointers? 3274 case TYimmutPtr: 3275 case TYsharePtr: 3276 case TYrestrictPtr: 3277 case TYfgPtr: 3278 case TYnullptr: 3279 case TYint: 3280 case TYuint: 3281 if (_tysize[TYint] == SHORTSIZE) 3282 goto case_short; 3283 else 3284 goto case_long; 3285 3286 case TYlong: 3287 case TYulong: 3288 case TYdchar: 3289 case TYfptr: 3290 case TYvptr: 3291 case TYhptr: 3292 case_long: 3293 if (t2.Vlong == 1 || 3294 t1.Vlong != 1 && f2.c2.EV.Vlong == 0) 3295 goto Lf2; 3296 break; 3297 3298 case TYfloat: 3299 if (t2.Vfloat == 1 || 3300 t1.Vfloat != 1 && f2.c2.EV.Vfloat == 0) 3301 goto Lf2; 3302 break; 3303 3304 case TYdouble: 3305 case TYdouble_alias: 3306 if (t2.Vdouble == 1.0 || 3307 t1.Vdouble != 1.0 && f2.c2.EV.Vdouble == 0) 3308 goto Lf2; 3309 break; 3310 3311 case TYldouble: 3312 if (t2.Vldouble == 1.0 || 3313 t1.Vldouble != 1.0 && f2.c2.EV.Vldouble == 0) 3314 goto Lf2; 3315 break; 3316 3317 case TYllong: 3318 case TYullong: 3319 if (t2.Vllong == 1 || 3320 t1.Vllong != 1 && f2.c2.EV.Vllong == 0) 3321 goto Lf2; 3322 break; 3323 3324 default: 3325 assert(0); 3326 } 3327 } 3328 //printf("picking f1\n"); 3329 return true; 3330 3331 Lf2: 3332 //printf("picking f2\n"); 3333 return false; 3334 } 3335 3336 /***************************** 3337 * If loop is in a try block, see if there are references to x in an enclosing 3338 * try block. This is because the loop may throw and transfer control 3339 * outside the try block, and that will count as a use of x. 3340 * Params: 3341 * x = basic induction variable symbol 3342 * l = loop x is in 3343 * Returns: 3344 * true if x is used outside the try block 3345 */ 3346 @trusted 3347 private bool catchRef(Symbol* x, ref Loop l) 3348 { 3349 block* btry = l.Lhead.Btry; 3350 if (!btry) 3351 return false; // not in a try block 3352 3353 foreach (i, b; dfo[]) 3354 { 3355 if (vec_testbit(b.Bdfoidx, l.Lloop)) 3356 continue; 3357 /* this is conservative, just checking if x is used outside the loop. 3358 * A better check would see if the body of the loop throws, and would 3359 * check the enclosing catch/finally blocks and their exit blocks. 3360 * https://issues.dlang.org/show_bug.cgi?22104 3361 */ 3362 if (vec_testbit(x.Ssymnum, b.Binlv)) 3363 return true; 3364 } 3365 3366 return false; 3367 } 3368 3369 /************************************ 3370 * Params: 3371 * x = basic IV symbol 3372 * incn = increment elem for basic IV X. 3373 * refcount = set to # of references to X other than the increment elem 3374 * Returns: 3375 * If ref of X in loop l is of the form (X relop e) or (e relop X) 3376 * Return the relop elem 3377 * Else 3378 * Return null 3379 */ 3380 3381 private __gshared 3382 { 3383 elem **nd; 3384 elem *sincn; 3385 Symbol *X; 3386 } 3387 3388 @trusted 3389 private elem ** onlyref(Symbol *x, ref Loop l,elem *incn, out int refcount) 3390 { 3391 uint i; 3392 3393 //printf("onlyref('%s')\n", x.Sident.ptr); 3394 X = x; /* save some parameter passing */ 3395 assert(symbol_isintab(x)); 3396 sincn = incn; 3397 3398 debug 3399 if (!(X.Ssymnum < globsym.length && incn)) 3400 printf("X = %d, globsym.length = %d, l = %p, incn = %p\n",cast(int) X.Ssymnum,cast(int) globsym.length,&l,incn); 3401 3402 assert(X.Ssymnum < globsym.length && incn); 3403 int count = 0; 3404 nd = null; 3405 for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) // for each block in loop 3406 { 3407 block* b = dfo[i]; 3408 if (b.Belem) 3409 { 3410 count += countrefs(&b.Belem,b.BC == BCiftrue); 3411 } 3412 } 3413 3414 static if (0) 3415 { 3416 printf("count = %d, nd = (", count); 3417 if (nd) WReqn(*nd); 3418 printf(")\n"); 3419 } 3420 3421 refcount = count; 3422 return nd; 3423 } 3424 3425 3426 /****************************** 3427 * Count elems of the form (X relop e) or (e relop X). 3428 * Do not count the node if it is the increment node (sincn). 3429 * Params: 3430 * pn = pointer to start of elem tree 3431 * flag = true if block wants to test the elem 3432 * Returns: 3433 * number of elems of the form 3434 */ 3435 3436 @trusted 3437 private int countrefs(elem **pn,bool flag) 3438 { 3439 elem *n = *pn; 3440 3441 assert(n); 3442 if (n == sincn) /* if it is the increment elem */ 3443 { 3444 return OTbinary(n.Eoper) 3445 ? countrefs(&n.EV.E2, false) 3446 : 0; // don't count lvalue 3447 } 3448 if (OTunary(n.Eoper)) 3449 return countrefs(&n.EV.E1,false); 3450 else if (OTbinary(n.Eoper)) 3451 { 3452 if (OTrel(n.Eoper)) 3453 { 3454 elem *e1 = n.EV.E1; 3455 3456 assert(e1.Eoper != OPcomma); 3457 if (e1 == sincn && 3458 (e1.Eoper == OPeq || OTopeq(e1.Eoper))) 3459 goto L1; 3460 3461 /* Check both subtrees to see if n is the comparison node, 3462 * that is, if X is a leaf of the comparison. 3463 */ 3464 if (e1.Eoper == OPvar && e1.EV.Vsym == X && !countrefs2(n.EV.E2, X) || 3465 n.EV.E2.Eoper == OPvar && n.EV.E2.EV.Vsym == X && !countrefs2(e1, X)) 3466 nd = pn; /* found the relop node */ 3467 } 3468 L1: 3469 return countrefs(&n.EV.E1,false) + 3470 countrefs(&n.EV.E2,(flag && n.Eoper == OPcomma)); 3471 } 3472 else if ((n.Eoper == OPvar || n.Eoper == OPrelconst) && n.EV.Vsym == X) 3473 { 3474 if (flag) 3475 nd = pn; /* comparing it with 0 */ 3476 return 1; // found another reference 3477 } 3478 return 0; 3479 } 3480 3481 /******************************* 3482 * Count number of times symbol X appears in elem tree e. 3483 */ 3484 3485 @trusted 3486 private int countrefs2(const(elem)* e, const Symbol* s) 3487 { 3488 debug elem_debug(e); 3489 while (OTunary(e.Eoper)) 3490 e = e.EV.E1; 3491 if (OTbinary(e.Eoper)) 3492 return countrefs2(e.EV.E1, s) + countrefs2(e.EV.E2, s); 3493 return ((e.Eoper == OPvar || e.Eoper == OPrelconst) && 3494 e.EV.Vsym == s); 3495 } 3496 3497 /**************************** 3498 * Eliminate some special cases. 3499 */ 3500 3501 @trusted 3502 private 3503 extern(D) void elimspec(const ref Loop loop, block*[] dfo) 3504 { 3505 // Visit each block in loop 3506 for (size_t i = 0; (i = vec_index(i, loop.Lloop)) < dfo.length; ++i) 3507 { 3508 auto b = dfo[i]; 3509 if (b.Belem) 3510 elimspecwalk(&b.Belem); 3511 } 3512 } 3513 3514 3515 /****************************** 3516 */ 3517 3518 @trusted 3519 private void elimspecwalk(elem **pn) 3520 { 3521 elem *n; 3522 3523 n = *pn; 3524 assert(n); 3525 if (OTunary(n.Eoper)) 3526 elimspecwalk(&n.EV.E1); 3527 else if (OTbinary(n.Eoper)) 3528 { 3529 elimspecwalk(&n.EV.E1); 3530 elimspecwalk(&n.EV.E2); 3531 if (OTrel(n.Eoper)) 3532 { 3533 elem *e1 = n.EV.E1; 3534 3535 /* Replace ((e1,e2) rel e3) with (e1,(e2 rel e3). 3536 * This will reduce the number of cases for elimbasivs(). 3537 * Don't do equivalent with (e1 rel (e2,e3)) because 3538 * of potential side effects in e1. 3539 */ 3540 if (e1.Eoper == OPcomma) 3541 { 3542 elem *e; 3543 3544 debug if (debugc) 3545 { printf("3rewriting ("); WReqn(n); printf(")\n"); } 3546 3547 e = n.EV.E2; 3548 n.EV.E2 = e1; 3549 n.EV.E1 = n.EV.E2.EV.E1; 3550 n.EV.E2.EV.E1 = n.EV.E2.EV.E2; 3551 n.EV.E2.EV.E2 = e; 3552 n.EV.E2.Eoper = n.Eoper; 3553 n.EV.E2.Ety = n.Ety; 3554 n.Eoper = OPcomma; 3555 3556 go.changes++; 3557 doflow = true; 3558 3559 elimspecwalk(&n.EV.E1); 3560 elimspecwalk(&n.EV.E2); 3561 } 3562 3563 /* Rewrite ((X op= e2) rel e3) into ((X op= e2),(X rel e3)) 3564 * Rewrite ((X ++ e2) rel e3) into ((X += e2),(X-e2 rel e3)) 3565 * so that the op= will not have a goal, so elimbasivs() 3566 * will work on it. 3567 */ 3568 if ((OTopeq(e1.Eoper) 3569 || OTpost(e1.Eoper) 3570 ) && 3571 !el_sideeffect(e1.EV.E1)) 3572 { 3573 elem *e; 3574 OPER op; 3575 3576 debug if (debugc) 3577 { printf("4rewriting ("); WReqn(n); printf(")\n"); } 3578 3579 e = el_calloc(); 3580 el_copy(e,n); 3581 e.EV.E1 = el_copytree(e1.EV.E1); 3582 e.EV.E1.Ety = n.EV.E1.Ety; 3583 n.EV.E2 = e; 3584 switch (e1.Eoper) 3585 { 3586 case OPpostinc: 3587 e1.Eoper = OPaddass; 3588 op = OPmin; 3589 goto L3; 3590 3591 case OPpostdec: 3592 e1.Eoper = OPminass; 3593 op = OPadd; 3594 L3: e.EV.E1 = el_bin(op,e.EV.E1.Ety,e.EV.E1,el_copytree(e1.EV.E2)); 3595 break; 3596 3597 default: 3598 break; 3599 } 3600 /* increment node is now guaranteed to have no goal */ 3601 e1.Nflags |= NFLnogoal; 3602 n.Eoper = OPcomma; 3603 //go.changes++; 3604 doflow = true; 3605 3606 elimspecwalk(&n.EV.E1); 3607 elimspecwalk(&n.EV.E2); 3608 } 3609 } 3610 } 3611 } 3612 3613 /******** 3614 * Walk e in execution order. 3615 * When eincrement is found, remove it. 3616 * Continue, replacing instances of `v` with `v+increment` 3617 * When second eincrement is found, stop. 3618 * Params: 3619 * e = expression to walk 3620 * defnum = index of eincrement 3621 * v = increment variable 3622 * increment = amount to increment v 3623 * unrolls = number of times loop has been unrolled 3624 */ 3625 3626 private void unrollWalker(elem* e, uint defnum, Symbol* v, targ_llong increment, int unrolls) nothrow 3627 { 3628 int state = 0; 3629 3630 /*********************************** 3631 * Walk e in execution order, fixing it according to state. 3632 * state == 0..unrolls-1: when eincrement is found, remove it, advance to next state 3633 * state == 1..unrolls-1: replacing instances of v with v+(state*increment), 3634 * state == unrolls-1: leave eincrement alone, advance to next state 3635 * state == unrolls: done 3636 */ 3637 3638 void walker(elem *e) @trusted 3639 { 3640 assert(e); 3641 const op = e.Eoper; 3642 if (ERTOL(e)) 3643 { 3644 if (e.Edef != defnum) 3645 { 3646 walker(e.EV.E2); // this function is @trusted because of this union access 3647 walker(e.EV.E1); 3648 } 3649 } 3650 else if (OTbinary(op)) 3651 { 3652 if (e.Edef != defnum) 3653 { 3654 walker(e.EV.E1); 3655 walker(e.EV.E2); 3656 } 3657 } 3658 else if (OTunary(op)) 3659 { 3660 assert(e.Edef != defnum); 3661 walker(e.EV.E1); 3662 } 3663 else if (op == OPvar && 3664 state && 3665 e.EV.Vsym == v) 3666 { 3667 // overwrite e with (v+increment) 3668 elem *e1 = el_calloc(); 3669 el_copy(e1,e); 3670 e.Eoper = OPadd; 3671 e.EV.E1 = e1; 3672 e.EV.E2 = el_long(e.Ety, increment * state); 3673 } 3674 if (OTdef(op) && e.Edef == defnum) 3675 { 3676 // found the increment elem; neuter all but the last one 3677 if (state + 1 < unrolls) 3678 { 3679 el_free(e.EV.E1); 3680 el_free(e.EV.E2); 3681 e.Eoper = OPconst; 3682 e.EV.Vllong = 0; 3683 } 3684 ++state; 3685 } 3686 } 3687 3688 walker(e); 3689 assert(state == unrolls); 3690 } 3691 3692 3693 /********************************* 3694 * Unroll loop if possible. 3695 * Params: 3696 * l = loop to unroll 3697 * Returns: 3698 * true if loop was unrolled 3699 */ 3700 @trusted 3701 bool loopunroll(ref Loop l) 3702 { 3703 const bool log = false; 3704 if (log) printf("loopunroll(%p)\n", &l); 3705 3706 /* Do not repeatedly unroll the same loop, 3707 * or waste time attempting to 3708 */ 3709 if (l.Lhead.Bflags & BFLnounroll) 3710 return false; 3711 l.Lhead.Bflags |= BFLnounroll; 3712 if (log) 3713 WRfunc("loop", funcsym_p, startblock); 3714 3715 if (l.Lhead.Btry || l.Ltail.Btry) 3716 return false; 3717 3718 /* For simplification, only unroll loops that consist only 3719 * of a head and tail, and the tail is the exit block. 3720 */ 3721 const numblocks = vec_numBitsSet(l.Lloop); 3722 { // Ensure no changes 3723 if (dfo.length != vec_numbits(l.Lloop)) assert(0); 3724 int n = 0; 3725 for (int i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) // for each block in loop 3726 ++n; 3727 if (n != numblocks) assert(0); 3728 } 3729 if (numblocks != 2) 3730 { 3731 if (log) printf("\tnot 2 blocks, but %d\n", numblocks); 3732 return false; 3733 } 3734 assert(l.Lhead != l.Ltail); 3735 3736 /* tail must be the sole exit block 3737 */ 3738 if (vec_testbit(l.Lhead.Bdfoidx, l.Lexit) || 3739 !vec_testbit(l.Ltail.Bdfoidx, l.Lexit)) 3740 { 3741 if (log) printf("\ttail not sole exit block\n"); 3742 return false; 3743 } 3744 3745 elem *ehead = l.Lhead.Belem; 3746 elem *etail = l.Ltail.Belem; 3747 3748 if (log) 3749 { 3750 printf("Unroll candidate:\n"); 3751 printf(" head B%d:\t", l.Lhead.Bdfoidx); WReqn(l.Lhead.Belem); printf("\n"); 3752 printf(" tail B%d:\t", l.Ltail.Bdfoidx); WReqn(l.Ltail.Belem); printf("\n"); 3753 } 3754 3755 /* Tail must be of the form: (v < c) or (v <= c) where v is an unsigned integer 3756 */ 3757 if ((etail.Eoper != OPlt && etail.Eoper != OPle) || 3758 etail.EV.E1.Eoper != OPvar || 3759 etail.EV.E2.Eoper != OPconst) 3760 { 3761 if (log) printf("\tnot (v < c)\n"); 3762 return false; 3763 } 3764 3765 elem *e1 = etail.EV.E1; 3766 elem *e2 = etail.EV.E2; 3767 3768 if (!tyintegral(e1.Ety) || 3769 tysize(e1.Ety) > targ_llong.sizeof || 3770 !(tyuns(e1.Ety) || tyuns(e2.Ety))) 3771 { 3772 if (log) printf("\tnot (integral unsigned)\n"); 3773 return false; 3774 } 3775 3776 int cost = el_length(ehead); 3777 //printf("test4 cost: %d\n", cost); 3778 3779 if (cost > 100) 3780 { 3781 if (log) printf("\tcost %d\n", cost); 3782 return false; 3783 } 3784 if (log) printf("cost %d\n", cost); 3785 3786 Symbol* v = e1.EV.Vsym; 3787 3788 // RD info is only reliable for registers and autos 3789 if (!(sytab[v.Sclass] & SCRD) || !(v.Sflags & SFLunambig)) 3790 { 3791 if (log) printf("\tnot SCRD\n"); 3792 return false; 3793 } 3794 3795 /* Find the initial, increment elem, and final value of s 3796 */ 3797 elem *einitial; 3798 elem *eincrement; 3799 if (!findloopparameters(etail, einitial, eincrement)) 3800 { 3801 if (log) printf("\tnot findloopparameters()\n"); 3802 return false; 3803 } 3804 3805 targ_llong initial = el_tolong(einitial.EV.E2); 3806 targ_llong increment = el_tolong(eincrement.EV.E2); 3807 if (eincrement.Eoper == OPpostdec || eincrement.Eoper == OPminass) 3808 increment = -increment; 3809 targ_llong final_ = el_tolong(e2); 3810 3811 if (log) printf("initial = %lld, increment = %lld, final = %lld\n",cast(long)initial,cast(long)increment,cast(long)final_); 3812 3813 if (etail.Eoper == OPle) 3814 ++final_; 3815 3816 if (initial < 0 || 3817 final_ < initial || 3818 increment <= 0 || 3819 (final_ - initial) % increment) 3820 { 3821 if (log) printf("\tnot (evenly divisible)\n"); 3822 return false; 3823 } 3824 3825 /* If loop would only execute once anyway, just remove the test at the end 3826 */ 3827 if (initial + increment == final_) 3828 { 3829 if (log) printf("\tjust once\n"); 3830 etail.Eoper = OPcomma; 3831 e2.EV.Vllong = 0; 3832 e2.Ety = etail.Ety; 3833 return false; 3834 } 3835 3836 /* number of times the loop is unrolled 3837 */ 3838 targ_ullong numIterations = (final_ - initial) / increment; 3839 const int unrolls = (numIterations < 1000 / cost) 3840 ? cast(int)numIterations 3841 : 2; 3842 3843 if (unrolls == 0 || (final_ - initial) % unrolls) 3844 { 3845 if (log) printf("\tnot (divisible by %d)\n", unrolls); 3846 return false; 3847 } 3848 3849 if (log) printf("Unrolling starting\n"); 3850 3851 // Double the increment 3852 eincrement.EV.E2.EV.Vllong *= unrolls; 3853 //printf(" 4head:\t"); WReqn(l.Lhead.Belem); printf("\n"); 3854 3855 elem* e = null; 3856 foreach (i; 0 .. unrolls) 3857 e = el_combine(e, el_copytree(ehead)); 3858 3859 /* Walk e in execution order. 3860 * When eincrement is found, remove it. 3861 * Continue, replacing instances of `v` with `v+increment` 3862 * When last eincrement is found, stop. 3863 */ 3864 unrollWalker(e, eincrement.Edef, v, increment, unrolls); 3865 3866 l.Lhead.Belem = e; 3867 3868 /* If unrolled loop would only execute once anyway, just remove the test at the end 3869 */ 3870 if (initial + unrolls * increment == final_) 3871 { 3872 if (log) printf("\tcompletely unrolled\n"); 3873 etail.Eoper = OPcomma; 3874 e2.EV.Vllong = 0; 3875 e2.Ety = etail.Ety; 3876 } 3877 3878 //WRfunc("loopunroll done", funcsym_p, startblock); 3879 return true; 3880 } 3881 3882 /****************************** 3883 * Count number of elems in a tree 3884 * Params: 3885 * e = tree 3886 * Returns: 3887 * number of elems in tree 3888 */ 3889 @trusted 3890 private int el_length(elem *e) 3891 { 3892 int n = 0; 3893 while (e) 3894 { 3895 n += 1; 3896 if (!OTleaf(e.Eoper)) 3897 { 3898 if (e.Eoper == OPctor || e.Eoper == OPdtor) 3899 return 10_000; 3900 n += el_length(e.EV.E2); 3901 e = e.EV.E1; 3902 } 3903 else 3904 break; 3905 } 3906 return n; 3907 }