1 /** 2 * Global loop optimizations 3 * 4 * Compiler implementation of the 5 * $(LINK2 https://www.dlang.org, D programming language). 6 * 7 * Copyright: Copyright (C) 1985-1998 by Symantec 8 * Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved 9 * Authors: $(LINK2 https://www.digitalmars.com, Walter Bright) 10 * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 11 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/gloop.d, backend/gloop.d) 12 * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/gloop.d 13 */ 14 15 module dmd.backend.gloop; 16 17 version (SCPP) 18 version = COMPILE; 19 version (MARS) 20 version = COMPILE; 21 22 version (COMPILE) 23 { 24 25 import core.stdc.stdio; 26 import core.stdc.stdlib; 27 import core.stdc.string; 28 29 import dmd.backend.cc; 30 import dmd.backend.cdef; 31 import dmd.backend.code_x86; 32 import dmd.backend.evalu8 : el_toldoubled; 33 import dmd.backend.oper; 34 import dmd.backend.global; 35 import dmd.backend.goh; 36 import dmd.backend.el; 37 import dmd.backend.symtab; 38 import dmd.backend.ty; 39 import dmd.backend.type; 40 41 import dmd.backend.barray; 42 import dmd.backend.dlist; 43 import dmd.backend.dvec; 44 import dmd.backend.mem; 45 46 nothrow: 47 @safe: 48 49 @trusted 50 char symbol_isintab(const Symbol *s) { return sytab[s.Sclass] & SCSS; } 51 52 extern (C++): 53 54 bool findloopparameters(elem* erel, ref elem* rdeq, ref elem* rdinc); 55 56 alias Loops = Rarray!Loop; 57 58 59 /********************************* 60 * Loop data structure. 61 */ 62 63 struct Loop 64 { 65 nothrow: 66 vec_t Lloop; // Vector of blocks in this loop 67 vec_t Lexit; // Vector of exit blocks of loop 68 block *Lhead; // Pointer to header of loop 69 block *Ltail; // Pointer to tail 70 block *Lpreheader; // Pointer to preheader (if any) 71 Barray!(elem*) Llis; // loop invariant elems moved to Lpreheader, so 72 // redundant temporaries aren't created 73 Rarray!Iv Livlist; // basic induction variables 74 Rarray!Iv Lopeqlist; // list of other op= variables 75 76 /************************* 77 * Reset memory so this allocation can be re-used. 78 */ 79 @trusted 80 void reset() 81 { 82 vec_free(Lloop); 83 vec_free(Lexit); 84 85 foreach (ref iv; Livlist) 86 iv.reset(); 87 foreach (ref iv; Lopeqlist) 88 iv.reset(); 89 90 Llis.reset(); 91 Livlist.reset(); 92 Lopeqlist.reset(); 93 } 94 95 /*********************** 96 * Write loop. 97 */ 98 99 @trusted 100 void print() 101 { 102 debug 103 { 104 Loop *l = &this; 105 printf("loop %p\n", l); 106 printf("\thead: B%d, tail: B%d, prehead: B%d\n",l.Lhead.Bdfoidx, 107 l.Ltail.Bdfoidx,(l.Lpreheader ) ? l.Lpreheader.Bdfoidx 108 : cast(uint)-1); 109 printf("\tLloop "); vec_println(l.Lloop); 110 printf("\tLexit "); vec_println(l.Lexit); 111 } 112 } 113 } 114 115 struct famlist 116 { 117 nothrow: 118 elem **FLpelem; /* parent of elem in the family */ 119 elem *c1; 120 elem *c2; // c1*(basic IV) + c2 121 Symbol *FLtemp; // symbol index of temporary (FLELIM if */ 122 /* this entry has no temporary) */ 123 tym_t FLty; /* type of this induction variable */ 124 tym_t FLivty; /* type of the basic IV elem (which is */ 125 /* not necessarilly the type of the IV */ 126 /* elem!) */ 127 128 void reset() 129 { 130 el_free(c1); 131 el_free(c2); 132 } 133 134 @trusted 135 void print() const 136 { 137 debug 138 { 139 printf("famlist:\n"); 140 printf("*FLpelem:\n"); 141 elem_print(*FLpelem); 142 printf("c1:"); 143 elem_print(c1); 144 printf("c2:"); 145 elem_print(c2); 146 printf("FLty = %s\n", tym_str(FLty)); 147 printf("FLivty = %s\n", tym_str(FLivty)); 148 } 149 } 150 } 151 152 @system 153 enum FLELIM = cast(Symbol *)-1; 154 155 struct Iv 156 { 157 nothrow: 158 Symbol *IVbasic; // symbol of basic IV 159 elem **IVincr; // pointer to parent of IV increment elem 160 Barray!famlist IVfamily; // variables in this family 161 162 @trusted 163 void reset() 164 { 165 foreach (ref fl; IVfamily) 166 { 167 fl.reset(); 168 } 169 IVfamily.reset(); 170 } 171 172 @trusted 173 void print() const 174 { 175 debug 176 { 177 printf("IV: '%s'\n",IVbasic.Sident.ptr); 178 printf("*IVincr:\n"); 179 elem_print(*IVincr); 180 } 181 } 182 } 183 184 185 private __gshared bool addblk; /* if TRUE, then we added a block */ 186 187 /* is elem loop invariant? */ 188 int isLI(const elem* n) { return n.Nflags & NFLli; } 189 190 /* make elem loop invariant */ 191 void makeLI(elem* n) { n.Nflags |= NFLli; } 192 193 /****************************** 194 * Only variables that could only be unambiguously defined 195 * are candidates for loop invariant removal and induction 196 * variables. 197 * This means only variables that have the SFLunambig flag 198 * set for them. 199 * Doing this will still cover 90% (I hope) of the cases, and 200 * is a lot faster to compute. 201 */ 202 203 /************* 204 * Free loops. 205 */ 206 207 private void freeloop(ref Loops loops) 208 { 209 foreach (ref loop; loops) 210 loop.reset(); 211 loops.reset(); 212 } 213 214 215 /********************************** 216 * Initialize block information. 217 * Returns: 218 * true if there is a BCasm block 219 */ 220 221 @trusted 222 bool blockinit() 223 { 224 bool hasasm = false; 225 226 assert(dfo); 227 foreach (b; BlockRange(startblock)) 228 { 229 debug /* check integrity of Bpred and Bsucc */ 230 L1: 231 foreach (blp; ListRange(b.Bpred)) 232 { 233 foreach (bls; ListRange(list_block(blp).Bsucc)) 234 if (list_block(bls) == b) 235 continue L1; 236 assert(0); 237 } 238 239 hasasm |= (b.BC == BCasm); 240 } 241 foreach (j, b; dfo[]) 242 { 243 assert(b.Bdfoidx == j); 244 b.Bdom = vec_realloc(b.Bdom, dfo.length); /* alloc Bdom vectors */ 245 vec_clear(b.Bdom); 246 } 247 return hasasm; 248 } 249 250 /**************************************** 251 * Compute dominators (Bdom) for each block. 252 * See Aho & Ullman Fig. 13.5. 253 * Note that flow graph is reducible if there is only one 254 * pass through the loop. 255 * Params: 256 * dfo = depth first order array of blocks, 257 * Bdom vector is filled in for each block 258 */ 259 260 @trusted 261 void compdom() 262 { 263 compdom(dfo[]); 264 } 265 266 @trusted 267 private extern (D) void compdom(block*[] dfo) 268 { 269 assert(dfo.length); 270 block* sb = dfo[0]; // starting block 271 272 vec_clear(sb.Bdom); 273 vec_setbit(0,sb.Bdom); // starting block only doms itself 274 foreach (b; dfo) // for all except startblock 275 { 276 if (b != sb) 277 vec_set(b.Bdom); // dominate all blocks 278 } 279 280 vec_t t1 = vec_calloc(vec_numbits(sb.Bdom)); // allocate a temporary 281 uint counter = 0; // # of times thru loop 282 bool changes; 283 do 284 { 285 changes = false; 286 foreach (i, b; dfo) // for each block in dfo[] 287 { 288 if (i == 0) 289 continue; // except startblock 290 if (b.Bpred) // if there are predecessors 291 { 292 vec_set(t1); 293 foreach (bl; ListRange(b.Bpred)) 294 { 295 vec_andass(t1,list_block(bl).Bdom); 296 } 297 } 298 else 299 vec_clear(t1); // no predecessors to dominate 300 vec_setbit(i,t1); // each block doms itself 301 if (changes) 302 vec_copy(b.Bdom,t1); 303 else if (!vec_equal(b.Bdom,t1)) // if any changes 304 { 305 vec_copy(b.Bdom,t1); 306 changes = true; 307 } 308 } 309 counter++; 310 assert(counter < 50); // should have converged by now 311 } while (changes); 312 vec_free(t1); 313 314 debug if (debugc) 315 { 316 printf("Flow graph is%s reducible\n", counter <= 2 ? "".ptr : " not".ptr); 317 } 318 } 319 320 /*************************** 321 * Test if block A dominates block B. 322 * Params: 323 * A = block 324 * B = another block 325 * Returns: 326 * true if A dominates B. 327 */ 328 329 @trusted 330 bool dom(const block* A, const block* B) 331 { 332 assert(A && B && dfo && dfo[A.Bdfoidx] == A); 333 return vec_testbit(A.Bdfoidx,B.Bdom) != 0; 334 } 335 336 /********************** 337 * Find all the loops. 338 */ 339 340 private extern (D) void findloops(block*[] dfo, ref Loops loops) 341 { 342 freeloop(loops); 343 344 //printf("findloops()\n"); 345 foreach (b; dfo) 346 b.Bweight = 1; // reset Bweights 347 foreach_reverse (b; dfo) // for each block (note reverse 348 // dfo order, so most nested 349 // loops are found first) 350 { 351 assert(b); 352 foreach (bl; ListRange(b.Bsucc)) 353 { 354 block *s = list_block(bl); // each successor s to b 355 assert(s); 356 if (dom(s,b)) // if s dominates b 357 buildloop(loops, s, b); // we found a loop 358 } 359 } 360 361 debug if (debugc) 362 { 363 foreach (ref l; loops) 364 l.print(); 365 } 366 } 367 368 /******************************** 369 * Increase weight of a variable by a factor. 370 * Params: 371 * weight = how important a variable is 372 * factor = loops scale the importance 373 * Returns: 374 * new weight 375 */ 376 377 private uint loop_weight(uint weight, int factor) pure 378 { 379 // Be careful not to overflow 380 if (weight < 0x1_0000) 381 weight *= 10 * factor; 382 else if (weight < 0x10_0000) 383 weight *= 2 * factor; 384 else 385 weight += factor; 386 assert(cast(int)weight > 0); // overflow check 387 return weight; 388 } 389 390 /***************************** 391 * Construct natural loop. 392 * Algorithm 13.1 from Aho & Ullman. 393 * Note that head dom tail. 394 * Params: 395 * ploops = collection of existing loops to add to 396 * head = head of loop; head dominates tail 397 * tail = tail of loop 398 */ 399 400 @trusted 401 private void buildloop(ref Loops ploops,block *head,block *tail) 402 { 403 //printf("buildloop()\n"); 404 405 /* Add a block b and all its predecessors to vector v. 406 */ 407 static void insert(block *b, vec_t v) 408 { 409 assert(b && v); 410 if (!vec_testbit(b.Bdfoidx,v)) // if block is not in loop 411 { 412 vec_setbit(b.Bdfoidx,v); // add block to loop 413 b.Bweight = loop_weight(b.Bweight,1); // *10 usage count 414 foreach (bl; ListRange(b.Bpred)) 415 insert(list_block(bl), v); // insert all its predecessors 416 } 417 } 418 419 Loop* l; 420 421 /* See if this is part of an existing loop. If so, merge the two. */ 422 foreach (ref lp; ploops) 423 if (lp.Lhead == head) /* two loops with same header */ 424 { 425 // Calculate loop contents separately so we get the Bweights 426 // done accurately. 427 428 vec_t v = vec_calloc(dfo.length); 429 vec_setbit(head.Bdfoidx,v); 430 head.Bweight = loop_weight(head.Bweight, 1); 431 insert(tail,v); 432 433 vec_orass(lp.Lloop,v); // merge into existing loop 434 vec_free(v); 435 436 vec_clear(lp.Lexit); // recompute exit blocks 437 l = &lp; 438 goto L1; 439 } 440 441 /* Allocate loop entry */ 442 l = ploops.push(); 443 444 l.Lloop = vec_calloc(dfo.length); // allocate loop bit vector 445 l.Lexit = vec_calloc(dfo.length); // bit vector for exit blocks 446 l.Lhead = head; 447 l.Ltail = tail; 448 l.Lpreheader = null; 449 450 vec_setbit(head.Bdfoidx,l.Lloop); /* add head to the loop */ 451 head.Bweight = loop_weight(head.Bweight, 2); // *20 usage for loop header 452 453 insert(tail,l.Lloop); /* insert tail in loop */ 454 455 L1: 456 /* Find all the exit blocks (those blocks with 457 * successors outside the loop). 458 */ 459 460 // for each block in this loop 461 foreach (i; VecRange(l.Lloop)) 462 { 463 if (dfo[i].BC == BCret || dfo[i].BC == BCretexp || dfo[i].BC == BCexit) 464 vec_setbit(i,l.Lexit); /* ret blocks are exit blocks */ 465 else 466 { 467 foreach (bl; ListRange(dfo[i].Bsucc)) 468 if (!vec_testbit(list_block(bl).Bdfoidx,l.Lloop)) 469 { 470 vec_setbit(i,l.Lexit); 471 break; 472 } 473 } 474 } 475 476 /* Find preheader, if any, to the loop. 477 The preheader is a block that has only the head as a successor. 478 All other predecessors of head must be inside the loop. 479 */ 480 l.Lpreheader = null; 481 foreach (bl; ListRange(head.Bpred)) 482 { 483 block *b = list_block(bl); 484 485 if (!vec_testbit(b.Bdfoidx,l.Lloop)) /* if not in loop */ 486 { 487 if (l.Lpreheader) /* if already one */ 488 { 489 l.Lpreheader = null; /* can only be one */ 490 break; 491 } 492 else 493 { 494 if (list_next(b.Bsucc)) // if more than 1 successor 495 break; // b can't be a preheader 496 l.Lpreheader = b; 497 } 498 } 499 } 500 } 501 502 /************************************** 503 * Perform loop rotations. 504 * Loop starts as: 505 * 506 * prehead 507 * | 508 * v 509 * +->head----> 510 * | | 511 * | v 512 * | body 513 * | | 514 * | v 515 * +--tail 516 * 517 * Two types are done: 518 * 1) Header is moved to be past the tail. 519 * 520 * prehead 521 * | 522 * +---+ 523 * | 524 * | body<-+ 525 * | | | 526 * | v | 527 * | tail | 528 * | | | 529 * | v | 530 * +->head--+ 531 * | 532 * v 533 * 534 * 2) Header is copied past the tail (done only if MFtime is set). 535 * 536 * prehead 537 * | 538 * v 539 * head1-----+ 540 * | | 541 * v | 542 * body<--+ | 543 * | | | 544 * v | | 545 * tail | | 546 * | | | 547 * v | | 548 * head2--+ | 549 * | | 550 * +--------+ 551 * v 552 * 553 * Input: 554 * Loop information (do not depend on the preheader information) 555 * Output: 556 * Revised list of blocks, a new dfo and new loop information 557 * Returns: 558 * true need to recompute loop data 559 */ 560 561 @trusted 562 private bool looprotate(ref Loop l) 563 { 564 block *tail = l.Ltail; 565 block *head = l.Lhead; 566 567 //printf("looprotate(%p)\n",l); 568 569 // Do not rotate loop if: 570 if (head == tail || // loop is only one block big 571 !vec_testbit(head.Bdfoidx,l.Lexit)) // header is not an exit block 572 goto Lret; 573 574 if (//iter != 1 && 575 vec_testbit(tail.Bdfoidx,l.Lexit)) // tail is an exit block 576 goto Lret; 577 578 // Do not rotate if already rotated 579 foreach (b; BlockRange(tail.Bnext)) 580 if (b == head) // if loop already rotated 581 goto Lret; 582 583 if (head.BC == BCtry) 584 goto Lret; 585 if (head.BC == BC_try) 586 goto Lret; 587 588 //if (debugc) { printf("looprotate: "); l.print(); } 589 590 if ((go.mfoptim & MFtime) && head.BC != BCswitch && head.BC != BCasm) 591 { // Duplicate the header past the tail (but doing 592 // switches would be too expensive in terms of code 593 // generated). 594 595 auto head2 = block_calloc(); // create new head block 596 head2.Btry = head.Btry; 597 head2.Bflags = head.Bflags; 598 head.Bflags = BFLnomerg; // move flags over to head2 599 head2.Bflags |= BFLnomerg; 600 head2.BC = head.BC; 601 assert(head2.BC != BCswitch); 602 if (head.Belem) // copy expression tree 603 head2.Belem = el_copytree(head.Belem); 604 head2.Bnext = tail.Bnext; 605 tail.Bnext = head2; 606 607 // pred(head1) = pred(head) outside loop 608 // pred(head2) = pred(head) inside loop 609 list_t *pbln; 610 auto pbl2 = &(head2.Bpred); 611 for (list_t *pbl = &(head.Bpred); *pbl; pbl = pbln) 612 { 613 if (vec_testbit(list_block(*pbl).Bdfoidx, l.Lloop)) 614 { // if this predecessor is inside the loop 615 616 *pbl2 = *pbl; 617 *pbl = list_next(*pbl); 618 pbln = pbl; // don't skip this next one 619 (*pbl2).next = null; 620 auto bsucc = list_block(*pbl2).Bsucc; 621 pbl2 = &((*pbl2).next); 622 foreach (bl; ListRange(bsucc)) 623 if (list_block(bl) == head) 624 { 625 bl.ptr = cast(void *)head2; 626 goto L2; 627 } 628 assert(0); 629 L2: 630 } 631 else 632 pbln = &((*pbl).next); // next predecessor in list 633 } // for each pred(head) 634 635 // succ(head2) = succ(head) 636 foreach (bl; ListRange(head.Bsucc)) 637 { 638 list_append(&(head2.Bsucc),list_block(bl)); 639 list_append(&(list_block(bl).Bpred),head2); 640 } 641 if (debugc) printf("1Rotated loop %p\n", &l); 642 go.changes++; 643 return true; 644 } 645 else if (startblock != head 646 /* This screws up the OPctor/OPdtor sequence for: 647 * struct CString 648 * { CString(); 649 * ~CString(); 650 * int GetLength(); 651 * }; 652 * 653 * void f(void) 654 * { for(;;) 655 * { CString s ; 656 * if(s.GetLength()!=0) 657 * break ; 658 * } 659 * } 660 */ 661 && !(config.flags3 & CFG3eh) 662 ) 663 { // optimize for space 664 // Simply position the header past the tail 665 foreach (b; BlockRange(startblock)) 666 { 667 if (b.Bnext == head) 668 { // found parent b of head 669 b.Bnext = head.Bnext; 670 head.Bnext = tail.Bnext; 671 tail.Bnext = head; 672 if (debugc) printf("2Rotated loop %p\n", &l); 673 go.changes++; 674 return false; 675 } 676 } 677 assert(0); 678 } 679 Lret: 680 return false; 681 } 682 683 private __gshared 684 { 685 int gref; // parameter for markinvar() 686 block *gblock; // parameter for markinvar() 687 vec_t lv; // parameter for markinvar() 688 vec_t gin; // parameter for markinvar() 689 bool doflow; // true if flow analysis has to be redone 690 } 691 692 /********************************* 693 * Loop invariant and induction variable elimination. 694 * Input: 695 * iter which optimization iteration we are on 696 */ 697 698 @trusted 699 void loopopt() 700 { 701 __gshared Loops startloop_cache; 702 703 Loops startloop = startloop_cache; 704 705 if (debugc) printf("loopopt()\n"); 706 restart: 707 file_progress(); 708 if (blockinit()) // init block data 709 { 710 findloops(dfo[], startloop); // Compute Bweights 711 freeloop(startloop); // free existing loops 712 startloop_cache = startloop; 713 return; // can't handle ASM blocks 714 } 715 compdom(); // compute dominators 716 findloops(dfo[], startloop); // find the loops 717 718 L3: 719 while (1) 720 { 721 foreach (ref l; startloop) 722 { 723 if (looprotate(l)) // rotate the loop 724 { 725 compdfo(dfo, startblock); 726 blockinit(); 727 compdom(); 728 findloops(dfo[], startloop); 729 continue L3; 730 } 731 } 732 break; 733 } 734 735 // Make sure there is a preheader for each loop. 736 737 addblk = false; /* assume no blocks added */ 738 foreach (ref l; startloop) 739 { 740 //if (debugc) l.print(); 741 742 if (!l.Lpreheader) /* if no preheader */ 743 { 744 if (debugc) printf("Generating preheader for loop\n"); 745 addblk = true; // add one 746 block* p = block_calloc(); // the preheader 747 block* h = l.Lhead; // loop header 748 749 /* Find parent of h */ 750 if (h == startblock) 751 startblock = p; 752 else 753 { 754 for (auto ph = startblock; 1; ph = ph.Bnext) 755 { 756 assert(ph); /* should have found it */ 757 if (ph.Bnext == h) 758 { 759 // Link p into block list between ph and h 760 ph.Bnext = p; 761 break; 762 } 763 } 764 } 765 p.Bnext = h; 766 767 l.Lpreheader = p; 768 p.BC = BCgoto; 769 assert(p.Bsucc == null); 770 list_append(&(p.Bsucc),h); /* only successor is h */ 771 p.Btry = h.Btry; 772 773 if (debugc) printf("Adding preheader %p to loop %p\n",p,&l); 774 775 // Move preds of h that aren't in the loop to preds of p 776 for (list_t bl = h.Bpred; bl;) 777 { 778 block *b = list_block(bl); 779 780 if (!vec_testbit (b.Bdfoidx, l.Lloop)) 781 { 782 list_append(&(p.Bpred), b); 783 list_subtract(&(h.Bpred), b); 784 bl = h.Bpred; /* dunno what subtract did */ 785 786 /* Fix up successors of predecessors */ 787 foreach (bls; ListRange(b.Bsucc)) 788 if (list_block(bls) == h) 789 bls.ptr = cast(void *)p; 790 } 791 else 792 bl = list_next(bl); 793 } 794 list_append(&(h.Bpred),p); /* p is a predecessor to h */ 795 } 796 } /* for */ 797 if (addblk) /* if any blocks were added */ 798 { 799 compdfo(dfo, startblock); /* compute depth-first order */ 800 blockinit(); 801 compdom(); 802 findloops(dfo[], startloop); // recompute block info 803 addblk = false; 804 } 805 806 /* Do the loop optimizations. 807 */ 808 809 doflow = true; /* do flow analysis */ 810 811 if (go.mfoptim & MFtime) 812 { 813 if (debugc) printf("Starting loop unrolling\n"); 814 L2: 815 while (1) 816 { 817 foreach (ref l; startloop) 818 { 819 if (loopunroll(l)) 820 { 821 compdfo(dfo, startblock); // compute depth-first order 822 blockinit(); 823 compdom(); 824 findloops(dfo[], startloop); // recompute block info 825 doflow = true; 826 continue L2; 827 } 828 } 829 break; 830 } 831 } 832 833 /* Note that accessing the loops 834 * starting from startloop will access them in least nested 835 * one first, thus moving LIs out as far as possible 836 */ 837 if (debugc) printf("Starting loop invariants\n"); 838 839 foreach_reverse (ref l; startloop) 840 { 841 //if (debugc) l.print(); 842 843 file_progress(); 844 assert(l.Lpreheader); 845 if (doflow) 846 { 847 flowrd(); /* compute reaching definitions */ 848 flowlv(); /* compute live variables */ 849 flowae(); // compute available expressions 850 doflow = false; /* no need to redo it */ 851 if (go.defnod.length == 0) /* if no definition elems */ 852 break; /* no need to optimize */ 853 } 854 lv = l.Lloop; 855 if (debugc) printf("...Loop %p start...\n",&l); 856 857 /* Unmark all elems in this loop */ 858 for (uint i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i) 859 if (dfo[i].Belem) 860 unmarkall(dfo[i].Belem); /* unmark all elems */ 861 862 /* Find & mark all LIs */ 863 gin = vec_clone(l.Lpreheader.Bout); 864 vec_t rd = vec_calloc(go.defnod.length); /* allocate our running RD vector */ 865 for (uint i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i) // for each block in loop 866 { 867 block *b = dfo[i]; 868 869 if (debugc) printf("B%d\n",i); 870 if (b.Belem) 871 { 872 vec_copy(rd, b.Binrd); // IN reaching defs 873 static if (0) 874 { 875 printf("i = %d\n",i); 876 { 877 for (int j = 0; j < go.defnod.length; j++) 878 elem_print(go.defnod[j].DNelem); 879 } 880 printf("rd : "); vec_println(rd); 881 } 882 gblock = b; 883 gref = 0; 884 if (b != l.Lhead) 885 gref = 1; 886 markinvar(b.Belem, rd); 887 static if (0) 888 { 889 printf("B%d\n", i); 890 { 891 foreach (j; 0 .. go.defnod.length) 892 { 893 printf(" [%2d] ", j); 894 WReqn(go.defnod[j].DNelem); 895 printf("\n"); 896 } 897 } 898 printf("rd : "); vec_println(rd); 899 printf("Boutrd: "); vec_println(b.Boutrd); 900 } 901 assert(vec_equal(rd, b.Boutrd)); 902 } 903 else 904 assert(vec_equal(b.Binrd, b.Boutrd)); 905 } 906 vec_free(rd); 907 vec_free(gin); 908 909 /* Move loop invariants */ 910 for (uint i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i) 911 { 912 uint domexit; // true if this block dominates all 913 // exit blocks of the loop 914 915 for (uint j = 0; (j = cast(uint) vec_index(j, l.Lexit)) < dfo.length; ++j) // for each exit block 916 { 917 if (!vec_testbit (i, dfo[j].Bdom)) 918 { 919 domexit = 0; 920 goto L1; // break if !(i dom j) 921 } 922 } 923 // if i dom (all exit blocks) 924 domexit = 1; 925 L1: 926 if (dfo[i].Belem) 927 { // If there is any hope of making an improvement 928 if (domexit || l.Llis.length) 929 { 930 //if (dfo[i] != l.Lhead) 931 //domexit |= 2; 932 movelis(dfo[i].Belem, dfo[i], l, domexit); 933 } 934 } 935 } 936 if (debugc) printf("...Loop %p done...\n",&l); 937 938 if (go.mfoptim & MFliv) 939 { 940 loopiv(l); /* induction variables */ 941 if (addblk) /* if we added a block */ 942 { 943 compdfo(dfo, startblock); 944 goto restart; /* play it safe and start over */ 945 } 946 } 947 } /* for */ 948 freeloop(startloop); 949 startloop_cache = startloop; 950 } 951 952 /***************************** 953 * If elem is loop invariant, mark it. 954 * Input: 955 * lv = vector of all the blocks in this loop. 956 * rd = vector of loop invariants for this elem. This must be 957 * continually updated. 958 * Note that we do not iterate until no more LIs are found. The only 959 * thing this would buy us is stuff that depends on LI assignments. 960 */ 961 962 @trusted 963 private void markinvar(elem *n,vec_t rd) 964 { 965 vec_t tmp; 966 uint i; 967 Symbol *v; 968 elem *n1; 969 970 assert(n && rd); 971 assert(vec_numbits(rd) == go.defnod.length); 972 switch (n.Eoper) 973 { 974 case OPaddass: case OPminass: case OPmulass: case OPandass: 975 case OPorass: case OPxorass: case OPdivass: case OPmodass: 976 case OPshlass: case OPshrass: case OPashrass: 977 case OPpostinc: case OPpostdec: 978 case OPcall: 979 case OPvecsto: 980 case OPcmpxchg: 981 markinvar(n.EV.E2,rd); 982 goto case OPnegass; 983 984 case OPnegass: 985 n1 = n.EV.E1; 986 if (n1.Eoper == OPind) 987 markinvar(n1.EV.E1,rd); 988 else if (OTbinary(n1.Eoper)) 989 { markinvar(n1.EV.E1,rd); 990 markinvar(n1.EV.E2,rd); 991 } 992 L2: 993 if (n.Eoper == OPcall || 994 gblock.Btry || 995 !(n1.Eoper == OPvar && 996 symbol_isintab(n1.EV.Vsym))) 997 { 998 gref = 1; 999 } 1000 1001 updaterd(n,rd,null); 1002 break; 1003 1004 case OPcallns: 1005 markinvar(n.EV.E2,rd); 1006 markinvar(n.EV.E1,rd); 1007 break; 1008 1009 case OPstrcpy: 1010 case OPstrcat: 1011 case OPmemcpy: 1012 case OPmemset: 1013 markinvar(n.EV.E2,rd); 1014 markinvar(n.EV.E1,rd); 1015 updaterd(n,rd,null); 1016 break; 1017 1018 case OPbtc: 1019 case OPbtr: 1020 case OPbts: 1021 markinvar(n.EV.E1,rd); 1022 markinvar(n.EV.E2,rd); 1023 updaterd(n,rd,null); 1024 break; 1025 1026 case OPucall: 1027 markinvar(n.EV.E1,rd); 1028 goto case OPasm; 1029 1030 case OPasm: 1031 gref = 1; 1032 updaterd(n,rd,null); 1033 break; 1034 1035 case OPucallns: 1036 case OPstrpar: 1037 case OPstrctor: 1038 case OPvector: 1039 case OPvoid: 1040 case OPstrlen: 1041 case OPddtor: 1042 case OPinp: 1043 case OPprefetch: // don't mark E2 1044 markinvar(n.EV.E1,rd); 1045 break; 1046 1047 case OPcond: 1048 case OPparam: 1049 case OPstrcmp: 1050 case OPmemcmp: 1051 case OPbt: // OPbt is like OPind, assume not LI 1052 case OPoutp: 1053 markinvar(n.EV.E1,rd); 1054 markinvar(n.EV.E2,rd); 1055 break; 1056 1057 case OPandand: 1058 case OPoror: 1059 markinvar(n.EV.E1,rd); 1060 tmp = vec_clone(rd); 1061 markinvar(n.EV.E2,tmp); 1062 if (el_returns(n.EV.E2)) 1063 vec_orass(rd,tmp); // rd |= tmp 1064 vec_free(tmp); 1065 break; 1066 1067 case OPcolon: 1068 case OPcolon2: 1069 tmp = vec_clone(rd); 1070 switch (el_returns(n.EV.E1) * 2 | int(el_returns(n.EV.E2))) 1071 { 1072 case 3: // E1 and E2 return 1073 markinvar(n.EV.E1,rd); 1074 markinvar(n.EV.E2,tmp); 1075 vec_orass(rd,tmp); // rd |= tmp 1076 break; 1077 case 2: // E1 returns 1078 markinvar(n.EV.E1,rd); 1079 markinvar(n.EV.E2,tmp); 1080 break; 1081 case 1: // E2 returns 1082 markinvar(n.EV.E1,tmp); 1083 markinvar(n.EV.E2,rd); 1084 break; 1085 case 0: // neither returns 1086 markinvar(n.EV.E1,tmp); 1087 vec_copy(tmp,rd); 1088 markinvar(n.EV.E2,tmp); 1089 break; 1090 default: 1091 assert(0); 1092 } 1093 vec_free(tmp); 1094 break; 1095 1096 case OPaddr: // mark addresses of OPvars as LI 1097 markinvar(n.EV.E1,rd); 1098 if (n.EV.E1.Eoper == OPvar || isLI(n.EV.E1)) 1099 makeLI(n); 1100 break; 1101 1102 case OPmsw: 1103 case OPneg: case OPbool: case OPnot: case OPcom: 1104 case OPs16_32: case OPd_s32: case OPs32_d: 1105 case OPd_s16: case OPs16_d: case OPd_f: case OPf_d: 1106 case OP32_16: case OPu8_16: 1107 case OPld_d: case OPd_ld: 1108 case OPld_u64: 1109 case OPc_r: case OPc_i: 1110 case OPu16_32: 1111 case OPu16_d: case OPd_u16: 1112 case OPs8_16: case OP16_8: 1113 case OPd_u32: case OPu32_d: 1114 1115 case OPs32_64: case OPu32_64: 1116 case OP64_32: 1117 case OPd_s64: case OPd_u64: 1118 case OPs64_d: 1119 case OPu64_d: 1120 case OP128_64: 1121 case OPs64_128: 1122 case OPu64_128: 1123 1124 case OPabs: 1125 case OPtoprec: 1126 case OPrndtol: 1127 case OPrint: 1128 case OPsetjmp: 1129 case OPbsf: 1130 case OPbsr: 1131 case OPbswap: 1132 case OPpopcnt: 1133 case OPsqrt: 1134 case OPsin: 1135 case OPcos: 1136 case OPvp_fp: /* BUG for MacHandles */ 1137 case OPnp_f16p: case OPf16p_np: case OPoffset: case OPnp_fp: 1138 case OPcvp_fp: 1139 case OPvecfill: 1140 markinvar(n.EV.E1,rd); 1141 if (isLI(n.EV.E1)) /* if child is LI */ 1142 makeLI(n); 1143 break; 1144 1145 case OPeq: 1146 case OPstreq: 1147 markinvar(n.EV.E2,rd); 1148 n1 = n.EV.E1; 1149 markinvar(n1,rd); 1150 1151 /* Determine if assignment is LI. Conditions are: */ 1152 /* 1) Rvalue is LI */ 1153 /* 2) Lvalue is a variable (simplifies things a lot) */ 1154 /* 3) Lvalue can only be affected by unambiguous defs */ 1155 /* 4) No rd's of lvalue that are within the loop (other */ 1156 /* than the current def) */ 1157 if (isLI(n.EV.E2) && n1.Eoper == OPvar) /* 1 & 2 */ 1158 { 1159 v = n1.EV.Vsym; 1160 if (v.Sflags & SFLunambig) 1161 { 1162 tmp = vec_calloc(go.defnod.length); 1163 //filterrd(tmp,rd,v); 1164 listrds(rd,n1,tmp,null); 1165 for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i) 1166 if (go.defnod[i].DNelem != n && 1167 vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv)) 1168 goto L3; 1169 makeLI(n); // then the def is LI 1170 L3: vec_free(tmp); 1171 } 1172 } 1173 goto L2; 1174 1175 case OPadd: case OPmin: case OPmul: case OPand: 1176 case OPor: case OPxor: case OPdiv: case OPmod: 1177 case OPshl: case OPshr: case OPeqeq: case OPne: 1178 case OPlt: case OPle: case OPgt: case OPge: 1179 case OPashr: 1180 case OPror: case OProl: 1181 case OPbtst: 1182 1183 case OPunord: case OPlg: case OPleg: case OPule: 1184 case OPul: case OPuge: case OPug: case OPue: 1185 case OPngt: case OPnge: case OPnlt: case OPnle: 1186 case OPord: case OPnlg: case OPnleg: case OPnule: 1187 case OPnul: case OPnuge: case OPnug: case OPnue: 1188 1189 case OPcomma: 1190 case OPpair: 1191 case OPrpair: 1192 case OPremquo: 1193 case OPscale: 1194 case OPyl2x: 1195 case OPyl2xp1: 1196 markinvar(n.EV.E1,rd); 1197 markinvar(n.EV.E2,rd); 1198 if (isLI(n.EV.E2) && isLI(n.EV.E1)) 1199 makeLI(n); 1200 break; 1201 1202 case OPind: /* must assume this is not LI */ 1203 markinvar(n.EV.E1,rd); 1204 if (isLI(n.EV.E1)) 1205 { 1206 static if (0) 1207 { 1208 // This doesn't work with C++, because exp2_ptrtocomtype() will 1209 // transfer const to where it doesn't belong. 1210 if (n.Ety & mTYconst) 1211 { 1212 makeLI(n); 1213 } 1214 1215 // This was disabled because it was marking as LI 1216 // the loop dimension for the [i] array if 1217 // a[j][i] was in a loop. This meant the a[j] array bounds 1218 // check for the a[j].length was skipped. 1219 else if (n.Ejty) 1220 { 1221 tmp = vec_calloc(go.defnod.length); 1222 filterrdind(tmp,rd,n); // only the RDs pertaining to n 1223 1224 // if (no RDs within loop) 1225 // then it's loop invariant 1226 1227 for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i) // for each RD 1228 if (vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv)) 1229 goto L10; // found a RD in the loop 1230 1231 // If gref has occurred, this can still be LI 1232 // if n is an AE that was also an AE at the 1233 // point of gref. 1234 // We can catch a subset of these cases by looking 1235 // at the AEs at the start of the loop. 1236 if (gref) 1237 { 1238 int j; 1239 1240 //printf("\tn is: "); WReqn(n); printf("\n"); 1241 for (j = 0; (j = cast(uint) vec_index(j, gin)) < go.exptop; ++j) 1242 { 1243 elem *e = go.expnod[j]; 1244 1245 //printf("\t\tgo.expnod[%d] = %p\n",j,e); 1246 //printf("\t\tAE is: "); WReqn(e); printf("\n"); 1247 if (el_match2(n,e)) 1248 { 1249 makeLI(n); 1250 //printf("Ind LI: "); WReqn(n); printf("\n"); 1251 break; 1252 } 1253 } 1254 } 1255 else 1256 makeLI(n); 1257 L10: 1258 vec_free(tmp); 1259 break; 1260 } 1261 } 1262 } 1263 break; 1264 1265 case OPvar: 1266 v = n.EV.Vsym; 1267 if (v.Sflags & SFLunambig) // must be unambiguous to be LI 1268 { 1269 tmp = vec_calloc(go.defnod.length); 1270 //filterrd(tmp,rd,v); // only the RDs pertaining to v 1271 listrds(rd,n,tmp,null); // only the RDs pertaining to v 1272 1273 // if (no RDs within loop) 1274 // then it's loop invariant 1275 1276 for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i) // for each RD 1277 if (vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv)) 1278 goto L1; // found a RD in the loop 1279 makeLI(n); 1280 1281 L1: vec_free(tmp); 1282 } 1283 break; 1284 1285 case OPstring: 1286 case OPrelconst: 1287 case OPconst: /* constants are always LI */ 1288 case OPframeptr: 1289 makeLI(n); 1290 break; 1291 1292 case OPinfo: 1293 markinvar(n.EV.E2,rd); 1294 break; 1295 1296 case OPstrthis: 1297 case OPmark: 1298 case OPctor: 1299 case OPdtor: 1300 case OPdctor: 1301 case OPhalt: 1302 case OPgot: // shouldn't OPgot be makeLI ? 1303 break; 1304 1305 default: 1306 //printf("n.Eoper = %s, OPconst = %d\n", oper_str(n.Eoper), OPconst); 1307 assert(0); 1308 } 1309 1310 debug 1311 { 1312 if (debugc && isLI(n)) 1313 { 1314 printf(" LI elem: "); 1315 WReqn(n); 1316 printf("\n"); 1317 } 1318 } 1319 } 1320 1321 /******************** 1322 * Update rd vector. 1323 * Input: 1324 * n assignment elem or function call elem or OPasm elem 1325 * rd reaching def vector to update 1326 * (clear bits for defs we kill, set bit for n (which is the 1327 * def we are genning)) 1328 * vecdim go.defnod.length 1329 */ 1330 1331 extern (C) { 1332 @trusted 1333 void updaterd(elem *n,vec_t GEN,vec_t KILL) 1334 { 1335 const op = n.Eoper; 1336 elem *t; 1337 1338 assert(OTdef(op)); 1339 assert(GEN); 1340 elem_debug(n); 1341 1342 uint ni = n.Edef; 1343 assert(ni != -1); 1344 1345 // If unambiguous def 1346 if (OTassign(op) && (t = n.EV.E1).Eoper == OPvar) 1347 { 1348 vec_t v = go.defnod[ni].DNunambig; 1349 assert(v); 1350 if (KILL) 1351 vec_orass(KILL, v); 1352 vec_subass(GEN, v); 1353 } 1354 else 1355 { 1356 static if (0) 1357 { 1358 if (OTassign(op) && t.Eoper != OPvar && t.Ejty) 1359 { 1360 // for all unambig defs in go.defnod[] 1361 foreach (uint i; 0 .. go.defnod.length) 1362 { 1363 elem *tn = go.defnod[i].DNelem; 1364 elem *tn1; 1365 1366 if (tn == n) 1367 ni = i; 1368 1369 if (!OTassign(tn.Eoper)) 1370 continue; 1371 1372 // If def of same variable, kill that def 1373 tn1 = tn.EV.E1; 1374 if (tn1.Eoper != OPind || t.Ejty != tn1.Ejty) 1375 continue; 1376 1377 if (KILL) 1378 vec_setbit(i,KILL); 1379 vec_clearbit(i,GEN); 1380 } 1381 } 1382 } 1383 } 1384 1385 vec_setbit(ni,GEN); // set bit in GEN for this def 1386 } 1387 } 1388 1389 /*************************** 1390 * Mark all elems as not being loop invariant. 1391 */ 1392 1393 @trusted 1394 private void unmarkall(elem *e) 1395 { 1396 for (; 1; e = e.EV.E1) 1397 { 1398 assert(e); 1399 e.Nflags &= ~NFLli; /* unmark this elem */ 1400 if (OTunary(e.Eoper)) 1401 continue; 1402 else if (OTbinary(e.Eoper)) 1403 { 1404 unmarkall(e.EV.E2); 1405 continue; 1406 } 1407 return; 1408 } 1409 } 1410 1411 1412 /******************************** 1413 * Search for references to v in tree n before nstop is encountered. 1414 * Params: 1415 * v = symbol to search for 1416 * n = tree to search 1417 * nstop = stop searching tree when reaching this elem 1418 * Returns: 1419 * true if there are any refs of v in n before nstop is encountered 1420 */ 1421 1422 @trusted 1423 private bool refs(Symbol *v,elem *n,elem *nstop) 1424 { 1425 symbol_debug(v); 1426 assert(symbol_isintab(v)); 1427 assert(v.Ssymnum < globsym.length); 1428 bool stop = false; 1429 1430 // Walk tree in evaluation order 1431 bool walk(elem* n) 1432 { 1433 elem_debug(n); 1434 assert(n); 1435 1436 if (stop) 1437 return false; 1438 bool f = false; 1439 const op = n.Eoper; 1440 if (OTunary(op)) 1441 f = walk(n.EV.E1); 1442 else if (OTbinary(op)) 1443 { 1444 if (ERTOL(n)) /* watch order of evaluation */ 1445 { 1446 /* Note that (OPvar = e) is not a ref of OPvar, whereas */ 1447 /* ((OPbit OPvar) = e) is a ref of OPvar, and (OPvar op= e) is */ 1448 /* a ref of OPvar, etc. */ 1449 f = walk(n.EV.E2); 1450 if (!f) 1451 { 1452 if (op == OPeq) 1453 { 1454 if (n.EV.E1.Eoper != OPvar) 1455 f = walk(n.EV.E1.EV.E1); 1456 } 1457 else 1458 f = walk(n.EV.E1); 1459 } 1460 } 1461 else 1462 f = walk(n.EV.E1) || walk(n.EV.E2); 1463 } 1464 1465 if (n == nstop) 1466 stop = true; 1467 else if (n.Eoper == OPvar) /* if variable reference */ 1468 return v == n.EV.Vsym; 1469 else if (op == OPasm) /* everything is referenced */ 1470 return true; 1471 return f; 1472 } 1473 1474 return walk(n); 1475 } 1476 1477 /************************* 1478 * Move LIs to preheader. 1479 * Conditions to be satisfied for code motion are: 1480 * 1) All exit blocks are dominated (true before this is called). 1481 * -- OR -- 1482 * 2) Variable assigned by a statement is not live on entering 1483 * any successor outside the loop of any exit block of the 1484 * loop. 1485 * 1486 * 3) Cannot move assignment to variable if there are any other 1487 * assignments to that variable within the loop (true or 1488 * assignment would not have been marked LI). 1489 * 4) Cannot move assignments to a variable if there is a use 1490 * of that variable in this loop that is reached by any other 1491 * def of it. 1492 * 5) Cannot move expressions that have side effects. 1493 * 6) Do not move assignments to variables that could be affected 1494 * by ambiguous defs. 1495 * 7) It is not worth it to move expressions of the form: 1496 * (var == const) 1497 * Input: 1498 * n the elem we're considering moving 1499 * b the block this elem is in 1500 * l the loop we're in 1501 * domexit flags 1502 * bit 0: 1 this branch is always executed 1503 * 0 this branch is only sometimes executed 1504 * bit 1: 1 do not move LIs that could throw exceptions 1505 * or cannot be moved past possibly thrown exceptions 1506 * Returns: 1507 * revised domexit 1508 */ 1509 1510 @trusted 1511 private void movelis(elem* n, block* b, ref Loop l, ref uint pdomexit) 1512 { 1513 vec_t tmp; 1514 elem *ne; 1515 elem *t; 1516 elem *n2; 1517 Symbol *v; 1518 tym_t ty; 1519 1520 Lnextlis: 1521 //if (isLI(n)) { printf("movelis(B%d, ", b.Bdfoidx); WReqn(n); printf(")\n"); } 1522 assert(n); 1523 elem_debug(n); 1524 const op = n.Eoper; 1525 switch (op) 1526 { 1527 case OPvar: 1528 case OPconst: 1529 case OPrelconst: 1530 goto Lret; 1531 1532 case OPandand: 1533 case OPoror: 1534 case OPcond: 1535 { 1536 uint domexit; 1537 1538 movelis(n.EV.E1,b,l,pdomexit); // always executed 1539 domexit = pdomexit & ~1; // sometimes executed 1540 movelis(n.EV.E2,b,l,domexit); 1541 pdomexit |= domexit & 2; 1542 goto Lret; 1543 } 1544 1545 case OPeq: 1546 // Do loop invariant assignments 1547 if (isLI(n) && n.EV.E1.Eoper == OPvar) 1548 { 1549 v = n.EV.E1.EV.Vsym; // variable index number 1550 1551 if (!(v.Sflags & SFLunambig)) goto L3; // case 6 1552 1553 // If case 4 is not satisfied, return 1554 1555 // Function parameters have an implied definition prior to the 1556 // first block of the function. Unfortunately, the rd vector 1557 // does not take this into account. Therefore, we assume the 1558 // worst and reject assignments to function parameters. 1559 if (v.Sclass == SC.parameter || v.Sclass == SC.regpar || 1560 v.Sclass == SC.fastpar || v.Sclass == SC.shadowreg) 1561 goto L3; 1562 1563 if (el_sideeffect(n.EV.E2)) goto L3; // case 5 1564 1565 // If case 1 or case 2 is not satisfied, return 1566 1567 if (!(pdomexit & 1)) // if not case 1 1568 { 1569 uint i; 1570 for (i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i) // for each exit block 1571 { 1572 foreach (bl; ListRange(dfo[i].Bsucc)) 1573 { 1574 block *s; // successor to exit block 1575 1576 s = list_block(bl); 1577 if (!vec_testbit(s.Bdfoidx,l.Lloop) && 1578 (!symbol_isintab(v) || 1579 vec_testbit(v.Ssymnum,s.Binlv))) // if v is live on exit 1580 goto L3; 1581 } 1582 } 1583 } 1584 1585 tmp = vec_calloc(go.defnod.length); 1586 uint i; 1587 for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) // for each block in loop 1588 { 1589 if (dfo[i] == b) // except this one 1590 continue; 1591 1592 //<if there are any RDs of v in Binrd other than n> 1593 // <if there are any refs of v in that block> 1594 // return; 1595 1596 //filterrd(tmp,dfo[i].Binrd,v); 1597 listrds(dfo[i].Binrd,n.EV.E1,tmp,null); 1598 uint j; 1599 for (j = 0; (j = cast(uint) vec_index(j, tmp)) < go.defnod.length; ++j) // for each RD of v in Binrd 1600 { 1601 if (go.defnod[j].DNelem == n) 1602 continue; 1603 if (dfo[i].Belem && 1604 refs(v,dfo[i].Belem,cast(elem *)null)) //if refs of v 1605 { 1606 vec_free(tmp); 1607 goto L3; 1608 } 1609 break; 1610 } 1611 } // foreach 1612 1613 // <if there are any RDs of v in b.Binrd other than n> 1614 // <if there are any references to v before the 1615 // assignment to v> 1616 // <can't move this assignment> 1617 1618 //filterrd(tmp,b.Binrd,v); 1619 listrds(b.Binrd,n.EV.E1,tmp,null); 1620 uint j; 1621 for (j = 0; (j = cast(uint) vec_index(j, tmp)) < go.defnod.length; ++j) // for each RD of v in Binrd 1622 { 1623 if (go.defnod[j].DNelem == n) 1624 continue; 1625 if (b.Belem && refs(v,b.Belem,n)) 1626 { 1627 vec_free(tmp); 1628 goto L3; // can't move it 1629 } 1630 break; // avoid redundant looping 1631 } 1632 vec_free(tmp); 1633 1634 // We have an LI assignment, n. 1635 // Check to see if the rvalue is already in the preheader. 1636 foreach (e; l.Llis) 1637 { 1638 if (el_match(n.EV.E2, e.EV.E2)) 1639 { 1640 el_free(n.EV.E2); 1641 n.EV.E2 = el_calloc(); 1642 el_copy(n.EV.E2, e.EV.E1); 1643 if (debugc) printf("LI assignment rvalue was replaced\n"); 1644 doflow = true; 1645 go.changes++; 1646 break; 1647 } 1648 } 1649 1650 // move assignment elem to preheader 1651 if (debugc) printf("Moved LI assignment "); 1652 1653 debug 1654 if (debugc) 1655 { 1656 WReqn(n); 1657 printf(";\n"); 1658 } 1659 1660 go.changes++; 1661 doflow = true; // redo flow analysis 1662 ne = el_calloc(); 1663 el_copy(ne,n); // create assignment elem 1664 assert(l.Lpreheader); // make sure there is one 1665 appendelem(ne,&(l.Lpreheader.Belem)); // append ne to preheader 1666 l.Llis.push(ne); 1667 1668 el_copy(n,ne.EV.E1); // replace n with just a reference to v 1669 goto Lret; 1670 } // if 1671 break; 1672 1673 case OPcall: 1674 case OPucall: 1675 pdomexit |= 2; 1676 break; 1677 1678 case OPpair: 1679 case OPrpair: // don't move these, as they do not do computation 1680 movelis(n.EV.E1,b,l,pdomexit); 1681 n = n.EV.E2; 1682 goto Lnextlis; 1683 1684 default: 1685 break; 1686 } 1687 1688 L3: 1689 // Do leaves of non-LI expressions, leaves of = elems that didn't 1690 // meet the invariant assignment removal criteria, and don't do leaves 1691 if (OTleaf(op)) 1692 goto Lret; 1693 if (!isLI(n) || op == OPeq || op == OPcomma || OTrel(op) || op == OPnot || 1694 // These are usually addressing modes, so moving them is a net loss 1695 (I32 && op == OPshl && n.EV.E2.Eoper == OPconst && el_tolong(n.EV.E2) <= 3UL) 1696 ) 1697 { 1698 if (OTassign(op)) 1699 { 1700 elem *n1 = n.EV.E1; 1701 elem *n11; 1702 1703 if (OTbinary(op)) 1704 movelis(n.EV.E2,b,l,pdomexit); 1705 1706 // Do lvalue only if it is an expression 1707 if (n1.Eoper == OPvar) 1708 goto Lret; 1709 n11 = n1.EV.E1; 1710 if (OTbinary(n1.Eoper)) 1711 { 1712 movelis(n11,b,l,pdomexit); 1713 n = n1.EV.E2; 1714 } 1715 // If *(x + c), just make x the LI, not the (x + c). 1716 // The +c comes free with the addressing mode. 1717 else if (n1.Eoper == OPind && 1718 isLI(n11) && 1719 n11.Eoper == OPadd && 1720 n11.EV.E2.Eoper == OPconst 1721 ) 1722 { 1723 n = n11.EV.E1; 1724 } 1725 else 1726 n = n11; 1727 movelis(n,b,l,pdomexit); 1728 if (b.Btry || !(n1.Eoper == OPvar && symbol_isintab(n1.EV.Vsym))) 1729 { 1730 //printf("assign to global => domexit |= 2\n"); 1731 pdomexit |= 2; 1732 } 1733 } 1734 else if (OTunary(op)) 1735 { 1736 elem *e1 = n.EV.E1; 1737 1738 // If *(x + c), just make x the LI, not the (x + c). 1739 // The +c comes free with the addressing mode. 1740 if (op == OPind && 1741 isLI(e1) && 1742 e1.Eoper == OPadd && 1743 e1.EV.E2.Eoper == OPconst 1744 ) 1745 { 1746 n = e1.EV.E1; 1747 } 1748 else 1749 n = e1; 1750 } 1751 else if (OTbinary(op)) 1752 { 1753 movelis(n.EV.E1,b,l,pdomexit); 1754 n = n.EV.E2; 1755 } 1756 goto Lnextlis; 1757 } 1758 1759 if (el_sideeffect(n)) 1760 goto Lret; 1761 1762 static if (0) 1763 { 1764 printf("*pdomexit = %u\n", pdomexit); 1765 if (pdomexit & 2) 1766 { 1767 // If any indirections, can't LI it 1768 1769 // If this operand has already been indirected, we can let 1770 // it pass. 1771 Symbol *s; 1772 1773 printf("looking at:\n"); 1774 elem_print(n); 1775 s = el_basesym(n.EV.E1); 1776 if (s) 1777 { 1778 foreach (el; l.Llis) 1779 { 1780 el = el.EV.E2; 1781 elem_print(el); 1782 if (el.Eoper == OPind && el_basesym(el.EV.E1) == s) 1783 { 1784 printf(" pass!\n"); 1785 goto Lpass; 1786 } 1787 } 1788 } 1789 printf(" skip!\n"); 1790 goto Lret; 1791 1792 Lpass: 1793 } 1794 } 1795 1796 // Move the LI expression to the preheader 1797 if (debugc) printf("Moved LI expression "); 1798 1799 debug 1800 if (debugc) 1801 { 1802 WReqn(n); 1803 printf(";\n"); 1804 } 1805 1806 // See if it's already been moved 1807 ty = n.Ety; 1808 foreach (el; l.Llis) 1809 { 1810 tym_t ty2; 1811 1812 //printf("existing LI: "); WReqn(el); printf("\n"); 1813 ty2 = el.EV.E2.Ety; 1814 if (tysize(ty) == tysize(ty2)) 1815 { 1816 el.EV.E2.Ety = ty; 1817 if (el_match(n,el.EV.E2)) 1818 { 1819 el.EV.E2.Ety = ty2; 1820 if (!OTleaf(n.Eoper)) 1821 { el_free(n.EV.E1); 1822 if (OTbinary(n.Eoper)) 1823 el_free(n.EV.E2); 1824 } 1825 el_copy(n,el.EV.E1); // make copy of temp 1826 n.Ety = ty; 1827 1828 debug 1829 if (debugc) 1830 { printf("Already moved: LI expression replaced with "); 1831 WReqn(n); 1832 printf("\nPreheader %d expression %p ", 1833 l.Lpreheader.Bdfoidx,l.Lpreheader.Belem); 1834 WReqn(l.Lpreheader.Belem); 1835 printf("\n"); 1836 } 1837 1838 go.changes++; 1839 doflow = true; // redo flow analysis 1840 goto Lret; 1841 } 1842 el.EV.E2.Ety = ty2; 1843 } 1844 } 1845 1846 if (!(pdomexit & 1)) // if only sometimes executed 1847 { 1848 if (debugc) printf(" doesn't dominate exit\n"); 1849 goto Lret; // don't move LI 1850 } 1851 1852 if (tyaggregate(n.Ety)) 1853 goto Lret; 1854 1855 go.changes++; 1856 doflow = true; // redo flow analysis 1857 1858 t = el_alloctmp(n.Ety); /* allocate temporary t */ 1859 1860 debug 1861 { 1862 if (debugc) printf("movelis() introduced new variable '%s' of type ",t.EV.Vsym.Sident.ptr); 1863 if (debugc) printf("%s\n", tym_str(t.Ety)); 1864 if (debugc) printf("\n"); 1865 } 1866 1867 n2 = el_calloc(); 1868 el_copy(n2,n); /* create copy n2 of n */ 1869 ne = el_bin(OPeq,t.Ety,t,n2); /* create elem t=n2 */ 1870 assert(l.Lpreheader); /* make sure there is one */ 1871 appendelem(ne,&(l.Lpreheader.Belem)); /* append ne to preheader */ 1872 1873 debug 1874 if (debugc) 1875 { 1876 printf("Preheader %d expression %p\n\t", 1877 l.Lpreheader.Bdfoidx,l.Lpreheader.Belem); 1878 WReqn(l.Lpreheader.Belem); 1879 printf("\nLI expression replaced with "); WReqn(t); 1880 printf("\n"); 1881 } 1882 1883 el_copy(n,t); /* replace this elem with t */ 1884 1885 // Remember LI expression in elem list 1886 l.Llis.push(ne); 1887 1888 Lret: 1889 1890 } 1891 1892 /*************************** 1893 * Append elem to existing elem using an OPcomma elem. 1894 * Input: 1895 * n elem to append 1896 * *pn elem to append to 1897 */ 1898 1899 @trusted 1900 private void appendelem(elem *n,elem **pn) 1901 { 1902 assert(n && pn); 1903 if (*pn) /* if this elem exists */ 1904 { 1905 while ((*pn).Eoper == OPcomma) /* while we see OPcomma elems */ 1906 { 1907 (*pn).Ety = n.Ety; 1908 pn = &((*pn).EV.E2); /* cruise down right side */ 1909 } 1910 *pn = el_bin(OPcomma,n.Ety,*pn,n); 1911 } 1912 else 1913 *pn = n; /* else create a elem */ 1914 } 1915 1916 /************** LOOP INDUCTION VARIABLES **********************/ 1917 1918 /**************************** 1919 * Create a new famlist entry. 1920 */ 1921 1922 @trusted 1923 private void newfamlist(famlist* fl, tym_t ty) 1924 { 1925 eve c = void; 1926 memset(&c,0,c.sizeof); 1927 1928 fl.FLty = ty; 1929 switch (tybasic(ty)) 1930 { case TYfloat: 1931 c.Vfloat = 1; 1932 break; 1933 1934 case TYdouble: 1935 case TYdouble_alias: 1936 c.Vdouble = 1; 1937 break; 1938 1939 case TYldouble: 1940 c.Vldouble = 1; 1941 break; 1942 1943 case TYbool: 1944 case TYchar: 1945 case TYschar: 1946 case TYuchar: 1947 c.Vchar = 1; 1948 break; 1949 1950 case TYshort: 1951 case TYushort: 1952 case TYchar16: 1953 case TYwchar_t: // BUG: what about 4 byte wchar_t's? 1954 c.Vshort = 1; 1955 break; 1956 1957 case TYsptr: 1958 case TYcptr: 1959 case TYfptr: 1960 case TYvptr: 1961 case TYnptr: 1962 case TYnullptr: 1963 case TYimmutPtr: 1964 case TYsharePtr: 1965 case TYrestrictPtr: 1966 case TYfgPtr: 1967 ty = TYint; 1968 if (I64) 1969 ty = TYllong; 1970 goto case TYuint; 1971 1972 case TYint: 1973 case TYuint: 1974 c.Vint = 1; 1975 break; 1976 1977 case TYhptr: 1978 ty = TYlong; 1979 goto case TYlong; 1980 1981 case TYlong: 1982 case TYulong: 1983 case TYdchar: 1984 default: 1985 c.Vlong = 1; 1986 break; 1987 } 1988 fl.c1 = el_const(ty,&c); /* c1 = 1 */ 1989 c.Vldouble = 0; 1990 if (typtr(ty)) 1991 { 1992 ty = TYint; 1993 if (tybasic(ty) == TYhptr) 1994 ty = TYlong; 1995 if (I64) 1996 ty = TYllong; 1997 } 1998 fl.c2 = el_const(ty,&c); /* c2 = 0 */ 1999 } 2000 2001 /*************************** 2002 * Remove induction variables from loop l. 2003 * Loop invariant removal should have been done just previously. 2004 */ 2005 2006 @trusted 2007 private void loopiv(ref Loop l) 2008 { 2009 if (debugc) printf("loopiv(%p)\n", &l); 2010 assert(l.Livlist.length == 0 && l.Lopeqlist.length == 0); 2011 elimspec(l, dfo); 2012 if (doflow) 2013 { 2014 flowrd(); /* compute reaching defs */ 2015 flowlv(); /* compute live variables */ 2016 flowae(); // compute available expressions 2017 doflow = false; 2018 } 2019 findbasivs(l); /* find basic induction variables */ 2020 findopeqs(l); // find op= variables 2021 findivfams(l); /* find IV families */ 2022 elimfrivivs(l); /* eliminate less useful family IVs */ 2023 intronvars(l); /* introduce new variables */ 2024 elimbasivs(l); /* eliminate basic IVs */ 2025 if (!addblk) // adding a block changes the Binlv 2026 elimopeqs(l); // eliminate op= variables 2027 2028 foreach (ref iv; l.Livlist) 2029 iv.reset(); 2030 l.Livlist.reset(); // reset for reuse 2031 2032 foreach (ref iv; l.Lopeqlist) 2033 iv.reset(); 2034 l.Lopeqlist.reset(); // reset for reuse 2035 2036 /* Do copy propagation and dead assignment elimination */ 2037 /* upon return to optfunc() */ 2038 } 2039 2040 /************************************* 2041 * Find basic IVs of loop l. 2042 * A basic IV x of loop l is a variable x which has 2043 * exactly one assignment within l of the form: 2044 * x += c or x -= c, where c is either a constant 2045 * or a LI. 2046 * Input: 2047 * go.defnod[] loaded with all the definition elems of the loop 2048 */ 2049 2050 @trusted 2051 private void findbasivs(ref Loop l) 2052 { 2053 vec_t poss,notposs; 2054 elem *n; 2055 bool ambdone; 2056 2057 ambdone = false; 2058 poss = vec_calloc(globsym.length); 2059 notposs = vec_calloc(globsym.length); /* vector of all variables */ 2060 /* (initially all unmarked) */ 2061 2062 /* for each def in go.defnod[] that is within loop l */ 2063 2064 foreach (const i; 0 .. go.defnod.length) 2065 { 2066 if (!vec_testbit(go.defnod[i].DNblock.Bdfoidx,l.Lloop)) 2067 continue; /* def is not in the loop */ 2068 2069 n = go.defnod[i].DNelem; 2070 elem_debug(n); 2071 if (OTassign(n.Eoper) && n.EV.E1.Eoper == OPvar) 2072 { 2073 Symbol *s; /* if unambiguous def */ 2074 2075 s = n.EV.E1.EV.Vsym; 2076 if (symbol_isintab(s)) 2077 { 2078 SYMIDX v; 2079 2080 v = n.EV.E1.EV.Vsym.Ssymnum; 2081 if ((n.Eoper == OPaddass || n.Eoper == OPminass || 2082 n.Eoper == OPpostinc || n.Eoper == OPpostdec) && 2083 (n.EV.E2.Eoper == OPconst || /* if x += c or x -= c */ 2084 n.EV.E2.Eoper == OPvar && isLI(n.EV.E2))) 2085 { 2086 if (vec_testbit(v,poss)) 2087 /* We've already seen this def elem, */ 2088 /* therefore there is more than one */ 2089 /* def of v within the loop, therefore */ 2090 /* v is not a basic IV. */ 2091 vec_setbit(v,notposs); 2092 else 2093 vec_setbit(v,poss); 2094 } 2095 else /* else mark as not possible */ 2096 vec_setbit(v,notposs); 2097 } 2098 } 2099 else /* else ambiguous def */ 2100 { 2101 /* mark any vars that could be affected by */ 2102 /* this def as not possible */ 2103 2104 if (!ambdone) /* avoid redundant loops */ 2105 { 2106 foreach (j; 0 .. globsym.length) 2107 { 2108 if (!(globsym[j].Sflags & SFLunambig)) 2109 vec_setbit(j,notposs); 2110 } 2111 ambdone = true; 2112 } 2113 } 2114 } 2115 static if (0) 2116 { 2117 printf("poss "); vec_println(poss); 2118 printf("notposs "); vec_println(notposs); 2119 } 2120 vec_subass(poss,notposs); /* poss = poss - notposs */ 2121 2122 /* create list of IVs */ 2123 uint i; 2124 for (i = 0; (i = cast(uint) vec_index(i, poss)) < globsym.length; ++i) // for each basic IV 2125 { 2126 Symbol *s; 2127 2128 /* Skip if we don't want it to be a basic IV (see funcprev()) */ 2129 s = globsym[i]; 2130 assert(symbol_isintab(s)); 2131 if (s.Sflags & SFLnotbasiciv) 2132 continue; 2133 2134 // Do not use aggregates as basic IVs. This is because the other loop 2135 // code doesn't check offsets into symbols, (assuming everything 2136 // is at offset 0). We could perhaps amend this by allowing basic IVs 2137 // if the struct consists of only one data member. 2138 if (tyaggregate(s.ty())) 2139 continue; 2140 2141 // Do not use complex types as basic IVs, as the code gen isn't up to it 2142 if (tycomplex(s.ty())) 2143 continue; 2144 2145 auto biv = l.Livlist.push(); 2146 biv.IVbasic = s; // symbol of basic IV 2147 biv.IVincr = null; 2148 2149 if (debugc) printf("Symbol '%s' (%d) is a basic IV, ", s.Sident.ptr, i); 2150 2151 /* We have the sym idx of the basic IV. We need to find */ 2152 /* the parent of the increment elem for it. */ 2153 2154 /* First find the go.defnod[] */ 2155 foreach (j; 0 .. go.defnod.length) 2156 { 2157 /* If go.defnod is a def of i and it is in the loop */ 2158 if (go.defnod[j].DNelem.EV.E1 && /* OPasm are def nodes */ 2159 go.defnod[j].DNelem.EV.E1.EV.Vsym == s && 2160 vec_testbit(go.defnod[j].DNblock.Bdfoidx,l.Lloop)) 2161 { 2162 biv.IVincr = el_parent(go.defnod[j].DNelem,&(go.defnod[j].DNblock.Belem)); 2163 assert(s == (*biv.IVincr).EV.E1.EV.Vsym); 2164 2165 debug if (debugc) 2166 { 2167 printf("Increment elem is: "); WReqn(*biv.IVincr); printf("\n"); 2168 } 2169 goto L1; 2170 } 2171 } 2172 assert(0); /* should have found it */ 2173 /* NOTREACHED */ 2174 2175 L1: 2176 } 2177 2178 vec_free(poss); 2179 vec_free(notposs); 2180 } 2181 2182 /************************************* 2183 * Find op= elems of loop l. 2184 * Analogous to findbasivs(). 2185 * Used to eliminate useless loop code normally found in benchmark programs. 2186 * Input: 2187 * go.defnod[] loaded with all the definition elems of the loop 2188 */ 2189 2190 @trusted 2191 private void findopeqs(ref Loop l) 2192 { 2193 vec_t poss,notposs; 2194 elem *n; 2195 bool ambdone; 2196 2197 ambdone = false; 2198 poss = vec_calloc(globsym.length); 2199 notposs = vec_calloc(globsym.length); // vector of all variables 2200 // (initially all unmarked) 2201 2202 // for each def in go.defnod[] that is within loop l 2203 2204 foreach (i; 0 .. go.defnod.length) 2205 { 2206 if (!vec_testbit(go.defnod[i].DNblock.Bdfoidx,l.Lloop)) 2207 continue; // def is not in the loop 2208 2209 n = go.defnod[i].DNelem; 2210 elem_debug(n); 2211 if (OTopeq(n.Eoper) && n.EV.E1.Eoper == OPvar) 2212 { 2213 Symbol *s; // if unambiguous def 2214 2215 s = n.EV.E1.EV.Vsym; 2216 if (symbol_isintab(s)) 2217 { 2218 SYMIDX v; 2219 2220 v = n.EV.E1.EV.Vsym.Ssymnum; 2221 { 2222 if (vec_testbit(v,poss)) 2223 // We've already seen this def elem, 2224 // therefore there is more than one 2225 // def of v within the loop, therefore 2226 // v is not a basic IV. 2227 vec_setbit(v,notposs); 2228 else 2229 vec_setbit(v,poss); 2230 } 2231 } 2232 } 2233 else // else ambiguous def 2234 { 2235 // mark any vars that could be affected by 2236 // this def as not possible 2237 2238 if (!ambdone) // avoid redundant loops 2239 { 2240 foreach (j; 0 .. globsym.length) 2241 { 2242 if (!(globsym[j].Sflags & SFLunambig)) 2243 vec_setbit(j,notposs); 2244 } 2245 ambdone = true; 2246 } 2247 } 2248 } 2249 2250 // Don't use symbols already in Livlist 2251 foreach (ref iv; l.Livlist) 2252 { 2253 vec_setbit(iv.IVbasic.Ssymnum,notposs); 2254 } 2255 2256 2257 static if (0) 2258 { 2259 printf("poss "); vec_println(poss); 2260 printf("notposs "); vec_println(notposs); 2261 } 2262 2263 vec_subass(poss,notposs); // poss = poss - notposs 2264 2265 // create list of IVs 2266 uint i; 2267 for (i = 0; (i = cast(uint) vec_index(i, poss)) < globsym.length; ++i) // for each opeq IV 2268 { 2269 Symbol *s; 2270 2271 s = globsym[i]; 2272 assert(symbol_isintab(s)); 2273 2274 // Do not use aggregates as basic IVs. This is because the other loop 2275 // code doesn't check offsets into symbols, (assuming everything 2276 // is at offset 0). We could perhaps amend this by allowing basic IVs 2277 // if the struct consists of only one data member. 2278 if (tyaggregate(s.ty())) 2279 continue; 2280 2281 auto biv = l.Lopeqlist.push(); 2282 biv.IVbasic = s; // symbol of basic IV 2283 biv.IVincr = null; 2284 2285 if (debugc) printf("Symbol '%s' (%d) is an opeq IV, ",s.Sident.ptr,i); 2286 2287 // We have the sym idx of the basic IV. We need to find 2288 // the parent of the increment elem for it. 2289 2290 // First find the go.defnod[] 2291 foreach (j; 0 .. go.defnod.length) 2292 { 2293 // If go.defnod is a def of i and it is in the loop 2294 if (go.defnod[j].DNelem.EV.E1 && // OPasm are def nodes 2295 go.defnod[j].DNelem.EV.E1.EV.Vsym == s && 2296 vec_testbit(go.defnod[j].DNblock.Bdfoidx,l.Lloop)) 2297 { 2298 biv.IVincr = el_parent(go.defnod[j].DNelem,&(go.defnod[j].DNblock.Belem)); 2299 assert(s == (*biv.IVincr).EV.E1.EV.Vsym); 2300 2301 debug if (debugc) 2302 { 2303 printf("Opeq elem is: "); WReqn(*biv.IVincr); printf("\n"); 2304 } 2305 goto Lcont; 2306 } 2307 } 2308 assert(0); // should have found it 2309 // NOTREACHED 2310 2311 Lcont: 2312 } 2313 2314 vec_free(poss); 2315 vec_free(notposs); 2316 } 2317 2318 /***************************** 2319 * Find families for each basic IV. 2320 * An IV family is a list of elems of the form 2321 * c1*X+c2, where X is a basic induction variable. 2322 * Note that we do not do divides, because of roundoff error problems. 2323 */ 2324 2325 @trusted 2326 private void findivfams(ref Loop l) 2327 { 2328 if (debugc) printf("findivfams(%p)\n", &l); 2329 foreach (ref biv; l.Livlist) 2330 { 2331 for (uint i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) // for each block in loop 2332 if (dfo[i].Belem) 2333 ivfamelems(&biv,&(dfo[i].Belem)); 2334 /* Fold all the constant expressions in c1 and c2. */ 2335 foreach (ref fl; biv.IVfamily) 2336 { 2337 fl.c1 = doptelem(fl.c1,GOALvalue | GOALagain); 2338 fl.c2 = doptelem(fl.c2,GOALvalue | GOALagain); 2339 } 2340 } 2341 } 2342 2343 /************************* 2344 * Tree walking support routine for findivfams(). 2345 * biv = basic induction variable pointer 2346 * pn pointer to elem 2347 */ 2348 2349 @trusted 2350 private void ivfamelems(Iv *biv,elem **pn) 2351 { 2352 tym_t ty,c2ty; 2353 elem *n1; 2354 elem *n2; 2355 2356 assert(pn); 2357 elem* n = *pn; 2358 assert(biv && n); 2359 const op = n.Eoper; 2360 if (OTunary(op)) 2361 { 2362 ivfamelems(biv,&n.EV.E1); 2363 n1 = n.EV.E1; 2364 n2 = null; 2365 } 2366 else if (OTbinary(op)) 2367 { 2368 ivfamelems(biv,&n.EV.E1); 2369 ivfamelems(biv,&n.EV.E2); /* LTOR or RTOL order is unimportant */ 2370 n1 = n.EV.E1; 2371 n2 = n.EV.E2; 2372 } 2373 else /* else leaf elem */ 2374 return; /* which can't be in the family */ 2375 2376 if (tycomplex(n.Ety)) 2377 return; 2378 2379 if (op == OPmul || op == OPadd || op == OPmin || 2380 op == OPneg || op == OPshl) 2381 { /* Note that we are wimping out and not considering */ 2382 /* LI variables as part of c1 and c2, but only constants. */ 2383 2384 ty = n.Ety; 2385 2386 /* Since trees are canonicalized, basic induction variables */ 2387 /* will only appear on the left. */ 2388 2389 /* Improvement: */ 2390 /* We wish to pick up the cases (biv + li), (biv - li) and */ 2391 /* (li + biv). OPmul and LS with bivs are out, since if we */ 2392 /* try to eliminate the biv, and the loop test is a >, >=, */ 2393 /* <, <=, we have a problem since we don't know if the li */ 2394 /* is negative. (Would have to call swaprel() on it.) */ 2395 2396 /* If we have (li + var), swap the leaves. */ 2397 if (op == OPadd && isLI(n1) && n1.Eoper == OPvar && n2.Eoper == OPvar) 2398 { 2399 n.EV.E1 = n2; 2400 n2 = n.EV.E2 = n1; 2401 n1 = n.EV.E1; 2402 } 2403 2404 // Get rid of case where we painted a far pointer to a long 2405 if (op == OPadd || op == OPmin) 2406 { 2407 int sz; 2408 2409 sz = tysize(ty); 2410 if (sz == tysize(TYfptr) && !tyfv(ty) && 2411 (sz != tysize(n1.Ety) || sz != tysize(n2.Ety))) 2412 return; 2413 } 2414 2415 /* Look for function of basic IV (-biv or biv op const) */ 2416 if (n1.Eoper == OPvar && n1.EV.Vsym == biv.IVbasic) 2417 { 2418 if (op == OPneg) 2419 { 2420 if (debugc) printf("found (-biv), elem %p\n",n); 2421 auto fl = biv.IVfamily.push(); 2422 newfamlist(fl, ty); 2423 fl.FLivty = n1.Ety; 2424 fl.FLpelem = pn; 2425 fl.c1 = el_una(op,ty,fl.c1); /* c1 = -1 */ 2426 } 2427 else if (n2.Eoper == OPconst || 2428 isLI(n2) && (op == OPadd || op == OPmin)) 2429 { 2430 debug 2431 if (debugc) 2432 { printf("found (biv op const), elem ("); 2433 WReqn(n); 2434 printf(");\n"); 2435 printf("Types: n1=%s", tym_str(n1.Ety)); 2436 printf(" ty=%s", tym_str(ty)); 2437 printf(" n2=%s\n", tym_str(n2.Ety)); 2438 } 2439 2440 auto fl = biv.IVfamily.push(); 2441 newfamlist(fl, ty); 2442 fl.FLivty = n1.Ety; 2443 fl.FLpelem = pn; 2444 switch (op) 2445 { 2446 case OPadd: /* c2 = right */ 2447 c2ty = n2.Ety; 2448 if (typtr(fl.c2.Ety)) 2449 c2ty = fl.c2.Ety; 2450 goto L1; 2451 2452 case OPmin: /* c2 = -right */ 2453 c2ty = fl.c2.Ety; 2454 /* Check for subtracting two pointers */ 2455 if (typtr(c2ty) && typtr(n2.Ety)) 2456 { 2457 if (tybasic(c2ty) == TYhptr) 2458 c2ty = TYlong; 2459 else 2460 c2ty = I64 ? TYllong : TYint; 2461 } 2462 L1: 2463 fl.c2 = el_bin(op,c2ty,fl.c2,el_copytree(n2)); 2464 break; 2465 2466 case OPmul: /* c1 = right */ 2467 case OPshl: /* c1 = 1 << right */ 2468 fl.c1 = el_bin(op,ty,fl.c1,el_copytree(n2)); 2469 break; 2470 2471 default: 2472 assert(0); 2473 } 2474 } 2475 } 2476 2477 /* Look for function of existing IV */ 2478 2479 foreach (ref fl; biv.IVfamily) 2480 { 2481 if (*fl.FLpelem != n1) /* not it */ 2482 continue; 2483 2484 /* Look for (fl op constant) */ 2485 if (op == OPneg) 2486 { 2487 if (debugc) printf("found (-fl), elem %p\n",n); 2488 /* c1 = -c1; c2 = -c2; */ 2489 fl.c1 = el_una(OPneg,ty,fl.c1); 2490 fl.c2 = el_una(OPneg,ty,fl.c2); 2491 fl.FLty = ty; 2492 fl.FLpelem = pn; /* replace with new IV */ 2493 } 2494 else if (n2.Eoper == OPconst || 2495 isLI(n2) && (op == OPadd || op == OPmin)) 2496 { 2497 debug 2498 if (debugc) 2499 { 2500 printf("found (fl op const), elem ("); 2501 WReqn(n); 2502 assert(*pn == n); 2503 printf(");\n"); 2504 elem_print(n); 2505 } 2506 2507 switch (op) 2508 { 2509 case OPmul: 2510 case OPshl: 2511 fl.c1 = el_bin(op,ty,fl.c1,el_copytree(n2)); 2512 break; 2513 2514 case OPadd: 2515 case OPmin: 2516 break; 2517 2518 default: 2519 assert(0); 2520 } 2521 fl.c2 = el_bin(op,ty,fl.c2,el_copytree(n2)); 2522 fl.FLty = ty; 2523 fl.FLpelem = pn; /* replace with new IV */ 2524 } /* else if */ 2525 } /* for */ 2526 } /* if */ 2527 } 2528 2529 /********************************* 2530 * Eliminate frivolous family ivs, that is, 2531 * if we can't eliminate the BIV, then eliminate family ivs that 2532 * differ from it only by a constant. 2533 */ 2534 2535 @trusted 2536 private void elimfrivivs(ref Loop l) 2537 { 2538 foreach (ref biv; l.Livlist) 2539 { 2540 int nrefs; 2541 2542 if (debugc) printf("elimfrivivs()\n"); 2543 /* Compute number of family ivs for biv */ 2544 const nfams = biv.IVfamily.length; 2545 if (debugc) printf("nfams = %d\n", cast(int)nfams); 2546 2547 /* Compute number of references to biv */ 2548 if (onlyref(biv.IVbasic,l,*biv.IVincr,nrefs)) 2549 nrefs--; 2550 if (debugc) printf("nrefs = %d\n",nrefs); 2551 assert(nrefs + 1 >= nfams); 2552 if (nrefs > nfams || // if we won't eliminate the biv 2553 (!I16 && nrefs == nfams)) 2554 { /* Eliminate any family ivs that only differ by a constant */ 2555 /* from biv */ 2556 foreach (ref fl; biv.IVfamily) 2557 { 2558 elem *ec1 = fl.c1; 2559 targ_llong c; 2560 2561 if (elemisone(ec1) || 2562 // Eliminate fl's that can be represented by 2563 // an addressing mode 2564 (!I16 && ec1.Eoper == OPconst && tyintegral(ec1.Ety) && 2565 ((c = el_tolong(ec1)) == 2 || c == 4 || c == 8) 2566 ) 2567 ) 2568 { 2569 fl.FLtemp = FLELIM; 2570 2571 debug 2572 if (debugc) 2573 { 2574 printf("Eliminating frivolous IV "); 2575 WReqn(*fl.FLpelem); 2576 printf("\n"); 2577 } 2578 } 2579 } 2580 } 2581 } 2582 } 2583 2584 2585 /****************************** 2586 * Introduce new variables. 2587 */ 2588 2589 @trusted 2590 private void intronvars(ref Loop l) 2591 { 2592 elem *T; 2593 elem *ne; 2594 elem *t2; 2595 elem *C2; 2596 elem *cmul; 2597 tym_t ty,tyr; 2598 2599 if (debugc) printf("intronvars(%p)\n", &l); 2600 foreach (ref biv; l.Livlist) 2601 { 2602 elem *bivinc = *biv.IVincr; /* ptr to increment elem */ 2603 2604 foreach (ref fl; biv.IVfamily) 2605 { /* for each IV in family of biv */ 2606 if (fl.FLtemp == FLELIM) /* if already eliminated */ 2607 continue; 2608 2609 /* If induction variable can be written as a simple function */ 2610 /* of a previous induction variable, skip it. */ 2611 if (funcprev(biv,fl)) 2612 continue; 2613 2614 ty = fl.FLty; 2615 T = el_alloctmp(ty); /* allocate temporary T */ 2616 fl.FLtemp = T.EV.Vsym; 2617 2618 debug 2619 { 2620 if (debugc) printf("intronvars() introduced new variable '%s' of type ",T.EV.Vsym.Sident.ptr); 2621 if (debugc) printf("%s\n", tym_str(ty)); 2622 if (debugc) printf("\n"); 2623 } 2624 2625 /* append elem T=biv*C1+C2 to preheader */ 2626 /* ne = biv*C1 */ 2627 tyr = fl.FLivty; /* type of biv */ 2628 ne = el_var(biv.IVbasic); 2629 ne.Ety = tyr; 2630 if (!elemisone(fl.c1)) /* don't multiply ptrs by 1 */ 2631 ne = el_bin(OPmul,tyr,ne,el_copytree(fl.c1)); 2632 if (tyfv(tyr) && tysize(ty) == SHORTSIZE) 2633 ne = el_una(OP32_16,ty,ne); 2634 C2 = el_copytree(fl.c2); 2635 t2 = el_bin(OPadd,ty,ne,C2); /* t2 = ne + C2 */ 2636 ne = el_bin(OPeq,ty,el_copytree(T),t2); 2637 appendelem(ne, &(l.Lpreheader.Belem)); 2638 2639 /* prefix T+=C1*C to elem biv+=C */ 2640 /* Must prefix in case the value of the expression (biv+=C) */ 2641 /* is used by somebody up the tree. */ 2642 cmul = el_bin(OPmul,fl.c1.Ety,el_copytree(fl.c1), 2643 el_copytree(bivinc.EV.E2)); 2644 t2 = el_bin(bivinc.Eoper,ty,el_copytree(T),cmul); 2645 t2 = doptelem(t2,GOALvalue | GOALagain); 2646 *biv.IVincr = el_bin(OPcomma,bivinc.Ety,t2,bivinc); 2647 biv.IVincr = &((*biv.IVincr).EV.E2); 2648 2649 debug 2650 if (debugc) 2651 { 2652 printf("Replacing elem ("); 2653 WReqn(*fl.FLpelem); 2654 printf(") with '%s'\n",T.EV.Vsym.Sident.ptr); 2655 printf("The init elem is ("); 2656 WReqn(ne); 2657 printf(");\nThe increment elem is ("); 2658 WReqn(t2); 2659 printf(")\n"); 2660 } 2661 2662 el_free(*fl.FLpelem); 2663 *fl.FLpelem = T; /* replace elem n with ref to T */ 2664 doflow = true; /* redo flow analysis */ 2665 go.changes++; 2666 } /* for */ 2667 } /* for */ 2668 } 2669 2670 /******************************* 2671 * Determine if induction variable can be rewritten as a simple 2672 * function of a previously generated temporary. 2673 * This can frequently 2674 * generate less code than that of an all-new temporary (especially 2675 * if it is the same as a previous temporary!). 2676 * Input: 2677 * biv Basic induction variable 2678 * fl Item in biv's family list we are looking at 2679 * Returns: 2680 * false Caller should create a new induction variable. 2681 * true *FLpelem is replaced with function of a previous 2682 * induction variable. FLtemp is set to FLELIM to 2683 * indicate this. 2684 */ 2685 2686 @trusted 2687 private bool funcprev(ref Iv biv, ref famlist fl) 2688 { 2689 tym_t tymin; 2690 int sz; 2691 elem *e1; 2692 elem *e2; 2693 elem *flse1; 2694 2695 debug if (debugc) 2696 printf("funcprev\n"); 2697 2698 foreach (ref fls; biv.IVfamily) 2699 { 2700 if (!fls.FLtemp) // haven't generated a temporary yet 2701 continue; 2702 if (fls.FLtemp == FLELIM) /* no iv we can use here */ 2703 continue; 2704 2705 /* The multipliers must match */ 2706 if (!el_match(fls.c1,fl.c1)) 2707 continue; 2708 2709 /* If the c2's match also, we got it easy */ 2710 if (el_match(fls.c2,fl.c2)) 2711 { 2712 if (tysize(fl.FLty) > tysize(fls.FLtemp.ty())) 2713 continue; /* can't increase size of var */ 2714 flse1 = el_var(fls.FLtemp); 2715 flse1.Ety = fl.FLty; 2716 goto L2; 2717 } 2718 2719 /* The difference is only in the addition. Therefore, replace 2720 *fl.FLpelem with: 2721 case 1: (fl.c2 + (fls.FLtemp - fls.c2)) 2722 case 2: (fls.FLtemp + (fl.c2 - fls.c2)) 2723 */ 2724 e1 = fl.c2; 2725 /* Subtracting relocatables usually generates slow code for */ 2726 /* linkers that can't handle arithmetic on relocatables. */ 2727 if (typtr(fls.c2.Ety)) 2728 { 2729 if (fls.c2.Eoper == OPrelconst && 2730 !(fl.c2.Eoper == OPrelconst && 2731 fl.c2.EV.Vsym == fls.c2.EV.Vsym) 2732 ) 2733 continue; 2734 } 2735 flse1 = el_var(fls.FLtemp); 2736 e2 = flse1; /* assume case 1 */ 2737 tymin = e2.Ety; 2738 if (typtr(fls.c2.Ety)) 2739 { 2740 if (!typtr(tymin)) 2741 { 2742 if (typtr(e1.Ety)) 2743 { 2744 e1 = e2; 2745 e2 = fl.c2; /* case 2 */ 2746 } 2747 else /* can't subtract fptr */ 2748 goto L1; 2749 } 2750 if (tybasic(fls.c2.Ety) == TYhptr) 2751 tymin = TYlong; 2752 else 2753 tymin = I64 ? TYllong : TYint; /* type of (ptr - ptr) */ 2754 } 2755 2756 /* If e1 and fls.c2 are fptrs, and are not from the same */ 2757 /* segment, we cannot subtract them. */ 2758 if (tyfv(e1.Ety) && tyfv(fls.c2.Ety)) 2759 { 2760 if (e1.Eoper != OPrelconst || fls.c2.Eoper != OPrelconst) 2761 goto L1; /* assume expressions have diff segs */ 2762 if (e1.EV.Vsym.Sclass != fls.c2.EV.Vsym.Sclass) 2763 { 2764 L1: 2765 el_free(flse1); 2766 continue; 2767 } 2768 } 2769 2770 /* Some more type checking... */ 2771 sz = tysize(fl.FLty); 2772 if (sz != tysize(e1.Ety) && 2773 sz != tysize(tymin)) 2774 goto L1; 2775 2776 /* Do some type checking (can't add pointers and get a pointer!) */ 2777 //if (typtr(fl.FLty) && typtr(e1.Ety) && typtr(tymin)) 2778 //goto L1; 2779 /* Construct (e1 + (e2 - fls.c2)) */ 2780 flse1 = el_bin(OPadd,fl.FLty, 2781 e1, 2782 el_bin(OPmin,tymin, 2783 e2, 2784 el_copytree(fls.c2))); 2785 if (sz < tysize(tymin) && sz == tysize(e1.Ety)) 2786 { 2787 assert(I16); 2788 flse1.EV.E2 = el_una(OPoffset,fl.FLty,flse1.EV.E2); 2789 } 2790 2791 flse1 = doptelem(flse1,GOALvalue | GOALagain); 2792 fl.c2 = null; 2793 L2: 2794 debug if (debugc) 2795 { 2796 printf("Replacing "); 2797 WReqn(*fl.FLpelem); 2798 printf(" with "); 2799 WReqn(flse1); 2800 printf("\n"); 2801 } 2802 2803 el_free(*fl.FLpelem); 2804 *fl.FLpelem = flse1; 2805 2806 /* Fix the iv so when we do loops again, we won't create */ 2807 /* yet another iv, which is just what funcprev() is supposed */ 2808 /* to prevent. */ 2809 fls.FLtemp.Sflags |= SFLnotbasiciv; 2810 2811 fl.FLtemp = FLELIM; /* mark iv as being gone */ 2812 go.changes++; 2813 doflow = true; 2814 return true; /* it was replaced */ 2815 } 2816 return false; /* need to create a new variable */ 2817 } 2818 2819 /*********************** 2820 * Eliminate basic IVs. 2821 */ 2822 2823 @trusted 2824 private void elimbasivs(ref Loop l) 2825 { 2826 if (debugc) printf("elimbasivs(%p)\n", &l); 2827 foreach (ref biv; l.Livlist) 2828 { 2829 /* Can't eliminate this basic IV if we have a goal for the */ 2830 /* increment elem. */ 2831 // Be careful about Nflags being in a union... 2832 elem* einc = *biv.IVincr; 2833 if (!(einc.Nflags & NFLnogoal)) 2834 continue; 2835 2836 Symbol* X = biv.IVbasic; 2837 assert(symbol_isintab(X)); 2838 tym_t ty = X.ty(); 2839 int refcount; 2840 elem** pref = onlyref(X,l,einc,refcount); 2841 2842 /* if only ref of X is of the form (X) or (X relop e) or (e relop X) */ 2843 if (pref != null && refcount <= 1) 2844 { 2845 if (!biv.IVfamily.length) 2846 continue; 2847 2848 if (catchRef(X, l)) 2849 continue; 2850 2851 elem* ref_ = *pref; 2852 2853 /* Replace (X) with (X != 0) */ 2854 if (ref_.Eoper == OPvar) 2855 ref_ = *pref = el_bin(OPne,TYint,ref_,el_long(ref_.Ety,0L)); 2856 2857 const fi = simfl(biv.IVfamily, ty); // find simplest elem in family 2858 if (fi == biv.IVfamily.length) 2859 continue; 2860 famlist* fl = &biv.IVfamily[fi]; 2861 2862 // Don't do the replacement if we would replace a 2863 // signed comparison with an unsigned one 2864 tym_t flty = fl.FLty; 2865 if (tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety)) 2866 flty = touns(flty); 2867 2868 if (ref_.Eoper >= OPle && ref_.Eoper <= OPge && 2869 !(tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety)) && 2870 tyuns(flty)) 2871 continue; 2872 2873 /* if we have (e relop X), replace it with (X relop e) */ 2874 if (ref_.EV.E2.Eoper == OPvar && ref_.EV.E2.EV.Vsym == X) 2875 { 2876 elem* tmp = ref_.EV.E2; 2877 ref_.EV.E2 = ref_.EV.E1; 2878 ref_.EV.E1 = tmp; 2879 ref_.Eoper = cast(ubyte)swaprel(ref_.Eoper); 2880 } 2881 2882 // If e*c1+c2 would result in a sign change or an overflow 2883 // then we can't do it 2884 if (fl.c1.Eoper == OPconst) 2885 { 2886 targ_llong c1 = el_tolong(fl.c1); 2887 const int sz = tysize(ty); 2888 if (sz == SHORTSIZE && 2889 ((ref_.EV.E2.Eoper == OPconst && 2890 c1 * el_tolong(ref_.EV.E2) & ~0x7FFFL) || 2891 c1 & ~0x7FFFL) 2892 ) 2893 continue; 2894 2895 if (sz == LONGSIZE && 2896 ((ref_.EV.E2.Eoper == OPconst && 2897 c1 * el_tolong(ref_.EV.E2) & ~0x7FFFFFFFL) || 2898 c1 & ~0x7FFFFFFFL) 2899 ) 2900 continue; 2901 if (sz == LLONGSIZE && 2902 ((ref_.EV.E2.Eoper == OPconst && 2903 c1 * el_tolong(ref_.EV.E2) & ~0x7FFFFFFFFFFFFFFFL) || 2904 c1 & ~0x7FFFFFFFFFFFFFFFL) 2905 ) 2906 continue; 2907 } 2908 2909 /* If the incr is a decrement, and the relational is < or <=, 2910 * and its unsigned, then don't do it because it could drop below 0. 2911 * https://issues.dlang.org/show_bug.cgi?id=16189 2912 */ 2913 if ((einc.Eoper == OPminass || einc.EV.E2.Eoper == OPconst && el_tolong(einc.EV.E2) < 0) && 2914 (ref_.Eoper == OPlt || ref_.Eoper == OPle) && 2915 (tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety))) 2916 continue; 2917 2918 /* If loop started out with a signed conditional that was 2919 * replaced with an unsigned one, don't do it if c2 2920 * is less than 0. 2921 */ 2922 if (ref_.Nflags & NFLtouns && fl.c2.Eoper == OPconst) 2923 { 2924 targ_llong c2 = el_tolong(fl.c2); 2925 if (c2 < 0) 2926 continue; 2927 } 2928 2929 elem *refE2 = el_copytree(ref_.EV.E2); 2930 int refEoper = ref_.Eoper; 2931 2932 /* if c1 < 0 and relop is < <= > >= 2933 then adjust relop as if both sides were multiplied 2934 by -1 2935 */ 2936 if (!tyuns(ty) && 2937 (tyintegral(ty) && el_tolong(fl.c1) < 0 || 2938 tyfloating(ty) && el_toldoubled(fl.c1) < 0.0)) 2939 refEoper = swaprel(refEoper); 2940 2941 /* Replace (X relop e) with (X relop (short)e) 2942 if T is 1 word but e is 2 2943 */ 2944 if (tysize(flty) == SHORTSIZE && 2945 tysize(refE2.Ety) == LONGSIZE) 2946 refE2 = el_una(OP32_16,flty,refE2); 2947 2948 /* replace e with e*c1 + c2 */ 2949 elem* C2 = el_copytree(fl.c2); 2950 elem* fofe = el_bin(OPadd,flty, 2951 el_bin(OPmul,refE2.Ety, 2952 refE2, 2953 el_copytree(fl.c1)), 2954 C2); 2955 fofe = doptelem(fofe,GOALvalue | GOALagain); // fold any constants 2956 2957 if (tyuns(flty) && refEoper == OPge && 2958 fofe.Eoper == OPconst && el_allbits(fofe, 0) && 2959 fl.c2.Eoper == OPconst && !el_allbits(fl.c2, 0)) 2960 { 2961 /* Don't do it if replacement will result in 2962 * an unsigned T>=0 which will be an infinite loop. 2963 */ 2964 el_free(fofe); 2965 continue; 2966 } 2967 2968 if (debugc) printf("Eliminating basic IV '%s'\n",X.Sident.ptr); 2969 2970 debug if (debugc) 2971 { 2972 printf("Comparison replaced: "); 2973 WReqn(ref_); 2974 printf(" with "); 2975 } 2976 2977 el_free(ref_.EV.E2); 2978 ref_.EV.E2 = refE2; 2979 ref_.Eoper = cast(ubyte)refEoper; 2980 2981 elimass(einc); // dump the increment elem 2982 2983 // replace X with T 2984 assert(ref_.EV.E1.EV.Voffset == 0); 2985 ref_.EV.E1.EV.Vsym = fl.FLtemp; 2986 ref_.EV.E1.Ety = flty; 2987 ref_.EV.E2 = fofe; 2988 2989 /* If sizes of expression worked out wrong... 2990 Which can happen if we have (int)ptr==e 2991 */ 2992 if (OTbinary(fofe.Eoper)) // if didn't optimize it away 2993 { 2994 const tym_t fofety = fofe.Ety; 2995 const int sz = tysize(fofety); 2996 tym_t ty1 = fofe.EV.E1.Ety; 2997 const tym_t ty2 = fofe.EV.E2.Ety; 2998 /* Sizes of + expression must all be the same */ 2999 if (sz != tysize(ty1) && 3000 sz != tysize(ty2) 3001 ) 3002 { 3003 if (tyuns(fofety)) // if unsigned comparison 3004 ty1 = touns(ty1); /* to unsigned type */ 3005 fofe.Ety = ty1; 3006 ref_.EV.E1.Ety = ty1; 3007 } 3008 } 3009 3010 /* Fix if leaves of compare are TYfptrs and the compare */ 3011 /* operator is < <= > >=. */ 3012 if (ref_.Eoper >= OPle && ref_.Eoper <= OPge && tyfv(ref_.EV.E1.Ety)) 3013 { 3014 assert(tyfv(ref_.EV.E2.Ety)); 3015 ref_.EV.E1 = el_una(OPoffset,TYuint,ref_.EV.E1); 3016 ref_.EV.E2 = el_una(OPoffset,TYuint,fofe); 3017 } 3018 3019 debug if (debugc) 3020 { 3021 WReqn(ref_); 3022 printf("\n"); 3023 } 3024 3025 go.changes++; 3026 doflow = true; /* redo flow analysis */ 3027 3028 /* if X is live on entry to any successor S outside loop */ 3029 /* prepend elem X=(T-c2)/c1 to S.Belem */ 3030 3031 for (uint i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i) // for each exit block 3032 { 3033 elem *ne; 3034 block *b; 3035 3036 foreach (bl; ListRange(dfo[i].Bsucc)) 3037 { /* for each successor */ 3038 b = list_block(bl); 3039 if (vec_testbit(b.Bdfoidx,l.Lloop)) 3040 continue; /* inside loop */ 3041 if (!vec_testbit(X.Ssymnum,b.Binlv)) 3042 continue; /* not live */ 3043 3044 C2 = el_copytree(fl.c2); 3045 ne = el_bin(OPmin,ty, 3046 el_var(fl.FLtemp), 3047 C2); 3048 if (tybasic(ne.EV.E1.Ety) == TYfptr && 3049 tybasic(ne.EV.E2.Ety) == TYfptr) 3050 { 3051 ne.Ety = I64 ? TYllong : TYint; 3052 if (tylong(ty) && _tysize[TYint] == 2) 3053 ne = el_una(OPs16_32,ty,ne); 3054 } 3055 3056 ne = el_bin(OPeq,X.ty(), 3057 el_var(X), 3058 el_bin(OPdiv,ne.Ety, 3059 ne, 3060 el_copytree(fl.c1))); 3061 3062 debug if (debugc) 3063 { 3064 printf("Adding ("); 3065 WReqn(ne); 3066 printf(") to exit block B%d\n",b.Bdfoidx); 3067 //elem_print(ne); 3068 } 3069 3070 /* We have to add a new block if there is */ 3071 /* more than one predecessor to b. */ 3072 if (list_next(b.Bpred)) 3073 { 3074 block *bn = block_calloc(); 3075 bn.Btry = b.Btry; 3076 bn.BC = BCgoto; 3077 bn.Bnext = dfo[i].Bnext; 3078 dfo[i].Bnext = bn; 3079 list_append(&(bn.Bsucc),b); 3080 list_append(&(bn.Bpred),dfo[i]); 3081 bl.ptr = cast(void *)bn; 3082 foreach (bl2; ListRange(b.Bpred)) 3083 if (list_block(bl2) == dfo[i]) 3084 { 3085 bl2.ptr = cast(void *)bn; 3086 goto L2; 3087 } 3088 assert(0); 3089 L2: 3090 b = bn; 3091 addblk = true; 3092 } 3093 3094 if (b.Belem) 3095 b.Belem = 3096 el_bin(OPcomma,b.Belem.Ety, 3097 ne,b.Belem); 3098 else 3099 b.Belem = ne; 3100 go.changes++; 3101 doflow = true; /* redo flow analysis */ 3102 } /* for each successor */ 3103 } /* foreach exit block */ 3104 if (addblk) 3105 return; 3106 } 3107 else if (refcount == 0) /* if no uses of IV in loop */ 3108 { 3109 /* Eliminate the basic IV if it is not live on any successor */ 3110 for (uint i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i) // for each exit block 3111 { 3112 foreach (bl; ListRange(dfo[i].Bsucc)) 3113 { /* for each successor */ 3114 block *b = list_block(bl); 3115 if (vec_testbit(b.Bdfoidx,l.Lloop)) 3116 continue; /* inside loop */ 3117 if (vec_testbit(X.Ssymnum,b.Binlv)) 3118 goto L1; /* live */ 3119 } 3120 } 3121 3122 if (debugc) printf("No uses, eliminating basic IV '%s' (%p)\n",X.Sident.ptr,X); 3123 3124 /* Remove the (s op= e2) by replacing it with (1 , e2) 3125 * and let later passes remove the (1 ,) nodes. 3126 * Don't remove those nodes here because other biv's may refer 3127 * to them. 3128 */ 3129 { 3130 elem* ei = *biv.IVincr; 3131 ei.Eoper = OPcomma; 3132 ei.EV.E1.Eoper = OPconst; 3133 ei.EV.E1.Ety = TYint; 3134 } 3135 3136 go.changes++; 3137 doflow = true; /* redo flow analysis */ 3138 L1: 3139 } 3140 } /* for */ 3141 } 3142 3143 /*********************** 3144 * Eliminate opeq IVs that are not used outside the loop. 3145 */ 3146 3147 @trusted 3148 private void elimopeqs(ref Loop l) 3149 { 3150 elem **pref; 3151 Symbol *X; 3152 int refcount; 3153 3154 if (debugc) printf("elimopeqs(%p)\n", &l); 3155 //foreach (ref biv; l.Lopeqlist) elem_print(*(biv.IVincr)); 3156 3157 foreach (ref biv; l.Lopeqlist) 3158 { 3159 // Can't eliminate this basic IV if we have a goal for the 3160 // increment elem. 3161 // Be careful about Nflags being in a union... 3162 if (!((*biv.IVincr).Nflags & NFLnogoal)) 3163 continue; 3164 3165 X = biv.IVbasic; 3166 assert(symbol_isintab(X)); 3167 pref = onlyref(X,l,*biv.IVincr,refcount); 3168 3169 // if only ref of X is of the form (X) or (X relop e) or (e relop X) 3170 if (pref != null && refcount <= 1) 3171 { } 3172 else if (refcount == 0) // if no uses of IV in loop 3173 { // Eliminate the basic IV if it is not live on any successor 3174 uint i; 3175 for (i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i) // for each exit block 3176 { 3177 foreach (bl; ListRange(dfo[i].Bsucc)) 3178 { // for each successor 3179 block *b = list_block(bl); 3180 if (vec_testbit(b.Bdfoidx,l.Lloop)) 3181 continue; // inside loop 3182 if (vec_testbit(X.Ssymnum,b.Binlv)) 3183 goto L1; // live 3184 } 3185 } 3186 3187 if (debugc) printf("No uses, eliminating opeq IV '%s' (%p)\n",X.Sident.ptr,X); 3188 3189 /* Remove the (s op= e2) by replacing it with (1 , e2) 3190 * and let later passes remove the (1 ,) nodes. 3191 * Don't remove those nodes here because other biv's may refer 3192 * to them, for nodes like (sum += i++) 3193 */ 3194 { 3195 elem* einc = *(biv.IVincr); 3196 einc.Eoper = OPcomma; 3197 einc.EV.E1.Eoper = OPconst; 3198 einc.EV.E1.Ety = TYint; 3199 } 3200 3201 go.changes++; 3202 doflow = true; // redo flow analysis 3203 L1: 3204 } 3205 } 3206 } 3207 3208 /************************** 3209 * Find simplest elem in family. 3210 * Params: 3211 * fams = array of famlist's 3212 * tym = type of basic IV 3213 * Returns: index into fams[] of simplest; fams.length if none found. 3214 */ 3215 3216 @trusted 3217 extern (D) 3218 private size_t simfl(famlist[] fams, tym_t tym) 3219 { 3220 size_t sofar = fams.length; 3221 3222 foreach (i, ref fl; fams) 3223 { 3224 if (fl.FLtemp == FLELIM) /* no variable, so skip it */ 3225 continue; 3226 /* If size of replacement is less than size of biv, we could */ 3227 /* be in trouble due to loss of precision. */ 3228 if (size(fl.FLtemp.ty()) < size(tym)) 3229 continue; 3230 3231 // pick simplest 3232 sofar = sofar == fams.length ? i 3233 : (flcmp(fams[sofar], fams[i]) ? sofar : i); 3234 } 3235 return sofar; 3236 } 3237 3238 /************************** 3239 * Return simpler of two family elems. 3240 * There is room for improvement, namely if 3241 * f1.c1 = 2, f2.c1 = 27 3242 * then pick f1 because it is a shift. 3243 * Returns: 3244 * true for f1 is simpler, false for f2 is simpler 3245 */ 3246 3247 @trusted 3248 private bool flcmp(const ref famlist f1, const ref famlist f2) 3249 { 3250 auto t1 = &(f1.c1.EV); 3251 auto t2 = &(f2.c1.EV); 3252 auto ty = (*f1.FLpelem).Ety; // type of elem 3253 3254 static if (0) 3255 { 3256 printf("f1: c1 = %d, c2 = %d\n",t1.Vshort,f1.c2.EV.Vshort); 3257 printf("f2: c1 = %d, c2 = %d\n",t2.Vshort,f2.c2.EV.Vshort); 3258 printf("%s %s\n", tym_str((*f1.FLpelem).Ety), tym_str((*f2.FLpelem).Ety)); 3259 } 3260 3261 /* Wimp out and just pick f1 if the types don't match */ 3262 if (tysize(ty) == tysize((*f2.FLpelem).Ety)) 3263 { 3264 switch (tybasic(ty)) 3265 { case TYbool: 3266 case TYchar: 3267 case TYschar: 3268 case TYuchar: 3269 if (t2.Vuchar == 1 || 3270 t1.Vuchar != 1 && f2.c2.EV.Vuchar == 0) 3271 goto Lf2; 3272 break; 3273 3274 case TYshort: 3275 case TYushort: 3276 case TYchar16: 3277 case TYwchar_t: // BUG: what about 4 byte wchar_t's? 3278 case_short: 3279 if (t2.Vshort == 1 || 3280 t1.Vshort != 1 && f2.c2.EV.Vshort == 0) 3281 goto Lf2; 3282 break; 3283 3284 case TYsptr: 3285 case TYcptr: 3286 case TYnptr: // BUG: 64 bit pointers? 3287 case TYimmutPtr: 3288 case TYsharePtr: 3289 case TYrestrictPtr: 3290 case TYfgPtr: 3291 case TYnullptr: 3292 case TYint: 3293 case TYuint: 3294 if (_tysize[TYint] == SHORTSIZE) 3295 goto case_short; 3296 else 3297 goto case_long; 3298 3299 case TYlong: 3300 case TYulong: 3301 case TYdchar: 3302 case TYfptr: 3303 case TYvptr: 3304 case TYhptr: 3305 case_long: 3306 if (t2.Vlong == 1 || 3307 t1.Vlong != 1 && f2.c2.EV.Vlong == 0) 3308 goto Lf2; 3309 break; 3310 3311 case TYfloat: 3312 if (t2.Vfloat == 1 || 3313 t1.Vfloat != 1 && f2.c2.EV.Vfloat == 0) 3314 goto Lf2; 3315 break; 3316 3317 case TYdouble: 3318 case TYdouble_alias: 3319 if (t2.Vdouble == 1.0 || 3320 t1.Vdouble != 1.0 && f2.c2.EV.Vdouble == 0) 3321 goto Lf2; 3322 break; 3323 3324 case TYldouble: 3325 if (t2.Vldouble == 1.0 || 3326 t1.Vldouble != 1.0 && f2.c2.EV.Vldouble == 0) 3327 goto Lf2; 3328 break; 3329 3330 case TYllong: 3331 case TYullong: 3332 if (t2.Vllong == 1 || 3333 t1.Vllong != 1 && f2.c2.EV.Vllong == 0) 3334 goto Lf2; 3335 break; 3336 3337 default: 3338 assert(0); 3339 } 3340 } 3341 //printf("picking f1\n"); 3342 return true; 3343 3344 Lf2: 3345 //printf("picking f2\n"); 3346 return false; 3347 } 3348 3349 /***************************** 3350 * If loop is in a try block, see if there are references to x in an enclosing 3351 * try block. This is because the loop may throw and transfer control 3352 * outside the try block, and that will count as a use of x. 3353 * Params: 3354 * x = basic induction variable symbol 3355 * l = loop x is in 3356 * Returns: 3357 * true if x is used outside the try block 3358 */ 3359 @trusted 3360 private bool catchRef(Symbol* x, ref Loop l) 3361 { 3362 block* btry = l.Lhead.Btry; 3363 if (!btry) 3364 return false; // not in a try block 3365 3366 foreach (i, b; dfo[]) 3367 { 3368 if (vec_testbit(b.Bdfoidx, l.Lloop)) 3369 continue; 3370 /* this is conservative, just checking if x is used outside the loop. 3371 * A better check would see if the body of the loop throws, and would 3372 * check the enclosing catch/finally blocks and their exit blocks. 3373 * https://issues.dlang.org/show_bug.cgi?22104 3374 */ 3375 if (vec_testbit(x.Ssymnum, b.Binlv)) 3376 return true; 3377 } 3378 3379 return false; 3380 } 3381 3382 /************************************ 3383 * Params: 3384 * x = basic IV symbol 3385 * incn = increment elem for basic IV X. 3386 * refcount = set to # of references to X other than the increment elem 3387 * Returns: 3388 * If ref of X in loop l is of the form (X relop e) or (e relop X) 3389 * Return the relop elem 3390 * Else 3391 * Return null 3392 */ 3393 3394 private __gshared 3395 { 3396 elem **nd; 3397 elem *sincn; 3398 Symbol *X; 3399 } 3400 3401 @trusted 3402 private elem ** onlyref(Symbol *x, ref Loop l,elem *incn, out int refcount) 3403 { 3404 uint i; 3405 3406 //printf("onlyref('%s')\n", x.Sident.ptr); 3407 X = x; /* save some parameter passing */ 3408 assert(symbol_isintab(x)); 3409 sincn = incn; 3410 3411 debug 3412 if (!(X.Ssymnum < globsym.length && incn)) 3413 printf("X = %d, globsym.length = %d, l = %p, incn = %p\n",cast(int) X.Ssymnum,cast(int) globsym.length,&l,incn); 3414 3415 assert(X.Ssymnum < globsym.length && incn); 3416 int count = 0; 3417 nd = null; 3418 for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) // for each block in loop 3419 { 3420 block* b = dfo[i]; 3421 if (b.Belem) 3422 { 3423 count += countrefs(&b.Belem,b.BC == BCiftrue); 3424 } 3425 } 3426 3427 static if (0) 3428 { 3429 printf("count = %d, nd = (", count); 3430 if (nd) WReqn(*nd); 3431 printf(")\n"); 3432 } 3433 3434 refcount = count; 3435 return nd; 3436 } 3437 3438 3439 /****************************** 3440 * Count elems of the form (X relop e) or (e relop X). 3441 * Do not count the node if it is the increment node (sincn). 3442 * Params: 3443 * pn = pointer to start of elem tree 3444 * flag = true if block wants to test the elem 3445 * Returns: 3446 * number of elems of the form 3447 */ 3448 3449 @trusted 3450 private int countrefs(elem **pn,bool flag) 3451 { 3452 elem *n = *pn; 3453 3454 assert(n); 3455 if (n == sincn) /* if it is the increment elem */ 3456 { 3457 return OTbinary(n.Eoper) 3458 ? countrefs(&n.EV.E2, false) 3459 : 0; // don't count lvalue 3460 } 3461 if (OTunary(n.Eoper)) 3462 return countrefs(&n.EV.E1,false); 3463 else if (OTbinary(n.Eoper)) 3464 { 3465 if (OTrel(n.Eoper)) 3466 { 3467 elem *e1 = n.EV.E1; 3468 3469 assert(e1.Eoper != OPcomma); 3470 if (e1 == sincn && 3471 (e1.Eoper == OPeq || OTopeq(e1.Eoper))) 3472 goto L1; 3473 3474 /* Check both subtrees to see if n is the comparison node, 3475 * that is, if X is a leaf of the comparison. 3476 */ 3477 if (e1.Eoper == OPvar && e1.EV.Vsym == X && !countrefs2(n.EV.E2, X) || 3478 n.EV.E2.Eoper == OPvar && n.EV.E2.EV.Vsym == X && !countrefs2(e1, X)) 3479 nd = pn; /* found the relop node */ 3480 } 3481 L1: 3482 return countrefs(&n.EV.E1,false) + 3483 countrefs(&n.EV.E2,(flag && n.Eoper == OPcomma)); 3484 } 3485 else if ((n.Eoper == OPvar || n.Eoper == OPrelconst) && n.EV.Vsym == X) 3486 { 3487 if (flag) 3488 nd = pn; /* comparing it with 0 */ 3489 return 1; // found another reference 3490 } 3491 return 0; 3492 } 3493 3494 /******************************* 3495 * Count number of times symbol X appears in elem tree e. 3496 */ 3497 3498 @trusted 3499 private int countrefs2(const(elem)* e, const Symbol* s) 3500 { 3501 debug elem_debug(e); 3502 while (OTunary(e.Eoper)) 3503 e = e.EV.E1; 3504 if (OTbinary(e.Eoper)) 3505 return countrefs2(e.EV.E1, s) + countrefs2(e.EV.E2, s); 3506 return ((e.Eoper == OPvar || e.Eoper == OPrelconst) && 3507 e.EV.Vsym == s); 3508 } 3509 3510 /**************************** 3511 * Eliminate some special cases. 3512 */ 3513 3514 @trusted 3515 private 3516 extern(D) void elimspec(const ref Loop loop, block*[] dfo) 3517 { 3518 // Visit each block in loop 3519 for (size_t i = 0; (i = vec_index(i, loop.Lloop)) < dfo.length; ++i) 3520 { 3521 auto b = dfo[i]; 3522 if (b.Belem) 3523 elimspecwalk(&b.Belem); 3524 } 3525 } 3526 3527 3528 /****************************** 3529 */ 3530 3531 @trusted 3532 private void elimspecwalk(elem **pn) 3533 { 3534 elem *n; 3535 3536 n = *pn; 3537 assert(n); 3538 if (OTunary(n.Eoper)) 3539 elimspecwalk(&n.EV.E1); 3540 else if (OTbinary(n.Eoper)) 3541 { 3542 elimspecwalk(&n.EV.E1); 3543 elimspecwalk(&n.EV.E2); 3544 if (OTrel(n.Eoper)) 3545 { 3546 elem *e1 = n.EV.E1; 3547 3548 /* Replace ((e1,e2) rel e3) with (e1,(e2 rel e3). 3549 * This will reduce the number of cases for elimbasivs(). 3550 * Don't do equivalent with (e1 rel (e2,e3)) because 3551 * of potential side effects in e1. 3552 */ 3553 if (e1.Eoper == OPcomma) 3554 { 3555 elem *e; 3556 3557 debug if (debugc) 3558 { printf("3rewriting ("); WReqn(n); printf(")\n"); } 3559 3560 e = n.EV.E2; 3561 n.EV.E2 = e1; 3562 n.EV.E1 = n.EV.E2.EV.E1; 3563 n.EV.E2.EV.E1 = n.EV.E2.EV.E2; 3564 n.EV.E2.EV.E2 = e; 3565 n.EV.E2.Eoper = n.Eoper; 3566 n.EV.E2.Ety = n.Ety; 3567 n.Eoper = OPcomma; 3568 3569 go.changes++; 3570 doflow = true; 3571 3572 elimspecwalk(&n.EV.E1); 3573 elimspecwalk(&n.EV.E2); 3574 } 3575 3576 /* Rewrite ((X op= e2) rel e3) into ((X op= e2),(X rel e3)) 3577 * Rewrite ((X ++ e2) rel e3) into ((X += e2),(X-e2 rel e3)) 3578 * so that the op= will not have a goal, so elimbasivs() 3579 * will work on it. 3580 */ 3581 if ((OTopeq(e1.Eoper) 3582 || OTpost(e1.Eoper) 3583 ) && 3584 !el_sideeffect(e1.EV.E1)) 3585 { 3586 elem *e; 3587 OPER op; 3588 3589 debug if (debugc) 3590 { printf("4rewriting ("); WReqn(n); printf(")\n"); } 3591 3592 e = el_calloc(); 3593 el_copy(e,n); 3594 e.EV.E1 = el_copytree(e1.EV.E1); 3595 e.EV.E1.Ety = n.EV.E1.Ety; 3596 n.EV.E2 = e; 3597 switch (e1.Eoper) 3598 { 3599 case OPpostinc: 3600 e1.Eoper = OPaddass; 3601 op = OPmin; 3602 goto L3; 3603 3604 case OPpostdec: 3605 e1.Eoper = OPminass; 3606 op = OPadd; 3607 L3: e.EV.E1 = el_bin(op,e.EV.E1.Ety,e.EV.E1,el_copytree(e1.EV.E2)); 3608 break; 3609 3610 default: 3611 break; 3612 } 3613 /* increment node is now guaranteed to have no goal */ 3614 e1.Nflags |= NFLnogoal; 3615 n.Eoper = OPcomma; 3616 //go.changes++; 3617 doflow = true; 3618 3619 elimspecwalk(&n.EV.E1); 3620 elimspecwalk(&n.EV.E2); 3621 } 3622 } 3623 } 3624 } 3625 3626 /******** 3627 * Walk e in execution order. 3628 * When eincrement is found, remove it. 3629 * Continue, replacing instances of `v` with `v+increment` 3630 * When second eincrement is found, stop. 3631 * Params: 3632 * e = expression to walk 3633 * defnum = index of eincrement 3634 * v = increment variable 3635 * increment = amount to increment v 3636 * unrolls = number of times loop has been unrolled 3637 */ 3638 3639 private void unrollWalker(elem* e, uint defnum, Symbol* v, targ_llong increment, int unrolls) nothrow 3640 { 3641 int state = 0; 3642 3643 /*********************************** 3644 * Walk e in execution order, fixing it according to state. 3645 * state == 0..unrolls-1: when eincrement is found, remove it, advance to next state 3646 * state == 1..unrolls-1: replacing instances of v with v+(state*increment), 3647 * state == unrolls-1: leave eincrement alone, advance to next state 3648 * state == unrolls: done 3649 */ 3650 3651 void walker(elem *e) 3652 { 3653 assert(e); 3654 const op = e.Eoper; 3655 if (ERTOL(e)) 3656 { 3657 if (e.Edef != defnum) 3658 { 3659 walker(e.EV.E2); 3660 walker(e.EV.E1); 3661 } 3662 } 3663 else if (OTbinary(op)) 3664 { 3665 if (e.Edef != defnum) 3666 { 3667 walker(e.EV.E1); 3668 walker(e.EV.E2); 3669 } 3670 } 3671 else if (OTunary(op)) 3672 { 3673 assert(e.Edef != defnum); 3674 walker(e.EV.E1); 3675 } 3676 else if (op == OPvar && 3677 state && 3678 e.EV.Vsym == v) 3679 { 3680 // overwrite e with (v+increment) 3681 elem *e1 = el_calloc(); 3682 el_copy(e1,e); 3683 e.Eoper = OPadd; 3684 e.EV.E1 = e1; 3685 e.EV.E2 = el_long(e.Ety, increment * state); 3686 } 3687 if (OTdef(op) && e.Edef == defnum) 3688 { 3689 // found the increment elem; neuter all but the last one 3690 if (state + 1 < unrolls) 3691 { 3692 el_free(e.EV.E1); 3693 el_free(e.EV.E2); 3694 e.Eoper = OPconst; 3695 e.EV.Vllong = 0; 3696 } 3697 ++state; 3698 } 3699 } 3700 3701 walker(e); 3702 assert(state == unrolls); 3703 } 3704 3705 3706 /********************************* 3707 * Unroll loop if possible. 3708 * Params: 3709 * l = loop to unroll 3710 * Returns: 3711 * true if loop was unrolled 3712 */ 3713 @trusted 3714 bool loopunroll(ref Loop l) 3715 { 3716 const bool log = false; 3717 if (log) printf("loopunroll(%p)\n", &l); 3718 3719 /* Do not repeatedly unroll the same loop, 3720 * or waste time attempting to 3721 */ 3722 if (l.Lhead.Bflags & BFLnounroll) 3723 return false; 3724 l.Lhead.Bflags |= BFLnounroll; 3725 if (log) 3726 WRfunc("loop", funcsym_p, startblock); 3727 3728 if (l.Lhead.Btry || l.Ltail.Btry) 3729 return false; 3730 3731 /* For simplification, only unroll loops that consist only 3732 * of a head and tail, and the tail is the exit block. 3733 */ 3734 const numblocks = vec_numBitsSet(l.Lloop); 3735 { // Ensure no changes 3736 if (dfo.length != vec_numbits(l.Lloop)) assert(0); 3737 int n = 0; 3738 for (int i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) // for each block in loop 3739 ++n; 3740 if (n != numblocks) assert(0); 3741 } 3742 if (numblocks != 2) 3743 { 3744 if (log) printf("\tnot 2 blocks, but %d\n", numblocks); 3745 return false; 3746 } 3747 assert(l.Lhead != l.Ltail); 3748 3749 /* tail must be the sole exit block 3750 */ 3751 if (vec_testbit(l.Lhead.Bdfoidx, l.Lexit) || 3752 !vec_testbit(l.Ltail.Bdfoidx, l.Lexit)) 3753 { 3754 if (log) printf("\ttail not sole exit block\n"); 3755 return false; 3756 } 3757 3758 elem *ehead = l.Lhead.Belem; 3759 elem *etail = l.Ltail.Belem; 3760 3761 if (log) 3762 { 3763 printf("Unroll candidate:\n"); 3764 printf(" head B%d:\t", l.Lhead.Bdfoidx); WReqn(l.Lhead.Belem); printf("\n"); 3765 printf(" tail B%d:\t", l.Ltail.Bdfoidx); WReqn(l.Ltail.Belem); printf("\n"); 3766 } 3767 3768 /* Tail must be of the form: (v < c) or (v <= c) where v is an unsigned integer 3769 */ 3770 if ((etail.Eoper != OPlt && etail.Eoper != OPle) || 3771 etail.EV.E1.Eoper != OPvar || 3772 etail.EV.E2.Eoper != OPconst) 3773 { 3774 if (log) printf("\tnot (v < c)\n"); 3775 return false; 3776 } 3777 3778 elem *e1 = etail.EV.E1; 3779 elem *e2 = etail.EV.E2; 3780 3781 if (!tyintegral(e1.Ety) || 3782 tysize(e1.Ety) > targ_llong.sizeof || 3783 !(tyuns(e1.Ety) || tyuns(e2.Ety))) 3784 { 3785 if (log) printf("\tnot (integral unsigned)\n"); 3786 return false; 3787 } 3788 3789 int cost = el_length(ehead); 3790 //printf("test4 cost: %d\n", cost); 3791 3792 if (cost > 100) 3793 { 3794 if (log) printf("\tcost %d\n", cost); 3795 return false; 3796 } 3797 if (log) printf("cost %d\n", cost); 3798 3799 Symbol* v = e1.EV.Vsym; 3800 3801 // RD info is only reliable for registers and autos 3802 if (!(sytab[v.Sclass] & SCRD) || !(v.Sflags & SFLunambig)) 3803 { 3804 if (log) printf("\tnot SCRD\n"); 3805 return false; 3806 } 3807 3808 /* Find the initial, increment elem, and final value of s 3809 */ 3810 elem *einitial; 3811 elem *eincrement; 3812 if (!findloopparameters(etail, einitial, eincrement)) 3813 { 3814 if (log) printf("\tnot findloopparameters()\n"); 3815 return false; 3816 } 3817 3818 targ_llong initial = el_tolong(einitial.EV.E2); 3819 targ_llong increment = el_tolong(eincrement.EV.E2); 3820 if (eincrement.Eoper == OPpostdec || eincrement.Eoper == OPminass) 3821 increment = -increment; 3822 targ_llong final_ = el_tolong(e2); 3823 3824 if (log) printf("initial = %lld, increment = %lld, final = %lld\n",cast(long)initial,cast(long)increment,cast(long)final_); 3825 3826 if (etail.Eoper == OPle) 3827 ++final_; 3828 3829 if (initial < 0 || 3830 final_ < initial || 3831 increment <= 0 || 3832 (final_ - initial) % increment) 3833 { 3834 if (log) printf("\tnot (evenly divisible)\n"); 3835 return false; 3836 } 3837 3838 /* If loop would only execute once anyway, just remove the test at the end 3839 */ 3840 if (initial + increment == final_) 3841 { 3842 if (log) printf("\tjust once\n"); 3843 etail.Eoper = OPcomma; 3844 e2.EV.Vllong = 0; 3845 e2.Ety = etail.Ety; 3846 return false; 3847 } 3848 3849 /* number of times the loop is unrolled 3850 */ 3851 targ_ullong numIterations = (final_ - initial) / increment; 3852 const int unrolls = (numIterations < 1000 / cost) 3853 ? cast(int)numIterations 3854 : 2; 3855 3856 if (unrolls == 0 || (final_ - initial) % unrolls) 3857 { 3858 if (log) printf("\tnot (divisible by %d)\n", unrolls); 3859 return false; 3860 } 3861 3862 if (log) printf("Unrolling starting\n"); 3863 3864 // Double the increment 3865 eincrement.EV.E2.EV.Vllong *= unrolls; 3866 //printf(" 4head:\t"); WReqn(l.Lhead.Belem); printf("\n"); 3867 3868 elem* e = null; 3869 foreach (i; 0 .. unrolls) 3870 e = el_combine(e, el_copytree(ehead)); 3871 3872 /* Walk e in execution order. 3873 * When eincrement is found, remove it. 3874 * Continue, replacing instances of `v` with `v+increment` 3875 * When last eincrement is found, stop. 3876 */ 3877 unrollWalker(e, eincrement.Edef, v, increment, unrolls); 3878 3879 l.Lhead.Belem = e; 3880 3881 /* If unrolled loop would only execute once anyway, just remove the test at the end 3882 */ 3883 if (initial + unrolls * increment == final_) 3884 { 3885 if (log) printf("\tcompletely unrolled\n"); 3886 etail.Eoper = OPcomma; 3887 e2.EV.Vllong = 0; 3888 e2.Ety = etail.Ety; 3889 } 3890 3891 //WRfunc("loopunroll done", funcsym_p, startblock); 3892 return true; 3893 } 3894 3895 /****************************** 3896 * Count number of elems in a tree 3897 * Params: 3898 * e = tree 3899 * Returns: 3900 * number of elems in tree 3901 */ 3902 @trusted 3903 private int el_length(elem *e) 3904 { 3905 int n = 0; 3906 while (e) 3907 { 3908 n += 1; 3909 if (!OTleaf(e.Eoper)) 3910 { 3911 if (e.Eoper == OPctor || e.Eoper == OPdtor) 3912 return 10_000; 3913 n += el_length(e.EV.E2); 3914 e = e.EV.E1; 3915 } 3916 else 3917 break; 3918 } 3919 return n; 3920 } 3921 3922 3923 }