1 /**
2  * Global loop optimizations
3  *
4  * Compiler implementation of the
5  * $(LINK2 https://www.dlang.org, D programming language).
6  *
7  * Copyright:   Copyright (C) 1985-1998 by Symantec
8  *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
9  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
10  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
11  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/gloop.d, backend/gloop.d)
12  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/gloop.d
13  */
14 
15 module dmd.backend.gloop;
16 
17 version (SCPP)
18     version = COMPILE;
19 version (MARS)
20     version = COMPILE;
21 
22 version (COMPILE)
23 {
24 
25 import core.stdc.stdio;
26 import core.stdc.stdlib;
27 import core.stdc.string;
28 
29 import dmd.backend.cc;
30 import dmd.backend.cdef;
31 import dmd.backend.code_x86;
32 import dmd.backend.evalu8 : el_toldoubled;
33 import dmd.backend.oper;
34 import dmd.backend.global;
35 import dmd.backend.goh;
36 import dmd.backend.el;
37 import dmd.backend.symtab;
38 import dmd.backend.ty;
39 import dmd.backend.type;
40 
41 import dmd.backend.barray;
42 import dmd.backend.dlist;
43 import dmd.backend.dvec;
44 import dmd.backend.mem;
45 
46 nothrow:
47 @safe:
48 
49 @trusted
50 char symbol_isintab(const Symbol *s) { return sytab[s.Sclass] & SCSS; }
51 
52 extern (C++):
53 
54 bool findloopparameters(elem* erel, ref elem* rdeq, ref elem* rdinc);
55 
56 alias Loops = Rarray!Loop;
57 
58 
59 /*********************************
60  * Loop data structure.
61  */
62 
63 struct Loop
64 {
65 nothrow:
66     vec_t Lloop;        // Vector of blocks in this loop
67     vec_t Lexit;        // Vector of exit blocks of loop
68     block *Lhead;       // Pointer to header of loop
69     block *Ltail;       // Pointer to tail
70     block *Lpreheader;  // Pointer to preheader (if any)
71     Barray!(elem*) Llis; // loop invariant elems moved to Lpreheader, so
72                         // redundant temporaries aren't created
73     Rarray!Iv Livlist;        // basic induction variables
74     Rarray!Iv Lopeqlist;      // list of other op= variables
75 
76     /*************************
77      * Reset memory so this allocation can be re-used.
78      */
79     @trusted
80     void reset()
81     {
82         vec_free(Lloop);
83         vec_free(Lexit);
84 
85         foreach (ref iv; Livlist)
86             iv.reset();
87         foreach (ref iv; Lopeqlist)
88             iv.reset();
89 
90         Llis.reset();
91         Livlist.reset();
92         Lopeqlist.reset();
93     }
94 
95     /***********************
96      * Write loop.
97      */
98 
99     @trusted
100     void print()
101     {
102         debug
103         {
104             Loop *l = &this;
105             printf("loop %p\n", l);
106             printf("\thead: B%d, tail: B%d, prehead: B%d\n",l.Lhead.Bdfoidx,
107                 l.Ltail.Bdfoidx,(l.Lpreheader ) ? l.Lpreheader.Bdfoidx
108                                                 : cast(uint)-1);
109             printf("\tLloop "); vec_println(l.Lloop);
110             printf("\tLexit "); vec_println(l.Lexit);
111         }
112     }
113 }
114 
115 struct famlist
116 {
117 nothrow:
118     elem **FLpelem;         /* parent of elem in the family         */
119     elem *c1;
120     elem *c2;               // c1*(basic IV) + c2
121     Symbol *FLtemp;         // symbol index of temporary (FLELIM if */
122                             /* this entry has no temporary)         */
123     tym_t FLty;             /* type of this induction variable      */
124     tym_t FLivty;           /* type of the basic IV elem (which is  */
125                             /* not necessarilly the type of the IV  */
126                             /* elem!)                               */
127 
128     void reset()
129     {
130         el_free(c1);
131         el_free(c2);
132     }
133 
134     @trusted
135     void print() const
136     {
137         debug
138         {
139             printf("famlist:\n");
140             printf("*FLpelem:\n");
141             elem_print(*FLpelem);
142             printf("c1:");
143             elem_print(c1);
144             printf("c2:");
145             elem_print(c2);
146             printf("FLty = %s\n", tym_str(FLty));
147             printf("FLivty = %s\n", tym_str(FLivty));
148         }
149     }
150 }
151 
152 @system
153 enum FLELIM = cast(Symbol *)-1;
154 
155 struct Iv
156 {
157 nothrow:
158     Symbol *IVbasic;        // symbol of basic IV
159     elem **IVincr;          // pointer to parent of IV increment elem
160     Barray!famlist IVfamily;      // variables in this family
161 
162     @trusted
163     void reset()
164     {
165         foreach (ref fl; IVfamily)
166         {
167             fl.reset();
168         }
169         IVfamily.reset();
170     }
171 
172     @trusted
173     void print() const
174     {
175         debug
176         {
177             printf("IV: '%s'\n",IVbasic.Sident.ptr);
178             printf("*IVincr:\n");
179             elem_print(*IVincr);
180         }
181     }
182 }
183 
184 
185 private __gshared bool addblk;                    /* if TRUE, then we added a block */
186 
187 /* is elem loop invariant?      */
188 int isLI(const elem* n) { return n.Nflags & NFLli; }
189 
190 /* make elem loop invariant     */
191 void makeLI(elem* n) { n.Nflags |= NFLli; }
192 
193 /******************************
194  *      Only variables that could only be unambiguously defined
195  *      are candidates for loop invariant removal and induction
196  *      variables.
197  *      This means only variables that have the SFLunambig flag
198  *      set for them.
199  *      Doing this will still cover 90% (I hope) of the cases, and
200  *      is a lot faster to compute.
201  */
202 
203 /*************
204  * Free loops.
205  */
206 
207 private void freeloop(ref Loops loops)
208 {
209     foreach (ref loop; loops)
210         loop.reset();
211     loops.reset();
212 }
213 
214 
215 /**********************************
216  * Initialize block information.
217  * Returns:
218  *      true if there is a BCasm block
219  */
220 
221 @trusted
222 bool blockinit()
223 {
224     bool hasasm = false;
225 
226     assert(dfo);
227     foreach (b; BlockRange(startblock))
228     {
229         debug                   /* check integrity of Bpred and Bsucc   */
230           L1:
231             foreach (blp; ListRange(b.Bpred))
232             {
233                 foreach (bls; ListRange(list_block(blp).Bsucc))
234                     if (list_block(bls) == b)
235                         continue L1;
236                 assert(0);
237             }
238 
239         hasasm |= (b.BC == BCasm);
240     }
241     foreach (j, b; dfo[])
242     {
243         assert(b.Bdfoidx == j);
244         b.Bdom = vec_realloc(b.Bdom, dfo.length); /* alloc Bdom vectors */
245         vec_clear(b.Bdom);
246     }
247     return hasasm;
248 }
249 
250 /****************************************
251  * Compute dominators (Bdom) for each block.
252  * See Aho & Ullman Fig. 13.5.
253  * Note that flow graph is reducible if there is only one
254  * pass through the loop.
255  * Params:
256  *   dfo = depth first order array of blocks,
257  *         Bdom vector is filled in for each block
258  */
259 
260 @trusted
261 void compdom()
262 {
263     compdom(dfo[]);
264 }
265 
266 @trusted
267 private extern (D) void compdom(block*[] dfo)
268 {
269     assert(dfo.length);
270     block* sb = dfo[0];                  // starting block
271 
272     vec_clear(sb.Bdom);
273     vec_setbit(0,sb.Bdom);               // starting block only doms itself
274     foreach (b; dfo)                     // for all except startblock
275     {
276         if (b != sb)
277             vec_set(b.Bdom);             // dominate all blocks
278     }
279 
280     vec_t t1 = vec_calloc(vec_numbits(sb.Bdom));       // allocate a temporary
281     uint counter = 0;                    // # of times thru loop
282     bool changes;
283     do
284     {
285         changes = false;
286         foreach (i, b; dfo)              // for each block in dfo[]
287         {
288             if (i == 0)
289                 continue;                // except startblock
290             if (b.Bpred)                 // if there are predecessors
291             {
292                 vec_set(t1);
293                 foreach (bl; ListRange(b.Bpred))
294                 {
295                     vec_andass(t1,list_block(bl).Bdom);
296                 }
297             }
298             else
299                 vec_clear(t1);      // no predecessors to dominate
300             vec_setbit(i,t1);       // each block doms itself
301             if (changes)
302                 vec_copy(b.Bdom,t1);
303             else if (!vec_equal(b.Bdom,t1))   // if any changes
304             {
305                 vec_copy(b.Bdom,t1);
306                 changes = true;
307             }
308         }
309         counter++;
310         assert(counter < 50);              // should have converged by now
311     } while (changes);
312     vec_free(t1);
313 
314     debug if (debugc)
315     {
316         printf("Flow graph is%s reducible\n", counter <= 2 ? "".ptr : " not".ptr);
317     }
318 }
319 
320 /***************************
321  * Test if block A dominates block B.
322  * Params:
323  *      A = block
324  *      B = another block
325  * Returns:
326  *      true if A dominates B.
327  */
328 
329 @trusted
330 bool dom(const block* A, const block* B)
331 {
332     assert(A && B && dfo && dfo[A.Bdfoidx] == A);
333     return vec_testbit(A.Bdfoidx,B.Bdom) != 0;
334 }
335 
336 /**********************
337  * Find all the loops.
338  */
339 
340 private extern (D) void findloops(block*[] dfo, ref Loops loops)
341 {
342     freeloop(loops);
343 
344     //printf("findloops()\n");
345     foreach (b; dfo)
346         b.Bweight = 1;             // reset Bweights
347     foreach_reverse (b; dfo)       // for each block (note reverse
348                                    // dfo order, so most nested
349                                    // loops are found first)
350     {
351         assert(b);
352         foreach (bl; ListRange(b.Bsucc))
353         {
354             block *s = list_block(bl);      // each successor s to b
355             assert(s);
356             if (dom(s,b))                   // if s dominates b
357                 buildloop(loops, s, b);     // we found a loop
358         }
359     }
360 
361     debug if (debugc)
362     {
363         foreach (ref l; loops)
364             l.print();
365     }
366 }
367 
368 /********************************
369  * Increase weight of a variable by a factor.
370  * Params:
371  *      weight = how important a variable is
372  *      factor = loops scale the importance
373  * Returns:
374  *      new weight
375  */
376 
377 private uint loop_weight(uint weight, int factor) pure
378 {
379     // Be careful not to overflow
380     if (weight < 0x1_0000)
381         weight *= 10 * factor;
382     else if (weight < 0x10_0000)
383         weight *= 2 * factor;
384     else
385         weight += factor;
386     assert(cast(int)weight > 0); // overflow check
387     return weight;
388 }
389 
390 /*****************************
391  * Construct natural loop.
392  * Algorithm 13.1 from Aho & Ullman.
393  * Note that head dom tail.
394  * Params:
395  *      ploops = collection of existing loops to add to
396  *      head = head of loop; head dominates tail
397  *      tail = tail of loop
398  */
399 
400 @trusted
401 private void buildloop(ref Loops ploops,block *head,block *tail)
402 {
403     //printf("buildloop()\n");
404 
405      /* Add a block b and all its predecessors to vector v.
406       */
407     static void insert(block *b, vec_t v)
408     {
409         assert(b && v);
410         if (!vec_testbit(b.Bdfoidx,v))      // if block is not in loop
411         {
412             vec_setbit(b.Bdfoidx,v);        // add block to loop
413             b.Bweight = loop_weight(b.Bweight,1);   // *10 usage count
414             foreach (bl; ListRange(b.Bpred))
415                 insert(list_block(bl), v);  // insert all its predecessors
416         }
417     }
418 
419     Loop* l;
420 
421     /* See if this is part of an existing loop. If so, merge the two.     */
422     foreach (ref lp; ploops)
423         if (lp.Lhead == head)           /* two loops with same header   */
424         {
425             // Calculate loop contents separately so we get the Bweights
426             // done accurately.
427 
428             vec_t v = vec_calloc(dfo.length);
429             vec_setbit(head.Bdfoidx,v);
430             head.Bweight = loop_weight(head.Bweight, 1);
431             insert(tail,v);
432 
433             vec_orass(lp.Lloop,v);      // merge into existing loop
434             vec_free(v);
435 
436             vec_clear(lp.Lexit);        // recompute exit blocks
437             l = &lp;
438             goto L1;
439         }
440 
441     /* Allocate loop entry        */
442     l = ploops.push();
443 
444     l.Lloop = vec_calloc(dfo.length);    // allocate loop bit vector
445     l.Lexit = vec_calloc(dfo.length);    // bit vector for exit blocks
446     l.Lhead = head;
447     l.Ltail = tail;
448     l.Lpreheader = null;
449 
450     vec_setbit(head.Bdfoidx,l.Lloop);    /* add head to the loop         */
451     head.Bweight = loop_weight(head.Bweight, 2);  // *20 usage for loop header
452 
453     insert(tail,l.Lloop);                /* insert tail in loop          */
454 
455 L1:
456     /* Find all the exit blocks (those blocks with
457      * successors outside the loop).
458      */
459 
460     // for each block in this loop
461     foreach (i; VecRange(l.Lloop))
462     {
463         if (dfo[i].BC == BCret || dfo[i].BC == BCretexp || dfo[i].BC == BCexit)
464             vec_setbit(i,l.Lexit); /* ret blocks are exit blocks */
465         else
466         {
467             foreach (bl; ListRange(dfo[i].Bsucc))
468                 if (!vec_testbit(list_block(bl).Bdfoidx,l.Lloop))
469                 {
470                     vec_setbit(i,l.Lexit);
471                     break;
472                 }
473         }
474     }
475 
476     /*  Find preheader, if any, to the loop.
477         The preheader is a block that has only the head as a successor.
478         All other predecessors of head must be inside the loop.
479      */
480     l.Lpreheader = null;
481     foreach (bl; ListRange(head.Bpred))
482     {
483         block *b = list_block(bl);
484 
485         if (!vec_testbit(b.Bdfoidx,l.Lloop))  /* if not in loop       */
486         {
487             if (l.Lpreheader)                 /* if already one       */
488             {
489                 l.Lpreheader = null;          /* can only be one      */
490                 break;
491             }
492             else
493             {
494                 if (list_next(b.Bsucc))       // if more than 1 successor
495                     break;                    // b can't be a preheader
496                 l.Lpreheader = b;
497             }
498         }
499     }
500 }
501 
502 /**************************************
503  * Perform loop rotations.
504  * Loop starts as:
505  *
506  *         prehead
507  *          |
508  *          v
509  *      +->head---->
510  *      |   |
511  *      |   v
512  *      |  body
513  *      |   |
514  *      |   v
515  *      +--tail
516  *
517  * Two types are done:
518  *      1) Header is moved to be past the tail.
519  *
520  *         prehead
521  *          |
522  *      +---+
523  *      |
524  *      |  body<-+
525  *      |   |    |
526  *      |   v    |
527  *      |  tail  |
528  *      |   |    |
529  *      |   v    |
530  *      +->head--+
531  *          |
532  *          v
533  *
534  *      2) Header is copied past the tail (done only if MFtime is set).
535  *
536  *         prehead
537  *          |
538  *          v
539  *         head1-----+
540  *          |        |
541  *          v        |
542  *         body<--+  |
543  *          |     |  |
544  *          v     |  |
545  *         tail   |  |
546  *          |     |  |
547  *          v     |  |
548  *         head2--+  |
549  *          |        |
550  *          +--------+
551  *          v
552  *
553  * Input:
554  *      Loop information (do not depend on the preheader information)
555  * Output:
556  *      Revised list of blocks, a new dfo and new loop information
557  * Returns:
558  *      true need to recompute loop data
559  */
560 
561 @trusted
562 private bool looprotate(ref Loop l)
563 {
564     block *tail = l.Ltail;
565     block *head = l.Lhead;
566 
567     //printf("looprotate(%p)\n",l);
568 
569     // Do not rotate loop if:
570     if (head == tail ||                         // loop is only one block big
571         !vec_testbit(head.Bdfoidx,l.Lexit))   // header is not an exit block
572         goto Lret;
573 
574     if (//iter != 1 &&
575         vec_testbit(tail.Bdfoidx,l.Lexit))    // tail is an exit block
576         goto Lret;
577 
578     // Do not rotate if already rotated
579     foreach (b; BlockRange(tail.Bnext))
580         if (b == head)                  // if loop already rotated
581             goto Lret;
582 
583     if (head.BC == BCtry)
584          goto Lret;
585     if (head.BC == BC_try)
586          goto Lret;
587 
588     //if (debugc) { printf("looprotate: "); l.print(); }
589 
590     if ((go.mfoptim & MFtime) && head.BC != BCswitch && head.BC != BCasm)
591     {   // Duplicate the header past the tail (but doing
592         // switches would be too expensive in terms of code
593         // generated).
594 
595         auto head2 = block_calloc(); // create new head block
596         head2.Btry = head.Btry;
597         head2.Bflags = head.Bflags;
598         head.Bflags = BFLnomerg;       // move flags over to head2
599         head2.Bflags |= BFLnomerg;
600         head2.BC = head.BC;
601         assert(head2.BC != BCswitch);
602         if (head.Belem)                // copy expression tree
603             head2.Belem = el_copytree(head.Belem);
604         head2.Bnext = tail.Bnext;
605         tail.Bnext = head2;
606 
607         // pred(head1) = pred(head) outside loop
608         // pred(head2) = pred(head) inside loop
609         list_t *pbln;
610         auto pbl2 = &(head2.Bpred);
611         for (list_t *pbl = &(head.Bpred); *pbl; pbl = pbln)
612         {
613             if (vec_testbit(list_block(*pbl).Bdfoidx, l.Lloop))
614             {   // if this predecessor is inside the loop
615 
616                 *pbl2 = *pbl;
617                 *pbl = list_next(*pbl);
618                 pbln = pbl;                     // don't skip this next one
619                 (*pbl2).next = null;
620                 auto bsucc = list_block(*pbl2).Bsucc;
621                 pbl2 = &((*pbl2).next);
622                 foreach (bl; ListRange(bsucc))
623                     if (list_block(bl) == head)
624                     {
625                         bl.ptr = cast(void *)head2;
626                         goto L2;
627                     }
628                 assert(0);
629         L2:
630             }
631             else
632                 pbln = &((*pbl).next);      // next predecessor in list
633         } // for each pred(head)
634 
635         // succ(head2) = succ(head)
636         foreach (bl; ListRange(head.Bsucc))
637         {
638             list_append(&(head2.Bsucc),list_block(bl));
639             list_append(&(list_block(bl).Bpred),head2);
640         }
641         if (debugc) printf("1Rotated loop %p\n", &l);
642         go.changes++;
643         return true;
644     }
645     else if (startblock != head
646             /* This screws up the OPctor/OPdtor sequence for:
647              *   struct CString
648              *   {   CString();
649              *      ~CString();
650              *      int GetLength();
651              *   };
652              *
653              *   void f(void)
654              *   {  for(;;)
655              *      {   CString s ;
656              *    if(s.GetLength()!=0)
657              *       break ;
658              *      }
659              *   }
660              */
661             && !(config.flags3 & CFG3eh)
662             )
663     {   // optimize for space
664         // Simply position the header past the tail
665         foreach (b; BlockRange(startblock))
666         {
667             if (b.Bnext == head)
668             {   // found parent b of head
669                 b.Bnext = head.Bnext;
670                 head.Bnext = tail.Bnext;
671                 tail.Bnext = head;
672                 if (debugc) printf("2Rotated loop %p\n", &l);
673                 go.changes++;
674                 return false;
675             }
676         }
677         assert(0);
678     }
679 Lret:
680     return false;
681 }
682 
683 private __gshared
684 {
685     int gref;                // parameter for markinvar()
686     block *gblock;           // parameter for markinvar()
687     vec_t lv;                // parameter for markinvar()
688     vec_t gin;               // parameter for markinvar()
689     bool doflow;             // true if flow analysis has to be redone
690 }
691 
692 /*********************************
693  * Loop invariant and induction variable elimination.
694  * Input:
695  *      iter    which optimization iteration we are on
696  */
697 
698 @trusted
699 void loopopt()
700 {
701     __gshared Loops startloop_cache;
702 
703     Loops startloop = startloop_cache;
704 
705     if (debugc) printf("loopopt()\n");
706 restart:
707     file_progress();
708     if (blockinit())                    // init block data
709     {
710         findloops(dfo[], startloop);    // Compute Bweights
711         freeloop(startloop);            // free existing loops
712         startloop_cache = startloop;
713         return;                         // can't handle ASM blocks
714     }
715     compdom();                          // compute dominators
716     findloops(dfo[], startloop);              // find the loops
717 
718   L3:
719     while (1)
720     {
721         foreach (ref l; startloop)
722         {
723             if (looprotate(l))              // rotate the loop
724             {
725                 compdfo(dfo, startblock);
726                 blockinit();
727                 compdom();
728                 findloops(dfo[], startloop);
729                 continue L3;
730             }
731         }
732         break;
733     }
734 
735     // Make sure there is a preheader for each loop.
736 
737     addblk = false;                     /* assume no blocks added        */
738     foreach (ref l; startloop)
739     {
740         //if (debugc) l.print();
741 
742         if (!l.Lpreheader)             /* if no preheader               */
743         {
744             if (debugc) printf("Generating preheader for loop\n");
745             addblk = true;              // add one
746             block* p = block_calloc();  // the preheader
747             block* h = l.Lhead;         // loop header
748 
749             /* Find parent of h */
750             if (h == startblock)
751                 startblock = p;
752             else
753             {
754                 for (auto ph = startblock; 1; ph = ph.Bnext)
755                 {
756                     assert(ph);         /* should have found it         */
757                     if (ph.Bnext == h)
758                     {
759                         // Link p into block list between ph and h
760                         ph.Bnext = p;
761                         break;
762                     }
763                 }
764             }
765             p.Bnext = h;
766 
767             l.Lpreheader = p;
768             p.BC = BCgoto;
769             assert(p.Bsucc == null);
770             list_append(&(p.Bsucc),h); /* only successor is h          */
771             p.Btry = h.Btry;
772 
773             if (debugc) printf("Adding preheader %p to loop %p\n",p,&l);
774 
775             // Move preds of h that aren't in the loop to preds of p
776             for (list_t bl = h.Bpred; bl;)
777             {
778                 block *b = list_block(bl);
779 
780                 if (!vec_testbit (b.Bdfoidx, l.Lloop))
781                 {
782                     list_append(&(p.Bpred), b);
783                     list_subtract(&(h.Bpred), b);
784                     bl = h.Bpred;      /* dunno what subtract did      */
785 
786                     /* Fix up successors of predecessors        */
787                     foreach (bls; ListRange(b.Bsucc))
788                         if (list_block(bls) == h)
789                                 bls.ptr = cast(void *)p;
790                 }
791                 else
792                     bl = list_next(bl);
793             }
794             list_append(&(h.Bpred),p); /* p is a predecessor to h      */
795         }
796     } /* for */
797     if (addblk)                         /* if any blocks were added      */
798     {
799         compdfo(dfo, startblock);                      /* compute depth-first order    */
800         blockinit();
801         compdom();
802         findloops(dfo[], startloop);          // recompute block info
803         addblk = false;
804     }
805 
806     /* Do the loop optimizations.
807      */
808 
809     doflow = true;                      /* do flow analysis             */
810 
811     if (go.mfoptim & MFtime)
812     {
813         if (debugc) printf("Starting loop unrolling\n");
814     L2:
815         while (1)
816         {
817             foreach (ref l; startloop)
818             {
819                 if (loopunroll(l))
820                 {
821                     compdfo(dfo, startblock);                      // compute depth-first order
822                     blockinit();
823                     compdom();
824                     findloops(dfo[], startloop);    // recompute block info
825                     doflow = true;
826                     continue L2;
827                 }
828             }
829             break;
830         }
831     }
832 
833     /* Note that accessing the loops
834      * starting from startloop will access them in least nested
835      * one first, thus moving LIs out as far as possible
836      */
837     if (debugc) printf("Starting loop invariants\n");
838 
839     foreach_reverse (ref l; startloop)
840     {
841         //if (debugc) l.print();
842 
843         file_progress();
844         assert(l.Lpreheader);
845         if (doflow)
846         {
847             flowrd();               /* compute reaching definitions  */
848             flowlv();               /* compute live variables        */
849             flowae();               // compute available expressions
850             doflow = false;         /* no need to redo it           */
851             if (go.defnod.length == 0)     /* if no definition elems       */
852                 break;              /* no need to optimize          */
853         }
854         lv = l.Lloop;
855         if (debugc) printf("...Loop %p start...\n",&l);
856 
857         /* Unmark all elems in this loop         */
858         for (uint i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i)
859             if (dfo[i].Belem)
860                 unmarkall(dfo[i].Belem);       /* unmark all elems     */
861 
862         /* Find & mark all LIs   */
863         gin = vec_clone(l.Lpreheader.Bout);
864         vec_t rd = vec_calloc(go.defnod.length);        /* allocate our running RD vector */
865         for (uint i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i) // for each block in loop
866         {
867             block *b = dfo[i];
868 
869             if (debugc) printf("B%d\n",i);
870             if (b.Belem)
871             {
872                 vec_copy(rd, b.Binrd); // IN reaching defs
873                 static if (0)
874                 {
875                     printf("i = %d\n",i);
876                     {
877                         for (int j = 0; j < go.defnod.length; j++)
878                             elem_print(go.defnod[j].DNelem);
879                     }
880                     printf("rd    : "); vec_println(rd);
881                 }
882                 gblock = b;
883                 gref = 0;
884                 if (b != l.Lhead)
885                     gref = 1;
886                 markinvar(b.Belem, rd);
887                 static if (0)
888                 {
889                     printf("B%d\n", i);
890                     {
891                         foreach (j; 0 .. go.defnod.length)
892                         {
893                             printf("  [%2d] ", j);
894                             WReqn(go.defnod[j].DNelem);
895                             printf("\n");
896                         }
897                     }
898                     printf("rd    : "); vec_println(rd);
899                     printf("Boutrd: "); vec_println(b.Boutrd);
900                 }
901                 assert(vec_equal(rd, b.Boutrd));
902             }
903             else
904                 assert(vec_equal(b.Binrd, b.Boutrd));
905         }
906         vec_free(rd);
907         vec_free(gin);
908 
909         /* Move loop invariants  */
910         for (uint i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i)
911         {
912             uint domexit;               // true if this block dominates all
913                                         // exit blocks of the loop
914 
915             for (uint j = 0; (j = cast(uint) vec_index(j, l.Lexit)) < dfo.length; ++j) // for each exit block
916             {
917                 if (!vec_testbit (i, dfo[j].Bdom))
918                 {
919                     domexit = 0;
920                     goto L1;                // break if !(i dom j)
921                 }
922             }
923             // if i dom (all exit blocks)
924             domexit = 1;
925         L1:
926             if (dfo[i].Belem)
927             {   // If there is any hope of making an improvement
928                 if (domexit || l.Llis.length)
929                 {
930                     //if (dfo[i] != l.Lhead)
931                         //domexit |= 2;
932                     movelis(dfo[i].Belem, dfo[i], l, domexit);
933                 }
934             }
935         }
936         if (debugc) printf("...Loop %p done...\n",&l);
937 
938         if (go.mfoptim & MFliv)
939         {
940             loopiv(l);              /* induction variables          */
941             if (addblk)             /* if we added a block          */
942             {
943                 compdfo(dfo, startblock);
944                 goto restart;       /* play it safe and start over  */
945             }
946         }
947     } /* for */
948     freeloop(startloop);
949     startloop_cache = startloop;
950 }
951 
952 /*****************************
953  * If elem is loop invariant, mark it.
954  * Input:
955  *      lv =    vector of all the blocks in this loop.
956  *      rd =    vector of loop invariants for this elem. This must be
957  *              continually updated.
958  * Note that we do not iterate until no more LIs are found. The only
959  * thing this would buy us is stuff that depends on LI assignments.
960  */
961 
962 @trusted
963 private void markinvar(elem *n,vec_t rd)
964 {
965     vec_t tmp;
966     uint i;
967     Symbol *v;
968     elem *n1;
969 
970     assert(n && rd);
971     assert(vec_numbits(rd) == go.defnod.length);
972     switch (n.Eoper)
973     {
974         case OPaddass:  case OPminass:  case OPmulass:  case OPandass:
975         case OPorass:   case OPxorass:  case OPdivass:  case OPmodass:
976         case OPshlass:  case OPshrass:  case OPashrass:
977         case OPpostinc: case OPpostdec:
978         case OPcall:
979         case OPvecsto:
980         case OPcmpxchg:
981             markinvar(n.EV.E2,rd);
982             goto case OPnegass;
983 
984         case OPnegass:
985             n1 = n.EV.E1;
986             if (n1.Eoper == OPind)
987                     markinvar(n1.EV.E1,rd);
988             else if (OTbinary(n1.Eoper))
989             {   markinvar(n1.EV.E1,rd);
990                 markinvar(n1.EV.E2,rd);
991             }
992         L2:
993             if (n.Eoper == OPcall ||
994                 gblock.Btry ||
995                 !(n1.Eoper == OPvar &&
996                     symbol_isintab(n1.EV.Vsym)))
997             {
998                 gref = 1;
999             }
1000 
1001             updaterd(n,rd,null);
1002             break;
1003 
1004         case OPcallns:
1005             markinvar(n.EV.E2,rd);
1006             markinvar(n.EV.E1,rd);
1007             break;
1008 
1009         case OPstrcpy:
1010         case OPstrcat:
1011         case OPmemcpy:
1012         case OPmemset:
1013             markinvar(n.EV.E2,rd);
1014             markinvar(n.EV.E1,rd);
1015             updaterd(n,rd,null);
1016             break;
1017 
1018         case OPbtc:
1019         case OPbtr:
1020         case OPbts:
1021             markinvar(n.EV.E1,rd);
1022             markinvar(n.EV.E2,rd);
1023             updaterd(n,rd,null);
1024             break;
1025 
1026         case OPucall:
1027             markinvar(n.EV.E1,rd);
1028             goto case OPasm;
1029 
1030         case OPasm:
1031             gref = 1;
1032             updaterd(n,rd,null);
1033             break;
1034 
1035         case OPucallns:
1036         case OPstrpar:
1037         case OPstrctor:
1038         case OPvector:
1039         case OPvoid:
1040         case OPstrlen:
1041         case OPddtor:
1042         case OPinp:
1043         case OPprefetch:                // don't mark E2
1044             markinvar(n.EV.E1,rd);
1045             break;
1046 
1047         case OPcond:
1048         case OPparam:
1049         case OPstrcmp:
1050         case OPmemcmp:
1051         case OPbt:                      // OPbt is like OPind, assume not LI
1052         case OPoutp:
1053             markinvar(n.EV.E1,rd);
1054             markinvar(n.EV.E2,rd);
1055             break;
1056 
1057         case OPandand:
1058         case OPoror:
1059             markinvar(n.EV.E1,rd);
1060             tmp = vec_clone(rd);
1061             markinvar(n.EV.E2,tmp);
1062             if (el_returns(n.EV.E2))
1063                 vec_orass(rd,tmp);              // rd |= tmp
1064             vec_free(tmp);
1065             break;
1066 
1067         case OPcolon:
1068         case OPcolon2:
1069             tmp = vec_clone(rd);
1070             switch (el_returns(n.EV.E1) * 2 | int(el_returns(n.EV.E2)))
1071             {
1072                 case 3: // E1 and E2 return
1073                     markinvar(n.EV.E1,rd);
1074                     markinvar(n.EV.E2,tmp);
1075                     vec_orass(rd,tmp);              // rd |= tmp
1076                     break;
1077                 case 2: // E1 returns
1078                     markinvar(n.EV.E1,rd);
1079                     markinvar(n.EV.E2,tmp);
1080                     break;
1081                 case 1: // E2 returns
1082                     markinvar(n.EV.E1,tmp);
1083                     markinvar(n.EV.E2,rd);
1084                     break;
1085                 case 0: // neither returns
1086                     markinvar(n.EV.E1,tmp);
1087                     vec_copy(tmp,rd);
1088                     markinvar(n.EV.E2,tmp);
1089                     break;
1090                 default:
1091                     assert(0);
1092             }
1093             vec_free(tmp);
1094             break;
1095 
1096         case OPaddr:            // mark addresses of OPvars as LI
1097             markinvar(n.EV.E1,rd);
1098             if (n.EV.E1.Eoper == OPvar || isLI(n.EV.E1))
1099                 makeLI(n);
1100             break;
1101 
1102         case OPmsw:
1103         case OPneg:     case OPbool:    case OPnot:     case OPcom:
1104         case OPs16_32:  case OPd_s32:   case OPs32_d:
1105         case OPd_s16:   case OPs16_d:   case OPd_f:     case OPf_d:
1106         case OP32_16:   case OPu8_16:
1107         case OPld_d:    case OPd_ld:
1108         case OPld_u64:
1109         case OPc_r:     case OPc_i:
1110         case OPu16_32:
1111         case OPu16_d:   case OPd_u16:
1112         case OPs8_16:   case OP16_8:
1113         case OPd_u32:   case OPu32_d:
1114 
1115         case OPs32_64:  case OPu32_64:
1116         case OP64_32:
1117         case OPd_s64:   case OPd_u64:
1118         case OPs64_d:
1119         case OPu64_d:
1120         case OP128_64:
1121         case OPs64_128:
1122         case OPu64_128:
1123 
1124         case OPabs:
1125         case OPtoprec:
1126         case OPrndtol:
1127         case OPrint:
1128         case OPsetjmp:
1129         case OPbsf:
1130         case OPbsr:
1131         case OPbswap:
1132         case OPpopcnt:
1133         case OPsqrt:
1134         case OPsin:
1135         case OPcos:
1136         case OPvp_fp: /* BUG for MacHandles */
1137         case OPnp_f16p: case OPf16p_np: case OPoffset: case OPnp_fp:
1138         case OPcvp_fp:
1139         case OPvecfill:
1140             markinvar(n.EV.E1,rd);
1141             if (isLI(n.EV.E1))        /* if child is LI               */
1142                 makeLI(n);
1143             break;
1144 
1145         case OPeq:
1146         case OPstreq:
1147             markinvar(n.EV.E2,rd);
1148             n1 = n.EV.E1;
1149             markinvar(n1,rd);
1150 
1151             /* Determine if assignment is LI. Conditions are:       */
1152             /* 1) Rvalue is LI                                      */
1153             /* 2) Lvalue is a variable (simplifies things a lot)    */
1154             /* 3) Lvalue can only be affected by unambiguous defs   */
1155             /* 4) No rd's of lvalue that are within the loop (other */
1156             /*    than the current def)                             */
1157             if (isLI(n.EV.E2) && n1.Eoper == OPvar)          /* 1 & 2 */
1158             {
1159                 v = n1.EV.Vsym;
1160                 if (v.Sflags & SFLunambig)
1161                 {
1162                     tmp = vec_calloc(go.defnod.length);
1163                     //filterrd(tmp,rd,v);
1164                     listrds(rd,n1,tmp,null);
1165                     for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i)
1166                         if (go.defnod[i].DNelem != n &&
1167                             vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv))
1168                                 goto L3;
1169                     makeLI(n);      // then the def is LI
1170                 L3: vec_free(tmp);
1171                 }
1172             }
1173             goto L2;
1174 
1175         case OPadd:     case OPmin:     case OPmul:     case OPand:
1176         case OPor:      case OPxor:     case OPdiv:     case OPmod:
1177         case OPshl:     case OPshr:     case OPeqeq:    case OPne:
1178         case OPlt:      case OPle:      case OPgt:      case OPge:
1179         case OPashr:
1180         case OPror:     case OProl:
1181         case OPbtst:
1182 
1183         case OPunord:   case OPlg:      case OPleg:     case OPule:
1184         case OPul:      case OPuge:     case OPug:      case OPue:
1185         case OPngt:     case OPnge:     case OPnlt:     case OPnle:
1186         case OPord:     case OPnlg:     case OPnleg:    case OPnule:
1187         case OPnul:     case OPnuge:    case OPnug:     case OPnue:
1188 
1189         case OPcomma:
1190         case OPpair:
1191         case OPrpair:
1192         case OPremquo:
1193         case OPscale:
1194         case OPyl2x:
1195         case OPyl2xp1:
1196             markinvar(n.EV.E1,rd);
1197             markinvar(n.EV.E2,rd);
1198             if (isLI(n.EV.E2) && isLI(n.EV.E1))
1199                     makeLI(n);
1200             break;
1201 
1202         case OPind:                     /* must assume this is not LI   */
1203             markinvar(n.EV.E1,rd);
1204             if (isLI(n.EV.E1))
1205             {
1206                 static if (0)
1207                 {
1208                     // This doesn't work with C++, because exp2_ptrtocomtype() will
1209                     // transfer const to where it doesn't belong.
1210                     if (n.Ety & mTYconst)
1211                     {
1212                         makeLI(n);
1213                     }
1214 
1215                     // This was disabled because it was marking as LI
1216                     // the loop dimension for the [i] array if
1217                     // a[j][i] was in a loop. This meant the a[j] array bounds
1218                     // check for the a[j].length was skipped.
1219                     else if (n.Ejty)
1220                     {
1221                         tmp = vec_calloc(go.defnod.length);
1222                         filterrdind(tmp,rd,n);  // only the RDs pertaining to n
1223 
1224                         // if (no RDs within loop)
1225                         //      then it's loop invariant
1226 
1227                         for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i)  // for each RD
1228                             if (vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv))
1229                                 goto L10;       // found a RD in the loop
1230 
1231                         // If gref has occurred, this can still be LI
1232                         // if n is an AE that was also an AE at the
1233                         // point of gref.
1234                         // We can catch a subset of these cases by looking
1235                         // at the AEs at the start of the loop.
1236                         if (gref)
1237                         {
1238                             int j;
1239 
1240                             //printf("\tn is: "); WReqn(n); printf("\n");
1241                             for (j = 0; (j = cast(uint) vec_index(j, gin)) < go.exptop; ++j)
1242                             {
1243                                 elem *e = go.expnod[j];
1244 
1245                                 //printf("\t\tgo.expnod[%d] = %p\n",j,e);
1246                                 //printf("\t\tAE is: "); WReqn(e); printf("\n");
1247                                 if (el_match2(n,e))
1248                                 {
1249                                     makeLI(n);
1250                                     //printf("Ind LI: "); WReqn(n); printf("\n");
1251                                     break;
1252                                 }
1253                             }
1254                         }
1255                         else
1256                             makeLI(n);
1257                   L10:
1258                         vec_free(tmp);
1259                         break;
1260                     }
1261                 }
1262             }
1263             break;
1264 
1265         case OPvar:
1266             v = n.EV.Vsym;
1267             if (v.Sflags & SFLunambig)     // must be unambiguous to be LI
1268             {
1269                 tmp = vec_calloc(go.defnod.length);
1270                 //filterrd(tmp,rd,v);       // only the RDs pertaining to v
1271                 listrds(rd,n,tmp,null);  // only the RDs pertaining to v
1272 
1273                 // if (no RDs within loop)
1274                 //  then it's loop invariant
1275 
1276                 for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i)  // for each RD
1277                     if (vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv))
1278                         goto L1;    // found a RD in the loop
1279                 makeLI(n);
1280 
1281             L1: vec_free(tmp);
1282             }
1283             break;
1284 
1285         case OPstring:
1286         case OPrelconst:
1287         case OPconst:                   /* constants are always LI      */
1288         case OPframeptr:
1289             makeLI(n);
1290             break;
1291 
1292         case OPinfo:
1293             markinvar(n.EV.E2,rd);
1294             break;
1295 
1296         case OPstrthis:
1297         case OPmark:
1298         case OPctor:
1299         case OPdtor:
1300         case OPdctor:
1301         case OPhalt:
1302         case OPgot:                     // shouldn't OPgot be makeLI ?
1303             break;
1304 
1305         default:
1306             //printf("n.Eoper = %s, OPconst = %d\n", oper_str(n.Eoper), OPconst);
1307             assert(0);
1308     }
1309 
1310     debug
1311     {
1312         if (debugc && isLI(n))
1313         {
1314             printf("  LI elem: ");
1315             WReqn(n);
1316             printf("\n");
1317         }
1318     }
1319 }
1320 
1321 /********************
1322  * Update rd vector.
1323  * Input:
1324  *      n       assignment elem or function call elem or OPasm elem
1325  *      rd      reaching def vector to update
1326  *              (clear bits for defs we kill, set bit for n (which is the
1327  *               def we are genning))
1328  *      vecdim  go.defnod.length
1329  */
1330 
1331 extern (C) {
1332 @trusted
1333 void updaterd(elem *n,vec_t GEN,vec_t KILL)
1334 {
1335     const op = n.Eoper;
1336     elem *t;
1337 
1338     assert(OTdef(op));
1339     assert(GEN);
1340     elem_debug(n);
1341 
1342     uint ni = n.Edef;
1343     assert(ni != -1);
1344 
1345     // If unambiguous def
1346     if (OTassign(op) && (t = n.EV.E1).Eoper == OPvar)
1347     {
1348         vec_t v = go.defnod[ni].DNunambig;
1349         assert(v);
1350         if (KILL)
1351             vec_orass(KILL, v);
1352         vec_subass(GEN, v);
1353     }
1354     else
1355     {
1356         static if (0)
1357         {
1358             if (OTassign(op) && t.Eoper != OPvar && t.Ejty)
1359             {
1360                 // for all unambig defs in go.defnod[]
1361                 foreach (uint i; 0 .. go.defnod.length)
1362                 {
1363                     elem *tn = go.defnod[i].DNelem;
1364                     elem *tn1;
1365 
1366                     if (tn == n)
1367                         ni = i;
1368 
1369                     if (!OTassign(tn.Eoper))
1370                         continue;
1371 
1372                     // If def of same variable, kill that def
1373                     tn1 = tn.EV.E1;
1374                     if (tn1.Eoper != OPind || t.Ejty != tn1.Ejty)
1375                         continue;
1376 
1377                     if (KILL)
1378                         vec_setbit(i,KILL);
1379                     vec_clearbit(i,GEN);
1380                 }
1381             }
1382         }
1383     }
1384 
1385     vec_setbit(ni,GEN);                 // set bit in GEN for this def
1386 }
1387 }
1388 
1389 /***************************
1390  * Mark all elems as not being loop invariant.
1391  */
1392 
1393 @trusted
1394 private void unmarkall(elem *e)
1395 {
1396     for (; 1; e = e.EV.E1)
1397     {
1398         assert(e);
1399         e.Nflags &= ~NFLli;            /* unmark this elem             */
1400         if (OTunary(e.Eoper))
1401             continue;
1402         else if (OTbinary(e.Eoper))
1403         {
1404             unmarkall(e.EV.E2);
1405             continue;
1406         }
1407         return;
1408     }
1409 }
1410 
1411 
1412 /********************************
1413  * Search for references to v in tree n before nstop is encountered.
1414  * Params:
1415  *      v = symbol to search for
1416  *      n = tree to search
1417  *      nstop = stop searching tree when reaching this elem
1418  * Returns:
1419  *    true if there are any refs of v in n before nstop is encountered
1420  */
1421 
1422 @trusted
1423 private bool refs(Symbol *v,elem *n,elem *nstop)
1424 {
1425     symbol_debug(v);
1426     assert(symbol_isintab(v));
1427     assert(v.Ssymnum < globsym.length);
1428     bool stop = false;
1429 
1430     // Walk tree in evaluation order
1431     bool walk(elem* n)
1432     {
1433         elem_debug(n);
1434         assert(n);
1435 
1436         if (stop)
1437             return false;
1438         bool f = false;
1439         const op = n.Eoper;
1440         if (OTunary(op))
1441             f = walk(n.EV.E1);
1442         else if (OTbinary(op))
1443         {
1444             if (ERTOL(n))                   /* watch order of evaluation    */
1445             {
1446                 /* Note that (OPvar = e) is not a ref of OPvar, whereas     */
1447                 /* ((OPbit OPvar) = e) is a ref of OPvar, and (OPvar op= e) is */
1448                 /* a ref of OPvar, etc.                                     */
1449                 f = walk(n.EV.E2);
1450                 if (!f)
1451                 {
1452                     if (op == OPeq)
1453                     {
1454                         if (n.EV.E1.Eoper != OPvar)
1455                             f = walk(n.EV.E1.EV.E1);
1456                     }
1457                     else
1458                         f = walk(n.EV.E1);
1459                 }
1460             }
1461             else
1462                 f = walk(n.EV.E1) || walk(n.EV.E2);
1463         }
1464 
1465         if (n == nstop)
1466             stop = true;
1467         else if (n.Eoper == OPvar)           /* if variable reference        */
1468             return v == n.EV.Vsym;
1469         else if (op == OPasm)                /* everything is referenced     */
1470             return true;
1471         return f;
1472     }
1473 
1474     return walk(n);
1475 }
1476 
1477 /*************************
1478  * Move LIs to preheader.
1479  * Conditions to be satisfied for code motion are:
1480  *      1) All exit blocks are dominated (true before this is called).
1481  *                      -- OR --
1482  *      2) Variable assigned by a statement is not live on entering
1483  *         any successor outside the loop of any exit block of the
1484  *         loop.
1485  *
1486  *      3) Cannot move assignment to variable if there are any other
1487  *         assignments to that variable within the loop (true or
1488  *         assignment would not have been marked LI).
1489  *      4) Cannot move assignments to a variable if there is a use
1490  *         of that variable in this loop that is reached by any other
1491  *         def of it.
1492  *      5) Cannot move expressions that have side effects.
1493  *      6) Do not move assignments to variables that could be affected
1494  *         by ambiguous defs.
1495  *      7) It is not worth it to move expressions of the form:
1496  *              (var == const)
1497  * Input:
1498  *      n       the elem we're considering moving
1499  *      b       the block this elem is in
1500  *      l       the loop we're in
1501  *      domexit flags
1502  *      bit 0:  1       this branch is always executed
1503  *              0       this branch is only sometimes executed
1504  *      bit 1:  1       do not move LIs that could throw exceptions
1505  *                      or cannot be moved past possibly thrown exceptions
1506  * Returns:
1507  *      revised domexit
1508  */
1509 
1510 @trusted
1511 private void movelis(elem* n, block* b, ref Loop l, ref uint pdomexit)
1512 {
1513     vec_t tmp;
1514     elem *ne;
1515     elem *t;
1516     elem *n2;
1517     Symbol *v;
1518     tym_t ty;
1519 
1520 Lnextlis:
1521     //if (isLI(n)) { printf("movelis(B%d, ", b.Bdfoidx); WReqn(n); printf(")\n"); }
1522     assert(n);
1523     elem_debug(n);
1524     const op = n.Eoper;
1525     switch (op)
1526     {
1527         case OPvar:
1528         case OPconst:
1529         case OPrelconst:
1530             goto Lret;
1531 
1532         case OPandand:
1533         case OPoror:
1534         case OPcond:
1535         {
1536             uint domexit;
1537 
1538             movelis(n.EV.E1,b,l,pdomexit);        // always executed
1539             domexit = pdomexit & ~1;   // sometimes executed
1540             movelis(n.EV.E2,b,l,domexit);
1541             pdomexit |= domexit & 2;
1542             goto Lret;
1543         }
1544 
1545         case OPeq:
1546             // Do loop invariant assignments
1547             if (isLI(n) && n.EV.E1.Eoper == OPvar)
1548             {
1549                 v = n.EV.E1.EV.Vsym;          // variable index number
1550 
1551                 if (!(v.Sflags & SFLunambig)) goto L3;         // case 6
1552 
1553                 // If case 4 is not satisfied, return
1554 
1555                 // Function parameters have an implied definition prior to the
1556                 // first block of the function. Unfortunately, the rd vector
1557                 // does not take this into account. Therefore, we assume the
1558                 // worst and reject assignments to function parameters.
1559                 if (v.Sclass == SC.parameter || v.Sclass == SC.regpar ||
1560                     v.Sclass == SC.fastpar || v.Sclass == SC.shadowreg)
1561                         goto L3;
1562 
1563                 if (el_sideeffect(n.EV.E2)) goto L3;              // case 5
1564 
1565                 // If case 1 or case 2 is not satisfied, return
1566 
1567                 if (!(pdomexit & 1))                   // if not case 1
1568                 {
1569                     uint i;
1570                     for (i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i)  // for each exit block
1571                     {
1572                         foreach (bl; ListRange(dfo[i].Bsucc))
1573                         {
1574                             block *s;           // successor to exit block
1575 
1576                             s = list_block(bl);
1577                             if (!vec_testbit(s.Bdfoidx,l.Lloop) &&
1578                                 (!symbol_isintab(v) ||
1579                                  vec_testbit(v.Ssymnum,s.Binlv))) // if v is live on exit
1580                                     goto L3;
1581                         }
1582                     }
1583                 }
1584 
1585                 tmp = vec_calloc(go.defnod.length);
1586                 uint i;
1587                 for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)  // for each block in loop
1588                 {
1589                     if (dfo[i] == b)        // except this one
1590                         continue;
1591 
1592                     //<if there are any RDs of v in Binrd other than n>
1593                     //    <if there are any refs of v in that block>
1594                     //        return;
1595 
1596                     //filterrd(tmp,dfo[i].Binrd,v);
1597                     listrds(dfo[i].Binrd,n.EV.E1,tmp,null);
1598                     uint j;
1599                     for (j = 0; (j = cast(uint) vec_index(j, tmp)) < go.defnod.length; ++j)  // for each RD of v in Binrd
1600                     {
1601                         if (go.defnod[j].DNelem == n)
1602                             continue;
1603                         if (dfo[i].Belem &&
1604                             refs(v,dfo[i].Belem,cast(elem *)null)) //if refs of v
1605                         {
1606                             vec_free(tmp);
1607                             goto L3;
1608                         }
1609                         break;
1610                     }
1611                 } // foreach
1612 
1613                 // <if there are any RDs of v in b.Binrd other than n>
1614                 //     <if there are any references to v before the
1615                 //      assignment to v>
1616                 //         <can't move this assignment>
1617 
1618                 //filterrd(tmp,b.Binrd,v);
1619                 listrds(b.Binrd,n.EV.E1,tmp,null);
1620                 uint j;
1621                 for (j = 0; (j = cast(uint) vec_index(j, tmp)) < go.defnod.length; ++j)  // for each RD of v in Binrd
1622                 {
1623                     if (go.defnod[j].DNelem == n)
1624                         continue;
1625                     if (b.Belem && refs(v,b.Belem,n))
1626                     {
1627                         vec_free(tmp);
1628                         goto L3;            // can't move it
1629                     }
1630                     break;                  // avoid redundant looping
1631                 }
1632                 vec_free(tmp);
1633 
1634                 // We have an LI assignment, n.
1635                 // Check to see if the rvalue is already in the preheader.
1636                 foreach (e; l.Llis)
1637                 {
1638                     if (el_match(n.EV.E2, e.EV.E2))
1639                     {
1640                         el_free(n.EV.E2);
1641                         n.EV.E2 = el_calloc();
1642                         el_copy(n.EV.E2, e.EV.E1);
1643                         if (debugc) printf("LI assignment rvalue was replaced\n");
1644                         doflow = true;
1645                         go.changes++;
1646                         break;
1647                     }
1648                 }
1649 
1650                 // move assignment elem to preheader
1651                 if (debugc) printf("Moved LI assignment ");
1652 
1653                 debug
1654                     if (debugc)
1655                     {
1656                         WReqn(n);
1657                         printf(";\n");
1658                     }
1659 
1660                 go.changes++;
1661                 doflow = true;                  // redo flow analysis
1662                 ne = el_calloc();
1663                 el_copy(ne,n);                  // create assignment elem
1664                 assert(l.Lpreheader);          // make sure there is one
1665                 appendelem(ne,&(l.Lpreheader.Belem)); // append ne to preheader
1666                 l.Llis.push(ne);
1667 
1668                 el_copy(n,ne.EV.E1);      // replace n with just a reference to v
1669                 goto Lret;
1670             } // if
1671             break;
1672 
1673         case OPcall:
1674         case OPucall:
1675             pdomexit |= 2;
1676             break;
1677 
1678         case OPpair:
1679         case OPrpair:                   // don't move these, as they do not do computation
1680             movelis(n.EV.E1,b,l,pdomexit);
1681             n = n.EV.E2;
1682             goto Lnextlis;
1683 
1684         default:
1685             break;
1686     }
1687 
1688 L3:
1689     // Do leaves of non-LI expressions, leaves of = elems that didn't
1690     // meet the invariant assignment removal criteria, and don't do leaves
1691     if (OTleaf(op))
1692         goto Lret;
1693     if (!isLI(n) || op == OPeq || op == OPcomma || OTrel(op) || op == OPnot ||
1694       // These are usually addressing modes, so moving them is a net loss
1695       (I32 && op == OPshl && n.EV.E2.Eoper == OPconst && el_tolong(n.EV.E2) <= 3UL)
1696      )
1697     {
1698         if (OTassign(op))
1699         {
1700             elem *n1 = n.EV.E1;
1701             elem *n11;
1702 
1703             if (OTbinary(op))
1704                 movelis(n.EV.E2,b,l,pdomexit);
1705 
1706             // Do lvalue only if it is an expression
1707             if (n1.Eoper == OPvar)
1708                 goto Lret;
1709             n11 = n1.EV.E1;
1710             if (OTbinary(n1.Eoper))
1711             {
1712                 movelis(n11,b,l,pdomexit);
1713                 n = n1.EV.E2;
1714             }
1715             // If *(x + c), just make x the LI, not the (x + c).
1716             // The +c comes free with the addressing mode.
1717             else if (n1.Eoper == OPind &&
1718                     isLI(n11) &&
1719                     n11.Eoper == OPadd &&
1720                     n11.EV.E2.Eoper == OPconst
1721                     )
1722             {
1723                 n = n11.EV.E1;
1724             }
1725             else
1726                 n = n11;
1727             movelis(n,b,l,pdomexit);
1728             if (b.Btry || !(n1.Eoper == OPvar && symbol_isintab(n1.EV.Vsym)))
1729             {
1730                 //printf("assign to global => domexit |= 2\n");
1731                 pdomexit |= 2;
1732             }
1733         }
1734         else if (OTunary(op))
1735         {
1736             elem *e1 = n.EV.E1;
1737 
1738             // If *(x + c), just make x the LI, not the (x + c).
1739             // The +c comes free with the addressing mode.
1740             if (op == OPind &&
1741                 isLI(e1) &&
1742                 e1.Eoper == OPadd &&
1743                 e1.EV.E2.Eoper == OPconst
1744                )
1745             {
1746                 n = e1.EV.E1;
1747             }
1748             else
1749                 n = e1;
1750         }
1751         else if (OTbinary(op))
1752         {
1753             movelis(n.EV.E1,b,l,pdomexit);
1754             n = n.EV.E2;
1755         }
1756         goto Lnextlis;
1757   }
1758 
1759   if (el_sideeffect(n))
1760         goto Lret;
1761 
1762     static if (0)
1763     {
1764         printf("*pdomexit = %u\n", pdomexit);
1765         if (pdomexit & 2)
1766         {
1767             // If any indirections, can't LI it
1768 
1769             // If this operand has already been indirected, we can let
1770             // it pass.
1771             Symbol *s;
1772 
1773             printf("looking at:\n");
1774             elem_print(n);
1775             s = el_basesym(n.EV.E1);
1776             if (s)
1777             {
1778                 foreach (el; l.Llis)
1779                 {
1780                     el = el.EV.E2;
1781                     elem_print(el);
1782                     if (el.Eoper == OPind && el_basesym(el.EV.E1) == s)
1783                     {
1784                         printf("  pass!\n");
1785                         goto Lpass;
1786                     }
1787                 }
1788             }
1789             printf("  skip!\n");
1790             goto Lret;
1791 
1792         Lpass:
1793         }
1794     }
1795 
1796     // Move the LI expression to the preheader
1797     if (debugc) printf("Moved LI expression ");
1798 
1799     debug
1800         if (debugc)
1801         {
1802             WReqn(n);
1803             printf(";\n");
1804         }
1805 
1806     // See if it's already been moved
1807     ty = n.Ety;
1808     foreach (el; l.Llis)
1809     {
1810         tym_t ty2;
1811 
1812         //printf("existing LI: "); WReqn(el); printf("\n");
1813         ty2 = el.EV.E2.Ety;
1814         if (tysize(ty) == tysize(ty2))
1815         {
1816             el.EV.E2.Ety = ty;
1817             if (el_match(n,el.EV.E2))
1818             {
1819                 el.EV.E2.Ety = ty2;
1820                 if (!OTleaf(n.Eoper))
1821                 {       el_free(n.EV.E1);
1822                         if (OTbinary(n.Eoper))
1823                                 el_free(n.EV.E2);
1824                 }
1825                 el_copy(n,el.EV.E1);      // make copy of temp
1826                 n.Ety = ty;
1827 
1828                 debug
1829                     if (debugc)
1830                     {   printf("Already moved: LI expression replaced with ");
1831                         WReqn(n);
1832                         printf("\nPreheader %d expression %p ",
1833                         l.Lpreheader.Bdfoidx,l.Lpreheader.Belem);
1834                         WReqn(l.Lpreheader.Belem);
1835                         printf("\n");
1836                     }
1837 
1838                 go.changes++;
1839                 doflow = true;                  // redo flow analysis
1840                 goto Lret;
1841             }
1842             el.EV.E2.Ety = ty2;
1843         }
1844     }
1845 
1846     if (!(pdomexit & 1))                       // if only sometimes executed
1847     {
1848         if (debugc) printf(" doesn't dominate exit\n");
1849         goto Lret;                              // don't move LI
1850     }
1851 
1852     if (tyaggregate(n.Ety))
1853         goto Lret;
1854 
1855     go.changes++;
1856     doflow = true;                              // redo flow analysis
1857 
1858     t = el_alloctmp(n.Ety);                     /* allocate temporary t */
1859 
1860     debug
1861     {
1862         if (debugc) printf("movelis() introduced new variable '%s' of type ",t.EV.Vsym.Sident.ptr);
1863         if (debugc) printf("%s\n", tym_str(t.Ety));
1864         if (debugc) printf("\n");
1865     }
1866 
1867     n2 = el_calloc();
1868     el_copy(n2,n);                              /* create copy n2 of n  */
1869     ne = el_bin(OPeq,t.Ety,t,n2);               /* create elem t=n2     */
1870     assert(l.Lpreheader);                       /* make sure there is one */
1871     appendelem(ne,&(l.Lpreheader.Belem));       /* append ne to preheader */
1872 
1873     debug
1874         if (debugc)
1875         {
1876             printf("Preheader %d expression %p\n\t",
1877             l.Lpreheader.Bdfoidx,l.Lpreheader.Belem);
1878             WReqn(l.Lpreheader.Belem);
1879             printf("\nLI expression replaced with "); WReqn(t);
1880             printf("\n");
1881         }
1882 
1883     el_copy(n,t);                                 /* replace this elem with t */
1884 
1885     // Remember LI expression in elem list
1886     l.Llis.push(ne);
1887 
1888 Lret:
1889 
1890 }
1891 
1892 /***************************
1893  * Append elem to existing elem using an OPcomma elem.
1894  * Input:
1895  *      n       elem to append
1896  *      *pn     elem to append to
1897  */
1898 
1899 @trusted
1900 private void appendelem(elem *n,elem **pn)
1901 {
1902     assert(n && pn);
1903     if (*pn)                                    /* if this elem exists  */
1904     {
1905         while ((*pn).Eoper == OPcomma)          /* while we see OPcomma elems */
1906         {
1907             (*pn).Ety = n.Ety;
1908             pn = &((*pn).EV.E2);                /* cruise down right side */
1909         }
1910         *pn = el_bin(OPcomma,n.Ety,*pn,n);
1911     }
1912     else
1913         *pn = n;                                /* else create a elem   */
1914 }
1915 
1916 /************** LOOP INDUCTION VARIABLES **********************/
1917 
1918 /****************************
1919  * Create a new famlist entry.
1920  */
1921 
1922 @trusted
1923 private void newfamlist(famlist* fl, tym_t ty)
1924 {
1925     eve c = void;
1926     memset(&c,0,c.sizeof);
1927 
1928     fl.FLty = ty;
1929     switch (tybasic(ty))
1930     {   case TYfloat:
1931             c.Vfloat = 1;
1932             break;
1933 
1934         case TYdouble:
1935         case TYdouble_alias:
1936             c.Vdouble = 1;
1937             break;
1938 
1939         case TYldouble:
1940             c.Vldouble = 1;
1941             break;
1942 
1943         case TYbool:
1944         case TYchar:
1945         case TYschar:
1946         case TYuchar:
1947             c.Vchar = 1;
1948             break;
1949 
1950         case TYshort:
1951         case TYushort:
1952         case TYchar16:
1953         case TYwchar_t:             // BUG: what about 4 byte wchar_t's?
1954             c.Vshort = 1;
1955             break;
1956 
1957         case TYsptr:
1958         case TYcptr:
1959         case TYfptr:
1960         case TYvptr:
1961         case TYnptr:
1962         case TYnullptr:
1963         case TYimmutPtr:
1964         case TYsharePtr:
1965         case TYrestrictPtr:
1966         case TYfgPtr:
1967             ty = TYint;
1968             if (I64)
1969                 ty = TYllong;
1970             goto case TYuint;
1971 
1972         case TYint:
1973         case TYuint:
1974             c.Vint = 1;
1975             break;
1976 
1977         case TYhptr:
1978             ty = TYlong;
1979             goto case TYlong;
1980 
1981         case TYlong:
1982         case TYulong:
1983         case TYdchar:
1984         default:
1985             c.Vlong = 1;
1986             break;
1987     }
1988     fl.c1 = el_const(ty,&c);               /* c1 = 1               */
1989     c.Vldouble = 0;
1990     if (typtr(ty))
1991     {
1992         ty = TYint;
1993         if (tybasic(ty) == TYhptr)
1994             ty = TYlong;
1995         if (I64)
1996             ty = TYllong;
1997     }
1998     fl.c2 = el_const(ty,&c);               /* c2 = 0               */
1999 }
2000 
2001 /***************************
2002  * Remove induction variables from loop l.
2003  * Loop invariant removal should have been done just previously.
2004  */
2005 
2006 @trusted
2007 private void loopiv(ref Loop l)
2008 {
2009     if (debugc) printf("loopiv(%p)\n", &l);
2010     assert(l.Livlist.length == 0 && l.Lopeqlist.length == 0);
2011     elimspec(l, dfo);
2012     if (doflow)
2013     {
2014         flowrd();               /* compute reaching defs                */
2015         flowlv();               /* compute live variables               */
2016         flowae();               // compute available expressions
2017         doflow = false;
2018     }
2019     findbasivs(l);              /* find basic induction variables       */
2020     findopeqs(l);               // find op= variables
2021     findivfams(l);              /* find IV families                     */
2022     elimfrivivs(l);             /* eliminate less useful family IVs     */
2023     intronvars(l);              /* introduce new variables              */
2024     elimbasivs(l);              /* eliminate basic IVs                  */
2025     if (!addblk)                // adding a block changes the Binlv
2026         elimopeqs(l);           // eliminate op= variables
2027 
2028     foreach (ref iv; l.Livlist)
2029         iv.reset();
2030     l.Livlist.reset();          // reset for reuse
2031 
2032     foreach (ref iv; l.Lopeqlist)
2033         iv.reset();
2034     l.Lopeqlist.reset();        // reset for reuse
2035 
2036     /* Do copy propagation and dead assignment elimination        */
2037     /* upon return to optfunc()                                   */
2038 }
2039 
2040 /*************************************
2041  * Find basic IVs of loop l.
2042  * A basic IV x of loop l is a variable x which has
2043  * exactly one assignment within l of the form:
2044  * x += c or x -= c, where c is either a constant
2045  * or a LI.
2046  * Input:
2047  *      go.defnod[] loaded with all the definition elems of the loop
2048  */
2049 
2050 @trusted
2051 private void findbasivs(ref Loop l)
2052 {
2053     vec_t poss,notposs;
2054     elem *n;
2055     bool ambdone;
2056 
2057     ambdone = false;
2058     poss = vec_calloc(globsym.length);
2059     notposs = vec_calloc(globsym.length);  /* vector of all variables      */
2060                                         /* (initially all unmarked)     */
2061 
2062     /* for each def in go.defnod[] that is within loop l     */
2063 
2064     foreach (const i; 0 .. go.defnod.length)
2065     {
2066         if (!vec_testbit(go.defnod[i].DNblock.Bdfoidx,l.Lloop))
2067             continue;               /* def is not in the loop       */
2068 
2069         n = go.defnod[i].DNelem;
2070         elem_debug(n);
2071         if (OTassign(n.Eoper) && n.EV.E1.Eoper == OPvar)
2072         {
2073             Symbol *s;                  /* if unambiguous def           */
2074 
2075             s = n.EV.E1.EV.Vsym;
2076             if (symbol_isintab(s))
2077             {
2078                 SYMIDX v;
2079 
2080                 v = n.EV.E1.EV.Vsym.Ssymnum;
2081                 if ((n.Eoper == OPaddass || n.Eoper == OPminass ||
2082                      n.Eoper == OPpostinc || n.Eoper == OPpostdec) &&
2083                         (n.EV.E2.Eoper == OPconst || /* if x += c or x -= c          */
2084                          n.EV.E2.Eoper == OPvar && isLI(n.EV.E2)))
2085                 {
2086                     if (vec_testbit(v,poss))
2087                         /* We've already seen this def elem,    */
2088                         /* therefore there is more than one     */
2089                         /* def of v within the loop, therefore  */
2090                         /* v is not a basic IV.                 */
2091                         vec_setbit(v,notposs);
2092                     else
2093                         vec_setbit(v,poss);
2094                 }
2095                 else                    /* else mark as not possible    */
2096                     vec_setbit(v,notposs);
2097             }
2098         }
2099         else                            /* else ambiguous def           */
2100         {
2101             /* mark any vars that could be affected by              */
2102             /* this def as not possible                             */
2103 
2104             if (!ambdone)           /* avoid redundant loops        */
2105             {
2106                 foreach (j; 0 .. globsym.length)
2107                 {
2108                     if (!(globsym[j].Sflags & SFLunambig))
2109                         vec_setbit(j,notposs);
2110                 }
2111                 ambdone = true;
2112             }
2113         }
2114     }
2115     static if (0)
2116     {
2117         printf("poss    "); vec_println(poss);
2118         printf("notposs "); vec_println(notposs);
2119     }
2120     vec_subass(poss,notposs);             /* poss = poss - notposs        */
2121 
2122     /* create list of IVs */
2123     uint i;
2124     for (i = 0; (i = cast(uint) vec_index(i, poss)) < globsym.length; ++i)  // for each basic IV
2125     {
2126         Symbol *s;
2127 
2128         /* Skip if we don't want it to be a basic IV (see funcprev())   */
2129         s = globsym[i];
2130         assert(symbol_isintab(s));
2131         if (s.Sflags & SFLnotbasiciv)
2132             continue;
2133 
2134         // Do not use aggregates as basic IVs. This is because the other loop
2135         // code doesn't check offsets into symbols, (assuming everything
2136         // is at offset 0). We could perhaps amend this by allowing basic IVs
2137         // if the struct consists of only one data member.
2138         if (tyaggregate(s.ty()))
2139             continue;
2140 
2141         // Do not use complex types as basic IVs, as the code gen isn't up to it
2142         if (tycomplex(s.ty()))
2143                 continue;
2144 
2145         auto biv = l.Livlist.push();
2146         biv.IVbasic = s;               // symbol of basic IV
2147         biv.IVincr = null;
2148 
2149         if (debugc) printf("Symbol '%s' (%d) is a basic IV, ", s.Sident.ptr, i);
2150 
2151         /* We have the sym idx of the basic IV. We need to find         */
2152         /* the parent of the increment elem for it.                     */
2153 
2154         /* First find the go.defnod[]      */
2155         foreach (j; 0 .. go.defnod.length)
2156         {
2157             /* If go.defnod is a def of i and it is in the loop        */
2158             if (go.defnod[j].DNelem.EV.E1 &&     /* OPasm are def nodes  */
2159                 go.defnod[j].DNelem.EV.E1.EV.Vsym == s &&
2160                 vec_testbit(go.defnod[j].DNblock.Bdfoidx,l.Lloop))
2161             {
2162                 biv.IVincr = el_parent(go.defnod[j].DNelem,&(go.defnod[j].DNblock.Belem));
2163                 assert(s == (*biv.IVincr).EV.E1.EV.Vsym);
2164 
2165                 debug if (debugc)
2166                 {
2167                     printf("Increment elem is: "); WReqn(*biv.IVincr);     printf("\n");
2168                 }
2169                 goto L1;
2170             }
2171         }
2172         assert(0);                      /* should have found it         */
2173         /* NOTREACHED */
2174 
2175     L1:
2176     }
2177 
2178     vec_free(poss);
2179     vec_free(notposs);
2180 }
2181 
2182 /*************************************
2183  * Find op= elems of loop l.
2184  * Analogous to findbasivs().
2185  * Used to eliminate useless loop code normally found in benchmark programs.
2186  * Input:
2187  *      go.defnod[] loaded with all the definition elems of the loop
2188  */
2189 
2190 @trusted
2191 private void findopeqs(ref Loop l)
2192 {
2193     vec_t poss,notposs;
2194     elem *n;
2195     bool ambdone;
2196 
2197     ambdone = false;
2198     poss = vec_calloc(globsym.length);
2199     notposs = vec_calloc(globsym.length);  // vector of all variables
2200                                         // (initially all unmarked)
2201 
2202     // for each def in go.defnod[] that is within loop l
2203 
2204     foreach (i; 0 .. go.defnod.length)
2205     {
2206         if (!vec_testbit(go.defnod[i].DNblock.Bdfoidx,l.Lloop))
2207             continue;               // def is not in the loop
2208 
2209         n = go.defnod[i].DNelem;
2210         elem_debug(n);
2211         if (OTopeq(n.Eoper) && n.EV.E1.Eoper == OPvar)
2212         {
2213             Symbol *s;                  // if unambiguous def
2214 
2215             s = n.EV.E1.EV.Vsym;
2216             if (symbol_isintab(s))
2217             {
2218                 SYMIDX v;
2219 
2220                 v = n.EV.E1.EV.Vsym.Ssymnum;
2221                 {
2222                     if (vec_testbit(v,poss))
2223                         // We've already seen this def elem,
2224                         // therefore there is more than one
2225                         // def of v within the loop, therefore
2226                         // v is not a basic IV.
2227                         vec_setbit(v,notposs);
2228                     else
2229                         vec_setbit(v,poss);
2230                 }
2231             }
2232         }
2233         else                            // else ambiguous def
2234         {
2235             // mark any vars that could be affected by
2236             // this def as not possible
2237 
2238             if (!ambdone)           // avoid redundant loops
2239             {
2240                 foreach (j; 0 .. globsym.length)
2241                 {
2242                     if (!(globsym[j].Sflags & SFLunambig))
2243                         vec_setbit(j,notposs);
2244                 }
2245                 ambdone = true;
2246             }
2247         }
2248     }
2249 
2250     // Don't use symbols already in Livlist
2251     foreach (ref iv; l.Livlist)
2252     {
2253         vec_setbit(iv.IVbasic.Ssymnum,notposs);
2254     }
2255 
2256 
2257     static if (0)
2258     {
2259         printf("poss    "); vec_println(poss);
2260         printf("notposs "); vec_println(notposs);
2261     }
2262 
2263     vec_subass(poss,notposs);           // poss = poss - notposs
2264 
2265     // create list of IVs
2266     uint i;
2267     for (i = 0; (i = cast(uint) vec_index(i, poss)) < globsym.length; ++i)  // for each opeq IV
2268     {
2269         Symbol *s;
2270 
2271         s = globsym[i];
2272         assert(symbol_isintab(s));
2273 
2274         // Do not use aggregates as basic IVs. This is because the other loop
2275         // code doesn't check offsets into symbols, (assuming everything
2276         // is at offset 0). We could perhaps amend this by allowing basic IVs
2277         // if the struct consists of only one data member.
2278         if (tyaggregate(s.ty()))
2279             continue;
2280 
2281         auto biv = l.Lopeqlist.push();
2282         biv.IVbasic = s;               // symbol of basic IV
2283         biv.IVincr = null;
2284 
2285         if (debugc) printf("Symbol '%s' (%d) is an opeq IV, ",s.Sident.ptr,i);
2286 
2287         // We have the sym idx of the basic IV. We need to find
2288         // the parent of the increment elem for it.
2289 
2290         // First find the go.defnod[]
2291         foreach (j; 0 .. go.defnod.length)
2292         {
2293             // If go.defnod is a def of i and it is in the loop
2294             if (go.defnod[j].DNelem.EV.E1 &&     // OPasm are def nodes
2295                 go.defnod[j].DNelem.EV.E1.EV.Vsym == s &&
2296                 vec_testbit(go.defnod[j].DNblock.Bdfoidx,l.Lloop))
2297             {
2298                 biv.IVincr = el_parent(go.defnod[j].DNelem,&(go.defnod[j].DNblock.Belem));
2299                 assert(s == (*biv.IVincr).EV.E1.EV.Vsym);
2300 
2301                 debug if (debugc)
2302                 {
2303                     printf("Opeq elem is: "); WReqn(*biv.IVincr);  printf("\n");
2304                 }
2305                 goto Lcont;
2306             }
2307         }
2308         assert(0);                      // should have found it
2309         // NOTREACHED
2310 
2311     Lcont:
2312     }
2313 
2314     vec_free(poss);
2315     vec_free(notposs);
2316 }
2317 
2318 /*****************************
2319  * Find families for each basic IV.
2320  * An IV family is a list of elems of the form
2321  * c1*X+c2, where X is a basic induction variable.
2322  * Note that we do not do divides, because of roundoff error problems.
2323  */
2324 
2325 @trusted
2326 private void findivfams(ref Loop l)
2327 {
2328     if (debugc) printf("findivfams(%p)\n", &l);
2329     foreach (ref biv; l.Livlist)
2330     {
2331         for (uint i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)  // for each block in loop
2332             if (dfo[i].Belem)
2333                 ivfamelems(&biv,&(dfo[i].Belem));
2334         /* Fold all the constant expressions in c1 and c2.      */
2335         foreach (ref fl; biv.IVfamily)
2336         {
2337             fl.c1 = doptelem(fl.c1,GOALvalue | GOALagain);
2338             fl.c2 = doptelem(fl.c2,GOALvalue | GOALagain);
2339         }
2340     }
2341 }
2342 
2343 /*************************
2344  * Tree walking support routine for findivfams().
2345  *      biv =   basic induction variable pointer
2346  *      pn      pointer to elem
2347  */
2348 
2349 @trusted
2350 private void ivfamelems(Iv *biv,elem **pn)
2351 {
2352     tym_t ty,c2ty;
2353     elem *n1;
2354     elem *n2;
2355 
2356     assert(pn);
2357     elem* n = *pn;
2358     assert(biv && n);
2359     const op = n.Eoper;
2360     if (OTunary(op))
2361     {
2362        ivfamelems(biv,&n.EV.E1);
2363         n1 = n.EV.E1;
2364         n2 = null;
2365     }
2366     else if (OTbinary(op))
2367     {
2368         ivfamelems(biv,&n.EV.E1);
2369         ivfamelems(biv,&n.EV.E2); /* LTOR or RTOL order is unimportant */
2370         n1 = n.EV.E1;
2371         n2 = n.EV.E2;
2372     }
2373     else                                /* else leaf elem               */
2374         return;                         /* which can't be in the family */
2375 
2376     if (tycomplex(n.Ety))
2377         return;
2378 
2379     if (op == OPmul || op == OPadd || op == OPmin ||
2380         op == OPneg || op == OPshl)
2381     {   /* Note that we are wimping out and not considering             */
2382         /* LI variables as part of c1 and c2, but only constants.       */
2383 
2384         ty = n.Ety;
2385 
2386         /* Since trees are canonicalized, basic induction variables     */
2387         /* will only appear on the left.                                */
2388 
2389         /* Improvement:                                                 */
2390         /* We wish to pick up the cases (biv + li), (biv - li) and      */
2391         /* (li + biv). OPmul and LS with bivs are out, since if we      */
2392         /* try to eliminate the biv, and the loop test is a >, >=,      */
2393         /* <, <=, we have a problem since we don't know if the li       */
2394         /* is negative. (Would have to call swaprel() on it.)           */
2395 
2396         /* If we have (li + var), swap the leaves.                      */
2397         if (op == OPadd && isLI(n1) && n1.Eoper == OPvar && n2.Eoper == OPvar)
2398         {
2399             n.EV.E1 = n2;
2400             n2 = n.EV.E2 = n1;
2401             n1 = n.EV.E1;
2402         }
2403 
2404         // Get rid of case where we painted a far pointer to a long
2405         if (op == OPadd || op == OPmin)
2406         {
2407             int sz;
2408 
2409             sz = tysize(ty);
2410             if (sz == tysize(TYfptr) && !tyfv(ty) &&
2411                 (sz != tysize(n1.Ety) || sz != tysize(n2.Ety)))
2412                 return;
2413         }
2414 
2415         /* Look for function of basic IV (-biv or biv op const)         */
2416         if (n1.Eoper == OPvar && n1.EV.Vsym == biv.IVbasic)
2417         {
2418             if (op == OPneg)
2419             {
2420                 if (debugc) printf("found (-biv), elem %p\n",n);
2421                 auto fl = biv.IVfamily.push();
2422                 newfamlist(fl, ty);
2423                 fl.FLivty = n1.Ety;
2424                 fl.FLpelem = pn;
2425                 fl.c1 = el_una(op,ty,fl.c1); /* c1 = -1       */
2426             }
2427             else if (n2.Eoper == OPconst ||
2428                      isLI(n2) && (op == OPadd || op == OPmin))
2429             {
2430                 debug
2431                     if (debugc)
2432                     {   printf("found (biv op const), elem (");
2433                             WReqn(n);
2434                             printf(");\n");
2435                             printf("Types: n1=%s", tym_str(n1.Ety));
2436                             printf(" ty=%s", tym_str(ty));
2437                             printf(" n2=%s\n", tym_str(n2.Ety));
2438                     }
2439 
2440                 auto fl = biv.IVfamily.push();
2441                 newfamlist(fl, ty);
2442                 fl.FLivty = n1.Ety;
2443                 fl.FLpelem = pn;
2444                 switch (op)
2445                 {
2446                     case OPadd:           /* c2 = right           */
2447                         c2ty = n2.Ety;
2448                         if (typtr(fl.c2.Ety))
2449                                 c2ty = fl.c2.Ety;
2450                         goto L1;
2451 
2452                     case OPmin:           /* c2 = -right          */
2453                         c2ty = fl.c2.Ety;
2454                         /* Check for subtracting two pointers */
2455                         if (typtr(c2ty) && typtr(n2.Ety))
2456                         {
2457                             if (tybasic(c2ty) == TYhptr)
2458                                 c2ty = TYlong;
2459                             else
2460                                 c2ty = I64 ? TYllong : TYint;
2461                         }
2462                   L1:
2463                         fl.c2 = el_bin(op,c2ty,fl.c2,el_copytree(n2));
2464                         break;
2465 
2466                     case OPmul:           /* c1 = right           */
2467                     case OPshl:           /* c1 = 1 << right      */
2468                         fl.c1 = el_bin(op,ty,fl.c1,el_copytree(n2));
2469                         break;
2470 
2471                     default:
2472                         assert(0);
2473                 }
2474             }
2475         }
2476 
2477         /* Look for function of existing IV                             */
2478 
2479         foreach (ref fl; biv.IVfamily)
2480         {
2481             if (*fl.FLpelem != n1)          /* not it               */
2482                 continue;
2483 
2484             /* Look for (fl op constant)     */
2485             if (op == OPneg)
2486             {
2487                 if (debugc) printf("found (-fl), elem %p\n",n);
2488                 /* c1 = -c1; c2 = -c2; */
2489                 fl.c1 = el_una(OPneg,ty,fl.c1);
2490                 fl.c2 = el_una(OPneg,ty,fl.c2);
2491                 fl.FLty = ty;
2492                 fl.FLpelem = pn;        /* replace with new IV  */
2493             }
2494             else if (n2.Eoper == OPconst ||
2495                      isLI(n2) && (op == OPadd || op == OPmin))
2496             {
2497                 debug
2498                     if (debugc)
2499                     {
2500                         printf("found (fl op const), elem (");
2501                         WReqn(n);
2502                         assert(*pn == n);
2503                         printf(");\n");
2504                         elem_print(n);
2505                     }
2506 
2507                 switch (op)
2508                 {
2509                     case OPmul:
2510                     case OPshl:
2511                         fl.c1 = el_bin(op,ty,fl.c1,el_copytree(n2));
2512                         break;
2513 
2514                     case OPadd:
2515                     case OPmin:
2516                         break;
2517 
2518                     default:
2519                         assert(0);
2520                 }
2521                 fl.c2 = el_bin(op,ty,fl.c2,el_copytree(n2));
2522                 fl.FLty = ty;
2523                 fl.FLpelem = pn;        /* replace with new IV  */
2524             } /* else if */
2525         } /* for */
2526     } /* if */
2527 }
2528 
2529 /*********************************
2530  * Eliminate frivolous family ivs, that is,
2531  * if we can't eliminate the BIV, then eliminate family ivs that
2532  * differ from it only by a constant.
2533  */
2534 
2535 @trusted
2536 private void elimfrivivs(ref Loop l)
2537 {
2538     foreach (ref biv; l.Livlist)
2539     {
2540         int nrefs;
2541 
2542         if (debugc) printf("elimfrivivs()\n");
2543         /* Compute number of family ivs for biv */
2544         const nfams = biv.IVfamily.length;
2545         if (debugc) printf("nfams = %d\n", cast(int)nfams);
2546 
2547         /* Compute number of references to biv  */
2548         if (onlyref(biv.IVbasic,l,*biv.IVincr,nrefs))
2549                 nrefs--;
2550         if (debugc) printf("nrefs = %d\n",nrefs);
2551         assert(nrefs + 1 >= nfams);
2552         if (nrefs > nfams ||            // if we won't eliminate the biv
2553             (!I16 && nrefs == nfams))
2554         {   /* Eliminate any family ivs that only differ by a constant  */
2555             /* from biv                                                 */
2556             foreach (ref fl; biv.IVfamily)
2557             {
2558                 elem *ec1 = fl.c1;
2559                 targ_llong c;
2560 
2561                 if (elemisone(ec1) ||
2562                     // Eliminate fl's that can be represented by
2563                     // an addressing mode
2564                     (!I16 && ec1.Eoper == OPconst && tyintegral(ec1.Ety) &&
2565                      ((c = el_tolong(ec1)) == 2 || c == 4 || c == 8)
2566                     )
2567                    )
2568                 {
2569                     fl.FLtemp = FLELIM;
2570 
2571                     debug
2572                         if (debugc)
2573                         {
2574                             printf("Eliminating frivolous IV ");
2575                             WReqn(*fl.FLpelem);
2576                             printf("\n");
2577                         }
2578                 }
2579             }
2580         }
2581     }
2582 }
2583 
2584 
2585 /******************************
2586  * Introduce new variables.
2587  */
2588 
2589 @trusted
2590 private void intronvars(ref Loop l)
2591 {
2592     elem *T;
2593     elem *ne;
2594     elem *t2;
2595     elem *C2;
2596     elem *cmul;
2597     tym_t ty,tyr;
2598 
2599     if (debugc) printf("intronvars(%p)\n", &l);
2600     foreach (ref biv; l.Livlist)
2601     {
2602         elem *bivinc = *biv.IVincr;   /* ptr to increment elem */
2603 
2604         foreach (ref fl; biv.IVfamily)
2605         {                               /* for each IV in family of biv  */
2606             if (fl.FLtemp == FLELIM)   /* if already eliminated         */
2607                 continue;
2608 
2609             /* If induction variable can be written as a simple function */
2610             /* of a previous induction variable, skip it.                */
2611             if (funcprev(biv,fl))
2612                 continue;
2613 
2614             ty = fl.FLty;
2615             T = el_alloctmp(ty);        /* allocate temporary T          */
2616             fl.FLtemp = T.EV.Vsym;
2617 
2618             debug
2619             {
2620                 if (debugc) printf("intronvars() introduced new variable '%s' of type ",T.EV.Vsym.Sident.ptr);
2621                 if (debugc) printf("%s\n", tym_str(ty));
2622                 if (debugc) printf("\n");
2623             }
2624 
2625             /* append elem T=biv*C1+C2 to preheader */
2626             /* ne = biv*C1      */
2627             tyr = fl.FLivty;                   /* type of biv              */
2628             ne = el_var(biv.IVbasic);
2629             ne.Ety = tyr;
2630             if (!elemisone(fl.c1))             /* don't multiply ptrs by 1 */
2631                 ne = el_bin(OPmul,tyr,ne,el_copytree(fl.c1));
2632             if (tyfv(tyr) && tysize(ty) == SHORTSIZE)
2633                 ne = el_una(OP32_16,ty,ne);
2634             C2 = el_copytree(fl.c2);
2635             t2 = el_bin(OPadd,ty,ne,C2);        /* t2 = ne + C2         */
2636             ne = el_bin(OPeq,ty,el_copytree(T),t2);
2637             appendelem(ne, &(l.Lpreheader.Belem));
2638 
2639             /* prefix T+=C1*C to elem biv+=C                            */
2640             /* Must prefix in case the value of the expression (biv+=C) */
2641             /* is used by somebody up the tree.                         */
2642             cmul = el_bin(OPmul,fl.c1.Ety,el_copytree(fl.c1),
2643                                           el_copytree(bivinc.EV.E2));
2644             t2 = el_bin(bivinc.Eoper,ty,el_copytree(T),cmul);
2645             t2 = doptelem(t2,GOALvalue | GOALagain);
2646             *biv.IVincr = el_bin(OPcomma,bivinc.Ety,t2,bivinc);
2647             biv.IVincr = &((*biv.IVincr).EV.E2);
2648 
2649             debug
2650                 if (debugc)
2651                 {
2652                     printf("Replacing elem (");
2653                     WReqn(*fl.FLpelem);
2654                     printf(") with '%s'\n",T.EV.Vsym.Sident.ptr);
2655                     printf("The init elem is (");
2656                     WReqn(ne);
2657                     printf(");\nThe increment elem is (");
2658                     WReqn(t2);
2659                     printf(")\n");
2660                 }
2661 
2662             el_free(*fl.FLpelem);
2663             *fl.FLpelem = T;           /* replace elem n with ref to T  */
2664             doflow = true;              /* redo flow analysis           */
2665             go.changes++;
2666         } /* for */
2667     } /* for */
2668 }
2669 
2670 /*******************************
2671  * Determine if induction variable can be rewritten as a simple
2672  * function of a previously generated temporary.
2673  * This can frequently
2674  * generate less code than that of an all-new temporary (especially
2675  * if it is the same as a previous temporary!).
2676  * Input:
2677  *      biv             Basic induction variable
2678  *      fl              Item in biv's family list we are looking at
2679  * Returns:
2680  *      false           Caller should create a new induction variable.
2681  *      true            *FLpelem is replaced with function of a previous
2682  *                      induction variable. FLtemp is set to FLELIM to
2683  *                      indicate this.
2684  */
2685 
2686 @trusted
2687 private bool funcprev(ref Iv biv, ref famlist fl)
2688 {
2689     tym_t tymin;
2690     int sz;
2691     elem *e1;
2692     elem *e2;
2693     elem *flse1;
2694 
2695     debug if (debugc)
2696         printf("funcprev\n");
2697 
2698     foreach (ref fls; biv.IVfamily)
2699     {
2700         if (!fls.FLtemp)                // haven't generated a temporary yet
2701             continue;
2702         if (fls.FLtemp == FLELIM)      /* no iv we can use here        */
2703             continue;
2704 
2705         /* The multipliers must match   */
2706         if (!el_match(fls.c1,fl.c1))
2707             continue;
2708 
2709         /* If the c2's match also, we got it easy */
2710         if (el_match(fls.c2,fl.c2))
2711         {
2712             if (tysize(fl.FLty) > tysize(fls.FLtemp.ty()))
2713                 continue;              /* can't increase size of var   */
2714             flse1 = el_var(fls.FLtemp);
2715             flse1.Ety = fl.FLty;
2716             goto L2;
2717         }
2718 
2719         /* The difference is only in the addition. Therefore, replace
2720            *fl.FLpelem with:
2721                 case 1:         (fl.c2 + (fls.FLtemp - fls.c2))
2722                 case 2:         (fls.FLtemp + (fl.c2 - fls.c2))
2723          */
2724         e1 = fl.c2;
2725         /* Subtracting relocatables usually generates slow code for     */
2726         /* linkers that can't handle arithmetic on relocatables.        */
2727         if (typtr(fls.c2.Ety))
2728         {
2729             if (fls.c2.Eoper == OPrelconst &&
2730                 !(fl.c2.Eoper == OPrelconst &&
2731                   fl.c2.EV.Vsym == fls.c2.EV.Vsym)
2732                )
2733                 continue;
2734         }
2735         flse1 = el_var(fls.FLtemp);
2736         e2 = flse1;                             /* assume case 1        */
2737         tymin = e2.Ety;
2738         if (typtr(fls.c2.Ety))
2739         {
2740             if (!typtr(tymin))
2741             {
2742                 if (typtr(e1.Ety))
2743                 {
2744                     e1 = e2;
2745                     e2 = fl.c2;            /* case 2               */
2746                 }
2747                 else                        /* can't subtract fptr  */
2748                     goto L1;
2749             }
2750             if (tybasic(fls.c2.Ety) == TYhptr)
2751                 tymin = TYlong;
2752             else
2753                 tymin = I64 ? TYllong : TYint;         /* type of (ptr - ptr) */
2754         }
2755 
2756         /* If e1 and fls.c2 are fptrs, and are not from the same       */
2757         /* segment, we cannot subtract them.                            */
2758         if (tyfv(e1.Ety) && tyfv(fls.c2.Ety))
2759         {
2760             if (e1.Eoper != OPrelconst || fls.c2.Eoper != OPrelconst)
2761                 goto L1;                /* assume expressions have diff segs */
2762             if (e1.EV.Vsym.Sclass != fls.c2.EV.Vsym.Sclass)
2763             {
2764                L1:
2765                 el_free(flse1);
2766                 continue;
2767             }
2768         }
2769 
2770         /* Some more type checking...   */
2771         sz = tysize(fl.FLty);
2772         if (sz != tysize(e1.Ety) &&
2773             sz != tysize(tymin))
2774             goto L1;
2775 
2776         /* Do some type checking (can't add pointers and get a pointer!) */
2777         //if (typtr(fl.FLty) && typtr(e1.Ety) && typtr(tymin))
2778             //goto L1;
2779         /* Construct (e1 + (e2 - fls.c2))      */
2780         flse1 = el_bin(OPadd,fl.FLty,
2781                             e1,
2782                             el_bin(OPmin,tymin,
2783                                     e2,
2784                                     el_copytree(fls.c2)));
2785         if (sz < tysize(tymin) && sz == tysize(e1.Ety))
2786         {
2787             assert(I16);
2788             flse1.EV.E2 = el_una(OPoffset,fl.FLty,flse1.EV.E2);
2789         }
2790 
2791         flse1 = doptelem(flse1,GOALvalue | GOALagain);
2792         fl.c2 = null;
2793     L2:
2794         debug if (debugc)
2795         {
2796             printf("Replacing ");
2797             WReqn(*fl.FLpelem);
2798             printf(" with ");
2799             WReqn(flse1);
2800             printf("\n");
2801         }
2802 
2803         el_free(*fl.FLpelem);
2804         *fl.FLpelem = flse1;
2805 
2806         /* Fix the iv so when we do loops again, we won't create        */
2807         /* yet another iv, which is just what funcprev() is supposed    */
2808         /* to prevent.                                                  */
2809         fls.FLtemp.Sflags |= SFLnotbasiciv;
2810 
2811         fl.FLtemp = FLELIM;            /* mark iv as being gone        */
2812         go.changes++;
2813         doflow = true;
2814         return true;                    /* it was replaced              */
2815     }
2816     return false;                       /* need to create a new variable */
2817 }
2818 
2819 /***********************
2820  * Eliminate basic IVs.
2821  */
2822 
2823 @trusted
2824 private void elimbasivs(ref Loop l)
2825 {
2826     if (debugc) printf("elimbasivs(%p)\n", &l);
2827     foreach (ref biv; l.Livlist)
2828     {
2829         /* Can't eliminate this basic IV if we have a goal for the      */
2830         /* increment elem.                                              */
2831         // Be careful about Nflags being in a union...
2832         elem* einc = *biv.IVincr;
2833         if (!(einc.Nflags & NFLnogoal))
2834             continue;
2835 
2836         Symbol* X = biv.IVbasic;
2837         assert(symbol_isintab(X));
2838         tym_t ty = X.ty();
2839         int refcount;
2840         elem** pref = onlyref(X,l,einc,refcount);
2841 
2842         /* if only ref of X is of the form (X) or (X relop e) or (e relop X) */
2843         if (pref != null && refcount <= 1)
2844         {
2845             if (!biv.IVfamily.length)
2846                 continue;
2847 
2848             if (catchRef(X, l))
2849                 continue;
2850 
2851             elem* ref_ = *pref;
2852 
2853             /* Replace (X) with (X != 0)                            */
2854             if (ref_.Eoper == OPvar)
2855                 ref_ = *pref = el_bin(OPne,TYint,ref_,el_long(ref_.Ety,0L));
2856 
2857             const fi = simfl(biv.IVfamily, ty); // find simplest elem in family
2858             if (fi == biv.IVfamily.length)
2859                 continue;
2860             famlist* fl = &biv.IVfamily[fi];
2861 
2862             // Don't do the replacement if we would replace a
2863             // signed comparison with an unsigned one
2864             tym_t flty = fl.FLty;
2865             if (tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety))
2866                 flty = touns(flty);
2867 
2868             if (ref_.Eoper >= OPle && ref_.Eoper <= OPge &&
2869                 !(tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety)) &&
2870                  tyuns(flty))
2871                     continue;
2872 
2873             /* if we have (e relop X), replace it with (X relop e)  */
2874             if (ref_.EV.E2.Eoper == OPvar && ref_.EV.E2.EV.Vsym == X)
2875             {
2876                 elem* tmp = ref_.EV.E2;
2877                 ref_.EV.E2 = ref_.EV.E1;
2878                 ref_.EV.E1 = tmp;
2879                 ref_.Eoper = cast(ubyte)swaprel(ref_.Eoper);
2880             }
2881 
2882             // If e*c1+c2 would result in a sign change or an overflow
2883             // then we can't do it
2884             if (fl.c1.Eoper == OPconst)
2885             {
2886                 targ_llong c1 = el_tolong(fl.c1);
2887                 const int sz = tysize(ty);
2888                 if (sz == SHORTSIZE &&
2889                     ((ref_.EV.E2.Eoper == OPconst &&
2890                     c1 * el_tolong(ref_.EV.E2) & ~0x7FFFL) ||
2891                      c1 & ~0x7FFFL)
2892                    )
2893                     continue;
2894 
2895                 if (sz == LONGSIZE &&
2896                     ((ref_.EV.E2.Eoper == OPconst &&
2897                     c1 * el_tolong(ref_.EV.E2) & ~0x7FFFFFFFL) ||
2898                      c1 & ~0x7FFFFFFFL)
2899                    )
2900                     continue;
2901                 if (sz == LLONGSIZE &&
2902                     ((ref_.EV.E2.Eoper == OPconst &&
2903                     c1 * el_tolong(ref_.EV.E2) & ~0x7FFFFFFFFFFFFFFFL) ||
2904                      c1 & ~0x7FFFFFFFFFFFFFFFL)
2905                    )
2906                     continue;
2907             }
2908 
2909             /* If the incr is a decrement, and the relational is < or <=,
2910              * and its unsigned, then don't do it because it could drop below 0.
2911              * https://issues.dlang.org/show_bug.cgi?id=16189
2912              */
2913             if ((einc.Eoper == OPminass || einc.EV.E2.Eoper == OPconst && el_tolong(einc.EV.E2) < 0) &&
2914                 (ref_.Eoper == OPlt || ref_.Eoper == OPle) &&
2915                 (tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety)))
2916                 continue;
2917 
2918             /* If loop started out with a signed conditional that was
2919              * replaced with an unsigned one, don't do it if c2
2920              * is less than 0.
2921              */
2922             if (ref_.Nflags & NFLtouns && fl.c2.Eoper == OPconst)
2923             {
2924                 targ_llong c2 = el_tolong(fl.c2);
2925                 if (c2 < 0)
2926                     continue;
2927             }
2928 
2929             elem *refE2 = el_copytree(ref_.EV.E2);
2930             int refEoper = ref_.Eoper;
2931 
2932             /* if c1 < 0 and relop is < <= > >=
2933                then adjust relop as if both sides were multiplied
2934                by -1
2935              */
2936             if (!tyuns(ty) &&
2937                 (tyintegral(ty) && el_tolong(fl.c1) < 0 ||
2938                  tyfloating(ty) && el_toldoubled(fl.c1) < 0.0))
2939                 refEoper = swaprel(refEoper);
2940 
2941             /* Replace (X relop e) with (X relop (short)e)
2942                if T is 1 word but e is 2
2943              */
2944             if (tysize(flty) == SHORTSIZE &&
2945                 tysize(refE2.Ety) == LONGSIZE)
2946                 refE2 = el_una(OP32_16,flty,refE2);
2947 
2948             /* replace e with e*c1 + c2             */
2949             elem* C2 = el_copytree(fl.c2);
2950             elem* fofe = el_bin(OPadd,flty,
2951                             el_bin(OPmul,refE2.Ety,
2952                                     refE2,
2953                                     el_copytree(fl.c1)),
2954                             C2);
2955             fofe = doptelem(fofe,GOALvalue | GOALagain);    // fold any constants
2956 
2957             if (tyuns(flty) && refEoper == OPge &&
2958                 fofe.Eoper == OPconst && el_allbits(fofe, 0) &&
2959                 fl.c2.Eoper == OPconst && !el_allbits(fl.c2, 0))
2960             {
2961                 /* Don't do it if replacement will result in
2962                  * an unsigned T>=0 which will be an infinite loop.
2963                  */
2964                 el_free(fofe);
2965                 continue;
2966             }
2967 
2968             if (debugc) printf("Eliminating basic IV '%s'\n",X.Sident.ptr);
2969 
2970             debug if (debugc)
2971             {
2972                 printf("Comparison replaced: ");
2973                 WReqn(ref_);
2974                 printf(" with ");
2975             }
2976 
2977             el_free(ref_.EV.E2);
2978             ref_.EV.E2 = refE2;
2979             ref_.Eoper = cast(ubyte)refEoper;
2980 
2981             elimass(einc);          // dump the increment elem
2982 
2983             // replace X with T
2984             assert(ref_.EV.E1.EV.Voffset == 0);
2985             ref_.EV.E1.EV.Vsym = fl.FLtemp;
2986             ref_.EV.E1.Ety = flty;
2987             ref_.EV.E2 = fofe;
2988 
2989             /* If sizes of expression worked out wrong...
2990                Which can happen if we have (int)ptr==e
2991              */
2992             if (OTbinary(fofe.Eoper))         // if didn't optimize it away
2993             {
2994                 const tym_t fofety = fofe.Ety;
2995                 const int sz = tysize(fofety);
2996                 tym_t ty1 = fofe.EV.E1.Ety;
2997                 const tym_t ty2 = fofe.EV.E2.Ety;
2998                 /* Sizes of + expression must all be the same       */
2999                 if (sz != tysize(ty1) &&
3000                     sz != tysize(ty2)
3001                    )
3002                 {
3003                     if (tyuns(fofety))      // if unsigned comparison
3004                         ty1 = touns(ty1);   /* to unsigned type     */
3005                     fofe.Ety = ty1;
3006                     ref_.EV.E1.Ety = ty1;
3007                 }
3008             }
3009 
3010             /* Fix if leaves of compare are TYfptrs and the compare */
3011             /* operator is < <= > >=.                               */
3012             if (ref_.Eoper >= OPle && ref_.Eoper <= OPge && tyfv(ref_.EV.E1.Ety))
3013             {
3014                 assert(tyfv(ref_.EV.E2.Ety));
3015                 ref_.EV.E1 = el_una(OPoffset,TYuint,ref_.EV.E1);
3016                 ref_.EV.E2 = el_una(OPoffset,TYuint,fofe);
3017             }
3018 
3019             debug if (debugc)
3020             {
3021                 WReqn(ref_);
3022                 printf("\n");
3023             }
3024 
3025             go.changes++;
3026             doflow = true;                  /* redo flow analysis   */
3027 
3028             /* if X is live on entry to any successor S outside loop */
3029             /*      prepend elem X=(T-c2)/c1 to S.Belem     */
3030 
3031             for (uint i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i)  // for each exit block
3032             {
3033                 elem *ne;
3034                 block *b;
3035 
3036                 foreach (bl; ListRange(dfo[i].Bsucc))
3037                 {   /* for each successor   */
3038                     b = list_block(bl);
3039                     if (vec_testbit(b.Bdfoidx,l.Lloop))
3040                         continue;       /* inside loop  */
3041                     if (!vec_testbit(X.Ssymnum,b.Binlv))
3042                         continue;       /* not live     */
3043 
3044                     C2 = el_copytree(fl.c2);
3045                     ne = el_bin(OPmin,ty,
3046                             el_var(fl.FLtemp),
3047                             C2);
3048                     if (tybasic(ne.EV.E1.Ety) == TYfptr &&
3049                         tybasic(ne.EV.E2.Ety) == TYfptr)
3050                     {
3051                         ne.Ety = I64 ? TYllong : TYint;
3052                         if (tylong(ty) && _tysize[TYint] == 2)
3053                             ne = el_una(OPs16_32,ty,ne);
3054                     }
3055 
3056                     ne = el_bin(OPeq,X.ty(),
3057                             el_var(X),
3058                             el_bin(OPdiv,ne.Ety,
3059                                 ne,
3060                                 el_copytree(fl.c1)));
3061 
3062                     debug if (debugc)
3063                     {
3064                         printf("Adding (");
3065                         WReqn(ne);
3066                         printf(") to exit block B%d\n",b.Bdfoidx);
3067                         //elem_print(ne);
3068                     }
3069 
3070                     /* We have to add a new block if there is */
3071                     /* more than one predecessor to b.      */
3072                     if (list_next(b.Bpred))
3073                     {
3074                         block *bn = block_calloc();
3075                         bn.Btry = b.Btry;
3076                         bn.BC = BCgoto;
3077                         bn.Bnext = dfo[i].Bnext;
3078                         dfo[i].Bnext = bn;
3079                         list_append(&(bn.Bsucc),b);
3080                         list_append(&(bn.Bpred),dfo[i]);
3081                         bl.ptr = cast(void *)bn;
3082                         foreach (bl2; ListRange(b.Bpred))
3083                             if (list_block(bl2) == dfo[i])
3084                             {
3085                                 bl2.ptr = cast(void *)bn;
3086                                 goto L2;
3087                             }
3088                         assert(0);
3089                     L2:
3090                         b = bn;
3091                         addblk = true;
3092                     }
3093 
3094                     if (b.Belem)
3095                         b.Belem =
3096                             el_bin(OPcomma,b.Belem.Ety,
3097                                 ne,b.Belem);
3098                     else
3099                         b.Belem = ne;
3100                     go.changes++;
3101                     doflow = true;  /* redo flow analysis   */
3102                 } /* for each successor */
3103             } /* foreach exit block */
3104             if (addblk)
3105                 return;
3106         }
3107         else if (refcount == 0)                 /* if no uses of IV in loop  */
3108         {
3109             /* Eliminate the basic IV if it is not live on any successor */
3110             for (uint i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i)  // for each exit block
3111             {
3112                 foreach (bl; ListRange(dfo[i].Bsucc))
3113                 {   /* for each successor   */
3114                     block *b = list_block(bl);
3115                     if (vec_testbit(b.Bdfoidx,l.Lloop))
3116                         continue;       /* inside loop  */
3117                     if (vec_testbit(X.Ssymnum,b.Binlv))
3118                         goto L1;        /* live         */
3119                 }
3120             }
3121 
3122             if (debugc) printf("No uses, eliminating basic IV '%s' (%p)\n",X.Sident.ptr,X);
3123 
3124             /* Remove the (s op= e2) by replacing it with (1 , e2)
3125              * and let later passes remove the (1 ,) nodes.
3126              * Don't remove those nodes here because other biv's may refer
3127              * to them.
3128              */
3129             {
3130                 elem* ei = *biv.IVincr;
3131                 ei.Eoper = OPcomma;
3132                 ei.EV.E1.Eoper = OPconst;
3133                 ei.EV.E1.Ety = TYint;
3134             }
3135 
3136             go.changes++;
3137             doflow = true;                  /* redo flow analysis   */
3138           L1:
3139         }
3140     } /* for */
3141 }
3142 
3143 /***********************
3144  * Eliminate opeq IVs that are not used outside the loop.
3145  */
3146 
3147 @trusted
3148 private void elimopeqs(ref Loop l)
3149 {
3150     elem **pref;
3151     Symbol *X;
3152     int refcount;
3153 
3154     if (debugc) printf("elimopeqs(%p)\n", &l);
3155     //foreach (ref biv; l.Lopeqlist) elem_print(*(biv.IVincr));
3156 
3157     foreach (ref biv; l.Lopeqlist)
3158     {
3159         // Can't eliminate this basic IV if we have a goal for the
3160         // increment elem.
3161         // Be careful about Nflags being in a union...
3162         if (!((*biv.IVincr).Nflags & NFLnogoal))
3163             continue;
3164 
3165         X = biv.IVbasic;
3166         assert(symbol_isintab(X));
3167         pref = onlyref(X,l,*biv.IVincr,refcount);
3168 
3169         // if only ref of X is of the form (X) or (X relop e) or (e relop X)
3170         if (pref != null && refcount <= 1)
3171         { }
3172         else if (refcount == 0)                 // if no uses of IV in loop
3173         {   // Eliminate the basic IV if it is not live on any successor
3174             uint i;
3175             for (i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i)  // for each exit block
3176             {
3177                 foreach (bl; ListRange(dfo[i].Bsucc))
3178                 {   // for each successor
3179                     block *b = list_block(bl);
3180                     if (vec_testbit(b.Bdfoidx,l.Lloop))
3181                         continue;       // inside loop
3182                     if (vec_testbit(X.Ssymnum,b.Binlv))
3183                         goto L1;        // live
3184                 }
3185             }
3186 
3187             if (debugc) printf("No uses, eliminating opeq IV '%s' (%p)\n",X.Sident.ptr,X);
3188 
3189             /* Remove the (s op= e2) by replacing it with (1 , e2)
3190              * and let later passes remove the (1 ,) nodes.
3191              * Don't remove those nodes here because other biv's may refer
3192              * to them, for nodes like (sum += i++)
3193              */
3194             {
3195                 elem* einc = *(biv.IVincr);
3196                 einc.Eoper = OPcomma;
3197                 einc.EV.E1.Eoper = OPconst;
3198                 einc.EV.E1.Ety = TYint;
3199             }
3200 
3201             go.changes++;
3202             doflow = true;                      // redo flow analysis
3203         L1:
3204         }
3205     }
3206 }
3207 
3208 /**************************
3209  * Find simplest elem in family.
3210  * Params:
3211  *      fams = array of famlist's
3212  *      tym  = type of basic IV
3213  * Returns: index into fams[] of simplest; fams.length if none found.
3214  */
3215 
3216 @trusted
3217 extern (D)
3218 private size_t simfl(famlist[] fams, tym_t tym)
3219 {
3220     size_t sofar = fams.length;
3221 
3222     foreach (i, ref fl; fams)
3223     {
3224         if (fl.FLtemp == FLELIM)       /* no variable, so skip it      */
3225             continue;
3226         /* If size of replacement is less than size of biv, we could    */
3227         /* be in trouble due to loss of precision.                      */
3228         if (size(fl.FLtemp.ty()) < size(tym))
3229             continue;
3230 
3231         // pick simplest
3232         sofar = sofar == fams.length ? i
3233                                      : (flcmp(fams[sofar], fams[i]) ? sofar : i);
3234     }
3235     return sofar;
3236 }
3237 
3238 /**************************
3239  * Return simpler of two family elems.
3240  * There is room for improvement, namely if
3241  *      f1.c1 = 2, f2.c1 = 27
3242  * then pick f1 because it is a shift.
3243  * Returns:
3244  *      true for f1 is simpler, false  for f2 is simpler
3245  */
3246 
3247 @trusted
3248 private bool flcmp(const ref famlist f1, const ref famlist f2)
3249 {
3250     auto t1 = &(f1.c1.EV);
3251     auto t2 = &(f2.c1.EV);
3252     auto ty = (*f1.FLpelem).Ety;           // type of elem
3253 
3254     static if (0)
3255     {
3256         printf("f1: c1 = %d, c2 = %d\n",t1.Vshort,f1.c2.EV.Vshort);
3257         printf("f2: c1 = %d, c2 = %d\n",t2.Vshort,f2.c2.EV.Vshort);
3258         printf("%s %s\n", tym_str((*f1.FLpelem).Ety), tym_str((*f2.FLpelem).Ety));
3259     }
3260 
3261     /* Wimp out and just pick f1 if the types don't match               */
3262     if (tysize(ty) == tysize((*f2.FLpelem).Ety))
3263     {
3264         switch (tybasic(ty))
3265         {   case TYbool:
3266             case TYchar:
3267             case TYschar:
3268             case TYuchar:
3269                 if (t2.Vuchar == 1 ||
3270                     t1.Vuchar != 1 && f2.c2.EV.Vuchar == 0)
3271                         goto Lf2;
3272                 break;
3273 
3274             case TYshort:
3275             case TYushort:
3276             case TYchar16:
3277             case TYwchar_t:     // BUG: what about 4 byte wchar_t's?
3278             case_short:
3279                 if (t2.Vshort == 1 ||
3280                     t1.Vshort != 1 && f2.c2.EV.Vshort == 0)
3281                         goto Lf2;
3282                 break;
3283 
3284             case TYsptr:
3285             case TYcptr:
3286             case TYnptr:        // BUG: 64 bit pointers?
3287             case TYimmutPtr:
3288             case TYsharePtr:
3289             case TYrestrictPtr:
3290             case TYfgPtr:
3291             case TYnullptr:
3292             case TYint:
3293             case TYuint:
3294                 if (_tysize[TYint] == SHORTSIZE)
3295                     goto case_short;
3296                 else
3297                     goto case_long;
3298 
3299             case TYlong:
3300             case TYulong:
3301             case TYdchar:
3302             case TYfptr:
3303             case TYvptr:
3304             case TYhptr:
3305             case_long:
3306                 if (t2.Vlong == 1 ||
3307                     t1.Vlong != 1 && f2.c2.EV.Vlong == 0)
3308                         goto Lf2;
3309                 break;
3310 
3311             case TYfloat:
3312                 if (t2.Vfloat == 1 ||
3313                     t1.Vfloat != 1 && f2.c2.EV.Vfloat == 0)
3314                         goto Lf2;
3315                 break;
3316 
3317             case TYdouble:
3318             case TYdouble_alias:
3319                 if (t2.Vdouble == 1.0 ||
3320                     t1.Vdouble != 1.0 && f2.c2.EV.Vdouble == 0)
3321                         goto Lf2;
3322                 break;
3323 
3324             case TYldouble:
3325                 if (t2.Vldouble == 1.0 ||
3326                     t1.Vldouble != 1.0 && f2.c2.EV.Vldouble == 0)
3327                         goto Lf2;
3328                 break;
3329 
3330             case TYllong:
3331             case TYullong:
3332                 if (t2.Vllong == 1 ||
3333                     t1.Vllong != 1 && f2.c2.EV.Vllong == 0)
3334                         goto Lf2;
3335                 break;
3336 
3337             default:
3338                 assert(0);
3339         }
3340     }
3341     //printf("picking f1\n");
3342     return true;
3343 
3344 Lf2:
3345     //printf("picking f2\n");
3346     return false;
3347 }
3348 
3349 /*****************************
3350  * If loop is in a try block, see if there are references to x in an enclosing
3351  * try block. This is because the loop may throw and transfer control
3352  * outside the try block, and that will count as a use of x.
3353  * Params:
3354  *      x = basic induction variable symbol
3355  *      l = loop x is in
3356  * Returns:
3357  *      true if x is used outside the try block
3358  */
3359 @trusted
3360 private bool catchRef(Symbol* x, ref Loop l)
3361 {
3362     block* btry = l.Lhead.Btry;
3363     if (!btry)
3364         return false;   // not in a try block
3365 
3366     foreach (i, b; dfo[])
3367     {
3368         if (vec_testbit(b.Bdfoidx, l.Lloop))
3369             continue;
3370         /* this is conservative, just checking if x is used outside the loop.
3371          * A better check would see if the body of the loop throws, and would
3372          * check the enclosing catch/finally blocks and their exit blocks.
3373          * https://issues.dlang.org/show_bug.cgi?22104
3374          */
3375         if (vec_testbit(x.Ssymnum, b.Binlv))
3376             return true;
3377     }
3378 
3379     return false;
3380 }
3381 
3382 /************************************
3383  * Params:
3384  *      x    = basic IV symbol
3385  *      incn = increment elem for basic IV X.
3386  *      refcount = set to # of references to X other than the increment elem
3387  * Returns:
3388  *      If ref of X in loop l is of the form (X relop e) or (e relop X)
3389  *              Return the relop elem
3390  *      Else
3391  *              Return null
3392  */
3393 
3394 private __gshared
3395 {
3396     elem **nd;
3397     elem *sincn;
3398     Symbol *X;
3399 }
3400 
3401 @trusted
3402 private elem ** onlyref(Symbol *x, ref Loop l,elem *incn, out int refcount)
3403 {
3404     uint i;
3405 
3406     //printf("onlyref('%s')\n", x.Sident.ptr);
3407     X = x;                                /* save some parameter passing  */
3408     assert(symbol_isintab(x));
3409     sincn = incn;
3410 
3411     debug
3412       if (!(X.Ssymnum < globsym.length && incn))
3413           printf("X = %d, globsym.length = %d, l = %p, incn = %p\n",cast(int) X.Ssymnum,cast(int) globsym.length,&l,incn);
3414 
3415     assert(X.Ssymnum < globsym.length && incn);
3416     int count = 0;
3417     nd = null;
3418     for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)  // for each block in loop
3419     {
3420         block* b = dfo[i];
3421         if (b.Belem)
3422         {
3423             count += countrefs(&b.Belem,b.BC == BCiftrue);
3424         }
3425     }
3426 
3427     static if (0)
3428     {
3429         printf("count = %d, nd = (", count);
3430         if (nd) WReqn(*nd);
3431         printf(")\n");
3432     }
3433 
3434     refcount = count;
3435     return nd;
3436 }
3437 
3438 
3439 /******************************
3440  * Count elems of the form (X relop e) or (e relop X).
3441  * Do not count the node if it is the increment node (sincn).
3442  * Params:
3443  *      pn = pointer to start of elem tree
3444  *      flag = true if block wants to test the elem
3445  * Returns:
3446  *      number of elems of the form
3447  */
3448 
3449 @trusted
3450 private int countrefs(elem **pn,bool flag)
3451 {
3452     elem *n = *pn;
3453 
3454     assert(n);
3455     if (n == sincn)                       /* if it is the increment elem  */
3456     {
3457         return OTbinary(n.Eoper)
3458             ? countrefs(&n.EV.E2, false)
3459             : 0;                          // don't count lvalue
3460     }
3461     if (OTunary(n.Eoper))
3462         return countrefs(&n.EV.E1,false);
3463     else if (OTbinary(n.Eoper))
3464     {
3465         if (OTrel(n.Eoper))
3466         {
3467             elem *e1 = n.EV.E1;
3468 
3469             assert(e1.Eoper != OPcomma);
3470             if (e1 == sincn &&
3471                 (e1.Eoper == OPeq || OTopeq(e1.Eoper)))
3472                 goto L1;
3473 
3474             /* Check both subtrees to see if n is the comparison node,
3475              * that is, if X is a leaf of the comparison.
3476              */
3477             if (e1.Eoper == OPvar && e1.EV.Vsym == X && !countrefs2(n.EV.E2, X) ||
3478                 n.EV.E2.Eoper == OPvar && n.EV.E2.EV.Vsym == X && !countrefs2(e1, X))
3479                 nd = pn;                /* found the relop node */
3480         }
3481     L1:
3482         return countrefs(&n.EV.E1,false) +
3483                countrefs(&n.EV.E2,(flag && n.Eoper == OPcomma));
3484     }
3485     else if ((n.Eoper == OPvar || n.Eoper == OPrelconst) && n.EV.Vsym == X)
3486     {
3487         if (flag)
3488             nd = pn;                    /* comparing it with 0          */
3489         return 1;                       // found another reference
3490     }
3491     return 0;
3492 }
3493 
3494 /*******************************
3495  * Count number of times symbol X appears in elem tree e.
3496  */
3497 
3498 @trusted
3499 private int countrefs2(const(elem)* e, const Symbol* s)
3500 {
3501     debug elem_debug(e);
3502     while (OTunary(e.Eoper))
3503         e = e.EV.E1;
3504     if (OTbinary(e.Eoper))
3505         return countrefs2(e.EV.E1, s) + countrefs2(e.EV.E2, s);
3506     return ((e.Eoper == OPvar || e.Eoper == OPrelconst) &&
3507             e.EV.Vsym == s);
3508 }
3509 
3510 /****************************
3511  * Eliminate some special cases.
3512  */
3513 
3514 @trusted
3515 private
3516 extern(D) void elimspec(const ref Loop loop, block*[] dfo)
3517 {
3518     // Visit each block in loop
3519     for (size_t i = 0; (i = vec_index(i, loop.Lloop)) < dfo.length; ++i)
3520     {
3521         auto b = dfo[i];
3522         if (b.Belem)
3523             elimspecwalk(&b.Belem);
3524     }
3525 }
3526 
3527 
3528 /******************************
3529  */
3530 
3531 @trusted
3532 private void elimspecwalk(elem **pn)
3533 {
3534     elem *n;
3535 
3536     n = *pn;
3537     assert(n);
3538     if (OTunary(n.Eoper))
3539         elimspecwalk(&n.EV.E1);
3540     else if (OTbinary(n.Eoper))
3541     {
3542         elimspecwalk(&n.EV.E1);
3543         elimspecwalk(&n.EV.E2);
3544         if (OTrel(n.Eoper))
3545         {
3546             elem *e1 = n.EV.E1;
3547 
3548             /* Replace ((e1,e2) rel e3) with (e1,(e2 rel e3).
3549              * This will reduce the number of cases for elimbasivs().
3550              * Don't do equivalent with (e1 rel (e2,e3)) because
3551              * of potential side effects in e1.
3552              */
3553             if (e1.Eoper == OPcomma)
3554             {
3555                 elem *e;
3556 
3557                 debug if (debugc)
3558                 {   printf("3rewriting ("); WReqn(n); printf(")\n"); }
3559 
3560                 e = n.EV.E2;
3561                 n.EV.E2 = e1;
3562                 n.EV.E1 = n.EV.E2.EV.E1;
3563                 n.EV.E2.EV.E1 = n.EV.E2.EV.E2;
3564                 n.EV.E2.EV.E2 = e;
3565                 n.EV.E2.Eoper = n.Eoper;
3566                 n.EV.E2.Ety = n.Ety;
3567                 n.Eoper = OPcomma;
3568 
3569                 go.changes++;
3570                 doflow = true;
3571 
3572                 elimspecwalk(&n.EV.E1);
3573                 elimspecwalk(&n.EV.E2);
3574             }
3575 
3576             /* Rewrite ((X op= e2) rel e3) into ((X op= e2),(X rel e3))
3577              * Rewrite ((X ++  e2) rel e3) into ((X +=  e2),(X-e2 rel e3))
3578              * so that the op= will not have a goal, so elimbasivs()
3579              * will work on it.
3580              */
3581             if ((OTopeq(e1.Eoper)
3582                  || OTpost(e1.Eoper)
3583                 ) &&
3584                 !el_sideeffect(e1.EV.E1))
3585             {
3586                 elem *e;
3587                 OPER op;
3588 
3589                 debug if (debugc)
3590                 {   printf("4rewriting ("); WReqn(n); printf(")\n"); }
3591 
3592                 e = el_calloc();
3593                 el_copy(e,n);
3594                 e.EV.E1 = el_copytree(e1.EV.E1);
3595                 e.EV.E1.Ety = n.EV.E1.Ety;
3596                 n.EV.E2 = e;
3597                 switch (e1.Eoper)
3598                 {
3599                     case OPpostinc:
3600                         e1.Eoper = OPaddass;
3601                         op = OPmin;
3602                         goto L3;
3603 
3604                     case OPpostdec:
3605                         e1.Eoper = OPminass;
3606                         op = OPadd;
3607                     L3: e.EV.E1 = el_bin(op,e.EV.E1.Ety,e.EV.E1,el_copytree(e1.EV.E2));
3608                         break;
3609 
3610                     default:
3611                         break;
3612                 }
3613                 /* increment node is now guaranteed to have no goal */
3614                 e1.Nflags |= NFLnogoal;
3615                 n.Eoper = OPcomma;
3616                 //go.changes++;
3617                 doflow = true;
3618 
3619                 elimspecwalk(&n.EV.E1);
3620                 elimspecwalk(&n.EV.E2);
3621             }
3622         }
3623   }
3624 }
3625 
3626 /********
3627  * Walk e in execution order.
3628  * When eincrement is found, remove it.
3629  * Continue, replacing instances of `v` with `v+increment`
3630  * When second eincrement is found, stop.
3631  * Params:
3632  *      e = expression to walk
3633  *      defnum = index of eincrement
3634  *      v = increment variable
3635  *      increment = amount to increment v
3636  *      unrolls = number of times loop has been unrolled
3637  */
3638 
3639 private void unrollWalker(elem* e, uint defnum, Symbol* v, targ_llong increment, int unrolls) nothrow
3640 {
3641     int state = 0;
3642 
3643     /***********************************
3644      * Walk e in execution order, fixing it according to state.
3645      * state == 0..unrolls-1: when eincrement is found, remove it, advance to next state
3646      * state == 1..unrolls-1: replacing instances of v with v+(state*increment),
3647      * state == unrolls-1: leave eincrement alone, advance to next state
3648      * state == unrolls: done
3649      */
3650 
3651     void walker(elem *e)
3652     {
3653         assert(e);
3654         const op = e.Eoper;
3655         if (ERTOL(e))
3656         {
3657             if (e.Edef != defnum)
3658             {
3659                 walker(e.EV.E2);
3660                 walker(e.EV.E1);
3661             }
3662         }
3663         else if (OTbinary(op))
3664         {
3665             if (e.Edef != defnum)
3666             {
3667                 walker(e.EV.E1);
3668                 walker(e.EV.E2);
3669             }
3670         }
3671         else if (OTunary(op))
3672         {
3673             assert(e.Edef != defnum);
3674             walker(e.EV.E1);
3675         }
3676         else if (op == OPvar &&
3677                  state &&
3678                  e.EV.Vsym == v)
3679         {
3680             // overwrite e with (v+increment)
3681             elem *e1 = el_calloc();
3682             el_copy(e1,e);
3683             e.Eoper = OPadd;
3684             e.EV.E1 = e1;
3685             e.EV.E2 = el_long(e.Ety, increment * state);
3686         }
3687         if (OTdef(op) && e.Edef == defnum)
3688         {
3689             // found the increment elem; neuter all but the last one
3690             if (state + 1 < unrolls)
3691             {
3692                 el_free(e.EV.E1);
3693                 el_free(e.EV.E2);
3694                 e.Eoper = OPconst;
3695                 e.EV.Vllong = 0;
3696             }
3697             ++state;
3698         }
3699     }
3700 
3701     walker(e);
3702     assert(state == unrolls);
3703 }
3704 
3705 
3706 /*********************************
3707  * Unroll loop if possible.
3708  * Params:
3709  *      l = loop to unroll
3710  * Returns:
3711  *      true if loop was unrolled
3712  */
3713 @trusted
3714 bool loopunroll(ref Loop l)
3715 {
3716     const bool log = false;
3717     if (log) printf("loopunroll(%p)\n", &l);
3718 
3719     /* Do not repeatedly unroll the same loop,
3720      * or waste time attempting to
3721      */
3722     if (l.Lhead.Bflags & BFLnounroll)
3723         return false;
3724     l.Lhead.Bflags |= BFLnounroll;
3725     if (log)
3726         WRfunc("loop", funcsym_p, startblock);
3727 
3728     if (l.Lhead.Btry || l.Ltail.Btry)
3729         return false;
3730 
3731     /* For simplification, only unroll loops that consist only
3732      * of a head and tail, and the tail is the exit block.
3733      */
3734     const numblocks = vec_numBitsSet(l.Lloop);
3735 {   // Ensure no changes
3736     if (dfo.length != vec_numbits(l.Lloop)) assert(0);
3737     int n = 0;
3738     for (int i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)  // for each block in loop
3739         ++n;
3740     if (n != numblocks) assert(0);
3741 }
3742     if (numblocks != 2)
3743     {
3744         if (log) printf("\tnot 2 blocks, but %d\n", numblocks);
3745         return false;
3746     }
3747     assert(l.Lhead != l.Ltail);
3748 
3749     /* tail must be the sole exit block
3750      */
3751     if (vec_testbit(l.Lhead.Bdfoidx, l.Lexit) ||
3752         !vec_testbit(l.Ltail.Bdfoidx, l.Lexit))
3753     {
3754         if (log) printf("\ttail not sole exit block\n");
3755         return false;
3756     }
3757 
3758     elem *ehead = l.Lhead.Belem;
3759     elem *etail = l.Ltail.Belem;
3760 
3761     if (log)
3762     {
3763         printf("Unroll candidate:\n");
3764         printf("  head B%d:\t", l.Lhead.Bdfoidx); WReqn(l.Lhead.Belem); printf("\n");
3765         printf("  tail B%d:\t", l.Ltail.Bdfoidx); WReqn(l.Ltail.Belem); printf("\n");
3766     }
3767 
3768     /* Tail must be of the form: (v < c) or (v <= c) where v is an unsigned integer
3769      */
3770     if ((etail.Eoper != OPlt && etail.Eoper != OPle) ||
3771         etail.EV.E1.Eoper != OPvar ||
3772         etail.EV.E2.Eoper != OPconst)
3773     {
3774         if (log) printf("\tnot (v < c)\n");
3775         return false;
3776     }
3777 
3778     elem *e1 = etail.EV.E1;
3779     elem *e2 = etail.EV.E2;
3780 
3781     if (!tyintegral(e1.Ety) ||
3782         tysize(e1.Ety) > targ_llong.sizeof ||
3783         !(tyuns(e1.Ety) || tyuns(e2.Ety)))
3784     {
3785         if (log) printf("\tnot (integral unsigned)\n");
3786         return false;
3787     }
3788 
3789     int cost = el_length(ehead);
3790     //printf("test4 cost: %d\n", cost);
3791 
3792     if (cost > 100)
3793     {
3794         if (log) printf("\tcost %d\n", cost);
3795         return false;
3796     }
3797     if (log) printf("cost %d\n", cost);
3798 
3799     Symbol* v = e1.EV.Vsym;
3800 
3801     // RD info is only reliable for registers and autos
3802     if (!(sytab[v.Sclass] & SCRD) || !(v.Sflags & SFLunambig))
3803     {
3804         if (log) printf("\tnot SCRD\n");
3805         return false;
3806     }
3807 
3808     /* Find the initial, increment elem, and final value of s
3809      */
3810     elem *einitial;
3811     elem *eincrement;
3812     if (!findloopparameters(etail, einitial, eincrement))
3813     {
3814         if (log) printf("\tnot findloopparameters()\n");
3815         return false;
3816     }
3817 
3818     targ_llong initial = el_tolong(einitial.EV.E2);
3819     targ_llong increment = el_tolong(eincrement.EV.E2);
3820     if (eincrement.Eoper == OPpostdec || eincrement.Eoper == OPminass)
3821         increment = -increment;
3822     targ_llong final_ = el_tolong(e2);
3823 
3824     if (log) printf("initial = %lld, increment = %lld, final = %lld\n",cast(long)initial,cast(long)increment,cast(long)final_);
3825 
3826     if (etail.Eoper == OPle)
3827         ++final_;
3828 
3829     if (initial < 0 ||
3830         final_ < initial ||
3831         increment <= 0 ||
3832         (final_ - initial) % increment)
3833     {
3834         if (log) printf("\tnot (evenly divisible)\n");
3835         return false;
3836     }
3837 
3838     /* If loop would only execute once anyway, just remove the test at the end
3839      */
3840     if (initial + increment == final_)
3841     {
3842         if (log) printf("\tjust once\n");
3843         etail.Eoper = OPcomma;
3844         e2.EV.Vllong = 0;
3845         e2.Ety = etail.Ety;
3846         return false;
3847     }
3848 
3849     /* number of times the loop is unrolled
3850      */
3851     targ_ullong numIterations = (final_ - initial) / increment;
3852     const int unrolls = (numIterations < 1000 / cost)
3853         ? cast(int)numIterations
3854         : 2;
3855 
3856     if (unrolls == 0 || (final_ - initial) % unrolls)
3857     {
3858         if (log) printf("\tnot (divisible by %d)\n", unrolls);
3859         return false;
3860     }
3861 
3862     if (log) printf("Unrolling starting\n");
3863 
3864     // Double the increment
3865     eincrement.EV.E2.EV.Vllong *= unrolls;
3866     //printf("  4head:\t"); WReqn(l.Lhead.Belem); printf("\n");
3867 
3868     elem* e = null;
3869     foreach (i; 0 .. unrolls)
3870         e = el_combine(e, el_copytree(ehead));
3871 
3872     /* Walk e in execution order.
3873      * When eincrement is found, remove it.
3874      * Continue, replacing instances of `v` with `v+increment`
3875      * When last eincrement is found, stop.
3876      */
3877     unrollWalker(e, eincrement.Edef, v, increment, unrolls);
3878 
3879     l.Lhead.Belem = e;
3880 
3881     /* If unrolled loop would only execute once anyway, just remove the test at the end
3882      */
3883     if (initial + unrolls * increment == final_)
3884     {
3885         if (log) printf("\tcompletely unrolled\n");
3886         etail.Eoper = OPcomma;
3887         e2.EV.Vllong = 0;
3888         e2.Ety = etail.Ety;
3889     }
3890 
3891     //WRfunc("loopunroll done", funcsym_p, startblock);
3892     return true;
3893 }
3894 
3895 /******************************
3896  * Count number of elems in a tree
3897  * Params:
3898  *  e = tree
3899  * Returns:
3900  *  number of elems in tree
3901  */
3902 @trusted
3903 private int el_length(elem *e)
3904 {
3905     int n = 0;
3906     while (e)
3907     {
3908         n += 1;
3909         if (!OTleaf(e.Eoper))
3910         {
3911             if (e.Eoper == OPctor || e.Eoper == OPdtor)
3912                 return 10_000;
3913             n += el_length(e.EV.E2);
3914             e = e.EV.E1;
3915         }
3916         else
3917             break;
3918     }
3919     return n;
3920 }
3921 
3922 
3923 }