1/* Predictive commoning.
2 Copyright (C) 2005-2017 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 3, or (at your option) any
9later version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20/* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction-- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
30
31 Let us demonstrate what is done on an example:
32
33 for (i = 0; i < 100; i++)
34 {
35 a[i+2] = a[i] + a[i+1];
36 b[10] = b[10] + i;
37 c[i] = c[99 - i];
38 d[i] = d[i + 1];
39 }
40
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
46
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
51
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
66
67 In our example, we get the following chains (the chain for c is invalid).
68
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
72
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
82 reuse),
83
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
86
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
92
93 e[i] + f[i+1] + e[i+1] + f[i]
94
95 can be reassociated as
96
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
98
99 and we can combine the chains for e and f into one chain.
100
101 5) For each root reference (end of the chain) R, let N be maximum distance
102 of a reference reusing its value. Variables R0 up to RN are created,
103 together with phi nodes that transfer values from R1 .. RN to
104 R0 .. R(N-1).
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
111
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
115
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
118 {
119 f = phi (a[0], s);
120 s = phi (a[1], f);
121 x = phi (b[10], x);
122
123 f = f + s;
124 a[i+2] = f;
125 x = x + i;
126 b[10] = x;
127 }
128
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
135
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
139
140 In our case, F = 2 and the (main loop of the) result is
141
142 for (i = 0; i < ...; i += 2)
143 {
144 f = phi (a[0], f);
145 s = phi (a[1], s);
146 x = phi (b[10], x);
147
148 f = f + s;
149 a[i+2] = f;
150 x = x + i;
151 b[10] = x;
152
153 s = s + f;
154 a[i+3] = s;
155 x = x + i;
156 b[10] = x;
157 }
158
159 Apart from predictive commoning on Load-Load and Store-Load chains, we
160 also support Store-Store chains -- stores killed by other store can be
161 eliminated. Given below example:
162
163 for (i = 0; i < n; i++)
164 {
165 a[i] = 1;
166 a[i+2] = 2;
167 }
168
169 It can be replaced with:
170
171 t0 = a[0];
172 t1 = a[1];
173 for (i = 0; i < n; i++)
174 {
175 a[i] = 1;
176 t2 = 2;
177 t0 = t1;
178 t1 = t2;
179 }
180 a[n] = t0;
181 a[n+1] = t1;
182
183 If the loop runs more than 1 iterations, it can be further simplified into:
184
185 for (i = 0; i < n; i++)
186 {
187 a[i] = 1;
188 }
189 a[n] = 2;
190 a[n+1] = 2;
191
192 The interesting part is this can be viewed either as general store motion
193 or general dead store elimination in either intra/inter-iterations way.
194
195 With trivial effort, we also support load inside Store-Store chains if the
196 load is dominated by a store statement in the same iteration of loop. You
197 can see this as a restricted Store-Mixed-Load-Store chain.
198
199 TODO: For now, we don't support store-store chains in multi-exit loops. We
200 force to not unroll in case of store-store chain even if other chains might
201 ask for unroll.
202
203 Predictive commoning can be generalized for arbitrary computations (not
204 just memory loads), and also nontrivial transfer functions (e.g., replacing
205 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
206
207#include "config.h"
208#include "system.h"
209#include "coretypes.h"
210#include "backend.h"
211#include "rtl.h"
212#include "tree.h"
213#include "gimple.h"
214#include "predict.h"
215#include "tree-pass.h"
216#include "ssa.h"
217#include "gimple-pretty-print.h"
218#include "alias.h"
219#include "fold-const.h"
220#include "cfgloop.h"
221#include "tree-eh.h"
222#include "gimplify.h"
223#include "gimple-iterator.h"
224#include "gimplify-me.h"
225#include "tree-ssa-loop-ivopts.h"
226#include "tree-ssa-loop-manip.h"
227#include "tree-ssa-loop-niter.h"
228#include "tree-ssa-loop.h"
229#include "tree-into-ssa.h"
230#include "tree-dfa.h"
231#include "tree-ssa.h"
232#include "tree-data-ref.h"
233#include "tree-scalar-evolution.h"
234#include "params.h"
235#include "tree-affine.h"
236#include "builtins.h"
237
238/* The maximum number of iterations between the considered memory
239 references. */
240
241#define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
242
243/* Data references (or phi nodes that carry data reference values across
244 loop iterations). */
245
246typedef struct dref_d
247{
248 /* The reference itself. */
249 struct data_reference *ref;
250
251 /* The statement in that the reference appears. */
252 gimple *stmt;
253
254 /* In case that STMT is a phi node, this field is set to the SSA name
255 defined by it in replace_phis_by_defined_names (in order to avoid
256 pointing to phi node that got reallocated in the meantime). */
257 tree name_defined_by_phi;
258
259 /* Distance of the reference from the root of the chain (in number of
260 iterations of the loop). */
261 unsigned distance;
262
263 /* Number of iterations offset from the first reference in the component. */
264 widest_int offset;
265
266 /* Number of the reference in a component, in dominance ordering. */
267 unsigned pos;
268
269 /* True if the memory reference is always accessed when the loop is
270 entered. */
271 unsigned always_accessed : 1;
272} *dref;
273
274
275/* Type of the chain of the references. */
276
277enum chain_type
278{
279 /* The addresses of the references in the chain are constant. */
280 CT_INVARIANT,
281
282 /* There are only loads in the chain. */
283 CT_LOAD,
284
285 /* Root of the chain is store, the rest are loads. */
286 CT_STORE_LOAD,
287
288 /* There are only stores in the chain. */
289 CT_STORE_STORE,
290
291 /* A combination of two chains. */
292 CT_COMBINATION
293};
294
295/* Chains of data references. */
296
297typedef struct chain
298{
299 /* Type of the chain. */
300 enum chain_type type;
301
302 /* For combination chains, the operator and the two chains that are
303 combined, and the type of the result. */
304 enum tree_code op;
305 tree rslt_type;
306 struct chain *ch1, *ch2;
307
308 /* The references in the chain. */
309 vec<dref> refs;
310
311 /* The maximum distance of the reference in the chain from the root. */
312 unsigned length;
313
314 /* The variables used to copy the value throughout iterations. */
315 vec<tree> vars;
316
317 /* Initializers for the variables. */
318 vec<tree> inits;
319
320 /* Finalizers for the eliminated stores. */
321 vec<tree> finis;
322
323 /* gimple stmts intializing the initial variables of the chain. */
324 gimple_seq init_seq;
325
326 /* gimple stmts finalizing the eliminated stores of the chain. */
327 gimple_seq fini_seq;
328
329 /* True if there is a use of a variable with the maximal distance
330 that comes after the root in the loop. */
331 unsigned has_max_use_after : 1;
332
333 /* True if all the memory references in the chain are always accessed. */
334 unsigned all_always_accessed : 1;
335
336 /* True if this chain was combined together with some other chain. */
337 unsigned combined : 1;
338
339 /* True if this is store elimination chain and eliminated stores store
340 loop invariant value into memory. */
341 unsigned inv_store_elimination : 1;
342} *chain_p;
343
344
345/* Describes the knowledge about the step of the memory references in
346 the component. */
347
348enum ref_step_type
349{
350 /* The step is zero. */
351 RS_INVARIANT,
352
353 /* The step is nonzero. */
354 RS_NONZERO,
355
356 /* The step may or may not be nonzero. */
357 RS_ANY
358};
359
360/* Components of the data dependence graph. */
361
362struct component
363{
364 /* The references in the component. */
365 vec<dref> refs;
366
367 /* What we know about the step of the references in the component. */
368 enum ref_step_type comp_step;
369
370 /* True if all references in component are stores and we try to do
371 intra/inter loop iteration dead store elimination. */
372 bool eliminate_store_p;
373
374 /* Next component in the list. */
375 struct component *next;
376};
377
378/* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
379
380static bitmap looparound_phis;
381
382/* Cache used by tree_to_aff_combination_expand. */
383
384static hash_map<tree, name_expansion *> *name_expansions;
385
386/* Dumps data reference REF to FILE. */
387
388extern void dump_dref (FILE *, dref);
389void
390dump_dref (FILE *file, dref ref)
391{
392 if (ref->ref)
393 {
394 fprintf (file, " ");
395 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
396 fprintf (file, " (id %u%s)\n", ref->pos,
397 DR_IS_READ (ref->ref) ? "" : ", write");
398
399 fprintf (file, " offset ");
400 print_decs (ref->offset, file);
401 fprintf (file, "\n");
402
403 fprintf (file, " distance %u\n", ref->distance);
404 }
405 else
406 {
407 if (gimple_code (ref->stmt) == GIMPLE_PHI)
408 fprintf (file, " looparound ref\n");
409 else
410 fprintf (file, " combination ref\n");
411 fprintf (file, " in statement ");
412 print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
413 fprintf (file, "\n");
414 fprintf (file, " distance %u\n", ref->distance);
415 }
416
417}
418
419/* Dumps CHAIN to FILE. */
420
421extern void dump_chain (FILE *, chain_p);
422void
423dump_chain (FILE *file, chain_p chain)
424{
425 dref a;
426 const char *chain_type;
427 unsigned i;
428 tree var;
429
430 switch (chain->type)
431 {
432 case CT_INVARIANT:
433 chain_type = "Load motion";
434 break;
435
436 case CT_LOAD:
437 chain_type = "Loads-only";
438 break;
439
440 case CT_STORE_LOAD:
441 chain_type = "Store-loads";
442 break;
443
444 case CT_STORE_STORE:
445 chain_type = "Store-stores";
446 break;
447
448 case CT_COMBINATION:
449 chain_type = "Combination";
450 break;
451
452 default:
453 gcc_unreachable ();
454 }
455
456 fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
457 chain->combined ? " (combined)" : "");
458 if (chain->type != CT_INVARIANT)
459 fprintf (file, " max distance %u%s\n", chain->length,
460 chain->has_max_use_after ? "" : ", may reuse first");
461
462 if (chain->type == CT_COMBINATION)
463 {
464 fprintf (file, " equal to %p %s %p in type ",
465 (void *) chain->ch1, op_symbol_code (chain->op),
466 (void *) chain->ch2);
467 print_generic_expr (file, chain->rslt_type, TDF_SLIM);
468 fprintf (file, "\n");
469 }
470
471 if (chain->vars.exists ())
472 {
473 fprintf (file, " vars");
474 FOR_EACH_VEC_ELT (chain->vars, i, var)
475 {
476 fprintf (file, " ");
477 print_generic_expr (file, var, TDF_SLIM);
478 }
479 fprintf (file, "\n");
480 }
481
482 if (chain->inits.exists ())
483 {
484 fprintf (file, " inits");
485 FOR_EACH_VEC_ELT (chain->inits, i, var)
486 {
487 fprintf (file, " ");
488 print_generic_expr (file, var, TDF_SLIM);
489 }
490 fprintf (file, "\n");
491 }
492
493 fprintf (file, " references:\n");
494 FOR_EACH_VEC_ELT (chain->refs, i, a)
495 dump_dref (file, a);
496
497 fprintf (file, "\n");
498}
499
500/* Dumps CHAINS to FILE. */
501
502extern void dump_chains (FILE *, vec<chain_p> );
503void
504dump_chains (FILE *file, vec<chain_p> chains)
505{
506 chain_p chain;
507 unsigned i;
508
509 FOR_EACH_VEC_ELT (chains, i, chain)
510 dump_chain (file, chain);
511}
512
513/* Dumps COMP to FILE. */
514
515extern void dump_component (FILE *, struct component *);
516void
517dump_component (FILE *file, struct component *comp)
518{
519 dref a;
520 unsigned i;
521
522 fprintf (file, "Component%s:\n",
523 comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
524 FOR_EACH_VEC_ELT (comp->refs, i, a)
525 dump_dref (file, a);
526 fprintf (file, "\n");
527}
528
529/* Dumps COMPS to FILE. */
530
531extern void dump_components (FILE *, struct component *);
532void
533dump_components (FILE *file, struct component *comps)
534{
535 struct component *comp;
536
537 for (comp = comps; comp; comp = comp->next)
538 dump_component (file, comp);
539}
540
541/* Frees a chain CHAIN. */
542
543static void
544release_chain (chain_p chain)
545{
546 dref ref;
547 unsigned i;
548
549 if (chain == NULL)
550 return;
551
552 FOR_EACH_VEC_ELT (chain->refs, i, ref)
553 free (ref);
554
555 chain->refs.release ();
556 chain->vars.release ();
557 chain->inits.release ();
558 if (chain->init_seq)
559 gimple_seq_discard (chain->init_seq);
560
561 chain->finis.release ();
562 if (chain->fini_seq)
563 gimple_seq_discard (chain->fini_seq);
564
565 free (chain);
566}
567
568/* Frees CHAINS. */
569
570static void
571release_chains (vec<chain_p> chains)
572{
573 unsigned i;
574 chain_p chain;
575
576 FOR_EACH_VEC_ELT (chains, i, chain)
577 release_chain (chain);
578 chains.release ();
579}
580
581/* Frees a component COMP. */
582
583static void
584release_component (struct component *comp)
585{
586 comp->refs.release ();
587 free (comp);
588}
589
590/* Frees list of components COMPS. */
591
592static void
593release_components (struct component *comps)
594{
595 struct component *act, *next;
596
597 for (act = comps; act; act = next)
598 {
599 next = act->next;
600 release_component (act);
601 }
602}
603
604/* Finds a root of tree given by FATHERS containing A, and performs path
605 shortening. */
606
607static unsigned
608component_of (unsigned fathers[], unsigned a)
609{
610 unsigned root, n;
611
612 for (root = a; root != fathers[root]; root = fathers[root])
613 continue;
614
615 for (; a != root; a = n)
616 {
617 n = fathers[a];
618 fathers[a] = root;
619 }
620
621 return root;
622}
623
624/* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
625 components, A and B are components to merge. */
626
627static void
628merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
629{
630 unsigned ca = component_of (fathers, a);
631 unsigned cb = component_of (fathers, b);
632
633 if (ca == cb)
634 return;
635
636 if (sizes[ca] < sizes[cb])
637 {
638 sizes[cb] += sizes[ca];
639 fathers[ca] = cb;
640 }
641 else
642 {
643 sizes[ca] += sizes[cb];
644 fathers[cb] = ca;
645 }
646}
647
648/* Returns true if A is a reference that is suitable for predictive commoning
649 in the innermost loop that contains it. REF_STEP is set according to the
650 step of the reference A. */
651
652static bool
653suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
654{
655 tree ref = DR_REF (a), step = DR_STEP (a);
656
657 if (!step
658 || TREE_THIS_VOLATILE (ref)
659 || !is_gimple_reg_type (TREE_TYPE (ref))
660 || tree_could_throw_p (ref))
661 return false;
662
663 if (integer_zerop (step))
664 *ref_step = RS_INVARIANT;
665 else if (integer_nonzerop (step))
666 *ref_step = RS_NONZERO;
667 else
668 *ref_step = RS_ANY;
669
670 return true;
671}
672
673/* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
674
675static void
676aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
677{
678 tree type = TREE_TYPE (DR_OFFSET (dr));
679 aff_tree delta;
680
681 tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
682 &name_expansions);
683 aff_combination_const (&delta, type, wi::to_widest (DR_INIT (dr)));
684 aff_combination_add (offset, &delta);
685}
686
687/* Determines number of iterations of the innermost enclosing loop before B
688 refers to exactly the same location as A and stores it to OFF. If A and
689 B do not have the same step, they never meet, or anything else fails,
690 returns false, otherwise returns true. Both A and B are assumed to
691 satisfy suitable_reference_p. */
692
693static bool
694determine_offset (struct data_reference *a, struct data_reference *b,
695 widest_int *off)
696{
697 aff_tree diff, baseb, step;
698 tree typea, typeb;
699
700 /* Check that both the references access the location in the same type. */
701 typea = TREE_TYPE (DR_REF (a));
702 typeb = TREE_TYPE (DR_REF (b));
703 if (!useless_type_conversion_p (typeb, typea))
704 return false;
705
706 /* Check whether the base address and the step of both references is the
707 same. */
708 if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
709 || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
710 return false;
711
712 if (integer_zerop (DR_STEP (a)))
713 {
714 /* If the references have loop invariant address, check that they access
715 exactly the same location. */
716 *off = 0;
717 return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
718 && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
719 }
720
721 /* Compare the offsets of the addresses, and check whether the difference
722 is a multiple of step. */
723 aff_combination_dr_offset (a, &diff);
724 aff_combination_dr_offset (b, &baseb);
725 aff_combination_scale (&baseb, -1);
726 aff_combination_add (&diff, &baseb);
727
728 tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
729 &step, &name_expansions);
730 return aff_combination_constant_multiple_p (&diff, &step, off);
731}
732
733/* Returns the last basic block in LOOP for that we are sure that
734 it is executed whenever the loop is entered. */
735
736static basic_block
737last_always_executed_block (struct loop *loop)
738{
739 unsigned i;
740 vec<edge> exits = get_loop_exit_edges (loop);
741 edge ex;
742 basic_block last = loop->latch;
743
744 FOR_EACH_VEC_ELT (exits, i, ex)
745 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
746 exits.release ();
747
748 return last;
749}
750
751/* Splits dependence graph on DATAREFS described by DEPENDS to components. */
752
753static struct component *
754split_data_refs_to_components (struct loop *loop,
755 vec<data_reference_p> datarefs,
756 vec<ddr_p> depends)
757{
758 unsigned i, n = datarefs.length ();
759 unsigned ca, ia, ib, bad;
760 unsigned *comp_father = XNEWVEC (unsigned, n + 1);
761 unsigned *comp_size = XNEWVEC (unsigned, n + 1);
762 struct component **comps;
763 struct data_reference *dr, *dra, *drb;
764 struct data_dependence_relation *ddr;
765 struct component *comp_list = NULL, *comp;
766 dref dataref;
767 /* Don't do store elimination if loop has multiple exit edges. */
768 bool eliminate_store_p = single_exit (loop) != NULL;
769 basic_block last_always_executed = last_always_executed_block (loop);
770
771 FOR_EACH_VEC_ELT (datarefs, i, dr)
772 {
773 if (!DR_REF (dr))
774 {
775 /* A fake reference for call or asm_expr that may clobber memory;
776 just fail. */
777 goto end;
778 }
779 /* predcom pass isn't prepared to handle calls with data references. */
780 if (is_gimple_call (DR_STMT (dr)))
781 goto end;
782 dr->aux = (void *) (size_t) i;
783 comp_father[i] = i;
784 comp_size[i] = 1;
785 }
786
787 /* A component reserved for the "bad" data references. */
788 comp_father[n] = n;
789 comp_size[n] = 1;
790
791 FOR_EACH_VEC_ELT (datarefs, i, dr)
792 {
793 enum ref_step_type dummy;
794
795 if (!suitable_reference_p (dr, &dummy))
796 {
797 ia = (unsigned) (size_t) dr->aux;
798 merge_comps (comp_father, comp_size, n, ia);
799 }
800 }
801
802 FOR_EACH_VEC_ELT (depends, i, ddr)
803 {
804 widest_int dummy_off;
805
806 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
807 continue;
808
809 dra = DDR_A (ddr);
810 drb = DDR_B (ddr);
811
812 /* Don't do store elimination if there is any unknown dependence for
813 any store data reference. */
814 if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
815 && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
816 || DDR_NUM_DIST_VECTS (ddr) == 0))
817 eliminate_store_p = false;
818
819 ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
820 ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
821 if (ia == ib)
822 continue;
823
824 bad = component_of (comp_father, n);
825
826 /* If both A and B are reads, we may ignore unsuitable dependences. */
827 if (DR_IS_READ (dra) && DR_IS_READ (drb))
828 {
829 if (ia == bad || ib == bad
830 || !determine_offset (dra, drb, &dummy_off))
831 continue;
832 }
833 /* If A is read and B write or vice versa and there is unsuitable
834 dependence, instead of merging both components into a component
835 that will certainly not pass suitable_component_p, just put the
836 read into bad component, perhaps at least the write together with
837 all the other data refs in it's component will be optimizable. */
838 else if (DR_IS_READ (dra) && ib != bad)
839 {
840 if (ia == bad)
841 continue;
842 else if (!determine_offset (dra, drb, &dummy_off))
843 {
844 merge_comps (comp_father, comp_size, bad, ia);
845 continue;
846 }
847 }
848 else if (DR_IS_READ (drb) && ia != bad)
849 {
850 if (ib == bad)
851 continue;
852 else if (!determine_offset (dra, drb, &dummy_off))
853 {
854 merge_comps (comp_father, comp_size, bad, ib);
855 continue;
856 }
857 }
858 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)
859 && ia != bad && ib != bad
860 && !determine_offset (dra, drb, &dummy_off))
861 {
862 merge_comps (comp_father, comp_size, bad, ia);
863 merge_comps (comp_father, comp_size, bad, ib);
864 continue;
865 }
866
867 merge_comps (comp_father, comp_size, ia, ib);
868 }
869
870 if (eliminate_store_p)
871 {
872 tree niters = number_of_latch_executions (loop);
873
874 /* Don't do store elimination if niters info is unknown because stores
875 in the last iteration can't be eliminated and we need to recover it
876 after loop. */
877 eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know);
878 }
879
880 comps = XCNEWVEC (struct component *, n);
881 bad = component_of (comp_father, n);
882 FOR_EACH_VEC_ELT (datarefs, i, dr)
883 {
884 ia = (unsigned) (size_t) dr->aux;
885 ca = component_of (comp_father, ia);
886 if (ca == bad)
887 continue;
888
889 comp = comps[ca];
890 if (!comp)
891 {
892 comp = XCNEW (struct component);
893 comp->refs.create (comp_size[ca]);
894 comp->eliminate_store_p = eliminate_store_p;
895 comps[ca] = comp;
896 }
897
898 dataref = XCNEW (struct dref_d);
899 dataref->ref = dr;
900 dataref->stmt = DR_STMT (dr);
901 dataref->offset = 0;
902 dataref->distance = 0;
903
904 dataref->always_accessed
905 = dominated_by_p (CDI_DOMINATORS, last_always_executed,
906 gimple_bb (dataref->stmt));
907 dataref->pos = comp->refs.length ();
908 comp->refs.quick_push (dataref);
909 }
910
911 for (i = 0; i < n; i++)
912 {
913 comp = comps[i];
914 if (comp)
915 {
916 comp->next = comp_list;
917 comp_list = comp;
918 }
919 }
920 free (comps);
921
922end:
923 free (comp_father);
924 free (comp_size);
925 return comp_list;
926}
927
928/* Returns true if the component COMP satisfies the conditions
929 described in 2) at the beginning of this file. LOOP is the current
930 loop. */
931
932static bool
933suitable_component_p (struct loop *loop, struct component *comp)
934{
935 unsigned i;
936 dref a, first;
937 basic_block ba, bp = loop->header;
938 bool ok, has_write = false;
939
940 FOR_EACH_VEC_ELT (comp->refs, i, a)
941 {
942 ba = gimple_bb (a->stmt);
943
944 if (!just_once_each_iteration_p (loop, ba))
945 return false;
946
947 gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
948 bp = ba;
949
950 if (DR_IS_WRITE (a->ref))
951 has_write = true;
952 }
953
954 first = comp->refs[0];
955 ok = suitable_reference_p (first->ref, &comp->comp_step);
956 gcc_assert (ok);
957 first->offset = 0;
958
959 for (i = 1; comp->refs.iterate (i, &a); i++)
960 {
961 if (!determine_offset (first->ref, a->ref, &a->offset))
962 return false;
963
964 enum ref_step_type a_step;
965 gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
966 && a_step == comp->comp_step);
967 }
968
969 /* If there is a write inside the component, we must know whether the
970 step is nonzero or not -- we would not otherwise be able to recognize
971 whether the value accessed by reads comes from the OFFSET-th iteration
972 or the previous one. */
973 if (has_write && comp->comp_step == RS_ANY)
974 return false;
975
976 return true;
977}
978
979/* Check the conditions on references inside each of components COMPS,
980 and remove the unsuitable components from the list. The new list
981 of components is returned. The conditions are described in 2) at
982 the beginning of this file. LOOP is the current loop. */
983
984static struct component *
985filter_suitable_components (struct loop *loop, struct component *comps)
986{
987 struct component **comp, *act;
988
989 for (comp = &comps; *comp; )
990 {
991 act = *comp;
992 if (suitable_component_p (loop, act))
993 comp = &act->next;
994 else
995 {
996 dref ref;
997 unsigned i;
998
999 *comp = act->next;
1000 FOR_EACH_VEC_ELT (act->refs, i, ref)
1001 free (ref);
1002 release_component (act);
1003 }
1004 }
1005
1006 return comps;
1007}
1008
1009/* Compares two drefs A and B by their offset and position. Callback for
1010 qsort. */
1011
1012static int
1013order_drefs (const void *a, const void *b)
1014{
1015 const dref *const da = (const dref *) a;
1016 const dref *const db = (const dref *) b;
1017 int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
1018
1019 if (offcmp != 0)
1020 return offcmp;
1021
1022 return (*da)->pos - (*db)->pos;
1023}
1024
1025/* Compares two drefs A and B by their position. Callback for qsort. */
1026
1027static int
1028order_drefs_by_pos (const void *a, const void *b)
1029{
1030 const dref *const da = (const dref *) a;
1031 const dref *const db = (const dref *) b;
1032
1033 return (*da)->pos - (*db)->pos;
1034}
1035
1036/* Returns root of the CHAIN. */
1037
1038static inline dref
1039get_chain_root (chain_p chain)
1040{
1041 return chain->refs[0];
1042}
1043
1044/* Given CHAIN, returns the last write ref at DISTANCE, or NULL if it doesn't
1045 exist. */
1046
1047static inline dref
1048get_chain_last_write_at (chain_p chain, unsigned distance)
1049{
1050 for (unsigned i = chain->refs.length (); i > 0; i--)
1051 if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1052 && distance == chain->refs[i - 1]->distance)
1053 return chain->refs[i - 1];
1054
1055 return NULL;
1056}
1057
1058/* Given CHAIN, returns the last write ref with the same distance before load
1059 at index LOAD_IDX, or NULL if it doesn't exist. */
1060
1061static inline dref
1062get_chain_last_write_before_load (chain_p chain, unsigned load_idx)
1063{
1064 gcc_assert (load_idx < chain->refs.length ());
1065
1066 unsigned distance = chain->refs[load_idx]->distance;
1067
1068 for (unsigned i = load_idx; i > 0; i--)
1069 if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1070 && distance == chain->refs[i - 1]->distance)
1071 return chain->refs[i - 1];
1072
1073 return NULL;
1074}
1075
1076/* Adds REF to the chain CHAIN. */
1077
1078static void
1079add_ref_to_chain (chain_p chain, dref ref)
1080{
1081 dref root = get_chain_root (chain);
1082
1083 gcc_assert (wi::les_p (root->offset, ref->offset));
1084 widest_int dist = ref->offset - root->offset;
1085 gcc_assert (wi::fits_uhwi_p (dist));
1086
1087 chain->refs.safe_push (ref);
1088
1089 ref->distance = dist.to_uhwi ();
1090
1091 if (ref->distance >= chain->length)
1092 {
1093 chain->length = ref->distance;
1094 chain->has_max_use_after = false;
1095 }
1096
1097 /* Promote this chain to CT_STORE_STORE if it has multiple stores. */
1098 if (DR_IS_WRITE (ref->ref))
1099 chain->type = CT_STORE_STORE;
1100
1101 /* Don't set the flag for store-store chain since there is no use. */
1102 if (chain->type != CT_STORE_STORE
1103 && ref->distance == chain->length
1104 && ref->pos > root->pos)
1105 chain->has_max_use_after = true;
1106
1107 chain->all_always_accessed &= ref->always_accessed;
1108}
1109
1110/* Returns the chain for invariant component COMP. */
1111
1112static chain_p
1113make_invariant_chain (struct component *comp)
1114{
1115 chain_p chain = XCNEW (struct chain);
1116 unsigned i;
1117 dref ref;
1118
1119 chain->type = CT_INVARIANT;
1120
1121 chain->all_always_accessed = true;
1122
1123 FOR_EACH_VEC_ELT (comp->refs, i, ref)
1124 {
1125 chain->refs.safe_push (ref);
1126 chain->all_always_accessed &= ref->always_accessed;
1127 }
1128
1129 chain->inits = vNULL;
1130 chain->finis = vNULL;
1131
1132 return chain;
1133}
1134
1135/* Make a new chain of type TYPE rooted at REF. */
1136
1137static chain_p
1138make_rooted_chain (dref ref, enum chain_type type)
1139{
1140 chain_p chain = XCNEW (struct chain);
1141
1142 chain->type = type;
1143 chain->refs.safe_push (ref);
1144 chain->all_always_accessed = ref->always_accessed;
1145 ref->distance = 0;
1146
1147 chain->inits = vNULL;
1148 chain->finis = vNULL;
1149
1150 return chain;
1151}
1152
1153/* Returns true if CHAIN is not trivial. */
1154
1155static bool
1156nontrivial_chain_p (chain_p chain)
1157{
1158 return chain != NULL && chain->refs.length () > 1;
1159}
1160
1161/* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1162 is no such name. */
1163
1164static tree
1165name_for_ref (dref ref)
1166{
1167 tree name;
1168
1169 if (is_gimple_assign (ref->stmt))
1170 {
1171 if (!ref->ref || DR_IS_READ (ref->ref))
1172 name = gimple_assign_lhs (ref->stmt);
1173 else
1174 name = gimple_assign_rhs1 (ref->stmt);
1175 }
1176 else
1177 name = PHI_RESULT (ref->stmt);
1178
1179 return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1180}
1181
1182/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1183 iterations of the innermost enclosing loop). */
1184
1185static bool
1186valid_initializer_p (struct data_reference *ref,
1187 unsigned distance, struct data_reference *root)
1188{
1189 aff_tree diff, base, step;
1190 widest_int off;
1191
1192 /* Both REF and ROOT must be accessing the same object. */
1193 if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
1194 return false;
1195
1196 /* The initializer is defined outside of loop, hence its address must be
1197 invariant inside the loop. */
1198 gcc_assert (integer_zerop (DR_STEP (ref)));
1199
1200 /* If the address of the reference is invariant, initializer must access
1201 exactly the same location. */
1202 if (integer_zerop (DR_STEP (root)))
1203 return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
1204 && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));
1205
1206 /* Verify that this index of REF is equal to the root's index at
1207 -DISTANCE-th iteration. */
1208 aff_combination_dr_offset (root, &diff);
1209 aff_combination_dr_offset (ref, &base);
1210 aff_combination_scale (&base, -1);
1211 aff_combination_add (&diff, &base);
1212
1213 tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
1214 &step, &name_expansions);
1215 if (!aff_combination_constant_multiple_p (&diff, &step, &off))
1216 return false;
1217
1218 if (off != distance)
1219 return false;
1220
1221 return true;
1222}
1223
1224/* Finds looparound phi node of LOOP that copies the value of REF, and if its
1225 initial value is correct (equal to initial value of REF shifted by one
1226 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1227 is the root of the current chain. */
1228
1229static gphi *
1230find_looparound_phi (struct loop *loop, dref ref, dref root)
1231{
1232 tree name, init, init_ref;
1233 gphi *phi = NULL;
1234 gimple *init_stmt;
1235 edge latch = loop_latch_edge (loop);
1236 struct data_reference init_dr;
1237 gphi_iterator psi;
1238
1239 if (is_gimple_assign (ref->stmt))
1240 {
1241 if (DR_IS_READ (ref->ref))
1242 name = gimple_assign_lhs (ref->stmt);
1243 else
1244 name = gimple_assign_rhs1 (ref->stmt);
1245 }
1246 else
1247 name = PHI_RESULT (ref->stmt);
1248 if (!name)
1249 return NULL;
1250
1251 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1252 {
1253 phi = psi.phi ();
1254 if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
1255 break;
1256 }
1257
1258 if (gsi_end_p (psi))
1259 return NULL;
1260
1261 init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1262 if (TREE_CODE (init) != SSA_NAME)
1263 return NULL;
1264 init_stmt = SSA_NAME_DEF_STMT (init);
1265 if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
1266 return NULL;
1267 gcc_assert (gimple_assign_lhs (init_stmt) == init);
1268
1269 init_ref = gimple_assign_rhs1 (init_stmt);
1270 if (!REFERENCE_CLASS_P (init_ref)
1271 && !DECL_P (init_ref))
1272 return NULL;
1273
1274 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1275 loop enclosing PHI). */
1276 memset (&init_dr, 0, sizeof (struct data_reference));
1277 DR_REF (&init_dr) = init_ref;
1278 DR_STMT (&init_dr) = phi;
1279 if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop))
1280 return NULL;
1281
1282 if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
1283 return NULL;
1284
1285 return phi;
1286}
1287
1288/* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1289
1290static void
1291insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
1292{
1293 dref nw = XCNEW (struct dref_d), aref;
1294 unsigned i;
1295
1296 nw->stmt = phi;
1297 nw->distance = ref->distance + 1;
1298 nw->always_accessed = 1;
1299
1300 FOR_EACH_VEC_ELT (chain->refs, i, aref)
1301 if (aref->distance >= nw->distance)
1302 break;
1303 chain->refs.safe_insert (i, nw);
1304
1305 if (nw->distance > chain->length)
1306 {
1307 chain->length = nw->distance;
1308 chain->has_max_use_after = false;
1309 }
1310}
1311
1312/* For references in CHAIN that are copied around the LOOP (created previously
1313 by PRE, or by user), add the results of such copies to the chain. This
1314 enables us to remove the copies by unrolling, and may need less registers
1315 (also, it may allow us to combine chains together). */
1316
1317static void
1318add_looparound_copies (struct loop *loop, chain_p chain)
1319{
1320 unsigned i;
1321 dref ref, root = get_chain_root (chain);
1322 gphi *phi;
1323
1324 if (chain->type == CT_STORE_STORE)
1325 return;
1326
1327 FOR_EACH_VEC_ELT (chain->refs, i, ref)
1328 {
1329 phi = find_looparound_phi (loop, ref, root);
1330 if (!phi)
1331 continue;
1332
1333 bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1334 insert_looparound_copy (chain, ref, phi);
1335 }
1336}
1337
1338/* Find roots of the values and determine distances in the component COMP.
1339 The references are redistributed into CHAINS. LOOP is the current
1340 loop. */
1341
1342static void
1343determine_roots_comp (struct loop *loop,
1344 struct component *comp,
1345 vec<chain_p> *chains)
1346{
1347 unsigned i;
1348 dref a;
1349 chain_p chain = NULL;
1350 widest_int last_ofs = 0;
1351 enum chain_type type;
1352
1353 /* Invariants are handled specially. */
1354 if (comp->comp_step == RS_INVARIANT)
1355 {
1356 chain = make_invariant_chain (comp);
1357 chains->safe_push (chain);
1358 return;
1359 }
1360
1361 /* Trivial component. */
1362 if (comp->refs.length () <= 1)
1363 {
1364 if (comp->refs.length () == 1)
1365 {
1366 free (comp->refs[0]);
1367 comp->refs.truncate (0);
1368 }
1369 return;
1370 }
1371
1372 comp->refs.qsort (order_drefs);
1373
1374 /* For Store-Store chain, we only support load if it is dominated by a
1375 store statement in the same iteration of loop. */
1376 if (comp->eliminate_store_p)
1377 for (a = NULL, i = 0; i < comp->refs.length (); i++)
1378 {
1379 if (DR_IS_WRITE (comp->refs[i]->ref))
1380 a = comp->refs[i];
1381 else if (a == NULL || a->offset != comp->refs[i]->offset)
1382 {
1383 /* If there is load that is not dominated by a store in the
1384 same iteration of loop, clear the flag so no Store-Store
1385 chain is generated for this component. */
1386 comp->eliminate_store_p = false;
1387 break;
1388 }
1389 }
1390
1391 /* Determine roots and create chains for components. */
1392 FOR_EACH_VEC_ELT (comp->refs, i, a)
1393 {
1394 if (!chain
1395 || (chain->type == CT_LOAD && DR_IS_WRITE (a->ref))
1396 || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref))
1397 || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
1398 {
1399 if (nontrivial_chain_p (chain))
1400 {
1401 add_looparound_copies (loop, chain);
1402 chains->safe_push (chain);
1403 }
1404 else
1405 release_chain (chain);
1406
1407 /* Determine type of the chain. If the root reference is a load,
1408 this can only be a CT_LOAD chain; other chains are intialized
1409 to CT_STORE_LOAD and might be promoted to CT_STORE_STORE when
1410 new reference is added. */
1411 type = DR_IS_READ (a->ref) ? CT_LOAD : CT_STORE_LOAD;
1412 chain = make_rooted_chain (a, type);
1413 last_ofs = a->offset;
1414 continue;
1415 }
1416
1417 add_ref_to_chain (chain, a);
1418 }
1419
1420 if (nontrivial_chain_p (chain))
1421 {
1422 add_looparound_copies (loop, chain);
1423 chains->safe_push (chain);
1424 }
1425 else
1426 release_chain (chain);
1427}
1428
1429/* Find roots of the values and determine distances in components COMPS, and
1430 separates the references to CHAINS. LOOP is the current loop. */
1431
1432static void
1433determine_roots (struct loop *loop,
1434 struct component *comps, vec<chain_p> *chains)
1435{
1436 struct component *comp;
1437
1438 for (comp = comps; comp; comp = comp->next)
1439 determine_roots_comp (loop, comp, chains);
1440}
1441
1442/* Replace the reference in statement STMT with temporary variable
1443 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
1444 the reference in the statement. IN_LHS is true if the reference
1445 is in the lhs of STMT, false if it is in rhs. */
1446
1447static void
1448replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
1449{
1450 tree val;
1451 gassign *new_stmt;
1452 gimple_stmt_iterator bsi, psi;
1453
1454 if (gimple_code (stmt) == GIMPLE_PHI)
1455 {
1456 gcc_assert (!in_lhs && !set);
1457
1458 val = PHI_RESULT (stmt);
1459 bsi = gsi_after_labels (gimple_bb (stmt));
1460 psi = gsi_for_stmt (stmt);
1461 remove_phi_node (&psi, false);
1462
1463 /* Turn the phi node into GIMPLE_ASSIGN. */
1464 new_stmt = gimple_build_assign (val, new_tree);
1465 gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
1466 return;
1467 }
1468
1469 /* Since the reference is of gimple_reg type, it should only
1470 appear as lhs or rhs of modify statement. */
1471 gcc_assert (is_gimple_assign (stmt));
1472
1473 bsi = gsi_for_stmt (stmt);
1474
1475 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
1476 if (!set)
1477 {
1478 gcc_assert (!in_lhs);
1479 gimple_assign_set_rhs_from_tree (&bsi, new_tree);
1480 stmt = gsi_stmt (bsi);
1481 update_stmt (stmt);
1482 return;
1483 }
1484
1485 if (in_lhs)
1486 {
1487 /* We have statement
1488
1489 OLD = VAL
1490
1491 If OLD is a memory reference, then VAL is gimple_val, and we transform
1492 this to
1493
1494 OLD = VAL
1495 NEW = VAL
1496
1497 Otherwise, we are replacing a combination chain,
1498 VAL is the expression that performs the combination, and OLD is an
1499 SSA name. In this case, we transform the assignment to
1500
1501 OLD = VAL
1502 NEW = OLD
1503
1504 */
1505
1506 val = gimple_assign_lhs (stmt);
1507 if (TREE_CODE (val) != SSA_NAME)
1508 {
1509 val = gimple_assign_rhs1 (stmt);
1510 gcc_assert (gimple_assign_single_p (stmt));
1511 if (TREE_CLOBBER_P (val))
1512 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
1513 else
1514 gcc_assert (gimple_assign_copy_p (stmt));
1515 }
1516 }
1517 else
1518 {
1519 /* VAL = OLD
1520
1521 is transformed to
1522
1523 VAL = OLD
1524 NEW = VAL */
1525
1526 val = gimple_assign_lhs (stmt);
1527 }
1528
1529 new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
1530 gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
1531}
1532
1533/* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
1534 of the loop it was analyzed in. Append init stmts to STMTS. */
1535
1536static tree
1537ref_at_iteration (data_reference_p dr, int iter,
1538 gimple_seq *stmts, tree niters = NULL_TREE)
1539{
1540 tree off = DR_OFFSET (dr);
1541 tree coff = DR_INIT (dr);
1542 tree ref = DR_REF (dr);
1543 enum tree_code ref_code = ERROR_MARK;
1544 tree ref_type = NULL_TREE;
1545 tree ref_op1 = NULL_TREE;
1546 tree ref_op2 = NULL_TREE;
1547 tree new_offset;
1548
1549 if (iter != 0)
1550 {
1551 new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter));
1552 if (TREE_CODE (new_offset) == INTEGER_CST)
1553 coff = size_binop (PLUS_EXPR, coff, new_offset);
1554 else
1555 off = size_binop (PLUS_EXPR, off, new_offset);
1556 }
1557
1558 if (niters != NULL_TREE)
1559 {
1560 niters = fold_convert (ssizetype, niters);
1561 new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters);
1562 if (TREE_CODE (niters) == INTEGER_CST)
1563 coff = size_binop (PLUS_EXPR, coff, new_offset);
1564 else
1565 off = size_binop (PLUS_EXPR, off, new_offset);
1566 }
1567
1568 /* While data-ref analysis punts on bit offsets it still handles
1569 bitfield accesses at byte boundaries. Cope with that. Note that
1570 if the bitfield object also starts at a byte-boundary we can simply
1571 replicate the COMPONENT_REF, but we have to subtract the component's
1572 byte-offset from the MEM_REF address first.
1573 Otherwise we simply build a BIT_FIELD_REF knowing that the bits
1574 start at offset zero. */
1575 if (TREE_CODE (ref) == COMPONENT_REF
1576 && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
1577 {
1578 unsigned HOST_WIDE_INT boff;
1579 tree field = TREE_OPERAND (ref, 1);
1580 tree offset = component_ref_field_offset (ref);
1581 ref_type = TREE_TYPE (ref);
1582 boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
1583 /* This can occur in Ada. See the comment in get_bit_range. */
1584 if (boff % BITS_PER_UNIT != 0
1585 || !tree_fits_uhwi_p (offset))
1586 {
1587 ref_code = BIT_FIELD_REF;
1588 ref_op1 = DECL_SIZE (field);
1589 ref_op2 = bitsize_zero_node;
1590 }
1591 else
1592 {
1593 boff >>= LOG2_BITS_PER_UNIT;
1594 boff += tree_to_uhwi (offset);
1595 coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
1596 ref_code = COMPONENT_REF;
1597 ref_op1 = field;
1598 ref_op2 = TREE_OPERAND (ref, 2);
1599 ref = TREE_OPERAND (ref, 0);
1600 }
1601 }
1602 tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
1603 addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
1604 is_gimple_mem_ref_addr, NULL_TREE);
1605 tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
1606 tree type = build_aligned_type (TREE_TYPE (ref),
1607 get_object_alignment (ref));
1608 ref = build2 (MEM_REF, type, addr, alias_ptr);
1609 if (ref_type)
1610 ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
1611 return ref;
1612}
1613
1614/* Get the initialization expression for the INDEX-th temporary variable
1615 of CHAIN. */
1616
1617static tree
1618get_init_expr (chain_p chain, unsigned index)
1619{
1620 if (chain->type == CT_COMBINATION)
1621 {
1622 tree e1 = get_init_expr (chain->ch1, index);
1623 tree e2 = get_init_expr (chain->ch2, index);
1624
1625 return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1626 }
1627 else
1628 return chain->inits[index];
1629}
1630
1631/* Returns a new temporary variable used for the I-th variable carrying
1632 value of REF. The variable's uid is marked in TMP_VARS. */
1633
1634static tree
1635predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1636{
1637 tree type = TREE_TYPE (ref);
1638 /* We never access the components of the temporary variable in predictive
1639 commoning. */
1640 tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1641 bitmap_set_bit (tmp_vars, DECL_UID (var));
1642 return var;
1643}
1644
1645/* Creates the variables for CHAIN, as well as phi nodes for them and
1646 initialization on entry to LOOP. Uids of the newly created
1647 temporary variables are marked in TMP_VARS. */
1648
1649static void
1650initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
1651{
1652 unsigned i;
1653 unsigned n = chain->length;
1654 dref root = get_chain_root (chain);
1655 bool reuse_first = !chain->has_max_use_after;
1656 tree ref, init, var, next;
1657 gphi *phi;
1658 gimple_seq stmts;
1659 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1660
1661 /* If N == 0, then all the references are within the single iteration. And
1662 since this is an nonempty chain, reuse_first cannot be true. */
1663 gcc_assert (n > 0 || !reuse_first);
1664
1665 chain->vars.create (n + 1);
1666
1667 if (chain->type == CT_COMBINATION)
1668 ref = gimple_assign_lhs (root->stmt);
1669 else
1670 ref = DR_REF (root->ref);
1671
1672 for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1673 {
1674 var = predcom_tmp_var (ref, i, tmp_vars);
1675 chain->vars.quick_push (var);
1676 }
1677 if (reuse_first)
1678 chain->vars.quick_push (chain->vars[0]);
1679
1680 FOR_EACH_VEC_ELT (chain->vars, i, var)
1681 chain->vars[i] = make_ssa_name (var);
1682
1683 for (i = 0; i < n; i++)
1684 {
1685 var = chain->vars[i];
1686 next = chain->vars[i + 1];
1687 init = get_init_expr (chain, i);
1688
1689 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1690 if (stmts)
1691 gsi_insert_seq_on_edge_immediate (entry, stmts);
1692
1693 phi = create_phi_node (var, loop->header);
1694 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1695 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1696 }
1697}
1698
1699/* For inter-iteration store elimination CHAIN in LOOP, returns true if
1700 all stores to be eliminated store loop invariant values into memory.
1701 In this case, we can use these invariant values directly after LOOP. */
1702
1703static bool
1704is_inv_store_elimination_chain (struct loop *loop, chain_p chain)
1705{
1706 if (chain->length == 0 || chain->type != CT_STORE_STORE)
1707 return false;
1708
1709 gcc_assert (!chain->has_max_use_after);
1710
1711 /* If loop iterates for unknown times or fewer times than chain->lenght,
1712 we still need to setup root variable and propagate it with PHI node. */
1713 tree niters = number_of_latch_executions (loop);
1714 if (TREE_CODE (niters) != INTEGER_CST
1715 || wi::leu_p (wi::to_wide (niters), chain->length))
1716 return false;
1717
1718 /* Check stores in chain for elimination if they only store loop invariant
1719 values. */
1720 for (unsigned i = 0; i < chain->length; i++)
1721 {
1722 dref a = get_chain_last_write_at (chain, i);
1723 if (a == NULL)
1724 continue;
1725
1726 gimple *def_stmt, *stmt = a->stmt;
1727 if (!gimple_assign_single_p (stmt))
1728 return false;
1729
1730 tree val = gimple_assign_rhs1 (stmt);
1731 if (TREE_CLOBBER_P (val))
1732 return false;
1733
1734 if (CONSTANT_CLASS_P (val))
1735 continue;
1736
1737 if (TREE_CODE (val) != SSA_NAME)
1738 return false;
1739
1740 def_stmt = SSA_NAME_DEF_STMT (val);
1741 if (gimple_nop_p (def_stmt))
1742 continue;
1743
1744 if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
1745 return false;
1746 }
1747 return true;
1748}
1749
1750/* Creates root variables for store elimination CHAIN in which stores for
1751 elimination only store loop invariant values. In this case, we neither
1752 need to load root variables before loop nor propagate it with PHI nodes. */
1753
1754static void
1755initialize_root_vars_store_elim_1 (chain_p chain)
1756{
1757 tree var;
1758 unsigned i, n = chain->length;
1759
1760 chain->vars.create (n);
1761 chain->vars.safe_grow_cleared (n);
1762
1763 /* Initialize root value for eliminated stores at each distance. */
1764 for (i = 0; i < n; i++)
1765 {
1766 dref a = get_chain_last_write_at (chain, i);
1767 if (a == NULL)
1768 continue;
1769
1770 var = gimple_assign_rhs1 (a->stmt);
1771 chain->vars[a->distance] = var;
1772 }
1773
1774 /* We don't propagate values with PHI nodes, so manually propagate value
1775 to bubble positions. */
1776 var = chain->vars[0];
1777 for (i = 1; i < n; i++)
1778 {
1779 if (chain->vars[i] != NULL_TREE)
1780 {
1781 var = chain->vars[i];
1782 continue;
1783 }
1784 chain->vars[i] = var;
1785 }
1786
1787 /* Revert the vector. */
1788 for (i = 0; i < n / 2; i++)
1789 std::swap (chain->vars[i], chain->vars[n - i - 1]);
1790}
1791
1792/* Creates root variables for store elimination CHAIN in which stores for
1793 elimination store loop variant values. In this case, we may need to
1794 load root variables before LOOP and propagate it with PHI nodes. Uids
1795 of the newly created root variables are marked in TMP_VARS. */
1796
1797static void
1798initialize_root_vars_store_elim_2 (struct loop *loop,
1799 chain_p chain, bitmap tmp_vars)
1800{
1801 unsigned i, n = chain->length;
1802 tree ref, init, var, next, val, phi_result;
1803 gimple *stmt;
1804 gimple_seq stmts;
1805
1806 chain->vars.create (n);
1807
1808 ref = DR_REF (get_chain_root (chain)->ref);
1809 for (i = 0; i < n; i++)
1810 {
1811 var = predcom_tmp_var (ref, i, tmp_vars);
1812 chain->vars.quick_push (var);
1813 }
1814
1815 FOR_EACH_VEC_ELT (chain->vars, i, var)
1816 chain->vars[i] = make_ssa_name (var);
1817
1818 /* Root values are either rhs operand of stores to be eliminated, or
1819 loaded from memory before loop. */
1820 auto_vec<tree> vtemps;
1821 vtemps.safe_grow_cleared (n);
1822 for (i = 0; i < n; i++)
1823 {
1824 init = get_init_expr (chain, i);
1825 if (init == NULL_TREE)
1826 {
1827 /* Root value is rhs operand of the store to be eliminated if
1828 it isn't loaded from memory before loop. */
1829 dref a = get_chain_last_write_at (chain, i);
1830 val = gimple_assign_rhs1 (a->stmt);
1831 if (TREE_CLOBBER_P (val))
1832 {
1833 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var));
1834 gimple_assign_set_rhs1 (a->stmt, val);
1835 }
1836
1837 vtemps[n - i - 1] = val;
1838 }
1839 else
1840 {
1841 edge latch = loop_latch_edge (loop);
1842 edge entry = loop_preheader_edge (loop);
1843
1844 /* Root value is loaded from memory before loop, we also need
1845 to add PHI nodes to propagate the value across iterations. */
1846 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1847 if (stmts)
1848 gsi_insert_seq_on_edge_immediate (entry, stmts);
1849
1850 next = chain->vars[n - i];
1851 phi_result = copy_ssa_name (next);
1852 gphi *phi = create_phi_node (phi_result, loop->header);
1853 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1854 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1855 vtemps[n - i - 1] = phi_result;
1856 }
1857 }
1858
1859 /* Find the insertion position. */
1860 dref last = get_chain_root (chain);
1861 for (i = 0; i < chain->refs.length (); i++)
1862 {
1863 if (chain->refs[i]->pos > last->pos)
1864 last = chain->refs[i];
1865 }
1866
1867 gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt);
1868
1869 /* Insert statements copying root value to root variable. */
1870 for (i = 0; i < n; i++)
1871 {
1872 var = chain->vars[i];
1873 val = vtemps[i];
1874 stmt = gimple_build_assign (var, val);
1875 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1876 }
1877}
1878
1879/* Generates stores for CHAIN's eliminated stores in LOOP's last
1880 (CHAIN->length - 1) iterations. */
1881
1882static void
1883finalize_eliminated_stores (struct loop *loop, chain_p chain)
1884{
1885 unsigned i, n = chain->length;
1886
1887 for (i = 0; i < n; i++)
1888 {
1889 tree var = chain->vars[i];
1890 tree fini = chain->finis[n - i - 1];
1891 gimple *stmt = gimple_build_assign (fini, var);
1892
1893 gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt);
1894 }
1895
1896 if (chain->fini_seq)
1897 {
1898 gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq);
1899 chain->fini_seq = NULL;
1900 }
1901}
1902
1903/* Initializes a variable for load motion for ROOT and prepares phi nodes and
1904 initialization on entry to LOOP if necessary. The ssa name for the variable
1905 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1906 around the loop is created. Uid of the newly created temporary variable
1907 is marked in TMP_VARS. INITS is the list containing the (single)
1908 initializer. */
1909
1910static void
1911initialize_root_vars_lm (struct loop *loop, dref root, bool written,
1912 vec<tree> *vars, vec<tree> inits,
1913 bitmap tmp_vars)
1914{
1915 unsigned i;
1916 tree ref = DR_REF (root->ref), init, var, next;
1917 gimple_seq stmts;
1918 gphi *phi;
1919 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1920
1921 /* Find the initializer for the variable, and check that it cannot
1922 trap. */
1923 init = inits[0];
1924
1925 vars->create (written ? 2 : 1);
1926 var = predcom_tmp_var (ref, 0, tmp_vars);
1927 vars->quick_push (var);
1928 if (written)
1929 vars->quick_push ((*vars)[0]);
1930
1931 FOR_EACH_VEC_ELT (*vars, i, var)
1932 (*vars)[i] = make_ssa_name (var);
1933
1934 var = (*vars)[0];
1935
1936 init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1937 if (stmts)
1938 gsi_insert_seq_on_edge_immediate (entry, stmts);
1939
1940 if (written)
1941 {
1942 next = (*vars)[1];
1943 phi = create_phi_node (var, loop->header);
1944 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1945 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1946 }
1947 else
1948 {
1949 gassign *init_stmt = gimple_build_assign (var, init);
1950 gsi_insert_on_edge_immediate (entry, init_stmt);
1951 }
1952}
1953
1954
1955/* Execute load motion for references in chain CHAIN. Uids of the newly
1956 created temporary variables are marked in TMP_VARS. */
1957
1958static void
1959execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
1960{
1961 auto_vec<tree> vars;
1962 dref a;
1963 unsigned n_writes = 0, ridx, i;
1964 tree var;
1965
1966 gcc_assert (chain->type == CT_INVARIANT);
1967 gcc_assert (!chain->combined);
1968 FOR_EACH_VEC_ELT (chain->refs, i, a)
1969 if (DR_IS_WRITE (a->ref))
1970 n_writes++;
1971
1972 /* If there are no reads in the loop, there is nothing to do. */
1973 if (n_writes == chain->refs.length ())
1974 return;
1975
1976 initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
1977 &vars, chain->inits, tmp_vars);
1978
1979 ridx = 0;
1980 FOR_EACH_VEC_ELT (chain->refs, i, a)
1981 {
1982 bool is_read = DR_IS_READ (a->ref);
1983
1984 if (DR_IS_WRITE (a->ref))
1985 {
1986 n_writes--;
1987 if (n_writes)
1988 {
1989 var = vars[0];
1990 var = make_ssa_name (SSA_NAME_VAR (var));
1991 vars[0] = var;
1992 }
1993 else
1994 ridx = 1;
1995 }
1996
1997 replace_ref_with (a->stmt, vars[ridx],
1998 !is_read, !is_read);
1999 }
2000}
2001
2002/* Returns the single statement in that NAME is used, excepting
2003 the looparound phi nodes contained in one of the chains. If there is no
2004 such statement, or more statements, NULL is returned. */
2005
2006static gimple *
2007single_nonlooparound_use (tree name)
2008{
2009 use_operand_p use;
2010 imm_use_iterator it;
2011 gimple *stmt, *ret = NULL;
2012
2013 FOR_EACH_IMM_USE_FAST (use, it, name)
2014 {
2015 stmt = USE_STMT (use);
2016
2017 if (gimple_code (stmt) == GIMPLE_PHI)
2018 {
2019 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
2020 could not be processed anyway, so just fail for them. */
2021 if (bitmap_bit_p (looparound_phis,
2022 SSA_NAME_VERSION (PHI_RESULT (stmt))))
2023 continue;
2024
2025 return NULL;
2026 }
2027 else if (is_gimple_debug (stmt))
2028 continue;
2029 else if (ret != NULL)
2030 return NULL;
2031 else
2032 ret = stmt;
2033 }
2034
2035 return ret;
2036}
2037
2038/* Remove statement STMT, as well as the chain of assignments in that it is
2039 used. */
2040
2041static void
2042remove_stmt (gimple *stmt)
2043{
2044 tree name;
2045 gimple *next;
2046 gimple_stmt_iterator psi;
2047
2048 if (gimple_code (stmt) == GIMPLE_PHI)
2049 {
2050 name = PHI_RESULT (stmt);
2051 next = single_nonlooparound_use (name);
2052 reset_debug_uses (stmt);
2053 psi = gsi_for_stmt (stmt);
2054 remove_phi_node (&psi, true);
2055
2056 if (!next
2057 || !gimple_assign_ssa_name_copy_p (next)
2058 || gimple_assign_rhs1 (next) != name)
2059 return;
2060
2061 stmt = next;
2062 }
2063
2064 while (1)
2065 {
2066 gimple_stmt_iterator bsi;
2067
2068 bsi = gsi_for_stmt (stmt);
2069
2070 name = gimple_assign_lhs (stmt);
2071 if (TREE_CODE (name) == SSA_NAME)
2072 {
2073 next = single_nonlooparound_use (name);
2074 reset_debug_uses (stmt);
2075 }
2076 else
2077 {
2078 /* This is a store to be eliminated. */
2079 gcc_assert (gimple_vdef (stmt) != NULL);
2080 next = NULL;
2081 }
2082
2083 unlink_stmt_vdef (stmt);
2084 gsi_remove (&bsi, true);
2085 release_defs (stmt);
2086
2087 if (!next
2088 || !gimple_assign_ssa_name_copy_p (next)
2089 || gimple_assign_rhs1 (next) != name)
2090 return;
2091
2092 stmt = next;
2093 }
2094}
2095
2096/* Perform the predictive commoning optimization for a chain CHAIN.
2097 Uids of the newly created temporary variables are marked in TMP_VARS.*/
2098
2099static void
2100execute_pred_commoning_chain (struct loop *loop, chain_p chain,
2101 bitmap tmp_vars)
2102{
2103 unsigned i;
2104 dref a;
2105 tree var;
2106 bool in_lhs;
2107
2108 if (chain->combined)
2109 {
2110 /* For combined chains, just remove the statements that are used to
2111 compute the values of the expression (except for the root one).
2112 We delay this until after all chains are processed. */
2113 }
2114 else if (chain->type == CT_STORE_STORE)
2115 {
2116 if (chain->length > 0)
2117 {
2118 if (chain->inv_store_elimination)
2119 {
2120 /* If dead stores in this chain only store loop invariant
2121 values, we can simply record the invariant value and use
2122 it directly after loop. */
2123 initialize_root_vars_store_elim_1 (chain);
2124 }
2125 else
2126 {
2127 /* If dead stores in this chain store loop variant values,
2128 we need to set up the variables by loading from memory
2129 before loop and propagating it with PHI nodes. */
2130 initialize_root_vars_store_elim_2 (loop, chain, tmp_vars);
2131 }
2132
2133 /* For inter-iteration store elimination chain, stores at each
2134 distance in loop's last (chain->length - 1) iterations can't
2135 be eliminated, because there is no following killing store.
2136 We need to generate these stores after loop. */
2137 finalize_eliminated_stores (loop, chain);
2138 }
2139
2140 bool last_store_p = true;
2141 for (i = chain->refs.length (); i > 0; i--)
2142 {
2143 a = chain->refs[i - 1];
2144 /* Preserve the last store of the chain. Eliminate other stores
2145 which are killed by the last one. */
2146 if (DR_IS_WRITE (a->ref))
2147 {
2148 if (last_store_p)
2149 last_store_p = false;
2150 else
2151 remove_stmt (a->stmt);
2152
2153 continue;
2154 }
2155
2156 /* Any load in Store-Store chain must be dominated by a previous
2157 store, we replace the load reference with rhs of the store. */
2158 dref b = get_chain_last_write_before_load (chain, i - 1);
2159 gcc_assert (b != NULL);
2160 var = gimple_assign_rhs1 (b->stmt);
2161 replace_ref_with (a->stmt, var, false, false);
2162 }
2163 }
2164 else
2165 {
2166 /* For non-combined chains, set up the variables that hold its value. */
2167 initialize_root_vars (loop, chain, tmp_vars);
2168 a = get_chain_root (chain);
2169 in_lhs = (chain->type == CT_STORE_LOAD
2170 || chain->type == CT_COMBINATION);
2171 replace_ref_with (a->stmt, chain->vars[chain->length], true, in_lhs);
2172
2173 /* Replace the uses of the original references by these variables. */
2174 for (i = 1; chain->refs.iterate (i, &a); i++)
2175 {
2176 var = chain->vars[chain->length - a->distance];
2177 replace_ref_with (a->stmt, var, false, false);
2178 }
2179 }
2180}
2181
2182/* Determines the unroll factor necessary to remove as many temporary variable
2183 copies as possible. CHAINS is the list of chains that will be
2184 optimized. */
2185
2186static unsigned
2187determine_unroll_factor (vec<chain_p> chains)
2188{
2189 chain_p chain;
2190 unsigned factor = 1, af, nfactor, i;
2191 unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
2192
2193 FOR_EACH_VEC_ELT (chains, i, chain)
2194 {
2195 if (chain->type == CT_INVARIANT)
2196 continue;
2197 /* For now we can't handle unrolling when eliminating stores. */
2198 else if (chain->type == CT_STORE_STORE)
2199 return 1;
2200
2201 if (chain->combined)
2202 {
2203 /* For combined chains, we can't handle unrolling if we replace
2204 looparound PHIs. */
2205 dref a;
2206 unsigned j;
2207 for (j = 1; chain->refs.iterate (j, &a); j++)
2208 if (gimple_code (a->stmt) == GIMPLE_PHI)
2209 return 1;
2210 continue;
2211 }
2212
2213 /* The best unroll factor for this chain is equal to the number of
2214 temporary variables that we create for it. */
2215 af = chain->length;
2216 if (chain->has_max_use_after)
2217 af++;
2218
2219 nfactor = factor * af / gcd (factor, af);
2220 if (nfactor <= max)
2221 factor = nfactor;
2222 }
2223
2224 return factor;
2225}
2226
2227/* Perform the predictive commoning optimization for CHAINS.
2228 Uids of the newly created temporary variables are marked in TMP_VARS. */
2229
2230static void
2231execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
2232 bitmap tmp_vars)
2233{
2234 chain_p chain;
2235 unsigned i;
2236
2237 FOR_EACH_VEC_ELT (chains, i, chain)
2238 {
2239 if (chain->type == CT_INVARIANT)
2240 execute_load_motion (loop, chain, tmp_vars);
2241 else
2242 execute_pred_commoning_chain (loop, chain, tmp_vars);
2243 }
2244
2245 FOR_EACH_VEC_ELT (chains, i, chain)
2246 {
2247 if (chain->type == CT_INVARIANT)
2248 ;
2249 else if (chain->combined)
2250 {
2251 /* For combined chains, just remove the statements that are used to
2252 compute the values of the expression (except for the root one). */
2253 dref a;
2254 unsigned j;
2255 for (j = 1; chain->refs.iterate (j, &a); j++)
2256 remove_stmt (a->stmt);
2257 }
2258 }
2259
2260 update_ssa (TODO_update_ssa_only_virtuals);
2261}
2262
2263/* For each reference in CHAINS, if its defining statement is
2264 phi node, record the ssa name that is defined by it. */
2265
2266static void
2267replace_phis_by_defined_names (vec<chain_p> chains)
2268{
2269 chain_p chain;
2270 dref a;
2271 unsigned i, j;
2272
2273 FOR_EACH_VEC_ELT (chains, i, chain)
2274 FOR_EACH_VEC_ELT (chain->refs, j, a)
2275 {
2276 if (gimple_code (a->stmt) == GIMPLE_PHI)
2277 {
2278 a->name_defined_by_phi = PHI_RESULT (a->stmt);
2279 a->stmt = NULL;
2280 }
2281 }
2282}
2283
2284/* For each reference in CHAINS, if name_defined_by_phi is not
2285 NULL, use it to set the stmt field. */
2286
2287static void
2288replace_names_by_phis (vec<chain_p> chains)
2289{
2290 chain_p chain;
2291 dref a;
2292 unsigned i, j;
2293
2294 FOR_EACH_VEC_ELT (chains, i, chain)
2295 FOR_EACH_VEC_ELT (chain->refs, j, a)
2296 if (a->stmt == NULL)
2297 {
2298 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
2299 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
2300 a->name_defined_by_phi = NULL_TREE;
2301 }
2302}
2303
2304/* Wrapper over execute_pred_commoning, to pass it as a callback
2305 to tree_transform_and_unroll_loop. */
2306
2307struct epcc_data
2308{
2309 vec<chain_p> chains;
2310 bitmap tmp_vars;
2311};
2312
2313static void
2314execute_pred_commoning_cbck (struct loop *loop, void *data)
2315{
2316 struct epcc_data *const dta = (struct epcc_data *) data;
2317
2318 /* Restore phi nodes that were replaced by ssa names before
2319 tree_transform_and_unroll_loop (see detailed description in
2320 tree_predictive_commoning_loop). */
2321 replace_names_by_phis (dta->chains);
2322 execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
2323}
2324
2325/* Base NAME and all the names in the chain of phi nodes that use it
2326 on variable VAR. The phi nodes are recognized by being in the copies of
2327 the header of the LOOP. */
2328
2329static void
2330base_names_in_chain_on (struct loop *loop, tree name, tree var)
2331{
2332 gimple *stmt, *phi;
2333 imm_use_iterator iter;
2334
2335 replace_ssa_name_symbol (name, var);
2336
2337 while (1)
2338 {
2339 phi = NULL;
2340 FOR_EACH_IMM_USE_STMT (stmt, iter, name)
2341 {
2342 if (gimple_code (stmt) == GIMPLE_PHI
2343 && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
2344 {
2345 phi = stmt;
2346 BREAK_FROM_IMM_USE_STMT (iter);
2347 }
2348 }
2349 if (!phi)
2350 return;
2351
2352 name = PHI_RESULT (phi);
2353 replace_ssa_name_symbol (name, var);
2354 }
2355}
2356
2357/* Given an unrolled LOOP after predictive commoning, remove the
2358 register copies arising from phi nodes by changing the base
2359 variables of SSA names. TMP_VARS is the set of the temporary variables
2360 for those we want to perform this. */
2361
2362static void
2363eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
2364{
2365 edge e;
2366 gphi *phi;
2367 gimple *stmt;
2368 tree name, use, var;
2369 gphi_iterator psi;
2370
2371 e = loop_latch_edge (loop);
2372 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
2373 {
2374 phi = psi.phi ();
2375 name = PHI_RESULT (phi);
2376 var = SSA_NAME_VAR (name);
2377 if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
2378 continue;
2379 use = PHI_ARG_DEF_FROM_EDGE (phi, e);
2380 gcc_assert (TREE_CODE (use) == SSA_NAME);
2381
2382 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
2383 stmt = SSA_NAME_DEF_STMT (use);
2384 while (gimple_code (stmt) == GIMPLE_PHI
2385 /* In case we could not unroll the loop enough to eliminate
2386 all copies, we may reach the loop header before the defining
2387 statement (in that case, some register copies will be present
2388 in loop latch in the final code, corresponding to the newly
2389 created looparound phi nodes). */
2390 && gimple_bb (stmt) != loop->header)
2391 {
2392 gcc_assert (single_pred_p (gimple_bb (stmt)));
2393 use = PHI_ARG_DEF (stmt, 0);
2394 stmt = SSA_NAME_DEF_STMT (use);
2395 }
2396
2397 base_names_in_chain_on (loop, use, var);
2398 }
2399}
2400
2401/* Returns true if CHAIN is suitable to be combined. */
2402
2403static bool
2404chain_can_be_combined_p (chain_p chain)
2405{
2406 return (!chain->combined
2407 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
2408}
2409
2410/* Returns the modify statement that uses NAME. Skips over assignment
2411 statements, NAME is replaced with the actual name used in the returned
2412 statement. */
2413
2414static gimple *
2415find_use_stmt (tree *name)
2416{
2417 gimple *stmt;
2418 tree rhs, lhs;
2419
2420 /* Skip over assignments. */
2421 while (1)
2422 {
2423 stmt = single_nonlooparound_use (*name);
2424 if (!stmt)
2425 return NULL;
2426
2427 if (gimple_code (stmt) != GIMPLE_ASSIGN)
2428 return NULL;
2429
2430 lhs = gimple_assign_lhs (stmt);
2431 if (TREE_CODE (lhs) != SSA_NAME)
2432 return NULL;
2433
2434 if (gimple_assign_copy_p (stmt))
2435 {
2436 rhs = gimple_assign_rhs1 (stmt);
2437 if (rhs != *name)
2438 return NULL;
2439
2440 *name = lhs;
2441 }
2442 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
2443 == GIMPLE_BINARY_RHS)
2444 return stmt;
2445 else
2446 return NULL;
2447 }
2448}
2449
2450/* Returns true if we may perform reassociation for operation CODE in TYPE. */
2451
2452static bool
2453may_reassociate_p (tree type, enum tree_code code)
2454{
2455 if (FLOAT_TYPE_P (type)
2456 && !flag_unsafe_math_optimizations)
2457 return false;
2458
2459 return (commutative_tree_code (code)
2460 && associative_tree_code (code));
2461}
2462
2463/* If the operation used in STMT is associative and commutative, go through the
2464 tree of the same operations and returns its root. Distance to the root
2465 is stored in DISTANCE. */
2466
2467static gimple *
2468find_associative_operation_root (gimple *stmt, unsigned *distance)
2469{
2470 tree lhs;
2471 gimple *next;
2472 enum tree_code code = gimple_assign_rhs_code (stmt);
2473 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2474 unsigned dist = 0;
2475
2476 if (!may_reassociate_p (type, code))
2477 return NULL;
2478
2479 while (1)
2480 {
2481 lhs = gimple_assign_lhs (stmt);
2482 gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2483
2484 next = find_use_stmt (&lhs);
2485 if (!next
2486 || gimple_assign_rhs_code (next) != code)
2487 break;
2488
2489 stmt = next;
2490 dist++;
2491 }
2492
2493 if (distance)
2494 *distance = dist;
2495 return stmt;
2496}
2497
2498/* Returns the common statement in that NAME1 and NAME2 have a use. If there
2499 is no such statement, returns NULL_TREE. In case the operation used on
2500 NAME1 and NAME2 is associative and commutative, returns the root of the
2501 tree formed by this operation instead of the statement that uses NAME1 or
2502 NAME2. */
2503
2504static gimple *
2505find_common_use_stmt (tree *name1, tree *name2)
2506{
2507 gimple *stmt1, *stmt2;
2508
2509 stmt1 = find_use_stmt (name1);
2510 if (!stmt1)
2511 return NULL;
2512
2513 stmt2 = find_use_stmt (name2);
2514 if (!stmt2)
2515 return NULL;
2516
2517 if (stmt1 == stmt2)
2518 return stmt1;
2519
2520 stmt1 = find_associative_operation_root (stmt1, NULL);
2521 if (!stmt1)
2522 return NULL;
2523 stmt2 = find_associative_operation_root (stmt2, NULL);
2524 if (!stmt2)
2525 return NULL;
2526
2527 return (stmt1 == stmt2 ? stmt1 : NULL);
2528}
2529
2530/* Checks whether R1 and R2 are combined together using CODE, with the result
2531 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2532 if it is true. If CODE is ERROR_MARK, set these values instead. */
2533
2534static bool
2535combinable_refs_p (dref r1, dref r2,
2536 enum tree_code *code, bool *swap, tree *rslt_type)
2537{
2538 enum tree_code acode;
2539 bool aswap;
2540 tree atype;
2541 tree name1, name2;
2542 gimple *stmt;
2543
2544 name1 = name_for_ref (r1);
2545 name2 = name_for_ref (r2);
2546 gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2547
2548 stmt = find_common_use_stmt (&name1, &name2);
2549
2550 if (!stmt
2551 /* A simple post-dominance check - make sure the combination
2552 is executed under the same condition as the references. */
2553 || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2554 && gimple_bb (stmt) != gimple_bb (r2->stmt)))
2555 return false;
2556
2557 acode = gimple_assign_rhs_code (stmt);
2558 aswap = (!commutative_tree_code (acode)
2559 && gimple_assign_rhs1 (stmt) != name1);
2560 atype = TREE_TYPE (gimple_assign_lhs (stmt));
2561
2562 if (*code == ERROR_MARK)
2563 {
2564 *code = acode;
2565 *swap = aswap;
2566 *rslt_type = atype;
2567 return true;
2568 }
2569
2570 return (*code == acode
2571 && *swap == aswap
2572 && *rslt_type == atype);
2573}
2574
2575/* Remove OP from the operation on rhs of STMT, and replace STMT with
2576 an assignment of the remaining operand. */
2577
2578static void
2579remove_name_from_operation (gimple *stmt, tree op)
2580{
2581 tree other_op;
2582 gimple_stmt_iterator si;
2583
2584 gcc_assert (is_gimple_assign (stmt));
2585
2586 if (gimple_assign_rhs1 (stmt) == op)
2587 other_op = gimple_assign_rhs2 (stmt);
2588 else
2589 other_op = gimple_assign_rhs1 (stmt);
2590
2591 si = gsi_for_stmt (stmt);
2592 gimple_assign_set_rhs_from_tree (&si, other_op);
2593
2594 /* We should not have reallocated STMT. */
2595 gcc_assert (gsi_stmt (si) == stmt);
2596
2597 update_stmt (stmt);
2598}
2599
2600/* Reassociates the expression in that NAME1 and NAME2 are used so that they
2601 are combined in a single statement, and returns this statement. */
2602
2603static gimple *
2604reassociate_to_the_same_stmt (tree name1, tree name2)
2605{
2606 gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
2607 gassign *new_stmt, *tmp_stmt;
2608 tree new_name, tmp_name, var, r1, r2;
2609 unsigned dist1, dist2;
2610 enum tree_code code;
2611 tree type = TREE_TYPE (name1);
2612 gimple_stmt_iterator bsi;
2613
2614 stmt1 = find_use_stmt (&name1);
2615 stmt2 = find_use_stmt (&name2);
2616 root1 = find_associative_operation_root (stmt1, &dist1);
2617 root2 = find_associative_operation_root (stmt2, &dist2);
2618 code = gimple_assign_rhs_code (stmt1);
2619
2620 gcc_assert (root1 && root2 && root1 == root2
2621 && code == gimple_assign_rhs_code (stmt2));
2622
2623 /* Find the root of the nearest expression in that both NAME1 and NAME2
2624 are used. */
2625 r1 = name1;
2626 s1 = stmt1;
2627 r2 = name2;
2628 s2 = stmt2;
2629
2630 while (dist1 > dist2)
2631 {
2632 s1 = find_use_stmt (&r1);
2633 r1 = gimple_assign_lhs (s1);
2634 dist1--;
2635 }
2636 while (dist2 > dist1)
2637 {
2638 s2 = find_use_stmt (&r2);
2639 r2 = gimple_assign_lhs (s2);
2640 dist2--;
2641 }
2642
2643 while (s1 != s2)
2644 {
2645 s1 = find_use_stmt (&r1);
2646 r1 = gimple_assign_lhs (s1);
2647 s2 = find_use_stmt (&r2);
2648 r2 = gimple_assign_lhs (s2);
2649 }
2650
2651 /* Remove NAME1 and NAME2 from the statements in that they are used
2652 currently. */
2653 remove_name_from_operation (stmt1, name1);
2654 remove_name_from_operation (stmt2, name2);
2655
2656 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2657 combine it with the rhs of S1. */
2658 var = create_tmp_reg (type, "predreastmp");
2659 new_name = make_ssa_name (var);
2660 new_stmt = gimple_build_assign (new_name, code, name1, name2);
2661
2662 var = create_tmp_reg (type, "predreastmp");
2663 tmp_name = make_ssa_name (var);
2664
2665 /* Rhs of S1 may now be either a binary expression with operation
2666 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2667 so that name1 or name2 was removed from it). */
2668 tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
2669 gimple_assign_rhs1 (s1),
2670 gimple_assign_rhs2 (s1));
2671
2672 bsi = gsi_for_stmt (s1);
2673 gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2674 s1 = gsi_stmt (bsi);
2675 update_stmt (s1);
2676
2677 gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
2678 gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
2679
2680 return new_stmt;
2681}
2682
2683/* Returns the statement that combines references R1 and R2. In case R1
2684 and R2 are not used in the same statement, but they are used with an
2685 associative and commutative operation in the same expression, reassociate
2686 the expression so that they are used in the same statement. */
2687
2688static gimple *
2689stmt_combining_refs (dref r1, dref r2)
2690{
2691 gimple *stmt1, *stmt2;
2692 tree name1 = name_for_ref (r1);
2693 tree name2 = name_for_ref (r2);
2694
2695 stmt1 = find_use_stmt (&name1);
2696 stmt2 = find_use_stmt (&name2);
2697 if (stmt1 == stmt2)
2698 return stmt1;
2699
2700 return reassociate_to_the_same_stmt (name1, name2);
2701}
2702
2703/* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2704 description of the new chain is returned, otherwise we return NULL. */
2705
2706static chain_p
2707combine_chains (chain_p ch1, chain_p ch2)
2708{
2709 dref r1, r2, nw;
2710 enum tree_code op = ERROR_MARK;
2711 bool swap = false;
2712 chain_p new_chain;
2713 unsigned i;
2714 tree rslt_type = NULL_TREE;
2715
2716 if (ch1 == ch2)
2717 return NULL;
2718 if (ch1->length != ch2->length)
2719 return NULL;
2720
2721 if (ch1->refs.length () != ch2->refs.length ())
2722 return NULL;
2723
2724 for (i = 0; (ch1->refs.iterate (i, &r1)
2725 && ch2->refs.iterate (i, &r2)); i++)
2726 {
2727 if (r1->distance != r2->distance)
2728 return NULL;
2729
2730 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2731 return NULL;
2732 }
2733
2734 if (swap)
2735 std::swap (ch1, ch2);
2736
2737 new_chain = XCNEW (struct chain);
2738 new_chain->type = CT_COMBINATION;
2739 new_chain->op = op;
2740 new_chain->ch1 = ch1;
2741 new_chain->ch2 = ch2;
2742 new_chain->rslt_type = rslt_type;
2743 new_chain->length = ch1->length;
2744
2745 for (i = 0; (ch1->refs.iterate (i, &r1)
2746 && ch2->refs.iterate (i, &r2)); i++)
2747 {
2748 nw = XCNEW (struct dref_d);
2749 nw->stmt = stmt_combining_refs (r1, r2);
2750 nw->distance = r1->distance;
2751
2752 new_chain->refs.safe_push (nw);
2753 }
2754
2755 ch1->combined = true;
2756 ch2->combined = true;
2757 return new_chain;
2758}
2759
2760/* Recursively update position information of all offspring chains to ROOT
2761 chain's position information. */
2762
2763static void
2764update_pos_for_combined_chains (chain_p root)
2765{
2766 chain_p ch1 = root->ch1, ch2 = root->ch2;
2767 dref ref, ref1, ref2;
2768 for (unsigned j = 0; (root->refs.iterate (j, &ref)
2769 && ch1->refs.iterate (j, &ref1)
2770 && ch2->refs.iterate (j, &ref2)); ++j)
2771 ref1->pos = ref2->pos = ref->pos;
2772
2773 if (ch1->type == CT_COMBINATION)
2774 update_pos_for_combined_chains (ch1);
2775 if (ch2->type == CT_COMBINATION)
2776 update_pos_for_combined_chains (ch2);
2777}
2778
2779/* Returns true if statement S1 dominates statement S2. */
2780
2781static bool
2782pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
2783{
2784 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
2785
2786 if (!bb1 || s1 == s2)
2787 return true;
2788
2789 if (bb1 == bb2)
2790 return gimple_uid (s1) < gimple_uid (s2);
2791
2792 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
2793}
2794
2795/* Try to combine the CHAINS in LOOP. */
2796
2797static void
2798try_combine_chains (struct loop *loop, vec<chain_p> *chains)
2799{
2800 unsigned i, j;
2801 chain_p ch1, ch2, cch;
2802 auto_vec<chain_p> worklist;
2803 bool combined_p = false;
2804
2805 FOR_EACH_VEC_ELT (*chains, i, ch1)
2806 if (chain_can_be_combined_p (ch1))
2807 worklist.safe_push (ch1);
2808
2809 while (!worklist.is_empty ())
2810 {
2811 ch1 = worklist.pop ();
2812 if (!chain_can_be_combined_p (ch1))
2813 continue;
2814
2815 FOR_EACH_VEC_ELT (*chains, j, ch2)
2816 {
2817 if (!chain_can_be_combined_p (ch2))
2818 continue;
2819
2820 cch = combine_chains (ch1, ch2);
2821 if (cch)
2822 {
2823 worklist.safe_push (cch);
2824 chains->safe_push (cch);
2825 combined_p = true;
2826 break;
2827 }
2828 }
2829 }
2830 if (!combined_p)
2831 return;
2832
2833 /* Setup UID for all statements in dominance order. */
2834 basic_block *bbs = get_loop_body (loop);
2835 renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes);
2836 free (bbs);
2837
2838 /* Re-association in combined chains may generate statements different to
2839 order of references of the original chain. We need to keep references
2840 of combined chain in dominance order so that all uses will be inserted
2841 after definitions. Note:
2842 A) This is necessary for all combined chains.
2843 B) This is only necessary for ZERO distance references because other
2844 references inherit value from loop carried PHIs.
2845
2846 We first update position information for all combined chains. */
2847 dref ref;
2848 for (i = 0; chains->iterate (i, &ch1); ++i)
2849 {
2850 if (ch1->type != CT_COMBINATION || ch1->combined)
2851 continue;
2852
2853 for (j = 0; ch1->refs.iterate (j, &ref); ++j)
2854 ref->pos = gimple_uid (ref->stmt);
2855
2856 update_pos_for_combined_chains (ch1);
2857 }
2858 /* Then sort references according to newly updated position information. */
2859 for (i = 0; chains->iterate (i, &ch1); ++i)
2860 {
2861 if (ch1->type != CT_COMBINATION && !ch1->combined)
2862 continue;
2863
2864 /* Find the first reference with non-ZERO distance. */
2865 if (ch1->length == 0)
2866 j = ch1->refs.length();
2867 else
2868 {
2869 for (j = 0; ch1->refs.iterate (j, &ref); ++j)
2870 if (ref->distance != 0)
2871 break;
2872 }
2873
2874 /* Sort all ZERO distance references by position. */
2875 qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);
2876
2877 if (ch1->combined)
2878 continue;
2879
2880 /* For ZERO length chain, has_max_use_after must be true since root
2881 combined stmt must dominates others. */
2882 if (ch1->length == 0)
2883 {
2884 ch1->has_max_use_after = true;
2885 continue;
2886 }
2887 /* Check if there is use at max distance after root for combined chains
2888 and set flag accordingly. */
2889 ch1->has_max_use_after = false;
2890 gimple *root_stmt = get_chain_root (ch1)->stmt;
2891 for (j = 1; ch1->refs.iterate (j, &ref); ++j)
2892 {
2893 if (ref->distance == ch1->length
2894 && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt))
2895 {
2896 ch1->has_max_use_after = true;
2897 break;
2898 }
2899 }
2900 }
2901}
2902
2903/* Prepare initializers for store elimination CHAIN in LOOP. Returns false
2904 if this is impossible because one of these initializers may trap, true
2905 otherwise. */
2906
2907static bool
2908prepare_initializers_chain_store_elim (struct loop *loop, chain_p chain)
2909{
2910 unsigned i, n = chain->length;
2911
2912 /* For now we can't eliminate stores if some of them are conditional
2913 executed. */
2914 if (!chain->all_always_accessed)
2915 return false;
2916
2917 /* Nothing to intialize for intra-iteration store elimination. */
2918 if (n == 0 && chain->type == CT_STORE_STORE)
2919 return true;
2920
2921 /* For store elimination chain, there is nothing to initialize if stores
2922 to be eliminated only store loop invariant values into memory. */
2923 if (chain->type == CT_STORE_STORE
2924 && is_inv_store_elimination_chain (loop, chain))
2925 {
2926 chain->inv_store_elimination = true;
2927 return true;
2928 }
2929
2930 chain->inits.create (n);
2931 chain->inits.safe_grow_cleared (n);
2932
2933 /* For store eliminatin chain like below:
2934
2935 for (i = 0; i < len; i++)
2936 {
2937 a[i] = 1;
2938 // a[i + 1] = ...
2939 a[i + 2] = 3;
2940 }
2941
2942 store to a[i + 1] is missed in loop body, it acts like bubbles. The
2943 content of a[i + 1] remain the same if the loop iterates fewer times
2944 than chain->length. We need to set up root variables for such stores
2945 by loading from memory before loop. Note we only need to load bubble
2946 elements because loop body is guaranteed to be executed at least once
2947 after loop's preheader edge. */
2948 auto_vec<bool> bubbles;
2949 bubbles.safe_grow_cleared (n + 1);
2950 for (i = 0; i < chain->refs.length (); i++)
2951 bubbles[chain->refs[i]->distance] = true;
2952
2953 struct data_reference *dr = get_chain_root (chain)->ref;
2954 for (i = 0; i < n; i++)
2955 {
2956 if (bubbles[i])
2957 continue;
2958
2959 gimple_seq stmts = NULL;
2960
2961 tree init = ref_at_iteration (dr, (int) 0 - i, &stmts);
2962 if (stmts)
2963 gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
2964
2965 chain->inits[i] = init;
2966 }
2967
2968 return true;
2969}
2970
2971/* Prepare initializers for CHAIN in LOOP. Returns false if this is
2972 impossible because one of these initializers may trap, true otherwise. */
2973
2974static bool
2975prepare_initializers_chain (struct loop *loop, chain_p chain)
2976{
2977 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
2978 struct data_reference *dr = get_chain_root (chain)->ref;
2979 tree init;
2980 dref laref;
2981 edge entry = loop_preheader_edge (loop);
2982
2983 if (chain->type == CT_STORE_STORE)
2984 return prepare_initializers_chain_store_elim (loop, chain);
2985
2986 /* Find the initializers for the variables, and check that they cannot
2987 trap. */
2988 chain->inits.create (n);
2989 for (i = 0; i < n; i++)
2990 chain->inits.quick_push (NULL_TREE);
2991
2992 /* If we have replaced some looparound phi nodes, use their initializers
2993 instead of creating our own. */
2994 FOR_EACH_VEC_ELT (chain->refs, i, laref)
2995 {
2996 if (gimple_code (laref->stmt) != GIMPLE_PHI)
2997 continue;
2998
2999 gcc_assert (laref->distance > 0);
3000 chain->inits[n - laref->distance]
3001 = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
3002 }
3003
3004 for (i = 0; i < n; i++)
3005 {
3006 gimple_seq stmts = NULL;
3007
3008 if (chain->inits[i] != NULL_TREE)
3009 continue;
3010
3011 init = ref_at_iteration (dr, (int) i - n, &stmts);
3012 if (!chain->all_always_accessed && tree_could_trap_p (init))
3013 {
3014 gimple_seq_discard (stmts);
3015 return false;
3016 }
3017
3018 if (stmts)
3019 gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
3020
3021 chain->inits[i] = init;
3022 }
3023
3024 return true;
3025}
3026
3027/* Prepare initializers for CHAINS in LOOP, and free chains that cannot
3028 be used because the initializers might trap. */
3029
3030static void
3031prepare_initializers (struct loop *loop, vec<chain_p> chains)
3032{
3033 chain_p chain;
3034 unsigned i;
3035
3036 for (i = 0; i < chains.length (); )
3037 {
3038 chain = chains[i];
3039 if (prepare_initializers_chain (loop, chain))
3040 i++;
3041 else
3042 {
3043 release_chain (chain);
3044 chains.unordered_remove (i);
3045 }
3046 }
3047}
3048
3049/* Generates finalizer memory references for CHAIN in LOOP. Returns true
3050 if finalizer code for CHAIN can be generated, otherwise false. */
3051
3052static bool
3053prepare_finalizers_chain (struct loop *loop, chain_p chain)
3054{
3055 unsigned i, n = chain->length;
3056 struct data_reference *dr = get_chain_root (chain)->ref;
3057 tree fini, niters = number_of_latch_executions (loop);
3058
3059 /* For now we can't eliminate stores if some of them are conditional
3060 executed. */
3061 if (!chain->all_always_accessed)
3062 return false;
3063
3064 chain->finis.create (n);
3065 for (i = 0; i < n; i++)
3066 chain->finis.quick_push (NULL_TREE);
3067
3068 /* We never use looparound phi node for store elimination chains. */
3069
3070 /* Find the finalizers for the variables, and check that they cannot
3071 trap. */
3072 for (i = 0; i < n; i++)
3073 {
3074 gimple_seq stmts = NULL;
3075 gcc_assert (chain->finis[i] == NULL_TREE);
3076
3077 if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME)
3078 {
3079 niters = unshare_expr (niters);
3080 niters = force_gimple_operand (niters, &stmts, true, NULL);
3081 if (stmts)
3082 {
3083 gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3084 stmts = NULL;
3085 }
3086 }
3087 fini = ref_at_iteration (dr, (int) 0 - i, &stmts, niters);
3088 if (stmts)
3089 gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3090
3091 chain->finis[i] = fini;
3092 }
3093
3094 return true;
3095}
3096
3097/* Generates finalizer memory reference for CHAINS in LOOP. Returns true
3098 if finalizer code generation for CHAINS breaks loop closed ssa form. */
3099
3100static bool
3101prepare_finalizers (struct loop *loop, vec<chain_p> chains)
3102{
3103 chain_p chain;
3104 unsigned i;
3105 bool loop_closed_ssa = false;
3106
3107 for (i = 0; i < chains.length ();)
3108 {
3109 chain = chains[i];
3110
3111 /* Finalizer is only necessary for inter-iteration store elimination
3112 chains. */
3113 if (chain->length == 0 || chain->type != CT_STORE_STORE)
3114 {
3115 i++;
3116 continue;
3117 }
3118
3119 if (prepare_finalizers_chain (loop, chain))
3120 {
3121 i++;
3122 /* Be conservative, assume loop closed ssa form is corrupted
3123 by store-store chain. Though it's not always the case if
3124 eliminated stores only store loop invariant values into
3125 memory. */
3126 loop_closed_ssa = true;
3127 }
3128 else
3129 {
3130 release_chain (chain);
3131 chains.unordered_remove (i);
3132 }
3133 }
3134 return loop_closed_ssa;
3135}
3136
3137/* Insert all initializing gimple stmts into loop's entry edge. */
3138
3139static void
3140insert_init_seqs (struct loop *loop, vec<chain_p> chains)
3141{
3142 unsigned i;
3143 edge entry = loop_preheader_edge (loop);
3144
3145 for (i = 0; i < chains.length (); ++i)
3146 if (chains[i]->init_seq)
3147 {
3148 gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq);
3149 chains[i]->init_seq = NULL;
3150 }
3151}
3152
3153/* Performs predictive commoning for LOOP. Sets bit 1<<0 of return value
3154 if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa
3155 form was corrupted. */
3156
3157static unsigned
3158tree_predictive_commoning_loop (struct loop *loop)
3159{
3160 vec<data_reference_p> datarefs;
3161 vec<ddr_p> dependences;
3162 struct component *components;
3163 vec<chain_p> chains = vNULL;
3164 unsigned unroll_factor;
3165 struct tree_niter_desc desc;
3166 bool unroll = false, loop_closed_ssa = false;
3167 edge exit;
3168
3169 if (dump_file && (dump_flags & TDF_DETAILS))
3170 fprintf (dump_file, "Processing loop %d\n", loop->num);
3171
3172 /* Nothing for predicitive commoning if loop only iterates 1 time. */
3173 if (get_max_loop_iterations_int (loop) == 0)
3174 {
3175 if (dump_file && (dump_flags & TDF_DETAILS))
3176 fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");
3177
3178 return 0;
3179 }
3180
3181 /* Find the data references and split them into components according to their
3182 dependence relations. */
3183 auto_vec<loop_p, 3> loop_nest;
3184 dependences.create (10);
3185 datarefs.create (10);
3186 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
3187 &dependences))
3188 {
3189 if (dump_file && (dump_flags & TDF_DETAILS))
3190 fprintf (dump_file, "Cannot analyze data dependencies\n");
3191 free_data_refs (datarefs);
3192 free_dependence_relations (dependences);
3193 return 0;
3194 }
3195
3196 if (dump_file && (dump_flags & TDF_DETAILS))
3197 dump_data_dependence_relations (dump_file, dependences);
3198
3199 components = split_data_refs_to_components (loop, datarefs, dependences);
3200 loop_nest.release ();
3201 free_dependence_relations (dependences);
3202 if (!components)
3203 {
3204 free_data_refs (datarefs);
3205 free_affine_expand_cache (&name_expansions);
3206 return 0;
3207 }
3208
3209 if (dump_file && (dump_flags & TDF_DETAILS))
3210 {
3211 fprintf (dump_file, "Initial state:\n\n");
3212 dump_components (dump_file, components);
3213 }
3214
3215 /* Find the suitable components and split them into chains. */
3216 components = filter_suitable_components (loop, components);
3217
3218 auto_bitmap tmp_vars;
3219 looparound_phis = BITMAP_ALLOC (NULL);
3220 determine_roots (loop, components, &chains);
3221 release_components (components);
3222
3223 if (!chains.exists ())
3224 {
3225 if (dump_file && (dump_flags & TDF_DETAILS))
3226 fprintf (dump_file,
3227 "Predictive commoning failed: no suitable chains\n");
3228 goto end;
3229 }
3230 prepare_initializers (loop, chains);
3231 loop_closed_ssa = prepare_finalizers (loop, chains);
3232
3233 /* Try to combine the chains that are always worked with together. */
3234 try_combine_chains (loop, &chains);
3235
3236 insert_init_seqs (loop, chains);
3237
3238 if (dump_file && (dump_flags & TDF_DETAILS))
3239 {
3240 fprintf (dump_file, "Before commoning:\n\n");
3241 dump_chains (dump_file, chains);
3242 }
3243
3244 /* Determine the unroll factor, and if the loop should be unrolled, ensure
3245 that its number of iterations is divisible by the factor. */
3246 unroll_factor = determine_unroll_factor (chains);
3247 scev_reset ();
3248 unroll = (unroll_factor > 1
3249 && can_unroll_loop_p (loop, unroll_factor, &desc));
3250 exit = single_dom_exit (loop);
3251
3252 /* Execute the predictive commoning transformations, and possibly unroll the
3253 loop. */
3254 if (unroll)
3255 {
3256 struct epcc_data dta;
3257
3258 if (dump_file && (dump_flags & TDF_DETAILS))
3259 fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
3260
3261 dta.chains = chains;
3262 dta.tmp_vars = tmp_vars;
3263
3264 update_ssa (TODO_update_ssa_only_virtuals);
3265
3266 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
3267 execute_pred_commoning_cbck is called may cause phi nodes to be
3268 reallocated, which is a problem since CHAINS may point to these
3269 statements. To fix this, we store the ssa names defined by the
3270 phi nodes here instead of the phi nodes themselves, and restore
3271 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
3272 replace_phis_by_defined_names (chains);
3273
3274 tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
3275 execute_pred_commoning_cbck, &dta);
3276 eliminate_temp_copies (loop, tmp_vars);
3277 }
3278 else
3279 {
3280 if (dump_file && (dump_flags & TDF_DETAILS))
3281 fprintf (dump_file,
3282 "Executing predictive commoning without unrolling.\n");
3283 execute_pred_commoning (loop, chains, tmp_vars);
3284 }
3285
3286end: ;
3287 release_chains (chains);
3288 free_data_refs (datarefs);
3289 BITMAP_FREE (looparound_phis);
3290
3291 free_affine_expand_cache (&name_expansions);
3292
3293 return (unroll ? 1 : 0) | (loop_closed_ssa ? 2 : 0);
3294}
3295
3296/* Runs predictive commoning. */
3297
3298unsigned
3299tree_predictive_commoning (void)
3300{
3301 struct loop *loop;
3302 unsigned ret = 0, changed = 0;
3303
3304 initialize_original_copy_tables ();
3305 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
3306 if (optimize_loop_for_speed_p (loop))
3307 {
3308 changed |= tree_predictive_commoning_loop (loop);
3309 }
3310 free_original_copy_tables ();
3311
3312 if (changed > 0)
3313 {
3314 scev_reset ();
3315
3316 if (changed > 1)
3317 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3318
3319 ret = TODO_cleanup_cfg;
3320 }
3321
3322 return ret;
3323}
3324
3325/* Predictive commoning Pass. */
3326
3327static unsigned
3328run_tree_predictive_commoning (struct function *fun)
3329{
3330 if (number_of_loops (fun) <= 1)
3331 return 0;
3332
3333 return tree_predictive_commoning ();
3334}
3335
3336namespace {
3337
3338const pass_data pass_data_predcom =
3339{
3340 GIMPLE_PASS, /* type */
3341 "pcom", /* name */
3342 OPTGROUP_LOOP, /* optinfo_flags */
3343 TV_PREDCOM, /* tv_id */
3344 PROP_cfg, /* properties_required */
3345 0, /* properties_provided */
3346 0, /* properties_destroyed */
3347 0, /* todo_flags_start */
3348 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
3349};
3350
3351class pass_predcom : public gimple_opt_pass
3352{
3353public:
3354 pass_predcom (gcc::context *ctxt)
3355 : gimple_opt_pass (pass_data_predcom, ctxt)
3356 {}
3357
3358 /* opt_pass methods: */
3359 virtual bool gate (function *) { return flag_predictive_commoning != 0; }
3360 virtual unsigned int execute (function *fun)
3361 {
3362 return run_tree_predictive_commoning (fun);
3363 }
3364
3365}; // class pass_predcom
3366
3367} // anon namespace
3368
3369gimple_opt_pass *
3370make_pass_predcom (gcc::context *ctxt)
3371{
3372 return new pass_predcom (ctxt);
3373}
3374
3375
3376