1/* Loop interchange.
2 Copyright (C) 2017 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it
8under the terms of the GNU General Public License as published by the
9Free Software Foundation; either version 3, or (at your option) any
10later version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT
13ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "backend.h"
25#include "is-a.h"
26#include "tree.h"
27#include "gimple.h"
28#include "tree-pass.h"
29#include "ssa.h"
30#include "gimple-pretty-print.h"
31#include "fold-const.h"
32#include "gimplify.h"
33#include "gimple-iterator.h"
34#include "gimplify-me.h"
35#include "cfgloop.h"
36#include "params.h"
37#include "tree-ssa.h"
38#include "tree-scalar-evolution.h"
39#include "tree-ssa-loop-manip.h"
40#include "tree-ssa-loop-niter.h"
41#include "tree-ssa-loop-ivopts.h"
42#include "tree-ssa-dce.h"
43#include "tree-data-ref.h"
44#include "tree-vectorizer.h"
45
46/* This pass performs loop interchange: for example, the loop nest
47
48 for (int j = 0; j < N; j++)
49 for (int k = 0; k < N; k++)
50 for (int i = 0; i < N; i++)
51 c[i][j] = c[i][j] + a[i][k]*b[k][j];
52
53 is transformed to
54
55 for (int i = 0; i < N; i++)
56 for (int j = 0; j < N; j++)
57 for (int k = 0; k < N; k++)
58 c[i][j] = c[i][j] + a[i][k]*b[k][j];
59
60 This pass implements loop interchange in the following steps:
61
62 1) Find perfect loop nest for each innermost loop and compute data
63 dependence relations for it. For above example, loop nest is
64 <loop_j, loop_k, loop_i>.
65 2) From innermost to outermost loop, this pass tries to interchange
66 each loop pair. For above case, it firstly tries to interchange
67 <loop_k, loop_i> and loop nest becomes <loop_j, loop_i, loop_k>.
68 Then it tries to interchange <loop_j, loop_i> and loop nest becomes
69 <loop_i, loop_j, loop_k>. The overall effect is to move innermost
70 loop to the outermost position. For loop pair <loop_i, loop_j>
71 to be interchanged, we:
72 3) Check if data dependence relations are valid for loop interchange.
73 4) Check if both loops can be interchanged in terms of transformation.
74 5) Check if interchanging the two loops is profitable.
75 6) Interchange the two loops by mapping induction variables.
76
77 This pass also handles reductions in loop nest. So far we only support
78 simple reduction of inner loop and double reduction of the loop nest. */
79
80/* Maximum number of stmts in each loop that should be interchanged. */
81#define MAX_NUM_STMT (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS))
82/* Maximum number of data references in loop nest. */
83#define MAX_DATAREFS (PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
84
85/* Comparison ratio of access stride between inner/outer loops to be
86 interchanged. This is the minimum stride ratio for loop interchange
87 to be profitable. */
88#define OUTER_STRIDE_RATIO (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_STRIDE_RATIO))
89/* The same as above, but we require higher ratio for interchanging the
90 innermost two loops. */
91#define INNER_STRIDE_RATIO ((OUTER_STRIDE_RATIO) + 1)
92
93/* Vector of strides that DR accesses in each level loop of a loop nest. */
94#define DR_ACCESS_STRIDE(dr) ((vec<tree> *) dr->aux)
95
/* Structure recording a loop induction variable.  An IV here is the
   simple affine form {init, +, step}_loop as recognized by SCEV.  */
typedef struct induction
{
  /* IV itself.  */
  tree var;
  /* IV's initializing value, which is the init arg of the IV PHI node.  */
  tree init_val;
  /* IV's initializing expr, which is (the expanded result of) init_val.  */
  tree init_expr;
  /* IV's step, i.e. the increment applied on each loop iteration.  */
  tree step;
} *induction_p;
108
/* Enum type for loop reduction variable.  */
enum reduction_type
{
  /* Not (yet) classified, or classification failed.  */
  UNKNOWN_RTYPE = 0,
  /* Simple reduction of the inner loop, e.g. sum += a[i].  */
  SIMPLE_RTYPE,
  /* Double reduction carried by both loops of the nest.  */
  DOUBLE_RTYPE
};
116
/* Structure recording loop reduction variable.  */
typedef struct reduction
{
  /* Reduction itself, i.e. the PHI result in the loop header.  */
  tree var;
  /* PHI node defining reduction variable.  */
  gphi *phi;
  /* Init and next variables of the reduction: INIT is the PHI arg on the
     preheader edge, NEXT the PHI arg on the latch edge.  */
  tree init;
  tree next;
  /* Lcssa PHI node if reduction is used outside of its definition loop.  */
  gphi *lcssa_phi;
  /* Stmts defining init and next: PRODUCER defines INIT (outside the
     loop), CONSUMER stores the lcssa result back to memory.  */
  gimple *producer;
  gimple *consumer;
  /* If init is loaded from memory, this is the loading memory reference.  */
  tree init_ref;
  /* If reduction is finally stored to memory, this is the stored memory
     reference.  */
  tree fini_ref;
  /* Classification of this reduction; UNKNOWN_RTYPE until classified.  */
  enum reduction_type type;
} *reduction_p;
139
140
141/* Dump reduction RE. */
142
143static void
144dump_reduction (reduction_p re)
145{
146 if (re->type == SIMPLE_RTYPE)
147 fprintf (dump_file, " Simple reduction: ");
148 else if (re->type == DOUBLE_RTYPE)
149 fprintf (dump_file, " Double reduction: ");
150 else
151 fprintf (dump_file, " Unknown reduction: ");
152
153 print_gimple_stmt (dump_file, re->phi, 0);
154}
155
/* Dump LOOP's induction IV to the dump file in the SCEV-like form
   "var = {init, step}_loopnum".  */
static void
dump_induction (struct loop *loop, induction_p iv)
{
  fprintf (dump_file, "  Induction:  ");
  print_generic_expr (dump_file, iv->var, TDF_SLIM);
  fprintf (dump_file, " = {");
  print_generic_expr (dump_file, iv->init_expr, TDF_SLIM);
  fprintf (dump_file, ", ");
  print_generic_expr (dump_file, iv->step, TDF_SLIM);
  fprintf (dump_file, "}_%d\n", loop->num);
}
168
/* Loop candidate for interchange.  Wraps one loop of the nest together
   with the induction/reduction information collected for it.  */

struct loop_cand
{
  loop_cand (struct loop *, struct loop *);
  ~loop_cand ();

  /* Lookup and analysis helpers; see definitions for details.  */
  reduction_p find_reduction_by_stmt (gimple *);
  void classify_simple_reduction (reduction_p);
  bool analyze_iloop_reduction_var (tree);
  bool analyze_oloop_reduction_var (loop_cand *, tree);
  bool analyze_induction_var (tree, tree);
  bool analyze_carried_vars (loop_cand *);
  bool analyze_lcssa_phis (void);
  bool can_interchange_p (loop_cand *);
  bool supported_operations (basic_block, loop_cand *, int *);
  void undo_simple_reduction (reduction_p, bitmap);

  /* The loop itself.  */
  struct loop *m_loop;
  /* The outer loop for interchange.  It equals to loop if this loop cand
     itself represents the outer loop.  */
  struct loop *m_outer;
  /* Vector of induction variables in loop.  */
  vec<induction_p> m_inductions;
  /* Vector of reduction variables in loop.  */
  vec<reduction_p> m_reductions;
  /* Lcssa PHI nodes of this loop.  */
  vec<gphi *> m_lcssa_nodes;
  /* Single exit edge of this loop.  */
  edge m_exit;
  /* Basic blocks of this loop.  */
  basic_block *m_bbs;
};
203
/* Constructor.  LOOP is the loop this candidate represents, OUTER the
   outer loop of the interchange pair (equal to LOOP for the outer cand).
   Caches the single exit edge and the loop body blocks.  */

loop_cand::loop_cand (struct loop *loop, struct loop *outer)
  : m_loop (loop), m_outer (outer),
    m_exit (single_exit (loop)), m_bbs (get_loop_body (loop))
{
  m_inductions.create (3);
  m_reductions.create (3);
  m_lcssa_nodes.create (3);
}
214
215/* Destructor. */
216
217loop_cand::~loop_cand ()
218{
219 induction_p iv;
220 for (unsigned i = 0; m_inductions.iterate (i, &iv); ++i)
221 free (iv);
222
223 reduction_p re;
224 for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
225 free (re);
226
227 m_inductions.release ();
228 m_reductions.release ();
229 m_lcssa_nodes.release ();
230 free (m_bbs);
231}
232
233/* Return single use stmt of VAR in LOOP, otherwise return NULL. */
234
235static gimple *
236single_use_in_loop (tree var, struct loop *loop)
237{
238 gimple *stmt, *res = NULL;
239 use_operand_p use_p;
240 imm_use_iterator iterator;
241
242 FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
243 {
244 stmt = USE_STMT (use_p);
245 if (is_gimple_debug (stmt))
246 continue;
247
248 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
249 continue;
250
251 if (res)
252 return NULL;
253
254 res = stmt;
255 }
256 return res;
257}
258
259/* Return true if E is unsupported in loop interchange, i.e, E is a complex
260 edge or part of irreducible loop. */
261
262static inline bool
263unsupported_edge (edge e)
264{
265 return (e->flags & (EDGE_COMPLEX | EDGE_IRREDUCIBLE_LOOP));
266}
267
268/* Return the reduction if STMT is one of its lcssa PHI, producer or consumer
269 stmt. */
270
271reduction_p
272loop_cand::find_reduction_by_stmt (gimple *stmt)
273{
274 gphi *phi = dyn_cast <gphi *> (stmt);
275 reduction_p re;
276
277 for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
278 if ((phi != NULL && phi == re->lcssa_phi)
279 || (stmt == re->producer || stmt == re->consumer))
280 return re;
281
282 return NULL;
283}
284
/* Return true if all stmts in BB can be supported by loop interchange,
   otherwise return false.  ILOOP is not NULL if this loop_cand is the
   outer loop in loop nest.  Add the number of supported statements to
   NUM_STMTS.  */

bool
loop_cand::supported_operations (basic_block bb, loop_cand *iloop,
				 int *num_stmts)
{
  int bb_num_stmts = 0;
  gphi_iterator psi;
  gimple_stmt_iterator gsi;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (is_gimple_debug (stmt))
	continue;

      /* Stmts with side effects can't be reordered by interchange.  */
      if (gimple_has_side_effects (stmt))
	return false;

      bb_num_stmts++;
      if (gcall *call = dyn_cast <gcall *> (stmt))
	{
	  /* In basic block of outer loop, the call should be cheap since
	     it will be moved to inner loop.  */
	  if (iloop != NULL
	      && !gimple_inexpensive_call_p (call))
	    return false;
	  continue;
	}

      /* Stmts not accessing memory (no virtual use), or any stmt when
	 checking the inner loop, are always fine.  */
      if (!iloop || !gimple_vuse (stmt))
	continue;

      /* Support stmt accessing memory in outer loop only if it is for inner
	 loop's reduction.  */
      if (iloop->find_reduction_by_stmt (stmt))
	continue;

      tree lhs;
      /* Support loop invariant memory reference if it's only used once by
	 inner loop.  */
      /* ??? How's this checking for invariantness?  */
      if (gimple_assign_single_p (stmt)
	  && (lhs = gimple_assign_lhs (stmt)) != NULL_TREE
	  && TREE_CODE (lhs) == SSA_NAME
	  && single_use_in_loop (lhs, iloop->m_loop))
	continue;

      /* Anything else accessing memory in the outer loop is unsupported.  */
      return false;
    }
  *num_stmts += bb_num_stmts;

  /* Allow PHI nodes in any basic block of inner loop, PHI nodes in outer
     loop's header, or PHI nodes in dest bb of inner loop's exit edge.  */
  if (!iloop || bb == m_loop->header
      || bb == iloop->m_exit->dest)
    return true;

  /* Don't allow any other PHI nodes.  */
  for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
    if (!virtual_operand_p (PHI_RESULT (psi.phi ())))
      return false;

  return true;
}
353
/* Return true if current loop_cand be interchanged.  ILOOP is not NULL if
   current loop_cand is outer loop in loop nest.  */

bool
loop_cand::can_interchange_p (loop_cand *iloop)
{
  /* For now we only support at most one reduction.  */
  unsigned allowed_reduction_num = 1;

  /* Only support reduction if the loop nest to be interchanged is the
     innermost two loops.  */
  if ((iloop == NULL && m_loop->inner != NULL)
      || (iloop != NULL && iloop->m_loop->inner != NULL))
    allowed_reduction_num = 0;

  if (m_reductions.length () > allowed_reduction_num
      || (m_reductions.length () == 1
	  && m_reductions[0]->type == UNKNOWN_RTYPE))
    return false;

  /* Only support lcssa PHI node which is for reduction.  */
  if (m_lcssa_nodes.length () > allowed_reduction_num)
    return false;

  int num_stmts = 0;
  /* Check basic blocks other than loop header/exit.  */
  for (unsigned i = 0; i < m_loop->num_nodes; i++)
    {
      basic_block bb = m_bbs[i];

      /* Skip basic blocks of inner loops.  */
      if (bb->loop_father != m_loop)
	continue;

      /* Check if basic block has any unsupported operation.  */
      if (!supported_operations (bb, iloop, &num_stmts))
	return false;

      /* Check if loop has too many stmts.  */
      if (num_stmts > MAX_NUM_STMT)
	return false;
    }

  return true;
}
399
/* Programmers and optimizers (like loop store motion) may optimize code:

     for (int i = 0; i < N; i++)
       for (int j = 0; j < N; j++)
	 a[i] += b[j][i] * c[j][i];

   into reduction:

     for (int i = 0; i < N; i++)
       {
	 // producer.  Note sum can be initialized to a constant.
	 int sum = a[i];
	 for (int j = 0; j < N; j++)
	   {
	     sum += b[j][i] * c[j][i];
	   }
	 // consumer.
	 a[i] = sum;
       }

   The result code can't be interchanged without undoing the optimization.
   This function classifies this kind of reduction and records information
   so that we can undo the store motion during interchange.  */

void
loop_cand::classify_simple_reduction (reduction_p re)
{
  gimple *producer, *consumer;

  /* Check init variable of reduction and how it is initialized.  */
  if (TREE_CODE (re->init) == SSA_NAME)
    {
      /* INIT is defined by a stmt; it must be a load in the outer loop
	 for the reduction to be undoable.  */
      producer = SSA_NAME_DEF_STMT (re->init);
      re->producer = producer;
      basic_block bb = gimple_bb (producer);
      if (!bb || bb->loop_father != m_outer)
	return;

      if (!gimple_assign_load_p (producer))
	return;

      re->init_ref = gimple_assign_rhs1 (producer);
    }
  else if (!CONSTANT_CLASS_P (re->init))
    return;

  /* Check how reduction variable is used: its lcssa result must be stored
     back to memory by a single stmt in the outer loop.  */
  consumer = single_use_in_loop (PHI_RESULT (re->lcssa_phi), m_outer);
  if (!consumer
      || !gimple_store_p (consumer))
    return;

  re->fini_ref = gimple_get_lhs (consumer);
  re->consumer = consumer;

  /* Simple reduction with constant initializer.  */
  if (!re->init_ref)
    {
      gcc_assert (CONSTANT_CLASS_P (re->init));
      re->init_ref = unshare_expr (re->fini_ref);
    }

  /* Require memory references in producer and consumer are the same so
     that we can undo reduction during interchange.  */
  if (re->init_ref && !operand_equal_p (re->init_ref, re->fini_ref, 0))
    return;

  re->type = SIMPLE_RTYPE;
}
469
/* Analyze reduction variable VAR for inner loop of the loop nest to be
   interchanged.  Return true if analysis succeeds.  */

bool
loop_cand::analyze_iloop_reduction_var (tree var)
{
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
  gphi *lcssa_phi = NULL, *use_phi;
  tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
  tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
  reduction_p re;
  gimple *stmt, *next_def, *single_use = NULL;
  use_operand_p use_p;
  imm_use_iterator iterator;

  if (TREE_CODE (next) != SSA_NAME)
    return false;

  /* NEXT must be computed inside the loop itself.  */
  next_def = SSA_NAME_DEF_STMT (next);
  basic_block bb = gimple_bb (next_def);
  if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
    return false;

  /* In restricted reduction, the var is (and must be) used in defining
     the updated var.  The process can be depicted as below:

		  var ;; = PHI<init, next>
		   |
		   |
		   v
      +---------------------+
      | reduction operators | <-- other operands
      +---------------------+
		   |
		   |
		   v
		  next

     In terms of loop interchange, we don't change how NEXT is computed
     based on VAR and OTHER OPERANDS.  In case of double reduction in loop
     nest to be interchanged, we don't change it at all.  In the case of
     simple reduction in inner loop, we only change how VAR/NEXT is loaded
     or stored.  With these conditions, we can relax restrictions on
     reduction in a way that reduction operation is seen as black box.  In
     general, we can ignore reassociation of reduction operator; we can
     handle fake reductions in which VAR is not even used to compute NEXT.  */
  if (! single_imm_use (var, &use_p, &single_use)
      || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
    return false;

  /* Check the reduction operation.  We require a left-associative operation.
     For FP math we also need to be allowed to associate operations.  */
  if (gassign *ass = dyn_cast <gassign *> (single_use))
    {
      enum tree_code code = gimple_assign_rhs_code (ass);
      if (! (associative_tree_code (code)
	     || (code == MINUS_EXPR
		 && use_p->use == gimple_assign_rhs1_ptr (ass)))
	  || (FLOAT_TYPE_P (TREE_TYPE (var))
	      && ! flag_associative_math))
	return false;
    }
  else
    return false;

  /* Handle and verify a series of stmts feeding the reduction op.  */
  if (single_use != next_def
      && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next,
				gimple_assign_rhs_code (single_use)))
    return false;

  /* Only support cases in which INIT is used in inner loop.  */
  if (TREE_CODE (init) == SSA_NAME)
    FOR_EACH_IMM_USE_FAST (use_p, iterator, init)
      {
	stmt = USE_STMT (use_p);
	if (is_gimple_debug (stmt))
	  continue;

	if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
	  return false;
      }

  /* NEXT may only be used by the reduction PHI itself and by a single
     lcssa PHI in the exit destination block.  */
  FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
    {
      stmt = USE_STMT (use_p);
      if (is_gimple_debug (stmt))
	continue;

      /* Or else it's used in PHI itself.  */
      use_phi = dyn_cast <gphi *> (stmt);
      if (use_phi == phi)
	continue;

      if (use_phi != NULL
	  && lcssa_phi == NULL
	  && gimple_bb (stmt) == m_exit->dest
	  && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
	lcssa_phi = use_phi;
      else
	return false;
    }
  if (!lcssa_phi)
    return false;

  re = XCNEW (struct reduction);
  re->var = var;
  re->init = init;
  re->next = next;
  re->phi = phi;
  re->lcssa_phi = lcssa_phi;

  /* Try to classify it as an undoable (store-motioned) simple reduction;
     RE->type stays UNKNOWN_RTYPE on failure.  */
  classify_simple_reduction (re);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_reduction (re);

  m_reductions.safe_push (re);
  return true;
}
590
/* Analyze reduction variable VAR for outer loop of the loop nest to be
   interchanged.  ILOOP is not NULL and points to inner loop.  For the
   moment, we only support double reduction for outer loop, like:

     for (int i = 0; i < n; i++)
       {
	 int sum = 0;

	 for (int j = 0; j < n; j++)    // outer loop
	   for (int k = 0; k < n; k++)  // inner loop
	     sum += a[i][k]*b[k][j];

	 s[i] = sum;
       }

   Note the innermost two loops are the loop nest to be interchanged.
   Return true if analysis succeeds.  */

bool
loop_cand::analyze_oloop_reduction_var (loop_cand *iloop, tree var)
{
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
  gphi *lcssa_phi = NULL, *use_phi;
  tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
  tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
  reduction_p re;
  gimple *stmt, *next_def;
  use_operand_p use_p;
  imm_use_iterator iterator;

  if (TREE_CODE (next) != SSA_NAME)
    return false;

  /* NEXT must be computed inside this (outer) loop.  */
  next_def = SSA_NAME_DEF_STMT (next);
  basic_block bb = gimple_bb (next_def);
  if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
    return false;

  /* Find inner loop's simple reduction that uses var as initializer.  */
  reduction_p inner_re = NULL;
  for (unsigned i = 0; iloop->m_reductions.iterate (i, &inner_re); ++i)
    if (inner_re->init == var || operand_equal_p (inner_re->init, var, 0))
      break;

  /* The inner reduction must exist, be unclassified (not a store-motioned
     simple reduction) and be initialized directly from this PHI.  */
  if (inner_re == NULL
      || inner_re->type != UNKNOWN_RTYPE
      || inner_re->producer != phi)
    return false;

  /* In case of double reduction, outer loop's reduction should be updated
     by inner loop's simple reduction.  */
  if (next_def != inner_re->lcssa_phi)
    return false;

  /* Outer loop's reduction should only be used to initialize inner loop's
     simple reduction.  */
  if (! single_imm_use (var, &use_p, &stmt)
      || stmt != inner_re->phi)
    return false;

  /* Check this reduction is correctly used outside of loop via lcssa phi.  */
  FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
    {
      stmt = USE_STMT (use_p);
      if (is_gimple_debug (stmt))
	continue;

      /* Or else it's used in PHI itself.  */
      use_phi = dyn_cast <gphi *> (stmt);
      if (use_phi == phi)
	continue;

      if (lcssa_phi == NULL
	  && use_phi != NULL
	  && gimple_bb (stmt) == m_exit->dest
	  && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
	lcssa_phi = use_phi;
      else
	return false;
    }
  if (!lcssa_phi)
    return false;

  re = XCNEW (struct reduction);
  re->var = var;
  re->init = init;
  re->next = next;
  re->phi = phi;
  re->lcssa_phi = lcssa_phi;
  re->type = DOUBLE_RTYPE;
  /* Mark the inner reduction as the inner half of this double
     reduction.  */
  inner_re->type = DOUBLE_RTYPE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_reduction (re);

  m_reductions.safe_push (re);
  return true;
}
689
/* Return true if VAR is induction variable of current loop whose scev is
   specified by CHREC.  */

bool
loop_cand::analyze_induction_var (tree var, tree chrec)
{
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
  tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));

  /* Var is loop invariant, though it's unlikely to happen.  Record it as
     a degenerate IV with zero step.  */
  if (tree_does_not_contain_chrecs (chrec))
    {
      struct induction *iv = XCNEW (struct induction);
      iv->var = var;
      iv->init_val = init;
      iv->init_expr = chrec;
      iv->step = build_int_cst (TREE_TYPE (chrec), 0);
      m_inductions.safe_push (iv);
      return true;
    }

  /* Only simple affine IVs of this very loop are supported: the chrec
     must be {left, +, right}_m_loop with invariant left/right.  */
  if (TREE_CODE (chrec) != POLYNOMIAL_CHREC
      || CHREC_VARIABLE (chrec) != (unsigned) m_loop->num
      || tree_contains_chrecs (CHREC_LEFT (chrec), NULL)
      || tree_contains_chrecs (CHREC_RIGHT (chrec), NULL))
    return false;

  struct induction *iv = XCNEW (struct induction);
  iv->var = var;
  iv->init_val = init;
  iv->init_expr = CHREC_LEFT (chrec);
  iv->step = CHREC_RIGHT (chrec);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_induction (m_loop, iv);

  m_inductions.safe_push (iv);
  return true;
}
729
/* Return true if all loop carried variables defined in loop header can
   be successfully analyzed, i.e, each header PHI is recognized either as
   an induction variable or as a supported reduction.  ILOOP is non-NULL
   when this cand is the outer loop of the pair.  */

bool
loop_cand::analyze_carried_vars (loop_cand *iloop)
{
  edge e = loop_preheader_edge (m_outer);
  gphi_iterator gsi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nLoop(%d) carried vars:\n", m_loop->num);

  for (gsi = gsi_start_phis (m_loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();

      tree var = PHI_RESULT (phi);
      if (virtual_operand_p (var))
	continue;

      /* Compute and instantiate the evolution of VAR relative to the
	 outer loop's preheader.  */
      tree chrec = analyze_scalar_evolution (m_loop, var);
      chrec = instantiate_scev (e, m_loop, chrec);

      /* Analyze var as reduction variable.  */
      if (chrec_contains_undetermined (chrec)
	  || chrec_contains_symbols_defined_in_loop (chrec, m_outer->num))
	{
	  if (iloop && !analyze_oloop_reduction_var (iloop, var))
	    return false;
	  if (!iloop && !analyze_iloop_reduction_var (var))
	    return false;
	}
      /* Analyze var as induction variable.  */
      else if (!analyze_induction_var (var, chrec))
	return false;
    }

  return true;
}
769
770/* Return TRUE if loop closed PHI nodes can be analyzed successfully. */
771
772bool
773loop_cand::analyze_lcssa_phis (void)
774{
775 gphi_iterator gsi;
776 for (gsi = gsi_start_phis (m_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
777 {
778 gphi *phi = gsi.phi ();
779
780 if (virtual_operand_p (PHI_RESULT (phi)))
781 continue;
782
783 /* TODO: We only support lcssa phi for reduction for now. */
784 if (!find_reduction_by_stmt (phi))
785 return false;
786 }
787
788 return true;
789}
790
/* CONSUMER is a stmt in BB storing reduction result into memory object.
   When the reduction is initialized from constant value, we need to add
   a stmt loading from the memory object to target basic block in inner
   loop during undoing the reduction.  Problem is that memory reference
   may use ssa variables not dominating the target basic block.  This
   function finds all stmts on which CONSUMER depends in basic block BB,
   records and returns them via STMTS.  The dependent stmts are removed
   from BB and appended (in original order) to the STMTS sequence.  */

static void
find_deps_in_bb_for_stmt (gimple_seq *stmts, basic_block bb, gimple *consumer)
{
  auto_vec<gimple *, 4> worklist;
  use_operand_p use_p;
  ssa_op_iter iter;
  gimple *stmt, *def_stmt;
  gimple_stmt_iterator gsi;

  /* First clear flag for stmts in bb.  The GF_PLF_1 pass-local flag marks
     membership in CONSUMER's dependence set.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    gimple_set_plf (gsi_stmt (gsi), GF_PLF_1, false);

  /* DFS search all depended stmts in bb and mark flag for these stmts.  */
  worklist.safe_push (consumer);
  while (!worklist.is_empty ())
    {
      stmt = worklist.pop ();
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
	{
	  def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));

	  /* Only chase defs that are non-PHI stmts of BB itself and not
	     yet marked.  */
	  if (is_a <gphi *> (def_stmt)
	      || gimple_bb (def_stmt) != bb
	      || gimple_plf (def_stmt, GF_PLF_1))
	    continue;

	  worklist.safe_push (def_stmt);
	}
      gimple_set_plf (stmt, GF_PLF_1, true);
    }
  /* Walk BB up to (but not including) CONSUMER, detaching every marked
     stmt into STMTS while preserving their relative order.  */
  for (gsi = gsi_start_nondebug_bb (bb);
       !gsi_end_p (gsi) && (stmt = gsi_stmt (gsi)) != consumer;)
    {
      /* Move dep stmts to sequence STMTS.  */
      if (gimple_plf (stmt, GF_PLF_1))
	{
	  gsi_remove (&gsi, false);
	  gimple_seq_add_stmt_without_update (stmts, stmt);
	}
      else
	gsi_next_nondebug (&gsi);
    }
}
843
/* User can write, optimizers can generate simple reduction RE for inner
   loop.  In order to make interchange valid, we have to undo reduction by
   moving producer and consumer stmts into the inner loop.  For example,
   below code:

     init = MEM_REF[idx];		//producer
     loop:
       var = phi<init, next>
       next = var op ...
     reduc_sum = phi<next>
     MEM_REF[idx] = reduc_sum		//consumer

   is transformed into:

     loop:
       new_var = MEM_REF[idx];		//producer after moving
       next = new_var op ...
       MEM_REF[idx] = next;		//consumer after moving

   Note if the reduction variable is initialized to constant, like:

     var = phi<0.0, next>

   we compute new_var as below:

     loop:
       tmp = MEM_REF[idx];
       new_var = !first_iteration ? tmp : 0.0;

   so that the initial const is used in the first iteration of loop.  Also
   record ssa variables for dead code elimination in DCE_SEEDS.  */

void
loop_cand::undo_simple_reduction (reduction_p re, bitmap dce_seeds)
{
  gimple *stmt;
  gimple_stmt_iterator from, to = gsi_after_labels (m_loop->header);
  gimple_seq stmts = NULL;
  tree new_var;

  /* Prepare the initialization stmts and insert it to inner loop.  */
  if (re->producer != NULL)
    {
      /* Detach the producer load and queue it for re-insertion at the
	 inner loop header; its vuse will be recomputed later.  */
      gimple_set_vuse (re->producer, NULL_TREE);
      from = gsi_for_stmt (re->producer);
      gsi_remove (&from, false);
      gimple_seq_add_stmt_without_update (&stmts, re->producer);
      new_var = re->init;
    }
  else
    {
      /* Find all stmts on which expression "MEM_REF[idx]" depends.  */
      find_deps_in_bb_for_stmt (&stmts, gimple_bb (re->consumer), re->consumer);
      /* Because we generate new stmt loading from the MEM_REF to TMP.  */
      tree cond, tmp = copy_ssa_name (re->var);
      stmt = gimple_build_assign (tmp, re->init_ref);
      gimple_seq_add_stmt_without_update (&stmts, stmt);

      /* Init new_var to MEM_REF or CONST depending on if it is the first
	 iteration.  The first IV still holding its initial value detects
	 the first iteration.  */
      induction_p iv = m_inductions[0];
      cond = fold_build2 (NE_EXPR, boolean_type_node, iv->var, iv->init_val);
      new_var = copy_ssa_name (re->var);
      stmt = gimple_build_assign (new_var, COND_EXPR, cond, tmp, re->init);
      gimple_seq_add_stmt_without_update (&stmts, stmt);
    }
  gsi_insert_seq_before (&to, stmts, GSI_SAME_STMT);

  /* Replace all uses of reduction var with new variable.  */
  use_operand_p use_p;
  imm_use_iterator iterator;
  FOR_EACH_IMM_USE_STMT (stmt, iterator, re->var)
    {
      FOR_EACH_IMM_USE_ON_STMT (use_p, iterator)
	SET_USE (use_p, new_var);

      update_stmt (stmt);
    }

  /* Move consumer stmt into inner loop, just after reduction next's def.
     Strip its virtual operands first; they will be recomputed.  */
  unlink_stmt_vdef (re->consumer);
  release_ssa_name (gimple_vdef (re->consumer));
  gimple_set_vdef (re->consumer, NULL_TREE);
  gimple_set_vuse (re->consumer, NULL_TREE);
  gimple_assign_set_rhs1 (re->consumer, re->next);
  from = gsi_for_stmt (re->consumer);
  to = gsi_for_stmt (SSA_NAME_DEF_STMT (re->next));
  gsi_move_after (&from, &to);

  /* Mark the reduction variables for DCE.  */
  bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (re->var));
  bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (PHI_RESULT (re->lcssa_phi)));
}
937
938/* Free DATAREFS and its auxiliary memory. */
939
940static void
941free_data_refs_with_aux (vec<data_reference_p> datarefs)
942{
943 data_reference_p dr;
944 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
945 if (dr->aux != NULL)
946 {
947 DR_ACCESS_STRIDE (dr)->release ();
948 delete (vec<tree> *) dr->aux;
949 }
950
951 free_data_refs (datarefs);
952}
953
/* Class for loop interchange transformation.  Drives interchange over
   one perfect loop nest.  */

class tree_loop_interchange
{
public:
  tree_loop_interchange (vec<struct loop *> loop_nest)
    : m_loop_nest (loop_nest), m_niters_iv_var (NULL_TREE),
      m_dce_seeds (BITMAP_ALLOC (NULL)) { }
  ~tree_loop_interchange () { BITMAP_FREE (m_dce_seeds); }
  /* Try to interchange loops of the nest; returns true on change.  */
  bool interchange (vec<data_reference_p>, vec<ddr_p>);

private:
  void update_data_info (unsigned, unsigned, vec<data_reference_p>, vec<ddr_p>);
  bool valid_data_dependences (unsigned, unsigned, vec<ddr_p>);
  void interchange_loops (loop_cand &, loop_cand &);
  void map_inductions_to_loop (loop_cand &, loop_cand &);
  void move_code_to_inner_loop (struct loop *, struct loop *, basic_block *);

  /* The whole loop nest in which interchange is ongoing.  */
  vec<struct loop *> m_loop_nest;
  /* We create new IV which is only used in loop's exit condition check.
     In case of 3-level loop nest interchange, when we interchange the
     innermost two loops, new IV created in the middle level loop does
     not need to be preserved in interchanging the outermost two loops
     later.  We record the IV so that it can be skipped.  */
  tree m_niters_iv_var;
  /* Bitmap of seed variables for dead code elimination after interchange.  */
  bitmap m_dce_seeds;
};
983
984/* Update data refs' access stride and dependence information after loop
985 interchange. I_IDX/O_IDX gives indices of interchanged loops in loop
986 nest. DATAREFS are data references. DDRS are data dependences. */
987
988void
989tree_loop_interchange::update_data_info (unsigned i_idx, unsigned o_idx,
990 vec<data_reference_p> datarefs,
991 vec<ddr_p> ddrs)
992{
993 struct data_reference *dr;
994 struct data_dependence_relation *ddr;
995
996 /* Update strides of data references. */
997 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
998 {
999 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1000 gcc_assert (stride->length () > i_idx);
1001 std::swap ((*stride)[i_idx], (*stride)[o_idx]);
1002 }
1003 /* Update data dependences. */
1004 for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
1005 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1006 {
1007 for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
1008 {
1009 lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
1010 std::swap (dist_vect[i_idx], dist_vect[o_idx]);
1011 }
1012 }
1013}
1014
/* Check data dependence relations, return TRUE if it's valid to interchange
   two loops specified by I_IDX/O_IDX.  Theoretically, interchanging the two
   loops is valid only if dist vector, after interchanging, doesn't have '>'
   as the leftmost non-'=' direction.  Practically, this function has been
   conservative here by not checking some valid cases.  */

bool
tree_loop_interchange::valid_data_dependences (unsigned i_idx, unsigned o_idx,
					       vec<ddr_p> ddrs)
{
  struct data_dependence_relation *ddr;

  for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
    {
      /* Skip no-dependence case.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	continue;

      for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
	{
	  lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
	  unsigned level = dependence_level (dist_vect, m_loop_nest.length ());

	  /* If there is no carried dependence.  */
	  if (level == 0)
	    continue;

	  /* Convert 1-based dependence level to 0-based loop index.  */
	  level --;

	  /* If dependence is not carried by any loop in between the two
	     loops [oloop, iloop] to interchange.  */
	  if (level < o_idx || level > i_idx)
	    continue;

	  /* Be conservative, skip case if either direction at i_idx/o_idx
	     levels is not '=' or '<'.  */
	  if (dist_vect[i_idx] < 0 || dist_vect[o_idx] < 0)
	    return false;
	}
    }

  return true;
}
1058
1059/* Interchange two loops specified by ILOOP and OLOOP. */
1060
void
tree_loop_interchange::interchange_loops (loop_cand &iloop, loop_cand &oloop)
{
  reduction_p re;
  gimple_stmt_iterator gsi;
  tree i_niters, o_niters, var_after;

  /* Undo inner loop's simple reduction.  Double reductions are kept;
     only non-double reductions are rewritten here.  */
  for (unsigned i = 0; iloop.m_reductions.iterate (i, &re); ++i)
    if (re->type != DOUBLE_RTYPE)
      {
	if (re->producer)
	  reset_debug_uses (re->producer);

	iloop.undo_simple_reduction (re, m_dce_seeds);
      }

  /* Only need to reset debug uses for double reduction.  */
  for (unsigned i = 0; oloop.m_reductions.iterate (i, &re); ++i)
    {
      gcc_assert (re->type == DOUBLE_RTYPE);
      reset_debug_uses (SSA_NAME_DEF_STMT (re->var));
      reset_debug_uses (SSA_NAME_DEF_STMT (re->next));
    }

  /* Prepare niters for both loops.  Both expressions are instantiated and
     gimplified in the preheader of the outermost loop of the nest, so the
     values are available regardless of which loop ends up outermost after
     the interchange.  */
  struct loop *loop_nest = m_loop_nest[0];
  edge instantiate_below = loop_preheader_edge (loop_nest);
  gsi = gsi_last_bb (loop_preheader_edge (loop_nest)->src);
  i_niters = number_of_latch_executions (iloop.m_loop);
  i_niters = analyze_scalar_evolution (loop_outer (iloop.m_loop), i_niters);
  i_niters = instantiate_scev (instantiate_below, loop_outer (iloop.m_loop),
			       i_niters);
  i_niters = force_gimple_operand_gsi (&gsi, unshare_expr (i_niters), true,
				       NULL_TREE, false, GSI_CONTINUE_LINKING);
  o_niters = number_of_latch_executions (oloop.m_loop);
  if (oloop.m_loop != loop_nest)
    {
      /* Only instantiate when OLOOP is an inner member of the nest; the
	 outermost loop's niters needs no further instantiation.  */
      o_niters = analyze_scalar_evolution (loop_outer (oloop.m_loop), o_niters);
      o_niters = instantiate_scev (instantiate_below, loop_outer (oloop.m_loop),
				   o_niters);
    }
  o_niters = force_gimple_operand_gsi (&gsi, unshare_expr (o_niters), true,
				       NULL_TREE, false, GSI_CONTINUE_LINKING);

  /* Move src's code to tgt loop.  This is necessary when src is the outer
     loop and tgt is the inner loop.  */
  move_code_to_inner_loop (oloop.m_loop, iloop.m_loop, oloop.m_bbs);

  /* Map outer loop's IV to inner loop, and vice versa.  */
  map_inductions_to_loop (oloop, iloop);
  map_inductions_to_loop (iloop, oloop);

  /* Create canonical IV for both loops.  Note canonical IV for outer/inner
     loop is actually from inner/outer loop.  Also we record the new IV
     created for the outer loop so that it can be skipped in later loop
     interchange.  */
  create_canonical_iv (oloop.m_loop, oloop.m_exit,
		       i_niters, &m_niters_iv_var, &var_after);
  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
  create_canonical_iv (iloop.m_loop, iloop.m_exit,
		       o_niters, NULL, &var_after);
  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));

  /* Scrap niters estimation of interchanged loops.  The recorded bounds
     belonged to the old iteration order and no longer hold.  */
  iloop.m_loop->any_upper_bound = false;
  iloop.m_loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (iloop.m_loop);
  oloop.m_loop->any_upper_bound = false;
  oloop.m_loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (oloop.m_loop);

  /* ??? The association between the loop data structure and the
     CFG changed, so what was loop N at the source level is now
     loop M.  We should think of retaining the association or breaking
     it fully by creating a new loop instead of re-using the "wrong" one.  */
}
1138
1139/* Map induction variables of SRC loop to TGT loop. The function firstly
1140 creates the same IV of SRC loop in TGT loop, then deletes the original
1141 IV and re-initialize it using the newly created IV. For example, loop
1142 nest:
1143
1144 for (i = 0; i < N; i++)
1145 for (j = 0; j < M; j++)
1146 {
1147 //use of i;
1148 //use of j;
1149 }
1150
1151 will be transformed into:
1152
1153 for (jj = 0; jj < M; jj++)
1154 for (ii = 0; ii < N; ii++)
1155 {
1156 //use of ii;
1157 //use of jj;
1158 }
1159
1160 after loop interchange. */
1161
void
tree_loop_interchange::map_inductions_to_loop (loop_cand &src, loop_cand &tgt)
{
  induction_p iv;
  edge e = tgt.m_exit;
  /* Newly-created IVs are inserted right before TGT's exit condition.  */
  gimple_stmt_iterator incr_pos = gsi_last_bb (e->src), gsi;

  /* Map source loop's IV to target loop.  */
  for (unsigned i = 0; src.m_inductions.iterate (i, &iv); ++i)
    {
      gimple *use_stmt, *stmt = SSA_NAME_DEF_STMT (iv->var);
      gcc_assert (is_a <gphi *> (stmt));

      use_operand_p use_p;
      /* Only map original IV to target loop.  The IV recorded in
	 m_niters_iv_var was created by a previous interchange and need
	 not be preserved; its PHI is simply deleted below.  */
      if (m_niters_iv_var != iv->var)
	{
	  /* Map the IV by creating the same one in target loop.  */
	  tree var_before, var_after;
	  tree base = unshare_expr (iv->init_expr);
	  tree step = unshare_expr (iv->step);
	  create_iv (base, step, SSA_NAME_VAR (iv->var),
		     tgt.m_loop, &incr_pos, false, &var_before, &var_after);
	  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_before));
	  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));

	  /* Replace uses of the original IV var with newly created IV var.  */
	  imm_use_iterator imm_iter;
	  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, iv->var)
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		SET_USE (use_p, var_before);

	      update_stmt (use_stmt);
	    }
	}

      /* Mark all uses for DCE.  Operands of the PHI may become dead once
	 the PHI itself is removed below.  */
      ssa_op_iter op_iter;
      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, op_iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  if (TREE_CODE (use) == SSA_NAME
	      && ! SSA_NAME_IS_DEFAULT_DEF (use))
	    bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (use));
	}

      /* Delete definition of the original IV in the source loop.  */
      gsi = gsi_for_stmt (stmt);
      remove_phi_node (&gsi, true);
    }
}
1214
1215/* Move stmts of outer loop to inner loop. */
1216
void
tree_loop_interchange::move_code_to_inner_loop (struct loop *outer,
						struct loop *inner,
						basic_block *outer_bbs)
{
  basic_block oloop_exit_bb = single_exit (outer)->src;
  gimple_stmt_iterator gsi, to;

  for (unsigned i = 0; i < outer->num_nodes; i++)
    {
      basic_block bb = outer_bbs[i];

      /* Skip basic blocks of inner loop.  */
      if (flow_bb_inside_loop_p (inner, bb))
	continue;

      /* Move code from header/latch to header/latch.  */
      if (bb == outer->header)
	to = gsi_after_labels (inner->header);
      else if (bb == outer->latch)
	to = gsi_after_labels (inner->latch);
      else
	/* Otherwise, simply move to exit->src.  */
	to = gsi_last_bb (single_exit (inner)->src);

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
	{
	  gimple *stmt = gsi_stmt (gsi);

	  /* Keep the outer loop's exit condition (the last stmt of the
	     exit block) in place; it still controls that loop.  */
	  if (oloop_exit_bb == bb
	      && stmt == gsi_stmt (gsi_last_bb (oloop_exit_bb)))
	    {
	      gsi_next (&gsi);
	      continue;
	    }

	  /* Drop virtual operands from the moved stmt; the old VUSE/VDEF
	     links would be stale at the new position.  NOTE(review):
	     presumably the virtual SSA web is rebuilt/renamed later in
	     the pass — confirm against the pass's TODO flags.  */
	  if (gimple_vuse (stmt))
	    gimple_set_vuse (stmt, NULL_TREE);
	  if (gimple_vdef (stmt))
	    {
	      unlink_stmt_vdef (stmt);
	      release_ssa_name (gimple_vdef (stmt));
	      gimple_set_vdef (stmt, NULL_TREE);
	    }

	  /* Debug stmts must not refer to values from the old position.  */
	  reset_debug_uses (stmt);
	  gsi_move_before (&gsi, &to);
	}
    }
}
1267
1268/* Given data reference DR in LOOP_NEST, the function computes DR's access
1269 stride at each level of loop from innermost LOOP to outer. On success,
1270 it saves access stride at each level loop in a vector which is pointed
1271 by DR->aux. For example:
1272
1273 int arr[100][100][100];
1274 for (i = 0; i < 100; i++) ;(DR->aux)strides[0] = 40000
1275 for (j = 100; j > 0; j--) ;(DR->aux)strides[1] = 400
1276 for (k = 0; k < 100; k++) ;(DR->aux)strides[2] = 4
1277 arr[i][j - 1][k] = 0; */
1278
static void
compute_access_stride (struct loop *loop_nest, struct loop *loop,
		       data_reference_p dr)
{
  vec<tree> *strides = new vec<tree> ();
  basic_block bb = gimple_bb (DR_STMT (dr));

  /* The stmt may live in an outer loop of the nest rather than in LOOP
     itself; record zero strides for the loop levels below its block.  */
  while (!flow_bb_inside_loop_p (loop, bb))
    {
      strides->safe_push (build_int_cst (sizetype, 0));
      loop = loop_outer (loop);
    }
  gcc_assert (loop == bb->loop_father);

  tree ref = DR_REF (dr);
  if (TREE_CODE (ref) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
    {
      /* We can't take address of bitfields.  If the bitfield is at constant
	 offset from the start of the struct, just use address of the
	 struct, for analysis of the strides that shouldn't matter.  */
      if (!TREE_OPERAND (ref, 2)
	  || TREE_CODE (TREE_OPERAND (ref, 2)) == INTEGER_CST)
	ref = TREE_OPERAND (ref, 0);
      /* Otherwise, if we have a bit field representative, use that.  */
      else if (DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1))
	       != NULL_TREE)
	{
	  tree repr = DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1));
	  ref = build3 (COMPONENT_REF, TREE_TYPE (repr), TREE_OPERAND (ref, 0),
			repr, TREE_OPERAND (ref, 2));
	}
      /* Otherwise punt.  The (possibly empty) stride vector is still
	 attached so callers can detect the failure via its length.  */
      else
	{
	  dr->aux = strides;
	  return;
	}
    }
  /* Analyze the evolution of the reference's address over the nest.  */
  tree scev_base = build_fold_addr_expr (ref);
  tree scev = analyze_scalar_evolution (loop, scev_base);
  scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev);
  if (! chrec_contains_undetermined (scev))
    {
      /* Peel the polynomial chrec level by level.  CHREC_RIGHT at each
	 level is the stride with respect to that loop; loops in which
	 the address is invariant get a zero stride pushed instead.  */
      tree sl = scev;
      struct loop *expected = loop;
      while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
	{
	  struct loop *sl_loop = get_chrec_loop (sl);
	  while (sl_loop != expected)
	    {
	      strides->safe_push (size_int (0));
	      expected = loop_outer (expected);
	    }
	  strides->safe_push (CHREC_RIGHT (sl));
	  sl = CHREC_LEFT (sl);
	  expected = loop_outer (expected);
	}
      /* If the remaining base is chrec-free, the address is invariant in
	 every loop outward up to LOOP_NEST; pad with zero strides.  */
      if (! tree_contains_chrecs (sl, NULL))
	while (expected != loop_outer (loop_nest))
	  {
	    strides->safe_push (size_int (0));
	    expected = loop_outer (expected);
	  }
    }

  /* Note strides are collected here from inner loop outwards; see
     compute_access_strides for the reversal to outer-to-inner order.  */
  dr->aux = strides;
}
1347
1348/* Given loop nest LOOP_NEST with innermost LOOP, the function computes
1349 access strides with respect to each level loop for all data refs in
1350 DATAREFS from inner loop to outer loop. On success, it returns the
1351 outermost loop that access strides can be computed successfully for
1352 all data references. If access strides cannot be computed at least
1353 for two levels of loop for any data reference, it returns NULL. */
1354
1355static struct loop *
1356compute_access_strides (struct loop *loop_nest, struct loop *loop,
1357 vec<data_reference_p> datarefs)
1358{
1359 unsigned i, j, num_loops = (unsigned) -1;
1360 data_reference_p dr;
1361 vec<tree> *stride;
1362
1363 for (i = 0; datarefs.iterate (i, &dr); ++i)
1364 {
1365 compute_access_stride (loop_nest, loop, dr);
1366 stride = DR_ACCESS_STRIDE (dr);
1367 if (stride->length () < num_loops)
1368 {
1369 num_loops = stride->length ();
1370 if (num_loops < 2)
1371 return NULL;
1372 }
1373 }
1374
1375 for (i = 0; datarefs.iterate (i, &dr); ++i)
1376 {
1377 stride = DR_ACCESS_STRIDE (dr);
1378 if (stride->length () > num_loops)
1379 stride->truncate (num_loops);
1380
1381 for (j = 0; j < (num_loops >> 1); ++j)
1382 std::swap ((*stride)[j], (*stride)[num_loops - j - 1]);
1383 }
1384
1385 loop = superloop_at_depth (loop, loop_depth (loop) + 1 - num_loops);
1386 gcc_assert (loop_nest == loop || flow_loop_nested_p (loop_nest, loop));
1387 return loop;
1388}
1389
/* Prune access strides for data references in DATAREFS by removing strides
   of loops that aren't in the current LOOP_NEST.  */
1392
1393static void
1394prune_access_strides_not_in_loop (struct loop *loop_nest,
1395 struct loop *innermost,
1396 vec<data_reference_p> datarefs)
1397{
1398 data_reference_p dr;
1399 unsigned num_loops = loop_depth (innermost) - loop_depth (loop_nest) + 1;
1400 gcc_assert (num_loops > 1);
1401
1402 /* Block remove strides of loops that is not in current loop nest. */
1403 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
1404 {
1405 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1406 if (stride->length () > num_loops)
1407 stride->block_remove (0, stride->length () - num_loops);
1408 }
1409}
1410
1411/* Dump access strides for all DATAREFS. */
1412
1413static void
1414dump_access_strides (vec<data_reference_p> datarefs)
1415{
1416 data_reference_p dr;
1417 fprintf (dump_file, "Access Strides for DRs:\n");
1418 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
1419 {
1420 fprintf (dump_file, " ");
1421 print_generic_expr (dump_file, DR_REF (dr), TDF_SLIM);
1422 fprintf (dump_file, ":\t\t<");
1423
1424 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1425 unsigned num_loops = stride->length ();
1426 for (unsigned j = 0; j < num_loops; ++j)
1427 {
1428 print_generic_expr (dump_file, (*stride)[j], TDF_SLIM);
1429 fprintf (dump_file, "%s", (j < num_loops - 1) ? ",\t" : ">\n");
1430 }
1431 }
1432}
1433
1434/* Return true if it's profitable to interchange two loops whose index
1435 in whole loop nest vector are I_IDX/O_IDX respectively. The function
1436 computes and compares three types information from all DATAREFS:
1437 1) Access stride for loop I_IDX and O_IDX.
1438 2) Number of invariant memory references with respect to I_IDX before
1439 and after loop interchange.
1440 3) Flags indicating if all memory references access sequential memory
1441 in ILOOP, before and after loop interchange.
   If INNERMOST_LOOPS_P is true, the two loops for interchanging are the two
   innermost loops in the loop nest.  This function also dumps information if
   DUMP_INFO_P is true.  */
1445
1446static bool
1447should_interchange_loops (unsigned i_idx, unsigned o_idx,
1448 vec<data_reference_p> datarefs,
1449 bool innermost_loops_p, bool dump_info_p = true)
1450{
1451 unsigned HOST_WIDE_INT ratio;
1452 unsigned i, j, num_old_inv_drs = 0, num_new_inv_drs = 0;
1453 struct data_reference *dr;
1454 bool all_seq_dr_before_p = true, all_seq_dr_after_p = true;
1455 widest_int iloop_strides = 0, oloop_strides = 0;
1456 unsigned num_unresolved_drs = 0;
1457 unsigned num_resolved_ok_drs = 0;
1458 unsigned num_resolved_not_ok_drs = 0;
1459
1460 if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
1461 fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n");
1462
1463 for (i = 0; datarefs.iterate (i, &dr); ++i)
1464 {
1465 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1466 tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx];
1467
1468 bool subloop_stride_p = false;
1469 /* Data ref can't be invariant or sequential access at current loop if
1470 its address changes with respect to any subloops. */
1471 for (j = i_idx + 1; j < stride->length (); ++j)
1472 if (!integer_zerop ((*stride)[j]))
1473 {
1474 subloop_stride_p = true;
1475 break;
1476 }
1477
1478 if (integer_zerop (iloop_stride))
1479 {
1480 if (!subloop_stride_p)
1481 num_old_inv_drs++;
1482 }
1483 if (integer_zerop (oloop_stride))
1484 {
1485 if (!subloop_stride_p)
1486 num_new_inv_drs++;
1487 }
1488
1489 if (TREE_CODE (iloop_stride) == INTEGER_CST
1490 && TREE_CODE (oloop_stride) == INTEGER_CST)
1491 {
1492 iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
1493 oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
1494 }
1495 else if (multiple_of_p (TREE_TYPE (iloop_stride),
1496 iloop_stride, oloop_stride))
1497 num_resolved_ok_drs++;
1498 else if (multiple_of_p (TREE_TYPE (iloop_stride),
1499 oloop_stride, iloop_stride))
1500 num_resolved_not_ok_drs++;
1501 else
1502 num_unresolved_drs++;
1503
1504 /* Data ref can't be sequential access if its address changes in sub
1505 loop. */
1506 if (subloop_stride_p)
1507 {
1508 all_seq_dr_before_p = false;
1509 all_seq_dr_after_p = false;
1510 continue;
1511 }
1512 /* Track if all data references are sequential accesses before/after loop
1513 interchange. Note invariant is considered sequential here. */
1514 tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
1515 if (all_seq_dr_before_p
1516 && ! (integer_zerop (iloop_stride)
1517 || operand_equal_p (access_size, iloop_stride, 0)))
1518 all_seq_dr_before_p = false;
1519 if (all_seq_dr_after_p
1520 && ! (integer_zerop (oloop_stride)
1521 || operand_equal_p (access_size, oloop_stride, 0)))
1522 all_seq_dr_after_p = false;
1523 }
1524
1525 if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
1526 {
1527 fprintf (dump_file, "\toverall:\t\t");
1528 print_decu (iloop_strides, dump_file);
1529 fprintf (dump_file, "\t");
1530 print_decu (oloop_strides, dump_file);
1531 fprintf (dump_file, "\n");
1532
1533 fprintf (dump_file, "Invariant data ref: before(%d), after(%d)\n",
1534 num_old_inv_drs, num_new_inv_drs);
1535 fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n",
1536 all_seq_dr_before_p ? "true" : "false",
1537 all_seq_dr_after_p ? "true" : "false");
1538 fprintf (dump_file, "OK to interchage with variable strides: %d\n",
1539 num_resolved_ok_drs);
1540 fprintf (dump_file, "Not OK to interchage with variable strides: %d\n",
1541 num_resolved_not_ok_drs);
1542 fprintf (dump_file, "Variable strides we cannot decide: %d\n",
1543 num_unresolved_drs);
1544 }
1545
1546 if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0)
1547 return false;
1548
1549 /* We use different stride comparison ratio for interchanging innermost
1550 two loops or not. The idea is to be conservative in interchange for
1551 the innermost loops. */
1552 ratio = innermost_loops_p ? INNER_STRIDE_RATIO : OUTER_STRIDE_RATIO;
1553 /* Do interchange if it gives better data locality behavior. */
1554 if (wi::gtu_p (iloop_strides, wi::mul (oloop_strides, ratio)))
1555 return true;
1556 if (wi::gtu_p (iloop_strides, oloop_strides))
1557 {
1558 /* Or it creates more invariant memory references. */
1559 if ((!all_seq_dr_before_p || all_seq_dr_after_p)
1560 && num_new_inv_drs > num_old_inv_drs)
1561 return true;
1562 /* Or it makes all memory references sequential. */
1563 if (num_new_inv_drs >= num_old_inv_drs
1564 && !all_seq_dr_before_p && all_seq_dr_after_p)
1565 return true;
1566 }
1567
1568 return false;
1569}
1570
1571/* Try to interchange inner loop of a loop nest to outer level. */
1572
bool
tree_loop_interchange::interchange (vec<data_reference_p> datarefs,
				    vec<ddr_p> ddrs)
{
  location_t loc = find_loop_location (m_loop_nest[0]);
  bool changed_p = false;
  /* In each iteration we try to interchange I-th loop with (I+1)-th loop.
     The overall effect is to push inner loop to outermost level in whole
     loop nest.  */
  for (unsigned i = m_loop_nest.length (); i > 1; --i)
    {
      unsigned i_idx = i - 1, o_idx = i - 2;

      /* Check validity for loop interchange.  */
      if (!valid_data_dependences (i_idx, o_idx, ddrs))
	break;

      /* ILOOP is analyzed against its containing loop; OLOOP, being the
	 outer loop of the pair, is analyzed against itself.  */
      loop_cand iloop (m_loop_nest[i_idx], m_loop_nest[o_idx]);
      loop_cand oloop (m_loop_nest[o_idx], m_loop_nest[o_idx]);

      /* Check if we can do transformation for loop interchange.  */
      if (!iloop.analyze_carried_vars (NULL)
	  || !iloop.analyze_lcssa_phis ()
	  || !oloop.analyze_carried_vars (&iloop)
	  || !oloop.analyze_lcssa_phis ()
	  || !iloop.can_interchange_p (NULL)
	  || !oloop.can_interchange_p (&iloop))
	break;

      /* Check profitability for loop interchange.  The two loops are the
	 innermost pair exactly when ILOOP has no inner loop.  */
      if (should_interchange_loops (i_idx, o_idx, datarefs,
				    iloop.m_loop->inner == NULL))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Loop_pair<outer:%d, inner:%d> is interchanged\n\n",
		     oloop.m_loop->num, iloop.m_loop->num);

	  changed_p = true;
	  interchange_loops (iloop, oloop);
	  /* No need to update if there is no further loop interchange.  */
	  if (o_idx > 0)
	    update_data_info (i_idx, o_idx, datarefs, ddrs);
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Loop_pair<outer:%d, inner:%d> is not interchanged\n\n",
		     oloop.m_loop->num, iloop.m_loop->num);
	}
    }
  /* Clean up code made dead while interchanging all the pairs above.  */
  simple_dce_from_worklist (m_dce_seeds);

  if (changed_p)
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
		     "loops interchanged in loop nest\n");

  return changed_p;
}
1633
1634
1635/* Loop interchange pass. */
1636
1637namespace {
1638
/* Metadata describing the "linterchange" gimple pass to the pass manager;
   see tree-pass.h for the meaning of each field.  */
const pass_data pass_data_linterchange =
{
  GIMPLE_PASS, /* type */
  "linterchange", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LINTERCHANGE, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1651
/* The loop interchange pass object; the real work happens in execute.  */
class pass_linterchange : public gimple_opt_pass
{
public:
  pass_linterchange (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_linterchange, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_linterchange (m_ctxt); }
  /* Run only when -floop-interchange is enabled.  */
  virtual bool gate (function *) { return flag_loop_interchange; }
  virtual unsigned int execute (function *);

}; // class pass_linterchange
1665
1666
1667/* Return true if LOOP has proper form for interchange. We check three
1668 conditions in the function:
1669 1) In general, a loop can be interchanged only if it doesn't have
1670 basic blocks other than header, exit and latch besides possible
1671 inner loop nest. This basically restricts loop interchange to
1672 below form loop nests:
1673
1674 header<---+
1675 | |
1676 v |
1677 INNER_LOOP |
1678 | |
1679 v |
1680 exit--->latch
1681
1682 2) Data reference in basic block that executes in different times
1683 than loop head/exit is not allowed.
1684 3) Record the innermost outer loop that doesn't form rectangle loop
1685 nest with LOOP. */
1686
static bool
proper_loop_form_for_interchange (struct loop *loop, struct loop **min_outer)
{
  edge e0, e1, exit;

  /* Don't interchange if loop has unsupported information for the moment.  */
  if (loop->safelen > 0
      || loop->constraints != 0
      || loop->can_be_parallel
      || loop->dont_vectorize
      || loop->force_vectorize
      || loop->in_oacc_kernels_region
      || loop->orig_loop_num != 0
      || loop->simduid != NULL_TREE)
    return false;

  /* Don't interchange if outer loop has basic block other than header, exit
     and latch.  */
  if (loop->inner != NULL
      && loop->num_nodes != loop->inner->num_nodes + 3)
    return false;

  /* A single exit that dominates the latch is required.  */
  if ((exit = single_dom_exit (loop)) == NULL)
    return false;

  /* Check control flow on loop header/exit blocks.  The header and exit
     may be the same block or two distinct blocks; the edge counts checked
     differ accordingly.  */
  if (loop->header == exit->src
      && (EDGE_COUNT (loop->header->preds) != 2
	  || EDGE_COUNT (loop->header->succs) != 2))
    return false;
  else if (loop->header != exit->src
	   && (EDGE_COUNT (loop->header->preds) != 2
	       || !single_succ_p (loop->header)
	       || unsupported_edge (single_succ_edge (loop->header))
	       || EDGE_COUNT (exit->src->succs) != 2
	       || !single_pred_p (exit->src)
	       || unsupported_edge (single_pred_edge (exit->src))))
    return false;

  /* Of the header's two predecessor edges, one must come from the latch
     and exactly one must come from outside the loop.  */
  e0 = EDGE_PRED (loop->header, 0);
  e1 = EDGE_PRED (loop->header, 1);
  if (unsupported_edge (e0) || unsupported_edge (e1)
      || (e0->src != loop->latch && e1->src != loop->latch)
      || (e0->src->loop_father == loop && e1->src->loop_father == loop))
    return false;

  /* Likewise for the exit block's two successor edges: one continues to
     the latch, exactly one leaves the loop.  */
  e0 = EDGE_SUCC (exit->src, 0);
  e1 = EDGE_SUCC (exit->src, 1);
  if (unsupported_edge (e0) || unsupported_edge (e1)
      || (e0->dest != loop->latch && e1->dest != loop->latch)
      || (e0->dest->loop_father == loop && e1->dest->loop_father == loop))
    return false;

  /* Don't interchange if any reference is in basic block that doesn't
     dominate exit block.  Such a block executes conditionally, so its
     memory references could run a different number of times after
     interchange.  */
  basic_block *bbs = get_loop_body (loop);
  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      if (bb->loop_father != loop
	  || bb == loop->header || bb == exit->src
	  || dominated_by_p (CDI_DOMINATORS, exit->src, bb))
	continue;

      for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
	   !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
	if (gimple_vuse (gsi_stmt (gsi)))
	  {
	    free (bbs);
	    return false;
	  }
    }
  free (bbs);

  /* The loop's niters must be computable and analyzable.  */
  tree niters = number_of_latch_executions (loop);
  niters = analyze_scalar_evolution (loop_outer (loop), niters);
  if (!niters || chrec_contains_undetermined (niters))
    return false;

  /* Record the innermost outer loop that doesn't form rectangle loop nest.
     Niters varying in LOOP2 means the nest rooted there is not
     rectangular with respect to LOOP.  */
  for (loop_p loop2 = loop_outer (loop);
       loop2 && flow_loop_nested_p (*min_outer, loop2);
       loop2 = loop_outer (loop2))
    {
      niters = instantiate_scev (loop_preheader_edge (loop2),
				 loop_outer (loop), niters);
      if (!evolution_function_is_invariant_p (niters, loop2->num))
	{
	  *min_outer = loop2;
	  break;
	}
    }
  return true;
}
1782
1783/* Return true if any two adjacent loops in loop nest [INNERMOST, LOOP_NEST]
1784 should be interchanged by looking into all DATAREFS. */
1785
1786static bool
1787should_interchange_loop_nest (struct loop *loop_nest, struct loop *innermost,
1788 vec<data_reference_p> datarefs)
1789{
1790 unsigned idx = loop_depth (innermost) - loop_depth (loop_nest);
1791 gcc_assert (idx > 0);
1792
1793 /* Check if any two adjacent loops should be interchanged. */
1794 for (struct loop *loop = innermost;
1795 loop != loop_nest; loop = loop_outer (loop), idx--)
1796 if (should_interchange_loops (idx, idx - 1, datarefs,
1797 loop == innermost, false))
1798 return true;
1799
1800 return false;
1801}
1802
1803/* Given loop nest LOOP_NEST and data references DATAREFS, compute data
1804 dependences for loop interchange and store it in DDRS. Note we compute
1805 dependences directly rather than call generic interface so that we can
1806 return on unknown dependence instantly. */
1807
static bool
tree_loop_interchange_compute_ddrs (vec<loop_p> loop_nest,
				    vec<data_reference_p> datarefs,
				    vec<ddr_p> *ddrs)
{
  struct data_reference *a, *b;
  struct loop *innermost = loop_nest.last ();

  for (unsigned i = 0; datarefs.iterate (i, &a); ++i)
    {
      /* A ref is "outer" if its stmt is not in the innermost loop.  */
      bool a_outer_p = gimple_bb (DR_STMT (a))->loop_father != innermost;
      for (unsigned j = i + 1; datarefs.iterate (j, &b); ++j)
	/* Read-read pairs carry no dependence; skip them entirely.  */
	if (DR_IS_WRITE (a) || DR_IS_WRITE (b))
	  {
	    bool b_outer_p = gimple_bb (DR_STMT (b))->loop_father != innermost;
	    /* Don't support multiple write references in outer loop.  */
	    if (a_outer_p && b_outer_p && DR_IS_WRITE (a) && DR_IS_WRITE (b))
	      return false;

	    ddr_p ddr = initialize_data_dependence_relation (a, b, loop_nest);
	    ddrs->safe_push (ddr);
	    compute_affine_dependence (ddr, loop_nest[0]);

	    /* Give up if ddr is unknown dependence or classic direct vector
	       is not available.  */
	    if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
		|| (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
		    && DDR_NUM_DIR_VECTS (ddr) == 0))
	      return false;

	    /* If either data references is in outer loop of nest, we require
	       no dependence here because the data reference need to be moved
	       into inner loop during interchange.  Two outer refs to the
	       same memory are exempt (presumably a simple reduction's
	       init load / fini store pair — see the file comment above
	       prepare_perfect_loop_nest).  */
	    if (a_outer_p && b_outer_p
		&& operand_equal_p (DR_REF (a), DR_REF (b), 0))
	      continue;
	    if (DDR_ARE_DEPENDENT (ddr) != chrec_known
		&& (a_outer_p || b_outer_p))
	      return false;
	  }
    }

  return true;
}
1852
1853/* Prune DATAREFS by removing any data reference not inside of LOOP. */
1854
1855static inline void
1856prune_datarefs_not_in_loop (struct loop *loop, vec<data_reference_p> datarefs)
1857{
1858 unsigned i, j;
1859 struct data_reference *dr;
1860
1861 for (i = 0, j = 0; datarefs.iterate (i, &dr); ++i)
1862 {
1863 if (flow_bb_inside_loop_p (loop, gimple_bb (DR_STMT (dr))))
1864 datarefs[j++] = dr;
1865 else
1866 {
1867 if (dr->aux)
1868 {
1869 DR_ACCESS_STRIDE (dr)->release ();
1870 delete (vec<tree> *) dr->aux;
1871 }
1872 free_data_ref (dr);
1873 }
1874 }
1875 datarefs.truncate (j);
1876}
1877
1878/* Find and store data references in DATAREFS for LOOP nest. If there's
1879 difficult data reference in a basic block, we shrink the loop nest to
1880 inner loop of that basic block's father loop. On success, return the
1881 outer loop of the result loop nest. */
1882
static struct loop *
prepare_data_references (struct loop *loop, vec<data_reference_p> *datarefs)
{
  struct loop *loop_nest = loop;
  vec<data_reference_p> *bb_refs;
  basic_block bb, *bbs = get_loop_body_in_dom_order (loop);

  /* bb->aux temporarily holds each block's data reference vector.  */
  for (unsigned i = 0; i < loop->num_nodes; i++)
    bbs[i]->aux = NULL;

  /* Find data references for all basic blocks.  Shrink loop nest on difficult
     data reference.  */
  for (unsigned i = 0; loop_nest && i < loop->num_nodes; ++i)
    {
      bb = bbs[i];
      if (!flow_bb_inside_loop_p (loop_nest, bb))
	continue;

      bb_refs = new vec<data_reference_p> ();
      if (find_data_references_in_bb (loop, bb, bb_refs) == chrec_dont_know)
	{
	  /* Shrink the nest to start below the problematic block's loop;
	     if what remains is less than two loops deep, give up.  */
	  loop_nest = bb->loop_father->inner;
	  if (loop_nest && !loop_nest->inner)
	    loop_nest = NULL;

	  free_data_refs (*bb_refs);
	  delete bb_refs;
	}
      else if (bb_refs->is_empty ())
	delete bb_refs;
      else
	bb->aux = bb_refs;
    }

  /* Collect all data references in loop nest.  Refs of blocks that fell
     outside the (possibly shrunk) nest are released instead.  */
  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];
      if (!bb->aux)
	continue;

      bb_refs = (vec<data_reference_p> *) bb->aux;
      if (loop_nest && flow_bb_inside_loop_p (loop_nest, bb))
	datarefs->safe_splice (*bb_refs);
      else
	free_data_refs (*bb_refs);

      delete bb_refs;
      bb->aux = NULL;
    }
  free (bbs);

  return loop_nest;
}
1937
1938/* Given innermost LOOP, return true if perfect loop nest can be found and
1939 data dependences can be computed. If succeed, record the perfect loop
1940 nest in LOOP_NEST; record all data references in DATAREFS and record all
1941 data dependence relations in DDRS.
1942
   We do support a restricted form of imperfect loop nest, i.e., loop nest
   with load/store in outer loop initializing/finalizing simple reduction
   of the innermost loop.  For such an outer loop reference, we require that
   it has no dependence with others since it will be moved to the inner
   loop in interchange.  */
1948
static bool
prepare_perfect_loop_nest (struct loop *loop, vec<loop_p> *loop_nest,
			   vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
{
  struct loop *start_loop = NULL, *innermost = loop;
  struct loop *outermost = loops_for_fn (cfun)->tree_root;

  /* Find loop nest from the innermost loop.  The walk stops at the root
     of the loop tree, at OUTERMOST — the innermost outer loop recorded by
     proper_loop_form_for_interchange as not forming a rectangle nest —
     or at the first loop of improper form.  */
  while (loop->num != 0 && loop->inner == start_loop
	 && flow_loop_nested_p (outermost, loop))
    {
      if (!proper_loop_form_for_interchange (loop, &outermost))
	break;

      start_loop = loop;
      /* If this loop has sibling loop, the father loop won't be in perfect
	 loop nest.  */
      if (loop->next != NULL)
	break;

      loop = loop_outer (loop);
    }
  /* Interchange needs a nest of at least two loops.  */
  if (!start_loop || !start_loop->inner)
    return false;

  /* Prepare the data reference vector for the loop nest, pruning outer
     loops we cannot handle.  */
  start_loop = prepare_data_references (start_loop, datarefs);
  if (!start_loop
      /* Check if there is no data reference.  */
      || datarefs->is_empty ()
      /* Check if there are too many of data references.  */
      || (int) datarefs->length () > MAX_DATAREFS)
    return false;

  /* Compute access strides for all data references, pruning outer
     loops we cannot analyze refs in.  */
  start_loop = compute_access_strides (start_loop, innermost, *datarefs);
  if (!start_loop)
    return false;

  /* Check if any interchange is profitable in the loop nest.  */
  if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
    return false;

  /* Check if data dependences can be computed for loop nest starting from
     start_loop.  Retry with the outermost loop stripped off each time
     the computation fails.  */
  loop = start_loop;
  do {
    loop_nest->truncate (0);

    /* LOOP != START_LOOP means the nest was shrunk on a previous
       iteration; drop refs and strides now outside the nest.  */
    if (loop != start_loop)
      prune_datarefs_not_in_loop (start_loop, *datarefs);

    if (find_loop_nest (start_loop, loop_nest)
	&& tree_loop_interchange_compute_ddrs (*loop_nest, *datarefs, ddrs))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file,
		   "\nConsider loop interchange for loop_nest<%d - %d>\n",
		   start_loop->num, innermost->num);

	if (loop != start_loop)
	  prune_access_strides_not_in_loop (start_loop, innermost, *datarefs);

	if (dump_file && (dump_flags & TDF_DETAILS))
	  dump_access_strides (*datarefs);

	return true;
      }

    free_dependence_relations (*ddrs);
    *ddrs = vNULL;
    /* Try to compute data dependences with the outermost loop stripped.  */
    loop = start_loop;
    start_loop = start_loop->inner;
  } while (start_loop && start_loop->inner);

  return false;
}
2030
2031/* Main entry for loop interchange pass. */
2032
2033unsigned int
2034pass_linterchange::execute (function *fun)
2035{
2036 if (number_of_loops (fun) <= 2)
2037 return 0;
2038
2039 bool changed_p = false;
2040 struct loop *loop;
2041 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
2042 {
2043 vec<loop_p> loop_nest = vNULL;
2044 vec<data_reference_p> datarefs = vNULL;
2045 vec<ddr_p> ddrs = vNULL;
2046 if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
2047 {
2048 tree_loop_interchange loop_interchange (loop_nest);
2049 changed_p |= loop_interchange.interchange (datarefs, ddrs);
2050 }
2051 free_dependence_relations (ddrs);
2052 free_data_refs_with_aux (datarefs);
2053 loop_nest.release ();
2054 }
2055
2056 if (changed_p)
2057 scev_reset_htab ();
2058
2059 return changed_p ? (TODO_update_ssa_only_virtuals) : 0;
2060}
2061
2062} // anon namespace
2063
/* Create a new instance of the loop interchange pass for context CTXT;
   called by the pass manager when building the pass pipeline.  */

gimple_opt_pass *
make_pass_linterchange (gcc::context *ctxt)
{
  return new pass_linterchange (ctxt);
}
2069