/* If-conversion for vectorizer.
   Copyright (C) 2004-2024 Free Software Foundation, Inc.
   Contributed by Devang Patel <dpatel@apple.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass implements a tree level if-conversion of loops.  Its
   initial goal is to help the vectorizer to vectorize loops with
   conditions.

   A short description of if-conversion:

     o Decide if a loop is if-convertible or not.
     o Walk all loop basic blocks in breadth first order (BFS order).
       o Remove conditional statements (at the end of basic block)
         and propagate condition into destination basic blocks'
         predicate list.
       o Replace modify expression with conditional modify expression
         using current basic block's condition.
     o Merge all basic blocks
       o Replace phi nodes with conditional modify expr
       o Merge all basic blocks into header

   Sample transformation:

   INPUT
   -----

   # i_23 = PHI <0(0), i_18(10)>;
   <L0>:;
   j_15 = A[i_23];
   if (j_15 > 41) goto <L1>; else goto <L17>;

   <L17>:;
   goto <bb 3> (<L3>);

   <L1>:;

   # iftmp.2_4 = PHI <0(8), 42(2)>;
   <L3>:;
   A[i_23] = iftmp.2_4;
   i_18 = i_23 + 1;
   if (i_18 <= 15) goto <L19>; else goto <L18>;

   <L19>:;
   goto <bb 1> (<L0>);

   <L18>:;

   OUTPUT
   ------

   # i_23 = PHI <0(0), i_18(10)>;
   <L0>:;
   j_15 = A[i_23];

   <L3>:;
   iftmp.2_4 = j_15 > 41 ? 42 : 0;
   A[i_23] = iftmp.2_4;
   i_18 = i_23 + 1;
   if (i_18 <= 15) goto <L19>; else goto <L18>;

   <L19>:;
   goto <bb 1> (<L0>);

   <L18>:;
*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "expmed.h"
#include "expr.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimplify.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-ssa-loop.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-address.h"
#include "dbgcnt.h"
#include "tree-hash-traits.h"
#include "varasm.h"
#include "builtins.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "fold-const.h"
#include "tree-ssa-sccvn.h"
#include "tree-cfgcleanup.h"
#include "tree-ssa-dse.h"
#include "tree-vectorizer.h"
#include "tree-eh.h"
#include "cgraph.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Only handle PHIs with at most MAX_PHI_ARG_NUM arguments unless we
   are asked to by the simd pragma.  */
#define MAX_PHI_ARG_NUM \
  ((unsigned) param_max_tree_if_conversion_phi_args)

/* True if we've converted a statement that was only executed when some
   condition C was true, and if for correctness we need to predicate the
   statement to ensure that it is a no-op when C is false.  See
   predicate_statements for the kinds of predication we support.  */
static bool need_to_predicate;

/* True if we have to rewrite stmts that may invoke undefined behavior
   when a condition C is false, so that the rewritten stmts don't invoke
   it when they are always executed.  See predicate_statements for the
   kinds of predication we support.  */
static bool need_to_rewrite_undefined;

/* Indicate if there are any complicated PHIs that need to be handled in
   if-conversion.  A complicated PHI has more than two arguments and
   can't be degenerated to a two-argument PHI.  See more information in
   the comment before phi_convertible_by_degenerating_args.  */
static bool any_complicated_phi;

/* True if we have bitfield accesses we can lower.  */
static bool need_to_lower_bitfields;

/* True if there is any if-conversion to be done.  */
static bool need_to_ifcvt;

/* Hash for struct innermost_loop_behavior.  The caller is responsible
   for freeing the memory.  */

struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
{
  static inline hashval_t hash (const value_type &);
  static inline bool equal (const value_type &,
			    const compare_type &);
};

inline hashval_t
innermost_loop_behavior_hash::hash (const value_type &e)
{
  hashval_t hash;

  hash = iterative_hash_expr (e->base_address, 0);
  hash = iterative_hash_expr (e->offset, hash);
  hash = iterative_hash_expr (e->init, hash);
  return iterative_hash_expr (e->step, hash);
}

inline bool
innermost_loop_behavior_hash::equal (const value_type &e1,
				     const compare_type &e2)
{
  if ((e1->base_address && !e2->base_address)
      || (!e1->base_address && e2->base_address)
      || (!e1->offset && e2->offset)
      || (e1->offset && !e2->offset)
      || (!e1->init && e2->init)
      || (e1->init && !e2->init)
      || (!e1->step && e2->step)
      || (e1->step && !e2->step))
    return false;

  if (e1->base_address && e2->base_address
      && !operand_equal_p (e1->base_address, e2->base_address, 0))
    return false;
  if (e1->offset && e2->offset
      && !operand_equal_p (e1->offset, e2->offset, 0))
    return false;
  if (e1->init && e2->init
      && !operand_equal_p (e1->init, e2->init, 0))
    return false;
  if (e1->step && e2->step
      && !operand_equal_p (e1->step, e2->step, 0))
    return false;

  return true;
}

/* List of basic blocks in if-conversion-suitable order.  */
static basic_block *ifc_bbs;

/* Hash table to store <DR's innermost loop behavior, DR> pairs.  */
static hash_map<innermost_loop_behavior_hash,
		data_reference_p> *innermost_DR_map;

/* Hash table to store <base reference, DR> pairs.  */
static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;

/* List of redundant SSA names: the first should be replaced by the second.  */
static vec< std::pair<tree, tree> > redundant_ssa_names;

/* Structure used to predicate basic blocks.  This is attached to the
   ->aux field of the BBs in the loop to be if-converted.  */
struct bb_predicate {

  /* The condition under which this basic block is executed.  */
  tree predicate;

  /* PREDICATE is gimplified, and the sequence of statements is
     recorded here, in order to avoid the duplication of computations
     that occur in previous conditions.  See PR44483.  */
  gimple_seq predicate_gimplified_stmts;

  /* Records the number of statements recorded into
     PREDICATE_GIMPLIFIED_STMTS.  */
  unsigned no_predicate_stmts;
};
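
/* For illustration only (not used by the pass): in a diamond

     bb0: if (x_1) goto bb1; else goto bb2;
     bb1: ...                        <- predicate x_1
     bb2: ...                        <- predicate !x_1
     bb3: ...                        <- join, predicate true

   bb1 and bb2 get the predicates shown above recorded in the
   bb_predicate hanging off their ->aux field, while always-executed
   blocks such as bb0 and the join bb3 keep boolean_true_node.  */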

/* Returns true when the basic block BB has a predicate.  */

static inline bool
bb_has_predicate (basic_block bb)
{
  return bb->aux != NULL;
}

/* Returns the gimplified predicate for basic block BB.  */

static inline tree
bb_predicate (basic_block bb)
{
  return ((struct bb_predicate *) bb->aux)->predicate;
}

/* Sets the gimplified predicate COND for basic block BB.  */

static inline void
set_bb_predicate (basic_block bb, tree cond)
{
  auto aux = (struct bb_predicate *) bb->aux;
  gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
	       && is_gimple_val (TREE_OPERAND (cond, 0)))
	      || is_gimple_val (cond));
  aux->predicate = cond;
  aux->no_predicate_stmts++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Recording block %d value %d\n", bb->index,
	     aux->no_predicate_stmts);
}

/* Returns the sequence of statements of the gimplification of the
   predicate for basic block BB.  */

static inline gimple_seq
bb_predicate_gimplified_stmts (basic_block bb)
{
  return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
}

/* Sets the sequence of statements STMTS of the gimplification of the
   predicate for basic block BB.  If PRESERVE_COUNTS then don't clear
   the predicate counts.  */

static inline void
set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts,
				   bool preserve_counts)
{
  ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
  if (stmts == NULL && !preserve_counts)
    ((struct bb_predicate *) bb->aux)->no_predicate_stmts = 0;
}

/* Adds the sequence of statements STMTS to the sequence of statements
   of the predicate for basic block BB.  */

static inline void
add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
{
  /* We might have updated some stmts in STMTS via force_gimple_operand
     calling fold_stmt and that producing multiple stmts.  Delink immediate
     uses so update_ssa after loop versioning doesn't get confused for
     the not yet inserted predicates.
     ??? This should go away once we reliably avoid updating stmts
     not in any BB.  */
  for (gimple_stmt_iterator gsi = gsi_start (stmts);
       !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      delink_stmt_imm_use (stmt);
      gimple_set_modified (stmt, true);
      ((struct bb_predicate *) bb->aux)->no_predicate_stmts++;
    }
  gimple_seq_add_seq_without_update
    (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
}

/* Return the number of statements the predicate of the basic block consists
   of.  */

static inline unsigned
get_bb_num_predicate_stmts (basic_block bb)
{
  return ((struct bb_predicate *) bb->aux)->no_predicate_stmts;
}

/* Initializes to TRUE the predicate of basic block BB.  */

static inline void
init_bb_predicate (basic_block bb)
{
  bb->aux = XNEW (struct bb_predicate);
  set_bb_predicate_gimplified_stmts (bb, NULL, false);
  set_bb_predicate (bb, boolean_true_node);
}

/* Release the SSA_NAMEs associated with the predicate of basic block BB.  */

static inline void
release_bb_predicate (basic_block bb)
{
  gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
  if (stmts)
    {
      /* Ensure that these stmts haven't yet been added to a bb.  */
      if (flag_checking)
	for (gimple_stmt_iterator i = gsi_start (stmts);
	     !gsi_end_p (i); gsi_next (&i))
	  gcc_assert (! gimple_bb (gsi_stmt (i)));

      /* Discard them.  */
      gimple_seq_discard (stmts);
      set_bb_predicate_gimplified_stmts (bb, NULL, false);
    }
}

/* Free the predicate of basic block BB.  */

static inline void
free_bb_predicate (basic_block bb)
{
  if (!bb_has_predicate (bb))
    return;

  release_bb_predicate (bb);
  free (bb->aux);
  bb->aux = NULL;
}

/* Reinitialize predicate of BB with the true predicate.  */

static inline void
reset_bb_predicate (basic_block bb)
{
  if (!bb_has_predicate (bb))
    init_bb_predicate (bb);
  else
    {
      release_bb_predicate (bb);
      set_bb_predicate (bb, boolean_true_node);
    }
}

/* Returns a new SSA_NAME of type TYPE that is assigned the value of
   the expression EXPR.  Inserts the statement created for this
   computation before GSI and leaves the iterator GSI at the same
   statement.  */

static tree
ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
{
  tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
  gimple *stmt = gimple_build_assign (new_name, expr);
  gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
  return new_name;
}

/* Return true when COND is a false predicate.  */

static inline bool
is_false_predicate (tree cond)
{
  return (cond != NULL_TREE
	  && (cond == boolean_false_node
	      || integer_zerop (cond)));
}

/* Return true when COND is a true predicate.  */

static inline bool
is_true_predicate (tree cond)
{
  return (cond == NULL_TREE
	  || cond == boolean_true_node
	  || integer_onep (cond));
}

/* Returns true when BB has a predicate that is not trivial: true or
   NULL_TREE.  */

static inline bool
is_predicated (basic_block bb)
{
  return !is_true_predicate (bb_predicate (bb));
}

/* Parses the predicate COND and returns its comparison code and
   operands OP0 and OP1.  */

static enum tree_code
parse_predicate (tree cond, tree *op0, tree *op1)
{
  gimple *s;

  if (TREE_CODE (cond) == SSA_NAME
      && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
    {
      if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
	{
	  *op0 = gimple_assign_rhs1 (s);
	  *op1 = gimple_assign_rhs2 (s);
	  return gimple_assign_rhs_code (s);
	}

      else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
	{
	  tree op = gimple_assign_rhs1 (s);
	  tree type = TREE_TYPE (op);
	  enum tree_code code = parse_predicate (op, op0, op1);

	  return code == ERROR_MARK ? ERROR_MARK
	    : invert_tree_comparison (code, HONOR_NANS (type));
	}

      return ERROR_MARK;
    }

  if (COMPARISON_CLASS_P (cond))
    {
      *op0 = TREE_OPERAND (cond, 0);
      *op1 = TREE_OPERAND (cond, 1);
      return TREE_CODE (cond);
    }

  return ERROR_MARK;
}
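
/* For illustration only: given the predicate _5 defined as

     _5 = a_1 < b_2;

   parse_predicate (_5, &op0, &op1) returns LT_EXPR and sets op0 = a_1,
   op1 = b_2; for a TRUTH_NOT_EXPR around _5 it returns the inverted
   code (GE_EXPR here), or ERROR_MARK when the comparison can't be
   inverted, e.g. for NaN-honoring float compares.  */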

/* Returns the fold of predicate C1 OR C2 at location LOC.  */

static tree
fold_or_predicates (location_t loc, tree c1, tree c2)
{
  tree op1a, op1b, op2a, op2b;
  enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
  enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);

  if (code1 != ERROR_MARK && code2 != ERROR_MARK)
    {
      tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
					  code2, op2a, op2b);
      if (t)
	return t;
    }

  return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
}
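
/* For illustration only: ORing the predicates of the two arms of a
   diamond, e.g. a_1 < b_2 and a_1 >= b_2, folds to boolean_true_node
   here; this is what lets add_to_predicate_list reset a join block
   back to the trivial "always executed" predicate.  */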

/* Returns either a COND_EXPR or the folded expression if the folded
   expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
   a constant or a SSA_NAME.  */

static tree
fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
{
  /* If COND is the comparison r != 0 and r has boolean type, use r
     directly as the condition so the vectorizer's bool pattern
     recognition accepts it.  */
  if (TREE_CODE (cond) == NE_EXPR)
    {
      tree op0 = TREE_OPERAND (cond, 0);
      tree op1 = TREE_OPERAND (cond, 1);
      if (TREE_CODE (op0) == SSA_NAME
	  && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
	  && (integer_zerop (op1)))
	cond = op0;
    }

  gimple_match_op cexpr (gimple_match_cond::UNCOND, COND_EXPR,
			 type, cond, rhs, lhs);
  if (cexpr.resimplify (NULL, follow_all_ssa_edges))
    {
      if (gimple_simplified_result_is_gimple_val (&cexpr))
	return cexpr.ops[0];
      else if (cexpr.code == ABS_EXPR)
	return build1 (ABS_EXPR, type, cexpr.ops[0]);
      else if (cexpr.code == MIN_EXPR
	       || cexpr.code == MAX_EXPR)
	return build2 ((tree_code)cexpr.code, type, cexpr.ops[0], cexpr.ops[1]);
    }

  return build3 (COND_EXPR, type, cond, rhs, lhs);
}
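
/* For illustration only: for suitably typed operands,
   fold_build_cond_expr (type, a < b, a, b) can simplify to a MIN_EXPR
   and fold_build_cond_expr (type, x < 0, -x, x) to an ABS_EXPR, both
   of which typically vectorize more cheaply than the equivalent
   COND_EXPR conditional move.  */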

/* Add condition NC to the predicate list of basic block BB.  LOOP is
   the loop to be if-converted.  Use predicate of cd-equivalent block
   for join bb if it exists: we call basic blocks bb1 and bb2
   cd-equivalent if they are executed under the same condition.  */

static inline void
add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
{
  tree bc, *tp;
  basic_block dom_bb;

  if (is_true_predicate (nc))
    return;

  /* If dominance tells us this basic block is always executed,
     don't record any predicates for it.  */
  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return;

  dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
  /* We use notion of cd equivalence to get simpler predicate for
     join block, e.g. if join block has 2 predecessors with predicates
     p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
     p1 & p2 | p1 & !p2.  */
  if (dom_bb != loop->header
      && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
    {
      gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
      bc = bb_predicate (dom_bb);
      if (!is_true_predicate (bc))
	set_bb_predicate (bb, bc);
      else
	gcc_assert (is_true_predicate (bb_predicate (bb)));
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
		 dom_bb->index, bb->index);
      return;
    }

  if (!is_predicated (bb))
    bc = nc;
  else
    {
      bc = bb_predicate (bb);
      bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
      if (is_true_predicate (bc))
	{
	  reset_bb_predicate (bb);
	  return;
	}
    }

  /* Allow a TRUTH_NOT_EXPR around the main predicate.  */
  if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
    tp = &TREE_OPERAND (bc, 0);
  else
    tp = &bc;
  if (!is_gimple_val (*tp))
    {
      gimple_seq stmts;
      *tp = force_gimple_operand (*tp, &stmts, true, NULL_TREE);
      add_bb_predicate_gimplified_stmts (bb, stmts);
    }
  set_bb_predicate (bb, bc);
}

/* Add the condition COND to the previous condition PREV_COND, and add
   this to the predicate list of the destination of edge E.  LOOP is
   the loop to be if-converted.  */

static void
add_to_dst_predicate_list (class loop *loop, edge e,
			   tree prev_cond, tree cond)
{
  if (!flow_bb_inside_loop_p (loop, e->dest))
    return;

  if (!is_true_predicate (prev_cond))
    cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			prev_cond, cond);

  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
    add_to_predicate_list (loop, e->dest, cond);
}

/* Return true if one of the successor edges of BB exits LOOP.  */

static bool
bb_with_exit_edge_p (const class loop *loop, basic_block bb)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (loop_exit_edge_p (loop, e))
      return true;

  return false;
}

/* Given PHI which has more than two arguments, this function checks if
   it's if-convertible by degenerating its arguments.  Specifically,
   this is the case if the following two conditions are satisfied:

     1) The number of distinct argument values equals 2, and one of the
	two values occurs in exactly one argument.
     2) The edge corresponding to that unique argument isn't a critical
	edge.

   Such a PHI can be handled the same way as a PHI with only two
   arguments.  For example, the PHI below:

     res = PHI <A_1(e1), A_1(e2), A_2(e3)>;

   can be transformed into:

     res = (predicate of e3) ? A_2 : A_1;

   Return TRUE if it is the case, FALSE otherwise.  */

static bool
phi_convertible_by_degenerating_args (gphi *phi)
{
  edge e;
  tree arg, t1 = NULL, t2 = NULL;
  unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
  unsigned int num_args = gimple_phi_num_args (phi);

  gcc_assert (num_args > 2);

  for (i = 0; i < num_args; i++)
    {
      arg = gimple_phi_arg_def (phi, i);
      if (t1 == NULL || operand_equal_p (t1, arg, 0))
	{
	  n1++;
	  i1 = i;
	  t1 = arg;
	}
      else if (t2 == NULL || operand_equal_p (t2, arg, 0))
	{
	  n2++;
	  i2 = i;
	  t2 = arg;
	}
      else
	return false;
    }

  if (n1 != 1 && n2 != 1)
    return false;

  /* Check if the edge corresponding to the unique arg is critical.  */
  e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
  if (EDGE_COUNT (e->src->succs) > 1)
    return false;

  return true;
}

/* Return true when PHI is if-convertible.  PHI is part of loop LOOP
   and it belongs to basic block BB.  At this point it is already known
   that PHI is if-convertible; this function only updates the global
   variable ANY_COMPLICATED_PHI if PHI is complicated.  */

static bool
if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "-------------------------\n");
      print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
    }

  if (bb != loop->header
      && gimple_phi_num_args (phi) > 2
      && !phi_convertible_by_degenerating_args (phi))
    any_complicated_phi = true;

  return true;
}

/* Records the status of a data reference.  This struct is attached to
   each DR->aux field.  */

struct ifc_dr {
  bool rw_unconditionally;
  bool w_unconditionally;
  bool written_at_least_once;

  tree rw_predicate;
  tree w_predicate;
  tree base_w_predicate;
};

#define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
#define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
#define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
#define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)

/* Iterates over DR's and stores <ref, DR> and <base ref, DR> pairs in
   the hash tables.  While storing them, it checks if the reference is
   unconditionally read or written and records that as flag
   information.  For the base reference it checks if it is written at
   least once unconditionally and records that as flag information
   along with the DR.  In other words, for every data reference A in
   STMT there exist other accesses to a data reference with the same
   base whose predicates add up (OR-up) to the true predicate: this
   ensures that the data reference A is touched (read or written) on
   every iteration of the if-converted loop.  */
static void
hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
{

  data_reference_p *master_dr, *base_master_dr;
  tree base_ref = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);
  tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
  bool exist1, exist2;

  master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
  if (!exist1)
    *master_dr = a;

  if (DR_IS_WRITE (a))
    {
      IFC_DR (*master_dr)->w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, ca,
			      IFC_DR (*master_dr)->w_predicate);
      if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
	DR_W_UNCONDITIONALLY (*master_dr) = true;
    }
  IFC_DR (*master_dr)->rw_predicate
    = fold_or_predicates (UNKNOWN_LOCATION, ca,
			  IFC_DR (*master_dr)->rw_predicate);
  if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
    DR_RW_UNCONDITIONALLY (*master_dr) = true;

  if (DR_IS_WRITE (a))
    {
      base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
      if (!exist2)
	*base_master_dr = a;
      IFC_DR (*base_master_dr)->base_w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, ca,
			      IFC_DR (*base_master_dr)->base_w_predicate);
      if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
	DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
    }
}

/* Return TRUE if we can prove that the index IDX of an array reference
   REF is within the array bounds.  Return false otherwise.  */

static bool
idx_within_array_bound (tree ref, tree *idx, void *dta)
{
  wi::overflow_type overflow;
  widest_int niter, valid_niter, delta, wi_step;
  tree ev, init, step;
  tree low, high;
  class loop *loop = (class loop*) dta;

  /* Only support within-bound access for array references.  */
  if (TREE_CODE (ref) != ARRAY_REF)
    return false;

  /* For arrays that might have flexible sizes, it is not guaranteed that they
     do not extend over their declared size.  */
  if (array_ref_flexible_size_p (ref))
    return false;

  ev = analyze_scalar_evolution (loop, *idx);
  ev = instantiate_parameters (loop, ev);
  init = initial_condition (ev);
  step = evolution_part_in_loop_num (ev, loop->num);

  if (!init || TREE_CODE (init) != INTEGER_CST
      || (step && TREE_CODE (step) != INTEGER_CST))
    return false;

  low = array_ref_low_bound (ref);
  high = array_ref_up_bound (ref);

  /* The case of nonconstant bounds could be handled, but it would be
     complicated.  */
  if (TREE_CODE (low) != INTEGER_CST
      || !high || TREE_CODE (high) != INTEGER_CST)
    return false;

  /* Check if the initial idx is within bound.  */
  if (wi::to_widest (init) < wi::to_widest (low)
      || wi::to_widest (init) > wi::to_widest (high))
    return false;

  /* The idx is always within bound.  */
  if (!step || integer_zerop (step))
    return true;

  if (!max_loop_iterations (loop, &niter))
    return false;

  if (wi::to_widest (step) < 0)
    {
      delta = wi::to_widest (init) - wi::to_widest (low);
      wi_step = -wi::to_widest (step);
    }
  else
    {
      delta = wi::to_widest (high) - wi::to_widest (init);
      wi_step = wi::to_widest (step);
    }

  valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
  /* The iteration space of idx is within array bound.  */
  if (!overflow && niter <= valid_niter)
    return true;

  return false;
}
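
/* For illustration only: for

     int a[16];
     for (int i = 0; i < n; i++)
       if (c[i])
	 s += a[i];

   idx_within_array_bound sees init 0, step 1, low 0 and high 15; if
   max_loop_iterations proves the loop runs no further than i == 15,
   the access is provably in bounds and the conditional read of a[i]
   can be treated as unconditional without risking a trap.  */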

/* Return TRUE if REF is a within-bound array reference.  */

bool
ref_within_array_bound (gimple *stmt, tree ref)
{
  class loop *loop = loop_containing_stmt (stmt);

  gcc_assert (loop != NULL);
  return for_each_index (&ref, idx_within_array_bound, loop);
}


/* Given a memory reference expression REF, return TRUE if the base
   object it refers to is writable.  The base object of a memory
   reference is the main object being referenced, which is returned by
   function get_base_address.  */

static bool
base_object_writable (tree ref)
{
  tree base_tree = get_base_address (ref);

  return (base_tree
	  && DECL_P (base_tree)
	  && decl_binds_to_current_def_p (base_tree)
	  && !TREE_READONLY (base_tree));
}

/* Return true when the memory references of STMT won't trap in the
   if-converted code.  There are two things that we have to check for:

   - writes to memory occur to writable memory: if-conversion of
   memory writes transforms the conditional memory writes into
   unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
   into "A[i] = cond ? foo : A[i]", and as the write to memory may not
   be executed at all in the original code, it may be a readonly
   memory.  To check that A is not const-qualified, we check that
   there exists at least an unconditional write to A in the current
   function.

   - reads or writes to memory are valid memory accesses for every
   iteration.  To check that the memory accesses are correctly formed
   and that we are allowed to read and write in these locations, we
   check that the memory accesses to be if-converted occur at every
   iteration unconditionally.

   Returns true for the memory reference in STMT if the same memory
   reference is read or written unconditionally at least once and the
   base memory reference is written unconditionally once.  This checks
   that the reference will not fault on write.  Also returns true if
   the memory reference is unconditionally read once, and we are
   conditionally writing to memory that is defined as read-and-write
   and is bound to the definition we are seeing.  */
static bool
ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
{
  /* If DR didn't see a reference here we can't use it to tell
     whether the ref traps or not.  */
  if (gimple_uid (stmt) == 0)
    return false;

  data_reference_p *master_dr, *base_master_dr;
  data_reference_p a = drs[gimple_uid (stmt) - 1];

  tree base = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);

  gcc_assert (DR_STMT (a) == stmt);
  gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
	      || DR_INIT (a) || DR_STEP (a));

  master_dr = innermost_DR_map->get (innermost);
  gcc_assert (master_dr != NULL);

  base_master_dr = baseref_DR_map->get (base);

  /* If A is unconditionally written to it doesn't trap.  */
  if (DR_W_UNCONDITIONALLY (*master_dr))
    return true;

  /* If A is unconditionally accessed then ...

     Even if A is a conditional access, we can treat it as an
     unconditional one if it's an array reference and all its indices
     are within the array bounds.  */
  if (DR_RW_UNCONDITIONALLY (*master_dr)
      || ref_within_array_bound (stmt, DR_REF (a)))
    {
      /* An unconditional read won't trap.  */
      if (DR_IS_READ (a))
	return true;

      /* An unconditional write won't trap if the base is written
	 to unconditionally.  */
      if (base_master_dr
	  && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
	return flag_store_data_races;
      /* Or the base is known to be not readonly.  */
      else if (base_object_writable (DR_REF (a)))
	return flag_store_data_races;
    }

  return false;
}
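
/* For illustration only: in

     for (i = 0; i < N; i++)
       {
	 A[i] = B[i];	     (A written unconditionally)
	 if (c[i])
	   A[i] = 0;	     (conditional write to the same ref)
       }

   the conditional store can become "A[i] = c[i] ? 0 : A[i]" because
   the unconditional access proves A[i] is valid on every iteration;
   whether such a read-modify-write store may be introduced is still
   gated by flag_store_data_races.  */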

/* Return true if STMT could be converted into a masked load or store
   (conditional load or store based on a mask computed from bb predicate).  */

static bool
ifcvt_can_use_mask_load_store (gimple *stmt)
{
  /* Check whether this is a load or store.  */
  tree lhs = gimple_assign_lhs (stmt);
  bool is_load;
  tree ref;
  if (gimple_store_p (stmt))
    {
      if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
	return false;
      is_load = false;
      ref = lhs;
    }
  else if (gimple_assign_load_p (stmt))
    {
      is_load = true;
      ref = gimple_assign_rhs1 (stmt);
    }
  else
    return false;

  if (may_be_nonaddressable_p (ref))
    return false;

  /* Mask should be integer mode of the same size as the load/store
     mode.  */
  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
  if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
    return false;

  if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
    return true;

  return false;
}
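
/* For illustration only: on a target with masked vector memory
   accesses (e.g. AVX-512 or SVE), a conditional load such as

     if (c_5 != 0)
       x_6 = *p_4;

   can later be rewritten into a .MASK_LOAD internal-function call
   whose inactive lanes are a no-op; this predicate only checks that a
   masked load/store optab exists for the access mode.  */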

/* Return true if STMT could be converted from an operation that is
   unconditional to one that is conditional on a bb predicate mask.  */

static bool
ifcvt_can_predicate (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);

  if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
      || bb->loop_father->dont_vectorize
      || gimple_has_volatile_ops (stmt))
    return false;

  if (gimple_assign_single_p (stmt))
    return ifcvt_can_use_mask_load_store (stmt);

  tree_code code = gimple_assign_rhs_code (stmt);
  tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
  tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
  if (!types_compatible_p (lhs_type, rhs_type))
    return false;
  internal_fn cond_fn = get_conditional_internal_fn (code);
  return (cond_fn != IFN_LAST
	  && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
}
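
/* For illustration only: a trapping operation such as a conditional
   integer division

     if (c_5 != 0)
       x_6 = a_2 / b_3;

   maps via get_conditional_internal_fn (TRUNC_DIV_EXPR) to
   IFN_COND_DIV, so it can later be rewritten as roughly
   x_6 = .COND_DIV (c_5, a_2, b_3, else_val), which only divides in
   lanes where the condition holds.  */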

/* Return true when STMT is if-convertible.

   A GIMPLE_ASSIGN statement is not if-convertible if
   - it is not movable,
   - it could trap,
   - LHS is not var decl.  */

static bool
if_convertible_gimple_assign_stmt_p (gimple *stmt,
				     vec<data_reference_p> refs)
{
  tree lhs = gimple_assign_lhs (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "-------------------------\n");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
    }

  if (!is_gimple_reg_type (TREE_TYPE (lhs)))
    return false;

  /* Some of these constraints might be too conservative.  */
  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || (TREE_CODE (lhs) == SSA_NAME
	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
      || gimple_has_side_effects (stmt))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "stmt not suitable for ifcvt\n");
      return false;
    }

  /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because
     in between if_convertible_loop_p and combine_blocks
     we can perform loop versioning.  */
  gimple_set_plf (stmt, GF_PLF_2, false);

  if ((! gimple_vuse (stmt)
       || gimple_could_trap_p_1 (stmt, false, false)
       || ! ifcvt_memrefs_wont_trap (stmt, refs))
      && gimple_could_trap_p (stmt))
    {
      if (ifcvt_can_predicate (stmt))
	{
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  need_to_predicate = true;
	  return true;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "tree could trap...\n");
      return false;
    }
  else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	    || POINTER_TYPE_P (TREE_TYPE (lhs)))
	   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
	   && arith_code_with_undefined_signed_overflow
				(gimple_assign_rhs_code (stmt)))
    /* We have to rewrite stmts with undefined overflow.  */
    need_to_rewrite_undefined = true;

  /* When if-converting stores force versioning, likewise if we
     ended up generating store data races.  */
  if (gimple_vdef (stmt))
    need_to_predicate = true;

  return true;
}

/* Return true when STMT is if-convertible.

   A statement is if-convertible if:
   - it is an if-convertible GIMPLE_ASSIGN,
   - it is a GIMPLE_LABEL or a GIMPLE_COND,
   - it is a builtin call,
   - it is a call to a function with a SIMD clone.  */

static bool
if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
{
  switch (gimple_code (stmt))
    {
    case GIMPLE_LABEL:
    case GIMPLE_DEBUG:
    case GIMPLE_COND:
      return true;

    case GIMPLE_ASSIGN:
      return if_convertible_gimple_assign_stmt_p (stmt, refs);

    case GIMPLE_CALL:
      {
	/* There are some IFN_s that are used to replace builtins but have the
	   same semantics.  Even if MASK_CALL cannot handle them,
	   vectorizable_call will insert the proper selection, so do not
	   block conversion.  */
	int flags = gimple_call_flags (stmt);
	if ((flags & ECF_CONST)
	    && !(flags & ECF_LOOPING_CONST_OR_PURE)
	    && gimple_call_combined_fn (stmt) != CFN_LAST)
	  return true;

	tree fndecl = gimple_call_fndecl (stmt);
	if (fndecl)
	  {
	    /* We can vectorize some builtins and functions with SIMD
	       "inbranch" clones.  */
	    struct cgraph_node *node = cgraph_node::get (fndecl);
	    if (node && node->simd_clones != NULL)
	      /* Ensure that at least one clone can be "inbranch".  */
	      for (struct cgraph_node *n = node->simd_clones; n != NULL;
		   n = n->simdclone->next_clone)
		if (n->simdclone->inbranch)
		  {
		    gimple_set_plf (stmt, GF_PLF_2, true);
		    need_to_predicate = true;
		    return true;
		  }
	  }

	return false;
      }

    default:
      /* Don't know what to do with 'em so don't do anything.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "don't know what to do\n");
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      return false;
    }
}

/* Assumes that BB has more than one predecessor.
   Returns false if at least one predecessor edge is not critical,
   and true otherwise.  */

static inline bool
all_preds_critical_p (basic_block bb)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->preds)
    if (EDGE_COUNT (e->src->succs) == 1)
      return false;
  return true;
}

/* Return true when BB is if-convertible.  This routine does not check
   basic block's statements and phis.

   A basic block is not if-convertible if:
   - it is non-empty and it is after the exit block (in BFS order),
   - it is after the exit block but before the latch,
   - its edges are not normal.

   EXIT_BB is the basic block containing the exit of the LOOP.  BB is
   inside LOOP.  */

static bool
if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
{
  edge e;
  edge_iterator ei;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "----------[%d]-------------\n", bb->index);

  if (EDGE_COUNT (bb->succs) > 2)
    return false;

  if (gcall *call = safe_dyn_cast <gcall *> (*gsi_last_bb (bb)))
    if (gimple_call_ctrl_altering_p (call))
      return false;

  if (exit_bb)
    {
      if (bb != loop->latch)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "basic block after exit bb but before latch\n");
	  return false;
	}
      else if (!empty_block_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "non empty basic block after exit bb\n");
	  return false;
	}
      else if (bb == loop->latch
	       && bb != exit_bb
	       && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "latch is not dominated by exit_block\n");
	  return false;
	}
    }

  /* Be less adventurous and handle only normal edges.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "Difficult to handle edges\n");
	return false;
      }

  return true;
}

/* Return true when all predecessor blocks of BB are visited.  The
   VISITED bitmap keeps track of the visited blocks.  */

static bool
pred_blocks_visited_p (basic_block bb, bitmap *visited)
{
  edge e;
  edge_iterator ei;
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (!bitmap_bit_p (*visited, e->src->index))
      return false;

  return true;
}

/* Get body of a LOOP in suitable order for if-conversion.  It is the
   caller's responsibility to deallocate the basic block list.
   If-conversion-suitable order is breadth-first search (BFS) order
   with an additional constraint: select a block only if all its
   predecessors are already selected.  */

static basic_block *
get_loop_body_in_if_conv_order (const class loop *loop)
{
  basic_block *blocks, *blocks_in_bfs_order;
  basic_block bb;
  bitmap visited;
  unsigned int index = 0;
  unsigned int visited_count = 0;

  gcc_assert (loop->num_nodes);
  gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));

  blocks = XCNEWVEC (basic_block, loop->num_nodes);
  visited = BITMAP_ALLOC (NULL);

  blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);

  index = 0;
  while (index < loop->num_nodes)
    {
      bb = blocks_in_bfs_order [index];

      if (bb->flags & BB_IRREDUCIBLE_LOOP)
	{
	  free (blocks_in_bfs_order);
	  BITMAP_FREE (visited);
	  free (blocks);
	  return NULL;
	}

      if (!bitmap_bit_p (visited, bb->index))
	{
	  if (pred_blocks_visited_p (bb, &visited)
	      || bb == loop->header)
	    {
	      /* This block is now visited.  */
	      bitmap_set_bit (visited, bb->index);
	      blocks[visited_count++] = bb;
	    }
	}

      index++;

      if (index == loop->num_nodes
	  && visited_count != loop->num_nodes)
	/* Not done yet.  */
	index = 0;
    }
  free (blocks_in_bfs_order);
  BITMAP_FREE (visited);

  /* Go through loop and reject if-conversion or lowering of bitfields if we
     encounter statements we do not believe the vectorizer will be able to
     handle.  If adding a new type of statement here, make sure
     'ifcvt_local_dce' is also able to handle it properly.  */
  for (index = 0; index < loop->num_nodes; index++)
    {
      basic_block bb = blocks[index];
      gimple_stmt_iterator gsi;

      bool may_have_nonlocal_labels
	= bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	switch (gimple_code (gsi_stmt (gsi)))
	  {
	  case GIMPLE_LABEL:
	    if (!may_have_nonlocal_labels)
	      {
		tree label
		  = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
		if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
		  {
		    free (blocks);
		    return NULL;
		  }
	      }
	    /* Fallthru.  */
	  case GIMPLE_ASSIGN:
	  case GIMPLE_CALL:
	  case GIMPLE_DEBUG:
	  case GIMPLE_COND:
	    gimple_set_uid (gsi_stmt (gsi), 0);
	    break;
	  default:
	    free (blocks);
	    return NULL;
	  }
    }
  return blocks;
}
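
/* For illustration only: in a CFG with nested diamonds, plain BFS
   order can reach a join block before all of its predecessors have
   been placed; the extra constraint above delays each block until all
   of its predecessors are selected, so by the time a block is
   processed the predicates of its predecessors are already
   available.  */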

/* Returns true when the analysis of the predicates for all the basic
   blocks in LOOP succeeded.

   predicate_bbs first allocates the predicates of the basic blocks.
   These fields are then initialized with the tree expressions
   representing the predicates under which a basic block is executed
   in the LOOP.  As the loop->header is executed at each iteration, it
   has the "true" predicate.  Other statements executed under a
   condition are predicated with that condition, for example

   | if (x)
   |   S1;
   | else
   |   S2;

   S1 will be predicated with "x", and
   S2 will be predicated with "!x".  */

static void
predicate_bbs (loop_p loop)
{
  unsigned int i;

  for (i = 0; i < loop->num_nodes; i++)
    init_bb_predicate (ifc_bbs[i]);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      tree cond;

      /* The loop latch and loop exit block are always executed and
	 have no extra conditions to be processed: skip them.  */
      if (bb == loop->latch
	  || bb_with_exit_edge_p (loop, bb))
	{
	  reset_bb_predicate (bb);
	  continue;
	}

      cond = bb_predicate (bb);
      if (gcond *stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb)))
	{
	  tree c2;
	  edge true_edge, false_edge;
	  location_t loc = gimple_location (stmt);
	  tree c;
	  /* gcc.dg/fold-bopcond-1.c shows that despite all forwprop passes
	     conditions can remain unfolded because of multiple uses so
	     try to re-fold here, especially to get precision changing
	     conversions sorted out.  Do not simply fold the stmt since
	     this is analysis only.  When conditions were embedded in
	     COND_EXPRs those were folded separately before folding the
	     COND_EXPR but as they are now outside we have to make sure
	     to fold them.  Do it here - another opportunity would be to
	     fold predicates as they are inserted.  */
	  gimple_match_op cexpr (gimple_match_cond::UNCOND,
				 gimple_cond_code (stmt),
				 boolean_type_node,
				 gimple_cond_lhs (stmt),
				 gimple_cond_rhs (stmt));
	  if (cexpr.resimplify (NULL, follow_all_ssa_edges)
	      && cexpr.code.is_tree_code ()
	      && TREE_CODE_CLASS ((tree_code)cexpr.code) == tcc_comparison)
	    c = build2_loc (loc, (tree_code)cexpr.code, boolean_type_node,
			    cexpr.ops[0], cexpr.ops[1]);
	  else
	    c = build2_loc (loc, gimple_cond_code (stmt),
			    boolean_type_node,
			    gimple_cond_lhs (stmt),
			    gimple_cond_rhs (stmt));

	  /* Add new condition into destination's predicate list.  */
	  extract_true_false_edges_from_block (gimple_bb (stmt),
					       &true_edge, &false_edge);

	  /* If C is true, then TRUE_EDGE is taken.  */
	  add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
				     unshare_expr (c));

	  /* If C is false, then FALSE_EDGE is taken.  */
	  c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
			   unshare_expr (c));
	  add_to_dst_predicate_list (loop, false_edge,
				     unshare_expr (cond), c2);

	  cond = NULL_TREE;
	}

      /* If current bb has only one successor, then consider it as an
	 unconditional goto.  */
      if (single_succ_p (bb))
	{
	  basic_block bb_n = single_succ (bb);

	  /* The successor bb inherits the predicate of its
	     predecessor.  If there is no predicate in the predecessor
	     bb, then consider the successor bb as always executed.  */
	  if (cond == NULL_TREE)
	    cond = boolean_true_node;

	  add_to_predicate_list (loop, bb_n, cond);
	}
    }

  /* The loop header is always executed.  */
  reset_bb_predicate (loop->header);
  gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
	      && bb_predicate_gimplified_stmts (loop->latch) == NULL);
}

/* Build region by adding loop pre-header and post-header blocks.  */

static vec<basic_block>
build_region (class loop *loop)
{
  vec<basic_block> region = vNULL;
  basic_block exit_bb = NULL;

  gcc_assert (ifc_bbs);
  /* The first element is loop pre-header.  */
  region.safe_push (loop_preheader_edge (loop)->src);

  for (unsigned int i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      region.safe_push (bb);
      /* Find loop postheader.  */
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (loop_exit_edge_p (loop, e))
	  {
	    exit_bb = e->dest;
	    break;
	  }
    }
  /* The last element is loop post-header.  */
  gcc_assert (exit_bb);
  region.safe_push (exit_bb);
  return region;
}

/* Return true when LOOP is if-convertible.  This is a helper function
   for if_convertible_loop_p.  REFS and DDRS are initialized and freed
   in if_convertible_loop_p.  */

static bool
if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
{
  unsigned int i;
  basic_block exit_bb = NULL;
  vec<basic_block> region;

  calculate_dominance_info (CDI_DOMINATORS);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];

      if (!if_convertible_bb_p (loop, bb, exit_bb))
	return false;

      if (bb_with_exit_edge_p (loop, bb))
	exit_bb = bb;
    }

  data_reference_p dr;

  innermost_DR_map
    = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
  baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;

  /* Compute post-dominator tree locally.  */
  region = build_region (loop);
  calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);

  predicate_bbs (loop);

  /* Free post-dominator tree since it is not used after predication.  */
  free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
  region.release ();

  for (i = 0; refs->iterate (i, &dr); i++)
    {
      tree ref = DR_REF (dr);

      dr->aux = XNEW (struct ifc_dr);
      DR_BASE_W_UNCONDITIONALLY (dr) = false;
      DR_RW_UNCONDITIONALLY (dr) = false;
      DR_W_UNCONDITIONALLY (dr) = false;
      IFC_DR (dr)->rw_predicate = boolean_false_node;
      IFC_DR (dr)->w_predicate = boolean_false_node;
      IFC_DR (dr)->base_w_predicate = boolean_false_node;
      if (gimple_uid (DR_STMT (dr)) == 0)
	gimple_set_uid (DR_STMT (dr), i + 1);

      /* If DR doesn't have innermost loop behavior or it's a compound
	 memory reference, we synthesize its innermost loop behavior
	 for hashing.  */
      if (TREE_CODE (ref) == COMPONENT_REF
	  || TREE_CODE (ref) == IMAGPART_EXPR
	  || TREE_CODE (ref) == REALPART_EXPR
	  || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
	       || DR_INIT (dr) || DR_STEP (dr)))
	{
	  while (TREE_CODE (ref) == COMPONENT_REF
		 || TREE_CODE (ref) == IMAGPART_EXPR
		 || TREE_CODE (ref) == REALPART_EXPR)
	    ref = TREE_OPERAND (ref, 0);

	  memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
	  DR_BASE_ADDRESS (dr) = ref;
	}
      hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator itr;

      /* Check the if-convertibility of statements in predicated BBs.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
	  if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
	    return false;
    }

  /* Checking PHIs needs to be done after stmts, as the fact whether there
     are any masked loads or stores affects the tests.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gphi_iterator itr;

      for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
	if (!if_convertible_phi_p (loop, bb, itr.phi ()))
	  return false;
    }

  if (dump_file)
    fprintf (dump_file, "Applying if-conversion\n");

  return true;
}

/* Return true when LOOP is if-convertible.
   LOOP is if-convertible if:
   - it is innermost,
   - it has two or more basic blocks,
   - it has only one exit,
   - loop header is not the exit edge,
   - its basic blocks and phi nodes are if-convertible.  */

static bool
if_convertible_loop_p (class loop *loop, vec<data_reference_p> *refs)
{
  edge e;
  edge_iterator ei;
  bool res = false;

  /* Handle only innermost loop.  */
  if (!loop || loop->inner)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "not innermost loop\n");
      return false;
    }

  /* If only one block, no need for if-conversion.  */
  if (loop->num_nodes <= 2)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "less than 2 basic blocks\n");
      return false;
    }

  /* If one of the loop header's edges is an exit edge then do not
     apply if-conversion.  */
  FOR_EACH_EDGE (e, ei, loop->header->succs)
    if (loop_exit_edge_p (loop, e))
      return false;

  res = if_convertible_loop_p_1 (loop, refs);

  delete innermost_DR_map;
  innermost_DR_map = NULL;

  delete baseref_DR_map;
  baseref_DR_map = NULL;

  return res;
}

/* If HAS_NOP, OP is the result of a value-preserving conversion;
   strip it and return the converted operand (reduc_1 in the pattern
   below), or NULL_TREE if OP doesn't match:

   if (...)
     tmp1 = (unsigned type) reduc_1;
     tmp2 = tmp1 + rhs2;
     reduc_3 = (signed type) tmp2.  */
static tree
strip_nop_cond_scalar_reduction (bool has_nop, tree op)
{
  if (!has_nop)
    return op;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL_TREE;

  gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
  if (!stmt
      || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
      || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
				 (gimple_assign_rhs1 (stmt))))
    return NULL_TREE;

  return gimple_assign_rhs1 (stmt);
}

/* Returns true if the def-stmt for phi argument ARG is a simple
   increment/decrement which is in a predicated basic block.
   In fact, the following PHI pattern is searched for:
     loop-header:
       reduc_1 = PHI <..., reduc_2>
     ...
     if (...)
       reduc_3 = ...
     reduc_2 = PHI <reduc_1, reduc_3>

   ARG_0 and ARG_1 are the corresponding PHI arguments.
   REDUC, OP0 and OP1 contain the reduction stmt and its operands.
   EXTENDED is true if PHI has > 2 arguments.  */

static bool
is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
			  tree *op0, tree *op1, bool extended, bool* has_nop,
			  gimple **nop_reduc)
{
  tree lhs, r_op1, r_op2, r_nop1, r_nop2;
  gimple *stmt;
  gimple *header_phi = NULL;
  enum tree_code reduction_op;
  basic_block bb = gimple_bb (phi);
  class loop *loop = bb->loop_father;
  edge latch_e = loop_latch_edge (loop);
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  edge e;
  edge_iterator ei;
  bool result = *has_nop = false;
  if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
    return false;

  if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
    {
      lhs = arg_1;
      header_phi = SSA_NAME_DEF_STMT (arg_0);
      stmt = SSA_NAME_DEF_STMT (arg_1);
    }
  else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
    {
      lhs = arg_0;
      header_phi = SSA_NAME_DEF_STMT (arg_1);
      stmt = SSA_NAME_DEF_STMT (arg_0);
    }
  else
    return false;
  if (gimple_bb (header_phi) != loop->header)
    return false;

  if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
    return false;

  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_has_volatile_ops (stmt))
    return false;

  if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return false;

  if (!is_predicated (gimple_bb (stmt)))
    return false;

  /* Check that stmt-block is predecessor of phi-block.  */
  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
    if (e->dest == bb)
      {
	result = true;
	break;
      }
  if (!result)
    return false;

  if (!has_single_use (lhs))
    return false;

  reduction_op = gimple_assign_rhs_code (stmt);

  /* Catch something like below

     loop-header:
       reduc_1 = PHI <..., reduc_2>
     ...
     if (...)
       tmp1 = (unsigned type) reduc_1;
       tmp2 = tmp1 + rhs2;
       reduc_3 = (signed type) tmp2;

     reduc_2 = PHI <reduc_1, reduc_3>

     and convert to

     reduc_2 = PHI <0, reduc_1>
     tmp1 = (unsigned type)reduc_1;
     ifcvt = cond_expr ? rhs2 : 0
     tmp2 = tmp1 +/- ifcvt;
     reduc_1 = (signed type)tmp2;  */

  if (CONVERT_EXPR_CODE_P (reduction_op))
    {
      lhs = gimple_assign_rhs1 (stmt);
      if (TREE_CODE (lhs) != SSA_NAME
	  || !has_single_use (lhs))
	return false;

      *nop_reduc = stmt;
      stmt = SSA_NAME_DEF_STMT (lhs);
      if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
	  || !is_gimple_assign (stmt))
	return false;

      *has_nop = true;
      reduction_op = gimple_assign_rhs_code (stmt);
    }

  if (reduction_op != PLUS_EXPR
      && reduction_op != MINUS_EXPR
      && reduction_op != MULT_EXPR
      && reduction_op != BIT_IOR_EXPR
      && reduction_op != BIT_XOR_EXPR
      && reduction_op != BIT_AND_EXPR)
    return false;
  r_op1 = gimple_assign_rhs1 (stmt);
  r_op2 = gimple_assign_rhs2 (stmt);

  r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
  r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);

  /* Make R_OP1 to hold reduction variable.  */
  if (r_nop2 == PHI_RESULT (header_phi)
      && commutative_tree_code (reduction_op))
    {
      std::swap (r_op1, r_op2);
      std::swap (r_nop1, r_nop2);
    }
  else if (r_nop1 != PHI_RESULT (header_phi))
    return false;

  if (*has_nop)
    {
      /* Check that R_NOP1 is used in nop_stmt or in PHI only.  */
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	    continue;
	  if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
	    continue;
	  if (use_stmt != phi)
	    return false;
	}
    }

  /* Check that R_OP1 is used in reduction stmt or in PHI only.  */
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
	continue;
      if (use_stmt == stmt)
	continue;
      if (gimple_code (use_stmt) != GIMPLE_PHI)
	return false;
    }

  *op0 = r_op1; *op1 = r_op2;
  *reduc = stmt;
  return true;
}
1831
1832/* Converts conditional scalar reduction into unconditional form, e.g.
1833 bb_4
1834 if (_5 != 0) goto bb_5 else goto bb_6
1835 end_bb_4
1836 bb_5
1837 res_6 = res_13 + 1;
1838 end_bb_5
1839 bb_6
1840 # res_2 = PHI <res_13(4), res_6(5)>
1841 end_bb_6
1842
1843 will be converted into sequence
1844 _ifc__1 = _5 != 0 ? 1 : 0;
1845 res_2 = res_13 + _ifc__1;
1846 Argument SWAP tells whether the arguments of the conditional expression
1847 should be swapped.
1848 LOOP_VERSIONED is true if we assume that we versioned the loop for
1849 vectorization; in that case we can create a COND_OP.
1850 Returns rhs of resulting PHI assignment. */
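/* E.g. (schematic, reusing the names from the example above): for the
   reduction res_2 = res_13 + 1 guarded by _5 != 0, the COND_OP path
   would emit roughly

     res_2 = .COND_ADD (_5 != 0, res_13, 1, res_13);

   i.e. the else value of the conditional internal function is the
   unmodified reduction input OP0.  */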
1851
1852static tree
1853convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1854 tree cond, tree op0, tree op1, bool swap,
1855 bool has_nop, gimple* nop_reduc,
1856 bool loop_versioned)
1857{
1858 gimple_stmt_iterator stmt_it;
1859 gimple *new_assign;
1860 tree rhs;
1861 tree rhs1 = gimple_assign_rhs1 (gs: reduc);
1862 tree lhs = gimple_assign_lhs (gs: reduc);
1863 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, name: "_ifc_");
1864 tree c;
1865 enum tree_code reduction_op = gimple_assign_rhs_code (gs: reduc);
1866 tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op,
1867 NULL, false);
1868 gimple_seq stmts = NULL;
1869
1870 if (dump_file && (dump_flags & TDF_DETAILS))
1871 {
1872 fprintf (stream: dump_file, format: "Found cond scalar reduction.\n");
1873 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1874 }
1875
1876 /* If possible create a COND_OP instead of a COND_EXPR and an OP_EXPR.
1877 The COND_OP will have a neutral_op else value. */
1878 internal_fn ifn;
1879 ifn = get_conditional_internal_fn (reduction_op);
1880 if (loop_versioned && ifn != IFN_LAST
1881 && vectorized_internal_fn_supported_p (ifn, TREE_TYPE (lhs))
1882 && !swap)
1883 {
1884 gcall *cond_call = gimple_build_call_internal (ifn, 4,
1885 unshare_expr (cond),
1886 op0, op1, op0);
1887 gsi_insert_before (gsi, cond_call, GSI_SAME_STMT);
1888 gimple_call_set_lhs (gs: cond_call, lhs: tmp);
1889 rhs = tmp;
1890 }
1891 else
1892 {
1893 /* Build cond expression using COND and constant operand
1894 of reduction rhs. */
1895 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1896 cond: unshare_expr (cond),
1897 rhs: swap ? op_nochange : op1,
1898 lhs: swap ? op1 : op_nochange);
1899 /* Create assignment stmt and insert it at GSI. */
1900 new_assign = gimple_build_assign (tmp, c);
1901 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1902 /* Build rhs for unconditional increment/decrement/logic_operation. */
1903 rhs = gimple_build (seq: &stmts, code: reduction_op,
1904 TREE_TYPE (rhs1), ops: op0, ops: tmp);
1905 }
1906
1907 if (has_nop)
1908 {
1909 rhs = gimple_convert (seq: &stmts,
1910 TREE_TYPE (gimple_assign_lhs (nop_reduc)), op: rhs);
1911 stmt_it = gsi_for_stmt (nop_reduc);
1912 gsi_remove (&stmt_it, true);
1913 release_defs (nop_reduc);
1914 }
1915 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1916
1917 /* Delete original reduction stmt. */
1918 stmt_it = gsi_for_stmt (reduc);
1919 gsi_remove (&stmt_it, true);
1920 release_defs (reduc);
1921 return rhs;
1922}
1923
1924/* Generate a simplified form of COND given the conditions in COND_SET. */
1925
1926static tree
1927gen_simplified_condition (tree cond, scalar_cond_masked_set_type &cond_set)
1928{
1929 /* Check if the value is already live in a previous branch. This resolves
1930 nested conditionals from diamond PHI reductions. */
1931 if (TREE_CODE (cond) == SSA_NAME)
1932 {
1933 gimple *stmt = SSA_NAME_DEF_STMT (cond);
1934 gassign *assign = NULL;
1935 if ((assign = dyn_cast <gassign *> (p: stmt))
1936 && gimple_assign_rhs_code (gs: assign) == BIT_AND_EXPR)
1937 {
1938 tree arg1 = gimple_assign_rhs1 (gs: assign);
1939 tree arg2 = gimple_assign_rhs2 (gs: assign);
1940 if (cond_set.contains (k: { arg1, 1 }))
1941 arg1 = boolean_true_node;
1942 else
1943 arg1 = gen_simplified_condition (cond: arg1, cond_set);
1944
1945 if (cond_set.contains (k: { arg2, 1 }))
1946 arg2 = boolean_true_node;
1947 else
1948 arg2 = gen_simplified_condition (cond: arg2, cond_set);
1949
1950 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, arg1, arg2);
1951 }
1952 }
1953 return cond;
1954}
1955
1956/* Structure used to track meta-data on PHI arguments used to generate
1957 the most efficient comparison sequence to flatten a PHI node. */
1958
1959typedef struct ifcvt_arg_entry
1960{
1961 /* The PHI node argument value. */
1962 tree arg;
1963
1964 /* The number of compares required to reach this PHI node from start of the
1965 BB being if-converted. */
1966 unsigned num_compares;
1967
1968 /* The number of times this PHI node argument appears in the current PHI
1969 node. */
1970 unsigned occurs;
1971
1972 /* The indices at which this PHI arg occurs inside the PHI node. */
1973 vec <int> *indexes;
1974} ifcvt_arg_entry_t;
1975
1976/* Produce the condition for all occurrences of ARG in the PHI node. Set
1977 *INVERT to true if the returned condition is inverted. */
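/* E.g. (schematic, names invented for illustration): if ARG occurs on
   edges whose source blocks are guarded by predicates _p1 and _p2, the
   returned condition is the disjunction _p1 | _p2, with each step
   gimplified at GSI.  */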
1978
1979static tree
1980gen_phi_arg_condition (gphi *phi, ifcvt_arg_entry_t &arg,
1981 gimple_stmt_iterator *gsi,
1982 scalar_cond_masked_set_type &cond_set, bool *invert)
1983{
1984 int len;
1985 int i;
1986 tree cond = NULL_TREE;
1987 tree c;
1988 edge e;
1989
1990 *invert = false;
1991 len = arg.indexes->length ();
1992 gcc_assert (len > 0);
1993 for (i = 0; i < len; i++)
1994 {
1995 e = gimple_phi_arg_edge (phi, i: (*arg.indexes)[i]);
1996 c = bb_predicate (bb: e->src);
1997 if (is_true_predicate (cond: c))
1998 {
1999 cond = c;
2000 break;
2001 }
2002 /* If we have just a single inverted predicate, signal that and
2003 instead invert the COND_EXPR arms. */
2004 if (len == 1 && TREE_CODE (c) == TRUTH_NOT_EXPR)
2005 {
2006 c = TREE_OPERAND (c, 0);
2007 *invert = true;
2008 }
2009
2010 c = gen_simplified_condition (cond: c, cond_set);
2011 c = force_gimple_operand_gsi (gsi, unshare_expr (c),
2012 true, NULL_TREE, true, GSI_SAME_STMT);
2013 if (cond != NULL_TREE)
2014 {
2015 /* Must build OR expression. */
2016 cond = fold_or_predicates (EXPR_LOCATION (c), c1: c, c2: cond);
2017 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2018 NULL_TREE, true, GSI_SAME_STMT);
2019 }
2020 else
2021 cond = c;
2022
2023 /* Register the new, possibly simplified conditional. When a phi node
2024 has more than 2 entries we chain entries in the false branch, so the
2025 inverted condition is active. */
2026 scalar_cond_masked_key pred_cond ({ cond, 1 });
2027 if (!*invert)
2028 pred_cond.inverted_p = !pred_cond.inverted_p;
2029 cond_set.add (k: pred_cond);
2030 }
2031 gcc_assert (cond != NULL_TREE);
2032 return cond;
2033}
2034
2035/* Create the smallest nested conditional possible. On pre-order we record
2036 which conditionals are live, and on post-order rewrite the chain by removing
2037 already active conditions.
2038
2039 As an example we simplify:
2040
2041 _7 = a_10 < 0;
2042 _21 = a_10 >= 0;
2043 _22 = a_10 < e_11(D);
2044 _23 = _21 & _22;
2045 _ifc__42 = _23 ? t_13 : 0;
2046 t_6 = _7 ? 1 : _ifc__42
2047
2048 into
2049
2050 _7 = a_10 < 0;
2051 _22 = a_10 < e_11(D);
2052 _ifc__42 = _22 ? t_13 : 0;
2053 t_6 = _7 ? 1 : _ifc__42;
2054
2055 which produces better code. */
2056
2057static tree
2058gen_phi_nest_statement (gphi *phi, gimple_stmt_iterator *gsi,
2059 scalar_cond_masked_set_type &cond_set, tree type,
2060 gimple **res_stmt, tree lhs0,
2061 vec<struct ifcvt_arg_entry> &args, unsigned idx)
2062{
2063 if (idx == args.length ())
2064 return args[idx - 1].arg;
2065
2066 bool invert;
2067 tree cond = gen_phi_arg_condition (phi, arg&: args[idx - 1], gsi, cond_set,
2068 invert: &invert);
2069 tree arg1 = gen_phi_nest_statement (phi, gsi, cond_set, type, res_stmt, lhs0,
2070 args, idx: idx + 1);
2071
2072 unsigned prev = idx;
2073 unsigned curr = prev - 1;
2074 tree arg0 = args[curr].arg;
2075 tree rhs, lhs;
2076 if (idx > 1)
2077 lhs = make_temp_ssa_name (type, NULL, name: "_ifc_");
2078 else
2079 lhs = lhs0;
2080
2081 if (invert)
2082 rhs = fold_build_cond_expr (type, cond: unshare_expr (cond),
2083 rhs: arg1, lhs: arg0);
2084 else
2085 rhs = fold_build_cond_expr (type, cond: unshare_expr (cond),
2086 rhs: arg0, lhs: arg1);
2087 gassign *new_stmt = gimple_build_assign (lhs, rhs);
2088 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2089 update_stmt (s: new_stmt);
2090 *res_stmt = new_stmt;
2091 return lhs;
2092}
2093
2094/* When flattening a PHI node we have a choice of which conditions to test
2095 for all the paths from the start of the dominator block of the BB with the
2096 PHI node. If the PHI node has X arguments we have to only test X - 1
2097 conditions as the last one is implicit. It does matter which conditions we
2098 test first. We should test the shortest condition first (distance here is
2099 measured in the number of logical operators in the condition) and the
2100 longest one last. This allows us to skip testing the most expensive
2101 condition. To accomplish this we need to sort the conditions. P1 and P2
2102 are sorted first based on the number of logical operations (num_compares)
2103 and then by how often they occur in the PHI node. */
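/* E.g. (illustrative values only): entries with (num_compares, occurs)
   of (3,1), (1,2) and (1,1) sort as (1,1), (1,2), (3,1), so the
   argument that is most expensive to reach ends up last, and its
   condition is the implicit one that never has to be tested.  */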
2104
2105static int
2106cmp_arg_entry (const void *p1, const void *p2, void * /* data. */)
2107{
2108 const ifcvt_arg_entry sval1 = *(const ifcvt_arg_entry *)p1;
2109 const ifcvt_arg_entry sval2 = *(const ifcvt_arg_entry *)p2;
2110
2111 if (sval1.num_compares < sval2.num_compares)
2112 return -1;
2113 else if (sval1.num_compares > sval2.num_compares)
2114 return 1;
2115
2116 if (sval1.occurs < sval2.occurs)
2117 return -1;
2118 else if (sval1.occurs > sval2.occurs)
2119 return 1;
2120
2121 return 0;
2122}
2123
2124/* Replace a scalar PHI node with a COND_EXPR using COND as condition.
2125 This routine can handle PHI nodes with more than two arguments.
2126
2127 For example,
2128 S1: A = PHI <x1(1), x2(5)>
2129 is converted into,
2130 S2: A = cond ? x1 : x2;
2131
2132 The generated code is inserted at GSI, which points to the top of the
2133 basic block's statement list.
2134 If the PHI node has more than two arguments a chain of conditional
2135 expressions is produced; see the sketch following this comment.
2136 LOOP_VERSIONED should be true if we know that the loop was versioned for
2137 vectorization. */
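/* Sketch of the chain for a three-argument PHI (schematic, names
   invented for illustration):

     A = PHI <x1(1), x2(5), x3(7)>

   becomes roughly

     _ifc__1 = cond_2 ? x2 : x3;
     A = cond_1 ? x1 : _ifc__1;

   where cond_1 and cond_2 are the predicates of the corresponding
   incoming blocks.  */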
2138
2139
2140static void
2141predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi, bool loop_versioned)
2142{
2143 gimple *new_stmt = NULL, *reduc, *nop_reduc;
2144 tree rhs, res, arg0, arg1, op0, op1, scev;
2145 tree cond;
2146 unsigned int index0;
2147 edge e;
2148 basic_block bb;
2149 unsigned int i;
2150 bool has_nop;
2151
2152 res = gimple_phi_result (gs: phi);
2153 if (virtual_operand_p (op: res))
2154 return;
2155
2156 if ((rhs = degenerate_phi_result (phi))
2157 || ((scev = analyze_scalar_evolution (gimple_bb (g: phi)->loop_father,
2158 res))
2159 && !chrec_contains_undetermined (scev)
2160 && scev != res
2161 && (rhs = gimple_phi_arg_def (gs: phi, index: 0))))
2162 {
2163 if (dump_file && (dump_flags & TDF_DETAILS))
2164 {
2165 fprintf (stream: dump_file, format: "Degenerate phi!\n");
2166 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
2167 }
2168 new_stmt = gimple_build_assign (res, rhs);
2169 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2170 update_stmt (s: new_stmt);
2171 return;
2172 }
2173
2174 bb = gimple_bb (g: phi);
2175 /* Keep track of conditionals already seen. */
2176 scalar_cond_masked_set_type cond_set;
2177 if (EDGE_COUNT (bb->preds) == 2)
2178 {
2179 /* Predicate ordinary PHI node with 2 arguments. */
2180 edge first_edge, second_edge;
2181 basic_block true_bb;
2182 first_edge = EDGE_PRED (bb, 0);
2183 second_edge = EDGE_PRED (bb, 1);
2184 cond = bb_predicate (bb: first_edge->src);
2185 cond_set.add (k: { cond, 1 });
2186 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2187 std::swap (a&: first_edge, b&: second_edge);
2188 if (EDGE_COUNT (first_edge->src->succs) > 1)
2189 {
2190 cond = bb_predicate (bb: second_edge->src);
2191 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2192 cond = TREE_OPERAND (cond, 0);
2193 else
2194 first_edge = second_edge;
2195 }
2196 else
2197 cond = bb_predicate (bb: first_edge->src);
2198
2199 /* Gimplify the condition to a valid cond-expr conditional operand. */
2200 cond = gen_simplified_condition (cond, cond_set);
2201 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2202 NULL_TREE, true, GSI_SAME_STMT);
2203 true_bb = first_edge->src;
2204 if (EDGE_PRED (bb, 1)->src == true_bb)
2205 {
2206 arg0 = gimple_phi_arg_def (gs: phi, index: 1);
2207 arg1 = gimple_phi_arg_def (gs: phi, index: 0);
2208 }
2209 else
2210 {
2211 arg0 = gimple_phi_arg_def (gs: phi, index: 0);
2212 arg1 = gimple_phi_arg_def (gs: phi, index: 1);
2213 }
2214 if (is_cond_scalar_reduction (phi, reduc: &reduc, arg_0: arg0, arg_1: arg1,
2215 op0: &op0, op1: &op1, extended: false, has_nop: &has_nop,
2216 nop_reduc: &nop_reduc))
2217 {
2218 /* Convert reduction stmt into vectorizable form. */
2219 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2220 swap: true_bb != gimple_bb (g: reduc),
2221 has_nop, nop_reduc,
2222 loop_versioned);
2223 redundant_ssa_names.safe_push (obj: std::make_pair (x&: res, y&: rhs));
2224 }
2225 else
2226 /* Build new RHS using selected condition and arguments. */
2227 rhs = fold_build_cond_expr (TREE_TYPE (res), cond: unshare_expr (cond),
2228 rhs: arg0, lhs: arg1);
2229 new_stmt = gimple_build_assign (res, rhs);
2230 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2231 gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
2232 if (fold_stmt (&new_gsi, follow_all_ssa_edges))
2233 {
2234 new_stmt = gsi_stmt (i: new_gsi);
2235 update_stmt (s: new_stmt);
2236 }
2237
2238 if (dump_file && (dump_flags & TDF_DETAILS))
2239 {
2240 fprintf (stream: dump_file, format: "new phi replacement stmt\n");
2241 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2242 }
2243 return;
2244 }
2245
2246 /* Create a hashmap for the PHI node which maps each argument value to
2247 the vector of indexes at which it occurs. */
2248 bool swap = false;
2249 hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
2250 unsigned int num_args = gimple_phi_num_args (gs: phi);
2251 /* Vector of different PHI argument values. */
2252 auto_vec<ifcvt_arg_entry_t> args;
2253
2254 /* Compute phi_arg_map, determine the list of unique PHI args and the indices
2255 where they are in the PHI node. The indices will be used to determine
2256 the conditions to apply and their complexity. */
2257 for (i = 0; i < num_args; i++)
2258 {
2259 tree arg;
2260
2261 arg = gimple_phi_arg_def (gs: phi, index: i);
2262 if (!phi_arg_map.get (k: arg))
2263 args.safe_push (obj: { .arg: arg, .num_compares: 0, .occurs: 0, NULL });
2264 phi_arg_map.get_or_insert (k: arg).safe_push (obj: i);
2265 }
2266
2267 /* Determine the element with the max number of occurrences and complexity.
2268 Looking only at the number of occurrences as a measure for complexity
2269 isn't enough, as all usages can be unique but the comparisons needed to
2270 reach the PHI node can differ per branch. */
2271 for (unsigned i = 0; i < args.length (); i++)
2272 {
2273 unsigned int len = 0;
2274 vec<int> *indices = phi_arg_map.get (k: args[i].arg);
2275 for (int index : *indices)
2276 {
2277 edge e = gimple_phi_arg_edge (phi, i: index);
2278 len += get_bb_num_predicate_stmts (bb: e->src);
2279 }
2280
2281 unsigned occur = indices->length ();
2282 if (dump_file && (dump_flags & TDF_DETAILS))
2283 fprintf (stream: dump_file, format: "Ranking %d as len=%d, idx=%d\n", i, len, occur);
2284 args[i].num_compares = len;
2285 args[i].occurs = occur;
2286 args[i].indexes = indices;
2287 }
2288
2289 /* Sort the elements of ARGS based on their rankings. */
2290 args.stablesort (cmp: cmp_arg_entry, NULL);
2291
2292 /* Handle one special case when the number of arguments with different
2293 values is equal to 2 and one argument occurs only once. Such a PHI can
2294 be handled as if it had only 2 arguments. */
2295 if (args.length () == 2
2296 && args[0].indexes->length () == 1)
2297 {
2298 index0 = (*args[0].indexes)[0];
2299 arg0 = args[0].arg;
2300 arg1 = args[1].arg;
2301 e = gimple_phi_arg_edge (phi, i: index0);
2302 cond = bb_predicate (bb: e->src);
2303 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2304 {
2305 swap = true;
2306 cond = TREE_OPERAND (cond, 0);
2307 }
2308 /* Gimplify the condition to a valid cond-expr conditional operand. */
2309 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2310 NULL_TREE, true, GSI_SAME_STMT);
2311 if (!(is_cond_scalar_reduction (phi, reduc: &reduc, arg_0: arg0 , arg_1: arg1,
2312 op0: &op0, op1: &op1, extended: true, has_nop: &has_nop, nop_reduc: &nop_reduc)))
2313 rhs = fold_build_cond_expr (TREE_TYPE (res), cond: unshare_expr (cond),
2314 rhs: swap ? arg1 : arg0,
2315 lhs: swap ? arg0 : arg1);
2316 else
2317 {
2318 /* Convert reduction stmt into vectorizable form. */
2319 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2320 swap, has_nop, nop_reduc,
2321 loop_versioned);
2322 redundant_ssa_names.safe_push (obj: std::make_pair (x&: res, y&: rhs));
2323 }
2324 new_stmt = gimple_build_assign (res, rhs);
2325 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2326 update_stmt (s: new_stmt);
2327 }
2328 else
2329 {
2330 /* Common case. */
2331 tree type = TREE_TYPE (gimple_phi_result (phi));
2332 gen_phi_nest_statement (phi, gsi, cond_set, type, res_stmt: &new_stmt, lhs0: res,
2333 args, idx: 1);
2334 }
2335
2336 if (dump_file && (dump_flags & TDF_DETAILS))
2337 {
2338 fprintf (stream: dump_file, format: "new extended phi replacement stmt\n");
2339 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2340 }
2341}
2342
2343/* Replaces in LOOP all the scalar phi nodes other than those in the
2344 LOOP->header block with conditional modify expressions.
2345 LOOP_VERSIONED should be true if we know that the loop was versioned for
2346 vectorization. */
2347
2348static void
2349predicate_all_scalar_phis (class loop *loop, bool loop_versioned)
2350{
2351 basic_block bb;
2352 unsigned int orig_loop_num_nodes = loop->num_nodes;
2353 unsigned int i;
2354
2355 for (i = 1; i < orig_loop_num_nodes; i++)
2356 {
2357 gphi *phi;
2358 gimple_stmt_iterator gsi;
2359 gphi_iterator phi_gsi;
2360 bb = ifc_bbs[i];
2361
2362 if (bb == loop->header)
2363 continue;
2364
2365 phi_gsi = gsi_start_phis (bb);
2366 if (gsi_end_p (i: phi_gsi))
2367 continue;
2368
2369 gsi = gsi_after_labels (bb);
2370 while (!gsi_end_p (i: phi_gsi))
2371 {
2372 phi = phi_gsi.phi ();
2373 if (virtual_operand_p (op: gimple_phi_result (gs: phi)))
2374 gsi_next (i: &phi_gsi);
2375 else
2376 {
2377 predicate_scalar_phi (phi, gsi: &gsi, loop_versioned);
2378 remove_phi_node (&phi_gsi, false);
2379 }
2380 }
2381 }
2382}
2383
2384/* Insert in each basic block of LOOP the statements produced by the
2385 gimplification of the predicates. */
2386
2387static void
2388insert_gimplified_predicates (loop_p loop)
2389{
2390 unsigned int i;
2391
2392 for (i = 0; i < loop->num_nodes; i++)
2393 {
2394 basic_block bb = ifc_bbs[i];
2395 gimple_seq stmts;
2396 if (!is_predicated (bb))
2397 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2398 if (!is_predicated (bb))
2399 {
2400 /* Do not insert statements for a basic block that is not
2401 predicated. Also make sure that the predicate of the
2402 basic block is set to true. */
2403 reset_bb_predicate (bb);
2404 continue;
2405 }
2406
2407 stmts = bb_predicate_gimplified_stmts (bb);
2408 if (stmts)
2409 {
2410 if (need_to_predicate)
2411 {
2412 /* Insert the predicate of the BB just after the label,
2413 as the if-conversion of memory writes will use this
2414 predicate. */
2415 gimple_stmt_iterator gsi = gsi_after_labels (bb);
2416 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2417 }
2418 else
2419 {
2420 /* Insert the predicate of the BB at the end of the BB
2421 as this would reduce the register pressure: the only
2422 use of this predicate will be in successor BBs. */
2423 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2424
2425 if (gsi_end_p (i: gsi)
2426 || stmt_ends_bb_p (gsi_stmt (i: gsi)))
2427 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2428 else
2429 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2430 }
2431
2432 /* Once the sequence is code generated, set it to NULL. */
2433 set_bb_predicate_gimplified_stmts (bb, NULL, preserve_counts: true);
2434 }
2435 }
2436}
2437
2438/* Helper function for predicate_statements. Returns the index of an
2439 existing mask if one was created for the given SIZE and -1 otherwise. */
2440
2441static int
2442mask_exists (int size, const vec<int> &vec)
2443{
2444 unsigned int ix;
2445 int v;
2446 FOR_EACH_VEC_ELT (vec, ix, v)
2447 if (v == size)
2448 return (int) ix;
2449 return -1;
2450}
2451
2452/* Helper function for predicate_statements. STMT is a memory read or
2453 write and it needs to be predicated by MASK. Return a statement
2454 that does so. */
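/* Schematically (not a real dump):

     x_1 = *p_2;   becomes   x_1 = .MASK_LOAD (p_2, align, MASK);
     *p_2 = x_1;   becomes   .MASK_STORE (p_2, align, MASK, x_1);

   where align encodes the alignment of the original reference.  */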
2455
2456static gimple *
2457predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
2458{
2459 gcall *new_stmt;
2460
2461 tree lhs = gimple_assign_lhs (gs: stmt);
2462 tree rhs = gimple_assign_rhs1 (gs: stmt);
2463 tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2464 mark_addressable (ref);
2465 tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
2466 true, NULL_TREE, true, GSI_SAME_STMT);
2467 tree ptr = build_int_cst (reference_alias_ptr_type (ref),
2468 get_object_alignment (ref));
2469 /* Copy points-to info if possible. */
2470 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2471 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2472 ref);
2473 if (TREE_CODE (lhs) == SSA_NAME)
2474 {
2475 new_stmt
2476 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2477 ptr, mask);
2478 gimple_call_set_lhs (gs: new_stmt, lhs);
2479 gimple_set_vuse (g: new_stmt, vuse: gimple_vuse (g: stmt));
2480 }
2481 else
2482 {
2483 new_stmt
2484 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2485 mask, rhs);
2486 gimple_move_vops (new_stmt, stmt);
2487 }
2488 gimple_call_set_nothrow (s: new_stmt, nothrow_p: true);
2489 return new_stmt;
2490}
2491
2492/* STMT uses OP_LHS. Check whether it is equivalent to:
2493
2494 ... = OP_MASK ? OP_LHS : X;
2495
2496 Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2497 known to have value OP_COND. */
2498
2499static tree
2500check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
2501 tree op_lhs)
2502{
2503 gassign *assign = dyn_cast <gassign *> (p: stmt);
2504 if (!assign || gimple_assign_rhs_code (gs: assign) != COND_EXPR)
2505 return NULL_TREE;
2506
2507 tree use_cond = gimple_assign_rhs1 (gs: assign);
2508 tree if_true = gimple_assign_rhs2 (gs: assign);
2509 tree if_false = gimple_assign_rhs3 (gs: assign);
2510
2511 if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, flags: 0))
2512 && if_true == op_lhs)
2513 return if_false;
2514
2515 if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
2516 return if_true;
2517
2518 return NULL_TREE;
2519}
2520
2521/* Return true if VALUE is available for use at STMT. SSA_NAMES is
2522 the set of SSA names defined earlier in STMT's block. */
2523
2524static bool
2525value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
2526 tree value)
2527{
2528 if (is_gimple_min_invariant (value))
2529 return true;
2530
2531 if (TREE_CODE (value) == SSA_NAME)
2532 {
2533 if (SSA_NAME_IS_DEFAULT_DEF (value))
2534 return true;
2535
2536 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
2537 basic_block use_bb = gimple_bb (g: stmt);
2538 return (def_bb == use_bb
2539 ? ssa_names->contains (k: value)
2540 : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
2541 }
2542
2543 return false;
2544}
2545
2546/* Helper function for predicate_statements. STMT is a potentially-trapping
2547 arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2548 has value COND. Return a statement that does so. SSA_NAMES is the set of
2549 SSA names defined earlier in STMT's block. */
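/* E.g. (schematic): a possibly-trapping division

     lhs_1 = a_2 / b_3;

   becomes roughly

     lhs_1 = .COND_DIV (MASK, a_2, b_3, ELSE);

   where ELSE is either a value recovered from a COND_EXPR use of the
   result (see check_redundant_cond_expr) or the target's preferred
   else value.  */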
2550
2551static gimple *
2552predicate_rhs_code (gassign *stmt, tree mask, tree cond,
2553 hash_set<tree_ssa_name_hash> *ssa_names)
2554{
2555 tree lhs = gimple_assign_lhs (gs: stmt);
2556 tree_code code = gimple_assign_rhs_code (gs: stmt);
2557 unsigned int nops = gimple_num_ops (gs: stmt);
2558 internal_fn cond_fn = get_conditional_internal_fn (code);
2559
2560 /* Construct the arguments to the conditional internal function. */
2561 auto_vec<tree, 8> args;
2562 args.safe_grow (len: nops + 1, exact: true);
2563 args[0] = mask;
2564 for (unsigned int i = 1; i < nops; ++i)
2565 args[i] = gimple_op (gs: stmt, i);
2566 args[nops] = NULL_TREE;
2567
2568 /* Look for uses of the result to see whether they are COND_EXPRs that can
2569 be folded into the conditional call. */
2570 imm_use_iterator imm_iter;
2571 gimple *use_stmt;
2572 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
2573 {
2574 tree new_else = check_redundant_cond_expr (stmt: use_stmt, op_mask: mask, op_cond: cond, op_lhs: lhs);
2575 if (new_else && value_available_p (stmt, ssa_names, value: new_else))
2576 {
2577 if (!args[nops])
2578 args[nops] = new_else;
2579 if (operand_equal_p (new_else, args[nops], flags: 0))
2580 {
2581 /* We have:
2582
2583 LHS = IFN_COND (MASK, ..., ELSE);
2584 X = MASK ? LHS : ELSE;
2585
2586 which makes X equivalent to LHS. */
2587 tree use_lhs = gimple_assign_lhs (gs: use_stmt);
2588 redundant_ssa_names.safe_push (obj: std::make_pair (x&: use_lhs, y&: lhs));
2589 }
2590 }
2591 }
2592 if (!args[nops])
2593 args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
2594 nops - 1, &args[1]);
2595
2596 /* Create and insert the call. */
2597 gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
2598 gimple_call_set_lhs (gs: new_stmt, lhs);
2599 gimple_call_set_nothrow (s: new_stmt, nothrow_p: true);
2600
2601 return new_stmt;
2602}
2603
2604/* Predicate each write to memory in LOOP.
2605
2606 This function transforms control flow constructs containing memory
2607 writes of the form:
2608
2609 | for (i = 0; i < N; i++)
2610 | if (cond)
2611 | A[i] = expr;
2612
2613 into the following form that does not contain control flow:
2614
2615 | for (i = 0; i < N; i++)
2616 | A[i] = cond ? expr : A[i];
2617
2618 The original CFG looks like this:
2619
2620 | bb_0
2621 | i = 0
2622 | end_bb_0
2623 |
2624 | bb_1
2625 | if (i < N) goto bb_5 else goto bb_2
2626 | end_bb_1
2627 |
2628 | bb_2
2629 | cond = some_computation;
2630 | if (cond) goto bb_3 else goto bb_4
2631 | end_bb_2
2632 |
2633 | bb_3
2634 | A[i] = expr;
2635 | goto bb_4
2636 | end_bb_3
2637 |
2638 | bb_4
2639 | goto bb_1
2640 | end_bb_4
2641
2642 insert_gimplified_predicates inserts the computation of the COND
2643 expression at the beginning of the destination basic block:
2644
2645 | bb_0
2646 | i = 0
2647 | end_bb_0
2648 |
2649 | bb_1
2650 | if (i < N) goto bb_5 else goto bb_2
2651 | end_bb_1
2652 |
2653 | bb_2
2654 | cond = some_computation;
2655 | if (cond) goto bb_3 else goto bb_4
2656 | end_bb_2
2657 |
2658 | bb_3
2659 | cond = some_computation;
2660 | A[i] = expr;
2661 | goto bb_4
2662 | end_bb_3
2663 |
2664 | bb_4
2665 | goto bb_1
2666 | end_bb_4
2667
2668 predicate_statements then predicates the memory write as follows:
2669
2670 | bb_0
2671 | i = 0
2672 | end_bb_0
2673 |
2674 | bb_1
2675 | if (i < N) goto bb_5 else goto bb_2
2676 | end_bb_1
2677 |
2678 | bb_2
2679 | if (cond) goto bb_3 else goto bb_4
2680 | end_bb_2
2681 |
2682 | bb_3
2683 | cond = some_computation;
2684 | A[i] = cond ? expr : A[i];
2685 | goto bb_4
2686 | end_bb_3
2687 |
2688 | bb_4
2689 | goto bb_1
2690 | end_bb_4
2691
2692 and finally combine_blocks removes the basic block boundaries making
2693 the loop vectorizable:
2694
2695 | bb_0
2696 | i = 0
2697 | if (i < N) goto bb_5 else goto bb_1
2698 | end_bb_0
2699 |
2700 | bb_1
2701 | cond = some_computation;
2702 | A[i] = cond ? expr : A[i];
2703 | if (i < N) goto bb_5 else goto bb_4
2704 | end_bb_1
2705 |
2706 | bb_4
2707 | goto bb_1
2708 | end_bb_4
2709*/
2710
2711static void
2712predicate_statements (loop_p loop)
2713{
2714 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2715 auto_vec<int, 1> vect_sizes;
2716 auto_vec<tree, 1> vect_masks;
2717 hash_set<tree_ssa_name_hash> ssa_names;
2718
2719 for (i = 1; i < orig_loop_num_nodes; i++)
2720 {
2721 gimple_stmt_iterator gsi;
2722 basic_block bb = ifc_bbs[i];
2723 tree cond = bb_predicate (bb);
2724 bool swap;
2725 int index;
2726
2727 if (is_true_predicate (cond))
2728 continue;
2729
2730 swap = false;
2731 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2732 {
2733 swap = true;
2734 cond = TREE_OPERAND (cond, 0);
2735 }
2736
2737 vect_sizes.truncate (size: 0);
2738 vect_masks.truncate (size: 0);
2739
2740 for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi);)
2741 {
2742 gassign *stmt = dyn_cast <gassign *> (p: gsi_stmt (i: gsi));
2743 tree lhs;
2744 if (!stmt)
2745 ;
2746 else if (is_false_predicate (cond)
2747 && gimple_vdef (g: stmt))
2748 {
2749 unlink_stmt_vdef (stmt);
2750 gsi_remove (&gsi, true);
2751 release_defs (stmt);
2752 continue;
2753 }
2754 else if (gimple_plf (stmt, plf: GF_PLF_2)
2755 && is_gimple_assign (gs: stmt))
2756 {
2757 tree lhs = gimple_assign_lhs (gs: stmt);
2758 tree mask;
2759 gimple *new_stmt;
2760 gimple_seq stmts = NULL;
2761 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2762 /* We checked before setting GF_PLF_2 that an equivalent
2763 integer mode exists. */
2764 int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
2765 if (!vect_sizes.is_empty ()
2766 && (index = mask_exists (size: bitsize, vec: vect_sizes)) != -1)
2767 /* Use created mask. */
2768 mask = vect_masks[index];
2769 else
2770 {
2771 if (COMPARISON_CLASS_P (cond))
2772 mask = gimple_build (seq: &stmts, TREE_CODE (cond),
2773 boolean_type_node,
2774 TREE_OPERAND (cond, 0),
2775 TREE_OPERAND (cond, 1));
2776 else
2777 mask = cond;
2778
2779 if (swap)
2780 {
2781 tree true_val
2782 = constant_boolean_node (true, TREE_TYPE (mask));
2783 mask = gimple_build (seq: &stmts, code: BIT_XOR_EXPR,
2784 TREE_TYPE (mask), ops: mask, ops: true_val);
2785 }
2786 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2787
2788 /* Save mask and its size for further use. */
2789 vect_sizes.safe_push (obj: bitsize);
2790 vect_masks.safe_push (obj: mask);
2791 }
2792 if (gimple_assign_single_p (gs: stmt))
2793 new_stmt = predicate_load_or_store (gsi: &gsi, stmt, mask);
2794 else
2795 new_stmt = predicate_rhs_code (stmt, mask, cond, ssa_names: &ssa_names);
2796
2797 gsi_replace (&gsi, new_stmt, true);
2798 }
2799 else if (((lhs = gimple_assign_lhs (gs: stmt)), true)
2800 && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2801 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2802 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
2803 && arith_code_with_undefined_signed_overflow
2804 (gimple_assign_rhs_code (gs: stmt)))
2805 rewrite_to_defined_overflow (&gsi);
2806 else if (gimple_vdef (g: stmt))
2807 {
2808 tree lhs = gimple_assign_lhs (gs: stmt);
2809 tree rhs = gimple_assign_rhs1 (gs: stmt);
2810 tree type = TREE_TYPE (lhs);
2811
2812 lhs = ifc_temp_var (type, expr: unshare_expr (lhs), gsi: &gsi);
2813 rhs = ifc_temp_var (type, expr: unshare_expr (rhs), gsi: &gsi);
2814 if (swap)
2815 std::swap (a&: lhs, b&: rhs);
2816 cond = force_gimple_operand_gsi (&gsi, unshare_expr (cond), true,
2817 NULL_TREE, true, GSI_SAME_STMT);
2818 rhs = fold_build_cond_expr (type, cond: unshare_expr (cond), rhs, lhs);
2819 gimple_assign_set_rhs1 (gs: stmt, rhs: ifc_temp_var (type, expr: rhs, gsi: &gsi));
2820 update_stmt (s: stmt);
2821 }
2822
2823 if (gimple_plf (stmt: gsi_stmt (i: gsi), plf: GF_PLF_2)
2824 && is_gimple_call (gs: gsi_stmt (i: gsi)))
2825 {
2826 /* Convert functions that have a SIMD clone to IFN_MASK_CALL.
2827 This will cause the vectorizer to match the "in branch"
2828 clone variants, and serves to build the mask vector
2829 in a natural way. */
2830 gcall *call = dyn_cast <gcall *> (p: gsi_stmt (i: gsi));
2831 tree orig_fn = gimple_call_fn (gs: call);
2832 int orig_nargs = gimple_call_num_args (gs: call);
2833 auto_vec<tree> args;
2834 args.safe_push (obj: orig_fn);
2835 for (int i = 0; i < orig_nargs; i++)
2836 args.safe_push (obj: gimple_call_arg (gs: call, index: i));
2837 args.safe_push (obj: cond);
2838
2839 /* Replace the call with an IFN_MASK_CALL that has the extra
2840 condition parameter. */
2841 gcall *new_call = gimple_build_call_internal_vec (IFN_MASK_CALL,
2842 args);
2843 gimple_call_set_lhs (gs: new_call, lhs: gimple_call_lhs (gs: call));
2844 gsi_replace (&gsi, new_call, true);
2845 }
2846
2847 lhs = gimple_get_lhs (gsi_stmt (i: gsi));
2848 if (lhs && TREE_CODE (lhs) == SSA_NAME)
2849 ssa_names.add (k: lhs);
2850 gsi_next (i: &gsi);
2851 }
2852 ssa_names.empty ();
2853 }
2854}
2855
2856/* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2857 other than the exit and latch of the LOOP. Also reset the
2858 GIMPLE_DEBUG information. */
2859
2860static void
2861remove_conditions_and_labels (loop_p loop)
2862{
2863 gimple_stmt_iterator gsi;
2864 unsigned int i;
2865
2866 for (i = 0; i < loop->num_nodes; i++)
2867 {
2868 basic_block bb = ifc_bbs[i];
2869
2870 if (bb_with_exit_edge_p (loop, bb)
2871 || bb == loop->latch)
2872 continue;
2873
2874 for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); )
2875 switch (gimple_code (g: gsi_stmt (i: gsi)))
2876 {
2877 case GIMPLE_COND:
2878 case GIMPLE_LABEL:
2879 gsi_remove (&gsi, true);
2880 break;
2881
2882 case GIMPLE_DEBUG:
2883 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2884 if (gimple_debug_bind_p (s: gsi_stmt (i: gsi)))
2885 {
2886 gimple_debug_bind_reset_value (dbg: gsi_stmt (i: gsi));
2887 update_stmt (s: gsi_stmt (i: gsi));
2888 }
2889 gsi_next (i: &gsi);
2890 break;
2891
2892 default:
2893 gsi_next (i: &gsi);
2894 }
2895 }
2896}
2897
2898/* Combine all the basic blocks from LOOP into one or two super basic
2899 blocks. Replace PHI nodes with conditional modify expressions.
2900 LOOP_VERSIONED should be true if we know that the loop was versioned for
2901 vectorization. */
2902
2903static void
2904combine_blocks (class loop *loop, bool loop_versioned)
2905{
2906 basic_block bb, exit_bb, merge_target_bb;
2907 unsigned int orig_loop_num_nodes = loop->num_nodes;
2908 unsigned int i;
2909 edge e;
2910 edge_iterator ei;
2911
2912 /* Reset flow-sensitive info before predicating stmts or PHIs we
2913 might fold. */
2914 bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2915 for (i = 0; i < orig_loop_num_nodes; i++)
2916 {
2917 bb = ifc_bbs[i];
2918 predicated[i] = is_predicated (bb);
2919 if (predicated[i])
2920 {
2921 for (auto gsi = gsi_start_phis (bb);
2922 !gsi_end_p (i: gsi); gsi_next (i: &gsi))
2923 reset_flow_sensitive_info (gimple_phi_result (gs: *gsi));
2924 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
2925 {
2926 gimple *stmt = gsi_stmt (i: gsi);
2927 ssa_op_iter i;
2928 tree op;
2929 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2930 reset_flow_sensitive_info (op);
2931 }
2932 }
2933 }
2934
2935 remove_conditions_and_labels (loop);
2936 insert_gimplified_predicates (loop);
2937 predicate_all_scalar_phis (loop, loop_versioned);
2938
2939 if (need_to_predicate || need_to_rewrite_undefined)
2940 predicate_statements (loop);
2941
2942 /* Merge basic blocks. */
2943 exit_bb = single_exit (loop)->src;
2944 gcc_assert (exit_bb != loop->latch);
2945 for (i = 0; i < orig_loop_num_nodes; i++)
2946 {
2947 bb = ifc_bbs[i];
2948 free_bb_predicate (bb);
2949 }
2950
2951 merge_target_bb = loop->header;
2952
2953 /* Get at the virtual def valid for uses starting at the first block
2954 we merge into the header. Without a virtual PHI the loop has the
2955 same virtual use on all stmts. */
2956 gphi *vphi = get_virtual_phi (loop->header);
2957 tree last_vdef = NULL_TREE;
2958 if (vphi)
2959 {
2960 last_vdef = gimple_phi_result (gs: vphi);
2961 for (gimple_stmt_iterator gsi = gsi_start_bb (bb: loop->header);
2962 ! gsi_end_p (i: gsi); gsi_next (i: &gsi))
2963 if (gimple_vdef (g: gsi_stmt (i: gsi)))
2964 last_vdef = gimple_vdef (g: gsi_stmt (i: gsi));
2965 }
2966 for (i = 1; i < orig_loop_num_nodes; i++)
2967 {
2968 gimple_stmt_iterator gsi;
2969 gimple_stmt_iterator last;
2970
2971 bb = ifc_bbs[i];
2972
2973 if (bb == exit_bb || bb == loop->latch)
2974 continue;
2975
2976 /* We release virtual PHIs late because we have to propagate them
2977 out using the current VUSE. The def might be the one used
2978 after the loop. */
2979 vphi = get_virtual_phi (bb);
2980 if (vphi)
2981 {
2982 /* When there are only loads inside the loop a stray virtual
2983 PHI merging the uses can appear; update last_vdef from
2984 it. */
2985 if (!last_vdef)
2986 last_vdef = gimple_phi_arg_def (gs: vphi, index: 0);
2987 imm_use_iterator iter;
2988 use_operand_p use_p;
2989 gimple *use_stmt;
2990 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2991 {
2992 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2993 SET_USE (use_p, last_vdef);
2994 }
2995 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2996 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2997 gsi = gsi_for_stmt (vphi);
2998 remove_phi_node (&gsi, true);
2999 }
3000
3001 /* Make the stmts members of loop->header and clear range info from all
3002 stmts in BB, which are now no longer executed conditionally on a
3003 predicate we could have derived the range info from. */
3004 for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
3005 {
3006 gimple *stmt = gsi_stmt (i: gsi);
3007 gimple_set_bb (stmt, merge_target_bb);
3008 /* Update virtual operands. */
3009 if (last_vdef)
3010 {
3011 use_operand_p use_p = ssa_vuse_operand (stmt);
3012 if (use_p
3013 && USE_FROM_PTR (use_p) != last_vdef)
3014 SET_USE (use_p, last_vdef);
3015 if (gimple_vdef (g: stmt))
3016 last_vdef = gimple_vdef (g: stmt);
3017 }
3018 else
3019 /* If this is the first load we arrive at, update last_vdef
3020 so we handle stray PHIs correctly. */
3021 last_vdef = gimple_vuse (g: stmt);
3022 }
3023
3024 /* Update stmt list. */
3025 last = gsi_last_bb (bb: merge_target_bb);
3026 gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
3027 set_bb_seq (bb, NULL);
3028 }
3029
3030 /* Fixup virtual operands in the exit block. */
3031 if (exit_bb
3032 && exit_bb != loop->header)
3033 {
3034 /* We release virtual PHIs late because we have to propagate them
3035 out using the current VUSE. The def might be the one used
3036 after the loop. */
3037 vphi = get_virtual_phi (exit_bb);
3038 if (vphi)
3039 {
3040 /* When there are only loads inside the loop a stray virtual
3041 PHI merging the uses can appear; update last_vdef from
3042 it. */
3043 if (!last_vdef)
3044 last_vdef = gimple_phi_arg_def (gs: vphi, index: 0);
3045 imm_use_iterator iter;
3046 use_operand_p use_p;
3047 gimple *use_stmt;
3048 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
3049 {
3050 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3051 SET_USE (use_p, last_vdef);
3052 }
3053 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
3054 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
3055 gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
3056 remove_phi_node (&gsi, true);
3057 }
3058 }
3059
3060 /* Now remove all the edges in the loop, except for those from the exit
3061 block and delete the blocks we elided. */
3062 for (i = 1; i < orig_loop_num_nodes; i++)
3063 {
3064 bb = ifc_bbs[i];
3065
3066 for (ei = ei_start (bb->preds); (e = ei_safe_edge (i: ei));)
3067 {
3068 if (e->src == exit_bb)
3069 ei_next (i: &ei);
3070 else
3071 remove_edge (e);
3072 }
3073 }
3074 for (i = 1; i < orig_loop_num_nodes; i++)
3075 {
3076 bb = ifc_bbs[i];
3077
3078 if (bb == exit_bb || bb == loop->latch)
3079 continue;
3080
3081 delete_basic_block (bb);
3082 }
3083
3084 /* Re-connect the exit block. */
3085 if (exit_bb != NULL)
3086 {
3087 if (exit_bb != loop->header)
3088 {
3089 /* Connect this node to loop header. */
3090 make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
3091 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
3092 }
3093
3094 /* Redirect non-exit edges to loop->latch. */
3095 FOR_EACH_EDGE (e, ei, exit_bb->succs)
3096 {
3097 if (!loop_exit_edge_p (loop, e))
3098 redirect_edge_and_branch (e, loop->latch);
3099 }
3100 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
3101 }
3102 else
3103 {
3104 /* If the loop does not have an exit, reconnect header and latch. */
3105 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
3106 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
3107 }
3108
3109 /* If possible, merge loop header to the block with the exit edge.
3110 This reduces the number of basic blocks to two, to please the
3111 vectorizer that handles only loops with two nodes. */
3112 if (exit_bb
3113 && exit_bb != loop->header)
3114 {
3115 if (can_merge_blocks_p (loop->header, exit_bb))
3116 merge_blocks (loop->header, exit_bb);
3117 }
3118
3119 free (ptr: ifc_bbs);
3120 ifc_bbs = NULL;
3121 free (ptr: predicated);
3122}
3123
3124/* Version LOOP before if-converting it; the original loop
3125 will be if-converted, the new copy of the loop will not,
3126 and the LOOP_VECTORIZED internal call will be guarding which
3127 loop to execute. The vectorizer pass will fold this
3128 internal call into either true or false.
3129
3130 Note that this function intentionally invalidates profile. Both edges
3131 out of LOOP_VECTORIZED must have 100% probability so the profile remains
3132 consistent after the condition is folded in the vectorizer. */
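/* Schematically, after versioning the CFG looks like

     if (.LOOP_VECTORIZED (L1, L2))
       <original LOOP, to be if-converted>
     else
       <copy of LOOP, with dont_vectorize set>

   where L1 and L2 are the loop numbers of the two versions.  */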
3133
3134static class loop *
3135version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
3136{
3137 basic_block cond_bb;
3138 tree cond = make_ssa_name (boolean_type_node);
3139 class loop *new_loop;
3140 gimple *g;
3141 gimple_stmt_iterator gsi;
3142 unsigned int save_length = 0;
3143
3144 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
3145 build_int_cst (integer_type_node, loop->num),
3146 integer_zero_node);
3147 gimple_call_set_lhs (gs: g, lhs: cond);
3148
3149 void **saved_preds = NULL;
3150 if (any_complicated_phi || need_to_predicate)
3151 {
3152 /* Save BB->aux around loop_version as that uses the same field. */
3153 save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
3154 saved_preds = XALLOCAVEC (void *, save_length);
3155 for (unsigned i = 0; i < save_length; i++)
3156 saved_preds[i] = ifc_bbs[i]->aux;
3157 }
3158
3159 initialize_original_copy_tables ();
3160 /* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
3161 is re-merged in the vectorizer. */
3162 new_loop = loop_version (loop, cond, &cond_bb,
3163 profile_probability::always (),
3164 profile_probability::always (),
3165 profile_probability::always (),
3166 profile_probability::always (), true);
3167 free_original_copy_tables ();
3168
3169 if (any_complicated_phi || need_to_predicate)
3170 for (unsigned i = 0; i < save_length; i++)
3171 ifc_bbs[i]->aux = saved_preds[i];
3172
3173 if (new_loop == NULL)
3174 return NULL;
3175
3176 new_loop->dont_vectorize = true;
3177 new_loop->force_vectorize = false;
3178 gsi = gsi_last_bb (bb: cond_bb);
3179 gimple_call_set_arg (gs: g, index: 1, arg: build_int_cst (integer_type_node, new_loop->num));
3180 if (preds)
3181 preds->safe_push (obj: g);
3182 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3183 update_ssa (TODO_update_ssa_no_phi);
3184 return new_loop;
3185}
3186
3187/* Return true when LOOP satisfies the following conditions that will
3188 allow it to be recognized by the vectorizer for outer-loop
3189 vectorization:
3190 - The loop is not the root node of the loop tree.
3191 - The loop has exactly one inner loop.
3192 - The loop has a single exit.
3193 - The loop header has a single successor, which is the inner
3194 loop header.
3195 - Each of the inner and outer loop latches have a single
3196 predecessor.
3197 - The loop exit block has a single predecessor, which is the
3198 inner loop's exit block. */
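/* A typical shape satisfying these conditions is (schematic):

     for (i = 0; i < n; i++)      <- LOOP, single exit
       for (j = 0; j < m; j++)    <- its only inner loop
         ...

   with straight-line control flow between the headers, latches and
   exit blocks of the two loops.  */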
3199
3200static bool
3201versionable_outer_loop_p (class loop *loop)
3202{
3203 if (!loop_outer (loop)
3204 || loop->dont_vectorize
3205 || !loop->inner
3206 || loop->inner->next
3207 || !single_exit (loop)
3208 || !single_succ_p (bb: loop->header)
3209 || single_succ (bb: loop->header) != loop->inner->header
3210 || !single_pred_p (bb: loop->latch)
3211 || !single_pred_p (bb: loop->inner->latch))
3212 return false;
3213
3214 basic_block outer_exit = single_pred (bb: loop->latch);
3215 basic_block inner_exit = single_pred (bb: loop->inner->latch);
3216
3217 if (!single_pred_p (bb: outer_exit) || single_pred (bb: outer_exit) != inner_exit)
3218 return false;
3219
3220 if (dump_file)
3221 fprintf (stream: dump_file, format: "Found vectorizable outer loop for versioning\n");
3222
3223 return true;
3224}
3225
3226/* Performs splitting of critical edges. Skip splitting and return false
3227 if LOOP will not be converted because:
3228
3229 - LOOP is not well formed.
3230 - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
3231
3232 The last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
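/* As a reminder, a critical edge is an edge whose source has more than
   one successor and whose destination has more than one predecessor,
   e.g. an edge going from a two-way conditional straight into a join
   block that has another incoming edge.  Splitting such an edge
   inserts an empty block on it, giving the predicate a block of its
   own.  */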
3233
3234static bool
3235ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
3236{
3237 basic_block *body;
3238 basic_block bb;
3239 unsigned int num = loop->num_nodes;
3240 unsigned int i;
3241 edge e;
3242 edge_iterator ei;
3243 auto_vec<edge> critical_edges;
3244
3245 /* Loop is not well formed. */
3246 if (loop->inner)
3247 return false;
3248
3249 body = get_loop_body (loop);
3250 for (i = 0; i < num; i++)
3251 {
3252 bb = body[i];
3253 if (!aggressive_if_conv
3254 && phi_nodes (bb)
3255 && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
3256 {
3257 if (dump_file && (dump_flags & TDF_DETAILS))
3258 fprintf (stream: dump_file,
3259 format: "BB %d has complicated PHI with more than %u args.\n",
3260 bb->index, MAX_PHI_ARG_NUM);
3261
3262 free (ptr: body);
3263 return false;
3264 }
3265 if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
3266 continue;
3267
3268 /* Skip basic blocks not ending with conditional branch. */
3269 if (!safe_is_a <gcond *> (p: *gsi_last_bb (bb)))
3270 continue;
3271
3272 FOR_EACH_EDGE (e, ei, bb->succs)
3273 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
3274 critical_edges.safe_push (obj: e);
3275 }
3276 free (ptr: body);
3277
3278 while (critical_edges.length () > 0)
3279 {
3280 e = critical_edges.pop ();
3281 /* Don't split if bb can be predicated along non-critical edge. */
3282 if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (bb: e->dest))
3283 split_edge (e);
3284 }
3285
3286 return true;
3287}
3288
3289/* Delete redundant statements produced by predication that would prevent
3290 loop vectorization. */
3291
3292static void
3293ifcvt_local_dce (class loop *loop)
3294{
3295 gimple *stmt;
3296 gimple *stmt1;
3297 gimple *phi;
3298 gimple_stmt_iterator gsi;
3299 auto_vec<gimple *> worklist;
3300 enum gimple_code code;
3301 use_operand_p use_p;
3302 imm_use_iterator imm_iter;
3303
3304 /* The loop has a single BB only. */
3305 basic_block bb = loop->header;
3306 tree latch_vdef = NULL_TREE;
3307
3308 worklist.create (nelems: 64);
3309 /* Consider all PHIs as live statements. */
3310 for (gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
3311 {
3312 phi = gsi_stmt (i: gsi);
3313 gimple_set_plf (stmt: phi, plf: GF_PLF_2, val_p: true);
3314 worklist.safe_push (obj: phi);
3315 if (virtual_operand_p (op: gimple_phi_result (gs: phi)))
3316 latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3317 }
3318 /* Consider load/store statements, CALL and COND as live. */
3319 for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
3320 {
3321 stmt = gsi_stmt (i: gsi);
3322 if (is_gimple_debug (gs: stmt))
3323 {
3324 gimple_set_plf (stmt, plf: GF_PLF_2, val_p: true);
3325 continue;
3326 }
3327 if (gimple_store_p (gs: stmt) || gimple_assign_load_p (stmt))
3328 {
3329 gimple_set_plf (stmt, plf: GF_PLF_2, val_p: true);
3330 worklist.safe_push (obj: stmt);
3331 continue;
3332 }
3333 code = gimple_code (g: stmt);
3334 if (code == GIMPLE_COND || code == GIMPLE_CALL)
3335 {
3336 gimple_set_plf (stmt, plf: GF_PLF_2, val_p: true);
3337 worklist.safe_push (obj: stmt);
3338 continue;
3339 }
3340 gimple_set_plf (stmt, plf: GF_PLF_2, val_p: false);
3341
3342 if (code == GIMPLE_ASSIGN)
3343 {
3344 tree lhs = gimple_assign_lhs (gs: stmt);
3345 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3346 {
3347 stmt1 = USE_STMT (use_p);
3348 if (!is_gimple_debug (gs: stmt1) && gimple_bb (g: stmt1) != bb)
3349 {
3350 gimple_set_plf (stmt, plf: GF_PLF_2, val_p: true);
3351 worklist.safe_push (obj: stmt);
3352 break;
3353 }
3354 }
3355 }
3356 }
3357 /* Propagate liveness through arguments of live stmt. */
3358 while (worklist.length () > 0)
3359 {
3360 ssa_op_iter iter;
3361 use_operand_p use_p;
3362 tree use;
3363
3364 stmt = worklist.pop ();
3365 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3366 {
3367 use = USE_FROM_PTR (use_p);
3368 if (TREE_CODE (use) != SSA_NAME)
3369 continue;
3370 stmt1 = SSA_NAME_DEF_STMT (use);
3371 if (gimple_bb (g: stmt1) != bb || gimple_plf (stmt: stmt1, plf: GF_PLF_2))
3372 continue;
3373 gimple_set_plf (stmt: stmt1, plf: GF_PLF_2, val_p: true);
3374 worklist.safe_push (obj: stmt1);
3375 }
3376 }
3377 /* Delete dead statements. */
3378 gsi = gsi_last_bb (bb);
3379 while (!gsi_end_p (i: gsi))
3380 {
3381 gimple_stmt_iterator gsiprev = gsi;
3382 gsi_prev (i: &gsiprev);
3383 stmt = gsi_stmt (i: gsi);
3384 if (gimple_store_p (gs: stmt) && gimple_vdef (g: stmt))
3385 {
3386 tree lhs = gimple_get_lhs (stmt);
3387 ao_ref write;
3388 ao_ref_init (&write, lhs);
3389
3390 if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
3391 == DSE_STORE_DEAD)
3392 delete_dead_or_redundant_assignment (&gsi, "dead");
3393 gsi = gsiprev;
3394 continue;
3395 }
3396
3397 if (gimple_plf (stmt, plf: GF_PLF_2))
3398 {
3399 gsi = gsiprev;
3400 continue;
3401 }
3402 if (dump_file && (dump_flags & TDF_DETAILS))
3403 {
3404 fprintf (stream: dump_file, format: "Delete dead stmt in bb#%d\n", bb->index);
3405 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3406 }
3407 gsi_remove (&gsi, true);
3408 release_defs (stmt);
3409 gsi = gsiprev;
3410 }
3411}
3412
3413/* Return true if VALUE is already available on edge PE. */
3414
3415static bool
3416ifcvt_available_on_edge_p (edge pe, tree value)
3417{
3418 if (is_gimple_min_invariant (value))
3419 return true;
3420
3421 if (TREE_CODE (value) == SSA_NAME)
3422 {
3423 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
3424 if (!def_bb || dominated_by_p (CDI_DOMINATORS, pe->dest, def_bb))
3425 return true;
3426 }
3427
3428 return false;
3429}
3430
3431/* Return true if STMT can be hoisted from if-converted loop LOOP to
3432 edge PE. */
3433
3434static bool
3435ifcvt_can_hoist (class loop *loop, edge pe, gimple *stmt)
3436{
3437 if (auto *call = dyn_cast<gcall *> (p: stmt))
3438 {
3439 if (gimple_call_internal_p (gs: call)
3440 && internal_fn_mask_index (gimple_call_internal_fn (gs: call)) >= 0)
3441 return false;
3442 }
3443 else if (auto *assign = dyn_cast<gassign *> (p: stmt))
3444 {
3445 if (gimple_assign_rhs_code (gs: assign) == COND_EXPR)
3446 return false;
3447 }
3448 else
3449 return false;
3450
3451 if (gimple_has_side_effects (stmt)
3452 || gimple_could_trap_p (stmt)
3453 || stmt_could_throw_p (cfun, stmt)
3454 || gimple_vdef (g: stmt)
3455 || gimple_vuse (g: stmt))
3456 return false;
3457
3458 int num_args = gimple_num_args (gs: stmt);
3459 if (pe != loop_preheader_edge (loop))
3460 {
3461 for (int i = 0; i < num_args; ++i)
3462 if (!ifcvt_available_on_edge_p (pe, value: gimple_arg (gs: stmt, i)))
3463 return false;
3464 }
3465 else
3466 {
3467 for (int i = 0; i < num_args; ++i)
3468 if (!expr_invariant_in_loop_p (loop, gimple_arg (gs: stmt, i)))
3469 return false;
3470 }
3471
3472 return true;
3473}
3474
3475/* Hoist invariant statements from LOOP to edge PE. */
3476
3477static void
3478ifcvt_hoist_invariants (class loop *loop, edge pe)
3479{
3480 /* Only hoist from the now unconditionally executed part of the loop. */
3481 basic_block bb = loop->header;
3482 gimple_stmt_iterator hoist_gsi = {};
3483 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi);)
3484 {
3485 gimple *stmt = gsi_stmt (i: gsi);
3486 if (ifcvt_can_hoist (loop, pe, stmt))
3487 {
3488 /* Once we've hoisted one statement, insert other statements
3489 after it. */
3490 gsi_remove (&gsi, false);
3491 if (hoist_gsi.ptr)
3492 gsi_insert_after (&hoist_gsi, stmt, GSI_NEW_STMT);
3493 else
3494 {
3495 gsi_insert_on_edge_immediate (pe, stmt);
3496 hoist_gsi = gsi_for_stmt (stmt);
3497 }
3498 continue;
3499 }
3500 gsi_next (i: &gsi);
3501 }
3502}
3503
3504/* Returns the DECL_BIT_FIELD_REPRESENTATIVE of the bitfield access in STMT
3505 if the access can be lowered and NULL_TREE otherwise. If BITPOS is not
3506 NULL it will hold the tree giving the bit position of the access within
3507 the representative, and STRUCT_EXPR, if not NULL, will hold the tree
3508 representing the base struct of this bitfield. */
3509
3510static tree
3511get_bitfield_rep (gassign *stmt, bool write, tree *bitpos,
3512 tree *struct_expr)
3513{
3514 tree comp_ref = write ? gimple_assign_lhs (gs: stmt)
3515 : gimple_assign_rhs1 (gs: stmt);
3516
3517 tree field_decl = TREE_OPERAND (comp_ref, 1);
3518 tree ref_offset = component_ref_field_offset (comp_ref);
3519 tree rep_decl = DECL_BIT_FIELD_REPRESENTATIVE (field_decl);
3520
3521 /* Bail out if the representative is not a suitable type for a scalar
3522 register variable. */
3523 if (!is_gimple_reg_type (TREE_TYPE (rep_decl)))
3524 return NULL_TREE;
3525
3526 /* Bail out if the DECL_SIZE of the field_decl isn't the same as the BF's
3527 precision. */
3528 unsigned HOST_WIDE_INT bf_prec
3529 = TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt)));
3530 if (compare_tree_int (DECL_SIZE (field_decl), bf_prec) != 0)
3531 return NULL_TREE;
3532
3533 if (TREE_CODE (DECL_FIELD_OFFSET (rep_decl)) != INTEGER_CST
3534 || TREE_CODE (ref_offset) != INTEGER_CST)
3535 {
3536 if (dump_file && (dump_flags & TDF_DETAILS))
3537 fprintf (stream: dump_file, format: "\t Bitfield NOT OK to lower,"
3538 " offset is non-constant.\n");
3539 return NULL_TREE;
3540 }
3541
3542 if (struct_expr)
3543 *struct_expr = TREE_OPERAND (comp_ref, 0);
3544
3545 if (bitpos)
3546 {
3547 /* To calculate the bit position of the BITFIELD_REF we have to determine
3548 where our bitfield starts in relation to the container REP_DECL. The
3549 DECL_FIELD_OFFSET of the original bitfield's member FIELD_DECL tells
3550 us how many bytes from the start of the structure there are until the
3551 start of the group of bitfield members the FIELD_DECL belongs to,
3552 whereas DECL_FIELD_BIT_OFFSET will tell us how many bits from that
3553 position our actual bitfield member starts. For the container
3554 REP_DECL adding DECL_FIELD_OFFSET and DECL_FIELD_BIT_OFFSET will tell
3555 us the distance between the start of the structure and the start of
3556 the container, though the former is in bytes and the latter in
3557 bits. With this in mind we calculate the bit position of our new
3558 BITFIELD_REF by subtracting the number of bits between the start of
3559 the structure and the container from the number of bits from the start
3560 of the structure and the actual bitfield member. */
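/* A worked example with invented numbers: for
   DECL_FIELD_OFFSET (field_decl) == 4, DECL_FIELD_BIT_OFFSET (field_decl)
   == 3, DECL_FIELD_OFFSET (rep_decl) == 4 and DECL_FIELD_BIT_OFFSET
   (rep_decl) == 0 we get bf_pos = 4 * 8 + 3 = 35 and rep_pos = 4 * 8 = 32,
   hence a bit position of 35 - 32 = 3 within the representative.  */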
3561 tree bf_pos = fold_build2 (MULT_EXPR, bitsizetype,
3562 ref_offset,
3563 build_int_cst (bitsizetype, BITS_PER_UNIT));
3564 bf_pos = fold_build2 (PLUS_EXPR, bitsizetype, bf_pos,
3565 DECL_FIELD_BIT_OFFSET (field_decl));
3566 tree rep_pos = fold_build2 (MULT_EXPR, bitsizetype,
3567 DECL_FIELD_OFFSET (rep_decl),
3568 build_int_cst (bitsizetype, BITS_PER_UNIT));
3569 rep_pos = fold_build2 (PLUS_EXPR, bitsizetype, rep_pos,
3570 DECL_FIELD_BIT_OFFSET (rep_decl));
3571
3572 *bitpos = fold_build2 (MINUS_EXPR, bitsizetype, bf_pos, rep_pos);
3573 }
3574
3575 return rep_decl;
3576
3577}
3578
3579/* Lowers the bitfield access in STMT (WRITE says whether it is a write).
3580 For a write like:
3581
3582 struct.bf = _1;
3583
3584 lower to:
3585
3586 __ifc_1 = struct.<representative>;
3587 __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos);
3588 struct.<representative> = __ifc_2;
3589
3590 For a read:
3591
3592 _1 = struct.bf;
3593
3594 lower to:
3595
3596 __ifc_1 = struct.<representative>;
3597 _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos);
3598
3599 where representative is a legal load that contains the bitfield value,
3600 bitsize is the size of the bitfield and bitpos the offset to the start of
3601 the bitfield within the representative. */
3602
static void
lower_bitfield (gassign *stmt, bool write)
{
  tree struct_expr;
  tree bitpos;
  tree rep_decl = get_bitfield_rep (stmt, write, &bitpos, &struct_expr);
  tree rep_type = TREE_TYPE (rep_decl);
  tree bf_type = TREE_TYPE (gimple_assign_lhs (stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Lowering:\n");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
      fprintf (dump_file, "to:\n");
    }

  /* REP_COMP_REF is a COMPONENT_REF for the representative.  NEW_VAL is
     its defining SSA_NAME.  */
  tree rep_comp_ref = build3 (COMPONENT_REF, rep_type, struct_expr, rep_decl,
                              NULL_TREE);
  tree new_val = ifc_temp_var (rep_type, rep_comp_ref, &gsi);

  if (dump_file && (dump_flags & TDF_DETAILS))
    print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);

  if (write)
    {
      new_val = ifc_temp_var (rep_type,
                              build3 (BIT_INSERT_EXPR, rep_type, new_val,
                                      unshare_expr (gimple_assign_rhs1 (stmt)),
                                      bitpos), &gsi);

      if (dump_file && (dump_flags & TDF_DETAILS))
        print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);

      gimple *new_stmt = gimple_build_assign (unshare_expr (rep_comp_ref),
                                              new_val);
      gimple_move_vops (new_stmt, stmt);
      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);

      if (dump_file && (dump_flags & TDF_DETAILS))
        print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
    }
  else
    {
      tree bfr = build3 (BIT_FIELD_REF, bf_type, new_val,
                         build_int_cst (bitsizetype, TYPE_PRECISION (bf_type)),
                         bitpos);
      new_val = ifc_temp_var (bf_type, bfr, &gsi);

      gimple *new_stmt = gimple_build_assign (gimple_assign_lhs (stmt),
                                              new_val);
      gimple_move_vops (new_stmt, stmt);
      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);

      if (dump_file && (dump_flags & TDF_DETAILS))
        print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
    }

  gsi_remove (&gsi, true);
}
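
/* As a source-level illustration (an assumed layout, not taken from the
   original sources): for

     struct S { unsigned a : 3; unsigned b : 5; } *p;

   where both bitfields share one representative field REP of integer
   type, a write "p->b = x" in the loop becomes a load of REP, a
   BIT_INSERT_EXPR placing the 5 value bits at the computed bit position,
   and a store of the whole of REP; a read "y = p->a" becomes a load of
   REP followed by a BIT_FIELD_REF extracting 3 bits.  Either way the
   resulting memory accesses are full loads and stores of REP, which the
   vectorizer can handle.  */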

/* Return TRUE if there are bitfields to lower in this LOOP.  Fill
   READS_TO_LOWER and WRITES_TO_LOWER with the bitfield accesses found.  */

static bool
bitfields_to_lower_p (class loop *loop,
                      vec <gassign *> &reads_to_lower,
                      vec <gassign *> &writes_to_lower)
{
  gimple_stmt_iterator gsi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Analyzing loop %d for bitfields:\n", loop->num);

  for (unsigned i = 0; i < loop->num_nodes; ++i)
    {
      basic_block bb = ifc_bbs[i];
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
          if (!stmt)
            continue;

          tree op = gimple_assign_lhs (stmt);
          bool write = TREE_CODE (op) == COMPONENT_REF;

          if (!write)
            op = gimple_assign_rhs1 (stmt);

          if (TREE_CODE (op) != COMPONENT_REF)
            continue;

          if (DECL_BIT_FIELD_TYPE (TREE_OPERAND (op, 1)))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);

              if (TREE_THIS_VOLATILE (op))
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "\t Bitfield NOT OK to lower,"
                             " the access is volatile.\n");
                  return false;
                }

              if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "\t Bitfield NOT OK to lower,"
                             " field type is not integral.\n");
                  return false;
                }

              if (!get_bitfield_rep (stmt, write, NULL, NULL))
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "\t Bitfield NOT OK to lower,"
                             " representative is BLKmode.\n");
                  return false;
                }

              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "\tBitfield OK to lower.\n");
              if (write)
                writes_to_lower.safe_push (stmt);
              else
                reads_to_lower.safe_push (stmt);
            }
        }
    }
  return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
}
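
/* As an illustration (hypothetical input, not from the original sources),
   in a loop like

     struct S { int i : 8; } *ptr;
     for (int n = 0; n < N; n++)
       if (ptr[n].i > t)
         ptr[n].i = 0;

   the scan above finds one bitfield read (in the condition) and one
   bitfield write, queuing them in READS_TO_LOWER and WRITES_TO_LOWER
   respectively, provided the accesses are non-volatile, of integral type
   and have a non-BLKmode representative.  */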

/* If-convert LOOP when it is legal.  For the moment this pass has no
   profitability analysis.  Returns non-zero todo flags when something
   changed.  */

unsigned int
tree_if_conversion (class loop *loop, vec<gimple *> *preds)
{
  unsigned int todo = 0;
  bool aggressive_if_conv;
  class loop *rloop;
  auto_vec <gassign *, 4> reads_to_lower;
  auto_vec <gassign *, 4> writes_to_lower;
  bitmap exit_bbs;
  edge pe;
  auto_vec<data_reference_p, 10> refs;
  bool loop_versioned;

 again:
  rloop = NULL;
  ifc_bbs = NULL;
  need_to_lower_bitfields = false;
  need_to_ifcvt = false;
  need_to_predicate = false;
  need_to_rewrite_undefined = false;
  any_complicated_phi = false;
  loop_versioned = false;

  /* Apply more aggressive if-conversion when the loop or its outer loop
     was marked with a simd pragma.  In that case we try to if-convert
     loops containing PHIs with more than MAX_PHI_ARG_NUM arguments.  */
  aggressive_if_conv = loop->force_vectorize;
  if (!aggressive_if_conv)
    {
      class loop *outer_loop = loop_outer (loop);
      if (outer_loop && outer_loop->force_vectorize)
        aggressive_if_conv = true;
    }
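
  /* For example, a loop annotated with "#pragma omp simd" has
     force_vectorize set and is handled aggressively here, as is a loop
     directly nested in such a loop via the outer-loop check above.  */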

  /* If there are more than two BBs in the loop then there is at least one
     if to convert.  */
  if (loop->num_nodes > 2
      && !ifcvt_split_critical_edges (loop, aggressive_if_conv))
    goto cleanup;

  ifc_bbs = get_loop_body_in_if_conv_order (loop);
  if (!ifc_bbs)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Irreducible loop\n");
      goto cleanup;
    }

  if (find_data_references_in_loop (loop, &refs) == chrec_dont_know)
    goto cleanup;

  if (loop->num_nodes > 2)
    {
      /* More than one loop exit is too much to handle.  */
      if (!single_exit (loop))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "Cannot if-convert due to multiple exits\n");
        }
      else
        {
          need_to_ifcvt = true;

          if (!if_convertible_loop_p (loop, &refs)
              || !dbg_cnt (if_conversion_tree))
            goto cleanup;

          if ((need_to_predicate || any_complicated_phi)
              && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
                  || loop->dont_vectorize))
            goto cleanup;
        }
    }

  if ((flag_tree_loop_vectorize || loop->force_vectorize)
      && !loop->dont_vectorize)
    need_to_lower_bitfields = bitfields_to_lower_p (loop, reads_to_lower,
                                                    writes_to_lower);

  if (!need_to_ifcvt && !need_to_lower_bitfields)
    goto cleanup;

  /* The edge to insert invariant stmts on.  */
  pe = loop_preheader_edge (loop);

  /* Since we have no cost model, always version loops unless the user
     specified -ftree-loop-if-convert or unless versioning is required.
     Either version this loop, or if the pattern is right for outer-loop
     vectorization, version the outer loop.  In the latter case we will
     still if-convert the original inner loop.  */
  if (need_to_lower_bitfields
      || need_to_predicate
      || any_complicated_phi
      || flag_tree_loop_if_convert != 1)
    {
      class loop *vloop
        = (versionable_outer_loop_p (loop_outer (loop))
           ? loop_outer (loop) : loop);
      class loop *nloop = version_loop_for_if_conversion (vloop, preds);
      if (nloop == NULL)
        goto cleanup;
      if (vloop != loop)
        {
          /* If versionable_outer_loop_p decided to version the outer loop,
             version also the inner loop of the non-vectorized loop copy.
             So we transform:
               loop1
                 loop2
             into:
               if (LOOP_VECTORIZED (1, 3))
                 {
                   loop1
                     loop2
                 }
               else
                 loop3 (copy of loop1)
                   if (LOOP_VECTORIZED (4, 5))
                     loop4 (copy of loop2)
                   else
                     loop5 (copy of loop4)  */
          gcc_assert (nloop->inner && nloop->inner->next == NULL);
          rloop = nloop->inner;
        }
      else
        /* If we versioned the loop then make sure to insert invariant
           stmts before the .LOOP_VECTORIZED check since the vectorizer
           will re-use that for things like runtime alias versioning
           whose condition can end up using those invariants.  */
        pe = single_pred_edge (gimple_bb (preds->last ()));

      loop_versioned = true;
    }
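
  /* From here on we keep transforming LOOP itself: after versioning it is
     the copy guarded by the true edge of the .LOOP_VECTORIZED check,
     while the copy returned by version_loop_for_if_conversion retains the
     original control flow as the fallback.  With -ftree-loop-if-convert
     and nothing forcing versioning, the single original loop is
     transformed in place.  */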

  if (need_to_lower_bitfields)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "-------------------------\n");
          fprintf (dump_file, "Start lowering bitfields\n");
        }
      while (!reads_to_lower.is_empty ())
        lower_bitfield (reads_to_lower.pop (), false);
      while (!writes_to_lower.is_empty ())
        lower_bitfield (writes_to_lower.pop (), true);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Done lowering bitfields\n");
          fprintf (dump_file, "-------------------------\n");
        }
    }
  if (need_to_ifcvt)
    {
      /* Before we rewrite edges we'll record their original position in
         the edge map such that we can map the edges between the ifcvt and
         the non-ifcvt loop during peeling.  */
      uintptr_t idx = 0;
      for (edge exit : get_loop_exit_edges (loop))
        exit->aux = (void *) idx++;

      /* Now all statements are if-convertible.  Combine all the basic
         blocks into one huge basic block doing the if-conversion
         on-the-fly.  */
      combine_blocks (loop, loop_versioned);
    }

  std::pair <tree, tree> *name_pair;
  unsigned ssa_names_idx;
  FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
    replace_uses_by (name_pair->first, name_pair->second);
  redundant_ssa_names.release ();

  /* Perform local CSE, this especially helps the vectorizer analysis if
     loads and stores are involved.  CSE only the loop body, not the entry
     PHIs, those are to be kept in sync with the non-if-converted copy.
     ??? We'll still keep dead stores though.  */
  exit_bbs = BITMAP_ALLOC (NULL);
  for (edge exit : get_loop_exit_edges (loop))
    bitmap_set_bit (exit_bbs, exit->dest->index);
  todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs,
                     false, true, true);

  /* Delete dead predicate computations.  */
  ifcvt_local_dce (loop);
  BITMAP_FREE (exit_bbs);

  ifcvt_hoist_invariants (loop, pe);

  todo |= TODO_cleanup_cfg;

 cleanup:
  data_reference_p dr;
  unsigned int i;
  for (i = 0; refs.iterate (i, &dr); i++)
    {
      free (dr->aux);
      free_data_ref (dr);
    }
  refs.truncate (0);

  if (ifc_bbs)
    {
      unsigned int i;

      for (i = 0; i < loop->num_nodes; i++)
        free_bb_predicate (ifc_bbs[i]);

      free (ifc_bbs);
      ifc_bbs = NULL;
    }
  if (rloop != NULL)
    {
      loop = rloop;
      reads_to_lower.truncate (0);
      writes_to_lower.truncate (0);
      goto again;
    }

  return todo;
}

/* Tree if-conversion pass management.  */

namespace {

const pass_data pass_data_if_conversion =
{
  GIMPLE_PASS, /* type */
  "ifcvt", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_LOOP_IFCVT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_if_conversion : public gimple_opt_pass
{
public:
  pass_if_conversion (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_if_conversion, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override;
  unsigned int execute (function *) final override;

}; // class pass_if_conversion

bool
pass_if_conversion::gate (function *fun)
{
  return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
           && flag_tree_loop_if_convert != 0)
          || flag_tree_loop_if_convert == 1);
}
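
/* Summarizing the gate: with the default flag_tree_loop_if_convert of -1
   the pass runs whenever loop vectorization is enabled or some loop
   forces it; an explicit -ftree-loop-if-convert (value 1) makes it run
   unconditionally, while -fno-tree-loop-if-convert (value 0) disables it
   even when vectorization is enabled.  */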

unsigned int
pass_if_conversion::execute (function *fun)
{
  unsigned todo = 0;

  if (number_of_loops (fun) <= 1)
    return 0;

  auto_vec<gimple *> preds;
  for (auto loop : loops_list (cfun, 0))
    if (flag_tree_loop_if_convert == 1
        || ((flag_tree_loop_vectorize || loop->force_vectorize)
            && !loop->dont_vectorize))
      todo |= tree_if_conversion (loop, &preds);

  if (todo)
    {
      free_numbers_of_iterations_estimates (fun);
      scev_reset ();
    }

  if (flag_checking)
    {
      basic_block bb;
      FOR_EACH_BB_FN (bb, fun)
        gcc_assert (!bb->aux);
    }

  /* Perform the IL update now; it might elide some loops.  */
  if (todo & TODO_cleanup_cfg)
    {
      cleanup_tree_cfg ();
      if (need_ssa_update_p (fun))
        todo |= TODO_update_ssa;
    }
  if (todo & TODO_update_ssa_any)
    update_ssa (todo & TODO_update_ssa_any);

  /* If if-conversion elided the loop, fall back to the original one.
     Likewise if the loops are not nested in the same outer loop.  */
  for (unsigned i = 0; i < preds.length (); ++i)
    {
      gimple *g = preds[i];
      if (!gimple_bb (g))
        continue;
      auto ifcvt_loop = get_loop (fun, tree_to_uhwi (gimple_call_arg (g, 0)));
      auto orig_loop = get_loop (fun, tree_to_uhwi (gimple_call_arg (g, 1)));
      if (!ifcvt_loop || !orig_loop)
        {
          if (dump_file)
            fprintf (dump_file, "If-converted loop vanished\n");
          fold_loop_internal_call (g, boolean_false_node);
        }
      else if (loop_outer (ifcvt_loop) != loop_outer (orig_loop))
        {
          if (dump_file)
            fprintf (dump_file, "If-converted loop in different outer loop\n");
          fold_loop_internal_call (g, boolean_false_node);
        }
    }

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_if_conversion (gcc::context *ctxt)
{
  return new pass_if_conversion (ctxt);
}
