1/* Loop header copying on trees.
2 Copyright (C) 2004-2024 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 3, or (at your option) any
9later version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "backend.h"
24#include "tree.h"
25#include "gimple.h"
26#include "cfghooks.h"
27#include "tree-pass.h"
28#include "gimple-ssa.h"
29#include "gimple-iterator.h"
30#include "tree-cfg.h"
31#include "tree-into-ssa.h"
32#include "cfgloop.h"
33#include "tree-inline.h"
34#include "tree-ssa-threadedge.h"
35#include "tree-ssa-sccvn.h"
36#include "tree-phinodes.h"
37#include "ssa-iterators.h"
38#include "value-range.h"
39#include "gimple-range.h"
40#include "gimple-range-path.h"
41#include "gimple-pretty-print.h"
42#include "cfganal.h"
43#include "tree-ssa-loop-manip.h"
44#include "tree-ssa-loop-niter.h"
45#include "tree-scalar-evolution.h"
46
47/* Return path query insteance for testing ranges of statements
48 in headers of LOOP contained in basic block BB.
49 Use RANGER instance. */
50
51static path_range_query *
52get_range_query (class loop *loop,
53 basic_block bb,
54 gimple_ranger &ranger)
55{
56 auto_vec<basic_block, 8> path;
57 for (; bb != loop->header; bb = single_pred_edge (bb)->src)
58 path.safe_push (obj: bb);
59 path.safe_push (obj: loop->header);
60 path.safe_push (obj: loop_preheader_edge (loop)->src);
61 return new path_range_query (ranger, path);
62}
63
/* Return the exit edge of the conditional in BB if the conditional's
   outcome is statically known in the first iteration of loop L (so the
   exit is known NOT to be taken then, and the copied conditional can be
   folded away); return NULL otherwise.
   Formulate the correct ranger query to RANGER; the path query is
   cached in QUERY and allocated on first use.  */

static edge
static_loop_exit (class loop *l, basic_block bb, gimple_ranger &ranger,
		  path_range_query *&query)
{
  gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
  edge ret_e;

  /* Only plain conditionals are handled.  */
  if (!last)
    return NULL;

  edge true_e, false_e;
  extract_true_false_edges_from_block (bb, &true_e, &false_e);

  /* If neither edge is the exit edge, this is not a case we'd like to
     special-case.  */
  if (!loop_exit_edge_p (l, true_e) && !loop_exit_edge_p (l, false_e))
    return NULL;

  /* The desired range is the condition value under which the loop is
     entered, i.e. the one that avoids taking the exit edge.  */
  int_range<1> desired_static_range;
  if (loop_exit_edge_p (l, true_e))
    {
      desired_static_range = range_false ();
      ret_e = true_e;
    }
  else
    {
      desired_static_range = range_true ();
      ret_e = false_e;
    }

  if (!query)
    query = get_range_query (l, gimple_bb (last), ranger);

  int_range<2> r;
  if (!query->range_of_stmt (r, last))
    return NULL;
  /* Return the exit edge only when the first-iteration outcome is
     exactly the in-loop value.  */
  return r == desired_static_range ? ret_e : NULL;
}
106
107/* Return true if STMT is static in LOOP. This means that its value
108 is constant in the first iteration.
109 Use RANGER and formulate query cached in QUERY. */
110
111static bool
112loop_static_stmt_p (class loop *loop,
113 gimple_ranger &ranger,
114 path_range_query *&query,
115 gimple *stmt)
116{
117 tree type = gimple_range_type (s: stmt);
118 if (!type || !Value_Range::supports_type_p (type))
119 return false;
120
121 if (!query)
122 query = get_range_query (loop, bb: gimple_bb (g: stmt), ranger);
123
124 Value_Range r (gimple_range_type (s: stmt));
125 if (!query->range_of_stmt (r, stmt))
126 return false;
127 return r.singleton_p ();
128}
129
130/* Return true if OP is invariant. */
131
132static bool
133loop_invariant_op_p (class loop *loop,
134 tree op)
135{
136 if (is_gimple_min_invariant (op))
137 return true;
138 if (SSA_NAME_IS_DEFAULT_DEF (op)
139 || !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (op))))
140 return true;
141 return gimple_uid (SSA_NAME_DEF_STMT (op)) & 1;
142}
143
/* Return true if OP is static in the loop, i.e. its value is constant
   in the first iteration (uid bit 1 on its defining statement, set by
   should_duplicate_loop_header_p).  OP must be defined by a statement
   inside LOOP; callers must have checked for invariance first.
   (Note: the previous comment here wrongly described the combined
   static-and-invariant test, which is loop_combined_static_and_iv_p.)  */

static bool
loop_static_op_p (class loop *loop, tree op)
{
  /* Always check for invariant first.  */
  gcc_checking_assert (!is_gimple_min_invariant (op)
		       && !SSA_NAME_IS_DEFAULT_DEF (op)
		       && flow_bb_inside_loop_p (loop,
						 gimple_bb (SSA_NAME_DEF_STMT (op))));
  return gimple_uid (SSA_NAME_DEF_STMT (op)) & 2;
}
157
158
159/* Return true if OP combines outcome of static and
160 loop invariant conditional. */
161
162static bool
163loop_combined_static_and_iv_p (class loop *loop,
164 tree op)
165{
166 /* Always check for invariant first. */
167 gcc_checking_assert (!is_gimple_min_invariant (op)
168 && !SSA_NAME_IS_DEFAULT_DEF (op)
169 && flow_bb_inside_loop_p (loop,
170 gimple_bb (SSA_NAME_DEF_STMT (op))));
171 return gimple_uid (SSA_NAME_DEF_STMT (op)) & 4;
172}
173
/* Decision about the possibility of copying a given loop header.
   Values are ordered: everything >= ch_win is a definite win.  */

enum ch_decision
{
  /* We do not want to copy this header.  */
  ch_impossible,
  /* We can copy it if it enables wins.  */
  ch_possible,
  /* We can "copy" it if it enables wins and doing
     so will introduce no new code.  */
  ch_possible_zero_cost,
  /* We want to copy.  */
  ch_win,
  /* We want to copy and we will eliminate a loop exit.  */
  ch_win_invariant_exit
};
190
191/* Check whether we should duplicate HEADER of LOOP. At most *LIMIT
192 instructions should be duplicated, limit is decreased by the actual
193 amount. */
194
195static ch_decision
196should_duplicate_loop_header_p (basic_block header, class loop *loop,
197 gimple_ranger *ranger,
198 int *limit,
199 hash_set <edge> *invariant_exits,
200 hash_set <edge> *static_exits)
201{
202 gimple_stmt_iterator bsi;
203
204 gcc_assert (!header->aux);
205
206 gcc_assert (EDGE_COUNT (header->succs) > 0);
207 if (single_succ_p (bb: header))
208 {
209 if (dump_file && (dump_flags & TDF_DETAILS))
210 fprintf (stream: dump_file,
211 format: " Not duplicating bb %i: it is single succ.\n",
212 header->index);
213 return ch_impossible;
214 }
215
216 if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)
217 && flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 1)->dest))
218 {
219 if (dump_file && (dump_flags & TDF_DETAILS))
220 fprintf (stream: dump_file,
221 format: " Not duplicating bb %i: both successors are in loop.\n",
222 loop->num);
223 return ch_impossible;
224 }
225
226 /* If this is not the original loop header, we want it to have just
227 one predecessor in order to match the && pattern. */
228 if (header != loop->header && !single_pred_p (bb: header))
229 {
230 if (dump_file && (dump_flags & TDF_DETAILS))
231 fprintf (stream: dump_file,
232 format: " Not duplicating bb %i: it has mutiple predecestors.\n",
233 header->index);
234 return ch_impossible;
235 }
236
237 gcond *last = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: header));
238 if (!last)
239 {
240 if (dump_file && (dump_flags & TDF_DETAILS))
241 fprintf (stream: dump_file,
242 format: " Not duplicating bb %i: it does not end by conditional.\n",
243 header->index);
244 return ch_impossible;
245 }
246
247 path_range_query *query = NULL;
248 for (gphi_iterator psi = gsi_start_phis (header); !gsi_end_p (i: psi);
249 gsi_next (i: &psi))
250 if (!virtual_operand_p (op: gimple_phi_result (gs: psi.phi ())))
251 {
252 bool static_p = loop_static_stmt_p (loop, ranger&: *ranger,
253 query, stmt: psi.phi ());
254 gimple_set_uid (g: psi.phi (), uid: static_p ? 2 : 0);
255 }
256 bool code_size_cost = false;
257
258 /* Count number of instructions and punt on calls.
259 Populate stmts INV flag to later apply heuristics to the
260 kind of conditions we want to copy. */
261 for (bsi = gsi_start_bb (bb: header); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
262 {
263 gimple *last = gsi_stmt (i: bsi);
264
265 if (gimple_code (g: last) == GIMPLE_LABEL)
266 continue;
267
268 if (is_gimple_debug (gs: last))
269 continue;
270
271 if (gimple_code (g: last) == GIMPLE_COND)
272 break;
273
274 if (dump_file && (dump_flags & TDF_DETAILS))
275 {
276 fprintf (stream: dump_file, format: " Analyzing: ");
277 print_gimple_stmt (dump_file, last, 0, TDF_SLIM);
278 }
279
280 if (gimple_code (g: last) == GIMPLE_CALL
281 && (!gimple_inexpensive_call_p (as_a <gcall *> (p: last))
282 /* IFN_LOOP_DIST_ALIAS means that inner loop is distributed
283 at current loop's header. Don't copy in this case. */
284 || gimple_call_internal_p (gs: last, fn: IFN_LOOP_DIST_ALIAS)))
285 {
286 if (dump_file && (dump_flags & TDF_DETAILS))
287 fprintf (stream: dump_file,
288 format: " Not duplicating bb %i: it contains call.\n",
289 header->index);
290 if (query)
291 delete query;
292 return ch_impossible;
293 }
294
295 /* Classify the stmt is invariant in the loop. */
296 gimple_set_uid (g: last, uid: 0);
297 if (!gimple_vuse (g: last)
298 && gimple_code (g: last) != GIMPLE_ASM
299 && (gimple_code (g: last) != GIMPLE_CALL
300 || gimple_call_flags (last) & ECF_CONST))
301 {
302 bool inv = true, static_p = false;
303 ssa_op_iter i;
304 tree op;
305 FOR_EACH_SSA_TREE_OPERAND (op, last, i, SSA_OP_USE)
306 if (!loop_invariant_op_p (loop, op))
307 inv = false;
308 /* Assume that code is reasonably optimized and invariant
309 constants are already identified. */
310 if (!inv)
311 static_p = loop_static_stmt_p (loop, ranger&: *ranger, query, stmt: last);
312 gimple_set_uid (g: last, uid: (inv ? 1 : 0) | (static_p ? 2 : 0));
313 if (dump_file && (dump_flags & TDF_DETAILS))
314 {
315 if (inv)
316 fprintf (stream: dump_file, format: " Stmt is loop invariant\n");
317 if (static_p)
318 fprintf (stream: dump_file, format: " Stmt is static "
319 "(constant in the first iteration)\n");
320 }
321 /* Loop invariants will be optimized out in loop body after
322 duplication; do not account invariant computation in code
323 size costs.
324
325 Similarly static computations will be optimized out in the
326 duplicatd header. */
327 if (inv || static_p)
328 continue;
329
330 /* Match the following:
331 _1 = i_1 < 10 <- static condtion
332 _2 = n != 0 <- loop invariant condition
333 _3 = _1 & _2 <- combined static and iv statement. */
334 tree_code code;
335 if (gimple_code (g: last) == GIMPLE_ASSIGN
336 && ((code = gimple_assign_rhs_code (gs: last)) == BIT_AND_EXPR
337 || code == BIT_IOR_EXPR || code == BIT_XOR_EXPR))
338 {
339 tree op1 = gimple_assign_rhs1 (gs: last);
340 tree op2 = gimple_assign_rhs2 (gs: last);
341
342 if ((loop_invariant_op_p (loop, op: op1)
343 || loop_combined_static_and_iv_p (loop, op: op1)
344 || loop_static_op_p (loop, op: op1))
345 && (loop_invariant_op_p (loop, op: op2)
346 || loop_combined_static_and_iv_p (loop, op: op2)
347 || loop_static_op_p (loop, op: op2)))
348 {
349 /* Duplicating loop header with combned conditional will
350 remove this statement in each copy. But we account for
351 that later when seeing that condition.
352
353 Note that this may be overly optimistic for bit operations
354 where the static parameter may still result in non-trivial
355 bit operation. */
356 gimple_set_uid (g: last, uid: 4);
357 if (dump_file && (dump_flags & TDF_DETAILS))
358 fprintf (stream: dump_file,
359 format: " Stmt combines static and invariant op\n");
360 continue;
361 }
362 }
363 }
364
365 int insns = estimate_num_insns (last, &eni_size_weights);
366 *limit -= insns;
367 if (insns)
368 code_size_cost = true;
369 if (dump_file && (dump_flags & TDF_DETAILS))
370 fprintf (stream: dump_file,
371 format: " Acconting stmt as %i insns\n", insns);
372 if (*limit < 0)
373 {
374 if (dump_file && (dump_flags & TDF_DETAILS))
375 fprintf (stream: dump_file,
376 format: " Not duplicating bb %i contains too many insns.\n",
377 header->index);
378 if (query)
379 delete query;
380 return ch_impossible;
381 }
382 }
383
384 if (dump_file && (dump_flags & TDF_DETAILS))
385 {
386 fprintf (stream: dump_file, format: " Analyzing: ");
387 print_gimple_stmt (dump_file, last, 0, TDF_SLIM);
388 }
389
390 /* If the condition tests a non-IV loop variant we do not want to rotate
391 the loop further. Unless this is the original loop header. */
392 tree lhs = gimple_cond_lhs (gs: last);
393 tree rhs = gimple_cond_rhs (gs: last);
394 bool lhs_invariant = loop_invariant_op_p (loop, op: lhs);
395 bool rhs_invariant = loop_invariant_op_p (loop, op: rhs);
396
397 /* Combined conditional is a result of if combining:
398
399 _1 = i_1 < 10 <- static condtion
400 _2 = n != 0 <- loop invariant condition
401 _3 = _1 & _2 <- combined static and iv statement
402 if (_3 != 0) <- combined conditional
403
404 Combined conditionals will not be optimized out in either copy.
405 However duplicaed header simplifies to:
406
407 if (n < 10)
408
409 while loop body to
410
411 if (i_1 < 10)
412
413 So effectively the resulting code sequence will be of same length as
414 the original code.
415
416 Combined conditionals are never static or invariant, so save some work
417 below. */
418 if (lhs_invariant != rhs_invariant
419 && (lhs_invariant
420 || loop_combined_static_and_iv_p (loop, op: lhs))
421 && (rhs_invariant
422 || loop_combined_static_and_iv_p (loop, op: rhs)))
423 {
424 if (query)
425 delete query;
426 if (dump_file && (dump_flags & TDF_DETAILS))
427 fprintf (stream: dump_file,
428 format: " Conditional combines static and invariant op.\n");
429 return ch_win;
430 }
431
432 edge static_exit = static_loop_exit (l: loop, bb: header, ranger&: *ranger, query);
433 if (query)
434 delete query;
435
436 if (static_exit && static_exits)
437 {
438 static_exits->add (k: static_exit);
439 if (dump_file && (dump_flags & TDF_DETAILS))
440 fprintf (stream: dump_file,
441 format: " Will eliminate peeled conditional in bb %d.\n",
442 static_exit->src->index);
443 /* Still look for invariant exits; exit may be both. */
444 }
445 if (lhs_invariant && rhs_invariant)
446 {
447 if (invariant_exits)
448 {
449 edge e;
450 edge_iterator ei;
451 FOR_EACH_EDGE (e, ei, header->succs)
452 if (loop_exit_edge_p (loop, e))
453 {
454 if (dump_file && (dump_flags & TDF_DETAILS))
455 fprintf (stream: dump_file,
456 format: " Will elliminate invariant exit %i->%i\n",
457 e->src->index, e->dest->index);
458 invariant_exits->add (k: e);
459 }
460 }
461 return ch_win_invariant_exit;
462 }
463
464 /* If the static exit fully optimize out, it is win to "duplicate"
465 it.
466
467 TODO: Even if duplication costs some size we may opt to do so in case
468 exit probability is significant enough (do partial peeling). */
469 if (static_exit)
470 return !code_size_cost ? ch_possible_zero_cost : ch_possible;
471
472 /* We was not able to prove that conditional will be eliminated. */
473 int insns = estimate_num_insns (last, &eni_size_weights);
474 *limit -= insns;
475 if (dump_file && (dump_flags & TDF_DETAILS))
476 fprintf (stream: dump_file,
477 format: " Acconting stmt as %i insns\n", insns);
478 if (*limit < 0)
479 {
480 if (dump_file && (dump_flags & TDF_DETAILS))
481 fprintf (stream: dump_file,
482 format: " Not duplicating bb %i contains too many insns.\n",
483 header->index);
484 return ch_impossible;
485 }
486
487 return ch_possible;
488}
489
490/* Checks whether LOOP is a do-while style loop. */
491
492static bool
493do_while_loop_p (class loop *loop)
494{
495 gimple *stmt = last_nondebug_stmt (loop->latch);
496
497 /* If the latch of the loop is not empty, it is not a do-while loop. */
498 if (stmt
499 && gimple_code (g: stmt) != GIMPLE_LABEL)
500 {
501 if (dump_file && (dump_flags & TDF_DETAILS))
502 fprintf (stream: dump_file,
503 format: "Loop %i is not do-while loop: latch is not empty.\n",
504 loop->num);
505 return false;
506 }
507
508 /* If the latch does not have a single predecessor, it is not a
509 do-while loop. */
510 if (!single_pred_p (bb: loop->latch))
511 {
512 if (dump_file && (dump_flags & TDF_DETAILS))
513 fprintf (stream: dump_file,
514 format: "Loop %i is not do-while loop: latch has multiple "
515 "predecessors.\n", loop->num);
516 return false;
517 }
518 basic_block pred = single_pred (bb: loop->latch);
519
520 /* If the latch predecessor doesn't exit the loop, it is not a
521 do-while loop. */
522 if (!loop_exits_from_bb_p (loop, pred))
523 {
524 if (dump_file && (dump_flags & TDF_DETAILS))
525 fprintf (stream: dump_file,
526 format: "Loop %i is not do-while loop: latch predecessor "
527 "does not exit loop.\n", loop->num);
528 return false;
529 }
530 gcond *last = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: pred));
531 if (last
532 && (gimple_cond_lhs (gs: last) == boolean_false_node
533 || gimple_cond_lhs (gs: last) == boolean_true_node)
534 && gimple_cond_rhs (gs: last) == boolean_false_node)
535 {
536 if (dump_file && (dump_flags & TDF_DETAILS))
537 fprintf (stream: dump_file,
538 format: "Loop %i is not do-while loop: latch predecessor "
539 "contains exit we optimized out.\n", loop->num);
540 return false;
541 }
542
543 if (dump_file && (dump_flags & TDF_DETAILS))
544 fprintf (stream: dump_file, format: "Loop %i is do-while loop\n", loop->num);
545
546 return true;
547}
548
/* Update profile after header copying of LOOP.
   REGION is the original (in loop) sequence, REGION_COPY is the
   duplicated header (now outside of loop).  N_REGION is number of
   bbs duplicated.
   INVARIANT_EXITS are edges in the loop body to be eliminated
   since they are loop invariant; STATIC_EXITS are exits whose
   conditional is constant in the first iteration, so the copied
   conditional folds away.  ENTRY_COUNT is the count entering the
   duplicated region.

   So we expect the following:

   // region_copy_start entry will be scaled to entry_count
     if (cond1)        <- this condition will become false
			  and we update probabilities
       goto loop_exit;
     if (cond2)        <- this condition is loop invariant
       goto loop_exit;
     goto loop_header  <- this will be redirected to loop.
   // region_copy_end
   loop:
     <body>
   // region start
   loop_header:
     if (cond1)        <- we need to update probability here
       goto loop_exit;
     if (cond2)        <- and determine scaling factor here.
			  moreover cond2 is now always true
       goto loop_exit;
     else
       goto loop;
   // region end

   Adding support for more exits can be done similarly,
   but only consumer so far is tree-ssa-loop-ch and it uses only this
   to handle the common case of peeling headers which have
   conditionals known to be always true upon entry.  */

static void
update_profile_after_ch (class loop *loop,
			 basic_block *region, basic_block *region_copy,
			 unsigned n_region,
			 hash_set <edge> *invariant_exits,
			 hash_set <edge> *static_exits,
			 profile_count entry_count)
{
  for (unsigned int i = 0; i < n_region; i++)
    {
      edge exit_e, exit_e_copy, e, e_copy;
      /* Single-successor blocks: the copy gets the whole entry count,
	 the in-loop original keeps the remainder.  */
      if (EDGE_COUNT (region[i]->succs) == 1)
	{
	  region_copy[i]->count = entry_count;
	  region[i]->count -= entry_count;
	  continue;
	}

      gcc_checking_assert (EDGE_COUNT (region[i]->succs) == 2);
      /* Identify which successor is the loop exit (EXIT_E) and which
	 continues toward the next copied block (E), in both the
	 original and the copy.  */
      if (loop_exit_edge_p (loop,
			    EDGE_SUCC (region[i], 0)))
	{
	  exit_e = EDGE_SUCC (region[i], 0);
	  exit_e_copy = EDGE_SUCC (region_copy[i], 0);
	  e = EDGE_SUCC (region[i], 1);
	  e_copy = EDGE_SUCC (region_copy[i], 1);
	}
      else
	{
	  exit_e = EDGE_SUCC (region[i], 1);
	  exit_e_copy = EDGE_SUCC (region_copy[i], 1);
	  e = EDGE_SUCC (region[i], 0);
	  e_copy = EDGE_SUCC (region_copy[i], 0);
	}
      gcc_assert (i == n_region - 1
		  || (e->dest == region[i + 1]
		      && e_copy->dest == region_copy[i + 1]));
      region_copy[i]->count = entry_count;
      /* Remember the exit count before the probabilities change.  */
      profile_count exit_e_count = exit_e->count ();
      bool was_static = false;
      if (static_exits->contains (exit_e))
	{
	  /* Update profile and the conditional.
	     CFG update is done by caller.  */
	  static_exits->remove (exit_e);
	  was_static = true;
	  /* In the copied (peeled) header the exit is never taken;
	     fold the conditional accordingly.  */
	  e_copy->probability = profile_probability::always ();
	  exit_e_copy->probability = profile_probability::never ();
	  gcond *cond_stmt
	    = as_a <gcond *>(*gsi_last_bb (region_copy[i]));
	  if (e_copy->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_true (cond_stmt);
	  else
	    gimple_cond_make_false (cond_stmt);
	  update_stmt (cond_stmt);
	  /* Header copying is a special case of jump threading, so use
	     common code to update loop body exit condition.  */
	  update_bb_profile_for_threading (region[i], entry_count, e);
	}
      else
	region[i]->count -= region_copy[i]->count;
      if (invariant_exits->contains (exit_e))
	{
	  invariant_exits->remove (exit_e);
	  /* All exits will happen in exit_e_copy which is out of the
	     loop, so increase probability accordingly.
	     If the edge is eliminated_edge we already corrected
	     profile above.  */
	  if (entry_count.nonzero_p () && !was_static)
	    set_edge_probability_and_rescale_others
	      (exit_e_copy, exit_e_count.probability_in
	       (entry_count));
	  /* Eliminate in-loop conditional: the in-loop exit is now
	     never taken.  */
	  e->probability = profile_probability::always ();
	  exit_e->probability = profile_probability::never ();
	  gcond *cond_stmt = as_a <gcond *>(*gsi_last_bb (region[i]));
	  if (e->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_true (cond_stmt);
	  else
	    gimple_cond_make_false (cond_stmt);
	  update_stmt (cond_stmt);
	}
      /* Count entering the next copied block.  */
      entry_count = e_copy->count ();
    }
  /* Be sure that we have seen all invariant exit edges we were supposed
     to update.  We may have recorded some static exits we decided not
     to duplicate.  */
  gcc_checking_assert (invariant_exits->is_empty ());
}
673
674namespace {
675
/* Common superclass for both header-copying phases (the early "ch"
   pass and the pre-vectorization "ch_vect" pass); subclasses only
   choose which loops to process.  */
class ch_base : public gimple_opt_pass
{
 protected:
  ch_base (pass_data data, gcc::context *ctxt)
    : gimple_opt_pass (data, ctxt)
  {}

  /* Copies headers of all loops in FUN for which process_loop_p is true.  */
  unsigned int copy_headers (function *fun);

  /* Return true to copy headers of LOOP or false to skip.  */
  virtual bool process_loop_p (class loop *loop) = 0;
};
690
/* Pass metadata for the early loop header copying pass.  */
const pass_data pass_data_ch =
{
  GIMPLE_PASS, /* type */
  "ch", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_CH, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
703
/* Early header-copying pass, gated on -ftree-ch; maintains loop
   structures across the transform.  */
class pass_ch : public ch_base
{
public:
  pass_ch (gcc::context *ctxt)
    : ch_base (pass_data_ch, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override { return flag_tree_ch != 0; }

  /* Initialize and finalize loop structures, copying headers inbetween.  */
  unsigned int execute (function *) final override;

  opt_pass * clone () final override { return new pass_ch (m_ctxt); }

protected:
  /* ch_base method: */
  bool process_loop_p (class loop *loop) final override;
}; // class pass_ch
723
/* Pass metadata for the pre-vectorization header copying pass.  */
const pass_data pass_data_ch_vect =
{
  GIMPLE_PASS, /* type */
  "ch_vect", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_CH, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
736
/* This is a more aggressive version of the same pass, designed to run just
   before if-conversion and vectorization, to put more loops into the form
   required for those phases.  Gated on -ftree-ch together with loop
   vectorization being enabled (or forced per-function).  */
class pass_ch_vect : public ch_base
{
public:
  pass_ch_vect (gcc::context *ctxt)
    : ch_base (pass_data_ch_vect, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *fun) final override
  {
    return flag_tree_ch != 0
	   && (flag_tree_loop_vectorize != 0 || fun->has_force_vectorize_loops);
  }

  /* Just copy headers, no initialization/finalization of loop structures.  */
  unsigned int execute (function *) final override;

protected:
  /* ch_base method: */
  bool process_loop_p (class loop *loop) final override;
}; // class pass_ch_vect
761
762/* For all loops, copy the condition at the end of the loop body in front
763 of the loop. This is beneficial since it increases efficiency of
764 code motion optimizations. It also saves one jump on entry to the loop. */
765
766unsigned int
767ch_base::copy_headers (function *fun)
768{
769 basic_block *bbs, *copied_bbs;
770 unsigned bbs_size;
771 bool changed = false;
772
773 if (number_of_loops (fn: fun) <= 1)
774 return 0;
775
776 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
777 copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
778 bbs_size = n_basic_blocks_for_fn (fun);
779
780 struct candidate
781 {
782 class loop *loop;
783 unsigned int nheaders;
784 hash_set <edge> *invariant_exits, *static_exits;
785 };
786 auto_vec<struct candidate> candidates;
787 auto_vec<std::pair<edge, loop_p> > copied;
788 auto_vec<class loop *> loops_to_unloop;
789 auto_vec<int> loops_to_unloop_nunroll;
790
791 mark_dfs_back_edges ();
792 gimple_ranger *ranger = new gimple_ranger;
793 for (auto loop : loops_list (cfun, 0))
794 {
795 int initial_limit = optimize_loop_for_speed_p (loop)
796 ? param_max_loop_header_insns : 0;
797 int remaining_limit = initial_limit;
798 if (dump_file && (dump_flags & TDF_DETAILS))
799 fprintf (stream: dump_file,
800 format: "Analyzing loop %i\n", loop->num);
801
802 /* If the loop is already a do-while style one (either because it was
803 written as such, or because jump threading transformed it into one),
804 we might be in fact peeling the first iteration of the loop. This
805 in general is not a good idea. Also avoid touching infinite loops. */
806 if (!loop_has_exit_edges (loop)
807 || !process_loop_p (loop))
808 continue;
809
810 basic_block header = loop->header;
811 estimate_numbers_of_iterations (loop);
812 if (!get_max_loop_iterations_int (loop))
813 {
814 if (dump_file && (dump_flags & TDF_DETAILS))
815 fprintf (stream: dump_file, format: "Loop %d never loops.\n", loop->num);
816 scale_loop_profile (loop, profile_probability::always (), 0);
817 loops_to_unloop.safe_push (obj: loop);
818 loops_to_unloop_nunroll.safe_push (obj: 0);
819 continue;
820 }
821
822 /* Iterate the header copying up to limit; this takes care of the cases
823 like while (a && b) {...}, where we want to have both of the conditions
824 copied. TODO -- handle while (a || b) - like cases, by not requiring
825 the header to have just a single successor and copying up to
826 postdominator. */
827 int nheaders = 0;
828 int last_win_nheaders = 0;
829 bool last_win_invariant_exit = false;
830 ch_decision ret;
831 auto_vec <ch_decision, 32> decision;
832 hash_set <edge> *invariant_exits = new hash_set <edge>;
833 hash_set <edge> *static_exits = new hash_set <edge>;
834 while ((ret = should_duplicate_loop_header_p (header, loop, ranger,
835 limit: &remaining_limit,
836 invariant_exits,
837 static_exits))
838 != ch_impossible)
839 {
840 nheaders++;
841 decision.safe_push (obj: ret);
842 if (ret >= ch_win)
843 {
844 last_win_nheaders = nheaders;
845 last_win_invariant_exit = (ret == ch_win_invariant_exit);
846 if (dump_file && (dump_flags & TDF_DETAILS))
847 fprintf (stream: dump_file, format: " Duplicating bb %i is a win\n",
848 header->index);
849 }
850 else
851 if (dump_file && (dump_flags & TDF_DETAILS))
852 fprintf (stream: dump_file, format: " May duplicate bb %i\n", header->index);
853
854 /* Find a successor of header that is inside a loop; i.e. the new
855 header after the condition is copied. */
856 if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest))
857 header = EDGE_SUCC (header, 0)->dest;
858 else
859 header = EDGE_SUCC (header, 1)->dest;
860 }
861
862 /* Try to turn loop into do-while. This means ensuring that
863 last duplicated header will not have loop invariant exit. */
864 if (last_win_nheaders && last_win_invariant_exit
865 && nheaders > last_win_nheaders)
866 {
867 last_win_nheaders++;
868 if (dump_file && (dump_flags & TDF_DETAILS))
869 fprintf (stream: dump_file,
870 format: " Duplicating additional BB to obtain do-while loop\n");
871 }
872 else if (!last_win_nheaders && nheaders && !do_while_loop_p (loop))
873 {
874 last_win_nheaders = 1;
875 if (dump_file && (dump_flags & TDF_DETAILS))
876 fprintf (stream: dump_file,
877 format: " Duplicating header BB to obtain do-while loop\n");
878 }
879 /* "Duplicate" all BBs with zero cost following last basic blocks we
880 decided to copy. */
881 while (last_win_nheaders < (int)decision.length ()
882 && decision[last_win_nheaders] == ch_possible_zero_cost)
883 {
884 if (dump_file && (dump_flags & TDF_DETAILS))
885 fprintf (stream: dump_file,
886 format: " Duplicating extra bb is a win; it has zero cost\n");
887 last_win_nheaders++;
888 }
889
890 if (last_win_nheaders)
891 candidates.safe_push (obj: {.loop: loop, .nheaders: last_win_nheaders,
892 .invariant_exits: invariant_exits, .static_exits: static_exits});
893 else
894 {
895 delete invariant_exits;
896 delete static_exits;
897 }
898 }
899 /* Do not use ranger after we change the IL and not have updated SSA. */
900 delete ranger;
901
902 for (auto candidate : candidates)
903 {
904 class loop *loop = candidate.loop;
905 if (dump_file && (dump_flags & TDF_DETAILS))
906 fprintf (stream: dump_file,
907 format: "Copying headers of loop %i\n", loop->num);
908
909 basic_block header = loop->header;
910 edge nonexit = NULL;
911 edge exit = NULL;
912 unsigned int n_bbs = 0;
913 int nexits = 0;
914 profile_count exit_count = profile_count::zero ();
915 profile_count entry_count = profile_count::zero ();
916 edge e;
917 edge_iterator ei;
918
919 FOR_EACH_EDGE (e, ei, loop->header->preds)
920 if (e->src != loop->latch)
921 entry_count += e->count ();
922 while (n_bbs < candidate.nheaders)
923 {
924 if (dump_file && (dump_flags & TDF_DETAILS))
925 fprintf (stream: dump_file, format: " Will duplicate bb %i\n", header->index);
926 /* Find a successor of header that is inside a loop; i.e. the new
927 header after the condition is copied. */
928 if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest))
929 {
930 nonexit = EDGE_SUCC (header, 0);
931 exit = EDGE_SUCC (header, 1);
932 }
933 else
934 {
935 nonexit = EDGE_SUCC (header, 1);
936 exit = EDGE_SUCC (header, 0);
937 }
938 exit_count += exit->count ();
939 nexits++;
940 bbs[n_bbs++] = header;
941 gcc_assert (bbs_size > n_bbs);
942 header = nonexit->dest;
943 }
944
945 if (dump_file && (dump_flags & TDF_DETAILS))
946 fprintf (stream: dump_file,
947 format: "Duplicating header of the loop %d up to edge %d->%d\n",
948 loop->num, exit->src->index, exit->dest->index);
949
950 /* Ensure that the header will have just the latch as a predecessor
951 inside the loop. */
952 if (!single_pred_p (bb: nonexit->dest))
953 {
954 header = split_edge (nonexit);
955 exit = single_pred_edge (bb: header);
956 }
957
958 edge entry = loop_preheader_edge (loop);
959
960 propagate_threaded_block_debug_into (nonexit->dest, entry->dest);
961 if (!gimple_duplicate_seme_region (entry, exit, bbs, n_bbs, copied_bbs,
962 true))
963 {
964 delete candidate.static_exits;
965 delete candidate.invariant_exits;
966 if (dump_file && (dump_flags & TDF_DETAILS))
967 fprintf (stream: dump_file, format: "Duplication failed.\n");
968 continue;
969 }
970 if (loop->header->count.initialized_p ())
971 update_profile_after_ch (loop, region: bbs, region_copy: copied_bbs, n_region: n_bbs,
972 invariant_exits: candidate.invariant_exits,
973 static_exits: candidate.static_exits,
974 entry_count);
975 delete candidate.static_exits;
976 delete candidate.invariant_exits;
977 copied.safe_push (obj: std::make_pair (x&: entry, y&: loop));
978
979 /* If the loop has the form "for (i = j; i < j + 10; i++)" then
980 this copying can introduce a case where we rely on undefined
981 signed overflow to eliminate the preheader condition, because
982 we assume that "j < j + 10" is true. We don't want to warn
983 about that case for -Wstrict-overflow, because in general we
984 don't warn about overflow involving loops. Prevent the
985 warning by setting the no_warning flag in the condition. */
986 if (warn_strict_overflow > 0)
987 {
988 unsigned int i;
989
990 for (i = 0; i < n_bbs; ++i)
991 {
992 gimple_stmt_iterator bsi;
993
994 for (bsi = gsi_start_bb (bb: copied_bbs[i]);
995 !gsi_end_p (i: bsi);
996 gsi_next (i: &bsi))
997 {
998 gimple *stmt = gsi_stmt (i: bsi);
999 if (gimple_code (g: stmt) == GIMPLE_COND)
1000 {
1001 tree lhs = gimple_cond_lhs (gs: stmt);
1002 if (gimple_cond_code (gs: stmt) != EQ_EXPR
1003 && gimple_cond_code (gs: stmt) != NE_EXPR
1004 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1005 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
1006 suppress_warning (stmt, OPT_Wstrict_overflow_);
1007 }
1008 else if (is_gimple_assign (gs: stmt))
1009 {
1010 enum tree_code rhs_code = gimple_assign_rhs_code (gs: stmt);
1011 tree rhs1 = gimple_assign_rhs1 (gs: stmt);
1012 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison
1013 && rhs_code != EQ_EXPR
1014 && rhs_code != NE_EXPR
1015 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
1016 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs1)))
1017 suppress_warning (stmt, OPT_Wstrict_overflow_);
1018 }
1019 }
1020 }
1021 }
1022
1023 /* Update header of the loop. */
1024 loop->header = header;
1025 /* Find correct latch. We only duplicate chain of conditionals so
1026 there should be precisely two edges to the new header. One entry
1027 edge and one to latch. */
1028 FOR_EACH_EDGE (e, ei, loop->header->preds)
1029 if (header != e->src)
1030 {
1031 loop->latch = e->src;
1032 break;
1033 }
1034 /* Ensure that the latch is simple. */
1035 if (!single_succ_p (bb: loop_latch_edge (loop)->src))
1036 split_edge (loop_latch_edge (loop));
1037
1038 if (dump_file && (dump_flags & TDF_DETAILS))
1039 {
1040 if (do_while_loop_p (loop))
1041 fprintf (stream: dump_file, format: "Loop %d is now do-while loop.\n", loop->num);
1042 else
1043 fprintf (stream: dump_file, format: "Loop %d is still not do-while loop.\n",
1044 loop->num);
1045 fprintf (stream: dump_file, format: "Exit count: ");
1046 exit_count.dump (f: dump_file);
1047 fprintf (stream: dump_file, format: "\nEntry count: ");
1048 entry_count.dump (f: dump_file);
1049 fprintf (stream: dump_file, format: "\n");
1050 }
1051
1052 /* We possibly decreased number of iterations by 1. */
1053 auto_vec<edge> exits = get_loop_exit_edges (loop);
1054 bool precise = (nexits == (int) exits.length ());
1055 /* Check that loop may not terminate in other way than via
1056 basic blocks we duplicated. */
1057 if (precise)
1058 {
1059 basic_block *bbs = get_loop_body (loop);
1060 for (unsigned i = 0; i < loop->num_nodes && precise; ++i)
1061 {
1062 basic_block bb = bbs[i];
1063 bool found_exit = false;
1064 FOR_EACH_EDGE (e, ei, bb->succs)
1065 if (!flow_bb_inside_loop_p (loop, e->dest))
1066 {
1067 found_exit = true;
1068 break;
1069 }
1070 /* If BB has exit, it was duplicated. */
1071 if (found_exit)
1072 continue;
1073 /* Give up on irreducible loops. */
1074 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1075 {
1076 precise = false;
1077 break;
1078 }
1079 /* Check that inner loops are finite. */
1080 for (class loop *l = bb->loop_father; l != loop && precise;
1081 l = loop_outer (loop: l))
1082 if (!l->finite_p)
1083 {
1084 precise = false;
1085 break;
1086 }
1087 /* Verify that there is no statement that may be terminate
1088 execution in a way not visible to CFG. */
1089 for (gimple_stmt_iterator bsi = gsi_start_bb (bb);
1090 !gsi_end_p (i: bsi); gsi_next (i: &bsi))
1091 if (stmt_can_terminate_bb_p (gsi_stmt (i: bsi)))
1092 precise = false;
1093 }
1094 free (ptr: bbs);
1095 }
1096 if (precise
1097 && get_max_loop_iterations_int (loop) == 1)
1098 {
1099 if (dump_file && (dump_flags & TDF_DETAILS))
1100 fprintf (stream: dump_file, format: "Loop %d no longer loops.\n", loop->num);
1101 scale_loop_profile (loop, profile_probability::always (), 0);
1102 loops_to_unloop.safe_push (obj: loop);
1103 loops_to_unloop_nunroll.safe_push (obj: 0);
1104 }
1105 else if (precise)
1106 {
1107 if (dump_file && (dump_flags & TDF_DETAILS))
1108 fprintf (stream: dump_file,
1109 format: "Peeled all exits:"
1110 " decreased number of iterations of loop %d by 1.\n",
1111 loop->num);
1112 adjust_loop_info_after_peeling (loop, npeel: 1, precise: true);
1113 }
1114 else if (exit_count >= entry_count.apply_scale (num: 9, den: 10))
1115 {
1116 if (dump_file && (dump_flags & TDF_DETAILS))
1117 fprintf (stream: dump_file,
1118 format: "Peeled likely exits: likely decreased number "
1119 "of iterations of loop %d by 1.\n", loop->num);
1120 adjust_loop_info_after_peeling (loop, npeel: 1, precise: false);
1121 }
1122 else if (dump_file && (dump_flags & TDF_DETAILS))
1123 fprintf (stream: dump_file,
1124 format: "Not decreased number"
1125 " of iterations of loop %d; likely exits remains.\n",
1126 loop->num);
1127
1128 changed = true;
1129 }
1130
1131 if (changed)
1132 {
1133 update_ssa (TODO_update_ssa);
1134 /* After updating SSA form perform CSE on the loop header
1135 copies. This is esp. required for the pass before
1136 vectorization since nothing cleans up copied exit tests
1137 that can now be simplified. CSE from the entry of the
1138 region we copied till all loop exit blocks but not
1139 entering the loop itself. */
1140 for (unsigned i = 0; i < copied.length (); ++i)
1141 {
1142 edge entry = copied[i].first;
1143 loop_p loop = copied[i].second;
1144 auto_vec<edge> exit_edges = get_loop_exit_edges (loop);
1145 bitmap exit_bbs = BITMAP_ALLOC (NULL);
1146 for (unsigned j = 0; j < exit_edges.length (); ++j)
1147 bitmap_set_bit (exit_bbs, exit_edges[j]->dest->index);
1148 bitmap_set_bit (exit_bbs, loop->header->index);
1149 do_rpo_vn (cfun, entry, exit_bbs);
1150 BITMAP_FREE (exit_bbs);
1151 }
1152 }
1153 if (!loops_to_unloop.is_empty ())
1154 {
1155 bool irred_invalidated;
1156 auto_bitmap lc_invalidated;
1157 auto_vec<edge> edges_to_remove;
1158 unloop_loops (loops_to_unloop, loops_to_unloop_nunroll, edges_to_remove,
1159 loop_closed_ssa_invalidated: lc_invalidated, irred_invalidated: &irred_invalidated);
1160 if (loops_state_satisfies_p (fn: fun, flags: LOOP_CLOSED_SSA)
1161 && !bitmap_empty_p (map: lc_invalidated))
1162 rewrite_into_loop_closed_ssa (NULL, 0);
1163 changed = true;
1164 }
1165 free (ptr: bbs);
1166 free (ptr: copied_bbs);
1167
1168 return changed ? TODO_cleanup_cfg : 0;
1169}
1170
1171/* Initialize the loop structures we need, and finalize after. */
1172
1173unsigned int
1174pass_ch::execute (function *fun)
1175{
1176 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
1177 scev_initialize ();
1178
1179 unsigned int res = copy_headers (fun);
1180
1181 scev_finalize ();
1182 loop_optimizer_finalize ();
1183 return res;
1184}
1185
1186/* Assume an earlier phase has already initialized all the loop structures that
1187 we need here (and perhaps others too), and that these will be finalized by
1188 a later phase. */
1189
1190unsigned int
1191pass_ch_vect::execute (function *fun)
1192{
1193 return copy_headers (fun);
1194}
1195
1196/* Apply header copying according to a very simple test of do-while shape. */
1197
1198bool
1199pass_ch::process_loop_p (class loop *)
1200{
1201 return true;
1202}
1203
1204/* Apply header-copying to loops where we might enable vectorization. */
1205
1206bool
1207pass_ch_vect::process_loop_p (class loop *loop)
1208{
1209 if (!flag_tree_loop_vectorize && !loop->force_vectorize)
1210 return false;
1211
1212 if (loop->dont_vectorize)
1213 return false;
1214
1215 /* The vectorizer won't handle anything with multiple exits, so skip. */
1216 edge exit = single_exit (loop);
1217 if (!exit)
1218 return false;
1219
1220 if (!do_while_loop_p (loop))
1221 return true;
1222
1223 return false;
1224}
1225
1226} // anon namespace
1227
/* Create an instance of the pre-vectorization loop header copying pass.
   Caller (the pass manager) owns the returned object.  */

gimple_opt_pass *
make_pass_ch_vect (gcc::context *ctxt)
{
  return new pass_ch_vect (ctxt);
}
1233
/* Create an instance of the generic loop header copying pass.
   Caller (the pass manager) owns the returned object.  */

gimple_opt_pass *
make_pass_ch (gcc::context *ctxt)
{
  return new pass_ch (ctxt);
}
1239

source code of gcc/tree-ssa-loop-ch.cc