1 | /* Loop splitting. |
2 | Copyright (C) 2015-2024 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the |
8 | Free Software Foundation; either version 3, or (at your option) any |
9 | later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #include "config.h" |
21 | #include "system.h" |
22 | #include "coretypes.h" |
23 | #include "backend.h" |
24 | #include "tree.h" |
25 | #include "gimple.h" |
26 | #include "tree-pass.h" |
27 | #include "ssa.h" |
28 | #include "fold-const.h" |
29 | #include "tree-cfg.h" |
30 | #include "tree-ssa.h" |
31 | #include "tree-ssa-loop-niter.h" |
32 | #include "tree-ssa-loop.h" |
33 | #include "tree-ssa-loop-manip.h" |
34 | #include "tree-into-ssa.h" |
35 | #include "tree-inline.h" |
36 | #include "tree-cfgcleanup.h" |
37 | #include "cfgloop.h" |
38 | #include "tree-scalar-evolution.h" |
39 | #include "gimple-iterator.h" |
40 | #include "gimple-pretty-print.h" |
41 | #include "cfghooks.h" |
42 | #include "gimple-fold.h" |
43 | #include "gimplify-me.h" |
44 | #include "print-tree.h" |
45 | #include "value-query.h" |
46 | #include "sreal.h" |
47 | |
48 | /* This file implements two kinds of loop splitting. |
49 | |
50 | One transformation of loops like: |
51 | |
52 | for (i = 0; i < 100; i++) |
53 | { |
54 | if (i < 50) |
55 | A; |
56 | else |
57 | B; |
58 | } |
59 | |
60 | into: |
61 | |
62 | for (i = 0; i < 50; i++) |
63 | { |
64 | A; |
65 | } |
66 | for (; i < 100; i++) |
67 | { |
68 | B; |
69 | } |
70 | |
71 | */ |
72 | |
73 | /* Return true when BB inside LOOP is a potential iteration space |
74 | split point, i.e. ends with a condition like "IV < comp", which |
75 | is true on one side of the iteration space and false on the other, |
76 | and the split point can be computed. If so, also return the border |
77 | point in *BORDER and the comparison induction variable in IV. */ |
78 | |
79 | static tree |
80 | split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv, |
81 | enum tree_code *guard_code) |
82 | { |
83 | gcond *stmt; |
84 | affine_iv iv2; |
85 | |
86 | /* BB must end in a simple conditional jump. */ |
87 | stmt = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb)); |
88 | if (!stmt) |
89 | return NULL_TREE; |
90 | |
91 | enum tree_code code = gimple_cond_code (gs: stmt); |
92 | |
93 | if (loop_exits_from_bb_p (loop, bb)) |
94 | return NULL_TREE; |
95 | |
96 | tree op0 = gimple_cond_lhs (gs: stmt); |
97 | tree op1 = gimple_cond_rhs (gs: stmt); |
98 | class loop *useloop = loop_containing_stmt (stmt); |
99 | |
100 | if (!simple_iv (loop, useloop, op0, iv, false)) |
101 | return NULL_TREE; |
102 | if (!simple_iv (loop, useloop, op1, &iv2, false)) |
103 | return NULL_TREE; |
104 | |
105 | /* Make it so that the first argument of the condition is |
106 | the looping one. */ |
107 | if (!integer_zerop (iv2.step)) |
108 | { |
109 | std::swap (a&: op0, b&: op1); |
110 | std::swap (a&: *iv, b&: iv2); |
111 | code = swap_tree_comparison (code); |
112 | gimple_cond_set_condition (stmt, code, lhs: op0, rhs: op1); |
113 | update_stmt (s: stmt); |
114 | } |
115 | else if (integer_zerop (iv->step)) |
116 | return NULL_TREE; |
117 | if (!integer_zerop (iv2.step)) |
118 | return NULL_TREE; |
119 | if (!iv->no_overflow) |
120 | return NULL_TREE; |
121 | |
122 | /* Only handle relational comparisons, for equality and non-equality |
123 | we'd have to split the loop into two loops and a middle statement. */ |
124 | switch (code) |
125 | { |
126 | case LT_EXPR: |
127 | case LE_EXPR: |
128 | case GT_EXPR: |
129 | case GE_EXPR: |
130 | break; |
131 | case NE_EXPR: |
132 | case EQ_EXPR: |
133 | /* If the test check for first iteration, we can handle NE/EQ |
134 | with only one split loop. */ |
135 | if (operand_equal_p (iv->base, iv2.base, flags: 0)) |
136 | { |
137 | if (code == EQ_EXPR) |
138 | code = !tree_int_cst_sign_bit (iv->step) ? LE_EXPR : GE_EXPR; |
139 | else |
140 | code = !tree_int_cst_sign_bit (iv->step) ? GT_EXPR : LT_EXPR; |
141 | break; |
142 | } |
143 | /* Similarly when the test checks for minimal or maximal |
144 | value range. */ |
145 | else |
146 | { |
147 | int_range<2> r; |
148 | get_global_range_query ()->range_of_expr (r, expr: op0, stmt); |
149 | if (!r.varying_p () && !r.undefined_p () |
150 | && TREE_CODE (op1) == INTEGER_CST) |
151 | { |
152 | wide_int val = wi::to_wide (t: op1); |
153 | if (known_eq (val, r.lower_bound ())) |
154 | { |
155 | code = (code == EQ_EXPR) ? LE_EXPR : GT_EXPR; |
156 | break; |
157 | } |
158 | else if (known_eq (val, r.upper_bound ())) |
159 | { |
160 | code = (code == EQ_EXPR) ? GE_EXPR : LT_EXPR; |
161 | break; |
162 | } |
163 | } |
164 | } |
165 | /* TODO: We can compare with exit condition; it seems that testing for |
166 | last iteration is common case. */ |
167 | return NULL_TREE; |
168 | default: |
169 | return NULL_TREE; |
170 | } |
171 | |
172 | if (dump_file && (dump_flags & TDF_DETAILS)) |
173 | { |
174 | fprintf (stream: dump_file, format: "Found potential split point: " ); |
175 | print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); |
176 | fprintf (stream: dump_file, format: " { " ); |
177 | print_generic_expr (dump_file, iv->base, TDF_SLIM); |
178 | fprintf (stream: dump_file, format: " + I*" ); |
179 | print_generic_expr (dump_file, iv->step, TDF_SLIM); |
180 | fprintf (stream: dump_file, format: " } %s " , get_tree_code_name (code)); |
181 | print_generic_expr (dump_file, iv2.base, TDF_SLIM); |
182 | fprintf (stream: dump_file, format: "\n" ); |
183 | } |
184 | |
185 | *border = iv2.base; |
186 | *guard_code = code; |
187 | return op0; |
188 | } |
189 | |
190 | /* Given a GUARD conditional stmt inside LOOP, which we want to make always |
191 | true or false depending on INITIAL_TRUE, and adjusted values NEXTVAL |
192 | (a post-increment IV) and NEWBOUND (the comparator) adjust the loop |
193 | exit test statement to loop back only if the GUARD statement will |
194 | also be true/false in the next iteration. */ |
195 | |
196 | static void |
197 | patch_loop_exit (class loop *loop, tree_code guard_code, tree nextval, |
198 | tree newbound, bool initial_true) |
199 | { |
200 | edge exit = single_exit (loop); |
201 | gcond *stmt = as_a <gcond *> (p: *gsi_last_bb (bb: exit->src)); |
202 | gimple_cond_set_condition (stmt, code: guard_code, lhs: nextval, rhs: newbound); |
203 | update_stmt (s: stmt); |
204 | |
205 | edge stay = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit); |
206 | |
207 | exit->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); |
208 | stay->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); |
209 | |
210 | if (initial_true) |
211 | { |
212 | exit->flags |= EDGE_FALSE_VALUE; |
213 | stay->flags |= EDGE_TRUE_VALUE; |
214 | } |
215 | else |
216 | { |
217 | exit->flags |= EDGE_TRUE_VALUE; |
218 | stay->flags |= EDGE_FALSE_VALUE; |
219 | } |
220 | } |
221 | |
222 | /* Give an induction variable GUARD_IV, and its affine descriptor IV, |
223 | find the loop phi node in LOOP defining it directly, or create |
224 | such phi node. Return that phi node. */ |
225 | |
226 | static gphi * |
227 | find_or_create_guard_phi (class loop *loop, tree guard_iv, affine_iv * /*iv*/) |
228 | { |
229 | gimple *def = SSA_NAME_DEF_STMT (guard_iv); |
230 | gphi *phi; |
231 | if ((phi = dyn_cast <gphi *> (p: def)) |
232 | && gimple_bb (g: phi) == loop->header) |
233 | return phi; |
234 | |
235 | /* XXX Create the PHI instead. */ |
236 | return NULL; |
237 | } |
238 | |
239 | /* Returns true if the exit values of all loop phi nodes can be |
240 | determined easily (i.e. that connect_loop_phis can determine them). */ |
241 | |
242 | static bool |
243 | easy_exit_values (class loop *loop) |
244 | { |
245 | edge exit = single_exit (loop); |
246 | edge latch = loop_latch_edge (loop); |
247 | gphi_iterator psi; |
248 | |
249 | /* Currently we regard the exit values as easy if they are the same |
250 | as the value over the backedge. Which is the case if the definition |
251 | of the backedge value dominates the exit edge. */ |
252 | for (psi = gsi_start_phis (loop->header); !gsi_end_p (i: psi); gsi_next (i: &psi)) |
253 | { |
254 | gphi *phi = psi.phi (); |
255 | tree next = PHI_ARG_DEF_FROM_EDGE (phi, latch); |
256 | basic_block bb; |
257 | if (TREE_CODE (next) == SSA_NAME |
258 | && (bb = gimple_bb (SSA_NAME_DEF_STMT (next))) |
259 | && !dominated_by_p (CDI_DOMINATORS, exit->src, bb)) |
260 | return false; |
261 | } |
262 | |
263 | return true; |
264 | } |
265 | |
266 | /* This function updates the SSA form after connect_loops made a new |
267 | edge NEW_E leading from LOOP1 exit to LOOP2 (via in intermediate |
268 | conditional). I.e. the second loop can now be entered either |
269 | via the original entry or via NEW_E, so the entry values of LOOP2 |
270 | phi nodes are either the original ones or those at the exit |
271 | of LOOP1. Insert new phi nodes in LOOP2 pre-header reflecting |
272 | this. The loops need to fulfill easy_exit_values(). */ |
273 | |
274 | static void |
275 | connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e) |
276 | { |
277 | basic_block rest = loop_preheader_edge (loop2)->src; |
278 | gcc_assert (new_e->dest == rest); |
279 | edge skip_first = EDGE_PRED (rest, EDGE_PRED (rest, 0) == new_e); |
280 | |
281 | edge firste = loop_preheader_edge (loop1); |
282 | edge seconde = loop_preheader_edge (loop2); |
283 | edge firstn = loop_latch_edge (loop1); |
284 | gphi_iterator psi_first, psi_second; |
285 | for (psi_first = gsi_start_phis (loop1->header), |
286 | psi_second = gsi_start_phis (loop2->header); |
287 | !gsi_end_p (i: psi_first); |
288 | gsi_next (i: &psi_first), gsi_next (i: &psi_second)) |
289 | { |
290 | tree init, next, new_init; |
291 | use_operand_p op; |
292 | gphi *phi_first = psi_first.phi (); |
293 | gphi *phi_second = psi_second.phi (); |
294 | |
295 | init = PHI_ARG_DEF_FROM_EDGE (phi_first, firste); |
296 | next = PHI_ARG_DEF_FROM_EDGE (phi_first, firstn); |
297 | op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_second, seconde); |
298 | gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op))); |
299 | |
300 | /* Prefer using original variable as a base for the new ssa name. |
301 | This is necessary for virtual ops, and useful in order to avoid |
302 | losing debug info for real ops. */ |
303 | if (TREE_CODE (next) == SSA_NAME |
304 | && useless_type_conversion_p (TREE_TYPE (next), |
305 | TREE_TYPE (init))) |
306 | new_init = copy_ssa_name (var: next); |
307 | else if (TREE_CODE (init) == SSA_NAME |
308 | && useless_type_conversion_p (TREE_TYPE (init), |
309 | TREE_TYPE (next))) |
310 | new_init = copy_ssa_name (var: init); |
311 | else if (useless_type_conversion_p (TREE_TYPE (next), |
312 | TREE_TYPE (init))) |
313 | new_init = make_temp_ssa_name (TREE_TYPE (next), NULL, |
314 | name: "unrinittmp" ); |
315 | else |
316 | new_init = make_temp_ssa_name (TREE_TYPE (init), NULL, |
317 | name: "unrinittmp" ); |
318 | |
319 | gphi * newphi = create_phi_node (new_init, rest); |
320 | add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION); |
321 | add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION); |
322 | SET_USE (op, new_init); |
323 | } |
324 | } |
325 | |
326 | /* The two loops LOOP1 and LOOP2 were just created by loop versioning, |
327 | they are still equivalent and placed in two arms of a diamond, like so: |
328 | |
329 | .------if (cond)------. |
330 | v v |
331 | pre1 pre2 |
332 | | | |
333 | .--->h1 h2<----. |
334 | | | | | |
335 | | ex1---. .---ex2 | |
336 | | / | | \ | |
337 | '---l1 X | l2---' |
338 | | | |
339 | | | |
340 | '--->join<---' |
341 | |
342 | This function transforms the program such that LOOP1 is conditionally |
343 | falling through to LOOP2, or skipping it. This is done by splitting |
344 | the ex1->join edge at X in the diagram above, and inserting a condition |
345 | whose one arm goes to pre2, resulting in this situation: |
346 | |
347 | .------if (cond)------. |
348 | v v |
349 | pre1 .---------->pre2 |
350 | | | | |
351 | .--->h1 | h2<----. |
352 | | | | | | |
353 | | ex1---. | .---ex2 | |
354 | | / v | | \ | |
355 | '---l1 skip---' | l2---' |
356 | | | |
357 | | | |
358 | '--->join<---' |
359 | |
360 | |
361 | The condition used is the exit condition of LOOP1, which effectively means |
362 | that when the first loop exits (for whatever reason) but the real original |
363 | exit expression is still false the second loop will be entered. |
364 | The function returns the new edge cond->pre2. |
365 | |
366 | This doesn't update the SSA form, see connect_loop_phis for that. */ |
367 | |
368 | static edge |
369 | connect_loops (class loop *loop1, class loop *loop2) |
370 | { |
371 | edge exit = single_exit (loop1); |
372 | basic_block skip_bb = split_edge (exit); |
373 | gcond *skip_stmt; |
374 | gimple_stmt_iterator gsi; |
375 | edge new_e, skip_e; |
376 | |
377 | gcond *stmt = as_a <gcond *> (p: *gsi_last_bb (bb: exit->src)); |
378 | skip_stmt = gimple_build_cond (gimple_cond_code (gs: stmt), |
379 | gimple_cond_lhs (gs: stmt), |
380 | gimple_cond_rhs (gs: stmt), |
381 | NULL_TREE, NULL_TREE); |
382 | gsi = gsi_last_bb (bb: skip_bb); |
383 | gsi_insert_after (&gsi, skip_stmt, GSI_NEW_STMT); |
384 | |
385 | skip_e = EDGE_SUCC (skip_bb, 0); |
386 | skip_e->flags &= ~EDGE_FALLTHRU; |
387 | new_e = make_edge (skip_bb, loop_preheader_edge (loop2)->src, 0); |
388 | if (exit->flags & EDGE_TRUE_VALUE) |
389 | { |
390 | skip_e->flags |= EDGE_TRUE_VALUE; |
391 | new_e->flags |= EDGE_FALSE_VALUE; |
392 | } |
393 | else |
394 | { |
395 | skip_e->flags |= EDGE_FALSE_VALUE; |
396 | new_e->flags |= EDGE_TRUE_VALUE; |
397 | } |
398 | |
399 | new_e->probability = profile_probability::very_likely (); |
400 | skip_e->probability = new_e->probability.invert (); |
401 | |
402 | return new_e; |
403 | } |
404 | |
405 | /* This returns the new bound for iterations given the original iteration |
406 | space in NITER, an arbitrary new bound BORDER, assumed to be some |
407 | comparison value with a different IV, the initial value GUARD_INIT of |
408 | that other IV, and the comparison code GUARD_CODE that compares |
409 | that other IV with BORDER. We return an SSA name, and place any |
410 | necessary statements for that computation into *STMTS. |
411 | |
412 | For example for such a loop: |
413 | |
414 | for (i = beg, j = guard_init; i < end; i++, j++) |
415 | if (j < border) // this is supposed to be true/false |
416 | ... |
417 | |
418 | we want to return a new bound (on j) that makes the loop iterate |
419 | as long as the condition j < border stays true. We also don't want |
420 | to iterate more often than the original loop, so we have to introduce |
421 | some cut-off as well (via min/max), effectively resulting in: |
422 | |
423 | newend = min (end+guard_init-beg, border) |
424 | for (i = beg; j = guard_init; j < newend; i++, j++) |
425 | if (j < c) |
426 | ... |
427 | |
428 | Depending on the direction of the IVs and if the exit tests |
429 | are strict or non-strict we need to use MIN or MAX, |
430 | and add or subtract 1. This routine computes newend above. */ |
431 | |
432 | static tree |
433 | compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter, |
434 | tree border, |
435 | enum tree_code guard_code, tree guard_init) |
436 | { |
437 | /* The niter structure contains the after-increment IV, we need |
438 | the loop-enter base, so subtract STEP once. */ |
439 | tree controlbase = force_gimple_operand (niter->control.base, |
440 | stmts, true, NULL_TREE); |
441 | tree controlstep = niter->control.step; |
442 | tree enddiff; |
443 | if (POINTER_TYPE_P (TREE_TYPE (controlbase))) |
444 | { |
445 | controlstep = gimple_build (seq: stmts, code: NEGATE_EXPR, |
446 | TREE_TYPE (controlstep), ops: controlstep); |
447 | enddiff = gimple_build (seq: stmts, code: POINTER_PLUS_EXPR, |
448 | TREE_TYPE (controlbase), |
449 | ops: controlbase, ops: controlstep); |
450 | } |
451 | else |
452 | enddiff = gimple_build (seq: stmts, code: MINUS_EXPR, |
453 | TREE_TYPE (controlbase), |
454 | ops: controlbase, ops: controlstep); |
455 | |
456 | /* Compute end-beg. */ |
457 | gimple_seq stmts2; |
458 | tree end = force_gimple_operand (niter->bound, &stmts2, |
459 | true, NULL_TREE); |
460 | gimple_seq_add_seq_without_update (stmts, stmts2); |
461 | if (POINTER_TYPE_P (TREE_TYPE (enddiff))) |
462 | { |
463 | tree tem = gimple_convert (seq: stmts, sizetype, op: enddiff); |
464 | tem = gimple_build (seq: stmts, code: NEGATE_EXPR, sizetype, ops: tem); |
465 | enddiff = gimple_build (seq: stmts, code: POINTER_PLUS_EXPR, |
466 | TREE_TYPE (enddiff), |
467 | ops: end, ops: tem); |
468 | } |
469 | else |
470 | enddiff = gimple_build (seq: stmts, code: MINUS_EXPR, TREE_TYPE (enddiff), |
471 | ops: end, ops: enddiff); |
472 | |
473 | /* Compute guard_init + (end-beg). */ |
474 | tree newbound; |
475 | enddiff = gimple_convert (seq: stmts, TREE_TYPE (guard_init), op: enddiff); |
476 | if (POINTER_TYPE_P (TREE_TYPE (guard_init))) |
477 | { |
478 | enddiff = gimple_convert (seq: stmts, sizetype, op: enddiff); |
479 | newbound = gimple_build (seq: stmts, code: POINTER_PLUS_EXPR, |
480 | TREE_TYPE (guard_init), |
481 | ops: guard_init, ops: enddiff); |
482 | } |
483 | else |
484 | newbound = gimple_build (seq: stmts, code: PLUS_EXPR, TREE_TYPE (guard_init), |
485 | ops: guard_init, ops: enddiff); |
486 | |
487 | /* Depending on the direction of the IVs the new bound for the first |
488 | loop is the minimum or maximum of old bound and border. |
489 | Also, if the guard condition isn't strictly less or greater, |
490 | we need to adjust the bound. */ |
491 | int addbound = 0; |
492 | enum tree_code minmax; |
493 | if (niter->cmp == LT_EXPR) |
494 | { |
495 | /* GT and LE are the same, inverted. */ |
496 | if (guard_code == GT_EXPR || guard_code == LE_EXPR) |
497 | addbound = -1; |
498 | minmax = MIN_EXPR; |
499 | } |
500 | else |
501 | { |
502 | gcc_assert (niter->cmp == GT_EXPR); |
503 | if (guard_code == GE_EXPR || guard_code == LT_EXPR) |
504 | addbound = 1; |
505 | minmax = MAX_EXPR; |
506 | } |
507 | |
508 | if (addbound) |
509 | { |
510 | tree type2 = TREE_TYPE (newbound); |
511 | if (POINTER_TYPE_P (type2)) |
512 | type2 = sizetype; |
513 | newbound = gimple_build (seq: stmts, |
514 | POINTER_TYPE_P (TREE_TYPE (newbound)) |
515 | ? POINTER_PLUS_EXPR : PLUS_EXPR, |
516 | TREE_TYPE (newbound), |
517 | ops: newbound, |
518 | ops: build_int_cst (type2, addbound)); |
519 | } |
520 | |
521 | tree newend = gimple_build (seq: stmts, code: minmax, TREE_TYPE (border), |
522 | ops: border, ops: newbound); |
523 | return newend; |
524 | } |
525 | |
526 | /* Fix the two loop's bb count after split based on the split edge probability, |
527 | don't adjust the bbs dominated by true branches of that loop to avoid |
528 | dropping 1s down. */ |
529 | static void |
530 | fix_loop_bb_probability (class loop *loop1, class loop *loop2, edge true_edge, |
531 | edge false_edge) |
532 | { |
533 | /* Proportion first loop's bb counts except those dominated by true |
534 | branch to avoid drop 1s down. */ |
535 | basic_block *bbs1, *bbs2; |
536 | bbs1 = get_loop_body (loop1); |
537 | unsigned j; |
538 | for (j = 0; j < loop1->num_nodes; j++) |
539 | if (bbs1[j] == loop1->latch |
540 | /* Watch for case where the true conditional is empty. */ |
541 | || !single_pred_p (bb: true_edge->dest) |
542 | || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest)) |
543 | bbs1[j]->count |
544 | = bbs1[j]->count.apply_probability (prob: true_edge->probability); |
545 | free (ptr: bbs1); |
546 | |
547 | /* Proportion second loop's bb counts except those dominated by false |
548 | branch to avoid drop 1s down. */ |
549 | basic_block bbi_copy = get_bb_copy (false_edge->dest); |
550 | bbs2 = get_loop_body (loop2); |
551 | for (j = 0; j < loop2->num_nodes; j++) |
552 | if (bbs2[j] == loop2->latch |
553 | /* Watch for case where the flase conditional is empty. */ |
554 | || !single_pred_p (bb: bbi_copy) |
555 | || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy)) |
556 | bbs2[j]->count |
557 | = bbs2[j]->count.apply_probability (prob: true_edge->probability.invert ()); |
558 | free (ptr: bbs2); |
559 | } |
560 | |
561 | /* Checks if LOOP contains an conditional block whose condition |
562 | depends on which side in the iteration space it is, and if so |
563 | splits the iteration space into two loops. Returns true if the |
564 | loop was split. NITER must contain the iteration descriptor for the |
565 | single exit of LOOP. */ |
566 | |
567 | static bool |
568 | split_loop (class loop *loop1) |
569 | { |
570 | class tree_niter_desc niter; |
571 | basic_block *bbs; |
572 | unsigned i; |
573 | bool changed = false; |
574 | tree guard_iv; |
575 | tree border = NULL_TREE; |
576 | affine_iv iv; |
577 | edge exit1; |
578 | |
579 | if (!(exit1 = single_exit (loop1)) |
580 | || EDGE_COUNT (exit1->src->succs) != 2 |
581 | /* ??? We could handle non-empty latches when we split the latch edge |
582 | (not the exit edge), and put the new exit condition in the new block. |
583 | OTOH this executes some code unconditionally that might have been |
584 | skipped by the original exit before. */ |
585 | || !empty_block_p (loop1->latch) |
586 | || !easy_exit_values (loop: loop1) |
587 | || !number_of_iterations_exit (loop1, exit1, niter: &niter, false, every_iteration: true) |
588 | || niter.cmp == ERROR_MARK) |
589 | return false; |
590 | if (niter.cmp == NE_EXPR) |
591 | { |
592 | if (!niter.control.no_overflow) |
593 | return false; |
594 | if (tree_int_cst_sign_bit (niter.control.step)) |
595 | niter.cmp = GT_EXPR; |
596 | else |
597 | niter.cmp = LT_EXPR; |
598 | } |
599 | |
600 | bbs = get_loop_body (loop1); |
601 | |
602 | if (!can_copy_bbs_p (bbs, loop1->num_nodes)) |
603 | { |
604 | free (ptr: bbs); |
605 | return false; |
606 | } |
607 | |
608 | /* Find a splitting opportunity. */ |
609 | enum tree_code guard_code; |
610 | for (i = 0; i < loop1->num_nodes; i++) |
611 | if ((guard_iv = split_at_bb_p (loop: loop1, bb: bbs[i], border: &border, iv: &iv, guard_code: &guard_code))) |
612 | { |
613 | /* Handling opposite steps is not implemented yet. Neither |
614 | is handling different step sizes. */ |
615 | if ((tree_int_cst_sign_bit (iv.step) |
616 | != tree_int_cst_sign_bit (niter.control.step)) |
617 | || !tree_int_cst_equal (iv.step, niter.control.step)) |
618 | continue; |
619 | |
620 | /* Find a loop PHI node that defines guard_iv directly, |
621 | or create one doing that. */ |
622 | gphi *phi = find_or_create_guard_phi (loop: loop1, guard_iv, &iv); |
623 | if (!phi) |
624 | continue; |
625 | gcond *guard_stmt = as_a<gcond *> (p: *gsi_last_bb (bb: bbs[i])); |
626 | tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi, |
627 | loop_preheader_edge (loop1)); |
628 | |
629 | /* Loop splitting is implemented by versioning the loop, placing |
630 | the new loop after the old loop, make the first loop iterate |
631 | as long as the conditional stays true (or false) and let the |
632 | second (new) loop handle the rest of the iterations. |
633 | |
634 | First we need to determine if the condition will start being true |
635 | or false in the first loop. */ |
636 | bool initial_true; |
637 | switch (guard_code) |
638 | { |
639 | case LT_EXPR: |
640 | case LE_EXPR: |
641 | initial_true = !tree_int_cst_sign_bit (iv.step); |
642 | break; |
643 | case GT_EXPR: |
644 | case GE_EXPR: |
645 | initial_true = tree_int_cst_sign_bit (iv.step); |
646 | break; |
647 | default: |
648 | gcc_unreachable (); |
649 | } |
650 | |
651 | /* Build a condition that will skip the first loop when the |
652 | guard condition won't ever be true (or false). */ |
653 | gimple_seq stmts2; |
654 | border = force_gimple_operand (border, &stmts2, true, NULL_TREE); |
655 | if (stmts2) |
656 | { |
657 | /* When the split condition is not always evaluated make sure |
658 | to rewrite it to defined overflow. */ |
659 | if (!dominated_by_p (CDI_DOMINATORS, exit1->src, bbs[i])) |
660 | { |
661 | gimple_stmt_iterator gsi; |
662 | gsi = gsi_start (seq&: stmts2); |
663 | while (!gsi_end_p (i: gsi)) |
664 | { |
665 | gimple *stmt = gsi_stmt (i: gsi); |
666 | if (is_gimple_assign (gs: stmt) |
667 | && arith_code_with_undefined_signed_overflow |
668 | (gimple_assign_rhs_code (gs: stmt))) |
669 | rewrite_to_defined_overflow (&gsi); |
670 | gsi_next (i: &gsi); |
671 | } |
672 | } |
673 | gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1), |
674 | stmts2); |
675 | } |
676 | tree cond = fold_build2 (guard_code, boolean_type_node, |
677 | guard_init, border); |
678 | if (!initial_true) |
679 | cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); |
680 | |
681 | edge true_edge, false_edge; |
682 | extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge); |
683 | |
684 | /* Now version the loop, placing loop2 after loop1 connecting |
685 | them, and fix up SSA form for that. */ |
686 | initialize_original_copy_tables (); |
687 | basic_block cond_bb; |
688 | |
689 | profile_probability loop1_prob |
690 | = integer_onep (cond) ? profile_probability::always () |
691 | : true_edge->probability; |
692 | /* TODO: It is commonly a case that we know that both loops will be |
693 | entered. very_likely below is the probability that second loop will |
694 | be entered given by connect_loops. We should work out the common |
695 | case it is always true. */ |
696 | class loop *loop2 = loop_version (loop1, cond, &cond_bb, |
697 | loop1_prob, |
698 | /* Pass always as we will later |
699 | redirect first loop to second |
700 | loop. */ |
701 | profile_probability::always (), |
702 | profile_probability::always (), |
703 | profile_probability::very_likely (), |
704 | true); |
705 | gcc_assert (loop2); |
706 | /* Correct probability of edge cond_bb->preheader_of_loop2. */ |
707 | single_pred_edge |
708 | (bb: loop_preheader_edge (loop2)->src)->probability |
709 | = loop1_prob.invert (); |
710 | |
711 | fix_loop_bb_probability (loop1, loop2, true_edge, false_edge); |
712 | /* If conditional we split on has reliable profilea nd both |
713 | preconditionals of loop1 and loop2 are constant true, we can |
714 | only redistribute the iteration counts to the split loops. |
715 | |
716 | If the conditionals we insert before loop1 or loop2 are non-trivial |
717 | they increase expected loop count, so account this accordingly. |
718 | If we do not know the probability of split conditional, avoid |
719 | reudcing loop estimates, since we do not really know how they are |
720 | split between of the two new loops. Keep orignal estimate since |
721 | it is likely better then completely dropping it. |
722 | |
723 | TODO: If we know that one of the new loops has constant |
724 | number of iterations, we can do better. We could also update |
725 | upper bounds. */ |
726 | if (loop1->any_estimate |
727 | && wi::fits_shwi_p (x: loop1->nb_iterations_estimate)) |
728 | { |
729 | sreal scale = true_edge->probability.reliable_p () |
730 | ? true_edge->probability.to_sreal () : (sreal)1; |
731 | sreal scale2 = false_edge->probability.reliable_p () |
732 | ? false_edge->probability.to_sreal () : (sreal)1; |
733 | sreal div1 = loop1_prob.initialized_p () |
734 | ? loop1_prob.to_sreal () : (sreal)1/(sreal)2; |
735 | /* +1 to get header interations rather than latch iterations and then |
736 | -1 to convert back. */ |
737 | if (div1 != 0) |
738 | loop1->nb_iterations_estimate |
739 | = MAX ((((sreal)loop1->nb_iterations_estimate.to_shwi () + 1) |
740 | * scale / div1).to_nearest_int () - 1, 0); |
741 | else |
742 | loop1->any_estimate = false; |
743 | loop2->nb_iterations_estimate |
744 | = MAX ((((sreal)loop2->nb_iterations_estimate.to_shwi () + 1) * scale2 |
745 | / profile_probability::very_likely ().to_sreal ()) |
746 | .to_nearest_int () - 1, 0); |
747 | } |
748 | update_loop_exit_probability_scale_dom_bbs (loop: loop1); |
749 | update_loop_exit_probability_scale_dom_bbs (loop: loop2); |
750 | |
751 | edge new_e = connect_loops (loop1, loop2); |
752 | connect_loop_phis (loop1, loop2, new_e); |
753 | |
754 | /* The iterations of the second loop is now already |
755 | exactly those that the first loop didn't do, but the |
756 | iteration space of the first loop is still the original one. |
757 | Compute the new bound for the guarding IV and patch the |
758 | loop exit to use it instead of original IV and bound. */ |
759 | gimple_seq stmts = NULL; |
760 | tree newend = compute_new_first_bound (stmts: &stmts, niter: &niter, border, |
761 | guard_code, guard_init); |
762 | if (stmts) |
763 | gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1), |
764 | stmts); |
765 | tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1)); |
766 | patch_loop_exit (loop: loop1, guard_code, nextval: guard_next, newbound: newend, initial_true); |
767 | |
768 | /* Finally patch out the two copies of the condition to be always |
769 | true/false (or opposite). */ |
770 | gcond *force_true = as_a<gcond *> (p: *gsi_last_bb (bb: bbs[i])); |
771 | gcond *force_false = as_a<gcond *> (p: *gsi_last_bb (bb: get_bb_copy (bbs[i]))); |
772 | if (!initial_true) |
773 | std::swap (a&: force_true, b&: force_false); |
774 | gimple_cond_make_true (gs: force_true); |
775 | gimple_cond_make_false (gs: force_false); |
776 | update_stmt (s: force_true); |
777 | update_stmt (s: force_false); |
778 | |
779 | free_original_copy_tables (); |
780 | |
781 | changed = true; |
782 | if (dump_file && (dump_flags & TDF_DETAILS)) |
783 | fprintf (stream: dump_file, format: ";; Loop split.\n" ); |
784 | |
785 | if (dump_enabled_p ()) |
786 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, guard_stmt, "loop split\n" ); |
787 | |
788 | /* Only deal with the first opportunity. */ |
789 | break; |
790 | } |
791 | |
792 | free (ptr: bbs); |
793 | return changed; |
794 | } |
795 | |
796 | /* Another transformation of loops like: |
797 | |
798 | for (i = INIT (); CHECK (i); i = NEXT ()) |
799 | { |
800 | if (expr (a_1, a_2, ..., a_n)) // expr is pure |
801 | a_j = ...; // change at least one a_j |
802 | else |
803 | S; // not change any a_j |
804 | } |
805 | |
806 | into: |
807 | |
808 | for (i = INIT (); CHECK (i); i = NEXT ()) |
809 | { |
810 | if (expr (a_1, a_2, ..., a_n)) |
811 | a_j = ...; |
812 | else |
813 | { |
814 | S; |
815 | i = NEXT (); |
816 | break; |
817 | } |
818 | } |
819 | |
820 | for (; CHECK (i); i = NEXT ()) |
821 | { |
822 | S; |
823 | } |
824 | |
825 | */ |
826 | |
827 | /* Data structure to hold temporary information during loop split upon |
828 | semi-invariant conditional statement. */ |
829 | class split_info { |
830 | public: |
831 | /* Array of all basic blocks in a loop, returned by get_loop_body(). */ |
832 | basic_block *bbs; |
833 | |
834 | /* All memory store/clobber statements in a loop. */ |
835 | auto_vec<gimple *> memory_stores; |
836 | |
837 | /* Whether above memory stores vector has been filled. */ |
838 | int need_init; |
839 | |
840 | /* Control dependencies of basic blocks in a loop. */ |
841 | auto_vec<hash_set<basic_block> *> control_deps; |
842 | |
843 | split_info () : bbs (NULL), need_init (true) { } |
844 | |
845 | ~split_info () |
846 | { |
847 | if (bbs) |
848 | free (ptr: bbs); |
849 | |
850 | for (unsigned i = 0; i < control_deps.length (); i++) |
851 | delete control_deps[i]; |
852 | } |
853 | }; |
854 | |
855 | /* Find all statements with memory-write effect in LOOP, including memory |
856 | store and non-pure function call, and keep those in a vector. This work |
857 | is only done one time, for the vector should be constant during analysis |
858 | stage of semi-invariant condition. */ |
859 | |
860 | static void |
861 | find_vdef_in_loop (struct loop *loop) |
862 | { |
863 | split_info *info = (split_info *) loop->aux; |
864 | gphi *vphi = get_virtual_phi (loop->header); |
865 | |
866 | /* Indicate memory store vector has been filled. */ |
867 | info->need_init = false; |
868 | |
869 | /* If loop contains memory operation, there must be a virtual PHI node in |
870 | loop header basic block. */ |
871 | if (vphi == NULL) |
872 | return; |
873 | |
874 | /* All virtual SSA names inside the loop are connected to be a cyclic |
875 | graph via virtual PHI nodes. The virtual PHI node in loop header just |
876 | links the first and the last virtual SSA names, by using the last as |
877 | PHI operand to define the first. */ |
878 | const edge latch = loop_latch_edge (loop); |
879 | const tree first = gimple_phi_result (gs: vphi); |
880 | const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch); |
881 | |
882 | /* The virtual SSA cyclic graph might consist of only one SSA name, who |
883 | is defined by itself. |
884 | |
885 | .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)> |
886 | |
887 | This means the loop contains only memory loads, so we can skip it. */ |
888 | if (first == last) |
889 | return; |
890 | |
891 | auto_vec<gimple *> other_stores; |
892 | auto_vec<tree> worklist; |
893 | auto_bitmap visited; |
894 | |
895 | bitmap_set_bit (visited, SSA_NAME_VERSION (first)); |
896 | bitmap_set_bit (visited, SSA_NAME_VERSION (last)); |
897 | worklist.safe_push (obj: last); |
898 | |
899 | do |
900 | { |
901 | tree vuse = worklist.pop (); |
902 | gimple *stmt = SSA_NAME_DEF_STMT (vuse); |
903 | |
904 | /* We mark the first and last SSA names as visited at the beginning, |
905 | and reversely start the process from the last SSA name towards the |
906 | first, which ensures that this do-while will not touch SSA names |
907 | defined outside the loop. */ |
908 | gcc_assert (gimple_bb (stmt) |
909 | && flow_bb_inside_loop_p (loop, gimple_bb (stmt))); |
910 | |
911 | if (gimple_code (g: stmt) == GIMPLE_PHI) |
912 | { |
913 | gphi *phi = as_a <gphi *> (p: stmt); |
914 | |
915 | for (unsigned i = 0; i < gimple_phi_num_args (gs: phi); ++i) |
916 | { |
917 | tree arg = gimple_phi_arg_def (gs: stmt, index: i); |
918 | |
919 | if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg))) |
920 | worklist.safe_push (obj: arg); |
921 | } |
922 | } |
923 | else |
924 | { |
925 | tree prev = gimple_vuse (g: stmt); |
926 | |
927 | /* Non-pure call statement is conservatively assumed to impact all |
928 | memory locations. So place call statements ahead of other memory |
929 | stores in the vector with an idea of using them as shortcut |
930 | terminators to memory alias analysis. */ |
931 | if (gimple_code (g: stmt) == GIMPLE_CALL) |
932 | info->memory_stores.safe_push (obj: stmt); |
933 | else |
934 | other_stores.safe_push (obj: stmt); |
935 | |
936 | if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev))) |
937 | worklist.safe_push (obj: prev); |
938 | } |
939 | } while (!worklist.is_empty ()); |
940 | |
941 | info->memory_stores.safe_splice (src: other_stores); |
942 | } |
943 | |
944 | /* Two basic blocks have equivalent control dependency if one dominates to |
945 | the other, and it is post-dominated by the latter. Given a basic block |
946 | BB in LOOP, find farest equivalent dominating basic block. For BB, there |
947 | is a constraint that BB does not post-dominate loop header of LOOP, this |
948 | means BB is control-dependent on at least one basic block in LOOP. */ |
949 | |
950 | static basic_block |
951 | get_control_equiv_head_block (struct loop *loop, basic_block bb) |
952 | { |
953 | while (!bb->aux) |
954 | { |
955 | basic_block dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb); |
956 | |
957 | gcc_checking_assert (dom_bb && flow_bb_inside_loop_p (loop, dom_bb)); |
958 | |
959 | if (!dominated_by_p (CDI_POST_DOMINATORS, dom_bb, bb)) |
960 | break; |
961 | |
962 | bb = dom_bb; |
963 | } |
964 | return bb; |
965 | } |
966 | |
967 | /* Given a BB in LOOP, find out all basic blocks in LOOP that BB is control- |
968 | dependent on. */ |
969 | |
970 | static hash_set<basic_block> * |
971 | find_control_dep_blocks (struct loop *loop, basic_block bb) |
972 | { |
973 | /* BB has same control dependency as loop header, then it is not control- |
974 | dependent on any basic block in LOOP. */ |
975 | if (dominated_by_p (CDI_POST_DOMINATORS, loop->header, bb)) |
976 | return NULL; |
977 | |
978 | basic_block equiv_head = get_control_equiv_head_block (loop, bb); |
979 | |
980 | if (equiv_head->aux) |
981 | { |
982 | /* There is a basic block containing control dependency equivalent |
983 | to BB. No need to recompute that, and also set this information |
984 | to other equivalent basic blocks. */ |
985 | for (; bb != equiv_head; |
986 | bb = get_immediate_dominator (CDI_DOMINATORS, bb)) |
987 | bb->aux = equiv_head->aux; |
988 | return (hash_set<basic_block> *) equiv_head->aux; |
989 | } |
990 | |
991 | /* A basic block X is control-dependent on another Y iff there exists |
992 | a path from X to Y, in which every basic block other than X and Y |
993 | is post-dominated by Y, but X is not post-dominated by Y. |
994 | |
995 | According to this rule, traverse basic blocks in the loop backwards |
996 | starting from BB, if a basic block is post-dominated by BB, extend |
997 | current post-dominating path to this block, otherwise it is another |
998 | one that BB is control-dependent on. */ |
999 | |
1000 | auto_vec<basic_block> pdom_worklist; |
1001 | hash_set<basic_block> pdom_visited; |
1002 | hash_set<basic_block> *dep_bbs = new hash_set<basic_block>; |
1003 | |
1004 | pdom_worklist.safe_push (obj: equiv_head); |
1005 | |
1006 | do |
1007 | { |
1008 | basic_block pdom_bb = pdom_worklist.pop (); |
1009 | edge_iterator ei; |
1010 | edge e; |
1011 | |
1012 | if (pdom_visited.add (k: pdom_bb)) |
1013 | continue; |
1014 | |
1015 | FOR_EACH_EDGE (e, ei, pdom_bb->preds) |
1016 | { |
1017 | basic_block pred_bb = e->src; |
1018 | |
1019 | if (!dominated_by_p (CDI_POST_DOMINATORS, pred_bb, bb)) |
1020 | { |
1021 | dep_bbs->add (k: pred_bb); |
1022 | continue; |
1023 | } |
1024 | |
1025 | pred_bb = get_control_equiv_head_block (loop, bb: pred_bb); |
1026 | |
1027 | if (pdom_visited.contains (k: pred_bb)) |
1028 | continue; |
1029 | |
1030 | if (!pred_bb->aux) |
1031 | { |
1032 | pdom_worklist.safe_push (obj: pred_bb); |
1033 | continue; |
1034 | } |
1035 | |
1036 | /* If control dependency of basic block is available, fast extend |
1037 | post-dominating path using the information instead of advancing |
1038 | forward step-by-step. */ |
1039 | hash_set<basic_block> *pred_dep_bbs |
1040 | = (hash_set<basic_block> *) pred_bb->aux; |
1041 | |
1042 | for (hash_set<basic_block>::iterator iter = pred_dep_bbs->begin (); |
1043 | iter != pred_dep_bbs->end (); ++iter) |
1044 | { |
1045 | basic_block pred_dep_bb = *iter; |
1046 | |
1047 | /* Basic blocks can either be in control dependency of BB, or |
1048 | must be post-dominated by BB, if so, extend the path from |
1049 | these basic blocks. */ |
1050 | if (!dominated_by_p (CDI_POST_DOMINATORS, pred_dep_bb, bb)) |
1051 | dep_bbs->add (k: pred_dep_bb); |
1052 | else if (!pdom_visited.contains (k: pred_dep_bb)) |
1053 | pdom_worklist.safe_push (obj: pred_dep_bb); |
1054 | } |
1055 | } |
1056 | } while (!pdom_worklist.is_empty ()); |
1057 | |
1058 | /* Record computed control dependencies in loop so that we can reach them |
1059 | when reclaiming resources. */ |
1060 | ((split_info *) loop->aux)->control_deps.safe_push (obj: dep_bbs); |
1061 | |
1062 | /* Associate control dependence with related equivalent basic blocks. */ |
1063 | for (equiv_head->aux = dep_bbs; bb != equiv_head; |
1064 | bb = get_immediate_dominator (CDI_DOMINATORS, bb)) |
1065 | bb->aux = dep_bbs; |
1066 | |
1067 | return dep_bbs; |
1068 | } |
1069 | |
1070 | /* Forward declaration */ |
1071 | |
1072 | static bool |
1073 | stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt, |
1074 | const_basic_block skip_head, |
1075 | hash_map<gimple *, bool> &stmt_stat); |
1076 | |
1077 | /* Given STMT, memory load or pure call statement, check whether it is impacted |
1078 | by some memory store in LOOP, excluding trace starting from SKIP_HEAD (the |
1079 | trace is composed of SKIP_HEAD and those basic block dominated by it, always |
1080 | corresponds to one branch of a conditional statement). If SKIP_HEAD is |
1081 | NULL, all basic blocks of LOOP are checked. */ |
1082 | |
1083 | static bool |
1084 | vuse_semi_invariant_p (struct loop *loop, gimple *stmt, |
1085 | const_basic_block skip_head) |
1086 | { |
1087 | split_info *info = (split_info *) loop->aux; |
1088 | tree rhs = NULL_TREE; |
1089 | ao_ref ref; |
1090 | gimple *store; |
1091 | unsigned i; |
1092 | |
1093 | /* Collect memory store/clobber statements if haven't done that. */ |
1094 | if (info->need_init) |
1095 | find_vdef_in_loop (loop); |
1096 | |
1097 | if (is_gimple_assign (gs: stmt)) |
1098 | rhs = gimple_assign_rhs1 (gs: stmt); |
1099 | |
1100 | ao_ref_init (&ref, rhs); |
1101 | |
1102 | FOR_EACH_VEC_ELT (info->memory_stores, i, store) |
1103 | { |
1104 | /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL. */ |
1105 | if (skip_head |
1106 | && dominated_by_p (CDI_DOMINATORS, gimple_bb (g: store), skip_head)) |
1107 | continue; |
1108 | |
1109 | if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref)) |
1110 | return false; |
1111 | } |
1112 | |
1113 | return true; |
1114 | } |
1115 | |
1116 | /* Suppose one condition branch, led by SKIP_HEAD, is not executed since |
1117 | certain iteration of LOOP, check whether an SSA name (NAME) remains |
1118 | unchanged in next iteration. We call this characteristic semi- |
1119 | invariantness. SKIP_HEAD might be NULL, if so, nothing excluded, all basic |
1120 | blocks and control flows in the loop will be considered. Semi-invariant |
1121 | state of checked statement is cached in hash map STMT_STAT to avoid |
1122 | redundant computation in possible following re-check. */ |
1123 | |
1124 | static inline bool |
1125 | ssa_semi_invariant_p (struct loop *loop, tree name, |
1126 | const_basic_block skip_head, |
1127 | hash_map<gimple *, bool> &stmt_stat) |
1128 | { |
1129 | gimple *def = SSA_NAME_DEF_STMT (name); |
1130 | const_basic_block def_bb = gimple_bb (g: def); |
1131 | |
1132 | /* An SSA name defined outside loop is definitely semi-invariant. */ |
1133 | if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb)) |
1134 | return true; |
1135 | |
1136 | if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name)) |
1137 | return false; |
1138 | |
1139 | return stmt_semi_invariant_p_1 (loop, stmt: def, skip_head, stmt_stat); |
1140 | } |
1141 | |
1142 | /* Check whether a loop iteration PHI node (LOOP_PHI) defines a value that is |
1143 | semi-invariant in LOOP. Basic blocks dominated by SKIP_HEAD (if non-NULL), |
1144 | are excluded from LOOP. */ |
1145 | |
1146 | static bool |
1147 | loop_iter_phi_semi_invariant_p (struct loop *loop, gphi *loop_phi, |
1148 | const_basic_block skip_head) |
1149 | { |
1150 | const_edge latch = loop_latch_edge (loop); |
1151 | tree name = gimple_phi_result (gs: loop_phi); |
1152 | tree from = PHI_ARG_DEF_FROM_EDGE (loop_phi, latch); |
1153 | |
1154 | gcc_checking_assert (from); |
1155 | |
1156 | /* Loop iteration PHI node locates in loop header, and it has two source |
1157 | operands, one is an initial value coming from outside the loop, the other |
1158 | is a value through latch of the loop, which is derived in last iteration, |
1159 | we call the latter latch value. From the PHI node to definition of latch |
1160 | value, if excluding branch trace starting from SKIP_HEAD, except copy- |
1161 | assignment or likewise, there is no other kind of value redefinition, SSA |
1162 | name defined by the PHI node is semi-invariant. |
1163 | |
1164 | loop entry |
1165 | | .--- latch ---. |
1166 | | | | |
1167 | v v | |
1168 | x_1 = PHI <x_0, x_3> | |
1169 | | | |
1170 | v | |
1171 | .------- if (cond) -------. | |
1172 | | | | |
1173 | | [ SKIP ] | |
1174 | | | | |
1175 | | x_2 = ... | |
1176 | | | | |
1177 | '---- T ---->.<---- F ----' | |
1178 | | | |
1179 | v | |
1180 | x_3 = PHI <x_1, x_2> | |
1181 | | | |
1182 | '----------------------' |
1183 | |
1184 | Suppose in certain iteration, execution flow in above graph goes through |
1185 | true branch, which means that one source value to define x_3 in false |
1186 | branch (x_2) is skipped, x_3 only comes from x_1, and x_1 in next |
1187 | iterations is defined by x_3, we know that x_1 will never changed if COND |
1188 | always chooses true branch from then on. */ |
1189 | |
1190 | while (from != name) |
1191 | { |
1192 | /* A new value comes from a CONSTANT. */ |
1193 | if (TREE_CODE (from) != SSA_NAME) |
1194 | return false; |
1195 | |
1196 | gimple *stmt = SSA_NAME_DEF_STMT (from); |
1197 | const_basic_block bb = gimple_bb (g: stmt); |
1198 | |
1199 | /* A new value comes from outside the loop. */ |
1200 | if (!bb || !flow_bb_inside_loop_p (loop, bb)) |
1201 | return false; |
1202 | |
1203 | from = NULL_TREE; |
1204 | |
1205 | if (gimple_code (g: stmt) == GIMPLE_PHI) |
1206 | { |
1207 | gphi *phi = as_a <gphi *> (p: stmt); |
1208 | |
1209 | for (unsigned i = 0; i < gimple_phi_num_args (gs: phi); ++i) |
1210 | { |
1211 | if (skip_head) |
1212 | { |
1213 | const_edge e = gimple_phi_arg_edge (phi, i); |
1214 | |
1215 | /* Don't consider redefinitions in excluded basic blocks. */ |
1216 | if (dominated_by_p (CDI_DOMINATORS, e->src, skip_head)) |
1217 | continue; |
1218 | } |
1219 | |
1220 | tree arg = gimple_phi_arg_def (gs: phi, index: i); |
1221 | |
1222 | if (!from) |
1223 | from = arg; |
1224 | else if (!operand_equal_p (from, arg, flags: 0)) |
1225 | /* There are more than one source operands that provide |
1226 | different values to the SSA name, it is variant. */ |
1227 | return false; |
1228 | } |
1229 | } |
1230 | else if (gimple_code (g: stmt) == GIMPLE_ASSIGN) |
1231 | { |
1232 | /* For simple value copy, check its rhs instead. */ |
1233 | if (gimple_assign_ssa_name_copy_p (stmt)) |
1234 | from = gimple_assign_rhs1 (gs: stmt); |
1235 | } |
1236 | |
1237 | /* Any other kind of definition is deemed to introduce a new value |
1238 | to the SSA name. */ |
1239 | if (!from) |
1240 | return false; |
1241 | } |
1242 | return true; |
1243 | } |
1244 | |
1245 | /* Check whether conditional predicates that BB is control-dependent on, are |
1246 | semi-invariant in LOOP. Basic blocks dominated by SKIP_HEAD (if non-NULL), |
1247 | are excluded from LOOP. Semi-invariant state of checked statement is cached |
1248 | in hash map STMT_STAT. */ |
1249 | |
1250 | static bool |
1251 | control_dep_semi_invariant_p (struct loop *loop, basic_block bb, |
1252 | const_basic_block skip_head, |
1253 | hash_map<gimple *, bool> &stmt_stat) |
1254 | { |
1255 | hash_set<basic_block> *dep_bbs = find_control_dep_blocks (loop, bb); |
1256 | |
1257 | if (!dep_bbs) |
1258 | return true; |
1259 | |
1260 | for (hash_set<basic_block>::iterator iter = dep_bbs->begin (); |
1261 | iter != dep_bbs->end (); ++iter) |
1262 | { |
1263 | gimple *last = *gsi_last_bb (bb: *iter); |
1264 | if (!last) |
1265 | return false; |
1266 | |
1267 | /* Only check condition predicates. */ |
1268 | if (gimple_code (g: last) != GIMPLE_COND |
1269 | && gimple_code (g: last) != GIMPLE_SWITCH) |
1270 | return false; |
1271 | |
1272 | if (!stmt_semi_invariant_p_1 (loop, stmt: last, skip_head, stmt_stat)) |
1273 | return false; |
1274 | } |
1275 | |
1276 | return true; |
1277 | } |
1278 | |
1279 | /* Check whether STMT is semi-invariant in LOOP, iff all its operands are |
1280 | semi-invariant, consequently, all its defined values are semi-invariant. |
1281 | Basic blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP. |
1282 | Semi-invariant state of checked statement is cached in hash map |
1283 | STMT_STAT. */ |
1284 | |
1285 | static bool |
1286 | stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt, |
1287 | const_basic_block skip_head, |
1288 | hash_map<gimple *, bool> &stmt_stat) |
1289 | { |
1290 | bool existed; |
1291 | bool &invar = stmt_stat.get_or_insert (k: stmt, existed: &existed); |
1292 | |
1293 | if (existed) |
1294 | return invar; |
1295 | |
1296 | /* A statement might depend on itself, which is treated as variant. So set |
1297 | state of statement under check to be variant to ensure that. */ |
1298 | invar = false; |
1299 | |
1300 | if (gimple_code (g: stmt) == GIMPLE_PHI) |
1301 | { |
1302 | gphi *phi = as_a <gphi *> (p: stmt); |
1303 | |
1304 | if (gimple_bb (g: stmt) == loop->header) |
1305 | { |
1306 | /* If the entry value is subject to abnormal coalescing |
1307 | avoid the transform since we're going to duplicate the |
1308 | loop header and thus likely introduce overlapping life-ranges |
1309 | between the PHI def and the entry on the path when the |
1310 | first loop is skipped. */ |
1311 | tree entry_def |
1312 | = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop)); |
1313 | if (TREE_CODE (entry_def) == SSA_NAME |
1314 | && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (entry_def)) |
1315 | return false; |
1316 | invar = loop_iter_phi_semi_invariant_p (loop, loop_phi: phi, skip_head); |
1317 | return invar; |
1318 | } |
1319 | |
1320 | /* For a loop PHI node that does not locate in loop header, it is semi- |
1321 | invariant only if two conditions are met. The first is its source |
1322 | values are derived from CONSTANT (including loop-invariant value), or |
1323 | from SSA name defined by semi-invariant loop iteration PHI node. The |
1324 | second is its source incoming edges are control-dependent on semi- |
1325 | invariant conditional predicates. */ |
1326 | for (unsigned i = 0; i < gimple_phi_num_args (gs: phi); ++i) |
1327 | { |
1328 | const_edge e = gimple_phi_arg_edge (phi, i); |
1329 | tree arg = gimple_phi_arg_def (gs: phi, index: i); |
1330 | |
1331 | if (TREE_CODE (arg) == SSA_NAME) |
1332 | { |
1333 | if (!ssa_semi_invariant_p (loop, name: arg, skip_head, stmt_stat)) |
1334 | return false; |
1335 | |
1336 | /* If source value is defined in location from where the source |
1337 | edge comes in, no need to check control dependency again |
1338 | since this has been done in above SSA name check stage. */ |
1339 | if (e->src == gimple_bb (SSA_NAME_DEF_STMT (arg))) |
1340 | continue; |
1341 | } |
1342 | |
1343 | if (!control_dep_semi_invariant_p (loop, bb: e->src, skip_head, |
1344 | stmt_stat)) |
1345 | return false; |
1346 | } |
1347 | } |
1348 | else |
1349 | { |
1350 | ssa_op_iter iter; |
1351 | tree use; |
1352 | |
1353 | /* Volatile memory load or return of normal (non-const/non-pure) call |
1354 | should not be treated as constant in each iteration of loop. */ |
1355 | if (gimple_has_side_effects (stmt)) |
1356 | return false; |
1357 | |
1358 | /* Check if any memory store may kill memory load at this place. */ |
1359 | if (gimple_vuse (g: stmt) && !vuse_semi_invariant_p (loop, stmt, skip_head)) |
1360 | return false; |
1361 | |
1362 | /* Although operand of a statement might be SSA name, CONSTANT or |
1363 | VARDECL, here we only need to check SSA name operands. This is |
1364 | because check on VARDECL operands, which involve memory loads, |
1365 | must have been done prior to invocation of this function in |
1366 | vuse_semi_invariant_p. */ |
1367 | FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) |
1368 | if (!ssa_semi_invariant_p (loop, name: use, skip_head, stmt_stat)) |
1369 | return false; |
1370 | } |
1371 | |
1372 | if (!control_dep_semi_invariant_p (loop, bb: gimple_bb (g: stmt), skip_head, |
1373 | stmt_stat)) |
1374 | return false; |
1375 | |
1376 | /* Here we SHOULD NOT use invar = true, since hash map might be changed due |
1377 | to new insertion, and thus invar may point to invalid memory. */ |
1378 | stmt_stat.put (k: stmt, v: true); |
1379 | return true; |
1380 | } |
1381 | |
1382 | /* A helper function to check whether STMT is semi-invariant in LOOP. Basic |
1383 | blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP. */ |
1384 | |
1385 | static bool |
1386 | stmt_semi_invariant_p (struct loop *loop, gimple *stmt, |
1387 | const_basic_block skip_head) |
1388 | { |
1389 | hash_map<gimple *, bool> stmt_stat; |
1390 | return stmt_semi_invariant_p_1 (loop, stmt, skip_head, stmt_stat); |
1391 | } |
1392 | |
1393 | /* Determine when conditional statement never transfers execution to one of its |
1394 | branch, whether we can remove the branch's leading basic block (BRANCH_BB) |
1395 | and those basic blocks dominated by BRANCH_BB. */ |
1396 | |
1397 | static bool |
1398 | branch_removable_p (basic_block branch_bb) |
1399 | { |
1400 | edge_iterator ei; |
1401 | edge e; |
1402 | |
1403 | if (single_pred_p (bb: branch_bb)) |
1404 | return true; |
1405 | |
1406 | FOR_EACH_EDGE (e, ei, branch_bb->preds) |
1407 | { |
1408 | if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb)) |
1409 | continue; |
1410 | |
1411 | if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src)) |
1412 | continue; |
1413 | |
1414 | /* The branch can be reached from opposite branch, or from some |
1415 | statement not dominated by the conditional statement. */ |
1416 | return false; |
1417 | } |
1418 | |
1419 | return true; |
1420 | } |
1421 | |
1422 | /* Find out which branch of a conditional statement (COND) is invariant in the |
1423 | execution context of LOOP. That is: once the branch is selected in certain |
1424 | iteration of the loop, any operand that contributes to computation of the |
1425 | conditional statement remains unchanged in all following iterations. */ |
1426 | |
1427 | static edge |
1428 | get_cond_invariant_branch (struct loop *loop, gcond *cond) |
1429 | { |
1430 | basic_block cond_bb = gimple_bb (g: cond); |
1431 | basic_block targ_bb[2]; |
1432 | bool invar[2]; |
1433 | unsigned invar_checks = 0; |
1434 | |
1435 | for (unsigned i = 0; i < 2; i++) |
1436 | { |
1437 | targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest; |
1438 | |
1439 | /* One branch directs to loop exit, no need to perform loop split upon |
1440 | this conditional statement. Firstly, it is trivial if the exit branch |
1441 | is semi-invariant, for the statement is just to break loop. Secondly, |
1442 | if the opposite branch is semi-invariant, it means that the statement |
1443 | is real loop-invariant, which is covered by loop unswitch. */ |
1444 | if (!flow_bb_inside_loop_p (loop, targ_bb[i])) |
1445 | return NULL; |
1446 | } |
1447 | |
1448 | for (unsigned i = 0; i < 2; i++) |
1449 | { |
1450 | invar[!i] = false; |
1451 | |
1452 | if (!branch_removable_p (branch_bb: targ_bb[i])) |
1453 | continue; |
1454 | |
1455 | /* Given a semi-invariant branch, if its opposite branch dominates |
1456 | loop latch, it and its following trace will only be executed in |
1457 | final iteration of loop, namely it is not part of repeated body |
1458 | of the loop. Similar to the above case that the branch is loop |
1459 | exit, no need to split loop. */ |
1460 | if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i])) |
1461 | continue; |
1462 | |
1463 | invar[!i] = stmt_semi_invariant_p (loop, stmt: cond, skip_head: targ_bb[i]); |
1464 | invar_checks++; |
1465 | } |
1466 | |
1467 | /* With both branches being invariant (handled by loop unswitch) or |
1468 | variant is not what we want. */ |
1469 | if (invar[0] ^ !invar[1]) |
1470 | return NULL; |
1471 | |
1472 | /* Found a real loop-invariant condition, do nothing. */ |
1473 | if (invar_checks < 2 && stmt_semi_invariant_p (loop, stmt: cond, NULL)) |
1474 | return NULL; |
1475 | |
1476 | return EDGE_SUCC (cond_bb, invar[0] ? 0 : 1); |
1477 | } |
1478 | |
1479 | /* Calculate increased code size measured by estimated insn number if applying |
1480 | loop split upon certain branch (BRANCH_EDGE) of a conditional statement. */ |
1481 | |
1482 | static int |
1483 | compute_added_num_insns (struct loop *loop, const_edge branch_edge) |
1484 | { |
1485 | basic_block cond_bb = branch_edge->src; |
1486 | unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge; |
1487 | basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest; |
1488 | basic_block *bbs = ((split_info *) loop->aux)->bbs; |
1489 | int num = 0; |
1490 | |
1491 | for (unsigned i = 0; i < loop->num_nodes; i++) |
1492 | { |
1493 | /* Do no count basic blocks only in opposite branch. */ |
1494 | if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb)) |
1495 | continue; |
1496 | |
1497 | num += estimate_num_insns_seq (bb_seq (bb: bbs[i]), &eni_size_weights); |
1498 | } |
1499 | |
1500 | /* It is unnecessary to evaluate expression of the conditional statement |
1501 | in new loop that contains only invariant branch. This expression should |
1502 | be constant value (either true or false). Exclude code size of insns |
1503 | that contribute to computation of the expression. */ |
1504 | |
1505 | auto_vec<gimple *> worklist; |
1506 | hash_set<gimple *> removed; |
1507 | gimple *stmt = last_nondebug_stmt (cond_bb); |
1508 | |
1509 | worklist.safe_push (obj: stmt); |
1510 | removed.add (k: stmt); |
1511 | num -= estimate_num_insns (stmt, &eni_size_weights); |
1512 | |
1513 | do |
1514 | { |
1515 | ssa_op_iter opnd_iter; |
1516 | use_operand_p opnd_p; |
1517 | |
1518 | stmt = worklist.pop (); |
1519 | FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE) |
1520 | { |
1521 | tree opnd = USE_FROM_PTR (opnd_p); |
1522 | |
1523 | if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd)) |
1524 | continue; |
1525 | |
1526 | gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd); |
1527 | use_operand_p use_p; |
1528 | imm_use_iterator use_iter; |
1529 | |
1530 | if (removed.contains (k: opnd_stmt) |
1531 | || !flow_bb_inside_loop_p (loop, gimple_bb (g: opnd_stmt))) |
1532 | continue; |
1533 | |
1534 | FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd) |
1535 | { |
1536 | gimple *use_stmt = USE_STMT (use_p); |
1537 | |
1538 | if (!is_gimple_debug (gs: use_stmt) && !removed.contains (k: use_stmt)) |
1539 | { |
1540 | opnd_stmt = NULL; |
1541 | break; |
1542 | } |
1543 | } |
1544 | |
1545 | if (opnd_stmt) |
1546 | { |
1547 | worklist.safe_push (obj: opnd_stmt); |
1548 | removed.add (k: opnd_stmt); |
1549 | num -= estimate_num_insns (opnd_stmt, &eni_size_weights); |
1550 | } |
1551 | } |
1552 | } while (!worklist.is_empty ()); |
1553 | |
1554 | gcc_assert (num >= 0); |
1555 | return num; |
1556 | } |
1557 | |
1558 | /* Find out loop-invariant branch of a conditional statement (COND) if it has, |
1559 | and check whether it is eligible and profitable to perform loop split upon |
1560 | this branch in LOOP. */ |
1561 | |
1562 | static edge |
1563 | get_cond_branch_to_split_loop (struct loop *loop, gcond *cond) |
1564 | { |
1565 | edge invar_branch = get_cond_invariant_branch (loop, cond); |
1566 | if (!invar_branch) |
1567 | return NULL; |
1568 | |
1569 | /* When accurate profile information is available, and execution |
1570 | frequency of the branch is too low, just let it go. */ |
1571 | profile_probability prob = invar_branch->probability; |
1572 | if (prob.reliable_p ()) |
1573 | { |
1574 | int thres = param_min_loop_cond_split_prob; |
1575 | |
1576 | if (prob < profile_probability::always ().apply_scale (num: thres, den: 100)) |
1577 | return NULL; |
1578 | } |
1579 | |
1580 | /* Add a threshold for increased code size to disable loop split. */ |
1581 | if (compute_added_num_insns (loop, branch_edge: invar_branch) > param_max_peeled_insns) |
1582 | return NULL; |
1583 | |
1584 | return invar_branch; |
1585 | } |
1586 | |
1587 | /* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some |
1588 | conditional statement, perform loop split transformation illustrated |
1589 | as the following graph. |
1590 | |
1591 | .-------T------ if (true) ------F------. |
1592 | | .---------------. | |
1593 | | | | | |
1594 | v | v v |
1595 | pre-header | pre-header |
1596 | | .------------. | | .------------. |
1597 | | | | | | | | |
1598 | | v | | | v | |
1599 | header | | header | |
1600 | | | | | | |
1601 | .--- if (cond) ---. | | .--- if (true) ---. | |
1602 | | | | | | | | |
1603 | invariant | | | invariant | | |
1604 | | | | | | | | |
1605 | '---T--->.<---F---' | | '---T--->.<---F---' | |
1606 | | | / | | |
1607 | stmts | / stmts | |
1608 | | F T | | |
1609 | / \ | / / \ | |
1610 | .-------* * [ if (cond) ] .-------* * | |
1611 | | | | | | | |
1612 | | latch | | latch | |
1613 | | | | | | | |
1614 | | '------------' | '------------' |
1615 | '------------------------. .-----------' |
1616 | loop1 | | loop2 |
1617 | v v |
1618 | exits |
1619 | |
1620 | In the graph, loop1 represents the part derived from original one, and |
1621 | loop2 is duplicated using loop_version (), which corresponds to the part |
1622 | of original one being splitted out. In original latch edge of loop1, we |
1623 | insert a new conditional statement duplicated from the semi-invariant cond, |
1624 | and one of its branch goes back to loop1 header as a latch edge, and the |
1625 | other branch goes to loop2 pre-header as an entry edge. And also in loop2, |
1626 | we abandon the variant branch of the conditional statement by setting a |
1627 | constant bool condition, based on which branch is semi-invariant. */ |
1628 | |
1629 | static bool |
1630 | do_split_loop_on_cond (struct loop *loop1, edge invar_branch) |
1631 | { |
1632 | basic_block cond_bb = invar_branch->src; |
1633 | bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE); |
1634 | gcond *cond = as_a <gcond *> (p: *gsi_last_bb (bb: cond_bb)); |
1635 | |
1636 | gcc_assert (cond_bb->loop_father == loop1); |
1637 | |
1638 | if (dump_enabled_p ()) |
1639 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, cond, |
1640 | "loop split on semi-invariant condition at %s branch\n" , |
1641 | true_invar ? "true" : "false" ); |
1642 | |
1643 | initialize_original_copy_tables (); |
1644 | |
1645 | struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL, |
1646 | invar_branch->probability.invert (), |
1647 | invar_branch->probability, |
1648 | profile_probability::always (), |
1649 | profile_probability::always (), |
1650 | true); |
1651 | if (!loop2) |
1652 | { |
1653 | free_original_copy_tables (); |
1654 | return false; |
1655 | } |
1656 | |
1657 | basic_block cond_bb_copy = get_bb_copy (cond_bb); |
1658 | gcond *cond_copy = as_a<gcond *> (p: *gsi_last_bb (bb: cond_bb_copy)); |
1659 | |
1660 | /* Replace the condition in loop2 with a bool constant to let PassManager |
1661 | remove the variant branch after current pass completes. */ |
1662 | if (true_invar) |
1663 | gimple_cond_make_true (gs: cond_copy); |
1664 | else |
1665 | gimple_cond_make_false (gs: cond_copy); |
1666 | |
1667 | update_stmt (s: cond_copy); |
1668 | |
1669 | /* Insert a new conditional statement on latch edge of loop1, its condition |
1670 | is duplicated from the semi-invariant. This statement acts as a switch |
1671 | to transfer execution from loop1 to loop2, when loop1 enters into |
1672 | invariant state. */ |
1673 | basic_block latch_bb = split_edge (loop_latch_edge (loop1)); |
1674 | basic_block break_bb = split_edge (single_pred_edge (bb: latch_bb)); |
1675 | gimple *break_cond = gimple_build_cond (gimple_cond_code(gs: cond), |
1676 | gimple_cond_lhs (gs: cond), |
1677 | gimple_cond_rhs (gs: cond), |
1678 | NULL_TREE, NULL_TREE); |
1679 | |
1680 | gimple_stmt_iterator gsi = gsi_last_bb (bb: break_bb); |
1681 | gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT); |
1682 | |
1683 | edge to_loop1 = single_succ_edge (bb: break_bb); |
1684 | edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0); |
1685 | |
1686 | to_loop1->flags &= ~EDGE_FALLTHRU; |
1687 | to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE; |
1688 | to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE; |
1689 | |
1690 | /* Due to introduction of a control flow edge from loop1 latch to loop2 |
1691 | pre-header, we should update PHIs in loop2 to reflect this connection |
1692 | between loop1 and loop2. */ |
1693 | connect_loop_phis (loop1, loop2, new_e: to_loop2); |
1694 | |
1695 | edge true_edge, false_edge, skip_edge1, skip_edge2; |
1696 | extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); |
1697 | |
1698 | skip_edge1 = true_invar ? false_edge : true_edge; |
1699 | skip_edge2 = true_invar ? true_edge : false_edge; |
1700 | fix_loop_bb_probability (loop1, loop2, true_edge: skip_edge1, false_edge: skip_edge2); |
1701 | |
1702 | /* Fix first loop's exit probability after scaling. */ |
1703 | to_loop1->probability = invar_branch->probability.invert (); |
1704 | to_loop2->probability = invar_branch->probability; |
1705 | |
1706 | free_original_copy_tables (); |
1707 | |
1708 | return true; |
1709 | } |
1710 | |
1711 | /* Traverse all conditional statements in LOOP, to find out a good candidate |
1712 | upon which we can do loop split. */ |
1713 | |
1714 | static bool |
1715 | split_loop_on_cond (struct loop *loop) |
1716 | { |
1717 | split_info *info = new split_info (); |
1718 | basic_block *bbs = info->bbs = get_loop_body (loop); |
1719 | bool do_split = false; |
1720 | |
1721 | /* Allocate an area to keep temporary info, and associate its address |
1722 | with loop aux field. */ |
1723 | loop->aux = info; |
1724 | |
1725 | for (unsigned i = 0; i < loop->num_nodes; i++) |
1726 | bbs[i]->aux = NULL; |
1727 | |
1728 | for (unsigned i = 0; i < loop->num_nodes; i++) |
1729 | { |
1730 | basic_block bb = bbs[i]; |
1731 | |
1732 | /* We only consider conditional statement, which be executed at most once |
1733 | in each iteration of the loop. So skip statements in inner loops. */ |
1734 | if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP)) |
1735 | continue; |
1736 | |
1737 | /* Actually this check is not a must constraint. With it, we can ensure |
1738 | conditional statement will always be executed in each iteration. */ |
1739 | if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) |
1740 | continue; |
1741 | |
1742 | gcond *cond = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb)); |
1743 | if (!cond) |
1744 | continue; |
1745 | |
1746 | edge branch_edge = get_cond_branch_to_split_loop (loop, cond); |
1747 | |
1748 | if (branch_edge) |
1749 | { |
1750 | do_split_loop_on_cond (loop1: loop, invar_branch: branch_edge); |
1751 | do_split = true; |
1752 | break; |
1753 | } |
1754 | } |
1755 | |
1756 | delete info; |
1757 | loop->aux = NULL; |
1758 | |
1759 | return do_split; |
1760 | } |
1761 | |
1762 | /* Main entry point. Perform loop splitting on all suitable loops. */ |
1763 | |
1764 | static unsigned int |
1765 | tree_ssa_split_loops (void) |
1766 | { |
1767 | bool changed = false; |
1768 | |
1769 | gcc_assert (scev_initialized_p ()); |
1770 | |
1771 | calculate_dominance_info (CDI_POST_DOMINATORS); |
1772 | |
1773 | for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT)) |
1774 | loop->aux = NULL; |
1775 | |
1776 | /* Go through all loops starting from innermost. */ |
1777 | for (auto loop : loops_list (cfun, LI_FROM_INNERMOST)) |
1778 | { |
1779 | if (loop->aux) |
1780 | { |
1781 | /* If any of our inner loops was split, don't split us, |
1782 | and mark our containing loop as having had splits as well. |
1783 | This allows for delaying SSA update. */ |
1784 | loop_outer (loop)->aux = loop; |
1785 | continue; |
1786 | } |
1787 | |
1788 | if (optimize_loop_for_size_p (loop)) |
1789 | continue; |
1790 | |
1791 | if (split_loop (loop1: loop) || split_loop_on_cond (loop)) |
1792 | { |
1793 | /* Mark our containing loop as having had some split inner loops. */ |
1794 | loop_outer (loop)->aux = loop; |
1795 | changed = true; |
1796 | } |
1797 | } |
1798 | |
1799 | for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT)) |
1800 | loop->aux = NULL; |
1801 | |
1802 | clear_aux_for_blocks (); |
1803 | |
1804 | free_dominance_info (CDI_POST_DOMINATORS); |
1805 | |
1806 | if (changed) |
1807 | { |
1808 | rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); |
1809 | return TODO_cleanup_cfg; |
1810 | } |
1811 | return 0; |
1812 | } |
1813 | |
1814 | /* Loop splitting pass. */ |
1815 | |
1816 | namespace { |
1817 | |
1818 | const pass_data pass_data_loop_split = |
1819 | { |
1820 | .type: GIMPLE_PASS, /* type */ |
1821 | .name: "lsplit" , /* name */ |
1822 | .optinfo_flags: OPTGROUP_LOOP, /* optinfo_flags */ |
1823 | .tv_id: TV_LOOP_SPLIT, /* tv_id */ |
1824 | PROP_cfg, /* properties_required */ |
1825 | .properties_provided: 0, /* properties_provided */ |
1826 | .properties_destroyed: 0, /* properties_destroyed */ |
1827 | .todo_flags_start: 0, /* todo_flags_start */ |
1828 | .todo_flags_finish: 0, /* todo_flags_finish */ |
1829 | }; |
1830 | |
/* The loop splitting pass class.  Gated on -fsplit-loops; the real work
   is done in the out-of-line execute method below.  */
class pass_loop_split : public gimple_opt_pass
{
public:
  pass_loop_split (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_split, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-loops is enabled.  */
  bool gate (function *) final override { return flag_split_loops != 0; }
  unsigned int execute (function *) final override;

}; // class pass_loop_split
1843 | |
1844 | unsigned int |
1845 | pass_loop_split::execute (function *fun) |
1846 | { |
1847 | if (number_of_loops (fn: fun) <= 1) |
1848 | return 0; |
1849 | |
1850 | return tree_ssa_split_loops (); |
1851 | } |
1852 | |
1853 | } // anon namespace |
1854 | |
1855 | gimple_opt_pass * |
1856 | make_pass_loop_split (gcc::context *ctxt) |
1857 | { |
1858 | return new pass_loop_split (ctxt); |
1859 | } |
1860 | |