1 | /* Loop header copying on trees. |
2 | Copyright (C) 2004-2024 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the |
8 | Free Software Foundation; either version 3, or (at your option) any |
9 | later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #include "config.h" |
21 | #include "system.h" |
22 | #include "coretypes.h" |
23 | #include "backend.h" |
24 | #include "tree.h" |
25 | #include "gimple.h" |
26 | #include "cfghooks.h" |
27 | #include "tree-pass.h" |
28 | #include "gimple-ssa.h" |
29 | #include "gimple-iterator.h" |
30 | #include "tree-cfg.h" |
31 | #include "tree-into-ssa.h" |
32 | #include "cfgloop.h" |
33 | #include "tree-inline.h" |
34 | #include "tree-ssa-threadedge.h" |
35 | #include "tree-ssa-sccvn.h" |
36 | #include "tree-phinodes.h" |
37 | #include "ssa-iterators.h" |
38 | #include "value-range.h" |
39 | #include "gimple-range.h" |
40 | #include "gimple-range-path.h" |
41 | #include "gimple-pretty-print.h" |
42 | #include "cfganal.h" |
43 | #include "tree-ssa-loop-manip.h" |
44 | #include "tree-ssa-loop-niter.h" |
45 | #include "tree-scalar-evolution.h" |
46 | |
/* Return path query instance for testing ranges of statements
   in headers of LOOP contained in basic block BB.
   Use RANGER instance.  */
50 | |
51 | static path_range_query * |
52 | get_range_query (class loop *loop, |
53 | basic_block bb, |
54 | gimple_ranger &ranger) |
55 | { |
56 | auto_vec<basic_block, 8> path; |
57 | for (; bb != loop->header; bb = single_pred_edge (bb)->src) |
58 | path.safe_push (obj: bb); |
59 | path.safe_push (obj: loop->header); |
60 | path.safe_push (obj: loop_preheader_edge (loop)->src); |
61 | return new path_range_query (ranger, path); |
62 | } |
63 | |
/* Return edge that is true in the first iteration of the loop
   and NULL otherwise.
   Formulate correct ranger query to RANGER.  */
67 | |
68 | static edge |
69 | static_loop_exit (class loop *l, basic_block bb, gimple_ranger &ranger, |
70 | path_range_query *&query) |
71 | { |
72 | gcond *last = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb)); |
73 | edge ret_e; |
74 | |
75 | if (!last) |
76 | return NULL; |
77 | |
78 | edge true_e, false_e; |
79 | extract_true_false_edges_from_block (bb, &true_e, &false_e); |
80 | |
81 | /* If neither edge is the exit edge, this is not a case we'd like to |
82 | special-case. */ |
83 | if (!loop_exit_edge_p (l, true_e) && !loop_exit_edge_p (l, false_e)) |
84 | return NULL; |
85 | |
86 | int_range<1> desired_static_range; |
87 | if (loop_exit_edge_p (l, true_e)) |
88 | { |
89 | desired_static_range = range_false (); |
90 | ret_e = true_e; |
91 | } |
92 | else |
93 | { |
94 | desired_static_range = range_true (); |
95 | ret_e = false_e; |
96 | } |
97 | |
98 | if (!query) |
99 | query = get_range_query (loop: l, bb: gimple_bb (g: last), ranger); |
100 | |
101 | int_range<2> r; |
102 | if (!query->range_of_stmt (r, last)) |
103 | return NULL; |
104 | return r == desired_static_range ? ret_e : NULL; |
105 | } |
106 | |
107 | /* Return true if STMT is static in LOOP. This means that its value |
108 | is constant in the first iteration. |
109 | Use RANGER and formulate query cached in QUERY. */ |
110 | |
111 | static bool |
112 | loop_static_stmt_p (class loop *loop, |
113 | gimple_ranger &ranger, |
114 | path_range_query *&query, |
115 | gimple *stmt) |
116 | { |
117 | tree type = gimple_range_type (s: stmt); |
118 | if (!type || !Value_Range::supports_type_p (type)) |
119 | return false; |
120 | |
121 | if (!query) |
122 | query = get_range_query (loop, bb: gimple_bb (g: stmt), ranger); |
123 | |
124 | Value_Range r (gimple_range_type (s: stmt)); |
125 | if (!query->range_of_stmt (r, stmt)) |
126 | return false; |
127 | return r.singleton_p (); |
128 | } |
129 | |
130 | /* Return true if OP is invariant. */ |
131 | |
132 | static bool |
133 | loop_invariant_op_p (class loop *loop, |
134 | tree op) |
135 | { |
136 | if (is_gimple_min_invariant (op)) |
137 | return true; |
138 | if (SSA_NAME_IS_DEFAULT_DEF (op) |
139 | || !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (op)))) |
140 | return true; |
141 | return gimple_uid (SSA_NAME_DEF_STMT (op)) & 1; |
142 | } |
143 | |
/* Return true if OP is a result of a static computation, i.e. its value
   is constant in the first iteration of the loop.  This is recorded as
   uid bit 2 on OP's defining statement by should_duplicate_loop_header_p.  */

static bool
loop_static_op_p (class loop *loop, tree op)
{
  /* Always check for invariant first.  */
  gcc_checking_assert (!is_gimple_min_invariant (op)
		       && !SSA_NAME_IS_DEFAULT_DEF (op)
		       && flow_bb_inside_loop_p (loop,
						 gimple_bb (SSA_NAME_DEF_STMT (op))));
  return gimple_uid (SSA_NAME_DEF_STMT (op)) & 2;
}
157 | |
158 | |
/* Return true if OP combines outcome of static and
   loop invariant conditional.  This is recorded as uid bit 4 on OP's
   defining statement by should_duplicate_loop_header_p when it matches
   a bitwise and/or/xor of a static and an invariant operand.  */

static bool
loop_combined_static_and_iv_p (class loop *loop,
			       tree op)
{
  /* Always check for invariant first.  */
  gcc_checking_assert (!is_gimple_min_invariant (op)
		       && !SSA_NAME_IS_DEFAULT_DEF (op)
		       && flow_bb_inside_loop_p (loop,
						 gimple_bb (SSA_NAME_DEF_STMT (op))));
  return gimple_uid (SSA_NAME_DEF_STMT (op)) & 4;
}
173 | |
/* Decision about possibility of copying a given header.  */
175 | |
/* NOTE: the order of the enumerators is significant; callers compare
   decisions with >= (e.g. ret >= ch_win), so stronger decisions must
   come later.  */
enum ch_decision
{
  /* We do not want to copy this header.  */
  ch_impossible,
  /* We can copy it if it enables wins.  */
  ch_possible,
  /* We can "copy" it if it enables wins and doing
     so will introduce no new code.  */
  ch_possible_zero_cost,
  /* We want to copy.  */
  ch_win,
  /* We want to copy and we will eliminate loop exit.  */
  ch_win_invariant_exit
};
190 | |
191 | /* Check whether we should duplicate HEADER of LOOP. At most *LIMIT |
192 | instructions should be duplicated, limit is decreased by the actual |
193 | amount. */ |
194 | |
195 | static ch_decision |
196 | (basic_block , class loop *loop, |
197 | gimple_ranger *ranger, |
198 | int *limit, |
199 | hash_set <edge> *invariant_exits, |
200 | hash_set <edge> *static_exits) |
201 | { |
202 | gimple_stmt_iterator bsi; |
203 | |
204 | gcc_assert (!header->aux); |
205 | |
206 | gcc_assert (EDGE_COUNT (header->succs) > 0); |
207 | if (single_succ_p (bb: header)) |
208 | { |
209 | if (dump_file && (dump_flags & TDF_DETAILS)) |
210 | fprintf (stream: dump_file, |
211 | format: " Not duplicating bb %i: it is single succ.\n" , |
212 | header->index); |
213 | return ch_impossible; |
214 | } |
215 | |
216 | if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest) |
217 | && flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 1)->dest)) |
218 | { |
219 | if (dump_file && (dump_flags & TDF_DETAILS)) |
220 | fprintf (stream: dump_file, |
221 | format: " Not duplicating bb %i: both successors are in loop.\n" , |
222 | loop->num); |
223 | return ch_impossible; |
224 | } |
225 | |
226 | /* If this is not the original loop header, we want it to have just |
227 | one predecessor in order to match the && pattern. */ |
228 | if (header != loop->header && !single_pred_p (bb: header)) |
229 | { |
230 | if (dump_file && (dump_flags & TDF_DETAILS)) |
231 | fprintf (stream: dump_file, |
232 | format: " Not duplicating bb %i: it has mutiple predecestors.\n" , |
233 | header->index); |
234 | return ch_impossible; |
235 | } |
236 | |
237 | gcond *last = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: header)); |
238 | if (!last) |
239 | { |
240 | if (dump_file && (dump_flags & TDF_DETAILS)) |
241 | fprintf (stream: dump_file, |
242 | format: " Not duplicating bb %i: it does not end by conditional.\n" , |
243 | header->index); |
244 | return ch_impossible; |
245 | } |
246 | |
247 | path_range_query *query = NULL; |
248 | for (gphi_iterator psi = gsi_start_phis (header); !gsi_end_p (i: psi); |
249 | gsi_next (i: &psi)) |
250 | if (!virtual_operand_p (op: gimple_phi_result (gs: psi.phi ()))) |
251 | { |
252 | bool static_p = loop_static_stmt_p (loop, ranger&: *ranger, |
253 | query, stmt: psi.phi ()); |
254 | gimple_set_uid (g: psi.phi (), uid: static_p ? 2 : 0); |
255 | } |
256 | bool code_size_cost = false; |
257 | |
258 | /* Count number of instructions and punt on calls. |
259 | Populate stmts INV flag to later apply heuristics to the |
260 | kind of conditions we want to copy. */ |
261 | for (bsi = gsi_start_bb (bb: header); !gsi_end_p (i: bsi); gsi_next (i: &bsi)) |
262 | { |
263 | gimple *last = gsi_stmt (i: bsi); |
264 | |
265 | if (gimple_code (g: last) == GIMPLE_LABEL) |
266 | continue; |
267 | |
268 | if (is_gimple_debug (gs: last)) |
269 | continue; |
270 | |
271 | if (gimple_code (g: last) == GIMPLE_COND) |
272 | break; |
273 | |
274 | if (dump_file && (dump_flags & TDF_DETAILS)) |
275 | { |
276 | fprintf (stream: dump_file, format: " Analyzing: " ); |
277 | print_gimple_stmt (dump_file, last, 0, TDF_SLIM); |
278 | } |
279 | |
280 | if (gimple_code (g: last) == GIMPLE_CALL |
281 | && (!gimple_inexpensive_call_p (as_a <gcall *> (p: last)) |
282 | /* IFN_LOOP_DIST_ALIAS means that inner loop is distributed |
283 | at current loop's header. Don't copy in this case. */ |
284 | || gimple_call_internal_p (gs: last, fn: IFN_LOOP_DIST_ALIAS))) |
285 | { |
286 | if (dump_file && (dump_flags & TDF_DETAILS)) |
287 | fprintf (stream: dump_file, |
288 | format: " Not duplicating bb %i: it contains call.\n" , |
289 | header->index); |
290 | if (query) |
291 | delete query; |
292 | return ch_impossible; |
293 | } |
294 | |
295 | /* Classify the stmt is invariant in the loop. */ |
296 | gimple_set_uid (g: last, uid: 0); |
297 | if (!gimple_vuse (g: last) |
298 | && gimple_code (g: last) != GIMPLE_ASM |
299 | && (gimple_code (g: last) != GIMPLE_CALL |
300 | || gimple_call_flags (last) & ECF_CONST)) |
301 | { |
302 | bool inv = true, static_p = false; |
303 | ssa_op_iter i; |
304 | tree op; |
305 | FOR_EACH_SSA_TREE_OPERAND (op, last, i, SSA_OP_USE) |
306 | if (!loop_invariant_op_p (loop, op)) |
307 | inv = false; |
308 | /* Assume that code is reasonably optimized and invariant |
309 | constants are already identified. */ |
310 | if (!inv) |
311 | static_p = loop_static_stmt_p (loop, ranger&: *ranger, query, stmt: last); |
312 | gimple_set_uid (g: last, uid: (inv ? 1 : 0) | (static_p ? 2 : 0)); |
313 | if (dump_file && (dump_flags & TDF_DETAILS)) |
314 | { |
315 | if (inv) |
316 | fprintf (stream: dump_file, format: " Stmt is loop invariant\n" ); |
317 | if (static_p) |
318 | fprintf (stream: dump_file, format: " Stmt is static " |
319 | "(constant in the first iteration)\n" ); |
320 | } |
321 | /* Loop invariants will be optimized out in loop body after |
322 | duplication; do not account invariant computation in code |
323 | size costs. |
324 | |
325 | Similarly static computations will be optimized out in the |
326 | duplicatd header. */ |
327 | if (inv || static_p) |
328 | continue; |
329 | |
330 | /* Match the following: |
331 | _1 = i_1 < 10 <- static condtion |
332 | _2 = n != 0 <- loop invariant condition |
333 | _3 = _1 & _2 <- combined static and iv statement. */ |
334 | tree_code code; |
335 | if (gimple_code (g: last) == GIMPLE_ASSIGN |
336 | && ((code = gimple_assign_rhs_code (gs: last)) == BIT_AND_EXPR |
337 | || code == BIT_IOR_EXPR || code == BIT_XOR_EXPR)) |
338 | { |
339 | tree op1 = gimple_assign_rhs1 (gs: last); |
340 | tree op2 = gimple_assign_rhs2 (gs: last); |
341 | |
342 | if ((loop_invariant_op_p (loop, op: op1) |
343 | || loop_combined_static_and_iv_p (loop, op: op1) |
344 | || loop_static_op_p (loop, op: op1)) |
345 | && (loop_invariant_op_p (loop, op: op2) |
346 | || loop_combined_static_and_iv_p (loop, op: op2) |
347 | || loop_static_op_p (loop, op: op2))) |
348 | { |
349 | /* Duplicating loop header with combned conditional will |
350 | remove this statement in each copy. But we account for |
351 | that later when seeing that condition. |
352 | |
353 | Note that this may be overly optimistic for bit operations |
354 | where the static parameter may still result in non-trivial |
355 | bit operation. */ |
356 | gimple_set_uid (g: last, uid: 4); |
357 | if (dump_file && (dump_flags & TDF_DETAILS)) |
358 | fprintf (stream: dump_file, |
359 | format: " Stmt combines static and invariant op\n" ); |
360 | continue; |
361 | } |
362 | } |
363 | } |
364 | |
365 | int insns = estimate_num_insns (last, &eni_size_weights); |
366 | *limit -= insns; |
367 | if (insns) |
368 | code_size_cost = true; |
369 | if (dump_file && (dump_flags & TDF_DETAILS)) |
370 | fprintf (stream: dump_file, |
371 | format: " Acconting stmt as %i insns\n" , insns); |
372 | if (*limit < 0) |
373 | { |
374 | if (dump_file && (dump_flags & TDF_DETAILS)) |
375 | fprintf (stream: dump_file, |
376 | format: " Not duplicating bb %i contains too many insns.\n" , |
377 | header->index); |
378 | if (query) |
379 | delete query; |
380 | return ch_impossible; |
381 | } |
382 | } |
383 | |
384 | if (dump_file && (dump_flags & TDF_DETAILS)) |
385 | { |
386 | fprintf (stream: dump_file, format: " Analyzing: " ); |
387 | print_gimple_stmt (dump_file, last, 0, TDF_SLIM); |
388 | } |
389 | |
390 | /* If the condition tests a non-IV loop variant we do not want to rotate |
391 | the loop further. Unless this is the original loop header. */ |
392 | tree lhs = gimple_cond_lhs (gs: last); |
393 | tree rhs = gimple_cond_rhs (gs: last); |
394 | bool lhs_invariant = loop_invariant_op_p (loop, op: lhs); |
395 | bool rhs_invariant = loop_invariant_op_p (loop, op: rhs); |
396 | |
397 | /* Combined conditional is a result of if combining: |
398 | |
399 | _1 = i_1 < 10 <- static condtion |
400 | _2 = n != 0 <- loop invariant condition |
401 | _3 = _1 & _2 <- combined static and iv statement |
402 | if (_3 != 0) <- combined conditional |
403 | |
404 | Combined conditionals will not be optimized out in either copy. |
405 | However duplicaed header simplifies to: |
406 | |
407 | if (n < 10) |
408 | |
409 | while loop body to |
410 | |
411 | if (i_1 < 10) |
412 | |
413 | So effectively the resulting code sequence will be of same length as |
414 | the original code. |
415 | |
416 | Combined conditionals are never static or invariant, so save some work |
417 | below. */ |
418 | if (lhs_invariant != rhs_invariant |
419 | && (lhs_invariant |
420 | || loop_combined_static_and_iv_p (loop, op: lhs)) |
421 | && (rhs_invariant |
422 | || loop_combined_static_and_iv_p (loop, op: rhs))) |
423 | { |
424 | if (query) |
425 | delete query; |
426 | if (dump_file && (dump_flags & TDF_DETAILS)) |
427 | fprintf (stream: dump_file, |
428 | format: " Conditional combines static and invariant op.\n" ); |
429 | return ch_win; |
430 | } |
431 | |
432 | edge static_exit = static_loop_exit (l: loop, bb: header, ranger&: *ranger, query); |
433 | if (query) |
434 | delete query; |
435 | |
436 | if (static_exit && static_exits) |
437 | { |
438 | static_exits->add (k: static_exit); |
439 | if (dump_file && (dump_flags & TDF_DETAILS)) |
440 | fprintf (stream: dump_file, |
441 | format: " Will eliminate peeled conditional in bb %d.\n" , |
442 | static_exit->src->index); |
443 | /* Still look for invariant exits; exit may be both. */ |
444 | } |
445 | if (lhs_invariant && rhs_invariant) |
446 | { |
447 | if (invariant_exits) |
448 | { |
449 | edge e; |
450 | edge_iterator ei; |
451 | FOR_EACH_EDGE (e, ei, header->succs) |
452 | if (loop_exit_edge_p (loop, e)) |
453 | { |
454 | if (dump_file && (dump_flags & TDF_DETAILS)) |
455 | fprintf (stream: dump_file, |
456 | format: " Will elliminate invariant exit %i->%i\n" , |
457 | e->src->index, e->dest->index); |
458 | invariant_exits->add (k: e); |
459 | } |
460 | } |
461 | return ch_win_invariant_exit; |
462 | } |
463 | |
464 | /* If the static exit fully optimize out, it is win to "duplicate" |
465 | it. |
466 | |
467 | TODO: Even if duplication costs some size we may opt to do so in case |
468 | exit probability is significant enough (do partial peeling). */ |
469 | if (static_exit) |
470 | return !code_size_cost ? ch_possible_zero_cost : ch_possible; |
471 | |
472 | /* We was not able to prove that conditional will be eliminated. */ |
473 | int insns = estimate_num_insns (last, &eni_size_weights); |
474 | *limit -= insns; |
475 | if (dump_file && (dump_flags & TDF_DETAILS)) |
476 | fprintf (stream: dump_file, |
477 | format: " Acconting stmt as %i insns\n" , insns); |
478 | if (*limit < 0) |
479 | { |
480 | if (dump_file && (dump_flags & TDF_DETAILS)) |
481 | fprintf (stream: dump_file, |
482 | format: " Not duplicating bb %i contains too many insns.\n" , |
483 | header->index); |
484 | return ch_impossible; |
485 | } |
486 | |
487 | return ch_possible; |
488 | } |
489 | |
490 | /* Checks whether LOOP is a do-while style loop. */ |
491 | |
492 | static bool |
493 | do_while_loop_p (class loop *loop) |
494 | { |
495 | gimple *stmt = last_nondebug_stmt (loop->latch); |
496 | |
497 | /* If the latch of the loop is not empty, it is not a do-while loop. */ |
498 | if (stmt |
499 | && gimple_code (g: stmt) != GIMPLE_LABEL) |
500 | { |
501 | if (dump_file && (dump_flags & TDF_DETAILS)) |
502 | fprintf (stream: dump_file, |
503 | format: "Loop %i is not do-while loop: latch is not empty.\n" , |
504 | loop->num); |
505 | return false; |
506 | } |
507 | |
508 | /* If the latch does not have a single predecessor, it is not a |
509 | do-while loop. */ |
510 | if (!single_pred_p (bb: loop->latch)) |
511 | { |
512 | if (dump_file && (dump_flags & TDF_DETAILS)) |
513 | fprintf (stream: dump_file, |
514 | format: "Loop %i is not do-while loop: latch has multiple " |
515 | "predecessors.\n" , loop->num); |
516 | return false; |
517 | } |
518 | basic_block pred = single_pred (bb: loop->latch); |
519 | |
520 | /* If the latch predecessor doesn't exit the loop, it is not a |
521 | do-while loop. */ |
522 | if (!loop_exits_from_bb_p (loop, pred)) |
523 | { |
524 | if (dump_file && (dump_flags & TDF_DETAILS)) |
525 | fprintf (stream: dump_file, |
526 | format: "Loop %i is not do-while loop: latch predecessor " |
527 | "does not exit loop.\n" , loop->num); |
528 | return false; |
529 | } |
530 | gcond *last = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: pred)); |
531 | if (last |
532 | && (gimple_cond_lhs (gs: last) == boolean_false_node |
533 | || gimple_cond_lhs (gs: last) == boolean_true_node) |
534 | && gimple_cond_rhs (gs: last) == boolean_false_node) |
535 | { |
536 | if (dump_file && (dump_flags & TDF_DETAILS)) |
537 | fprintf (stream: dump_file, |
538 | format: "Loop %i is not do-while loop: latch predecessor " |
539 | "contains exit we optimized out.\n" , loop->num); |
540 | return false; |
541 | } |
542 | |
543 | if (dump_file && (dump_flags & TDF_DETAILS)) |
544 | fprintf (stream: dump_file, format: "Loop %i is do-while loop\n" , loop->num); |
545 | |
546 | return true; |
547 | } |
548 | |
549 | /* Update profile after header copying of LOOP. |
550 | REGION is the original (in loop) sequence, REGION_COPY is the |
551 | duplicated header (now outside of loop). N_REGION is number of |
552 | bbs duplicated. |
   ELIMINATED_EDGE is edge to be removed from duplicated sequence.
   INVARIANT_EXITS are edges in the loop body to be eliminated
   since they are loop invariants.

   So we expect the following:
558 | |
559 | // region_copy_start entry will be scaled to entry_count |
560 | if (cond1) <- this condition will become false |
561 | and we update probabilities |
562 | goto loop_exit; |
563 | if (cond2) <- this condition is loop invariant |
564 | goto loop_exit; |
565 | goto loop_header <- this will be redirected to loop. |
566 | // region_copy_end |
567 | loop: |
568 | <body> |
569 | // region start |
570 | loop_header: |
571 | if (cond1) <- we need to update probability here |
572 | goto loop_exit; |
573 | if (cond2) <- and determine scaling factor here. |
574 | moreover cond2 is now always true |
575 | goto loop_exit; |
576 | else |
577 | goto loop; |
578 | // region end |
579 | |
580 | Adding support for more exits can be done similarly, |
581 | but only consumer so far is tree-ssa-loop-ch and it uses only this |
582 | to handle the common case of peeling headers which have |
583 | conditionals known to be always true upon entry. */ |
584 | |
585 | static void |
586 | update_profile_after_ch (class loop *loop, |
587 | basic_block *region, basic_block *region_copy, |
588 | unsigned n_region, |
589 | hash_set <edge> *invariant_exits, |
590 | hash_set <edge> *static_exits, |
591 | profile_count entry_count) |
592 | { |
593 | for (unsigned int i = 0; i < n_region; i++) |
594 | { |
595 | edge exit_e, exit_e_copy, e, e_copy; |
596 | if (EDGE_COUNT (region[i]->succs) == 1) |
597 | { |
598 | region_copy[i]->count = entry_count; |
599 | region[i]->count -= entry_count; |
600 | continue; |
601 | } |
602 | |
603 | gcc_checking_assert (EDGE_COUNT (region[i]->succs) == 2); |
604 | if (loop_exit_edge_p (loop, |
605 | EDGE_SUCC (region[i], 0))) |
606 | { |
607 | exit_e = EDGE_SUCC (region[i], 0); |
608 | exit_e_copy = EDGE_SUCC (region_copy[i], 0); |
609 | e = EDGE_SUCC (region[i], 1); |
610 | e_copy = EDGE_SUCC (region_copy[i], 1); |
611 | } |
612 | else |
613 | { |
614 | exit_e = EDGE_SUCC (region[i], 1); |
615 | exit_e_copy = EDGE_SUCC (region_copy[i], 1); |
616 | e = EDGE_SUCC (region[i], 0); |
617 | e_copy = EDGE_SUCC (region_copy[i], 0); |
618 | } |
619 | gcc_assert (i == n_region - 1 |
620 | || (e->dest == region[i + 1] |
621 | && e_copy->dest == region_copy[i + 1])); |
622 | region_copy[i]->count = entry_count; |
623 | profile_count exit_e_count = exit_e->count (); |
624 | bool was_static = false; |
625 | if (static_exits->contains (k: exit_e)) |
626 | { |
627 | /* Update profile and the conditional. |
628 | CFG update is done by caller. */ |
629 | static_exits->remove (k: exit_e); |
630 | was_static = true; |
631 | e_copy->probability = profile_probability::always (); |
632 | exit_e_copy->probability = profile_probability::never (); |
633 | gcond *cond_stmt |
634 | = as_a <gcond *>(p: *gsi_last_bb (bb: region_copy[i])); |
635 | if (e_copy->flags & EDGE_TRUE_VALUE) |
636 | gimple_cond_make_true (gs: cond_stmt); |
637 | else |
638 | gimple_cond_make_false (gs: cond_stmt); |
639 | update_stmt (s: cond_stmt); |
640 | /* Header copying is a special case of jump threading, so use |
641 | common code to update loop body exit condition. */ |
642 | update_bb_profile_for_threading (region[i], entry_count, e); |
643 | } |
644 | else |
645 | region[i]->count -= region_copy[i]->count; |
646 | if (invariant_exits->contains (k: exit_e)) |
647 | { |
648 | invariant_exits->remove (k: exit_e); |
649 | /* All exits will happen in exit_e_copy which is out of the |
650 | loop, so increase probability accordingly. |
651 | If the edge is eliminated_edge we already corrected |
652 | profile above. */ |
653 | if (entry_count.nonzero_p () && !was_static) |
654 | set_edge_probability_and_rescale_others |
655 | (exit_e_copy, exit_e_count.probability_in |
656 | (overall: entry_count)); |
657 | /* Eliminate in-loop conditional. */ |
658 | e->probability = profile_probability::always (); |
659 | exit_e->probability = profile_probability::never (); |
660 | gcond *cond_stmt = as_a <gcond *>(p: *gsi_last_bb (bb: region[i])); |
661 | if (e->flags & EDGE_TRUE_VALUE) |
662 | gimple_cond_make_true (gs: cond_stmt); |
663 | else |
664 | gimple_cond_make_false (gs: cond_stmt); |
665 | update_stmt (s: cond_stmt); |
666 | } |
667 | entry_count = e_copy->count (); |
668 | } |
669 | /* Be sure that we seen all invariant exit edges we are supposed to update. |
670 | We may have recorded some static exists we decided to not duplicate. */ |
671 | gcc_checking_assert (invariant_exits->is_empty ()); |
672 | } |
673 | |
674 | namespace { |
675 | |
676 | /* Common superclass for both header-copying phases. */ |
class ch_base : public gimple_opt_pass
{
protected:
  /* Forward pass descriptor DATA and context CTXT to gimple_opt_pass;
     only the concrete subclasses are instantiated.  */
  ch_base (pass_data data, gcc::context *ctxt)
    : gimple_opt_pass (data, ctxt)
  {}

  /* Copies headers of all loops in FUN for which process_loop_p is true.  */
  unsigned int copy_headers (function *fun);

  /* Return true to copy headers of LOOP or false to skip.  */
  virtual bool process_loop_p (class loop *loop) = 0;
};
690 | |
691 | const pass_data pass_data_ch = |
692 | { |
693 | .type: GIMPLE_PASS, /* type */ |
694 | .name: "ch" , /* name */ |
695 | .optinfo_flags: OPTGROUP_LOOP, /* optinfo_flags */ |
696 | .tv_id: TV_TREE_CH, /* tv_id */ |
697 | .properties_required: ( PROP_cfg | PROP_ssa ), /* properties_required */ |
698 | .properties_provided: 0, /* properties_provided */ |
699 | .properties_destroyed: 0, /* properties_destroyed */ |
700 | .todo_flags_start: 0, /* todo_flags_start */ |
701 | .todo_flags_finish: 0, /* todo_flags_finish */ |
702 | }; |
703 | |
/* The standard loop header copying pass, enabled by -ftree-ch.  */
class pass_ch : public ch_base
{
public:
  pass_ch (gcc::context *ctxt)
    : ch_base (pass_data_ch, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override { return flag_tree_ch != 0; }

  /* Initialize and finalize loop structures, copying headers in between.  */
  unsigned int execute (function *) final override;

  opt_pass * clone () final override { return new pass_ch (m_ctxt); }

protected:
  /* ch_base method: */
  bool process_loop_p (class loop *loop) final override;
}; // class pass_ch
723 | |
724 | const pass_data pass_data_ch_vect = |
725 | { |
726 | .type: GIMPLE_PASS, /* type */ |
727 | .name: "ch_vect" , /* name */ |
728 | .optinfo_flags: OPTGROUP_LOOP, /* optinfo_flags */ |
729 | .tv_id: TV_TREE_CH, /* tv_id */ |
730 | .properties_required: ( PROP_cfg | PROP_ssa ), /* properties_required */ |
731 | .properties_provided: 0, /* properties_provided */ |
732 | .properties_destroyed: 0, /* properties_destroyed */ |
733 | .todo_flags_start: 0, /* todo_flags_start */ |
734 | .todo_flags_finish: 0, /* todo_flags_finish */ |
735 | }; |
736 | |
737 | /* This is a more aggressive version of the same pass, designed to run just |
738 | before if-conversion and vectorization, to put more loops into the form |
739 | required for those phases. */ |
class pass_ch_vect : public ch_base
{
public:
  pass_ch_vect (gcc::context *ctxt)
    : ch_base (pass_data_ch_vect, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when header copying is enabled and either loop
     vectorization is on or some loop is marked force-vectorize.  */
  bool gate (function *fun) final override
  {
    return flag_tree_ch != 0
	   && (flag_tree_loop_vectorize != 0 || fun->has_force_vectorize_loops);
  }

  /* Just copy headers, no initialization/finalization of loop structures.  */
  unsigned int execute (function *) final override;

protected:
  /* ch_base method: */
  bool process_loop_p (class loop *loop) final override;
}; // class pass_ch_vect
761 | |
762 | /* For all loops, copy the condition at the end of the loop body in front |
763 | of the loop. This is beneficial since it increases efficiency of |
764 | code motion optimizations. It also saves one jump on entry to the loop. */ |
765 | |
766 | unsigned int |
767 | ch_base:: (function *fun) |
768 | { |
769 | basic_block *bbs, *copied_bbs; |
770 | unsigned bbs_size; |
771 | bool changed = false; |
772 | |
773 | if (number_of_loops (fn: fun) <= 1) |
774 | return 0; |
775 | |
776 | bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); |
777 | copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); |
778 | bbs_size = n_basic_blocks_for_fn (fun); |
779 | |
780 | struct candidate |
781 | { |
782 | class loop *loop; |
783 | unsigned int ; |
784 | hash_set <edge> *invariant_exits, *static_exits; |
785 | }; |
786 | auto_vec<struct candidate> candidates; |
787 | auto_vec<std::pair<edge, loop_p> > copied; |
788 | auto_vec<class loop *> loops_to_unloop; |
789 | auto_vec<int> loops_to_unloop_nunroll; |
790 | |
791 | mark_dfs_back_edges (); |
792 | gimple_ranger *ranger = new gimple_ranger; |
793 | for (auto loop : loops_list (cfun, 0)) |
794 | { |
795 | int initial_limit = optimize_loop_for_speed_p (loop) |
796 | ? param_max_loop_header_insns : 0; |
797 | int remaining_limit = initial_limit; |
798 | if (dump_file && (dump_flags & TDF_DETAILS)) |
799 | fprintf (stream: dump_file, |
800 | format: "Analyzing loop %i\n" , loop->num); |
801 | |
802 | /* If the loop is already a do-while style one (either because it was |
803 | written as such, or because jump threading transformed it into one), |
804 | we might be in fact peeling the first iteration of the loop. This |
805 | in general is not a good idea. Also avoid touching infinite loops. */ |
806 | if (!loop_has_exit_edges (loop) |
807 | || !process_loop_p (loop)) |
808 | continue; |
809 | |
810 | basic_block = loop->header; |
811 | estimate_numbers_of_iterations (loop); |
812 | if (!get_max_loop_iterations_int (loop)) |
813 | { |
814 | if (dump_file && (dump_flags & TDF_DETAILS)) |
815 | fprintf (stream: dump_file, format: "Loop %d never loops.\n" , loop->num); |
816 | scale_loop_profile (loop, profile_probability::always (), 0); |
817 | loops_to_unloop.safe_push (obj: loop); |
818 | loops_to_unloop_nunroll.safe_push (obj: 0); |
819 | continue; |
820 | } |
821 | |
822 | /* Iterate the header copying up to limit; this takes care of the cases |
823 | like while (a && b) {...}, where we want to have both of the conditions |
824 | copied. TODO -- handle while (a || b) - like cases, by not requiring |
825 | the header to have just a single successor and copying up to |
826 | postdominator. */ |
827 | int = 0; |
828 | int = 0; |
829 | bool last_win_invariant_exit = false; |
830 | ch_decision ret; |
831 | auto_vec <ch_decision, 32> decision; |
832 | hash_set <edge> *invariant_exits = new hash_set <edge>; |
833 | hash_set <edge> *static_exits = new hash_set <edge>; |
834 | while ((ret = should_duplicate_loop_header_p (header, loop, ranger, |
835 | limit: &remaining_limit, |
836 | invariant_exits, |
837 | static_exits)) |
838 | != ch_impossible) |
839 | { |
840 | nheaders++; |
841 | decision.safe_push (obj: ret); |
842 | if (ret >= ch_win) |
843 | { |
844 | last_win_nheaders = nheaders; |
845 | last_win_invariant_exit = (ret == ch_win_invariant_exit); |
846 | if (dump_file && (dump_flags & TDF_DETAILS)) |
847 | fprintf (stream: dump_file, format: " Duplicating bb %i is a win\n" , |
848 | header->index); |
849 | } |
850 | else |
851 | if (dump_file && (dump_flags & TDF_DETAILS)) |
852 | fprintf (stream: dump_file, format: " May duplicate bb %i\n" , header->index); |
853 | |
854 | /* Find a successor of header that is inside a loop; i.e. the new |
855 | header after the condition is copied. */ |
856 | if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)) |
857 | header = EDGE_SUCC (header, 0)->dest; |
858 | else |
859 | header = EDGE_SUCC (header, 1)->dest; |
860 | } |
861 | |
862 | /* Try to turn loop into do-while. This means ensuring that |
863 | last duplicated header will not have loop invariant exit. */ |
864 | if (last_win_nheaders && last_win_invariant_exit |
865 | && nheaders > last_win_nheaders) |
866 | { |
867 | last_win_nheaders++; |
868 | if (dump_file && (dump_flags & TDF_DETAILS)) |
869 | fprintf (stream: dump_file, |
870 | format: " Duplicating additional BB to obtain do-while loop\n" ); |
871 | } |
872 | else if (!last_win_nheaders && nheaders && !do_while_loop_p (loop)) |
873 | { |
874 | last_win_nheaders = 1; |
875 | if (dump_file && (dump_flags & TDF_DETAILS)) |
876 | fprintf (stream: dump_file, |
877 | format: " Duplicating header BB to obtain do-while loop\n" ); |
878 | } |
879 | /* "Duplicate" all BBs with zero cost following last basic blocks we |
880 | decided to copy. */ |
881 | while (last_win_nheaders < (int)decision.length () |
882 | && decision[last_win_nheaders] == ch_possible_zero_cost) |
883 | { |
884 | if (dump_file && (dump_flags & TDF_DETAILS)) |
885 | fprintf (stream: dump_file, |
886 | format: " Duplicating extra bb is a win; it has zero cost\n" ); |
887 | last_win_nheaders++; |
888 | } |
889 | |
890 | if (last_win_nheaders) |
891 | candidates.safe_push (obj: {.loop: loop, .nheaders: last_win_nheaders, |
892 | .invariant_exits: invariant_exits, .static_exits: static_exits}); |
893 | else |
894 | { |
895 | delete invariant_exits; |
896 | delete static_exits; |
897 | } |
898 | } |
899 | /* Do not use ranger after we change the IL and not have updated SSA. */ |
900 | delete ranger; |
901 | |
902 | for (auto candidate : candidates) |
903 | { |
904 | class loop *loop = candidate.loop; |
905 | if (dump_file && (dump_flags & TDF_DETAILS)) |
906 | fprintf (stream: dump_file, |
907 | format: "Copying headers of loop %i\n" , loop->num); |
908 | |
909 | basic_block = loop->header; |
910 | edge nonexit = NULL; |
911 | edge exit = NULL; |
912 | unsigned int n_bbs = 0; |
913 | int nexits = 0; |
914 | profile_count exit_count = profile_count::zero (); |
915 | profile_count entry_count = profile_count::zero (); |
916 | edge e; |
917 | edge_iterator ei; |
918 | |
919 | FOR_EACH_EDGE (e, ei, loop->header->preds) |
920 | if (e->src != loop->latch) |
921 | entry_count += e->count (); |
922 | while (n_bbs < candidate.nheaders) |
923 | { |
924 | if (dump_file && (dump_flags & TDF_DETAILS)) |
925 | fprintf (stream: dump_file, format: " Will duplicate bb %i\n" , header->index); |
926 | /* Find a successor of header that is inside a loop; i.e. the new |
927 | header after the condition is copied. */ |
928 | if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)) |
929 | { |
930 | nonexit = EDGE_SUCC (header, 0); |
931 | exit = EDGE_SUCC (header, 1); |
932 | } |
933 | else |
934 | { |
935 | nonexit = EDGE_SUCC (header, 1); |
936 | exit = EDGE_SUCC (header, 0); |
937 | } |
938 | exit_count += exit->count (); |
939 | nexits++; |
940 | bbs[n_bbs++] = header; |
941 | gcc_assert (bbs_size > n_bbs); |
942 | header = nonexit->dest; |
943 | } |
944 | |
945 | if (dump_file && (dump_flags & TDF_DETAILS)) |
946 | fprintf (stream: dump_file, |
947 | format: "Duplicating header of the loop %d up to edge %d->%d\n" , |
948 | loop->num, exit->src->index, exit->dest->index); |
949 | |
950 | /* Ensure that the header will have just the latch as a predecessor |
951 | inside the loop. */ |
952 | if (!single_pred_p (bb: nonexit->dest)) |
953 | { |
954 | header = split_edge (nonexit); |
955 | exit = single_pred_edge (bb: header); |
956 | } |
957 | |
958 | edge entry = loop_preheader_edge (loop); |
959 | |
960 | propagate_threaded_block_debug_into (nonexit->dest, entry->dest); |
961 | if (!gimple_duplicate_seme_region (entry, exit, bbs, n_bbs, copied_bbs, |
962 | true)) |
963 | { |
964 | delete candidate.static_exits; |
965 | delete candidate.invariant_exits; |
966 | if (dump_file && (dump_flags & TDF_DETAILS)) |
967 | fprintf (stream: dump_file, format: "Duplication failed.\n" ); |
968 | continue; |
969 | } |
970 | if (loop->header->count.initialized_p ()) |
971 | update_profile_after_ch (loop, region: bbs, region_copy: copied_bbs, n_region: n_bbs, |
972 | invariant_exits: candidate.invariant_exits, |
973 | static_exits: candidate.static_exits, |
974 | entry_count); |
975 | delete candidate.static_exits; |
976 | delete candidate.invariant_exits; |
977 | copied.safe_push (obj: std::make_pair (x&: entry, y&: loop)); |
978 | |
979 | /* If the loop has the form "for (i = j; i < j + 10; i++)" then |
980 | this copying can introduce a case where we rely on undefined |
981 | signed overflow to eliminate the preheader condition, because |
982 | we assume that "j < j + 10" is true. We don't want to warn |
983 | about that case for -Wstrict-overflow, because in general we |
984 | don't warn about overflow involving loops. Prevent the |
985 | warning by setting the no_warning flag in the condition. */ |
986 | if (warn_strict_overflow > 0) |
987 | { |
988 | unsigned int i; |
989 | |
990 | for (i = 0; i < n_bbs; ++i) |
991 | { |
992 | gimple_stmt_iterator bsi; |
993 | |
994 | for (bsi = gsi_start_bb (bb: copied_bbs[i]); |
995 | !gsi_end_p (i: bsi); |
996 | gsi_next (i: &bsi)) |
997 | { |
998 | gimple *stmt = gsi_stmt (i: bsi); |
999 | if (gimple_code (g: stmt) == GIMPLE_COND) |
1000 | { |
1001 | tree lhs = gimple_cond_lhs (gs: stmt); |
1002 | if (gimple_cond_code (gs: stmt) != EQ_EXPR |
1003 | && gimple_cond_code (gs: stmt) != NE_EXPR |
1004 | && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) |
1005 | && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))) |
1006 | suppress_warning (stmt, OPT_Wstrict_overflow_); |
1007 | } |
1008 | else if (is_gimple_assign (gs: stmt)) |
1009 | { |
1010 | enum tree_code rhs_code = gimple_assign_rhs_code (gs: stmt); |
1011 | tree rhs1 = gimple_assign_rhs1 (gs: stmt); |
1012 | if (TREE_CODE_CLASS (rhs_code) == tcc_comparison |
1013 | && rhs_code != EQ_EXPR |
1014 | && rhs_code != NE_EXPR |
1015 | && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) |
1016 | && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs1))) |
1017 | suppress_warning (stmt, OPT_Wstrict_overflow_); |
1018 | } |
1019 | } |
1020 | } |
1021 | } |
1022 | |
1023 | /* Update header of the loop. */ |
1024 | loop->header = header; |
1025 | /* Find correct latch. We only duplicate chain of conditionals so |
1026 | there should be precisely two edges to the new header. One entry |
1027 | edge and one to latch. */ |
1028 | FOR_EACH_EDGE (e, ei, loop->header->preds) |
1029 | if (header != e->src) |
1030 | { |
1031 | loop->latch = e->src; |
1032 | break; |
1033 | } |
1034 | /* Ensure that the latch is simple. */ |
1035 | if (!single_succ_p (bb: loop_latch_edge (loop)->src)) |
1036 | split_edge (loop_latch_edge (loop)); |
1037 | |
1038 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1039 | { |
1040 | if (do_while_loop_p (loop)) |
1041 | fprintf (stream: dump_file, format: "Loop %d is now do-while loop.\n" , loop->num); |
1042 | else |
1043 | fprintf (stream: dump_file, format: "Loop %d is still not do-while loop.\n" , |
1044 | loop->num); |
1045 | fprintf (stream: dump_file, format: "Exit count: " ); |
1046 | exit_count.dump (f: dump_file); |
1047 | fprintf (stream: dump_file, format: "\nEntry count: " ); |
1048 | entry_count.dump (f: dump_file); |
1049 | fprintf (stream: dump_file, format: "\n" ); |
1050 | } |
1051 | |
1052 | /* We possibly decreased number of iterations by 1. */ |
1053 | auto_vec<edge> exits = get_loop_exit_edges (loop); |
1054 | bool precise = (nexits == (int) exits.length ()); |
1055 | /* Check that loop may not terminate in other way than via |
1056 | basic blocks we duplicated. */ |
1057 | if (precise) |
1058 | { |
1059 | basic_block *bbs = get_loop_body (loop); |
1060 | for (unsigned i = 0; i < loop->num_nodes && precise; ++i) |
1061 | { |
1062 | basic_block bb = bbs[i]; |
1063 | bool found_exit = false; |
1064 | FOR_EACH_EDGE (e, ei, bb->succs) |
1065 | if (!flow_bb_inside_loop_p (loop, e->dest)) |
1066 | { |
1067 | found_exit = true; |
1068 | break; |
1069 | } |
1070 | /* If BB has exit, it was duplicated. */ |
1071 | if (found_exit) |
1072 | continue; |
1073 | /* Give up on irreducible loops. */ |
1074 | if (bb->flags & BB_IRREDUCIBLE_LOOP) |
1075 | { |
1076 | precise = false; |
1077 | break; |
1078 | } |
1079 | /* Check that inner loops are finite. */ |
1080 | for (class loop *l = bb->loop_father; l != loop && precise; |
1081 | l = loop_outer (loop: l)) |
1082 | if (!l->finite_p) |
1083 | { |
1084 | precise = false; |
1085 | break; |
1086 | } |
1087 | /* Verify that there is no statement that may be terminate |
1088 | execution in a way not visible to CFG. */ |
1089 | for (gimple_stmt_iterator bsi = gsi_start_bb (bb); |
1090 | !gsi_end_p (i: bsi); gsi_next (i: &bsi)) |
1091 | if (stmt_can_terminate_bb_p (gsi_stmt (i: bsi))) |
1092 | precise = false; |
1093 | } |
1094 | free (ptr: bbs); |
1095 | } |
1096 | if (precise |
1097 | && get_max_loop_iterations_int (loop) == 1) |
1098 | { |
1099 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1100 | fprintf (stream: dump_file, format: "Loop %d no longer loops.\n" , loop->num); |
1101 | scale_loop_profile (loop, profile_probability::always (), 0); |
1102 | loops_to_unloop.safe_push (obj: loop); |
1103 | loops_to_unloop_nunroll.safe_push (obj: 0); |
1104 | } |
1105 | else if (precise) |
1106 | { |
1107 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1108 | fprintf (stream: dump_file, |
1109 | format: "Peeled all exits:" |
1110 | " decreased number of iterations of loop %d by 1.\n" , |
1111 | loop->num); |
1112 | adjust_loop_info_after_peeling (loop, npeel: 1, precise: true); |
1113 | } |
1114 | else if (exit_count >= entry_count.apply_scale (num: 9, den: 10)) |
1115 | { |
1116 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1117 | fprintf (stream: dump_file, |
1118 | format: "Peeled likely exits: likely decreased number " |
1119 | "of iterations of loop %d by 1.\n" , loop->num); |
1120 | adjust_loop_info_after_peeling (loop, npeel: 1, precise: false); |
1121 | } |
1122 | else if (dump_file && (dump_flags & TDF_DETAILS)) |
1123 | fprintf (stream: dump_file, |
1124 | format: "Not decreased number" |
1125 | " of iterations of loop %d; likely exits remains.\n" , |
1126 | loop->num); |
1127 | |
1128 | changed = true; |
1129 | } |
1130 | |
1131 | if (changed) |
1132 | { |
1133 | update_ssa (TODO_update_ssa); |
1134 | /* After updating SSA form perform CSE on the loop header |
1135 | copies. This is esp. required for the pass before |
1136 | vectorization since nothing cleans up copied exit tests |
1137 | that can now be simplified. CSE from the entry of the |
1138 | region we copied till all loop exit blocks but not |
1139 | entering the loop itself. */ |
1140 | for (unsigned i = 0; i < copied.length (); ++i) |
1141 | { |
1142 | edge entry = copied[i].first; |
1143 | loop_p loop = copied[i].second; |
1144 | auto_vec<edge> exit_edges = get_loop_exit_edges (loop); |
1145 | bitmap exit_bbs = BITMAP_ALLOC (NULL); |
1146 | for (unsigned j = 0; j < exit_edges.length (); ++j) |
1147 | bitmap_set_bit (exit_bbs, exit_edges[j]->dest->index); |
1148 | bitmap_set_bit (exit_bbs, loop->header->index); |
1149 | do_rpo_vn (cfun, entry, exit_bbs); |
1150 | BITMAP_FREE (exit_bbs); |
1151 | } |
1152 | } |
1153 | if (!loops_to_unloop.is_empty ()) |
1154 | { |
1155 | bool irred_invalidated; |
1156 | auto_bitmap lc_invalidated; |
1157 | auto_vec<edge> edges_to_remove; |
1158 | unloop_loops (loops_to_unloop, loops_to_unloop_nunroll, edges_to_remove, |
1159 | loop_closed_ssa_invalidated: lc_invalidated, irred_invalidated: &irred_invalidated); |
1160 | if (loops_state_satisfies_p (fn: fun, flags: LOOP_CLOSED_SSA) |
1161 | && !bitmap_empty_p (map: lc_invalidated)) |
1162 | rewrite_into_loop_closed_ssa (NULL, 0); |
1163 | changed = true; |
1164 | } |
1165 | free (ptr: bbs); |
1166 | free (ptr: copied_bbs); |
1167 | |
1168 | return changed ? TODO_cleanup_cfg : 0; |
1169 | } |
1170 | |
1171 | /* Initialize the loop structures we need, and finalize after. */ |
1172 | |
1173 | unsigned int |
1174 | pass_ch::execute (function *fun) |
1175 | { |
1176 | loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); |
1177 | scev_initialize (); |
1178 | |
1179 | unsigned int res = copy_headers (fun); |
1180 | |
1181 | scev_finalize (); |
1182 | loop_optimizer_finalize (); |
1183 | return res; |
1184 | } |
1185 | |
1186 | /* Assume an earlier phase has already initialized all the loop structures that |
1187 | we need here (and perhaps others too), and that these will be finalized by |
1188 | a later phase. */ |
1189 | |
1190 | unsigned int |
1191 | pass_ch_vect::execute (function *fun) |
1192 | { |
1193 | return copy_headers (fun); |
1194 | } |
1195 | |
1196 | /* Apply header copying according to a very simple test of do-while shape. */ |
1197 | |
1198 | bool |
1199 | pass_ch::process_loop_p (class loop *) |
1200 | { |
1201 | return true; |
1202 | } |
1203 | |
1204 | /* Apply header-copying to loops where we might enable vectorization. */ |
1205 | |
1206 | bool |
1207 | pass_ch_vect::process_loop_p (class loop *loop) |
1208 | { |
1209 | if (!flag_tree_loop_vectorize && !loop->force_vectorize) |
1210 | return false; |
1211 | |
1212 | if (loop->dont_vectorize) |
1213 | return false; |
1214 | |
1215 | /* The vectorizer won't handle anything with multiple exits, so skip. */ |
1216 | edge exit = single_exit (loop); |
1217 | if (!exit) |
1218 | return false; |
1219 | |
1220 | if (!do_while_loop_p (loop)) |
1221 | return true; |
1222 | |
1223 | return false; |
1224 | } |
1225 | |
1226 | } // anon namespace |
1227 | |
1228 | gimple_opt_pass * |
1229 | make_pass_ch_vect (gcc::context *ctxt) |
1230 | { |
1231 | return new pass_ch_vect (ctxt); |
1232 | } |
1233 | |
1234 | gimple_opt_pass * |
1235 | make_pass_ch (gcc::context *ctxt) |
1236 | { |
1237 | return new pass_ch (ctxt); |
1238 | } |
1239 | |