1 | /* Thread edges through blocks and update the control flow and SSA graphs. |
2 | Copyright (C) 2004-2017 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify |
7 | it under the terms of the GNU General Public License as published by |
8 | the Free Software Foundation; either version 3, or (at your option) |
9 | any later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | GNU General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #include "config.h" |
21 | #include "system.h" |
22 | #include "coretypes.h" |
23 | #include "backend.h" |
24 | #include "tree.h" |
25 | #include "gimple.h" |
26 | #include "cfghooks.h" |
27 | #include "tree-pass.h" |
28 | #include "ssa.h" |
29 | #include "fold-const.h" |
30 | #include "cfganal.h" |
31 | #include "gimple-iterator.h" |
32 | #include "tree-ssa.h" |
33 | #include "tree-ssa-threadupdate.h" |
34 | #include "cfgloop.h" |
35 | #include "dbgcnt.h" |
36 | #include "tree-cfg.h" |
37 | #include "tree-vectorizer.h" |
38 | |
39 | /* Given a block B, update the CFG and SSA graph to reflect redirecting |
40 | one or more in-edges to B to instead reach the destination of an |
41 | out-edge from B while preserving any side effects in B. |
42 | |
43 | i.e., given A->B and B->C, change A->B to be A->C yet still preserve the |
44 | side effects of executing B. |
45 | |
46 | 1. Make a copy of B (including its outgoing edges and statements). Call |
47 | the copy B'. Note B' has no incoming edges or PHIs at this time. |
48 | |
49 | 2. Remove the control statement at the end of B' and all outgoing edges |
50 | except B'->C. |
51 | |
52 | 3. Add a new argument to each PHI in C with the same value as the existing |
53 | argument associated with edge B->C. Associate the new PHI arguments |
54 | with the edge B'->C. |
55 | |
56 | 4. For each PHI in B, find or create a PHI in B' with an identical |
57 | PHI_RESULT. Add an argument to the PHI in B' which has the same |
58 | value as the PHI in B associated with the edge A->B. Associate |
59 | the new argument in the PHI in B' with the edge A->B. |
60 | |
61 | 5. Change the edge A->B to A->B'. |
62 | |
63 | 5a. This automatically deletes any PHI arguments associated with the |
64 | edge A->B in B. |
65 | |
66 | 5b. This automatically associates each new argument added in step 4 |
67 | with the edge A->B'. |
68 | |
69 | 6. Repeat for other incoming edges into B. |
70 | |
71 | 7. Put the duplicated resources in B and all the B' blocks into SSA form. |
72 | |
73 | Note that block duplication can be minimized by first collecting the |
74 | set of unique destination blocks that the incoming edges should |
75 | be threaded to. |
76 | |
77 | We reduce the number of edges and statements we create by not copying all |
78 | the outgoing edges and the control statement in step #1. We instead create |
79 | a template block without the outgoing edges and duplicate the template. |
80 | |
81 | Another case this code handles is threading through a "joiner" block. In |
82 | this case, we do not know the destination of the joiner block, but one |
83 | of the outgoing edges from the joiner block leads to a threadable path. This |
84 | case largely works as outlined above, except the duplicate of the joiner |
85 | block still contains a full set of outgoing edges and its control statement. |
86 | We just redirect one of its outgoing edges to our jump threading path. */ |
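
/* As a concrete illustration (a made-up fragment, not taken from any
   particular testcase), consider:

       A:  if (x > 10) goto B; else goto D;
       B:  t = a + b;
           if (x > 0) goto C; else goto E;

   Along the edge A->B the condition in B is known to be true, so the
   path (A->B, B->C) can be registered.  Applying the steps above
   produces a copy B' containing "t = a + b" but no control statement,
   with a single outgoing edge B'->C, and the edge A->B is redirected
   to A->B'.  Other predecessors of B still reach the original block,
   which keeps its conditional.  */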
87 | |
88 | |
89 | /* Steps #5 and #6 of the above algorithm are best implemented by walking |
90 | all the incoming edges which thread to the same destination edge at |
91 | the same time. That avoids lots of table lookups to get information |
92 | for the destination edge. |
93 | |
94 | To realize that implementation we create a list of incoming edges |
95 | which thread to the same outgoing edge. Thus to implement steps |
96 | #5 and #6 we traverse our hash table of outgoing edge information. |
97 | For each entry we walk the list of incoming edges which thread to |
98 | the current outgoing edge. */ |
99 | |
100 | struct el |
101 | { |
102 | edge e; |
103 | struct el *next; |
104 | }; |
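
/* For instance (a hypothetical situation), if the incoming edges A->B,
   D->B and F->B all thread to the same outgoing edge B->C, then the
   hash table contains a single entry for that destination whose
   incoming_edges list chains the three incoming edges, and steps #5
   and #6 walk that list once instead of looking up the destination
   edge three times.  */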
105 | |
106 | /* Main data structure recording information regarding B's duplicate |
107 | blocks. */ |
108 | |
109 | /* We need to efficiently record the unique thread destinations of this |
110 | block and specific information associated with those destinations. We |
111 | may have many incoming edges threaded to the same outgoing edge. This |
112 | can be naturally implemented with a hash table. */ |
113 | |
114 | struct redirection_data : free_ptr_hash<redirection_data> |
115 | { |
116 | /* We support wiring up two block duplicates in a jump threading path. |
117 | |
118 | One is a normal block copy where we remove the control statement |
119 | and wire up its single remaining outgoing edge to the thread path. |
120 | |
121 | The other is a joiner block where we leave the control statement |
122 | in place, but wire one of the outgoing edges to a thread path. |
123 | |
124 | In theory we could have multiple block duplicates in a jump |
125 | threading path, but I haven't tried that. |
126 | |
127 | The duplicate blocks appear in this array in the same order in |
128 | which they appear in the jump thread path. */ |
129 | basic_block dup_blocks[2]; |
130 | |
131 | /* The jump threading path. */ |
132 | vec<jump_thread_edge *> *path; |
133 | |
134 | /* A list of incoming edges which we want to thread to the |
135 | same path. */ |
136 | struct el *incoming_edges; |
137 | |
138 | /* hash_table support. */ |
139 | static inline hashval_t hash (const redirection_data *); |
140 | static inline int equal (const redirection_data *, const redirection_data *); |
141 | }; |
142 | |
143 | /* Dump a jump threading path, including annotations about each |
144 | edge in the path. */ |
145 | |
146 | static void |
147 | dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path, |
148 | bool registering) |
149 | { |
150 | fprintf (dump_file, |
	   " %s%s jump thread: (%d, %d) incoming edge; ",
	   (registering ? "Registering" : "Cancelling"),
	   (path[0]->type == EDGE_FSM_THREAD ? " FSM" : ""),
154 | path[0]->e->src->index, path[0]->e->dest->index); |
155 | |
156 | for (unsigned int i = 1; i < path.length (); i++) |
157 | { |
158 | /* We can get paths with a NULL edge when the final destination |
159 | of a jump thread turns out to be a constant address. We dump |
160 | those paths when debugging, so we have to be prepared for that |
161 | possibility here. */ |
162 | if (path[i]->e == NULL) |
163 | continue; |
164 | |
      if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	fprintf (dump_file, " (%d, %d) joiner; ",
		 path[i]->e->src->index, path[i]->e->dest->index);
      if (path[i]->type == EDGE_COPY_SRC_BLOCK)
	fprintf (dump_file, " (%d, %d) normal;",
		 path[i]->e->src->index, path[i]->e->dest->index);
      if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
	fprintf (dump_file, " (%d, %d) nocopy;",
		 path[i]->e->src->index, path[i]->e->dest->index);
      if (path[0]->type == EDGE_FSM_THREAD)
	fprintf (dump_file, " (%d, %d) ",
		 path[i]->e->src->index, path[i]->e->dest->index);
177 | } |
178 | fputc ('\n', dump_file); |
179 | } |
180 | |
181 | /* Simple hashing function. For any given incoming edge E, we're going |
182 | to be most concerned with the final destination of its jump thread |
183 | path. So hash on the block index of the final edge in the path. */ |
184 | |
185 | inline hashval_t |
186 | redirection_data::hash (const redirection_data *p) |
187 | { |
188 | vec<jump_thread_edge *> *path = p->path; |
189 | return path->last ()->e->dest->index; |
190 | } |
191 | |
192 | /* Given two hash table entries, return true if they have the same |
193 | jump threading path. */ |
194 | inline int |
195 | redirection_data::equal (const redirection_data *p1, const redirection_data *p2) |
196 | { |
197 | vec<jump_thread_edge *> *path1 = p1->path; |
198 | vec<jump_thread_edge *> *path2 = p2->path; |
199 | |
200 | if (path1->length () != path2->length ()) |
201 | return false; |
202 | |
203 | for (unsigned int i = 1; i < path1->length (); i++) |
204 | { |
205 | if ((*path1)[i]->type != (*path2)[i]->type |
206 | || (*path1)[i]->e != (*path2)[i]->e) |
207 | return false; |
208 | } |
209 | |
210 | return true; |
211 | } |
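
/* To illustrate the hashing and equality scheme with a made-up example:
   the paths (A->B, B->C, C->D) and (E->B, B->C, C->D) both hash to the
   index of block D and compare equal, because the comparison above
   deliberately starts at element 1 (element 0 is the incoming edge,
   which differs for each request but does not change which blocks must
   be duplicated).  Both requests therefore share a single
   redirection_data entry and a single set of duplicate blocks.  */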
212 | |
/* Rather than search all the edges in jump thread paths each time
   DOM is able to simplify a control statement, we build a hash table
   with the deleted edges.  We only care about the address of the edge,
   not its contents.  */
217 | struct removed_edges : nofree_ptr_hash<edge_def> |
218 | { |
219 | static hashval_t hash (edge e) { return htab_hash_pointer (e); } |
220 | static bool equal (edge e1, edge e2) { return e1 == e2; } |
221 | }; |
222 | |
223 | static hash_table<removed_edges> *removed_edges; |
224 | |
225 | /* Data structure of information to pass to hash table traversal routines. */ |
226 | struct ssa_local_info_t |
227 | { |
228 | /* The current block we are working on. */ |
229 | basic_block bb; |
230 | |
231 | /* We only create a template block for the first duplicated block in a |
232 | jump threading path as we may need many duplicates of that block. |
233 | |
234 | The second duplicate block in a path is specific to that path. Creating |
235 | and sharing a template for that block is considerably more difficult. */ |
236 | basic_block template_block; |
237 | |
238 | /* Blocks duplicated for the thread. */ |
239 | bitmap duplicate_blocks; |
240 | |
241 | /* TRUE if we thread one or more jumps, FALSE otherwise. */ |
242 | bool jumps_threaded; |
243 | |
244 | /* When we have multiple paths through a joiner which reach different |
245 | final destinations, then we may need to correct for potential |
246 | profile insanities. */ |
247 | bool need_profile_correction; |
248 | }; |
249 | |
/* Passes which use the jump threading code register jump threading
   opportunities as they are discovered.  We keep the registered
   jump threading opportunities in this vector; each entry is one
   jump threading path (a vector of jump_thread_edge pointers).  */
254 | static vec<vec<jump_thread_edge *> *> paths; |
255 | |
256 | /* When we start updating the CFG for threading, data necessary for jump |
257 | threading is attached to the AUX field for the incoming edge. Use these |
258 | macros to access the underlying structure attached to the AUX field. */ |
259 | #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux) |
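
/* For example, the code below retrieves the path attached to an
   incoming edge E with

     vec<jump_thread_edge *> *path = THREAD_PATH (e);

   the registration code (later in this file) is expected to have
   stored the path pointer in E->aux beforehand.  */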
260 | |
261 | /* Jump threading statistics. */ |
262 | |
263 | struct thread_stats_d |
264 | { |
265 | unsigned long num_threaded_edges; |
266 | }; |
267 | |
268 | struct thread_stats_d thread_stats; |
269 | |
270 | |
/* Remove the last statement in block BB if it is a control statement.
   Also remove all outgoing edges except the edge which reaches DEST_BB.
   If DEST_BB is NULL, then remove all outgoing edges.  */
274 | |
275 | void |
276 | remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb) |
277 | { |
278 | gimple_stmt_iterator gsi; |
279 | edge e; |
280 | edge_iterator ei; |
281 | |
282 | gsi = gsi_last_bb (bb); |
283 | |
284 | /* If the duplicate ends with a control statement, then remove it. |
285 | |
286 | Note that if we are duplicating the template block rather than the |
287 | original basic block, then the duplicate might not have any real |
288 | statements in it. */ |
289 | if (!gsi_end_p (gsi) |
290 | && gsi_stmt (gsi) |
291 | && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND |
292 | || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO |
293 | || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH)) |
294 | gsi_remove (&gsi, true); |
295 | |
296 | for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); ) |
297 | { |
298 | if (e->dest != dest_bb) |
299 | { |
300 | free_dom_edge_info (e); |
301 | remove_edge (e); |
302 | } |
303 | else |
304 | { |
305 | e->probability = profile_probability::always (); |
306 | ei_next (&ei); |
307 | } |
308 | } |
309 | |
  /* If the remaining edge is a loop exit, then there must have been
     a removed edge that was not a loop exit.
312 | |
313 | In that case BB and possibly other blocks were previously |
314 | in the loop, but are now outside the loop. Thus, we need |
315 | to update the loop structures. */ |
316 | if (single_succ_p (bb) |
317 | && loop_outer (bb->loop_father) |
318 | && loop_exit_edge_p (bb->loop_father, single_succ_edge (bb))) |
319 | loops_state_set (LOOPS_NEED_FIXUP); |
320 | } |
321 | |
/* Create a duplicate of BB.  Record the duplicate block at index COUNT
   in RD's array of duplicate blocks.  */
324 | |
325 | static void |
326 | create_block_for_threading (basic_block bb, |
327 | struct redirection_data *rd, |
328 | unsigned int count, |
329 | bitmap *duplicate_blocks) |
330 | { |
331 | edge_iterator ei; |
332 | edge e; |
333 | |
334 | /* We can use the generic block duplication code and simply remove |
335 | the stuff we do not need. */ |
336 | rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL); |
337 | |
338 | FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs) |
339 | e->aux = NULL; |
340 | |
341 | /* Zero out the profile, since the block is unreachable for now. */ |
342 | rd->dup_blocks[count]->count = profile_count::uninitialized (); |
343 | if (duplicate_blocks) |
344 | bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index); |
345 | } |
346 | |
347 | /* Main data structure to hold information for duplicates of BB. */ |
348 | |
349 | static hash_table<redirection_data> *redirection_data; |
350 | |
/* Given an edge E carrying a jump threading path, look up and return the
   entry for that path in our hash table.

   If INSERT is true, then we insert the entry into the hash table if
   it is not already present.  E is added to the list of incoming
   edges associated with the entry.  */
356 | |
357 | static struct redirection_data * |
358 | lookup_redirection_data (edge e, enum insert_option insert) |
359 | { |
360 | struct redirection_data **slot; |
361 | struct redirection_data *elt; |
362 | vec<jump_thread_edge *> *path = THREAD_PATH (e); |
363 | |
364 | /* Build a hash table element so we can see if E is already |
365 | in the table. */ |
366 | elt = XNEW (struct redirection_data); |
367 | elt->path = path; |
368 | elt->dup_blocks[0] = NULL; |
369 | elt->dup_blocks[1] = NULL; |
370 | elt->incoming_edges = NULL; |
371 | |
372 | slot = redirection_data->find_slot (elt, insert); |
373 | |
374 | /* This will only happen if INSERT is false and the entry is not |
375 | in the hash table. */ |
376 | if (slot == NULL) |
377 | { |
378 | free (elt); |
379 | return NULL; |
380 | } |
381 | |
382 | /* This will only happen if E was not in the hash table and |
383 | INSERT is true. */ |
384 | if (*slot == NULL) |
385 | { |
386 | *slot = elt; |
387 | elt->incoming_edges = XNEW (struct el); |
388 | elt->incoming_edges->e = e; |
389 | elt->incoming_edges->next = NULL; |
390 | return elt; |
391 | } |
392 | /* E was in the hash table. */ |
393 | else |
394 | { |
395 | /* Free ELT as we do not need it anymore, we will extract the |
396 | relevant entry from the hash table itself. */ |
397 | free (elt); |
398 | |
399 | /* Get the entry stored in the hash table. */ |
400 | elt = *slot; |
401 | |
402 | /* If insertion was requested, then we need to add INCOMING_EDGE |
403 | to the list of incoming edges associated with E. */ |
404 | if (insert) |
405 | { |
406 | struct el *el = XNEW (struct el); |
407 | el->next = elt->incoming_edges; |
408 | el->e = e; |
409 | elt->incoming_edges = el; |
410 | } |
411 | |
412 | return elt; |
413 | } |
414 | } |
415 | |
/* Similar to copy_phi_args, except that the PHI arg already exists; it
   just does not have a value associated with it yet.  */
418 | |
419 | static void |
420 | copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e) |
421 | { |
422 | int src_idx = src_e->dest_idx; |
423 | int tgt_idx = tgt_e->dest_idx; |
424 | |
  /* Iterate in lockstep over each PHI in SRC_E->dest and the
     corresponding PHI in TGT_E->dest.  */
426 | for (gphi_iterator gsi = gsi_start_phis (src_e->dest), |
427 | gsi2 = gsi_start_phis (tgt_e->dest); |
428 | !gsi_end_p (gsi); |
429 | gsi_next (&gsi), gsi_next (&gsi2)) |
430 | { |
431 | gphi *src_phi = gsi.phi (); |
432 | gphi *dest_phi = gsi2.phi (); |
433 | tree val = gimple_phi_arg_def (src_phi, src_idx); |
434 | source_location locus = gimple_phi_arg_location (src_phi, src_idx); |
435 | |
436 | SET_PHI_ARG_DEF (dest_phi, tgt_idx, val); |
437 | gimple_phi_arg_set_location (dest_phi, tgt_idx, locus); |
438 | } |
439 | } |
440 | |
/* Given SSA_NAME DEF, backtrack the jump threading PATH from node IDX
   to see if DEF has a constant value in a flow sensitive manner.  For
   instance, if an earlier edge on the path enters DEF's defining PHI
   block and the PHI argument for that edge is a constant, then that
   constant is the flow sensitive value of DEF here.  If such a value is
   found, set LOCUS to the location of the constant PHI arg and return
   the value; otherwise, or if PATH is NULL or IDX is zero, return DEF
   itself.  */
445 | |
446 | static tree |
447 | get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path, |
448 | basic_block bb, int idx, source_location *locus) |
449 | { |
450 | tree arg; |
451 | gphi *def_phi; |
452 | basic_block def_bb; |
453 | |
454 | if (path == NULL || idx == 0) |
455 | return def; |
456 | |
457 | def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def)); |
458 | if (!def_phi) |
459 | return def; |
460 | |
461 | def_bb = gimple_bb (def_phi); |
462 | /* Don't propagate loop invariants into deeper loops. */ |
463 | if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb)) |
464 | return def; |
465 | |
466 | /* Backtrack jump threading path from IDX to see if def has constant |
467 | value. */ |
468 | for (int j = idx - 1; j >= 0; j--) |
469 | { |
470 | edge e = (*path)[j]->e; |
471 | if (e->dest == def_bb) |
472 | { |
473 | arg = gimple_phi_arg_def (def_phi, e->dest_idx); |
474 | if (is_gimple_min_invariant (arg)) |
475 | { |
476 | *locus = gimple_phi_arg_location (def_phi, e->dest_idx); |
477 | return arg; |
478 | } |
479 | break; |
480 | } |
481 | } |
482 | |
483 | return def; |
484 | } |
485 | |
/* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
   Try to backtrack the jump threading PATH from node IDX to see if the
   arg has a constant value; if so, copy the constant value instead of
   the argument itself.  */
490 | |
491 | static void |
492 | copy_phi_args (basic_block bb, edge src_e, edge tgt_e, |
493 | vec<jump_thread_edge *> *path, int idx) |
494 | { |
495 | gphi_iterator gsi; |
496 | int src_indx = src_e->dest_idx; |
497 | |
498 | for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
499 | { |
500 | gphi *phi = gsi.phi (); |
501 | tree def = gimple_phi_arg_def (phi, src_indx); |
502 | source_location locus = gimple_phi_arg_location (phi, src_indx); |
503 | |
504 | if (TREE_CODE (def) == SSA_NAME |
505 | && !virtual_operand_p (gimple_phi_result (phi))) |
506 | def = get_value_locus_in_path (def, path, bb, idx, &locus); |
507 | |
508 | add_phi_arg (phi, def, tgt_e, locus); |
509 | } |
510 | } |
511 | |
512 | /* We have recently made a copy of ORIG_BB, including its outgoing |
513 | edges. The copy is NEW_BB. Every PHI node in every direct successor of |
   ORIG_BB has a new argument associated with the edge from NEW_BB to the
515 | successor. Initialize the PHI argument so that it is equal to the PHI |
516 | argument associated with the edge from ORIG_BB to the successor. |
517 | PATH and IDX are used to check if the new PHI argument has constant |
518 | value in a flow sensitive manner. */ |
519 | |
520 | static void |
521 | update_destination_phis (basic_block orig_bb, basic_block new_bb, |
522 | vec<jump_thread_edge *> *path, int idx) |
523 | { |
524 | edge_iterator ei; |
525 | edge e; |
526 | |
527 | FOR_EACH_EDGE (e, ei, orig_bb->succs) |
528 | { |
529 | edge e2 = find_edge (new_bb, e->dest); |
530 | copy_phi_args (e->dest, e, e2, path, idx); |
531 | } |
532 | } |
533 | |
/* Given a duplicate block and its single destination (both stored
   in RD), create an edge between the duplicate and its single
   destination.

   Add an additional argument to any PHI nodes at the single
   destination.  IDX is the index in the jump threading path at
   which we start checking whether the new PHI argument has a
   constant value along the jump threading path.  */
542 | |
543 | static void |
544 | create_edge_and_update_destination_phis (struct redirection_data *rd, |
545 | basic_block bb, int idx) |
546 | { |
547 | edge e = make_single_succ_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU); |
548 | |
549 | rescan_loop_exit (e, true, false); |
550 | |
551 | /* We used to copy the thread path here. That was added in 2007 |
552 | and dutifully updated through the representation changes in 2013. |
553 | |
554 | In 2013 we added code to thread from an interior node through |
555 | the backedge to another interior node. That runs after the code |
556 | to thread through loop headers from outside the loop. |
557 | |
558 | The latter may delete edges in the CFG, including those |
559 | which appeared in the jump threading path we copied here. Thus |
560 | we'd end up using a dangling pointer. |
561 | |
562 | After reviewing the 2007/2011 code, I can't see how anything |
563 | depended on copying the AUX field and clearly copying the jump |
564 | threading path is problematical due to embedded edge pointers. |
565 | It has been removed. */ |
566 | e->aux = NULL; |
567 | |
568 | /* If there are any PHI nodes at the destination of the outgoing edge |
569 | from the duplicate block, then we will need to add a new argument |
570 | to them. The argument should have the same value as the argument |
571 | associated with the outgoing edge stored in RD. */ |
572 | copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx); |
573 | } |
574 | |
575 | /* Look through PATH beginning at START and return TRUE if there are |
576 | any additional blocks that need to be duplicated. Otherwise, |
577 | return FALSE. */ |
578 | static bool |
579 | any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path, |
580 | unsigned int start) |
581 | { |
582 | for (unsigned int i = start + 1; i < path->length (); i++) |
583 | { |
584 | if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK |
585 | || (*path)[i]->type == EDGE_COPY_SRC_BLOCK) |
586 | return true; |
587 | } |
588 | return false; |
589 | } |
590 | |
591 | |
/* Compute the amount of profile count coming into the jump threading
   path stored in RD that we are duplicating, returned in
   PATH_IN_COUNT_PTR, as well as the amount of count flowing out of the
   duplicated path, returned in PATH_OUT_COUNT_PTR.  LOCAL_INFO is used to
   identify blocks duplicated for jump threading, which have duplicated
   edges that need to be ignored in the analysis.  Return true if the path
   contains a joiner, false otherwise.
599 | |
600 | In the non-joiner case, this is straightforward - all the counts |
601 | flowing into the jump threading path should flow through the duplicated |
602 | block and out of the duplicated path. |
603 | |
604 | In the joiner case, it is very tricky. Some of the counts flowing into |
605 | the original path go offpath at the joiner. The problem is that while |
606 | we know how much total count goes off-path in the original control flow, |
607 | we don't know how many of the counts corresponding to just the jump |
608 | threading path go offpath at the joiner. |
609 | |
610 | For example, assume we have the following control flow and identified |
611 | jump threading paths: |
612 | |
613 | A B C |
614 | \ | / |
615 | Ea \ |Eb / Ec |
616 | \ | / |
617 | v v v |
618 | J <-- Joiner |
619 | / \ |
620 | Eoff/ \Eon |
621 | / \ |
622 | v v |
623 | Soff Son <--- Normal |
624 | /\ |
625 | Ed/ \ Ee |
626 | / \ |
627 | v v |
628 | D E |
629 | |
630 | Jump threading paths: A -> J -> Son -> D (path 1) |
631 | C -> J -> Son -> E (path 2) |
632 | |
633 | Note that the control flow could be more complicated: |
634 | - Each jump threading path may have more than one incoming edge. I.e. A and |
635 | Ea could represent multiple incoming blocks/edges that are included in |
636 | path 1. |
637 | - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either |
638 | before or after the "normal" copy block). These are not duplicated onto |
639 | the jump threading path, as they are single-successor. |
640 | - Any of the blocks along the path may have other incoming edges that |
641 | are not part of any jump threading path, but add profile counts along |
642 | the path. |
643 | |
644 | In the above example, after all jump threading is complete, we will |
645 | end up with the following control flow: |
646 | |
647 | A B C |
648 | | | | |
649 | Ea| |Eb |Ec |
650 | | | | |
651 | v v v |
652 | Ja J Jc |
653 | / \ / \Eon' / \ |
654 | Eona/ \ ---/---\-------- \Eonc |
655 | / \ / / \ \ |
656 | v v v v v |
657 | Sona Soff Son Sonc |
658 | \ /\ / |
659 | \___________ / \ _____/ |
660 | \ / \/ |
661 | vv v |
662 | D E |
663 | |
664 | The main issue to notice here is that when we are processing path 1 |
665 | (A->J->Son->D) we need to figure out the outgoing edge weights to |
666 | the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the |
667 | sum of the incoming weights to D remain Ed. The problem with simply |
668 | assuming that Ja (and Jc when processing path 2) has the same outgoing |
669 | probabilities to its successors as the original block J, is that after |
670 | all paths are processed and other edges/counts removed (e.g. none |
671 | of Ec will reach D after processing path 2), we may end up with not |
672 | enough count flowing along duplicated edge Sona->D. |
673 | |
674 | Therefore, in the case of a joiner, we keep track of all counts |
675 | coming in along the current path, as well as from predecessors not |
676 | on any jump threading path (Eb in the above example). While we |
677 | first assume that the duplicated Eona for Ja->Sona has the same |
678 | probability as the original, we later compensate for other jump |
   threading paths that may eliminate edges.  We do that by keeping track
680 | of all counts coming into the original path that are not in a jump |
681 | thread (Eb in the above example, but as noted earlier, there could |
682 | be other predecessors incoming to the path at various points, such |
683 | as at Son). Call this cumulative non-path count coming into the path |
   before D as Enonpath.  We then ensure that the count from Sona->D is at
   least as big as (Ed - Enonpath), but no bigger than the minimum
686 | weight along the jump threading path. The probabilities of both the |
687 | original and duplicated joiner block J and Ja will be adjusted |
688 | accordingly after the updates. */ |
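
/* A small numeric sketch of the computation below, with invented numbers:
   suppose the path's first block has a total incoming count of 1000, of
   which 600 arrives along the edges being threaded (path_in_count) and
   400 from unrelated predecessors (nonpath_count).  onpath_scale is then
   600/1000.  If the edge out of the joiner that stays on the path
   carries 500, we initially estimate that 500 * 600/1000 = 300 of it
   belongs to this path (path_out_count).  If the final path edge carries
   750, at most 400 of that can be supplied by non-path predecessors, so
   when profile correction is needed path_out_count is raised to at
   least 750 - 400 = 350, but never above the minimum count seen along
   the path.  */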
689 | |
690 | static bool |
691 | compute_path_counts (struct redirection_data *rd, |
692 | ssa_local_info_t *local_info, |
693 | profile_count *path_in_count_ptr, |
694 | profile_count *path_out_count_ptr) |
695 | { |
696 | edge e = rd->incoming_edges->e; |
697 | vec<jump_thread_edge *> *path = THREAD_PATH (e); |
698 | edge elast = path->last ()->e; |
699 | profile_count nonpath_count = profile_count::zero (); |
700 | bool has_joiner = false; |
701 | profile_count path_in_count = profile_count::zero (); |
702 | |
703 | /* Start by accumulating incoming edge counts to the path's first bb |
704 | into a couple buckets: |
705 | path_in_count: total count of incoming edges that flow into the |
706 | current path. |
707 | nonpath_count: total count of incoming edges that are not |
708 | flowing along *any* path. These are the counts |
709 | that will still flow along the original path after |
710 | all path duplication is done by potentially multiple |
711 | calls to this routine. |
712 | (any other incoming edge counts are for a different jump threading |
713 | path that will be handled by a later call to this routine.) |
714 | To make this easier, start by recording all incoming edges that flow into |
715 | the current path in a bitmap. We could add up the path's incoming edge |
716 | counts here, but we still need to walk all the first bb's incoming edges |
717 | below to add up the counts of the other edges not included in this jump |
718 | threading path. */ |
719 | struct el *next, *el; |
720 | auto_bitmap in_edge_srcs; |
721 | for (el = rd->incoming_edges; el; el = next) |
722 | { |
723 | next = el->next; |
724 | bitmap_set_bit (in_edge_srcs, el->e->src->index); |
725 | } |
726 | edge ein; |
727 | edge_iterator ei; |
728 | FOR_EACH_EDGE (ein, ei, e->dest->preds) |
729 | { |
730 | vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein); |
731 | /* Simply check the incoming edge src against the set captured above. */ |
732 | if (ein_path |
733 | && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index)) |
734 | { |
735 | /* It is necessary but not sufficient that the last path edges |
736 | are identical. There may be different paths that share the |
737 | same last path edge in the case where the last edge has a nocopy |
738 | source block. */ |
739 | gcc_assert (ein_path->last ()->e == elast); |
740 | path_in_count += ein->count (); |
741 | } |
742 | else if (!ein_path) |
743 | { |
744 | /* Keep track of the incoming edges that are not on any jump-threading |
	     path.  These counts will still flow out of the original path after all
746 | jump threading is complete. */ |
747 | nonpath_count += ein->count (); |
748 | } |
749 | } |
750 | |
751 | /* Now compute the fraction of the total count coming into the first |
752 | path bb that is from the current threading path. */ |
753 | profile_count total_count = e->dest->count; |
754 | /* Handle incoming profile insanities. */ |
755 | if (total_count < path_in_count) |
756 | path_in_count = total_count; |
757 | profile_probability onpath_scale = path_in_count.probability_in (total_count); |
758 | |
759 | /* Walk the entire path to do some more computation in order to estimate |
760 | how much of the path_in_count will flow out of the duplicated threading |
761 | path. In the non-joiner case this is straightforward (it should be |
762 | the same as path_in_count, although we will handle incoming profile |
763 | insanities by setting it equal to the minimum count along the path). |
764 | |
765 | In the joiner case, we need to estimate how much of the path_in_count |
766 | will stay on the threading path after the joiner's conditional branch. |
767 | We don't really know for sure how much of the counts |
768 | associated with this path go to each successor of the joiner, but we'll |
     estimate based on the fraction of the total count coming into the
     path's first bb that came from the threading paths (computed above
     as onpath_scale).
771 | Afterwards, we will need to do some fixup to account for other threading |
772 | paths and possible profile insanities. |
773 | |
774 | In order to estimate the joiner case's counts we also need to update |
775 | nonpath_count with any additional counts coming into the path. Other |
776 | blocks along the path may have additional predecessors from outside |
777 | the path. */ |
778 | profile_count path_out_count = path_in_count; |
779 | profile_count min_path_count = path_in_count; |
780 | for (unsigned int i = 1; i < path->length (); i++) |
781 | { |
782 | edge epath = (*path)[i]->e; |
783 | profile_count cur_count = epath->count (); |
784 | if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK) |
785 | { |
786 | has_joiner = true; |
787 | cur_count = cur_count.apply_probability (onpath_scale); |
788 | } |
789 | /* In the joiner case we need to update nonpath_count for any edges |
790 | coming into the path that will contribute to the count flowing |
791 | into the path successor. */ |
792 | if (has_joiner && epath != elast) |
793 | { |
794 | /* Look for other incoming edges after joiner. */ |
795 | FOR_EACH_EDGE (ein, ei, epath->dest->preds) |
796 | { |
797 | if (ein != epath |
798 | /* Ignore in edges from blocks we have duplicated for a |
799 | threading path, which have duplicated edge counts until |
800 | they are redirected by an invocation of this routine. */ |
801 | && !bitmap_bit_p (local_info->duplicate_blocks, |
802 | ein->src->index)) |
803 | nonpath_count += ein->count (); |
804 | } |
805 | } |
806 | if (cur_count < path_out_count) |
807 | path_out_count = cur_count; |
808 | if (epath->count () < min_path_count) |
809 | min_path_count = epath->count (); |
810 | } |
811 | |
812 | /* We computed path_out_count above assuming that this path targeted |
813 | the joiner's on-path successor with the same likelihood as it |
814 | reached the joiner. However, other thread paths through the joiner |
815 | may take a different path through the normal copy source block |
816 | (i.e. they have a different elast), meaning that they do not |
817 | contribute any counts to this path's elast. As a result, it may |
818 | turn out that this path must have more count flowing to the on-path |
819 | successor of the joiner. Essentially, all of this path's elast |
820 | count must be contributed by this path and any nonpath counts |
821 | (since any path through the joiner with a different elast will not |
822 | include a copy of this elast in its duplicated path). |
823 | So ensure that this path's path_out_count is at least the |
824 | difference between elast->count () and nonpath_count. Otherwise the edge |
825 | counts after threading will not be sane. */ |
826 | if (local_info->need_profile_correction |
827 | && has_joiner && path_out_count < elast->count () - nonpath_count) |
828 | { |
829 | path_out_count = elast->count () - nonpath_count; |
830 | /* But neither can we go above the minimum count along the path |
831 | we are duplicating. This can be an issue due to profile |
832 | insanities coming in to this pass. */ |
833 | if (path_out_count > min_path_count) |
834 | path_out_count = min_path_count; |
835 | } |
836 | |
837 | *path_in_count_ptr = path_in_count; |
838 | *path_out_count_ptr = path_out_count; |
839 | return has_joiner; |
840 | } |
841 | |
842 | |
/* Update the profile counts for both an original path
   edge EPATH and its duplicate EDUP.  The duplicate source block
   will get a count of PATH_IN_COUNT,
   and the duplicate edge EDUP will have a count of PATH_OUT_COUNT.  */
847 | static void |
848 | update_profile (edge epath, edge edup, profile_count path_in_count, |
849 | profile_count path_out_count) |
850 | { |
851 | |
852 | /* First update the duplicated block's count. */ |
853 | if (edup) |
854 | { |
855 | basic_block dup_block = edup->src; |
856 | |
857 | /* Edup's count is reduced by path_out_count. We need to redistribute |
858 | probabilities to the remaining edges. */ |
859 | |
860 | edge esucc; |
861 | edge_iterator ei; |
862 | profile_probability edup_prob |
863 | = path_out_count.probability_in (path_in_count); |
864 | |
      /* Either scale up or scale down the remaining edges.  Probabilities
	 are always in the range <0,1> and thus we can't do both in the
	 same loop.  */
868 | if (edup->probability > edup_prob) |
869 | { |
870 | profile_probability rev_scale |
871 | = (profile_probability::always () - edup->probability) |
872 | / (profile_probability::always () - edup_prob); |
873 | FOR_EACH_EDGE (esucc, ei, dup_block->succs) |
874 | if (esucc != edup) |
875 | esucc->probability /= rev_scale; |
876 | } |
877 | else if (edup->probability < edup_prob) |
878 | { |
879 | profile_probability scale |
880 | = (profile_probability::always () - edup_prob) |
881 | / (profile_probability::always () - edup->probability); |
882 | FOR_EACH_EDGE (esucc, ei, dup_block->succs) |
883 | if (esucc != edup) |
884 | esucc->probability *= scale; |
885 | } |
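
      /* For example (hypothetical numbers): if EDUP previously had
	 probability 0.3 but path_out_count / path_in_count yields an
	 edup_prob of 0.6, then the remaining successors must together
	 shrink from 0.7 to 0.4, i.e. each one is multiplied by
	 (1 - 0.6) / (1 - 0.3) in the loop above, so the outgoing
	 probabilities still sum to 1.  */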
886 | if (edup_prob.initialized_p ()) |
887 | edup->probability = edup_prob; |
888 | |
889 | gcc_assert (!dup_block->count.initialized_p ()); |
890 | dup_block->count = path_in_count; |
891 | } |
892 | |
893 | if (path_in_count == profile_count::zero ()) |
894 | return; |
895 | |
896 | profile_count final_count = epath->count () - path_out_count; |
897 | |
  /* Now update the original block's count in the
     opposite manner - remove the count that will flow
     into the duplicated block.  Handle underflow due to precision/
     rounding issues.  */
902 | epath->src->count -= path_in_count; |
903 | |
904 | /* Next update this path edge's original and duplicated counts. We know |
905 | that the duplicated path will have path_out_count flowing |
906 | out of it (in the joiner case this is the count along the duplicated path |
907 | out of the duplicated joiner). This count can then be removed from the |
908 | original path edge. */ |
909 | |
910 | edge esucc; |
911 | edge_iterator ei; |
912 | profile_probability epath_prob = final_count.probability_in (epath->src->count); |
913 | |
914 | if (epath->probability > epath_prob) |
915 | { |
916 | profile_probability rev_scale |
917 | = (profile_probability::always () - epath->probability) |
918 | / (profile_probability::always () - epath_prob); |
919 | FOR_EACH_EDGE (esucc, ei, epath->src->succs) |
920 | if (esucc != epath) |
921 | esucc->probability /= rev_scale; |
922 | } |
923 | else if (epath->probability < epath_prob) |
924 | { |
925 | profile_probability scale |
926 | = (profile_probability::always () - epath_prob) |
927 | / (profile_probability::always () - epath->probability); |
928 | FOR_EACH_EDGE (esucc, ei, epath->src->succs) |
929 | if (esucc != epath) |
930 | esucc->probability *= scale; |
931 | } |
932 | if (epath_prob.initialized_p ()) |
933 | epath->probability = epath_prob; |
934 | } |
935 | |
936 | /* Wire up the outgoing edges from the duplicate blocks and |
937 | update any PHIs as needed. Also update the profile counts |
938 | on the original and duplicate blocks and edges. */ |
939 | void |
940 | ssa_fix_duplicate_block_edges (struct redirection_data *rd, |
941 | ssa_local_info_t *local_info) |
942 | { |
943 | bool multi_incomings = (rd->incoming_edges->next != NULL); |
944 | edge e = rd->incoming_edges->e; |
945 | vec<jump_thread_edge *> *path = THREAD_PATH (e); |
946 | edge elast = path->last ()->e; |
947 | profile_count path_in_count = profile_count::zero (); |
948 | profile_count path_out_count = profile_count::zero (); |
949 | |
950 | /* First determine how much profile count to move from original |
951 | path to the duplicate path. This is tricky in the presence of |
952 | a joiner (see comments for compute_path_counts), where some portion |
953 | of the path's counts will flow off-path from the joiner. In the |
954 | non-joiner case the path_in_count and path_out_count should be the |
955 | same. */ |
956 | bool has_joiner = compute_path_counts (rd, local_info, |
957 | &path_in_count, &path_out_count); |
958 | |
959 | for (unsigned int count = 0, i = 1; i < path->length (); i++) |
960 | { |
961 | edge epath = (*path)[i]->e; |
962 | |
	  /* If we were threading through a joiner block, then we want
964 | to keep its control statement and redirect an outgoing edge. |
965 | Else we want to remove the control statement & edges, then create |
966 | a new outgoing edge. In both cases we may need to update PHIs. */ |
967 | if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK) |
968 | { |
969 | edge victim; |
970 | edge e2; |
971 | |
972 | gcc_assert (has_joiner); |
973 | |
974 | /* This updates the PHIs at the destination of the duplicate |
975 | block. Pass 0 instead of i if we are threading a path which |
976 | has multiple incoming edges. */ |
977 | update_destination_phis (local_info->bb, rd->dup_blocks[count], |
978 | path, multi_incomings ? 0 : i); |
979 | |
980 | /* Find the edge from the duplicate block to the block we're |
981 | threading through. That's the edge we want to redirect. */ |
982 | victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest); |
983 | |
984 | /* If there are no remaining blocks on the path to duplicate, |
985 | then redirect VICTIM to the final destination of the jump |
986 | threading path. */ |
987 | if (!any_remaining_duplicated_blocks (path, i)) |
988 | { |
989 | e2 = redirect_edge_and_branch (victim, elast->dest); |
990 | /* If we redirected the edge, then we need to copy PHI arguments |
991 | at the target. If the edge already existed (e2 != victim |
992 | case), then the PHIs in the target already have the correct |
993 | arguments. */ |
994 | if (e2 == victim) |
995 | copy_phi_args (e2->dest, elast, e2, |
996 | path, multi_incomings ? 0 : i); |
997 | } |
998 | else |
999 | { |
1000 | /* Redirect VICTIM to the next duplicated block in the path. */ |
1001 | e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]); |
1002 | |
1003 | /* We need to update the PHIs in the next duplicated block. We |
1004 | want the new PHI args to have the same value as they had |
1005 | in the source of the next duplicate block. |
1006 | |
1007 | Thus, we need to know which edge we traversed into the |
1008 | source of the duplicate. Furthermore, we may have |
1009 | traversed many edges to reach the source of the duplicate. |
1010 | |
1011 | Walk through the path starting at element I until we |
1012 | hit an edge marked with EDGE_COPY_SRC_BLOCK. We want |
1013 | the edge from the prior element. */ |
1014 | for (unsigned int j = i + 1; j < path->length (); j++) |
1015 | { |
1016 | if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK) |
1017 | { |
1018 | copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2); |
1019 | break; |
1020 | } |
1021 | } |
1022 | } |
1023 | |
1024 | /* Update the counts of both the original block |
1025 | and path edge, and the duplicates. The path duplicate's |
	     incoming count is the total for all edges
	     incoming to this jump threading path, computed earlier.
1028 | And we know that the duplicated path will have path_out_count |
1029 | flowing out of it (i.e. along the duplicated path out of the |
1030 | duplicated joiner). */ |
1031 | update_profile (epath, e2, path_in_count, path_out_count); |
1032 | } |
1033 | else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK) |
1034 | { |
1035 | remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL); |
1036 | create_edge_and_update_destination_phis (rd, rd->dup_blocks[count], |
1037 | multi_incomings ? 0 : i); |
1038 | if (count == 1) |
1039 | single_succ_edge (rd->dup_blocks[1])->aux = NULL; |
1040 | |
	  /* Update the counts of both the original block
	     and path edge, and the duplicates.  Since we are now after
	     any joiner that may have existed on the path, the count
	     flowing along the duplicated threaded path is path_out_count.
	     If we didn't have a joiner, then path_out_count is the same
	     as path_in_count, the total count along all incoming edges
	     to the thread path.  If we had a joiner, path_out_count was
	     reduced above to the count estimated to flow along the
	     duplicated joiner path edge.  */
1050 | update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0), |
1051 | path_out_count, path_out_count); |
1052 | } |
1053 | else |
1054 | { |
	  /* No copy case.  In this case we don't have an equivalent block
	     on the duplicated thread path to update, but we do need
	     to remove the portion of the count that was moved to the
	     duplicated path from the count flowing through this block
	     on the original path.  Since all the no-copy edges are
	     after any joiner, the removed count is the same as
	     path_out_count.

	     As above, path_out_count equals path_in_count when there was
	     no joiner on the path, and the estimated count along the
	     duplicated joiner path edge otherwise.  */
1068 | update_profile (epath, NULL, path_out_count, path_out_count); |
1069 | } |
1070 | |
1071 | /* Increment the index into the duplicated path when we processed |
1072 | a duplicated block. */ |
1073 | if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK |
1074 | || (*path)[i]->type == EDGE_COPY_SRC_BLOCK) |
1075 | { |
1076 | count++; |
1077 | } |
1078 | } |
1079 | } |
1080 | |
1081 | /* Hash table traversal callback routine to create duplicate blocks. */ |
1082 | |
1083 | int |
1084 | ssa_create_duplicates (struct redirection_data **slot, |
1085 | ssa_local_info_t *local_info) |
1086 | { |
1087 | struct redirection_data *rd = *slot; |
1088 | |
1089 | /* The second duplicated block in a jump threading path is specific |
1090 | to the path. So it gets stored in RD rather than in LOCAL_DATA. |
1091 | |
1092 | Each time we're called, we have to look through the path and see |
1093 | if a second block needs to be duplicated. |
1094 | |
1095 | Note the search starts with the third edge on the path. The first |
1096 | edge is the incoming edge, the second edge always has its source |
1097 | duplicated. Thus we start our search with the third edge. */ |
1098 | vec<jump_thread_edge *> *path = rd->path; |
1099 | for (unsigned int i = 2; i < path->length (); i++) |
1100 | { |
1101 | if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK |
1102 | || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK) |
1103 | { |
1104 | create_block_for_threading ((*path)[i]->e->src, rd, 1, |
1105 | &local_info->duplicate_blocks); |
1106 | break; |
1107 | } |
1108 | } |
1109 | |
1110 | /* Create a template block if we have not done so already. Otherwise |
1111 | use the template to create a new block. */ |
1112 | if (local_info->template_block == NULL) |
1113 | { |
1114 | create_block_for_threading ((*path)[1]->e->src, rd, 0, |
1115 | &local_info->duplicate_blocks); |
1116 | local_info->template_block = rd->dup_blocks[0]; |
1117 | |
1118 | /* We do not create any outgoing edges for the template. We will |
1119 | take care of that in a later traversal. That way we do not |
1120 | create edges that are going to just be deleted. */ |
1121 | } |
1122 | else |
1123 | { |
1124 | create_block_for_threading (local_info->template_block, rd, 0, |
1125 | &local_info->duplicate_blocks); |
1126 | |
1127 | /* Go ahead and wire up outgoing edges and update PHIs for the duplicate |
1128 | block. */ |
1129 | ssa_fix_duplicate_block_edges (rd, local_info); |
1130 | } |
1131 | |
1132 | /* Keep walking the hash table. */ |
1133 | return 1; |
1134 | } |
1135 | |
1136 | /* We did not create any outgoing edges for the template block during |
1137 | block creation. This hash table traversal callback creates the |
1138 | outgoing edge for the template block. */ |
1139 | |
1140 | inline int |
1141 | ssa_fixup_template_block (struct redirection_data **slot, |
1142 | ssa_local_info_t *local_info) |
1143 | { |
1144 | struct redirection_data *rd = *slot; |
1145 | |
  /* If this is the template block, halt the traversal after updating
     it appropriately.

     If we were threading through a joiner block, then we want
     to keep its control statement and redirect an outgoing edge.
     Else we want to remove the control statement & edges, then create
     a new outgoing edge.  In both cases we may need to update PHIs.  */
1153 | if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block) |
1154 | { |
1155 | ssa_fix_duplicate_block_edges (rd, local_info); |
1156 | return 0; |
1157 | } |
1158 | |
1159 | return 1; |
1160 | } |
1161 | |
1162 | /* Hash table traversal callback to redirect each incoming edge |
1163 | associated with this hash table element to its new destination. */ |
1164 | |
1165 | int |
1166 | ssa_redirect_edges (struct redirection_data **slot, |
1167 | ssa_local_info_t *local_info) |
1168 | { |
1169 | struct redirection_data *rd = *slot; |
1170 | struct el *next, *el; |
1171 | |
1172 | /* Walk over all the incoming edges associated with this hash table |
1173 | entry. */ |
1174 | for (el = rd->incoming_edges; el; el = next) |
1175 | { |
1176 | edge e = el->e; |
1177 | vec<jump_thread_edge *> *path = THREAD_PATH (e); |
1178 | |
1179 | /* Go ahead and free this element from the list. Doing this now |
1180 | avoids the need for another list walk when we destroy the hash |
1181 | table. */ |
1182 | next = el->next; |
1183 | free (el); |
1184 | |
1185 | thread_stats.num_threaded_edges++; |
1186 | |
1187 | if (rd->dup_blocks[0]) |
1188 | { |
1189 | edge e2; |
1190 | |
1191 | if (dump_file && (dump_flags & TDF_DETAILS)) |
	    fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1193 | e->src->index, e->dest->index, rd->dup_blocks[0]->index); |
1194 | |
1195 | /* Redirect the incoming edge (possibly to the joiner block) to the |
1196 | appropriate duplicate block. */ |
1197 | e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]); |
1198 | gcc_assert (e == e2); |
1199 | flush_pending_stmts (e2); |
1200 | } |
1201 | |
1202 | /* Go ahead and clear E->aux. It's not needed anymore and failure |
1203 | to clear it will cause all kinds of unpleasant problems later. */ |
1204 | delete_jump_thread_path (path); |
1205 | e->aux = NULL; |
1206 | |
1207 | } |
1208 | |
1209 | /* Indicate that we actually threaded one or more jumps. */ |
1210 | if (rd->incoming_edges) |
1211 | local_info->jumps_threaded = true; |
1212 | |
1213 | return 1; |
1214 | } |
1215 | |
1216 | /* Return true if this block has no executable statements other than |
1217 | a simple ctrl flow instruction. When the number of outgoing edges |
1218 | is one, this is equivalent to a "forwarder" block. */ |
1219 | |
1220 | static bool |
1221 | redirection_block_p (basic_block bb) |
1222 | { |
1223 | gimple_stmt_iterator gsi; |
1224 | |
1225 | /* Advance to the first executable statement. */ |
1226 | gsi = gsi_start_bb (bb); |
1227 | while (!gsi_end_p (gsi) |
1228 | && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL |
1229 | || is_gimple_debug (gsi_stmt (gsi)) |
1230 | || gimple_nop_p (gsi_stmt (gsi)) |
1231 | || gimple_clobber_p (gsi_stmt (gsi)))) |
1232 | gsi_next (&gsi); |
1233 | |
1234 | /* Check if this is an empty block. */ |
1235 | if (gsi_end_p (gsi)) |
1236 | return true; |
1237 | |
1238 | /* Test that we've reached the terminating control statement. */ |
1239 | return gsi_stmt (gsi) |
1240 | && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND |
1241 | || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO |
1242 | || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH); |
1243 | } |
1244 | |
1245 | /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB |
1246 | is reached via one or more specific incoming edges, we know which |
1247 | outgoing edge from BB will be traversed. |
1248 | |
1249 | We want to redirect those incoming edges to the target of the |
1250 | appropriate outgoing edge. Doing so avoids a conditional branch |
1251 | and may expose new optimization opportunities. Note that we have |
1252 | to update dominator tree and SSA graph after such changes. |
1253 | |
1254 | The key to keeping the SSA graph update manageable is to duplicate |
1255 | the side effects occurring in BB so that those side effects still |
1256 | occur on the paths which bypass BB after redirecting edges. |
1257 | |
1258 | We accomplish this by creating duplicates of BB and arranging for |
1259 | the duplicates to unconditionally pass control to one specific |
1260 | successor of BB. We then revector the incoming edges into BB to |
1261 | the appropriate duplicate of BB. |
1262 | |
1263 | If NOLOOP_ONLY is true, we only perform the threading as long as it |
1264 | does not affect the structure of the loops in a nontrivial way. |
1265 | |
1266 | If JOINERS is true, then thread through joiner blocks as well. */ |
1267 | |
1268 | static bool |
1269 | thread_block_1 (basic_block bb, bool noloop_only, bool joiners) |
1270 | { |
1271 | /* E is an incoming edge into BB that we may or may not want to |
1272 | redirect to a duplicate of BB. */ |
1273 | edge e, e2; |
1274 | edge_iterator ei; |
1275 | ssa_local_info_t local_info; |
1276 | |
1277 | local_info.duplicate_blocks = BITMAP_ALLOC (NULL); |
1278 | local_info.need_profile_correction = false; |
1279 | |
  /* To avoid scanning a linear array for the element we need, we instead
     use a hash table.  For normal code there should be no noticeable
     difference.  However, if we have a block with a large number of
     incoming and outgoing edges, such linear searches can get expensive.  */
1284 | redirection_data |
1285 | = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs)); |
1286 | |
1287 | /* Record each unique threaded destination into a hash table for |
1288 | efficient lookups. */ |
1289 | edge last = NULL; |
1290 | FOR_EACH_EDGE (e, ei, bb->preds) |
1291 | { |
1292 | if (e->aux == NULL) |
1293 | continue; |
1294 | |
1295 | vec<jump_thread_edge *> *path = THREAD_PATH (e); |
1296 | |
1297 | if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners) |
1298 | || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners)) |
1299 | continue; |
1300 | |
1301 | e2 = path->last ()->e; |
1302 | if (!e2 || noloop_only) |
1303 | { |
1304 | /* If NOLOOP_ONLY is true, we only allow threading through the |
1305 | header of a loop to exit edges. */ |
1306 | |
	  /* One case occurs when there was a loop header buried in a jump
	     threading path that crosses loop boundaries.  We do not try
	     to thread this elsewhere, so just cancel the jump threading
	     request by clearing the AUX field now.  */
1311 | if (bb->loop_father != e2->src->loop_father |
1312 | && !loop_exit_edge_p (e2->src->loop_father, e2)) |
1313 | { |
1314 | /* Since this case is not handled by our special code |
1315 | to thread through a loop header, we must explicitly |
1316 | cancel the threading request here. */ |
1317 | delete_jump_thread_path (path); |
1318 | e->aux = NULL; |
1319 | continue; |
1320 | } |
1321 | |
1322 | /* Another case occurs when trying to thread through our |
1323 | own loop header, possibly from inside the loop. We will |
1324 | thread these later. */ |
1325 | unsigned int i; |
1326 | for (i = 1; i < path->length (); i++) |
1327 | { |
1328 | if ((*path)[i]->e->src == bb->loop_father->header |
1329 | && (!loop_exit_edge_p (bb->loop_father, e2) |
1330 | || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)) |
1331 | break; |
1332 | } |
1333 | |
1334 | if (i != path->length ()) |
1335 | continue; |
1336 | } |
1337 | |
1338 | /* Insert the outgoing edge into the hash table if it is not |
1339 | already in the hash table. */ |
1340 | lookup_redirection_data (e, INSERT); |
1341 | |
1342 | /* When we have thread paths through a common joiner with different |
1343 | final destinations, then we may need corrections to deal with |
1344 | profile insanities. See the big comment before compute_path_counts. */ |
1345 | if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK) |
1346 | { |
1347 | if (!last) |
1348 | last = e2; |
1349 | else if (e2 != last) |
1350 | local_info.need_profile_correction = true; |
1351 | } |
1352 | } |
1353 | |
1354 | /* We do not update dominance info. */ |
1355 | free_dominance_info (CDI_DOMINATORS); |
1356 | |
1357 | /* We know we only thread through the loop header to loop exits. |
1358 | Let the basic block duplication hook know we are not creating |
1359 | a multiple entry loop. */ |
1360 | if (noloop_only |
1361 | && bb == bb->loop_father->header) |
1362 | set_loop_copy (bb->loop_father, loop_outer (bb->loop_father)); |
1363 | |
1364 | /* Now create duplicates of BB. |
1365 | |
1366 | Note that for a block with a high outgoing degree we can waste |
1367 | a lot of time and memory creating and destroying useless edges. |
1368 | |
1369 | So we first duplicate BB and remove the control structure at the |
1370 | tail of the duplicate as well as all outgoing edges from the |
1371 | duplicate. We then use that duplicate block as a template for |
1372 | the rest of the duplicates. */ |
1373 | local_info.template_block = NULL; |
1374 | local_info.bb = bb; |
1375 | local_info.jumps_threaded = false; |
1376 | redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates> |
1377 | (&local_info); |
1378 | |
1379 | /* The template does not have an outgoing edge. Create that outgoing |
     edge and update PHI nodes at the edge's target as necessary.
1381 | |
1382 | We do this after creating all the duplicates to avoid creating |
1383 | unnecessary edges. */ |
1384 | redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block> |
1385 | (&local_info); |
1386 | |
1387 | /* The hash table traversals above created the duplicate blocks (and the |
1388 | statements within the duplicate blocks). This loop creates PHI nodes for |
1389 | the duplicated blocks and redirects the incoming edges into BB to reach |
1390 | the duplicates of BB. */ |
1391 | redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges> |
1392 | (&local_info); |
1393 | |
1394 | /* Done with this block. Clear REDIRECTION_DATA. */ |
1395 | delete redirection_data; |
1396 | redirection_data = NULL; |
1397 | |
1398 | if (noloop_only |
1399 | && bb == bb->loop_father->header) |
1400 | set_loop_copy (bb->loop_father, NULL); |
1401 | |
1402 | BITMAP_FREE (local_info.duplicate_blocks); |
1403 | local_info.duplicate_blocks = NULL; |
1404 | |
1405 | /* Indicate to our caller whether or not any jumps were threaded. */ |
1406 | return local_info.jumps_threaded; |
1407 | } |
1408 | |
1409 | /* Wrapper for thread_block_1 so that we can first handle jump |
1410 | thread paths which do not involve copying joiner blocks, then |
1411 | handle jump thread paths which have joiner blocks. |
1412 | |
1413 | By doing things this way we can be as aggressive as possible and |
1414 | not worry that copying a joiner block will create a jump threading |
1415 | opportunity. */ |
1416 | |
1417 | static bool |
1418 | thread_block (basic_block bb, bool noloop_only) |
1419 | { |
1420 | bool retval; |
1421 | retval = thread_block_1 (bb, noloop_only, false); |
1422 | retval |= thread_block_1 (bb, noloop_only, true); |
1423 | return retval; |
1424 | } |
1425 | |
1426 | /* Callback for dfs_enumerate_from. Returns true if BB is different |
1427 | from STOP and DBDS_CE_STOP. */ |
1428 | |
1429 | static basic_block dbds_ce_stop; |
1430 | static bool |
1431 | dbds_continue_enumeration_p (const_basic_block bb, const void *stop) |
1432 | { |
1433 | return (bb != (const_basic_block) stop |
1434 | && bb != dbds_ce_stop); |
1435 | } |
1436 | |
1437 | /* Evaluates the dominance relationship between the latch of LOOP and BB,
1438 | and returns the state.  */
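/* As an illustrative sketch only (the block names are hypothetical),
   consider a loop shaped like

          header
          /    \
         X      Y
          \    /
           BB
            |
          latch

   Every path from the header to the latch passes through BB, so the
   result for BB is DOMST_DOMINATING.  If some header-to-latch path
   avoided BB, the result would be DOMST_NONDOMINATING; if no path from
   the header reached the latch at all, the loop is effectively broken
   and the result is DOMST_LOOP_BROKEN.  */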
1439 | |
1440 | enum bb_dom_status |
1441 | determine_bb_domination_status (struct loop *loop, basic_block bb) |
1442 | { |
1443 | basic_block *bblocks; |
1444 | unsigned nblocks, i; |
1445 | bool bb_reachable = false; |
1446 | edge_iterator ei; |
1447 | edge e; |
1448 | |
1449 | /* This function assumes BB is a successor of LOOP->header. |
1450 | If that is not the case return DOMST_NONDOMINATING which |
1451 | is always safe. */ |
1452 | { |
1453 | bool ok = false; |
1454 | |
1455 | FOR_EACH_EDGE (e, ei, bb->preds) |
1456 | { |
1457 | if (e->src == loop->header) |
1458 | { |
1459 | ok = true; |
1460 | break; |
1461 | } |
1462 | } |
1463 | |
1464 | if (!ok) |
1465 | return DOMST_NONDOMINATING; |
1466 | } |
1467 | |
1468 | if (bb == loop->latch) |
1469 | return DOMST_DOMINATING; |
1470 | |
1471 | /* Check that BB dominates LOOP->latch, and that it is back-reachable |
1472 | from it. */ |
1473 | |
1474 | bblocks = XCNEWVEC (basic_block, loop->num_nodes); |
1475 | dbds_ce_stop = loop->header; |
1476 | nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p, |
1477 | bblocks, loop->num_nodes, bb); |
1478 | for (i = 0; i < nblocks; i++) |
1479 | FOR_EACH_EDGE (e, ei, bblocks[i]->preds) |
1480 | { |
1481 | if (e->src == loop->header) |
1482 | { |
1483 | free (bblocks); |
1484 | return DOMST_NONDOMINATING; |
1485 | } |
1486 | if (e->src == bb) |
1487 | bb_reachable = true; |
1488 | } |
1489 | |
1490 | free (bblocks); |
1491 | return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN); |
1492 | } |
1493 | |
1494 | /* Thread jumps through the header of LOOP. Returns true if cfg changes. |
1495 | If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges |
1496 | to the inside of the loop. */ |
1497 | |
1498 | static bool |
1499 | thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
1500 | { |
1501 | basic_block header = loop->header;
1502 | edge e, tgt_edge, latch = loop_latch_edge (loop); |
1503 | edge_iterator ei; |
1504 | basic_block tgt_bb, atgt_bb; |
1505 | enum bb_dom_status domst; |
1506 | |
1507 | /* We have already threaded through headers to exits, so all the threading |
1508 | requests now are to the inside of the loop. We need to avoid creating |
1509 | irreducible regions (i.e., loops with more than one entry block), and |
1510 | also loops with several latch edges, or new subloops of the loop (although
1511 | there are cases where it might be appropriate, it is difficult to decide, |
1512 | and doing it wrongly may confuse other optimizers). |
1513 | |
1514 | We could handle more general cases here. However, the intention is to |
1515 | preserve some information about the loop, which is impossible if its |
1516 | structure changes significantly, in a way that is not well understood. |
1517 | Thus we only handle a few important special cases, in which updating
1518 | the loop-carried information should also be feasible:
1519 | |
1520 | 1) Propagation of the latch edge to a block that dominates the latch
1521 | block of the loop.  This aims to handle the following idiom:
1522 | |
1523 | first = 1; |
1524 | while (1) |
1525 | { |
1526 | if (first) |
1527 | initialize; |
1528 | first = 0; |
1529 | body; |
1530 | } |
1531 | |
1532 | After threading the latch edge, this becomes |
1533 | |
1534 | first = 1; |
1535 | if (first) |
1536 | initialize; |
1537 | while (1) |
1538 | { |
1539 | first = 0; |
1540 | body; |
1541 | } |
1542 | |
1543 | The original header of the loop is moved out of it, and we may thread |
1544 | the remaining edges through it without further constraints. |
1545 | |
1546 | 2) All entry edges are propagated to a single basic block that dominates |
1547 | the latch block of the loop. This aims to handle the following idiom |
1548 | (normally created for "for" loops): |
1549 | |
1550 | i = 0; |
1551 | while (1) |
1552 | { |
1553 | if (i >= 100) |
1554 | break; |
1555 | body; |
1556 | i++; |
1557 | } |
1558 | |
1559 | This becomes |
1560 | |
1561 | i = 0; |
1562 | while (1) |
1563 | { |
1564 | body; |
1565 | i++; |
1566 | if (i >= 100) |
1567 | break; |
1568 | } |
1569 | */ |
1570 | |
1571 | /* Threading through the header won't improve the code if the header has just |
1572 | one successor. */ |
1573 | if (single_succ_p (header)) |
1574 | goto fail; |
1575 | |
1576 | if (!may_peel_loop_headers && !redirection_block_p (loop->header)) |
1577 | goto fail; |
1578 | else |
1579 | { |
1580 | tgt_bb = NULL; |
1581 | tgt_edge = NULL; |
1582 | FOR_EACH_EDGE (e, ei, header->preds) |
1583 | { |
1584 | if (!e->aux) |
1585 | { |
1586 | if (e == latch) |
1587 | continue; |
1588 | |
1589 | /* If the latch is not threaded, and there is a header
1590 | edge that is not threaded, we would create a loop
1591 | with multiple entries.  */
1592 | goto fail; |
1593 | } |
1594 | |
1595 | vec<jump_thread_edge *> *path = THREAD_PATH (e); |
1596 | |
1597 | if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK) |
1598 | goto fail; |
1599 | tgt_edge = (*path)[1]->e; |
1600 | atgt_bb = tgt_edge->dest; |
1601 | if (!tgt_bb) |
1602 | tgt_bb = atgt_bb; |
1603 | /* Two targets of threading would make us create a loop
1604 | with multiple entries.  */
1605 | else if (tgt_bb != atgt_bb) |
1606 | goto fail; |
1607 | } |
1608 | |
1609 | if (!tgt_bb) |
1610 | { |
1611 | /* There are no threading requests. */ |
1612 | return false; |
1613 | } |
1614 | |
1615 | /* Redirecting to an empty loop latch is useless.  */
1616 | if (tgt_bb == loop->latch |
1617 | && empty_block_p (loop->latch)) |
1618 | goto fail; |
1619 | } |
1620 | |
1621 | /* The target block must dominate the loop latch, otherwise we would be |
1622 | creating a subloop. */ |
1623 | domst = determine_bb_domination_status (loop, tgt_bb); |
1624 | if (domst == DOMST_NONDOMINATING) |
1625 | goto fail; |
1626 | if (domst == DOMST_LOOP_BROKEN) |
1627 | { |
1628 | /* If the loop ceased to exist, mark it as such, and thread through its |
1629 | original header. */ |
1630 | mark_loop_for_removal (loop); |
1631 | return thread_block (header, false); |
1632 | } |
1633 | |
1634 | if (tgt_bb->loop_father->header == tgt_bb) |
1635 | { |
1636 | /* If the target of the threading is a header of a subloop, we need |
1637 | to create a preheader for it, so that the headers of the two loops |
1638 | do not merge. */ |
1639 | if (EDGE_COUNT (tgt_bb->preds) > 2) |
1640 | { |
1641 | tgt_bb = create_preheader (tgt_bb->loop_father, 0); |
1642 | gcc_assert (tgt_bb != NULL); |
1643 | } |
1644 | else |
1645 | tgt_bb = split_edge (tgt_edge); |
1646 | } |
1647 | |
1648 | basic_block new_preheader;
1649 | |
1650 | /* Now consider the case where entry edges are redirected to the new
1651 | entry block.  Remember one entry edge, so that we can find the new
1652 | preheader (its destination after threading).  */
1653 | FOR_EACH_EDGE (e, ei, header->preds) |
1654 | { |
1655 | if (e->aux) |
1656 | break; |
1657 | } |
1658 | |
1659 | /* The duplicate of the header is the new preheader of the loop. Ensure |
1660 | that it is placed correctly in the loop hierarchy. */ |
1661 | set_loop_copy (loop, loop_outer (loop)); |
1662 | |
1663 | thread_block (header, false); |
1664 | set_loop_copy (loop, NULL); |
1665 | new_preheader = e->dest; |
1666 | |
1667 | /* Create the new latch block. This is always necessary, as the latch |
1668 | must have only a single successor, but the original header had at |
1669 | least two successors. */ |
1670 | loop->latch = NULL; |
1671 | mfb_kj_edge = single_succ_edge (new_preheader); |
1672 | loop->header = mfb_kj_edge->dest; |
1673 | latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL); |
1674 | loop->header = latch->dest; |
1675 | loop->latch = latch->src; |
1676 | return true; |
1677 | |
1678 | fail: |
1679 | /* We failed to thread anything. Cancel the requests. */ |
1680 | FOR_EACH_EDGE (e, ei, header->preds) |
1681 | { |
1682 | vec<jump_thread_edge *> *path = THREAD_PATH (e); |
1683 | |
1684 | if (path) |
1685 | { |
1686 | delete_jump_thread_path (path); |
1687 | e->aux = NULL; |
1688 | } |
1689 | } |
1690 | return false; |
1691 | } |
1692 | |
1693 | /* E1 and E2 are edges into the same basic block. Return TRUE if the |
1694 | PHI arguments associated with those edges are equal or there are no |
1695 | PHI arguments, otherwise return FALSE. */ |
1696 | |
1697 | static bool |
1698 | phi_args_equal_on_edges (edge e1, edge e2) |
1699 | { |
1700 | gphi_iterator gsi; |
1701 | int indx1 = e1->dest_idx; |
1702 | int indx2 = e2->dest_idx; |
1703 | |
1704 | for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi)) |
1705 | { |
1706 | gphi *phi = gsi.phi (); |
1707 | |
1708 | if (!operand_equal_p (gimple_phi_arg_def (phi, indx1), |
1709 | gimple_phi_arg_def (phi, indx2), 0)) |
1710 | return false; |
1711 | } |
1712 | return true; |
1713 | } |
1714 | |
1715 | /* Walk through the registered jump threads and convert them into a |
1716 | form convenient for this pass. |
1717 | |
1718 | Any block which has incoming edges threaded to outgoing edges |
1719 | will have its entry in THREADED_BLOCKS set.
1720 | |
1721 | Any threaded edge will have its jump thread path stored in the
1722 | original edge's AUX field.
1723 | |
1724 | This form avoids the need to walk all the edges in the CFG to |
1725 | discover blocks which need processing and avoids unnecessary |
1726 | hash table lookups to map from threaded edge to new target. */ |
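/* For instance (purely illustrative), a single recorded path
   (A->B, B->C) leaves the pointer to its path vector in the AUX field
   of edge A->B and sets bit B->index in THREADED_BLOCKS, so the later
   traversal only needs to visit block B and can recover the requested
   continuation through THREAD_PATH on the edge A->B.  */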
1727 | |
1728 | static void |
1729 | mark_threaded_blocks (bitmap threaded_blocks) |
1730 | { |
1731 | unsigned int i; |
1732 | bitmap_iterator bi; |
1733 | auto_bitmap tmp; |
1734 | basic_block bb; |
1735 | edge e; |
1736 | edge_iterator ei; |
1737 | |
1738 | /* It is possible to have jump threads in which one is a subpath |
1739 | of the other.  I.e., (A, B), (B, C), (C, D) where B is a joiner
1740 | block and (B, C), (C, D) where no joiner block exists. |
1741 | |
1742 | When this occurs ignore the jump thread request with the joiner |
1743 | block. It's totally subsumed by the simpler jump thread request. |
1744 | |
1745 | This results in less block copying and simpler CFGs.  More importantly,
1746 | if we were to duplicate the joiner block B in this case, we would create
1747 | a new threading opportunity that we wouldn't be able to optimize
1748 | until the next jump threading iteration.
1749 | |
1750 | So first convert the jump thread requests which do not require a |
1751 | joiner block. */ |
1752 | for (i = 0; i < paths.length (); i++) |
1753 | { |
1754 | vec<jump_thread_edge *> *path = paths[i]; |
1755 | |
1756 | if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK) |
1757 | { |
1758 | edge e = (*path)[0]->e; |
1759 | e->aux = (void *)path; |
1760 | bitmap_set_bit (tmp, e->dest->index); |
1761 | } |
1762 | } |
1763 | |
1764 | /* Now iterate again, converting cases where we want to thread |
1765 | through a joiner block, but only if no other edge on the path |
1766 | already has a jump thread attached to it. We do this in two passes, |
1767 | to avoid situations where the order in the paths vec can hide overlapping |
1768 | threads (the path is recorded on the incoming edge, so we would miss |
1769 | cases where the second path starts at a downstream edge on the same |
1770 | path). First record all joiner paths, deleting any in the unexpected |
1771 | case where there is already a path for that incoming edge. */ |
1772 | for (i = 0; i < paths.length ();) |
1773 | { |
1774 | vec<jump_thread_edge *> *path = paths[i]; |
1775 | |
1776 | if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK) |
1777 | { |
1778 | /* Attach the path to the starting edge if none is yet recorded. */ |
1779 | if ((*path)[0]->e->aux == NULL) |
1780 | { |
1781 | (*path)[0]->e->aux = path; |
1782 | i++; |
1783 | } |
1784 | else |
1785 | { |
1786 | paths.unordered_remove (i); |
1787 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1788 | dump_jump_thread_path (dump_file, *path, false); |
1789 | delete_jump_thread_path (path); |
1790 | } |
1791 | } |
1792 | else |
1793 | { |
1794 | i++; |
1795 | } |
1796 | } |
1797 | |
1798 | /* Second, look for paths that have any other jump thread attached to |
1799 | them, and either finish converting them or cancel them. */ |
1800 | for (i = 0; i < paths.length ();) |
1801 | { |
1802 | vec<jump_thread_edge *> *path = paths[i]; |
1803 | edge e = (*path)[0]->e; |
1804 | |
1805 | if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path) |
1806 | { |
1807 | unsigned int j; |
1808 | for (j = 1; j < path->length (); j++) |
1809 | if ((*path)[j]->e->aux != NULL) |
1810 | break; |
1811 | |
1812 | /* If we iterated through the entire path without exiting the loop, |
1813 | then we are good to go, record it. */ |
1814 | if (j == path->length ()) |
1815 | { |
1816 | bitmap_set_bit (tmp, e->dest->index); |
1817 | i++; |
1818 | } |
1819 | else |
1820 | { |
1821 | e->aux = NULL; |
1822 | paths.unordered_remove (i); |
1823 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1824 | dump_jump_thread_path (dump_file, *path, false); |
1825 | delete_jump_thread_path (path); |
1826 | } |
1827 | } |
1828 | else |
1829 | { |
1830 | i++; |
1831 | } |
1832 | } |
1833 | |
1834 | /* If optimizing for size, only thread through a block if we don't have
1835 | to duplicate it or it's an otherwise empty redirection block.  */
1836 | if (optimize_function_for_size_p (cfun)) |
1837 | { |
1838 | EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) |
1839 | { |
1840 | bb = BASIC_BLOCK_FOR_FN (cfun, i); |
1841 | if (EDGE_COUNT (bb->preds) > 1 |
1842 | && !redirection_block_p (bb)) |
1843 | { |
1844 | FOR_EACH_EDGE (e, ei, bb->preds) |
1845 | { |
1846 | if (e->aux) |
1847 | { |
1848 | vec<jump_thread_edge *> *path = THREAD_PATH (e); |
1849 | delete_jump_thread_path (path); |
1850 | e->aux = NULL; |
1851 | } |
1852 | } |
1853 | } |
1854 | else |
1855 | bitmap_set_bit (threaded_blocks, i); |
1856 | } |
1857 | } |
1858 | else |
1859 | bitmap_copy (threaded_blocks, tmp); |
1860 | |
1861 | /* If we have a joiner block (J) which has two successors S1 and S2 and |
1862 | we are threading through S1 and the final destination of the thread
1863 | is S2, then we must verify that any PHI nodes in S2 have the same |
1864 | PHI arguments for the edge J->S2 and J->S1->...->S2. |
1865 | |
1866 | We used to detect this prior to registering the jump thread, but |
1867 | that prohibits propagation of edge equivalences into non-dominated |
1868 | PHI nodes as the equivalency test might occur before propagation. |
1869 | |
1870 | This must also occur after we truncate any jump threading paths |
1871 | as this scenario may only show up after truncation. |
1872 | |
1873 | This works for now, but will need improvement as part of the FSA |
1874 | optimization. |
1875 | |
1876 | Note since we've moved the thread request data to the edges, |
1877 | we have to iterate on those rather than the threaded_edges vector. */ |
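/* As a concrete (illustrative) example, suppose the threaded path
   enters the joiner J and continues through S1 into S2, and S2 contains

     x_1 = PHI <0 (J), 1 (S1)>

   The argument for the edge J->S2 (0) differs from the argument for the
   path's final edge S1->S2 (1), so phi_args_equal_on_edges returns
   false and the request is cancelled below.  Had both arguments been
   the same value, the thread could proceed safely.  */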
1878 | EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) |
1879 | { |
1880 | bb = BASIC_BLOCK_FOR_FN (cfun, i); |
1881 | FOR_EACH_EDGE (e, ei, bb->preds) |
1882 | { |
1883 | if (e->aux) |
1884 | { |
1885 | vec<jump_thread_edge *> *path = THREAD_PATH (e); |
1886 | bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK); |
1887 | |
1888 | if (have_joiner) |
1889 | { |
1890 | basic_block joiner = e->dest; |
1891 | edge final_edge = path->last ()->e; |
1892 | basic_block final_dest = final_edge->dest; |
1893 | edge e2 = find_edge (joiner, final_dest); |
1894 | |
1895 | if (e2 && !phi_args_equal_on_edges (e2, final_edge)) |
1896 | { |
1897 | delete_jump_thread_path (path); |
1898 | e->aux = NULL; |
1899 | } |
1900 | } |
1901 | } |
1902 | } |
1903 | } |
1904 | |
1905 | /* Look for jump threading paths which cross multiple loop headers. |
1906 | |
1907 | The code to thread through loop headers will change the CFG in ways |
1908 | that invalidate the cached loop iteration information. So we must |
1909 | detect that case and wipe the cached information. */ |
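/* For instance (illustrative), a path that starts outside loop L1,
   enters L1 and then steps into a loop L2 nested inside L1 crosses two
   headers; in that case the cached loop information for the loop
   containing the path's final destination is wiped below.  */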
1910 | EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) |
1911 | { |
1912 | basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i); |
1913 | FOR_EACH_EDGE (e, ei, bb->preds) |
1914 | { |
1915 | if (e->aux) |
1916 | { |
1917 | vec<jump_thread_edge *> *path = THREAD_PATH (e); |
1918 | |
1919 | for (unsigned int i = 0, crossed_headers = 0;
1920 | i < path->length (); |
1921 | i++) |
1922 | { |
1923 | basic_block dest = (*path)[i]->e->dest; |
1924 | basic_block src = (*path)[i]->e->src; |
1925 | /* If we enter a loop, we have crossed its header.  */
1926 | if (flow_loop_nested_p (src->loop_father, dest->loop_father)) |
1927 | ++crossed_headers; |
1928 | /* If we step from a block outside an irreducible region |
1929 | to a block inside an irreducible region, then we have |
1930 | crossed into a loop. */ |
1931 | else if (! (src->flags & BB_IRREDUCIBLE_LOOP) |
1932 | && (dest->flags & BB_IRREDUCIBLE_LOOP)) |
1933 | ++crossed_headers; |
1934 | if (crossed_headers > 1) |
1935 | { |
1936 | vect_free_loop_info_assumptions |
1937 | ((*path)[path->length () - 1]->e->dest->loop_father); |
1938 | break; |
1939 | } |
1940 | } |
1941 | } |
1942 | } |
1943 | } |
1944 | } |
1945 | |
1946 | |
1947 | /* Verify that the REGION is a valid jump thread. A jump thread is a special |
1948 | case of a SEME (Single Entry Multiple Exits) region in which all nodes in the
1949 | REGION have exactly one incoming edge. The only exception is the first block |
1950 | that may not have been connected to the rest of the cfg yet. */ |
1951 | |
1952 | DEBUG_FUNCTION void |
1953 | verify_jump_thread (basic_block *region, unsigned n_region) |
1954 | { |
1955 | for (unsigned i = 0; i < n_region; i++) |
1956 | gcc_assert (EDGE_COUNT (region[i]->preds) <= 1); |
1957 | } |
1958 | |
1959 | /* Return true when BB is one of the first N items in BBS. */ |
1960 | |
1961 | static inline bool |
1962 | bb_in_bbs (basic_block bb, basic_block *bbs, int n) |
1963 | { |
1964 | for (int i = 0; i < n; i++) |
1965 | if (bb == bbs[i]) |
1966 | return true; |
1967 | |
1968 | return false; |
1969 | } |
1970 | |
1971 | /* Duplicates a jump-thread path of N_REGION basic blocks. |
1972 | The ENTRY edge is redirected to the duplicate of the region. |
1973 | |
1974 | Remove the last conditional statement in the last basic block in the REGION, |
1975 | and create a single fallthru edge pointing to the same destination as the |
1976 | EXIT edge. |
1977 | |
1978 | Returns false if it is unable to copy the region, true otherwise. */ |
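/* Schematically (an illustrative sketch; A and D are hypothetical
   blocks outside the path), with ENTRY = A->region[0] and
   EXIT = region[n_region-1]->D, the duplication turns

     A -> region[0] -> ... -> region[n_region-1] -> D

   into

     A -> region[0]' -> ... -> region[n_region-1]' -> D

   where the primed blocks are the copies, ENTRY has been redirected to
   region[0]', and the conditional at the end of region[n_region-1]' has
   been replaced by an unconditional fallthru edge to D.  The original
   blocks are left in place for their other incoming edges.  */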
1979 | |
1980 | static bool |
1981 | duplicate_thread_path (edge entry, edge exit, basic_block *region, |
1982 | unsigned n_region) |
1983 | { |
1984 | unsigned i; |
1985 | struct loop *loop = entry->dest->loop_father; |
1986 | edge exit_copy; |
1987 | edge redirected; |
1988 | profile_count curr_count; |
1989 | |
1990 | if (!can_copy_bbs_p (region, n_region)) |
1991 | return false; |
1992 | |
1993 | /* Some sanity checking. Note that we do not check for all possible |
1994 | misuses of the functions.  I.e. if you ask to copy something weird,
1995 | it will work, but the state of structures probably will not be |
1996 | correct. */ |
1997 | for (i = 0; i < n_region; i++) |
1998 | { |
1999 | /* We do not handle subloops, i.e. all the blocks must belong to the |
2000 | same loop. */ |
2001 | if (region[i]->loop_father != loop) |
2002 | return false; |
2003 | } |
2004 | |
2005 | initialize_original_copy_tables (); |
2006 | |
2007 | set_loop_copy (loop, loop); |
2008 | |
2009 | basic_block *region_copy = XNEWVEC (basic_block, n_region); |
2010 | copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop, |
2011 | split_edge_bb_loc (entry), false); |
2012 | |
2013 | /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The |
2014 | following code ensures that all the edges exiting the jump-thread path are |
2015 | redirected back to the original code: these edges are exceptions |
2016 | invalidating the property that is propagated by executing all the blocks of |
2017 | the jump-thread path in order. */ |
2018 | |
2019 | curr_count = entry->count (); |
2020 | |
2021 | for (i = 0; i < n_region; i++) |
2022 | { |
2023 | edge e; |
2024 | edge_iterator ei; |
2025 | basic_block bb = region_copy[i]; |
2026 | |
2027 | /* Watch inconsistent profile. */ |
2028 | if (curr_count > region[i]->count) |
2029 | curr_count = region[i]->count; |
2030 | /* Scale current BB. */ |
2031 | if (region[i]->count.nonzero_p () && curr_count.initialized_p ()) |
2032 | { |
2033 | /* In the middle of the path we only scale the frequencies.
2034 | In the last BB we need to update probabilities of outgoing edges
2035 | because we know which one is taken on the threaded path.  */
2036 | if (i + 1 != n_region) |
2037 | scale_bbs_frequencies_profile_count (region + i, 1, |
2038 | region[i]->count - curr_count, |
2039 | region[i]->count); |
2040 | else |
2041 | update_bb_profile_for_threading (region[i], |
2042 | curr_count, |
2043 | exit); |
2044 | scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count, |
2045 | region_copy[i]->count); |
2046 | } |
2047 | |
2048 | if (single_succ_p (bb)) |
2049 | { |
2050 | /* Make sure the successor is the next node in the path. */ |
2051 | gcc_assert (i + 1 == n_region |
2052 | || region_copy[i + 1] == single_succ_edge (bb)->dest); |
2053 | if (i + 1 != n_region) |
2054 | { |
2055 | curr_count = single_succ_edge (bb)->count (); |
2056 | } |
2057 | continue; |
2058 | } |
2059 | |
2060 | /* Special case the last block on the path: make sure that it does not |
2061 | jump back on the copied path, including back to itself. */ |
2062 | if (i + 1 == n_region) |
2063 | { |
2064 | FOR_EACH_EDGE (e, ei, bb->succs) |
2065 | if (bb_in_bbs (e->dest, region_copy, n_region)) |
2066 | { |
2067 | basic_block orig = get_bb_original (e->dest); |
2068 | if (orig) |
2069 | redirect_edge_and_branch_force (e, orig); |
2070 | } |
2071 | continue; |
2072 | } |
2073 | |
2074 | /* Redirect all other edges jumping to non-adjacent blocks back to the |
2075 | original code. */ |
2076 | FOR_EACH_EDGE (e, ei, bb->succs) |
2077 | if (region_copy[i + 1] != e->dest) |
2078 | { |
2079 | basic_block orig = get_bb_original (e->dest); |
2080 | if (orig) |
2081 | redirect_edge_and_branch_force (e, orig); |
2082 | } |
2083 | else |
2084 | { |
2085 | curr_count = e->count (); |
2086 | } |
2087 | } |
2088 | |
2089 | |
2090 | if (flag_checking) |
2091 | verify_jump_thread (region_copy, n_region); |
2092 | |
2093 | /* Remove the last branch in the jump thread path. */ |
2094 | remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest); |
2095 | |
2096 | /* And fixup the flags on the single remaining edge. */ |
2097 | edge fix_e = find_edge (region_copy[n_region - 1], exit->dest); |
2098 | fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL); |
2099 | fix_e->flags |= EDGE_FALLTHRU; |
2100 | |
2101 | edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU); |
2102 | |
2103 | if (e) |
2104 | { |
2105 | rescan_loop_exit (e, true, false); |
2106 | e->probability = profile_probability::always (); |
2107 | } |
2108 | |
2109 | /* Redirect the entry and add the phi node arguments. */ |
2110 | if (entry->dest == loop->header) |
2111 | mark_loop_for_removal (loop); |
2112 | redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest)); |
2113 | gcc_assert (redirected != NULL); |
2114 | flush_pending_stmts (entry); |
2115 | |
2116 | /* Add the other PHI node arguments. */ |
2117 | add_phi_args_after_copy (region_copy, n_region, NULL); |
2118 | |
2119 | free (region_copy); |
2120 | |
2121 | free_original_copy_tables (); |
2122 | return true; |
2123 | } |
2124 | |
2125 | /* Return true when PATH is a valid jump-thread path. */ |
2126 | |
2127 | static bool |
2128 | valid_jump_thread_path (vec<jump_thread_edge *> *path) |
2129 | { |
2130 | unsigned len = path->length (); |
2131 | |
2132 | /* Check that the path is connected. */ |
2133 | for (unsigned int j = 0; j < len - 1; j++) |
2134 | { |
2135 | edge e = (*path)[j]->e; |
2136 | if (e->dest != (*path)[j+1]->e->src) |
2137 | return false; |
2138 | } |
2139 | return true; |
2140 | } |
2141 | |
2142 | /* Remove any queued jump threads that include edge E. |
2143 | |
2144 | We don't actually remove them here, just record the edges into a
2145 | hash table. That way we can do the search once per iteration of |
2146 | DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR. */ |
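/* For example (illustrative usage only), a pass that is about to remove
   or redirect an edge E during the current DOM/VRP iteration would call

     remove_jump_threads_including (e);

   beforehand, so that thread_through_all_blocks later discards every
   queued path that uses E.  */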
2147 | |
2148 | void |
2149 | remove_jump_threads_including (edge_def *e) |
2150 | { |
2151 | if (!paths.exists ()) |
2152 | return; |
2153 | |
2154 | if (!removed_edges) |
2155 | removed_edges = new hash_table<struct removed_edges> (17); |
2156 | |
2157 | edge *slot = removed_edges->find_slot (e, INSERT); |
2158 | *slot = e; |
2159 | } |
2160 | |
2161 | /* Walk through all blocks and thread incoming edges to the appropriate
2162 | outgoing edge for each recorded jump thread path.
2163 | |
2164 | It is the caller's responsibility to fix the dominance information |
2165 | and rewrite duplicated SSA_NAMEs back into SSA form. |
2166 | |
2167 | If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through |
2168 | loop headers if it does not simplify the loop. |
2169 | |
2170 | Returns true if one or more edges were threaded, false otherwise. */ |
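/* A typical calling sequence from a client pass looks roughly like
   this (illustrative only; PATH1 and PATH2 are paths built by the
   pass, and the cleanup step stands for whatever SSA/dominator
   updating the caller performs):

     register_jump_thread (path1);
     register_jump_thread (path2);
     if (thread_through_all_blocks (true))
       recompute dominators and update SSA form;  */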
2171 | |
2172 | bool |
2173 | thread_through_all_blocks (bool may_peel_loop_headers)
2174 | { |
2175 | bool retval = false; |
2176 | unsigned int i; |
2177 | struct loop *loop; |
2178 | auto_bitmap threaded_blocks; |
2179 | |
2180 | if (!paths.exists ()) |
2181 | { |
2182 | retval = false; |
2183 | goto out; |
2184 | } |
2185 | |
2186 | memset (&thread_stats, 0, sizeof (thread_stats)); |
2187 | |
2188 | /* Remove any paths that referenced removed edges. */ |
2189 | if (removed_edges) |
2190 | for (i = 0; i < paths.length (); ) |
2191 | { |
2192 | unsigned int j; |
2193 | vec<jump_thread_edge *> *path = paths[i]; |
2194 | |
2195 | for (j = 0; j < path->length (); j++) |
2196 | { |
2197 | edge e = (*path)[j]->e; |
2198 | if (removed_edges->find_slot (e, NO_INSERT)) |
2199 | break; |
2200 | } |
2201 | |
2202 | if (j != path->length ()) |
2203 | { |
2204 | delete_jump_thread_path (path); |
2205 | paths.unordered_remove (i); |
2206 | continue; |
2207 | } |
2208 | i++; |
2209 | } |
2210 | |
2211 | /* Jump-thread all FSM threads before other jump-threads. */ |
2212 | for (i = 0; i < paths.length ();) |
2213 | { |
2214 | vec<jump_thread_edge *> *path = paths[i]; |
2215 | edge entry = (*path)[0]->e; |
2216 | |
2217 | /* Only code-generate FSM jump-threads in this loop. */ |
2218 | if ((*path)[0]->type != EDGE_FSM_THREAD) |
2219 | { |
2220 | i++; |
2221 | continue; |
2222 | } |
2223 | |
2224 | /* Do not jump-thread twice from the same block. */ |
2225 | if (bitmap_bit_p (threaded_blocks, entry->src->index) |
2226 | /* We may not want to realize this jump thread path |
2227 | for various reasons. So check it first. */ |
2228 | || !valid_jump_thread_path (path)) |
2229 | { |
2230 | /* Remove invalid FSM jump-thread paths. */ |
2231 | delete_jump_thread_path (path); |
2232 | paths.unordered_remove (i); |
2233 | continue; |
2234 | } |
2235 | |
2236 | unsigned len = path->length (); |
2237 | edge exit = (*path)[len - 1]->e; |
2238 | basic_block *region = XNEWVEC (basic_block, len - 1); |
2239 | |
2240 | for (unsigned int j = 0; j < len - 1; j++) |
2241 | region[j] = (*path)[j]->e->dest; |
2242 | |
2243 | if (duplicate_thread_path (entry, exit, region, len - 1)) |
2244 | { |
2245 | /* We do not update dominance info. */ |
2246 | free_dominance_info (CDI_DOMINATORS); |
2247 | bitmap_set_bit (threaded_blocks, entry->src->index); |
2248 | retval = true; |
2249 | thread_stats.num_threaded_edges++; |
2250 | } |
2251 | |
2252 | delete_jump_thread_path (path); |
2253 | paths.unordered_remove (i); |
2254 | free (region); |
2255 | } |
2256 | |
2257 | /* Remove from PATHS all the jump-threads starting with an edge already |
2258 | jump-threaded. */ |
2259 | for (i = 0; i < paths.length ();) |
2260 | { |
2261 | vec<jump_thread_edge *> *path = paths[i]; |
2262 | edge entry = (*path)[0]->e; |
2263 | |
2264 | /* Do not jump-thread twice from the same block. */ |
2265 | if (bitmap_bit_p (threaded_blocks, entry->src->index)) |
2266 | { |
2267 | delete_jump_thread_path (path); |
2268 | paths.unordered_remove (i); |
2269 | } |
2270 | else |
2271 | i++; |
2272 | } |
2273 | |
2274 | bitmap_clear (threaded_blocks); |
2275 | |
2276 | mark_threaded_blocks (threaded_blocks); |
2277 | |
2278 | initialize_original_copy_tables (); |
2279 | |
2280 | /* The order in which we process jump threads can be important. |
2281 | |
2282 | Consider if we have two jump threading paths A and B. If the |
2283 | target edge of A is the starting edge of B and we thread path A |
2284 | first, then we create an additional incoming edge into B->dest that |
2285 | we can not discover as a jump threading path on this iteration. |
2286 | |
2287 | If we instead thread B first, then the edge into B->dest will have |
2288 | already been redirected before we process path A and path A will |
2289 | naturally, with no further work, target the redirected path for B.
2290 | |
2291 | A post-order is sufficient here.  Compute the ordering first, then
2292 | process the blocks. */ |
2293 | if (!bitmap_empty_p (threaded_blocks)) |
2294 | { |
2295 | int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); |
2296 | unsigned int postorder_num = post_order_compute (postorder, false, false); |
2297 | for (unsigned int i = 0; i < postorder_num; i++) |
2298 | { |
2299 | unsigned int indx = postorder[i]; |
2300 | if (bitmap_bit_p (threaded_blocks, indx)) |
2301 | { |
2302 | basic_block bb = BASIC_BLOCK_FOR_FN (cfun, indx); |
2303 | retval |= thread_block (bb, true); |
2304 | } |
2305 | } |
2306 | free (postorder); |
2307 | } |
2308 | |
2309 | /* Then perform the threading through loop headers. We start with the |
2310 | innermost loop, so that the changes in cfg we perform won't affect |
2311 | further threading. */ |
2312 | FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) |
2313 | { |
2314 | if (!loop->header |
2315 | || !bitmap_bit_p (threaded_blocks, loop->header->index)) |
2316 | continue; |
2317 | |
2318 | retval |= thread_through_loop_header (loop, may_peel_loop_headers); |
2319 | } |
2320 | |
2321 | /* All jump threading paths should have been resolved at this |
2322 | point. Verify that is the case. */ |
2323 | basic_block bb; |
2324 | FOR_EACH_BB_FN (bb, cfun) |
2325 | { |
2326 | edge_iterator ei; |
2327 | edge e; |
2328 | FOR_EACH_EDGE (e, ei, bb->preds) |
2329 | gcc_assert (e->aux == NULL); |
2330 | } |
2331 | |
2332 | statistics_counter_event (cfun, "Jumps threaded",
2333 | thread_stats.num_threaded_edges); |
2334 | |
2335 | free_original_copy_tables (); |
2336 | |
2337 | paths.release (); |
2338 | |
2339 | if (retval) |
2340 | loops_state_set (LOOPS_NEED_FIXUP); |
2341 | |
2342 | out: |
2343 | delete removed_edges; |
2344 | removed_edges = NULL; |
2345 | return retval; |
2346 | } |
2347 | |
2348 | /* Delete the jump threading path PATH. We have to explicitly delete |
2349 | each entry in the vector, then the container. */ |
2350 | |
2351 | void |
2352 | delete_jump_thread_path (vec<jump_thread_edge *> *path) |
2353 | { |
2354 | for (unsigned int i = 0; i < path->length (); i++) |
2355 | delete (*path)[i]; |
2356 | path->release ();
2357 | delete path; |
2358 | } |
2359 | |
2360 | /* Register a jump threading opportunity. We queue up all the jump |
2361 | threading opportunities discovered by a pass and update the CFG |
2362 | and SSA form all at once. |
2363 | |
2364 | PATH is the series of edges making up the jump thread.  We are
2365 | effectively recording that the destination of its first edge can be
2366 | changed to the destination of its last edge after fixing the SSA graph.  */
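/* A minimal sketch of how a client builds and registers a two-edge path
   (illustrative only; E and E2 are edges the caller has analyzed, and
   the jump_thread_edge constructor and EDGE_* types come from
   tree-ssa-threadupdate.h):

     vec<jump_thread_edge *> *path = new vec<jump_thread_edge *> ();
     path->safe_push (new jump_thread_edge (e, EDGE_START_JUMP_THREAD));
     path->safe_push (new jump_thread_edge (e2, EDGE_COPY_SRC_BLOCK));
     register_jump_thread (path);

   Ownership of PATH transfers to this function: the path is either
   queued in PATHS or deleted with delete_jump_thread_path.  */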
2367 | |
2368 | void |
2369 | register_jump_thread (vec<jump_thread_edge *> *path) |
2370 | { |
2371 | if (!dbg_cnt (registered_jump_thread)) |
2372 | { |
2373 | delete_jump_thread_path (path); |
2374 | return; |
2375 | } |
2376 | |
2377 | /* First make sure there are no NULL outgoing edges on the jump threading |
2378 | path. That can happen for jumping to a constant address. */ |
2379 | for (unsigned int i = 0; i < path->length (); i++) |
2380 | { |
2381 | if ((*path)[i]->e == NULL) |
2382 | { |
2383 | if (dump_file && (dump_flags & TDF_DETAILS)) |
2384 | { |
2385 | fprintf (dump_file, |
2386 | "Found NULL edge in jump threading path. Cancelling jump thread:\n" ); |
2387 | dump_jump_thread_path (dump_file, *path, false); |
2388 | } |
2389 | |
2390 | delete_jump_thread_path (path); |
2391 | return; |
2392 | } |
2393 | |
2394 | /* Only the FSM threader is allowed to thread across |
2395 | backedges in the CFG. */ |
2396 | if (flag_checking |
2397 | && (*path)[0]->type != EDGE_FSM_THREAD) |
2398 | gcc_assert (((*path)[i]->e->flags & EDGE_DFS_BACK) == 0); |
2399 | } |
2400 | |
2401 | if (dump_file && (dump_flags & TDF_DETAILS)) |
2402 | dump_jump_thread_path (dump_file, *path, true); |
2403 | |
2404 | if (!paths.exists ()) |
2405 | paths.create (5); |
2406 | |
2407 | paths.safe_push (path); |
2408 | } |
2409 | |