1 | /* Generic partial redundancy elimination with lazy code motion support. |
2 | Copyright (C) 1998-2024 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free |
8 | Software Foundation; either version 3, or (at your option) any later |
9 | version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | /* These routines are meant to be used by various optimization |
21 | passes which can be modeled as lazy code motion problems. |
22 | Including, but not limited to: |
23 | |
24 | * Traditional partial redundancy elimination. |
25 | |
26 | * Placement of caller/caller register save/restores. |
27 | |
28 | * Load/store motion. |
29 | |
30 | * Copy motion. |
31 | |
32 | * Conversion of flat register files to a stacked register |
33 | model. |
34 | |
35 | * Dead load/store elimination. |
36 | |
37 | These routines accept as input: |
38 | |
39 | * Basic block information (number of blocks, lists of |
40 | predecessors and successors). Note the granularity |
41 | does not need to be basic block, they could be statements |
42 | or functions. |
43 | |
44 | * Bitmaps of local properties (computed, transparent and |
45 | anticipatable expressions). |
46 | |
47 | The output of these routines is bitmap of redundant computations |
48 | and a bitmap of optimal placement points. */ |
49 | |
50 | |
51 | #include "config.h" |
52 | #include "system.h" |
53 | #include "coretypes.h" |
54 | #include "backend.h" |
55 | #include "cfganal.h" |
56 | #include "lcm.h" |
57 | |
58 | /* Edge based LCM routines. */ |
59 | static void compute_laterin (struct edge_list *, sbitmap *, sbitmap *, |
60 | sbitmap *, sbitmap *); |
61 | static void compute_insert_delete (struct edge_list *edge_list, sbitmap *, |
62 | sbitmap *, sbitmap *, sbitmap *, sbitmap *); |
63 | |
64 | /* Edge based LCM routines on a reverse flowgraph. */ |
65 | static void compute_farthest (struct edge_list *, int, sbitmap *, sbitmap *, |
66 | sbitmap*, sbitmap *, sbitmap *); |
67 | static void compute_nearerout (struct edge_list *, sbitmap *, sbitmap *, |
68 | sbitmap *, sbitmap *); |
69 | static void compute_rev_insert_delete (struct edge_list *edge_list, sbitmap *, |
70 | sbitmap *, sbitmap *, sbitmap *, |
71 | sbitmap *); |
72 | |
73 | /* Edge based lcm routines. */ |
74 | |
75 | /* Compute expression anticipatability at entrance and exit of each block. |
76 | This is done based on the flow graph, and not on the pred-succ lists. |
77 | Other than that, its pretty much identical to compute_antinout. */ |
78 | |
79 | void |
80 | compute_antinout_edge (sbitmap *antloc, sbitmap *transp, sbitmap *antin, |
81 | sbitmap *antout) |
82 | { |
83 | basic_block bb; |
84 | edge e; |
85 | basic_block *worklist, *qin, *qout, *qend; |
86 | unsigned int qlen; |
87 | edge_iterator ei; |
88 | |
89 | /* Allocate a worklist array/queue. Entries are only added to the |
90 | list if they were not already on the list. So the size is |
91 | bounded by the number of basic blocks. */ |
92 | qin = qout = worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun)); |
93 | |
94 | /* We want a maximal solution, so make an optimistic initialization of |
95 | ANTIN. */ |
96 | bitmap_vector_ones (antin, last_basic_block_for_fn (cfun)); |
97 | |
98 | /* Put every block on the worklist; this is necessary because of the |
99 | optimistic initialization of ANTIN above. Use reverse postorder |
100 | on the inverted graph to make the backward dataflow problem require |
101 | less iterations. */ |
102 | int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); |
103 | int n = inverted_rev_post_order_compute (cfun, rpo); |
104 | for (int i = 0; i < n; ++i) |
105 | { |
106 | bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]); |
107 | if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun) |
108 | || bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
109 | continue; |
110 | *qin++ = bb; |
111 | bb->aux = bb; |
112 | } |
113 | free (ptr: rpo); |
114 | |
115 | qin = worklist; |
116 | qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS]; |
117 | qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; |
118 | |
119 | /* Mark blocks which are predecessors of the exit block so that we |
120 | can easily identify them below. */ |
121 | FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) |
122 | e->src->aux = EXIT_BLOCK_PTR_FOR_FN (cfun); |
123 | |
124 | /* Iterate until the worklist is empty. */ |
125 | while (qlen) |
126 | { |
127 | /* Take the first entry off the worklist. */ |
128 | bb = *qout++; |
129 | qlen--; |
130 | |
131 | if (qout >= qend) |
132 | qout = worklist; |
133 | |
134 | if (bb->aux == EXIT_BLOCK_PTR_FOR_FN (cfun)) |
135 | /* Do not clear the aux field for blocks which are predecessors of |
136 | the EXIT block. That way we never add then to the worklist |
137 | again. */ |
138 | bitmap_clear (antout[bb->index]); |
139 | else |
140 | { |
141 | /* Clear the aux field of this block so that it can be added to |
142 | the worklist again if necessary. */ |
143 | bb->aux = NULL; |
144 | bitmap_intersection_of_succs (antout[bb->index], antin, bb); |
145 | } |
146 | |
147 | if (bitmap_or_and (antin[bb->index], antloc[bb->index], |
148 | transp[bb->index], antout[bb->index])) |
149 | /* If the in state of this block changed, then we need |
150 | to add the predecessors of this block to the worklist |
151 | if they are not already on the worklist. */ |
152 | FOR_EACH_EDGE (e, ei, bb->preds) |
153 | if (!e->src->aux && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
154 | { |
155 | *qin++ = e->src; |
156 | e->src->aux = e; |
157 | qlen++; |
158 | if (qin >= qend) |
159 | qin = worklist; |
160 | } |
161 | } |
162 | |
163 | clear_aux_for_edges (); |
164 | clear_aux_for_blocks (); |
165 | free (ptr: worklist); |
166 | } |
167 | |
168 | /* Compute the earliest vector for edge based lcm. */ |
169 | |
170 | void |
171 | compute_earliest (struct edge_list *edge_list, int n_exprs, sbitmap *antin, |
172 | sbitmap *antout, sbitmap *avout, sbitmap *kill, |
173 | sbitmap *earliest) |
174 | { |
175 | int x, num_edges; |
176 | basic_block pred, succ; |
177 | |
178 | num_edges = NUM_EDGES (edge_list); |
179 | |
180 | auto_sbitmap difference (n_exprs), temp_bitmap (n_exprs); |
181 | for (x = 0; x < num_edges; x++) |
182 | { |
183 | pred = INDEX_EDGE_PRED_BB (edge_list, x); |
184 | succ = INDEX_EDGE_SUCC_BB (edge_list, x); |
185 | if (pred == ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
186 | bitmap_copy (earliest[x], antin[succ->index]); |
187 | else |
188 | { |
189 | if (succ == EXIT_BLOCK_PTR_FOR_FN (cfun)) |
190 | bitmap_clear (earliest[x]); |
191 | else |
192 | { |
193 | bitmap_and_compl (difference, antin[succ->index], |
194 | avout[pred->index]); |
195 | bitmap_not (temp_bitmap, antout[pred->index]); |
196 | bitmap_and_or (earliest[x], difference, |
197 | kill[pred->index], temp_bitmap); |
198 | } |
199 | } |
200 | } |
201 | } |
202 | |
203 | /* later(p,s) is dependent on the calculation of laterin(p). |
204 | laterin(p) is dependent on the calculation of later(p2,p). |
205 | |
206 | laterin(ENTRY) is defined as all 0's |
207 | later(ENTRY, succs(ENTRY)) are defined using laterin(ENTRY) |
208 | laterin(succs(ENTRY)) is defined by later(ENTRY, succs(ENTRY)). |
209 | |
210 | If we progress in this manner, starting with all basic blocks |
211 | in the work list, anytime we change later(bb), we need to add |
212 | succs(bb) to the worklist if they are not already on the worklist. |
213 | |
214 | Boundary conditions: |
215 | |
216 | We prime the worklist all the normal basic blocks. The ENTRY block can |
217 | never be added to the worklist since it is never the successor of any |
218 | block. We explicitly prevent the EXIT block from being added to the |
219 | worklist. |
220 | |
221 | We optimistically initialize LATER. That is the only time this routine |
222 | will compute LATER for an edge out of the entry block since the entry |
223 | block is never on the worklist. Thus, LATERIN is neither used nor |
224 | computed for the ENTRY block. |
225 | |
226 | Since the EXIT block is never added to the worklist, we will neither |
227 | use nor compute LATERIN for the exit block. Edges which reach the |
228 | EXIT block are handled in the normal fashion inside the loop. However, |
229 | the insertion/deletion computation needs LATERIN(EXIT), so we have |
230 | to compute it. */ |
231 | |
232 | static void |
233 | compute_laterin (struct edge_list *edge_list, sbitmap *earliest, |
234 | sbitmap *antloc, sbitmap *later, sbitmap *laterin) |
235 | { |
236 | int num_edges, i; |
237 | edge e; |
238 | basic_block *worklist, *qin, *qout, *qend, bb; |
239 | unsigned int qlen; |
240 | edge_iterator ei; |
241 | |
242 | num_edges = NUM_EDGES (edge_list); |
243 | |
244 | /* Allocate a worklist array/queue. Entries are only added to the |
245 | list if they were not already on the list. So the size is |
246 | bounded by the number of basic blocks. */ |
247 | qin = qout = worklist |
248 | = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun)); |
249 | |
250 | /* Initialize a mapping from each edge to its index. */ |
251 | for (i = 0; i < num_edges; i++) |
252 | INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i; |
253 | |
254 | /* We want a maximal solution, so initially consider LATER true for |
255 | all edges. This allows propagation through a loop since the incoming |
256 | loop edge will have LATER set, so if all the other incoming edges |
257 | to the loop are set, then LATERIN will be set for the head of the |
258 | loop. |
259 | |
260 | If the optimistic setting of LATER on that edge was incorrect (for |
261 | example the expression is ANTLOC in a block within the loop) then |
262 | this algorithm will detect it when we process the block at the head |
263 | of the optimistic edge. That will requeue the affected blocks. */ |
264 | bitmap_vector_ones (later, num_edges); |
265 | |
266 | /* Note that even though we want an optimistic setting of LATER, we |
267 | do not want to be overly optimistic. Consider an outgoing edge from |
268 | the entry block. That edge should always have a LATER value the |
269 | same as EARLIEST for that edge. */ |
270 | FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) |
271 | bitmap_copy (later[(size_t) e->aux], earliest[(size_t) e->aux]); |
272 | |
273 | /* Add all the blocks to the worklist. This prevents an early exit from |
274 | the loop given our optimistic initialization of LATER above. */ |
275 | int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); |
276 | int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false); |
277 | for (int i = 0; i < n; ++i) |
278 | { |
279 | bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]); |
280 | *qin++ = bb; |
281 | bb->aux = bb; |
282 | } |
283 | free (ptr: rpo); |
284 | |
285 | /* Note that we do not use the last allocated element for our queue, |
286 | as EXIT_BLOCK is never inserted into it. */ |
287 | qin = worklist; |
288 | qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS]; |
289 | qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; |
290 | |
291 | /* Iterate until the worklist is empty. */ |
292 | while (qlen) |
293 | { |
294 | /* Take the first entry off the worklist. */ |
295 | bb = *qout++; |
296 | bb->aux = NULL; |
297 | qlen--; |
298 | if (qout >= qend) |
299 | qout = worklist; |
300 | |
301 | /* Compute LATERIN as the intersection of LATER for each incoming |
302 | edge to BB. */ |
303 | bitmap_ones (laterin[bb->index]); |
304 | FOR_EACH_EDGE (e, ei, bb->preds) |
305 | bitmap_and (laterin[bb->index], laterin[bb->index], |
306 | later[(size_t)e->aux]); |
307 | |
308 | /* Calculate LATER for all outgoing edges of BB. */ |
309 | FOR_EACH_EDGE (e, ei, bb->succs) |
310 | if (bitmap_ior_and_compl (later[(size_t) e->aux], |
311 | earliest[(size_t) e->aux], |
312 | laterin[bb->index], |
313 | antloc[bb->index]) |
314 | /* If LATER for an outgoing edge was changed, then we need |
315 | to add the target of the outgoing edge to the worklist. */ |
316 | && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest->aux == 0) |
317 | { |
318 | *qin++ = e->dest; |
319 | e->dest->aux = e; |
320 | qlen++; |
321 | if (qin >= qend) |
322 | qin = worklist; |
323 | } |
324 | } |
325 | |
326 | /* Computation of insertion and deletion points requires computing LATERIN |
327 | for the EXIT block. We allocated an extra entry in the LATERIN array |
328 | for just this purpose. */ |
329 | bitmap_ones (laterin[last_basic_block_for_fn (cfun)]); |
330 | FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) |
331 | bitmap_and (laterin[last_basic_block_for_fn (cfun)], |
332 | laterin[last_basic_block_for_fn (cfun)], |
333 | later[(size_t) e->aux]); |
334 | |
335 | clear_aux_for_edges (); |
336 | free (ptr: worklist); |
337 | } |
338 | |
339 | /* Compute the insertion and deletion points for edge based LCM. */ |
340 | |
341 | static void |
342 | compute_insert_delete (struct edge_list *edge_list, sbitmap *antloc, |
343 | sbitmap *later, sbitmap *laterin, sbitmap *insert, |
344 | sbitmap *del) |
345 | { |
346 | int x; |
347 | basic_block bb; |
348 | |
349 | FOR_EACH_BB_FN (bb, cfun) |
350 | bitmap_and_compl (del[bb->index], antloc[bb->index], |
351 | laterin[bb->index]); |
352 | |
353 | for (x = 0; x < NUM_EDGES (edge_list); x++) |
354 | { |
355 | basic_block b = INDEX_EDGE_SUCC_BB (edge_list, x); |
356 | |
357 | if (b == EXIT_BLOCK_PTR_FOR_FN (cfun)) |
358 | bitmap_and_compl (insert[x], later[x], |
359 | laterin[last_basic_block_for_fn (cfun)]); |
360 | else |
361 | bitmap_and_compl (insert[x], later[x], laterin[b->index]); |
362 | } |
363 | } |
364 | |
365 | /* Given local properties TRANSP, ANTLOC, AVLOC, KILL return the insert and |
366 | delete vectors for edge based LCM and return the AVIN, AVOUT bitmap. |
367 | map the insert vector to what edge an expression should be inserted on. */ |
368 | |
369 | struct edge_list * |
370 | pre_edge_lcm_avs (int n_exprs, sbitmap *transp, |
371 | sbitmap *avloc, sbitmap *antloc, sbitmap *kill, |
372 | sbitmap *avin, sbitmap *avout, |
373 | sbitmap **insert, sbitmap **del) |
374 | { |
375 | sbitmap *antin, *antout, *earliest; |
376 | sbitmap *later, *laterin; |
377 | struct edge_list *edge_list; |
378 | int num_edges; |
379 | |
380 | edge_list = create_edge_list (); |
381 | num_edges = NUM_EDGES (edge_list); |
382 | |
383 | #ifdef LCM_DEBUG_INFO |
384 | if (dump_file) |
385 | { |
386 | fprintf (dump_file, "Edge List:\n" ); |
387 | verify_edge_list (dump_file, edge_list); |
388 | print_edge_list (dump_file, edge_list); |
389 | dump_bitmap_vector (dump_file, "transp" , "" , transp, |
390 | last_basic_block_for_fn (cfun)); |
391 | dump_bitmap_vector (dump_file, "antloc" , "" , antloc, |
392 | last_basic_block_for_fn (cfun)); |
393 | dump_bitmap_vector (dump_file, "avloc" , "" , avloc, |
394 | last_basic_block_for_fn (cfun)); |
395 | dump_bitmap_vector (dump_file, "kill" , "" , kill, |
396 | last_basic_block_for_fn (cfun)); |
397 | } |
398 | #endif |
399 | |
400 | /* Compute global availability. */ |
401 | compute_available (avloc, kill, avout, avin); |
402 | |
403 | /* Compute global anticipatability. */ |
404 | antin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs); |
405 | antout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs); |
406 | compute_antinout_edge (antloc, transp, antin, antout); |
407 | |
408 | #ifdef LCM_DEBUG_INFO |
409 | if (dump_file) |
410 | { |
411 | dump_bitmap_vector (dump_file, "antin" , "" , antin, |
412 | last_basic_block_for_fn (cfun)); |
413 | dump_bitmap_vector (dump_file, "antout" , "" , antout, |
414 | last_basic_block_for_fn (cfun)); |
415 | } |
416 | #endif |
417 | |
418 | /* Compute earliestness. */ |
419 | earliest = sbitmap_vector_alloc (num_edges, n_exprs); |
420 | compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest); |
421 | |
422 | #ifdef LCM_DEBUG_INFO |
423 | if (dump_file) |
424 | dump_bitmap_vector (dump_file, "earliest" , "" , earliest, num_edges); |
425 | #endif |
426 | |
427 | sbitmap_vector_free (vec: antout); |
428 | sbitmap_vector_free (vec: antin); |
429 | |
430 | later = sbitmap_vector_alloc (num_edges, n_exprs); |
431 | |
432 | /* Allocate an extra element for the exit block in the laterin vector. */ |
433 | laterin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun) + 1, |
434 | n_exprs); |
435 | compute_laterin (edge_list, earliest, antloc, later, laterin); |
436 | |
437 | #ifdef LCM_DEBUG_INFO |
438 | if (dump_file) |
439 | { |
440 | dump_bitmap_vector (dump_file, "laterin" , "" , laterin, |
441 | last_basic_block_for_fn (cfun) + 1); |
442 | dump_bitmap_vector (dump_file, "later" , "" , later, num_edges); |
443 | } |
444 | #endif |
445 | |
446 | sbitmap_vector_free (vec: earliest); |
447 | |
448 | *insert = sbitmap_vector_alloc (num_edges, n_exprs); |
449 | *del = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs); |
450 | bitmap_vector_clear (*insert, num_edges); |
451 | bitmap_vector_clear (*del, last_basic_block_for_fn (cfun)); |
452 | compute_insert_delete (edge_list, antloc, later, laterin, insert: *insert, del: *del); |
453 | |
454 | sbitmap_vector_free (vec: laterin); |
455 | sbitmap_vector_free (vec: later); |
456 | |
457 | #ifdef LCM_DEBUG_INFO |
458 | if (dump_file) |
459 | { |
460 | dump_bitmap_vector (dump_file, "pre_insert_map" , "" , *insert, num_edges); |
461 | dump_bitmap_vector (dump_file, "pre_delete_map" , "" , *del, |
462 | last_basic_block_for_fn (cfun)); |
463 | } |
464 | #endif |
465 | |
466 | return edge_list; |
467 | } |
468 | |
469 | /* Wrapper to allocate avin/avout and call pre_edge_lcm_avs. */ |
470 | |
471 | struct edge_list * |
472 | pre_edge_lcm (int n_exprs, sbitmap *transp, |
473 | sbitmap *avloc, sbitmap *antloc, sbitmap *kill, |
474 | sbitmap **insert, sbitmap **del) |
475 | { |
476 | struct edge_list *edge_list; |
477 | sbitmap *avin, *avout; |
478 | |
479 | avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs); |
480 | avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs); |
481 | |
482 | edge_list = pre_edge_lcm_avs (n_exprs, transp, avloc, antloc, kill, |
483 | avin, avout, insert, del); |
484 | |
485 | sbitmap_vector_free (vec: avout); |
486 | sbitmap_vector_free (vec: avin); |
487 | |
488 | return edge_list; |
489 | } |
490 | |
491 | /* Compute the AVIN and AVOUT vectors from the AVLOC and KILL vectors. |
492 | Return the number of passes we performed to iterate to a solution. */ |
493 | |
494 | void |
495 | compute_available (sbitmap *avloc, sbitmap *kill, sbitmap *avout, |
496 | sbitmap *avin) |
497 | { |
498 | edge e; |
499 | basic_block *worklist, *qin, *qout, *qend, bb; |
500 | unsigned int qlen; |
501 | edge_iterator ei; |
502 | |
503 | /* Allocate a worklist array/queue. Entries are only added to the |
504 | list if they were not already on the list. So the size is |
505 | bounded by the number of basic blocks. */ |
506 | qin = qout = worklist = |
507 | XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); |
508 | |
509 | /* We want a maximal solution. */ |
510 | bitmap_vector_ones (avout, last_basic_block_for_fn (cfun)); |
511 | |
512 | /* Put every block on the worklist; this is necessary because of the |
513 | optimistic initialization of AVOUT above. Use reverse postorder |
514 | to make the forward dataflow problem require less iterations. */ |
515 | int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); |
516 | int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false); |
517 | for (int i = 0; i < n; ++i) |
518 | { |
519 | bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]); |
520 | *qin++ = bb; |
521 | bb->aux = bb; |
522 | } |
523 | free (ptr: rpo); |
524 | |
525 | qin = worklist; |
526 | qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS]; |
527 | qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; |
528 | |
529 | /* Mark blocks which are successors of the entry block so that we |
530 | can easily identify them below. */ |
531 | FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) |
532 | e->dest->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun); |
533 | |
534 | /* Iterate until the worklist is empty. */ |
535 | while (qlen) |
536 | { |
537 | /* Take the first entry off the worklist. */ |
538 | bb = *qout++; |
539 | qlen--; |
540 | |
541 | if (qout >= qend) |
542 | qout = worklist; |
543 | |
544 | /* If one of the predecessor blocks is the ENTRY block, then the |
545 | intersection of avouts is the null set. We can identify such blocks |
546 | by the special value in the AUX field in the block structure. */ |
547 | if (bb->aux == ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
548 | /* Do not clear the aux field for blocks which are successors of the |
549 | ENTRY block. That way we never add then to the worklist again. */ |
550 | bitmap_clear (avin[bb->index]); |
551 | else |
552 | { |
553 | /* Clear the aux field of this block so that it can be added to |
554 | the worklist again if necessary. */ |
555 | bb->aux = NULL; |
556 | bitmap_intersection_of_preds (avin[bb->index], avout, bb); |
557 | } |
558 | |
559 | if (bitmap_ior_and_compl (avout[bb->index], avloc[bb->index], |
560 | avin[bb->index], kill[bb->index])) |
561 | /* If the out state of this block changed, then we need |
562 | to add the successors of this block to the worklist |
563 | if they are not already on the worklist. */ |
564 | FOR_EACH_EDGE (e, ei, bb->succs) |
565 | if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)) |
566 | { |
567 | *qin++ = e->dest; |
568 | e->dest->aux = e; |
569 | qlen++; |
570 | |
571 | if (qin >= qend) |
572 | qin = worklist; |
573 | } |
574 | } |
575 | |
576 | clear_aux_for_edges (); |
577 | clear_aux_for_blocks (); |
578 | free (ptr: worklist); |
579 | } |
580 | |
581 | /* Compute the farthest vector for edge based lcm. */ |
582 | |
583 | static void |
584 | compute_farthest (struct edge_list *edge_list, int n_exprs, |
585 | sbitmap *st_avout, sbitmap *st_avin, sbitmap *st_antin, |
586 | sbitmap *kill, sbitmap *farthest) |
587 | { |
588 | int x, num_edges; |
589 | basic_block pred, succ; |
590 | |
591 | num_edges = NUM_EDGES (edge_list); |
592 | |
593 | auto_sbitmap difference (n_exprs), temp_bitmap (n_exprs); |
594 | for (x = 0; x < num_edges; x++) |
595 | { |
596 | pred = INDEX_EDGE_PRED_BB (edge_list, x); |
597 | succ = INDEX_EDGE_SUCC_BB (edge_list, x); |
598 | if (succ == EXIT_BLOCK_PTR_FOR_FN (cfun)) |
599 | bitmap_copy (farthest[x], st_avout[pred->index]); |
600 | else |
601 | { |
602 | if (pred == ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
603 | bitmap_clear (farthest[x]); |
604 | else |
605 | { |
606 | bitmap_and_compl (difference, st_avout[pred->index], |
607 | st_antin[succ->index]); |
608 | bitmap_not (temp_bitmap, st_avin[succ->index]); |
609 | bitmap_and_or (farthest[x], difference, |
610 | kill[succ->index], temp_bitmap); |
611 | } |
612 | } |
613 | } |
614 | } |
615 | |
616 | /* Compute nearer and nearerout vectors for edge based lcm. |
617 | |
618 | This is the mirror of compute_laterin, additional comments on the |
619 | implementation can be found before compute_laterin. */ |
620 | |
621 | static void |
622 | compute_nearerout (struct edge_list *edge_list, sbitmap *farthest, |
623 | sbitmap *st_avloc, sbitmap *nearer, sbitmap *nearerout) |
624 | { |
625 | int num_edges, i; |
626 | edge e; |
627 | basic_block *worklist, *tos, bb; |
628 | edge_iterator ei; |
629 | |
630 | num_edges = NUM_EDGES (edge_list); |
631 | |
632 | /* Allocate a worklist array/queue. Entries are only added to the |
633 | list if they were not already on the list. So the size is |
634 | bounded by the number of basic blocks. */ |
635 | tos = worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) + 1); |
636 | |
637 | /* Initialize NEARER for each edge and build a mapping from an edge to |
638 | its index. */ |
639 | for (i = 0; i < num_edges; i++) |
640 | INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i; |
641 | |
642 | /* We want a maximal solution. */ |
643 | bitmap_vector_ones (nearer, num_edges); |
644 | |
645 | /* Note that even though we want an optimistic setting of NEARER, we |
646 | do not want to be overly optimistic. Consider an incoming edge to |
647 | the exit block. That edge should always have a NEARER value the |
648 | same as FARTHEST for that edge. */ |
649 | FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) |
650 | bitmap_copy (nearer[(size_t)e->aux], farthest[(size_t)e->aux]); |
651 | |
652 | /* Add all the blocks to the worklist. This prevents an early exit |
653 | from the loop given our optimistic initialization of NEARER. */ |
654 | FOR_EACH_BB_FN (bb, cfun) |
655 | { |
656 | *tos++ = bb; |
657 | bb->aux = bb; |
658 | } |
659 | |
660 | /* Iterate until the worklist is empty. */ |
661 | while (tos != worklist) |
662 | { |
663 | /* Take the first entry off the worklist. */ |
664 | bb = *--tos; |
665 | bb->aux = NULL; |
666 | |
667 | /* Compute the intersection of NEARER for each outgoing edge from B. */ |
668 | bitmap_ones (nearerout[bb->index]); |
669 | FOR_EACH_EDGE (e, ei, bb->succs) |
670 | bitmap_and (nearerout[bb->index], nearerout[bb->index], |
671 | nearer[(size_t) e->aux]); |
672 | |
673 | /* Calculate NEARER for all incoming edges. */ |
674 | FOR_EACH_EDGE (e, ei, bb->preds) |
675 | if (bitmap_ior_and_compl (nearer[(size_t) e->aux], |
676 | farthest[(size_t) e->aux], |
677 | nearerout[e->dest->index], |
678 | st_avloc[e->dest->index]) |
679 | /* If NEARER for an incoming edge was changed, then we need |
680 | to add the source of the incoming edge to the worklist. */ |
681 | && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && e->src->aux == 0) |
682 | { |
683 | *tos++ = e->src; |
684 | e->src->aux = e; |
685 | } |
686 | } |
687 | |
688 | /* Computation of insertion and deletion points requires computing NEAREROUT |
689 | for the ENTRY block. We allocated an extra entry in the NEAREROUT array |
690 | for just this purpose. */ |
691 | bitmap_ones (nearerout[last_basic_block_for_fn (cfun)]); |
692 | FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) |
693 | bitmap_and (nearerout[last_basic_block_for_fn (cfun)], |
694 | nearerout[last_basic_block_for_fn (cfun)], |
695 | nearer[(size_t) e->aux]); |
696 | |
697 | clear_aux_for_edges (); |
698 | free (ptr: tos); |
699 | } |
700 | |
701 | /* Compute the insertion and deletion points for edge based LCM. */ |
702 | |
703 | static void |
704 | compute_rev_insert_delete (struct edge_list *edge_list, sbitmap *st_avloc, |
705 | sbitmap *nearer, sbitmap *nearerout, |
706 | sbitmap *insert, sbitmap *del) |
707 | { |
708 | int x; |
709 | basic_block bb; |
710 | |
711 | FOR_EACH_BB_FN (bb, cfun) |
712 | bitmap_and_compl (del[bb->index], st_avloc[bb->index], |
713 | nearerout[bb->index]); |
714 | |
715 | for (x = 0; x < NUM_EDGES (edge_list); x++) |
716 | { |
717 | basic_block b = INDEX_EDGE_PRED_BB (edge_list, x); |
718 | if (b == ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
719 | bitmap_and_compl (insert[x], nearer[x], |
720 | nearerout[last_basic_block_for_fn (cfun)]); |
721 | else |
722 | bitmap_and_compl (insert[x], nearer[x], nearerout[b->index]); |
723 | } |
724 | } |
725 | |
726 | /* Given local properties TRANSP, ST_AVLOC, ST_ANTLOC, KILL return the |
727 | insert and delete vectors for edge based reverse LCM. Returns an |
728 | edgelist which is used to map the insert vector to what edge |
729 | an expression should be inserted on. */ |
730 | |
731 | struct edge_list * |
732 | pre_edge_rev_lcm (int n_exprs, sbitmap *transp, |
733 | sbitmap *st_avloc, sbitmap *st_antloc, sbitmap *kill, |
734 | sbitmap **insert, sbitmap **del) |
735 | { |
736 | sbitmap *st_antin, *st_antout; |
737 | sbitmap *st_avout, *st_avin, *farthest; |
738 | sbitmap *nearer, *nearerout; |
739 | struct edge_list *edge_list; |
740 | int num_edges; |
741 | |
742 | edge_list = create_edge_list (); |
743 | num_edges = NUM_EDGES (edge_list); |
744 | |
745 | st_antin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs); |
746 | st_antout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs); |
747 | bitmap_vector_clear (st_antin, last_basic_block_for_fn (cfun)); |
748 | bitmap_vector_clear (st_antout, last_basic_block_for_fn (cfun)); |
749 | compute_antinout_edge (antloc: st_antloc, transp, antin: st_antin, antout: st_antout); |
750 | |
751 | /* Compute global anticipatability. */ |
752 | st_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs); |
753 | st_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs); |
754 | compute_available (avloc: st_avloc, kill, avout: st_avout, avin: st_avin); |
755 | |
756 | #ifdef LCM_DEBUG_INFO |
757 | if (dump_file) |
758 | { |
759 | fprintf (dump_file, "Edge List:\n" ); |
760 | verify_edge_list (dump_file, edge_list); |
761 | print_edge_list (dump_file, edge_list); |
762 | dump_bitmap_vector (dump_file, "transp" , "" , transp, |
763 | last_basic_block_for_fn (cfun)); |
764 | dump_bitmap_vector (dump_file, "st_avloc" , "" , st_avloc, |
765 | last_basic_block_for_fn (cfun)); |
766 | dump_bitmap_vector (dump_file, "st_antloc" , "" , st_antloc, |
767 | last_basic_block_for_fn (cfun)); |
768 | dump_bitmap_vector (dump_file, "st_antin" , "" , st_antin, |
769 | last_basic_block_for_fn (cfun)); |
770 | dump_bitmap_vector (dump_file, "st_antout" , "" , st_antout, |
771 | last_basic_block_for_fn (cfun)); |
772 | dump_bitmap_vector (dump_file, "st_kill" , "" , kill, |
773 | last_basic_block_for_fn (cfun)); |
774 | } |
775 | #endif |
776 | |
777 | #ifdef LCM_DEBUG_INFO |
778 | if (dump_file) |
779 | { |
780 | dump_bitmap_vector (dump_file, "st_avout" , "" , st_avout, last_basic_block_for_fn (cfun)); |
781 | dump_bitmap_vector (dump_file, "st_avin" , "" , st_avin, last_basic_block_for_fn (cfun)); |
782 | } |
783 | #endif |
784 | |
785 | /* Compute farthestness. */ |
786 | farthest = sbitmap_vector_alloc (num_edges, n_exprs); |
787 | compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin, |
788 | kill, farthest); |
789 | |
790 | #ifdef LCM_DEBUG_INFO |
791 | if (dump_file) |
792 | dump_bitmap_vector (dump_file, "farthest" , "" , farthest, num_edges); |
793 | #endif |
794 | |
795 | sbitmap_vector_free (vec: st_antin); |
796 | sbitmap_vector_free (vec: st_antout); |
797 | |
798 | sbitmap_vector_free (vec: st_avin); |
799 | sbitmap_vector_free (vec: st_avout); |
800 | |
801 | nearer = sbitmap_vector_alloc (num_edges, n_exprs); |
802 | |
803 | /* Allocate an extra element for the entry block. */ |
804 | nearerout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun) + 1, |
805 | n_exprs); |
806 | compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout); |
807 | |
808 | #ifdef LCM_DEBUG_INFO |
809 | if (dump_file) |
810 | { |
811 | dump_bitmap_vector (dump_file, "nearerout" , "" , nearerout, |
812 | last_basic_block_for_fn (cfun) + 1); |
813 | dump_bitmap_vector (dump_file, "nearer" , "" , nearer, num_edges); |
814 | } |
815 | #endif |
816 | |
817 | sbitmap_vector_free (vec: farthest); |
818 | |
819 | *insert = sbitmap_vector_alloc (num_edges, n_exprs); |
820 | *del = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs); |
821 | compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout, |
822 | insert: *insert, del: *del); |
823 | |
824 | sbitmap_vector_free (vec: nearerout); |
825 | sbitmap_vector_free (vec: nearer); |
826 | |
827 | #ifdef LCM_DEBUG_INFO |
828 | if (dump_file) |
829 | { |
830 | dump_bitmap_vector (dump_file, "pre_insert_map" , "" , *insert, num_edges); |
831 | dump_bitmap_vector (dump_file, "pre_delete_map" , "" , *del, |
832 | last_basic_block_for_fn (cfun)); |
833 | } |
834 | #endif |
835 | return edge_list; |
836 | } |
837 | |
838 | |