/* Loop invariant motion.
   Copyright (C) 2003-2024 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "trans-mem.h"
#include "gimple-fold.h"
#include "tree-scalar-evolution.h"
#include "tree-ssa-loop-niter.h"
#include "alias.h"
#include "builtins.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "dbgcnt.h"
#include "insn-codes.h"
#include "optabs-tree.h"

/* TODO:  Support for predicated code motion.  I.e.

     while (1)
       {
	 if (cond)
	   {
	     a = inv;
	     something;
	   }
       }

   Where COND and INV are invariants, but evaluating INV may trap or be
   invalid for some other reason if !COND.  This may be transformed to

     if (cond)
       a = inv;
     while (1)
       {
	 if (cond)
	   something;
       }  */

/* The auxiliary data kept for each statement.  */

struct lim_aux_data
{
  class loop *max_loop;		/* The outermost loop in that the statement
				   is invariant.  */

  class loop *tgt_loop;		/* The loop out of that we want to move the
				   invariant.  */

  class loop *always_executed_in;
				/* The outermost loop for that we are sure
				   the statement is executed if the loop
				   is entered.  */

  unsigned cost;		/* Cost of the computation performed by the
				   statement.  */

  unsigned ref;			/* The simple_mem_ref in this stmt or 0.  */

  vec<gimple *> depends;	/* Vector of statements that must be also
				   hoisted out of the loop when this statement
				   is hoisted; i.e. those that define the
				   operands of the statement and are inside of
				   the MAX_LOOP loop.  */
};

/* Maps statements to their lim_aux_data.  */

static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;

/* Description of a memory reference location.  */

struct mem_ref_loc
{
  tree *ref;			/* The reference itself.  */
  gimple *stmt;			/* The statement in that it occurs.  */
};


/* Description of a memory reference.  */

class im_mem_ref
{
public:
  unsigned id : 30;		/* ID assigned to the memory reference
				   (its index in memory_accesses.refs_list)  */
  unsigned ref_canonical : 1;	/* Whether mem.ref was canonicalized.  */
  unsigned ref_decomposed : 1;	/* Whether the ref was hashed from mem.  */
  hashval_t hash;		/* Its hash value.  */

  /* The memory access itself and associated caching of alias-oracle
     query meta-data.  We are using mem.ref == error_mark_node for the
     case the reference is represented by its single access stmt
     in accesses_in_loop[0].  */
  ao_ref mem;

  bitmap stored;		/* The set of loops in that this memory location
				   is stored to.  */
  bitmap loaded;		/* The set of loops in that this memory location
				   is loaded from.  */
  vec<mem_ref_loc> accesses_in_loop;
				/* The locations of the accesses.  */

  /* The following set is computed on demand.  */
  bitmap_head dep_loop;		/* The set of loops in that the memory
				   reference is {in,}dependent in
				   different modes.  */
};

/* We use six bits per loop in the ref->dep_loop bitmap to record
   the dep_kind x dep_state combinations.  */

enum dep_kind { lim_raw, sm_war, sm_waw };
enum dep_state { dep_unknown, dep_independent, dep_dependent };
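
/* For illustration (a reading derived from record_loop_dependence and
   query_loop_dependence below): loop number N owns bits [6*N, 6*N + 6)
   of dep_loop.  Each dep_kind gets a pair of bits; the even bit of the
   pair means "independent", the odd bit means "dependent", and neither
   bit set means "unknown".  E.g. for loop 2 and kind sm_war (== 1):

     bit 6*2 + 1*2 + 0 == 14	set when dep_independent was recorded
     bit 6*2 + 1*2 + 1 == 15	set when dep_dependent was recorded  */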

/* The coldest outermost loop for a given loop.  */
vec<class loop *> coldest_outermost_loop;
/* The hotter outer loop nearest to a given loop.  */
vec<class loop *> hotter_than_inner_loop;

/* Populate the loop dependence cache of REF for LOOP, KIND with STATE.  */

static void
record_loop_dependence (class loop *loop, im_mem_ref *ref,
			dep_kind kind, dep_state state)
{
  gcc_assert (state != dep_unknown);
  unsigned bit = 6 * loop->num + kind * 2 + (state == dep_dependent ? 1 : 0);
  bitmap_set_bit (&ref->dep_loop, bit);
}

/* Query the loop dependence cache of REF for LOOP, KIND.  */

static dep_state
query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
{
  unsigned first_bit = 6 * loop->num + kind * 2;
  if (bitmap_bit_p (&ref->dep_loop, first_bit))
    return dep_independent;
  else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
    return dep_dependent;
  return dep_unknown;
}

/* Mem_ref hashtable helpers.  */

struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
{
  typedef ao_ref *compare_type;
  static inline hashval_t hash (const im_mem_ref *);
  static inline bool equal (const im_mem_ref *, const ao_ref *);
};

/* A hash function for class im_mem_ref object OBJ.  */

inline hashval_t
mem_ref_hasher::hash (const im_mem_ref *mem)
{
  return mem->hash;
}

/* An equality function for class im_mem_ref object MEM1 with
   memory reference OBJ2.  */

inline bool
mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
{
  if (obj2->max_size_known_p ())
    return (mem1->ref_decomposed
	    && ((TREE_CODE (mem1->mem.base) == MEM_REF
		 && TREE_CODE (obj2->base) == MEM_REF
		 && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0),
				     TREE_OPERAND (obj2->base, 0), 0)
		 && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT + mem1->mem.offset,
			      mem_ref_offset (obj2->base) * BITS_PER_UNIT + obj2->offset))
		|| (operand_equal_p (mem1->mem.base, obj2->base, 0)
		    && known_eq (mem1->mem.offset, obj2->offset)))
	    && known_eq (mem1->mem.size, obj2->size)
	    && known_eq (mem1->mem.max_size, obj2->max_size)
	    && mem1->mem.volatile_p == obj2->volatile_p
	    && (mem1->mem.ref_alias_set == obj2->ref_alias_set
		/* We are not canonicalizing alias-sets but for the
		   special-case we didn't canonicalize yet and the
		   incoming ref is an alias-set zero MEM we pick
		   the correct one already.  */
		|| (!mem1->ref_canonical
		    && (TREE_CODE (obj2->ref) == MEM_REF
			|| TREE_CODE (obj2->ref) == TARGET_MEM_REF)
		    && obj2->ref_alias_set == 0)
		/* Likewise if there's a canonical ref with alias-set zero.  */
		|| (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
	    && types_compatible_p (TREE_TYPE (mem1->mem.ref),
				   TREE_TYPE (obj2->ref)));
  else
    return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
}


/* Description of memory accesses in loops.  */

static struct
{
  /* The hash table of memory references accessed in loops.  */
  hash_table<mem_ref_hasher> *refs;

  /* The list of memory references.  */
  vec<im_mem_ref *> refs_list;

  /* The set of memory references accessed in each loop.  */
  vec<bitmap_head> refs_loaded_in_loop;

  /* The set of memory references stored in each loop.  */
  vec<bitmap_head> refs_stored_in_loop;

  /* The set of memory references stored in each loop, including subloops.  */
  vec<bitmap_head> all_refs_stored_in_loop;

  /* Cache for expanding memory addresses.  */
  hash_map<tree, name_expansion *> *ttae_cache;
} memory_accesses;

/* Obstack for the bitmaps in the above data structures.  */
static bitmap_obstack lim_bitmap_obstack;
static obstack mem_ref_obstack;

static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind);
static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool);
static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true);

/* Minimum cost of an expensive expression.  */
#define LIM_EXPENSIVE ((unsigned) param_lim_expensive)

/* The outermost loop for which execution of the header guarantees that the
   block will be executed.  */
#define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
#define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))

/* ID of the shared unanalyzable mem.  */
#define UNANALYZABLE_MEM_ID 0

/* Whether the reference was analyzable.  */
#define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)

static struct lim_aux_data *
init_lim_data (gimple *stmt)
{
  lim_aux_data *p = XCNEW (struct lim_aux_data);
  lim_aux_data_map->put (stmt, p);

  return p;
}

static struct lim_aux_data *
get_lim_data (gimple *stmt)
{
  lim_aux_data **p = lim_aux_data_map->get (stmt);
  if (!p)
    return NULL;

  return *p;
}

/* Releases the memory occupied by DATA.  */

static void
free_lim_aux_data (struct lim_aux_data *data)
{
  data->depends.release ();
  free (data);
}

static void
clear_lim_data (gimple *stmt)
{
  lim_aux_data **p = lim_aux_data_map->get (stmt);
  if (!p)
    return;

  free_lim_aux_data (*p);
  *p = NULL;
}


/* The possibilities of statement movement.  */
enum move_pos
  {
    MOVE_IMPOSSIBLE,		/* No movement -- side effect expression.  */
    MOVE_PRESERVE_EXECUTION,	/* Must not cause the non-executed statement
				   to become executed -- memory accesses, ...  */
    MOVE_POSSIBLE		/* Unlimited movement.  */
  };
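
/* For illustration (hypothetical statements, not from any particular
   testcase), the three classes roughly correspond to:

     tmp_1 = a_2 + b_3;	MOVE_POSSIBLE -- pure computation that can be
			evaluated anywhere.
     tmp_4 = *ptr_5;	MOVE_PRESERVE_EXECUTION -- may trap, so it must
			not run on paths where the original program did
			not execute it.
     foo ();		MOVE_IMPOSSIBLE -- a call with side effects
			cannot be moved at all.  */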


/* If it is possible to hoist the statement STMT unconditionally,
   returns MOVE_POSSIBLE.
   If it is possible to hoist the statement STMT, but we must avoid making
   it executed if it would not be executed in the original program (e.g.
   because it may trap), return MOVE_PRESERVE_EXECUTION.
   Otherwise return MOVE_IMPOSSIBLE.  */

static enum move_pos
movement_possibility_1 (gimple *stmt)
{
  tree lhs;
  enum move_pos ret = MOVE_POSSIBLE;

  if (flag_unswitch_loops
      && gimple_code (stmt) == GIMPLE_COND)
    {
      /* If we perform unswitching, force the operands of the invariant
	 condition to be moved out of the loop.  */
      return MOVE_POSSIBLE;
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_phi_num_args (stmt) <= 2
      && !virtual_operand_p (gimple_phi_result (stmt))
      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
    return MOVE_POSSIBLE;

  if (gimple_get_lhs (stmt) == NULL_TREE)
    return MOVE_IMPOSSIBLE;

  if (gimple_vdef (stmt))
    return MOVE_IMPOSSIBLE;

  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || gimple_has_side_effects (stmt)
      || stmt_could_throw_p (cfun, stmt))
    return MOVE_IMPOSSIBLE;

  if (is_gimple_call (stmt))
    {
      /* While a pure or const call is guaranteed to have no side effects, we
	 cannot move it arbitrarily.  Consider code like

	 char *s = something ();

	 while (1)
	   {
	     if (s)
	       t = strlen (s);
	     else
	       t = 0;
	   }

	 Here the strlen call cannot be moved out of the loop, even though
	 s is invariant.  In addition to possibly creating a call with
	 invalid arguments, moving out a function call that is not executed
	 may cause performance regressions in case the call is costly and
	 not executed at all.  */
      ret = MOVE_PRESERVE_EXECUTION;
      lhs = gimple_call_lhs (stmt);
    }
  else if (is_gimple_assign (stmt))
    lhs = gimple_assign_lhs (stmt);
  else
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) != SSA_NAME
      || gimple_could_trap_p (stmt))
    return MOVE_PRESERVE_EXECUTION;

  if (is_gimple_assign (stmt))
    {
      auto code = gimple_assign_rhs_code (stmt);
      tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
      /* For shifts and rotates and possibly out-of-bound shift operands
	 we currently cannot rewrite them into something unconditionally
	 well-defined.  */
      if ((code == LSHIFT_EXPR
	   || code == RSHIFT_EXPR
	   || code == LROTATE_EXPR
	   || code == RROTATE_EXPR)
	  && (TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
	      /* We cannot use ranges at 'stmt' here.  */
	      || !wi::ltu_p (wi::to_wide (gimple_assign_rhs2 (stmt)),
			     element_precision (type))))
	ret = MOVE_PRESERVE_EXECUTION;
    }

  /* Non local loads in a transaction cannot be hoisted out.  Well,
     unless the load happens on every path out of the loop, but we
     don't take this into account yet.  */
  if (flag_tm
      && gimple_in_transaction (stmt)
      && gimple_assign_single_p (stmt))
    {
      tree rhs = gimple_assign_rhs1 (stmt);
      if (DECL_P (rhs) && is_global_var (rhs))
	{
	  if (dump_file)
	    {
	      fprintf (dump_file, "Cannot hoist conditional load of ");
	      print_generic_expr (dump_file, rhs, TDF_SLIM);
	      fprintf (dump_file, " because it is in a transaction.\n");
	    }
	  return MOVE_IMPOSSIBLE;
	}
    }

  return ret;
}

static enum move_pos
movement_possibility (gimple *stmt)
{
  enum move_pos pos = movement_possibility_1 (stmt);
  if (pos == MOVE_POSSIBLE)
    {
      use_operand_p use_p;
      ssa_op_iter ssa_iter;
      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, ssa_iter, SSA_OP_USE)
	if (TREE_CODE (USE_FROM_PTR (use_p)) == SSA_NAME
	    && ssa_name_maybe_undef_p (USE_FROM_PTR (use_p)))
	  return MOVE_PRESERVE_EXECUTION;
    }
  return pos;
}
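
/* A note on the demotion above (an illustrative sketch): uses of a
   maybe-undefined SSA name are only safe on the paths where the name was
   actually assigned.  Hoisting such a use to the preheader would evaluate
   the undefined value on every entry to the loop, possibly on paths where
   the original program never touched it, so only execution-preserving
   motion is allowed.  */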


/* Compare the profile count inequality of BB and LOOP's preheader; it is
   three-state as stated in profile-count.h.  FALSE is returned if the
   inequality cannot be decided.  */
bool
bb_colder_than_loop_preheader (basic_block bb, class loop *loop)
{
  gcc_assert (bb && loop);
  return bb->count < loop_preheader_edge (loop)->src->count;
}

/* Check the coldest loop between OUTERMOST_LOOP and LOOP by comparing the
   profile count.
   It does a three-step check:
   1) Check whether CURR_BB is cold in its own loop_father; if it is cold,
   just return NULL, which means it should not be moved out at all;
   2) CURR_BB is NOT cold, check if the pre-computed COLDEST_LOOP is outside
   of OUTERMOST_LOOP; if it is inside of OUTERMOST_LOOP, return the
   COLDEST_LOOP;
   3) If COLDEST_LOOP is outside of OUTERMOST_LOOP, check whether there is a
   hotter loop between OUTERMOST_LOOP and loop in the pre-computed
   HOTTER_THAN_INNER_LOOP; return its nested inner loop, otherwise return
   OUTERMOST_LOOP.
   At last, the coldest_loop is inside of OUTERMOST_LOOP, just return it as
   the hoist target.  */

static class loop *
get_coldest_out_loop (class loop *outermost_loop, class loop *loop,
		      basic_block curr_bb)
{
  gcc_assert (outermost_loop == loop
	      || flow_loop_nested_p (outermost_loop, loop));

  /* If bb_colder_than_loop_preheader returns false due to the three-state
     comparison, OUTERMOST_LOOP is returned finally to preserve the behavior.
     Otherwise, return the coldest loop between OUTERMOST_LOOP and LOOP.  */
  if (curr_bb && bb_colder_than_loop_preheader (curr_bb, loop))
    return NULL;

  class loop *coldest_loop = coldest_outermost_loop[loop->num];
  if (loop_depth (coldest_loop) < loop_depth (outermost_loop))
    {
      class loop *hotter_loop = hotter_than_inner_loop[loop->num];
      if (!hotter_loop
	  || loop_depth (hotter_loop) < loop_depth (outermost_loop))
	return outermost_loop;

      /* hotter_loop is between OUTERMOST_LOOP and LOOP like:
	 [loop tree root, ..., coldest_loop, ..., outermost_loop, ...,
	 hotter_loop, second_coldest_loop, ..., loop]
	 return second_coldest_loop to be the hoist target.  */
      class loop *aloop;
      for (aloop = hotter_loop->inner; aloop; aloop = aloop->next)
	if (aloop == loop || flow_loop_nested_p (aloop, loop))
	  return aloop;
    }
  return coldest_loop;
}

/* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
   loop to that we could move the expression using DEF if it did not have
   other operands, i.e. the outermost loop enclosing LOOP in that the value
   of DEF is invariant.  */

static class loop *
outermost_invariant_loop (tree def, class loop *loop)
{
  gimple *def_stmt;
  basic_block def_bb;
  class loop *max_loop;
  struct lim_aux_data *lim_data;

  if (!def)
    return superloop_at_depth (loop, 1);

  if (TREE_CODE (def) != SSA_NAME)
    {
      gcc_assert (is_gimple_min_invariant (def));
      return superloop_at_depth (loop, 1);
    }

  def_stmt = SSA_NAME_DEF_STMT (def);
  def_bb = gimple_bb (def_stmt);
  if (!def_bb)
    return superloop_at_depth (loop, 1);

  max_loop = find_common_loop (loop, def_bb->loop_father);

  lim_data = get_lim_data (def_stmt);
  if (lim_data != NULL && lim_data->max_loop != NULL)
    max_loop = find_common_loop (max_loop,
				 loop_outer (lim_data->max_loop));
  if (max_loop == loop)
    return NULL;
  max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);

  return max_loop;
}

/* DATA is a structure containing information associated with a statement
   inside LOOP.  DEF is one of the operands of this statement.

   Find the outermost loop enclosing LOOP in that value of DEF is invariant
   and record this in DATA->max_loop field.  If DEF itself is defined inside
   this loop as well (i.e. we need to hoist it out of the loop if we want
   to hoist the statement represented by DATA), record the statement in that
   DEF is defined to the DATA->depends list.  Additionally if ADD_COST is true,
   add the cost of the computation of DEF to the DATA->cost.

   If DEF is not invariant in LOOP, return false.  Otherwise return true.  */

static bool
add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
		bool add_cost)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (def);
  basic_block def_bb = gimple_bb (def_stmt);
  class loop *max_loop;
  struct lim_aux_data *def_data;

  if (!def_bb)
    return true;

  max_loop = outermost_invariant_loop (def, loop);
  if (!max_loop)
    return false;

  if (flow_loop_nested_p (data->max_loop, max_loop))
    data->max_loop = max_loop;

  def_data = get_lim_data (def_stmt);
  if (!def_data)
    return true;

  if (add_cost
      /* Only add the cost if the statement defining DEF is inside LOOP,
	 i.e. if it is likely that by moving the invariants dependent
	 on it, we will be able to avoid creating a new register for
	 it (since it will be only used in these dependent invariants).  */
      && def_bb->loop_father == loop)
    data->cost += def_data->cost;

  data->depends.safe_push (def_stmt);

  return true;
}

/* Returns an estimate for a cost of statement STMT.  The values here
   are just ad-hoc constants, similar to costs for inlining.  */

static unsigned
stmt_cost (gimple *stmt)
{
  /* Always try to create possibilities for unswitching.  */
  if (gimple_code (stmt) == GIMPLE_COND
      || gimple_code (stmt) == GIMPLE_PHI)
    return LIM_EXPENSIVE;

  /* We should be hoisting calls if possible.  */
  if (is_gimple_call (stmt))
    {
      tree fndecl;

      /* Unless the call is a builtin_constant_p; this always folds to a
	 constant, so moving it is useless.  */
      fndecl = gimple_call_fndecl (stmt);
      if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
	return 0;

      return LIM_EXPENSIVE;
    }

  /* Hoisting memory references out should almost surely be a win.  */
  if (gimple_references_memory_p (stmt))
    return LIM_EXPENSIVE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return 1;

  enum tree_code code = gimple_assign_rhs_code (stmt);
  switch (code)
    {
    case MULT_EXPR:
    case WIDEN_MULT_EXPR:
    case WIDEN_MULT_PLUS_EXPR:
    case WIDEN_MULT_MINUS_EXPR:
    case DOT_PROD_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
    case CEIL_MOD_EXPR:
    case FLOOR_MOD_EXPR:
    case ROUND_MOD_EXPR:
    case TRUNC_MOD_EXPR:
    case RDIV_EXPR:
      /* Division and multiplication are usually expensive.  */
      return LIM_EXPENSIVE;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case WIDEN_LSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      /* Shifts and rotates are usually expensive.  */
      return LIM_EXPENSIVE;

    case COND_EXPR:
    case VEC_COND_EXPR:
      /* Conditionals are expensive.  */
      return LIM_EXPENSIVE;

    case CONSTRUCTOR:
      /* Make vector construction cost proportional to the number
	 of elements.  */
      return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));

    case SSA_NAME:
    case PAREN_EXPR:
      /* Whether or not something is wrapped inside a PAREN_EXPR
	 should not change move cost.  Nor should an intermediate
	 unpropagated SSA name copy.  */
      return 0;

    default:
      /* Comparisons are usually expensive.  */
      if (TREE_CODE_CLASS (code) == tcc_comparison)
	return LIM_EXPENSIVE;
      return 1;
    }
}

/* Finds the outermost loop between OUTER and LOOP in that the memory reference
   REF is independent.  If REF is not independent in LOOP, NULL is returned
   instead.  */

static class loop *
outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
{
  class loop *aloop;

  if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
    return NULL;

  for (aloop = outer;
       aloop != loop;
       aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
    if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
	&& ref_indep_loop_p (aloop, ref, lim_raw))
      return aloop;

  if (ref_indep_loop_p (loop, ref, lim_raw))
    return loop;
  else
    return NULL;
}

/* If there is a simple load or store to a memory reference in STMT, returns
   the location of the memory reference, and sets IS_STORE according to whether
   it is a store or load.  Otherwise, returns NULL.  */

static tree *
simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
{
  tree *lhs, *rhs;

  /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
  if (!gimple_assign_single_p (stmt))
    return NULL;

  lhs = gimple_assign_lhs_ptr (stmt);
  rhs = gimple_assign_rhs1_ptr (stmt);

  if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
    {
      *is_store = false;
      return rhs;
    }
  else if (gimple_vdef (stmt)
	   && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
    {
      *is_store = true;
      return lhs;
    }
  else
    return NULL;
}
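
/* As an illustration (hypothetical GIMPLE, for exposition only):

     x_1 = a[i_2];	recognized as a load, *IS_STORE = false
     a[i_2] = x_1;	recognized as a store, *IS_STORE = true
     a = b;		aggregate copy; neither side is an SSA name or
			an invariant, so NULL is returned (see
			gather_mem_refs_stmt for how such statements are
			still recorded for disambiguation).  */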

/* From a controlling predicate in DOM determine the arguments from
   the PHI node PHI that are chosen if the predicate evaluates to
   true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
   they are non-NULL.  Returns true if the arguments can be determined,
   else return false.  */

static bool
extract_true_false_args_from_phi (basic_block dom, gphi *phi,
				  tree *true_arg_p, tree *false_arg_p)
{
  edge te, fe;
  if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
					     &te, &fe))
    return false;

  if (true_arg_p)
    *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
  if (false_arg_p)
    *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);

  return true;
}

/* Determine the outermost loop to that it is possible to hoist a statement
   STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
   the outermost loop in that the value computed by STMT is invariant.
   If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
   we preserve the fact whether STMT is executed.  It also fills other related
   information to LIM_DATA (STMT).

   The function returns false if STMT cannot be hoisted outside of the loop it
   is defined in, and true otherwise.  */

static bool
determine_max_movement (gimple *stmt, bool must_preserve_exec)
{
  basic_block bb = gimple_bb (stmt);
  class loop *loop = bb->loop_father;
  class loop *level;
  struct lim_aux_data *lim_data = get_lim_data (stmt);
  tree val;
  ssa_op_iter iter;

  if (must_preserve_exec)
    level = ALWAYS_EXECUTED_IN (bb);
  else
    level = superloop_at_depth (loop, 1);
  lim_data->max_loop = get_coldest_out_loop (level, loop, bb);
  if (!lim_data->max_loop)
    return false;

  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      use_operand_p use_p;
      unsigned min_cost = UINT_MAX;
      unsigned total_cost = 0;
      struct lim_aux_data *def_data;

      /* We will end up promoting dependencies to be unconditionally
	 evaluated.  For this reason the PHI cost (and thus the
	 cost we remove from the loop by doing the invariant motion)
	 is that of the cheapest PHI argument dependency chain.  */
      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  val = USE_FROM_PTR (use_p);

	  if (TREE_CODE (val) != SSA_NAME)
	    {
	      /* Assign const 1 to constants.  */
	      min_cost = MIN (min_cost, 1);
	      total_cost += 1;
	      continue;
	    }
	  if (!add_dependency (val, lim_data, loop, false))
	    return false;

	  gimple *def_stmt = SSA_NAME_DEF_STMT (val);
	  if (gimple_bb (def_stmt)
	      && gimple_bb (def_stmt)->loop_father == loop)
	    {
	      def_data = get_lim_data (def_stmt);
	      if (def_data)
		{
		  min_cost = MIN (min_cost, def_data->cost);
		  total_cost += def_data->cost;
		}
	    }
	}

      min_cost = MIN (min_cost, total_cost);
      lim_data->cost += min_cost;

      if (gimple_phi_num_args (phi) > 1)
	{
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple *cond;
	  if (gsi_end_p (gsi_last_bb (dom)))
	    return false;
	  cond = gsi_stmt (gsi_last_bb (dom));
	  if (gimple_code (cond) != GIMPLE_COND)
	    return false;
	  /* Verify that this is an extended form of a diamond and
	     the PHI arguments are completely controlled by the
	     predicate in DOM.  */
	  if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
	    return false;

	  /* Check if one of the dependent statements is a vector compare
	     whether the target supports it; otherwise it's invalid to hoist
	     it out of the gcond it belonged to.  */
	  if (VECTOR_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond))))
	    {
	      tree type = TREE_TYPE (gimple_cond_lhs (cond));
	      auto code = gimple_cond_code (cond);
	      if (!target_supports_op_p (type, code, optab_vector))
		return false;
	    }

	  /* Fold in dependencies and cost of the condition.  */
	  FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
	    {
	      if (!add_dependency (val, lim_data, loop, false))
		return false;
	      def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
	      if (def_data)
		lim_data->cost += def_data->cost;
	    }

	  /* We want to avoid unconditionally executing very expensive
	     operations.  As costs for our dependencies cannot be
	     negative just claim we are not invariant for this case.
	     We also are not sure whether the control-flow inside the
	     loop will vanish.  */
	  if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
	      && !(min_cost != 0
		   && total_cost / min_cost <= 2))
	    return false;

	  /* Assume that the control-flow in the loop will vanish.
	     ??? We should verify this and not artificially increase
	     the cost if that is not the case.  */
	  lim_data->cost += stmt_cost (stmt);
	}

      return true;
    }

  /* A stmt that receives abnormal edges cannot be hoisted.  */
  if (is_a <gcall *> (stmt)
      && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
    if (!add_dependency (val, lim_data, loop, true))
      return false;

  if (gimple_vuse (stmt))
    {
      im_mem_ref *ref
	= lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
      if (ref
	  && MEM_ANALYZABLE (ref))
	{
	  lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
						     loop, ref);
	  if (!lim_data->max_loop)
	    return false;
	}
      else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
	return false;
    }

  lim_data->cost += stmt_cost (stmt);

  return true;
}
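
/* For illustration (a sketch with made-up SSA names): a two-argument PHI
   in a diamond whose controlling predicate is invariant, e.g.

     if (a_1 < b_2)	<- controlling GIMPLE_COND in DOM
       ...
     x_3 = PHI <c_4, d_5>

   can be hoisted by materializing the predicate and a COND_EXPR in the
   preheader (see move_computations_worker below):

     t_6 = a_1 < b_2;
     x_3 = t_6 ? c_4 : d_5;  */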

/* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
   and that one of the operands of this statement is computed by STMT.
   Ensure that STMT (together with all the statements that define its
   operands) is hoisted at least out of the loop LEVEL.  */

static void
set_level (gimple *stmt, class loop *orig_loop, class loop *level)
{
  class loop *stmt_loop = gimple_bb (stmt)->loop_father;
  struct lim_aux_data *lim_data;
  gimple *dep_stmt;
  unsigned i;

  stmt_loop = find_common_loop (orig_loop, stmt_loop);
  lim_data = get_lim_data (stmt);
  if (lim_data != NULL && lim_data->tgt_loop != NULL)
    stmt_loop = find_common_loop (stmt_loop,
				  loop_outer (lim_data->tgt_loop));
  if (flow_loop_nested_p (stmt_loop, level))
    return;

  gcc_assert (level == lim_data->max_loop
	      || flow_loop_nested_p (lim_data->max_loop, level));

  lim_data->tgt_loop = level;
  FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
    set_level (dep_stmt, orig_loop, level);
}

/* Determines an outermost loop from that we want to hoist the statement STMT.
   For now we choose the outermost possible loop.  TODO -- use profiling
   information to set it more sanely.  */

static void
set_profitable_level (gimple *stmt)
{
  set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
}

/* Returns true if STMT is a call that has side effects.  */

static bool
nonpure_call_p (gimple *stmt)
{
  if (gimple_code (stmt) != GIMPLE_CALL)
    return false;

  return gimple_has_side_effects (stmt);
}

/* Rewrite a/b to a*(1/b).  Return the invariant stmt to process.  */

static gimple *
rewrite_reciprocal (gimple_stmt_iterator *bsi)
{
  gassign *stmt, *stmt1, *stmt2;
  tree name, lhs, type;
  tree real_one;
  gimple_stmt_iterator gsi;

  stmt = as_a <gassign *> (gsi_stmt (*bsi));
  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);

  real_one = build_one_cst (type);

  name = make_temp_ssa_name (type, NULL, "reciptmp");
  stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
			       gimple_assign_rhs2 (stmt));
  stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
			       gimple_assign_rhs1 (stmt));

  /* Replace division stmt with reciprocal and multiply stmts.
     The multiply stmt is not invariant, so update iterator
     and avoid rescanning.  */
  gsi = *bsi;
  gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
  gsi_replace (&gsi, stmt2, true);

  /* Continue processing with invariant reciprocal statement.  */
  return stmt1;
}
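
/* For illustration (made-up GIMPLE, assuming -funsafe-math-optimizations
   as checked by the caller): inside a loop where b_3 is invariant,

     x_1 = a_2 / b_3;

   is rewritten to

     reciptmp_4 = 1.0 / b_3;
     x_1 = a_2 * reciptmp_4;

   The returned reciprocal statement is then the candidate for hoisting,
   leaving only the cheaper multiply inside the loop.  */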

/* Check if the pattern at *BSI is a bittest of the form
   (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.  */

static gimple *
rewrite_bittest (gimple_stmt_iterator *bsi)
{
  gassign *stmt;
  gimple *stmt1;
  gassign *stmt2;
  gimple *use_stmt;
  gcond *cond_stmt;
  tree lhs, name, t, a, b;
  use_operand_p use;

  stmt = as_a <gassign *> (gsi_stmt (*bsi));
  lhs = gimple_assign_lhs (stmt);

  /* Verify that the single use of lhs is a comparison against zero.  */
  if (TREE_CODE (lhs) != SSA_NAME
      || !single_imm_use (lhs, &use, &use_stmt))
    return stmt;
  cond_stmt = dyn_cast <gcond *> (use_stmt);
  if (!cond_stmt)
    return stmt;
  if (gimple_cond_lhs (cond_stmt) != lhs
      || (gimple_cond_code (cond_stmt) != NE_EXPR
	  && gimple_cond_code (cond_stmt) != EQ_EXPR)
      || !integer_zerop (gimple_cond_rhs (cond_stmt)))
    return stmt;

  /* Get at the operands of the shift.  The rhs is TMP1 & 1.  */
  stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  if (gimple_code (stmt1) != GIMPLE_ASSIGN)
    return stmt;

  /* There is a conversion in between possibly inserted by fold.  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
    {
      t = gimple_assign_rhs1 (stmt1);
      if (TREE_CODE (t) != SSA_NAME
	  || !has_single_use (t))
	return stmt;
      stmt1 = SSA_NAME_DEF_STMT (t);
      if (gimple_code (stmt1) != GIMPLE_ASSIGN)
	return stmt;
    }

  /* Verify that B is loop invariant but A is not.  Verify that with
     all the stmt walking we are still in the same loop.  */
  if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
      || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
    return stmt;

  a = gimple_assign_rhs1 (stmt1);
  b = gimple_assign_rhs2 (stmt1);

  if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
      && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
    {
      gimple_stmt_iterator rsi;

      /* 1 << B */
      t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
		       build_int_cst (TREE_TYPE (a), 1), b);
      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
      stmt1 = gimple_build_assign (name, t);

      /* A & (1 << B) */
      t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
      stmt2 = gimple_build_assign (name, t);

      /* Replace the SSA_NAME we compare against zero.  Adjust
	 the type of zero accordingly.  */
      SET_USE (use, name);
      gimple_cond_set_rhs (cond_stmt,
			   build_int_cst_type (TREE_TYPE (name),
					       0));

      /* Don't use gsi_replace here, none of the new assignments sets
	 the variable originally set in stmt.  Move bsi to stmt1, and
	 then remove the original stmt, so that we get a chance to
	 retain debug info for it.  */
      rsi = *bsi;
      gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
      gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
      gimple *to_release = gsi_stmt (rsi);
      gsi_remove (&rsi, true);
      release_defs (to_release);

      return stmt1;
    }

  return stmt;
}
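
/* For illustration (made-up GIMPLE): with b_3 invariant and a_2 varying,

     tmp_1 = a_2 >> b_3;			shifttmp_5 = 1 << b_3;
     tmp_4 = tmp_1 & 1;		becomes		shifttmp_6 = a_2 & shifttmp_5;
     if (tmp_4 != 0)				if (shifttmp_6 != 0)

   The mask computation "1 << b_3" is the returned statement and can be
   hoisted out of the loop, saving the per-iteration shift.  */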

/* Determine the outermost loops in that statements in basic block BB are
   invariant, and record them to the LIM_DATA associated with the
   statements.  */

static void
compute_invariantness (basic_block bb)
{
  enum move_pos pos;
  gimple_stmt_iterator bsi;
  gimple *stmt;
  bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
  class loop *outermost = ALWAYS_EXECUTED_IN (bb);
  struct lim_aux_data *lim_data;

  if (!loop_outer (bb->loop_father))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
	     bb->index, bb->loop_father->num, loop_depth (bb->loop_father));

  /* Look at PHI nodes, but only if there are at most two.
     ??? We could relax this further by post-processing the inserted
     code and transforming adjacent cond-exprs with the same predicate
     to control flow again.  */
  bsi = gsi_start_phis (bb);
  if (!gsi_end_p (bsi)
      && ((gsi_next (&bsi), gsi_end_p (bsi))
	  || (gsi_next (&bsi), gsi_end_p (bsi))))
    for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
      {
	stmt = gsi_stmt (bsi);

	pos = movement_possibility (stmt);
	if (pos == MOVE_IMPOSSIBLE)
	  continue;

	lim_data = get_lim_data (stmt);
	if (! lim_data)
	  lim_data = init_lim_data (stmt);
	lim_data->always_executed_in = outermost;

	if (!determine_max_movement (stmt, false))
	  {
	    lim_data->max_loop = NULL;
	    continue;
	  }

	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    print_gimple_stmt (dump_file, stmt, 2);
	    fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
		     loop_depth (lim_data->max_loop),
		     lim_data->cost);
	  }

	if (lim_data->cost >= LIM_EXPENSIVE)
	  set_profitable_level (stmt);
      }

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      stmt = gsi_stmt (bsi);

      pos = movement_possibility (stmt);
      if (pos == MOVE_IMPOSSIBLE)
	{
	  if (nonpure_call_p (stmt))
	    {
	      maybe_never = true;
	      outermost = NULL;
	    }
	  /* Make sure to note always_executed_in for stores to make
	     store-motion work.  */
	  else if (stmt_makes_single_store (stmt))
	    {
	      struct lim_aux_data *lim_data = get_lim_data (stmt);
	      if (! lim_data)
		lim_data = init_lim_data (stmt);
	      lim_data->always_executed_in = outermost;
	    }
	  continue;
	}

      if (is_gimple_assign (stmt)
	  && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
	      == GIMPLE_BINARY_RHS))
	{
	  tree op0 = gimple_assign_rhs1 (stmt);
	  tree op1 = gimple_assign_rhs2 (stmt);
	  class loop *ol1 = outermost_invariant_loop (op1,
						      loop_containing_stmt (stmt));

	  /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
	     to be hoisted out of loop, saving expensive divide.  */
	  if (pos == MOVE_POSSIBLE
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR
	      && flag_unsafe_math_optimizations
	      && !flag_trapping_math
	      && ol1 != NULL
	      && outermost_invariant_loop (op0, ol1) == NULL)
	    stmt = rewrite_reciprocal (&bsi);

	  /* If the shift count is invariant, convert (A >> B) & 1 to
	     A & (1 << B) allowing the bit mask to be hoisted out of the loop
	     saving an expensive shift.  */
	  if (pos == MOVE_POSSIBLE
	      && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
	      && integer_onep (op1)
	      && TREE_CODE (op0) == SSA_NAME
	      && has_single_use (op0))
	    stmt = rewrite_bittest (&bsi);
	}

      lim_data = get_lim_data (stmt);
      if (! lim_data)
	lim_data = init_lim_data (stmt);
      lim_data->always_executed_in = outermost;

      if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
	continue;

      if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
	{
	  lim_data->max_loop = NULL;
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  print_gimple_stmt (dump_file, stmt, 2);
	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
		   loop_depth (lim_data->max_loop),
		   lim_data->cost);
	}

      if (lim_data->cost >= LIM_EXPENSIVE)
	set_profitable_level (stmt);
    }
}

/* Hoist the statements in basic block BB out of the loops prescribed by
   data stored in LIM_DATA structures associated with each statement.  Callback
   for walk_dominator_tree.  */

unsigned int
move_computations_worker (basic_block bb)
{
  class loop *level;
  unsigned cost = 0;
  struct lim_aux_data *lim_data;
  unsigned int todo = 0;

  if (!loop_outer (bb->loop_father))
    return todo;

  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
    {
      gassign *new_stmt;
      gphi *stmt = bsi.phi ();

      lim_data = get_lim_data (stmt);
      if (lim_data == NULL)
	{
	  gsi_next (&bsi);
	  continue;
	}

      cost = lim_data->cost;
      level = lim_data->tgt_loop;
      clear_lim_data (stmt);

      if (!level)
	{
	  gsi_next (&bsi);
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Moving PHI node\n");
	  print_gimple_stmt (dump_file, stmt, 0);
	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}

      if (gimple_phi_num_args (stmt) == 1)
	{
	  tree arg = PHI_ARG_DEF (stmt, 0);
	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
					  TREE_CODE (arg), arg);
	}
      else
	{
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple *cond = gsi_stmt (gsi_last_bb (dom));
	  tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
	  /* Get the PHI arguments corresponding to the true and false
	     edges of COND.  */
	  extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
	  gcc_assert (arg0 && arg1);
	  t = make_ssa_name (boolean_type_node);
	  new_stmt = gimple_build_assign (t, gimple_cond_code (cond),
					  gimple_cond_lhs (cond),
					  gimple_cond_rhs (cond));
	  gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
					  COND_EXPR, t, arg0, arg1);
	  todo |= TODO_cleanup_cfg;
	}
      if (!ALWAYS_EXECUTED_IN (bb)
	  || (ALWAYS_EXECUTED_IN (bb) != level
	      && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))
	reset_flow_sensitive_info (gimple_assign_lhs (new_stmt));
      gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
      remove_phi_node (&bsi, false);
    }

  for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
    {
      edge e;

      gimple *stmt = gsi_stmt (bsi);

      lim_data = get_lim_data (stmt);
      if (lim_data == NULL)
	{
	  gsi_next (&bsi);
	  continue;
	}

      cost = lim_data->cost;
      level = lim_data->tgt_loop;
      clear_lim_data (stmt);

      if (!level)
	{
	  gsi_next (&bsi);
	  continue;
	}

      /* We do not really want to move conditionals out of the loop; we just
	 placed it here to force its operands to be moved if necessary.  */
      if (gimple_code (stmt) == GIMPLE_COND)
	{
	  gsi_next (&bsi);
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Moving statement\n");
	  print_gimple_stmt (dump_file, stmt, 0);
	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}

      e = loop_preheader_edge (level);
      gcc_assert (!gimple_vdef (stmt));
      if (gimple_vuse (stmt))
	{
	  /* The new VUSE is the one from the virtual PHI in the loop
	     header or the one already present.  */
	  gphi_iterator gsi2;
	  for (gsi2 = gsi_start_phis (e->dest);
	       !gsi_end_p (gsi2); gsi_next (&gsi2))
	    {
	      gphi *phi = gsi2.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		{
		  SET_USE (gimple_vuse_op (stmt),
			   PHI_ARG_DEF_FROM_EDGE (phi, e));
		  break;
		}
	    }
	}
      gsi_remove (&bsi, false);
      if (gimple_has_lhs (stmt)
	  && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
	  && (!ALWAYS_EXECUTED_IN (bb)
	      || !(ALWAYS_EXECUTED_IN (bb) == level
		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
	reset_flow_sensitive_info (gimple_get_lhs (stmt));
      /* In case this is a stmt that is not unconditionally executed
	 when the target loop header is executed and the stmt may
	 invoke undefined integer or pointer overflow rewrite it to
	 unsigned arithmetic.  */
      if (is_gimple_assign (stmt)
	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
	  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
	  && arith_code_with_undefined_signed_overflow
	       (gimple_assign_rhs_code (stmt))
	  && (!ALWAYS_EXECUTED_IN (bb)
	      || !(ALWAYS_EXECUTED_IN (bb) == level
		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
	gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
      else
	gsi_insert_on_edge (e, stmt);
    }

  return todo;
}

/* Checks whether the statement defining variable *INDEX can be hoisted
   out of the loop passed in DATA.  Callback for for_each_index.  */

static bool
may_move_till (tree ref, tree *index, void *data)
{
  class loop *loop = (class loop *) data, *max_loop;

  /* If REF is an array reference, check also that the step and the lower
     bound are invariant in LOOP.  */
  if (TREE_CODE (ref) == ARRAY_REF)
    {
      tree step = TREE_OPERAND (ref, 3);
      tree lbound = TREE_OPERAND (ref, 2);

      max_loop = outermost_invariant_loop (step, loop);
      if (!max_loop)
	return false;

      max_loop = outermost_invariant_loop (lbound, loop);
      if (!max_loop)
	return false;
    }

  max_loop = outermost_invariant_loop (*index, loop);
  if (!max_loop)
    return false;

  return true;
}

/* If OP is an SSA NAME, force the statement that defines it to be
   moved out of the LOOP.  ORIG_LOOP is the loop in that EXPR is used.  */

static void
force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
{
  gimple *stmt;

  if (!op
      || is_gimple_min_invariant (op))
    return;

  gcc_assert (TREE_CODE (op) == SSA_NAME);

  stmt = SSA_NAME_DEF_STMT (op);
  if (gimple_nop_p (stmt))
    return;

  set_level (stmt, orig_loop, loop);
}

/* Forces statements defining invariants in REF (and *INDEX) to be moved out of
   the LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback for
   for_each_index.  */

struct fmt_data
{
  class loop *loop;
  class loop *orig_loop;
};

static bool
force_move_till (tree ref, tree *index, void *data)
{
  struct fmt_data *fmt_data = (struct fmt_data *) data;

  if (TREE_CODE (ref) == ARRAY_REF)
    {
      tree step = TREE_OPERAND (ref, 3);
      tree lbound = TREE_OPERAND (ref, 2);

      force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
      force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
    }

  force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);

  return true;
}

/* A function to free the mem_ref object MEM.  */

static void
memref_free (class im_mem_ref *mem)
{
  mem->accesses_in_loop.release ();
}

/* Allocates and returns a memory reference description for MEM whose hash
   value is HASH and id is ID.  */

static im_mem_ref *
mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
{
  im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
  if (mem)
    ref->mem = *mem;
  else
    ao_ref_init (&ref->mem, error_mark_node);
  ref->id = id;
  ref->ref_canonical = false;
  ref->ref_decomposed = false;
  ref->hash = hash;
  ref->stored = NULL;
  ref->loaded = NULL;
  bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
  ref->accesses_in_loop.create (1);

  return ref;
}

/* Records memory reference location *LOC in LOOP to the memory reference
   description REF.  The reference occurs in statement STMT.  */

static void
record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
{
  mem_ref_loc aref;
  aref.stmt = stmt;
  aref.ref = loc;
  ref->accesses_in_loop.safe_push (aref);
}

/* Set the LOOP bit in REF stored bitmap and allocate that if
   necessary.  Return whether a bit was changed.  */

static bool
set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
{
  if (!ref->stored)
    ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
  return bitmap_set_bit (ref->stored, loop->num);
}

/* Marks reference REF as stored in LOOP.  */

static void
mark_ref_stored (im_mem_ref *ref, class loop *loop)
{
  while (loop != current_loops->tree_root
	 && set_ref_stored_in_loop (ref, loop))
    loop = loop_outer (loop);
}

/* Set the LOOP bit in REF loaded bitmap and allocate that if
   necessary.  Return whether a bit was changed.  */

static bool
set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
{
  if (!ref->loaded)
    ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack);
  return bitmap_set_bit (ref->loaded, loop->num);
}

/* Marks reference REF as loaded in LOOP.  */

static void
mark_ref_loaded (im_mem_ref *ref, class loop *loop)
{
  while (loop != current_loops->tree_root
	 && set_ref_loaded_in_loop (ref, loop))
    loop = loop_outer (loop);
}

/* Gathers memory references in statement STMT in LOOP, storing the
   information about them in the memory_accesses structure.  Marks
   the vops accessed through unrecognized statements there as
   well.  */

static void
gather_mem_refs_stmt (class loop *loop, gimple *stmt)
{
  tree *mem = NULL;
  hashval_t hash;
  im_mem_ref **slot;
  im_mem_ref *ref;
  bool is_stored;
  unsigned id;

  if (!gimple_vuse (stmt))
    return;

  mem = simple_mem_ref_in_stmt (stmt, &is_stored);
  if (!mem && is_gimple_assign (stmt))
    {
      /* For aggregate copies record distinct references but use them
	 only for disambiguation purposes.  */
      id = memory_accesses.refs_list.length ();
      ref = mem_ref_alloc (NULL, 0, id);
      memory_accesses.refs_list.safe_push (ref);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Unhandled memory reference %u: ", id);
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      record_mem_ref_loc (ref, stmt, mem);
      is_stored = gimple_vdef (stmt);
    }
  else if (!mem)
    {
      /* We use the shared mem_ref for all unanalyzable refs.  */
      id = UNANALYZABLE_MEM_ID;
      ref = memory_accesses.refs_list[id];
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      is_stored = gimple_vdef (stmt);
    }
  else
    {
      /* We are looking for equal refs that might differ in structure
	 such as a.b vs. MEM[&a + 4].  So we key off the ao_ref but
	 make sure we can canonicalize the ref in the hashtable if
	 non-operand_equal_p refs are found.  For the lookup we mark
	 the case we want strict equality with aor.max_size == -1.  */
      ao_ref aor;
      ao_ref_init (&aor, *mem);
      ao_ref_base (&aor);
      ao_ref_alias_set (&aor);
      HOST_WIDE_INT offset, size, max_size;
      poly_int64 saved_maxsize = aor.max_size, mem_off;
      tree mem_base;
      bool ref_decomposed;
      if (aor.max_size_known_p ()
	  && aor.offset.is_constant (&offset)
	  && aor.size.is_constant (&size)
	  && aor.max_size.is_constant (&max_size)
	  && size == max_size
	  && (size % BITS_PER_UNIT) == 0
	  /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
	     size.  Make sure this is consistent with the extraction.  */
	  && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
	  && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
		       aor.size)
	  && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
	{
	  ref_decomposed = true;
	  tree base = ao_ref_base (&aor);
	  poly_int64 moffset;
	  HOST_WIDE_INT mcoffset;
	  if (TREE_CODE (base) == MEM_REF
	      && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (&moffset)
	      && moffset.is_constant (&mcoffset))
	    {
	      hash = iterative_hash_expr (TREE_OPERAND (base, 0), 0);
	      hash = iterative_hash_host_wide_int (mcoffset, hash);
	    }
	  else
	    {
	      hash = iterative_hash_expr (base, 0);
	      hash = iterative_hash_host_wide_int (offset, hash);
	    }
	  hash = iterative_hash_host_wide_int (size, hash);
	}
      else
	{
	  ref_decomposed = false;
	  hash = iterative_hash_expr (aor.ref, 0);
	  aor.max_size = -1;
	}
      slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
      aor.max_size = saved_maxsize;
      if (*slot)
	{
	  if (!(*slot)->ref_canonical
	      && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
1679 | { |
1680 | /* If we didn't yet canonicalize the hashtable ref (which |
1681 | we'll end up using for code insertion) and hit a second |
1682 | equal ref that is not structurally equivalent create |
1683 | a canonical ref which is a bare MEM_REF. */ |
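              /* E.g. (illustrative): after recording a.b we may meet
                 MEM[&a + 4] hashing to the same slot; the canonical
                 ref kept for code insertion is then rebuilt as a bare
                 MEM_REF below.  */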
              if (TREE_CODE (*mem) == MEM_REF
                  || TREE_CODE (*mem) == TARGET_MEM_REF)
                {
                  (*slot)->mem.ref = *mem;
                  (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
                }
              else
                {
                  tree ref_alias_type = reference_alias_ptr_type (*mem);
                  unsigned int ref_align = get_object_alignment (*mem);
                  tree ref_type = TREE_TYPE (*mem);
                  tree tmp = build1 (ADDR_EXPR, ptr_type_node,
                                     unshare_expr (mem_base));
                  if (TYPE_ALIGN (ref_type) != ref_align)
                    ref_type = build_aligned_type (ref_type, ref_align);
                  tree new_ref
                    = fold_build2 (MEM_REF, ref_type, tmp,
                                   build_int_cst (ref_alias_type, mem_off));
                  if ((*slot)->mem.volatile_p)
                    TREE_THIS_VOLATILE (new_ref) = 1;
                  (*slot)->mem.ref = new_ref;
                  /* Make sure the recorded base and offset are consistent
                     with the newly built ref.  */
                  if (TREE_CODE (TREE_OPERAND (new_ref, 0)) == ADDR_EXPR)
                    ;
                  else
                    {
                      (*slot)->mem.base = new_ref;
                      (*slot)->mem.offset = 0;
                    }
                  gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
                                       && is_gimple_mem_ref_addr
                                            (TREE_OPERAND ((*slot)->mem.ref,
                                                           0)));
                  (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
                }
              (*slot)->ref_canonical = true;
            }
          ref = *slot;
          id = ref->id;
        }
      else
        {
          id = memory_accesses.refs_list.length ();
          ref = mem_ref_alloc (&aor, hash, id);
          ref->ref_decomposed = ref_decomposed;
          memory_accesses.refs_list.safe_push (ref);
          *slot = ref;

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Memory reference %u: ", id);
              print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
              fprintf (dump_file, "\n");
            }
        }

      record_mem_ref_loc (ref, stmt, mem);
    }
  if (is_stored)
    {
      bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
      mark_ref_stored (ref, loop);
    }
  /* A non-simple memory op counts as a read even when it is a write.  */
  if (!is_stored || id == UNANALYZABLE_MEM_ID
      || ref->mem.ref == error_mark_node)
    {
      bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
      mark_ref_loaded (ref, loop);
    }
  init_lim_data (stmt)->ref = ref->id;
  return;
}

static unsigned *bb_loop_postorder;

/* qsort sort function to sort blocks after their loop father's postorder.  */

static int
sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
                                void *bb_loop_postorder_)
{
  unsigned *bb_loop_postorder = (unsigned *) bb_loop_postorder_;
  basic_block bb1 = *(const basic_block *) bb1_;
  basic_block bb2 = *(const basic_block *) bb2_;
  class loop *loop1 = bb1->loop_father;
  class loop *loop2 = bb2->loop_father;
  if (loop1->num == loop2->num)
    return bb1->index - bb2->index;
  return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
}

/* qsort sort function to sort ref locs after their loop father's postorder.  */

static int
sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
                                 void *bb_loop_postorder_)
{
  unsigned *bb_loop_postorder = (unsigned *) bb_loop_postorder_;
  const mem_ref_loc *loc1 = (const mem_ref_loc *) loc1_;
  const mem_ref_loc *loc2 = (const mem_ref_loc *) loc2_;
  class loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
  class loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
  if (loop1->num == loop2->num)
    return 0;
  return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
}

/* Gathers memory references in loops.  */

static void
analyze_memory_references (bool store_motion)
{
  gimple_stmt_iterator bsi;
  basic_block bb, *bbs;
  class loop *outer;
  unsigned i, n;

  /* Collect all basic-blocks in loops and sort them after their
     loop's postorder.  */
  i = 0;
  bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
  FOR_EACH_BB_FN (bb, cfun)
    if (bb->loop_father != current_loops->tree_root)
      bbs[i++] = bb;
  n = i;
  gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
              bb_loop_postorder);

  /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
     That results in better locality for all the bitmaps.  It also
     automatically sorts the location list of gathered memory references
     after their loop postorder number, allowing us to binary-search it.  */
  for (i = 0; i < n; ++i)
    {
      basic_block bb = bbs[i];
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
    }

  /* Verify the list of gathered memory references is sorted after their
     loop postorder number.  */
  if (flag_checking)
    {
      im_mem_ref *ref;
      FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
        for (unsigned j = 1; j < ref->accesses_in_loop.length (); ++j)
          gcc_assert (sort_locs_in_loop_postorder_cmp
                        (&ref->accesses_in_loop[j-1], &ref->accesses_in_loop[j],
                         bb_loop_postorder) <= 0);
    }

  free (bbs);

  if (!store_motion)
    return;

  /* Propagate the information about accessed memory references up
     the loop hierarchy.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      /* Finalize the overall touched references (including subloops).  */
      bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
                       &memory_accesses.refs_stored_in_loop[loop->num]);

      /* Propagate the information about accessed memory references up
         the loop hierarchy.  */
      outer = loop_outer (loop);
      if (outer == current_loops->tree_root)
        continue;

      bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
                       &memory_accesses.all_refs_stored_in_loop[loop->num]);
    }
}

/* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
   tree_to_aff_combination_expand.  */

static bool
mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
                      hash_map<tree, name_expansion *> **ttae_cache,
                      bool tbaa_p)
{
  gcc_checking_assert (mem1->mem.ref != error_mark_node
                       && mem2->mem.ref != error_mark_node);

  /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
     object and their offsets differ in such a way that the locations cannot
     overlap, then they cannot alias.  */
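  /* For instance (illustrative), p->a[i] and p->a[i + 1] expand to the
     same base with affine offsets 4*i and 4*i + 4; the difference is
     the constant 4, at least the access size, so the two locations
     cannot overlap.  */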
  poly_widest_int size1, size2;
  aff_tree off1, off2;

  /* Perform basic offset and type-based disambiguation.  */
  if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
    return false;

  /* The expansion of addresses may be a bit expensive, thus we only do
     the check at -O2 and higher optimization levels.  */
  if (optimize < 2)
    return true;

  get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
  get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
  aff_combination_expand (&off1, ttae_cache);
  aff_combination_expand (&off2, ttae_cache);
  aff_combination_scale (&off1, -1);
  aff_combination_add (&off2, &off1);

  if (aff_comb_cannot_overlap_p (&off2, size1, size2))
    return false;

  return true;
}

/* Compare function for bsearch searching for reference locations
   in a loop.  */

static int
find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
                          void *bb_loop_postorder_)
{
  unsigned *bb_loop_postorder = (unsigned *) bb_loop_postorder_;
  class loop *loop = (class loop *) const_cast<void *> (loop_);
  mem_ref_loc *loc = (mem_ref_loc *) const_cast<void *> (loc_);
  class loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
  if (loop->num == loc_loop->num
      || flow_loop_nested_p (loop, loc_loop))
    return 0;
  return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
          ? -1 : 1);
}

/* Iterates over all locations of REF in LOOP and its subloops calling
   fn.operator() with the location as argument.  When that operator
   returns true the iteration is stopped and true is returned.
   Otherwise false is returned.  */

template <typename FN>
static bool
for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
{
  unsigned i;
  mem_ref_loc *loc;

  /* Search for the cluster of locs in the accesses_in_loop vector
     which is sorted after postorder index of the loop father.  */
  loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp,
                                       bb_loop_postorder);
  if (!loc)
    return false;

  /* We have found one location inside loop or its sub-loops.  Iterate
     both forward and backward to cover the whole cluster.  */
  i = loc - ref->accesses_in_loop.address ();
  while (i > 0)
    {
      --i;
      mem_ref_loc *l = &ref->accesses_in_loop[i];
      if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
        break;
      if (fn (l))
        return true;
    }
  for (i = loc - ref->accesses_in_loop.address ();
       i < ref->accesses_in_loop.length (); ++i)
    {
      mem_ref_loc *l = &ref->accesses_in_loop[i];
      if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
        break;
      if (fn (l))
        return true;
    }

  return false;
}
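
/* Because accesses_in_loop is sorted by the postorder number of each
   location's loop father, all locations in LOOP and its subloops form
   one contiguous run -- e.g. (illustrative) with postorder numbers
   inner = 0, outer = 1 all inner-loop locs precede the outer-loop
   ones, so the single bsearch hit above can be extended linearly in
   both directions.  */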

/* Rewrites location LOC by TMP_VAR.  */

class rewrite_mem_ref_loc
{
public:
  rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
  bool operator () (mem_ref_loc *loc);
  tree tmp_var;
};

bool
rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
{
  *loc->ref = tmp_var;
  update_stmt (loc->stmt);
  return false;
}

/* Rewrites all references to REF in LOOP by variable TMP_VAR.  */

static void
rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
{
  for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
}

/* Stores the first reference location in LOCP.  */

class first_mem_ref_loc_1
{
public:
  first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
  bool operator () (mem_ref_loc *loc);
  mem_ref_loc **locp;
};

bool
first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
{
  *locp = loc;
  return true;
}

/* Returns the first reference location to REF in LOOP.  */

static mem_ref_loc *
first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
{
  mem_ref_loc *locp = NULL;
  for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
  return locp;
}

/* Helper function for execute_sm.  Emit code to store TMP_VAR into
   MEM along edge EX.

   The store is only done if MEM has changed.  We do this so no
   changes to MEM occur on code paths that did not originally store
   into it.

   The common case for execute_sm will transform:

     for (...) {
       if (foo)
         stuff;
       else
         MEM = TMP_VAR;
     }

   into:

     lsm = MEM;
     for (...) {
       if (foo)
         stuff;
       else
         lsm = TMP_VAR;
     }
     MEM = lsm;

   This function will generate:

     lsm = MEM;

     lsm_flag = false;
     ...
     for (...) {
       if (foo)
         stuff;
       else {
         lsm = TMP_VAR;
         lsm_flag = true;
       }
     }
     if (lsm_flag)      <--
       MEM = lsm;       <-- (X)

   In case MEM and TMP_VAR are NULL the function will return the then
   block so the caller can insert (X) and other related stmts.  */

static basic_block
execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
                       edge preheader, hash_set <basic_block> *flag_bbs,
                       edge &append_cond_position, edge &last_cond_fallthru)
{
  basic_block new_bb, then_bb, old_dest;
  bool loop_has_only_one_exit;
  edge then_old_edge;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;

  profile_count count_sum = profile_count::zero ();
  int nbbs = 0, ncount = 0;
  profile_probability flag_probability = profile_probability::uninitialized ();

  /* The flag is set in FLAG_BBS.  Determine the probability that it will
     be true at the loop exit.

     This code may look fancy, but it cannot update the profile very
     realistically because we do not know the probability that the flag
     will be true at a given loop exit.

     We look for two interesting extremes
       - when the exit is dominated by a block setting the flag, we know
         the flag will always be true.  This is a common case.
       - when all blocks setting the flag have very low frequency, we
         know it will likely be false.
     In all other cases we default to 2/3 for the flag being true.  */
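  /* E.g. (illustrative numbers): with a preheader count of 100 and
     flag-setting blocks whose counts sum to 40 the code below estimates
     a 40% probability; a sum of 90 would be capped at the guessed 2/3
     default.  */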

  for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
       it != flag_bbs->end (); ++it)
    {
      if ((*it)->count.initialized_p ())
        count_sum += (*it)->count, ncount ++;
      if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
        flag_probability = profile_probability::always ();
      nbbs++;
    }

  profile_probability cap
    = profile_probability::guessed_always ().apply_scale (2, 3);

  if (flag_probability.initialized_p ())
    ;
  else if (ncount == nbbs
           && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
    {
      flag_probability = count_sum.probability_in (preheader->count ());
      if (flag_probability > cap)
        flag_probability = cap;
    }

  if (!flag_probability.initialized_p ())
    flag_probability = cap;

  /* ?? Insert store after previous store if applicable.  See note
     below.  */
  if (append_cond_position)
    ex = append_cond_position;

  loop_has_only_one_exit = single_pred_p (ex->dest);

  if (loop_has_only_one_exit)
    ex = split_block_after_labels (ex->dest);
  else
    {
      for (gphi_iterator gpi = gsi_start_phis (ex->dest);
           !gsi_end_p (gpi); gsi_next (&gpi))
        {
          gphi *phi = gpi.phi ();
          if (virtual_operand_p (gimple_phi_result (phi)))
            continue;

          /* When the destination has a non-virtual PHI node with multiple
             predecessors make sure we preserve the PHI structure by
             forcing a forwarder block so that hoisting of that PHI will
             still work.  */
          split_edge (ex);
          break;
        }
    }

  old_dest = ex->dest;
  new_bb = split_edge (ex);
  if (append_cond_position)
    new_bb->count += last_cond_fallthru->count ();
  then_bb = create_empty_bb (new_bb);
  then_bb->count = new_bb->count.apply_probability (flag_probability);
  if (irr)
    then_bb->flags = BB_IRREDUCIBLE_LOOP;
  add_bb_to_loop (then_bb, new_bb->loop_father);

  gsi = gsi_start_bb (new_bb);
  stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
                            NULL_TREE, NULL_TREE);
  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

  /* Insert actual store.  */
  if (mem)
    {
      gsi = gsi_start_bb (then_bb);
      stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
    }

  edge e1 = single_succ_edge (new_bb);
  edge e2 = make_edge (new_bb, then_bb,
                       EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
  e2->probability = flag_probability;

  e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
  e1->flags &= ~EDGE_FALLTHRU;

  e1->probability = flag_probability.invert ();

  then_old_edge = make_single_succ_edge (then_bb, old_dest,
                             EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));

  set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);

  if (append_cond_position)
    {
      basic_block prevbb = last_cond_fallthru->src;
      redirect_edge_succ (last_cond_fallthru, new_bb);
      set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
      set_immediate_dominator (CDI_DOMINATORS, old_dest,
                               recompute_dominator (CDI_DOMINATORS, old_dest));
    }

  /* ?? Because stores may alias, they must happen in the exact
     sequence they originally happened.  Save the position right after
     the (_lsm) store we just created so we can continue appending after
     it and maintain the original order.  */
  append_cond_position = then_old_edge;
  last_cond_fallthru = find_edge (new_bb, old_dest);

  if (!loop_has_only_one_exit)
    for (gphi_iterator gpi = gsi_start_phis (old_dest);
         !gsi_end_p (gpi); gsi_next (&gpi))
      {
        gphi *phi = gpi.phi ();
        unsigned i;

        for (i = 0; i < gimple_phi_num_args (phi); i++)
          if (gimple_phi_arg_edge (phi, i)->src == new_bb)
            {
              tree arg = gimple_phi_arg_def (phi, i);
              add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
              update_stmt (phi);
            }
      }

  return then_bb;
}

/* When REF is set on the location, set flag indicating the store.  */

class sm_set_flag_if_changed
{
public:
  sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
    : flag (flag_), bbs (bbs_) {}
  bool operator () (mem_ref_loc *loc);
  tree flag;
  hash_set <basic_block> *bbs;
};

bool
sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
{
  /* Only set the flag for writes.  */
  if (is_gimple_assign (loc->stmt)
      && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
      gimple *stmt = gimple_build_assign (flag, boolean_true_node);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
      bbs->add (gimple_bb (stmt));
    }
  return false;
}

/* Helper function for execute_sm.  On every location where REF is
   set, set an appropriate flag indicating the store.  */

static tree
execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
                                hash_set <basic_block> *bbs)
{
  tree flag;
  char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
  flag = create_tmp_reg (boolean_type_node, str);
  for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
  return flag;
}

struct sm_aux
{
  tree tmp_var;
  tree store_flag;
  hash_set <basic_block> flag_bbs;
};

/* Executes store motion of memory reference REF from LOOP.
   Exits from the LOOP are stored in EXITS.  The initialization of the
   temporary variable is put to the preheader of the loop, and assignments
   to the reference from the temporary variable are emitted to exits.  */

static void
execute_sm (class loop *loop, im_mem_ref *ref,
            hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
            bool use_other_flag_var)
{
  gassign *load;
  struct fmt_data fmt_data;
  struct lim_aux_data *lim_data;
  bool multi_threaded_model_p = false;
  gimple_stmt_iterator gsi;
  sm_aux *aux = new sm_aux;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Executing store motion of ");
      print_generic_expr (dump_file, ref->mem.ref);
      fprintf (dump_file, " from loop %d\n", loop->num);
    }

  aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
                                 get_lsm_tmp_name (ref->mem.ref, ~0));

  fmt_data.loop = loop;
  fmt_data.orig_loop = loop;
  for_each_index (&ref->mem.ref, force_move_till, &fmt_data);

  bool always_stored = ref_always_accessed_p (loop, ref, true);
  if (maybe_mt
      && (bb_in_transaction (loop_preheader_edge (loop)->src)
          || (! flag_store_data_races && ! always_stored)))
    multi_threaded_model_p = true;

  if (multi_threaded_model_p && !use_other_flag_var)
    aux->store_flag
      = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
  else
    aux->store_flag = NULL_TREE;

  /* Remember variable setup.  */
  aux_map.put (ref, aux);

  rewrite_mem_refs (loop, ref, aux->tmp_var);

  /* Emit the load code on a random exit edge or into the latch if
     the loop does not exit, so that we are sure it will be processed
     by move_computations after all dependencies.  */
  gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);

  /* Avoid doing a load if there was no load of the ref in the loop.
     Esp. when the ref is not always stored we cannot optimize it
     away later.  But when it is not always stored we must use a conditional
     store then.  */
  if ((!always_stored && !multi_threaded_model_p)
      || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
    load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref));
  else
    {
      /* If not emitting a load mark the uninitialized state on the
         loop entry as not to be warned for.  */
      tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
      suppress_warning (uninit, OPT_Wuninitialized);
      load = gimple_build_assign (aux->tmp_var, uninit);
    }
  lim_data = init_lim_data (load);
  lim_data->max_loop = loop;
  lim_data->tgt_loop = loop;
  gsi_insert_before (&gsi, load, GSI_SAME_STMT);

  if (aux->store_flag)
    {
      load = gimple_build_assign (aux->store_flag, boolean_false_node);
      lim_data = init_lim_data (load);
      lim_data->max_loop = loop;
      lim_data->tgt_loop = loop;
      gsi_insert_before (&gsi, load, GSI_SAME_STMT);
    }
}

/* sm_ord is used for ordinary stores for which we can retain the order
   with respect to other stores.
   sm_unord is used for conditionally executed stores which need to be
   able to execute in arbitrary order with respect to other stores.
   sm_other is used for stores we do not try to apply store motion to.  */
enum sm_kind { sm_ord, sm_unord, sm_other };
struct seq_entry
{
  seq_entry () = default;
  seq_entry (unsigned f, sm_kind k, tree fr = NULL)
    : first (f), second (k), from (fr) {}
  unsigned first;
  sm_kind second;
  tree from;
};
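
/* As an illustration (a sketch, not output from the sources): a loop
   body executing  *p = a; *q = b;  on every iteration can yield the
   ordered exit sequence { q:sm_ord, p:sm_ord }, re-issued in that
   order on the exit, while a store whose order cannot be preserved is
   demoted to sm_unord and materialized under its own change flag.  */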

static void
execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
                 hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind,
                 edge &append_cond_position, edge &last_cond_fallthru)
{
  /* Sink the stores to exit from the loop.  */
  for (unsigned i = seq.length (); i > 0; --i)
    {
      im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
      if (seq[i-1].second == sm_other)
        {
          gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Re-issuing dependent store of ");
              print_generic_expr (dump_file, ref->mem.ref);
              fprintf (dump_file, " from loop %d on exit %d -> %d\n",
                       loop->num, ex->src->index, ex->dest->index);
            }
          gassign *store = gimple_build_assign (unshare_expr (ref->mem.ref),
                                                seq[i-1].from);
          gsi_insert_on_edge (ex, store);
        }
      else
        {
          sm_aux *aux = *aux_map.get (ref);
          if (!aux->store_flag || kind == sm_ord)
            {
              gassign *store;
              store = gimple_build_assign (unshare_expr (ref->mem.ref),
                                           aux->tmp_var);
              gsi_insert_on_edge (ex, store);
            }
          else
            execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
                                   aux->store_flag,
                                   loop_preheader_edge (loop), &aux->flag_bbs,
                                   append_cond_position, last_cond_fallthru);
        }
    }
}

/* Push the SM candidate at index PTR in the sequence SEQ down until
   we hit the next SM candidate.  Return true if that went OK and
   false if we could not disambiguate against another unrelated ref.
   Update *AT to the index where the candidate now resides.  */

static bool
sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
{
  *at = ptr;
  for (; ptr > 0; --ptr)
    {
      seq_entry &new_cand = seq[ptr];
      seq_entry &against = seq[ptr-1];
      if (against.second == sm_ord
          || (against.second == sm_other && against.from != NULL_TREE))
        /* Found the tail of the sequence.  */
        break;
      /* We may not ignore self-dependences here.  */
      if (new_cand.first == against.first
          || !refs_independent_p (memory_accesses.refs_list[new_cand.first],
                                  memory_accesses.refs_list[against.first],
                                  false))
        /* ??? Prune new_cand from the list of refs to apply SM to.  */
        return false;
      std::swap (new_cand, against);
      *at = ptr - 1;
    }
  return true;
}
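
/* E.g. (illustrative): given SEQ = { *q:sm_ord, *p:sm_other } and a
   new candidate *r appended at the tail, *r is swapped before *p only
   if the two refs are independent and then stops behind *q, the
   previous SM candidate, yielding { *q, *r, *p }.  */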

/* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ to SEQ
   walking backwards from VDEF (or the end of BB if VDEF is NULL).  */

static int
sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
                 vec<seq_entry> &seq, bitmap refs_not_in_seq,
                 bitmap refs_not_supported, bool forked,
                 bitmap fully_visited)
{
  if (!vdef)
    for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
         gsi_prev (&gsi))
      {
        vdef = gimple_vdef (gsi_stmt (gsi));
        if (vdef)
          break;
      }
  if (!vdef)
    {
      gphi *vphi = get_virtual_phi (bb);
      if (vphi)
        vdef = gimple_phi_result (vphi);
    }
  if (!vdef)
    {
      if (single_pred_p (bb))
        /* This handles the perfect nest case.  */
        return sm_seq_valid_bb (loop, single_pred (bb), vdef,
                                seq, refs_not_in_seq, refs_not_supported,
                                forked, fully_visited);
      return 0;
    }
  do
    {
      gimple *def = SSA_NAME_DEF_STMT (vdef);
      if (gimple_bb (def) != bb)
        {
          /* If we forked by processing a PHI do not allow our walk to
             merge again until we handle that robustly.  */
          if (forked)
            {
              /* Mark refs_not_in_seq as unsupported.  */
              bitmap_ior_into (refs_not_supported, refs_not_in_seq);
              return 1;
            }
          /* Otherwise it doesn't really matter if we end up in different
             BBs.  */
          bb = gimple_bb (def);
        }
      if (gphi *phi = dyn_cast <gphi *> (def))
        {
          /* Handle CFG merges.  Until we handle forks (gimple_bb (def) != bb)
             this is still linear.
             Eventually we want to cache intermediate results per BB
             (but we can't easily cache for different exits?).  */
          /* Stop at PHIs with possible backedges.  */
          if (bb == bb->loop_father->header
              || bb->flags & BB_IRREDUCIBLE_LOOP)
            {
              /* Mark refs_not_in_seq as unsupported.  */
              bitmap_ior_into (refs_not_supported, refs_not_in_seq);
              return 1;
            }
          if (gimple_phi_num_args (phi) == 1)
            return sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
                                    gimple_phi_arg_def (phi, 0), seq,
                                    refs_not_in_seq, refs_not_supported,
                                    false, fully_visited);
          if (bitmap_bit_p (fully_visited,
                            SSA_NAME_VERSION (gimple_phi_result (phi))))
            return 1;
          auto_vec<seq_entry> first_edge_seq;
          auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack);
          int eret;
          bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
          eret = sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
                                  gimple_phi_arg_def (phi, 0),
                                  first_edge_seq,
                                  tem_refs_not_in_seq, refs_not_supported,
                                  true, fully_visited);
          if (eret != 1)
            return -1;
          /* Simplify our lives by pruning the sequence of !sm_ord.  */
          while (!first_edge_seq.is_empty ()
                 && first_edge_seq.last ().second != sm_ord)
            first_edge_seq.pop ();
          for (unsigned int i = 1; i < gimple_phi_num_args (phi); ++i)
            {
              tree vuse = gimple_phi_arg_def (phi, i);
              edge e = gimple_phi_arg_edge (phi, i);
              auto_vec<seq_entry> edge_seq;
              bitmap_and_compl (tem_refs_not_in_seq,
                                refs_not_in_seq, refs_not_supported);
              /* If we've marked all refs we search for as unsupported
                 we can stop processing and use the sequence as before
                 the PHI.  */
              if (bitmap_empty_p (tem_refs_not_in_seq))
                return 1;
              eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq,
                                      tem_refs_not_in_seq, refs_not_supported,
                                      true, fully_visited);
              if (eret != 1)
                return -1;
              /* Simplify our lives by pruning the sequence of !sm_ord.  */
              while (!edge_seq.is_empty ()
                     && edge_seq.last ().second != sm_ord)
                edge_seq.pop ();
              unsigned min_len = MIN (first_edge_seq.length (),
                                      edge_seq.length ());
              /* Incrementally merge seqs into first_edge_seq.  */
              int first_uneq = -1;
              auto_vec<seq_entry, 2> extra_refs;
              for (unsigned int i = 0; i < min_len; ++i)
                {
                  /* ??? We can more intelligently merge when we face different
                     order by additional sinking operations in one sequence.
                     For now we simply mark them as to be processed by the
                     not order-preserving SM code.  */
                  if (first_edge_seq[i].first != edge_seq[i].first)
                    {
                      if (first_edge_seq[i].second == sm_ord)
                        bitmap_set_bit (refs_not_supported,
                                        first_edge_seq[i].first);
                      if (edge_seq[i].second == sm_ord)
                        bitmap_set_bit (refs_not_supported, edge_seq[i].first);
                      first_edge_seq[i].second = sm_other;
                      first_edge_seq[i].from = NULL_TREE;
                      /* Record the dropped refs for later processing.  */
                      if (first_uneq == -1)
                        first_uneq = i;
                      extra_refs.safe_push (seq_entry (edge_seq[i].first,
                                                       sm_other, NULL_TREE));
                    }
                  /* sm_other prevails.  */
                  else if (first_edge_seq[i].second != edge_seq[i].second)
                    {
                      /* Make sure the ref is marked as not supported.  */
                      bitmap_set_bit (refs_not_supported,
                                      first_edge_seq[i].first);
                      first_edge_seq[i].second = sm_other;
                      first_edge_seq[i].from = NULL_TREE;
                    }
                  else if (first_edge_seq[i].second == sm_other
                           && first_edge_seq[i].from != NULL_TREE
                           && (edge_seq[i].from == NULL_TREE
                               || !operand_equal_p (first_edge_seq[i].from,
                                                    edge_seq[i].from, 0)))
                    first_edge_seq[i].from = NULL_TREE;
                }
              /* Any excess elements become sm_other since they are now
                 conditionally executed.  */
              if (first_edge_seq.length () > edge_seq.length ())
                {
                  for (unsigned i = edge_seq.length ();
                       i < first_edge_seq.length (); ++i)
                    {
                      if (first_edge_seq[i].second == sm_ord)
                        bitmap_set_bit (refs_not_supported,
                                        first_edge_seq[i].first);
                      first_edge_seq[i].second = sm_other;
                    }
                }
              else if (edge_seq.length () > first_edge_seq.length ())
                {
                  if (first_uneq == -1)
                    first_uneq = first_edge_seq.length ();
                  for (unsigned i = first_edge_seq.length ();
                       i < edge_seq.length (); ++i)
                    {
                      if (edge_seq[i].second == sm_ord)
                        bitmap_set_bit (refs_not_supported, edge_seq[i].first);
                      extra_refs.safe_push (seq_entry (edge_seq[i].first,
                                                       sm_other, NULL_TREE));
                    }
                }
              /* Put unmerged refs at first_uneq to force dependence checking
                 on them.  */
              if (first_uneq != -1)
                {
                  /* Missing ordered_splice_at.  */
                  if ((unsigned) first_uneq == first_edge_seq.length ())
                    first_edge_seq.safe_splice (extra_refs);
                  else
                    {
                      unsigned fes_length = first_edge_seq.length ();
                      first_edge_seq.safe_grow (fes_length
                                                + extra_refs.length ());
                      memmove (&first_edge_seq[first_uneq + extra_refs.length ()],
                               &first_edge_seq[first_uneq],
                               (fes_length - first_uneq) * sizeof (seq_entry));
                      memcpy (&first_edge_seq[first_uneq],
                              extra_refs.address (),
                              extra_refs.length () * sizeof (seq_entry));
                    }
                }
            }
          /* Use the sequence from the first edge and push SMs down.  */
          for (unsigned i = 0; i < first_edge_seq.length (); ++i)
            {
              unsigned id = first_edge_seq[i].first;
              seq.safe_push (first_edge_seq[i]);
              unsigned new_idx;
              if ((first_edge_seq[i].second == sm_ord
                   || (first_edge_seq[i].second == sm_other
                       && first_edge_seq[i].from != NULL_TREE))
                  && !sm_seq_push_down (seq, seq.length () - 1, &new_idx))
                {
                  if (first_edge_seq[i].second == sm_ord)
                    bitmap_set_bit (refs_not_supported, id);
                  /* Mark it sm_other.  */
                  seq[new_idx].second = sm_other;
                  seq[new_idx].from = NULL_TREE;
                }
            }
          bitmap_set_bit (fully_visited,
                          SSA_NAME_VERSION (gimple_phi_result (phi)));
          return 1;
        }
      lim_aux_data *data = get_lim_data (def);
      gcc_assert (data);
      if (data->ref == UNANALYZABLE_MEM_ID)
        return -1;
      /* Stop at memory references which we can't move.  */
      else if (memory_accesses.refs_list[data->ref]->mem.ref == error_mark_node
               || TREE_THIS_VOLATILE
                    (memory_accesses.refs_list[data->ref]->mem.ref))
        {
          /* Mark refs_not_in_seq as unsupported.  */
          bitmap_ior_into (refs_not_supported, refs_not_in_seq);
          return 1;
        }
      /* One of the stores we want to apply SM to and we've not yet seen.  */
      else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
        {
          seq.safe_push (seq_entry (data->ref, sm_ord));

          /* 1) push it down the queue until a SMed
             and not ignored ref is reached, skipping all not SMed refs
             and ignored refs via non-TBAA disambiguation.  */
          unsigned new_idx;
          if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx)
              /* If that fails but we did not fork yet continue, we'll see
                 to re-materialize all of the stores in the sequence then.
                 Further stores will only be pushed up to this one.  */
              && forked)
            {
              bitmap_set_bit (refs_not_supported, data->ref);
              /* Mark it sm_other.  */
              seq[new_idx].second = sm_other;
            }

          /* 2) check whether we've seen all refs we want to SM and if so
             declare success for the active exit.  */
          if (bitmap_empty_p (refs_not_in_seq))
            return 1;
        }
      else
        /* Another store not part of the final sequence.  Simply push it.  */
        seq.safe_push (seq_entry (data->ref, sm_other,
                                  gimple_assign_rhs1 (def)));

      vdef = gimple_vuse (def);
    }
  while (1);
}

/* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
   edges of the LOOP.  */

static void
hoist_memory_references (class loop *loop, bitmap mem_refs,
                         const vec<edge> &exits)
{
  im_mem_ref *ref;
  unsigned i;
  bitmap_iterator bi;

  /* There's a special case we can use ordered re-materialization for
     conditionally executed stores which is when all stores in the loop
     happen in the same basic-block.  In that case we know we'll reach
     all stores and thus can simply process that BB and emit a single
     conditional block of ordered materializations.  See PR102436.  */
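  /* For example (sketch):
       for (...)
         if (cond)
           { *p = v1; *q = v2; }
     has both stores in one conditional block; on the exit we can emit
       if (flag) { *p = p_lsm; *q = q_lsm; }
     preserving their relative order with a single flag.  */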
  basic_block single_store_bb = NULL;
  EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num],
                            0, i, bi)
    {
      bool fail = false;
      ref = memory_accesses.refs_list[i];
      for (auto loc : ref->accesses_in_loop)
        if (!gimple_vdef (loc.stmt))
          ;
        else if (!single_store_bb)
          {
            single_store_bb = gimple_bb (loc.stmt);
            bool conditional = false;
            for (edge e : exits)
              if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb))
                {
                  /* Conditional as seen from e.  */
                  conditional = true;
                  break;
                }
            if (!conditional)
              {
                fail = true;
                break;
              }
          }
        else if (single_store_bb != gimple_bb (loc.stmt))
          {
            fail = true;
            break;
          }
      if (fail)
        {
          single_store_bb = NULL;
          break;
        }
    }
  if (single_store_bb)
    {
      /* Analyze the single block with stores.  */
      auto_bitmap fully_visited;
      auto_bitmap refs_not_supported;
      auto_bitmap refs_not_in_seq;
      auto_vec<seq_entry> seq;
      bitmap_copy (refs_not_in_seq, mem_refs);
      int res = sm_seq_valid_bb (loop, single_store_bb, NULL_TREE,
                                 seq, refs_not_in_seq, refs_not_supported,
                                 false, fully_visited);
      if (res != 1)
        {
          /* Unhandled refs can still fail this.  */
          bitmap_clear (mem_refs);
          return;
        }

      /* We cannot handle sm_other since we neither remember the
         stored location nor the value at the point we execute them.  */
      for (unsigned i = 0; i < seq.length (); ++i)
        {
          unsigned new_i;
          if (seq[i].second == sm_other
              && seq[i].from != NULL_TREE)
            seq[i].from = NULL_TREE;
          else if ((seq[i].second == sm_ord
                    || (seq[i].second == sm_other
                        && seq[i].from != NULL_TREE))
                   && !sm_seq_push_down (seq, i, &new_i))
            {
              bitmap_set_bit (refs_not_supported, seq[new_i].first);
              seq[new_i].second = sm_other;
              seq[new_i].from = NULL_TREE;
            }
        }
      bitmap_and_compl_into (mem_refs, refs_not_supported);
      if (bitmap_empty_p (mem_refs))
        return;

      /* Prune seq.  */
      while (seq.last ().second == sm_other
             && seq.last ().from == NULL_TREE)
        seq.pop ();

      hash_map<im_mem_ref *, sm_aux *> aux_map;

      /* Execute SM but delay the store materialization for ordered
         sequences on exit.  */
      bool first_p = true;
      EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
        {
          ref = memory_accesses.refs_list[i];
          execute_sm (loop, ref, aux_map, true, !first_p);
          first_p = false;
        }

      /* Get at the single flag variable we eventually produced.  */
      im_mem_ref *ref
        = memory_accesses.refs_list[bitmap_first_set_bit (mem_refs)];
      sm_aux *aux = *aux_map.get (ref);

      /* Materialize ordered store sequences on exits.  */
      edge e;
      FOR_EACH_VEC_ELT (exits, i, e)
        {
          edge append_cond_position = NULL;
          edge last_cond_fallthru = NULL;
          edge insert_e = e;
          /* Construct the single flag variable control flow and insert
             the ordered seq of stores in the then block.  With
             -fstore-data-races we can do the stores unconditionally.  */
          if (aux->store_flag)
            insert_e
              = single_pred_edge
                  (execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
                                          aux->store_flag,
                                          loop_preheader_edge (loop),
                                          &aux->flag_bbs, append_cond_position,
                                          last_cond_fallthru));
          execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
                           append_cond_position, last_cond_fallthru);
          gsi_commit_one_edge_insert (insert_e, NULL);
        }

      for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
           iter != aux_map.end (); ++iter)
        delete (*iter).second;

      return;
    }

  /* To address PR57359, before actually applying store-motion check
     the candidates found for validity with regards to reordering
     relative to other stores which we have so far disambiguated using
     TBAA, which isn't valid for this purpose.
     What matters is the order of the last stores to the mem_refs
     with respect to the other stores of the loop at the point of the
     loop exits.  */

  /* For each exit compute the store order, pruning from mem_refs
     on the fly.  */
  /* The complexity of this is at least
     O(number of exits * number of SM refs) but more approaching
     O(number of exits * number of SM refs * number of stores).  */
  /* ??? Somehow do this in a single sweep over the loop body.  */
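  /* Sketch of the PR57359 hazard: if the loop stores to both *p and *q,
     only *p is an SM candidate and TBAA claims the two independent,
     re-issuing *p's store on the exit after a later *q store would swap
     their order; the per-exit backwards walk over the vdef chain below
     detects and demotes such refs.  */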
  auto_vec<std::pair<edge, vec<seq_entry> > > sms;
  auto_bitmap refs_not_supported (&lim_bitmap_obstack);
  edge e;
  FOR_EACH_VEC_ELT (exits, i, e)
    {
      vec<seq_entry> seq;
      seq.create (4);
      auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
      bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported);
      if (bitmap_empty_p (refs_not_in_seq))
        {
          seq.release ();
          break;
        }
      auto_bitmap fully_visited;
      int res = sm_seq_valid_bb (loop, e->src, NULL_TREE,
                                 seq, refs_not_in_seq,
                                 refs_not_supported, false,
                                 fully_visited);
      if (res != 1)
        {
          bitmap_copy (refs_not_supported, mem_refs);
          seq.release ();
          break;
        }
      sms.safe_push (std::make_pair (e, seq));
    }

  /* Prune refs that later exits marked unsupported from the sequences
     of earlier processed exits.  */
  bool changed = !bitmap_empty_p (refs_not_supported);
  while (changed)
    {
      changed = false;
      std::pair<edge, vec<seq_entry> > *seq;
      FOR_EACH_VEC_ELT (sms, i, seq)
        {
          bool need_to_push = false;
          for (unsigned i = 0; i < seq->second.length (); ++i)
            {
              sm_kind kind = seq->second[i].second;
              if (kind == sm_other && seq->second[i].from == NULL_TREE)
                break;
              unsigned id = seq->second[i].first;
              unsigned new_idx;
              if (kind == sm_ord
                  && bitmap_bit_p (refs_not_supported, id))
                {
                  seq->second[i].second = sm_other;
                  gcc_assert (seq->second[i].from == NULL_TREE);
                  need_to_push = true;
                }
              else if (need_to_push
                       && !sm_seq_push_down (seq->second, i, &new_idx))
                {
                  /* We need to push down both sm_ord and sm_other
                     but for the latter we need to disqualify all
                     following refs.  */
                  if (kind == sm_ord)
                    {
                      if (bitmap_set_bit (refs_not_supported, id))
                        changed = true;
                      seq->second[new_idx].second = sm_other;
                    }
                  else
                    {
                      for (unsigned j = seq->second.length () - 1;
                           j > new_idx; --j)
                        if (seq->second[j].second == sm_ord
                            && bitmap_set_bit (refs_not_supported,
                                               seq->second[j].first))
                          changed = true;
                      seq->second.truncate (new_idx);
                      break;
                    }
                }
            }
        }
    }
  std::pair<edge, vec<seq_entry> > *seq;
  FOR_EACH_VEC_ELT (sms, i, seq)
    {
      /* Prune sm_other from the end.  */
      while (!seq->second.is_empty ()
             && seq->second.last ().second == sm_other)
        seq->second.pop ();
      /* Prune duplicates from the start.  */
      auto_bitmap seen (&lim_bitmap_obstack);
      unsigned j, k;
      for (j = k = 0; j < seq->second.length (); ++j)
        if (bitmap_set_bit (seen, seq->second[j].first))
          {
            if (k != j)
              seq->second[k] = seq->second[j];
            ++k;
          }
      seq->second.truncate (k);
      /* And verify.  */
      seq_entry *e;
      FOR_EACH_VEC_ELT (seq->second, j, e)
        gcc_assert (e->second == sm_ord
                    || (e->second == sm_other && e->from != NULL_TREE));
    }

  /* Verify dependence for refs we cannot handle with the order preserving
     code (refs_not_supported) or prune them from mem_refs.  */
  auto_vec<seq_entry> unord_refs;
  EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi)
    {
      ref = memory_accesses.refs_list[i];
      if (!ref_indep_loop_p (loop, ref, sm_waw))
        bitmap_clear_bit (mem_refs, i);
      /* We've now verified that the order of this store with respect to
         all other stores in the loop does not matter.  */
      else
        unord_refs.safe_push (seq_entry (i, sm_unord));
    }

  hash_map<im_mem_ref *, sm_aux *> aux_map;

  /* Execute SM but delay the store materialization for ordered
     sequences on exit.  */
  EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
    {
      ref = memory_accesses.refs_list[i];
      execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i),
                  false);
    }

  /* Materialize ordered store sequences on exits.  */
  FOR_EACH_VEC_ELT (exits, i, e)
    {
      edge append_cond_position = NULL;
      edge last_cond_fallthru = NULL;
      if (i < sms.length ())
        {
          gcc_assert (sms[i].first == e);
          execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord,
                           append_cond_position, last_cond_fallthru);
          sms[i].second.release ();
        }
      if (!unord_refs.is_empty ())
        execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord,
                         append_cond_position, last_cond_fallthru);
      /* Commit edge inserts here to preserve the order of stores
         when an exit exits multiple loops.  */
      gsi_commit_one_edge_insert (e, NULL);
    }

  for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
       iter != aux_map.end (); ++iter)
    delete (*iter).second;
}

class ref_always_accessed
{
public:
  ref_always_accessed (class loop *loop_, bool stored_p_)
    : loop (loop_), stored_p (stored_p_) {}
  bool operator () (mem_ref_loc *loc);
  class loop *loop;
  bool stored_p;
};

bool
ref_always_accessed::operator () (mem_ref_loc *loc)
{
  class loop *must_exec;

  struct lim_aux_data *lim_data = get_lim_data (loc->stmt);
  if (!lim_data)
    return false;

  /* If we require an always executed store make sure the statement
     is a store.  */
  if (stored_p)
    {
      tree lhs = gimple_get_lhs (loc->stmt);
      if (!lhs
          || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
        return false;
    }

  must_exec = lim_data->always_executed_in;
  if (!must_exec)
    return false;

  if (must_exec == loop
      || flow_loop_nested_p (must_exec, loop))
    return true;

  return false;
}

/* Returns true if REF is always accessed in LOOP.  If STORED_P is true
   make sure REF is always stored to in LOOP.  */

static bool
ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p)
{
  return for_all_locs_in_loop (loop, ref,
                               ref_always_accessed (loop, stored_p));
}

/* Returns true if REF1 and REF2 are independent.  */

static bool
refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p)
{
  if (ref1 == ref2)
    return true;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Querying dependency of refs %u and %u: ",
             ref1->id, ref2->id);

  if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache, tbaa_p))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "dependent.\n");
      return false;
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "independent.\n");
      return true;
    }
}

/* Returns true if REF is independent of all other accesses in LOOP.
   KIND specifies the kind of dependence to consider.
     lim_raw assumes REF is not stored in LOOP and disambiguates RAW
     dependences so if true REF can be hoisted out of LOOP
     sm_war disambiguates a store REF against all other loads to see
     whether the store can be sunk across loads out of LOOP
     sm_waw disambiguates a store REF against all other stores to see
     whether the store can be sunk across stores out of LOOP.  */
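
/* E.g. (illustrative): in
     for (...) { tmp = *q; *p = tmp; }
   hoisting the load of *q queries lim_raw against the store to *p,
   while sinking the store to *p queries sm_war against the load of *q
   and sm_waw against any other stores in the loop.  */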
3100 | |
3101 | static bool |
3102 | ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind) |
3103 | { |
3104 | bool indep_p = true; |
3105 | bitmap refs_to_check; |
3106 | |
3107 | if (kind == sm_war) |
3108 | refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num]; |
3109 | else |
3110 | refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num]; |
3111 | |
3112 | if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID) |
3113 | || ref->mem.ref == error_mark_node) |
3114 | indep_p = false; |
3115 | else |
3116 | { |
3117 | /* tri-state, { unknown, independent, dependent } */ |
3118 | dep_state state = query_loop_dependence (loop, ref, kind); |
3119 | if (state != dep_unknown) |
	return state == dep_independent;
3121 | |
3122 | class loop *inner = loop->inner; |
3123 | while (inner) |
3124 | { |
	  if (!ref_indep_loop_p (inner, ref, kind))
3126 | { |
3127 | indep_p = false; |
3128 | break; |
3129 | } |
3130 | inner = inner->next; |
3131 | } |
3132 | |
3133 | if (indep_p) |
3134 | { |
3135 | unsigned i; |
3136 | bitmap_iterator bi; |
3137 | EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi) |
3138 | { |
3139 | im_mem_ref *aref = memory_accesses.refs_list[i]; |
3140 | if (aref->mem.ref == error_mark_node) |
3141 | { |
3142 | gimple *stmt = aref->accesses_in_loop[0].stmt; |
3143 | if ((kind == sm_war |
3144 | && ref_maybe_used_by_stmt_p (stmt, &ref->mem, |
3145 | kind != sm_waw)) |
3146 | || stmt_may_clobber_ref_p_1 (stmt, &ref->mem, |
3147 | kind != sm_waw)) |
3148 | { |
3149 | indep_p = false; |
3150 | break; |
3151 | } |
3152 | } |
	  else if (!refs_independent_p (ref, aref, kind != sm_waw))
3154 | { |
3155 | indep_p = false; |
3156 | break; |
3157 | } |
3158 | } |
3159 | } |
3160 | } |
3161 | |
3162 | if (dump_file && (dump_flags & TDF_DETAILS)) |
    fprintf (dump_file, "Querying %s dependencies of ref %u in loop %d: %s\n",
	     kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
	     ref->id, loop->num, indep_p ? "independent" : "dependent");
3166 | |
3167 | /* Record the computed result in the cache. */ |
  record_loop_dependence (loop, ref, kind,
			  indep_p ? dep_independent : dep_dependent);
3170 | |
3171 | return indep_p; |
3172 | } |
3173 | |
3174 | class ref_in_loop_hot_body |
3175 | { |
3176 | public: |
3177 | ref_in_loop_hot_body (class loop *loop_) : l (loop_) {} |
3178 | bool operator () (mem_ref_loc *loc); |
3179 | class loop *l; |
3180 | }; |
3181 | |
/* Check the coldest loop between loop L and the innermost loop.  If there
   is a cold loop between L and INNER_LOOP, store motion can be performed;
   with no cold loop, no store motion is done.  get_coldest_out_loop also
   handles the case where L is INNER_LOOP.  */
3186 | bool |
3187 | ref_in_loop_hot_body::operator () (mem_ref_loc *loc) |
3188 | { |
  basic_block curr_bb = gimple_bb (loc->stmt);
3190 | class loop *inner_loop = curr_bb->loop_father; |
  return get_coldest_out_loop (l, inner_loop, curr_bb);
3192 | } |
3193 | |
3194 | |
3195 | /* Returns true if we can perform store motion of REF from LOOP. */ |
3196 | |
3197 | static bool |
3198 | can_sm_ref_p (class loop *loop, im_mem_ref *ref) |
3199 | { |
3200 | tree base; |
3201 | |
3202 | /* Can't hoist unanalyzable refs. */ |
3203 | if (!MEM_ANALYZABLE (ref)) |
3204 | return false; |
3205 | |
3206 | /* Can't hoist/sink aggregate copies. */ |
3207 | if (ref->mem.ref == error_mark_node) |
3208 | return false; |
3209 | |
3210 | /* It should be movable. */ |
3211 | if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref)) |
3212 | || TREE_THIS_VOLATILE (ref->mem.ref) |
3213 | || !for_each_index (&ref->mem.ref, may_move_till, loop)) |
3214 | return false; |
3215 | |
  /* If it can throw, fail; we do not properly update EH info.  */
3217 | if (tree_could_throw_p (ref->mem.ref)) |
3218 | return false; |
3219 | |
3220 | /* If it can trap, it must be always executed in LOOP. |
3221 | Readonly memory locations may trap when storing to them, but |
3222 | tree_could_trap_p is a predicate for rvalues, so check that |
3223 | explicitly. */ |
  base = get_base_address (ref->mem.ref);
3225 | if ((tree_could_trap_p (ref->mem.ref) |
3226 | || (DECL_P (base) && TREE_READONLY (base))) |
3227 | /* ??? We can at least use false here, allowing loads? We |
3228 | are forcing conditional stores if the ref is not always |
3229 | stored to later anyway. So this would only guard |
3230 | the load we need to emit. Thus when the ref is not |
3231 | loaded we can elide this completely? */ |
      && !ref_always_accessed_p (loop, ref, true))
3233 | return false; |
3234 | |
3235 | /* Verify all loads of ref can be hoisted. */ |
3236 | if (ref->loaded |
3237 | && bitmap_bit_p (ref->loaded, loop->num) |
      && !ref_indep_loop_p (loop, ref, lim_raw))
3239 | return false; |
3240 | |
3241 | /* Verify the candidate can be disambiguated against all loads, |
3242 | that is, we can elide all in-loop stores. Disambiguation |
3243 | against stores is done later when we cannot guarantee preserving |
3244 | the order of stores. */ |
  if (!ref_indep_loop_p (loop, ref, sm_war))
3246 | return false; |
3247 | |
  /* Verify whether the candidate is hot for LOOP.  Only do store motion
     if the candidate's profile count is hot.  A statement in a cold BB
     shouldn't be moved out of its loop_father.  */
  if (!for_all_locs_in_loop (loop, ref, ref_in_loop_hot_body (loop)))
3252 | return false; |
3253 | |
3254 | return true; |
3255 | } |
3256 | |
/* Marks the references in LOOP for which store motion should be performed
   in REFS_TO_SM.  SM_EXECUTED is the set of references for which store
   motion was performed in one of the outer loops.  */
3260 | |
3261 | static void |
3262 | find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm) |
3263 | { |
3264 | bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num]; |
3265 | unsigned i; |
3266 | bitmap_iterator bi; |
3267 | im_mem_ref *ref; |
3268 | |
3269 | EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi) |
3270 | { |
3271 | ref = memory_accesses.refs_list[i]; |
      if (can_sm_ref_p (loop, ref) && dbg_cnt (lim))
3273 | bitmap_set_bit (refs_to_sm, i); |
3274 | } |
3275 | } |
3276 | |
/* Checks whether LOOP (with exits stored in the EXITS array) is suitable
   for a store motion optimization (i.e. whether we can insert statements
   on its exits).  */
3280 | |
3281 | static bool |
3282 | loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED, |
3283 | const vec<edge> &exits) |
3284 | { |
3285 | unsigned i; |
3286 | edge ex; |
3287 | |
3288 | FOR_EACH_VEC_ELT (exits, i, ex) |
3289 | if (ex->flags & (EDGE_ABNORMAL | EDGE_EH)) |
3290 | return false; |
3291 | |
3292 | return true; |
3293 | } |
3294 | |
/* Try to perform store motion for all memory references modified inside
   LOOP.  SM_EXECUTED is the bitmap of the memory references for which
   store motion was executed in one of the outer loops.  */
3298 | |
3299 | static void |
3300 | store_motion_loop (class loop *loop, bitmap sm_executed) |
3301 | { |
3302 | auto_vec<edge> exits = get_loop_exit_edges (loop); |
3303 | class loop *subloop; |
  bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
3305 | |
3306 | if (loop_suitable_for_sm (loop, exits)) |
3307 | { |
      find_refs_for_sm (loop, sm_executed, sm_in_loop);
      if (!bitmap_empty_p (sm_in_loop))
	hoist_memory_references (loop, sm_in_loop, exits);
3311 | } |
3312 | |
3313 | bitmap_ior_into (sm_executed, sm_in_loop); |
3314 | for (subloop = loop->inner; subloop != NULL; subloop = subloop->next) |
    store_motion_loop (subloop, sm_executed);
3316 | bitmap_and_compl_into (sm_executed, sm_in_loop); |
3317 | BITMAP_FREE (sm_in_loop); |
3318 | } |
3319 | |
3320 | /* Try to perform store motion for all memory references modified inside |
3321 | loops. */ |
3322 | |
3323 | static void |
3324 | do_store_motion (void) |
3325 | { |
3326 | class loop *loop; |
  bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
3328 | |
3329 | for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next) |
3330 | store_motion_loop (loop, sm_executed); |
3331 | |
3332 | BITMAP_FREE (sm_executed); |
3333 | } |
3334 | |
/* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
   for each such basic block bb records the outermost loop for which
   execution of its header implies execution of bb.  CONTAINS_CALL is the
   bitmap of blocks that contain a nonpure call.  */
3339 | |
3340 | static void |
3341 | fill_always_executed_in_1 (class loop *loop, sbitmap contains_call) |
3342 | { |
3343 | basic_block bb = NULL, last = NULL; |
3344 | edge e; |
3345 | class loop *inn_loop = loop; |
3346 | |
3347 | if (ALWAYS_EXECUTED_IN (loop->header) == NULL) |
3348 | { |
3349 | auto_vec<basic_block, 64> worklist; |
      worklist.reserve_exact (loop->num_nodes);
      worklist.quick_push (loop->header);
3352 | do |
3353 | { |
3354 | edge_iterator ei; |
3355 | bb = worklist.pop (); |
3356 | |
3357 | if (!flow_bb_inside_loop_p (inn_loop, bb)) |
3358 | { |
3359 | /* When we are leaving a possibly infinite inner loop |
3360 | we have to stop processing. */ |
3361 | if (!finite_loop_p (inn_loop)) |
3362 | break; |
3363 | /* If the loop was finite we can continue with processing |
3364 | the loop we exited to. */ |
3365 | inn_loop = bb->loop_father; |
3366 | } |
3367 | |
3368 | if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) |
3369 | last = bb; |
3370 | |
	  if (bitmap_bit_p (contains_call, bb->index))
3372 | break; |
3373 | |
	  /* If LOOP exits from this BB, stop processing.  */
3375 | FOR_EACH_EDGE (e, ei, bb->succs) |
3376 | if (!flow_bb_inside_loop_p (loop, e->dest)) |
3377 | break; |
3378 | if (e) |
3379 | break; |
3380 | |
3381 | /* A loop might be infinite (TODO use simple loop analysis |
3382 | to disprove this if possible). */ |
3383 | if (bb->flags & BB_IRREDUCIBLE_LOOP) |
3384 | break; |
3385 | |
3386 | if (bb->loop_father->header == bb) |
3387 | /* Record that we enter into a subloop since it might not |
3388 | be finite. */ |
3389 | /* ??? Entering into a not always executed subloop makes |
3390 | fill_always_executed_in quadratic in loop depth since |
3391 | we walk those loops N times. This is not a problem |
3392 | in practice though, see PR102253 for a worst-case testcase. */ |
3393 | inn_loop = bb->loop_father; |
3394 | |
3395 | /* Walk the body of LOOP sorted by dominance relation. Additionally, |
3396 | if a basic block S dominates the latch, then only blocks dominated |
3397 | by S are after it. |
3398 | This is get_loop_body_in_dom_order using a worklist algorithm and |
3399 | stopping once we are no longer interested in visiting further |
3400 | blocks. */ |
3401 | unsigned old_len = worklist.length (); |
3402 | unsigned postpone = 0; |
3403 | for (basic_block son = first_dom_son (CDI_DOMINATORS, bb); |
3404 | son; |
3405 | son = next_dom_son (CDI_DOMINATORS, son)) |
3406 | { |
3407 | if (!flow_bb_inside_loop_p (loop, son)) |
3408 | continue; |
3409 | if (dominated_by_p (CDI_DOMINATORS, loop->latch, son)) |
3410 | postpone = worklist.length (); |
	      worklist.quick_push (son);
3412 | } |
3413 | if (postpone) |
3414 | /* Postponing the block that dominates the latch means |
3415 | processing it last and thus putting it earliest in the |
3416 | worklist. */ |
	  std::swap (worklist[old_len], worklist[postpone]);
3418 | } |
3419 | while (!worklist.is_empty ()); |
3420 | |
3421 | while (1) |
3422 | { |
3423 | if (dump_enabled_p ()) |
3424 | dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n" , |
3425 | last->index, loop->num); |
3426 | SET_ALWAYS_EXECUTED_IN (last, loop); |
3427 | if (last == loop->header) |
3428 | break; |
3429 | last = get_immediate_dominator (CDI_DOMINATORS, last); |
3430 | } |
3431 | } |
3432 | |
3433 | for (loop = loop->inner; loop; loop = loop->next) |
3434 | fill_always_executed_in_1 (loop, contains_call); |
3435 | } |
3436 | |
/* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
   for each such basic block bb records the outermost loop for which
   execution of its header implies execution of bb.  */
3440 | |
3441 | static void |
3442 | fill_always_executed_in (void) |
3443 | { |
3444 | basic_block bb; |
3445 | class loop *loop; |
3446 | |
3447 | auto_sbitmap contains_call (last_basic_block_for_fn (cfun)); |
3448 | bitmap_clear (contains_call); |
3449 | FOR_EACH_BB_FN (bb, cfun) |
3450 | { |
3451 | gimple_stmt_iterator gsi; |
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  if (nonpure_call_p (gsi_stmt (gsi)))
	    break;
	}

      if (!gsi_end_p (gsi))
	bitmap_set_bit (contains_call, bb->index);
3460 | } |
3461 | |
3462 | for (loop = current_loops->tree_root->inner; loop; loop = loop->next) |
3463 | fill_always_executed_in_1 (loop, contains_call); |
3464 | } |
3465 | |
/* Find the coldest loop preheader for LOOP and the nearest hotter loop
   to LOOP.  Then recursively process each inner loop.  */
3468 | |
3469 | void |
3470 | fill_coldest_and_hotter_out_loop (class loop *coldest_loop, |
3471 | class loop *hotter_loop, class loop *loop) |
3472 | { |
  if (bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
				     coldest_loop))
3475 | coldest_loop = loop; |
3476 | |
3477 | coldest_outermost_loop[loop->num] = coldest_loop; |
3478 | |
3479 | hotter_than_inner_loop[loop->num] = NULL; |
3480 | class loop *outer_loop = loop_outer (loop); |
3481 | if (hotter_loop |
      && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
					hotter_loop))
3484 | hotter_than_inner_loop[loop->num] = hotter_loop; |
3485 | |
3486 | if (outer_loop && outer_loop != current_loops->tree_root |
      && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
					outer_loop))
3489 | hotter_than_inner_loop[loop->num] = outer_loop; |
3490 | |
3491 | if (dump_enabled_p ()) |
3492 | { |
3493 | dump_printf (MSG_NOTE, "loop %d's coldest_outermost_loop is %d, " , |
3494 | loop->num, coldest_loop->num); |
3495 | if (hotter_than_inner_loop[loop->num]) |
3496 | dump_printf (MSG_NOTE, "hotter_than_inner_loop is %d\n" , |
3497 | hotter_than_inner_loop[loop->num]->num); |
3498 | else |
3499 | dump_printf (MSG_NOTE, "hotter_than_inner_loop is NULL\n" ); |
3500 | } |
3501 | |
3502 | class loop *inner_loop; |
3503 | for (inner_loop = loop->inner; inner_loop; inner_loop = inner_loop->next) |
    fill_coldest_and_hotter_out_loop (coldest_loop,
				      hotter_than_inner_loop[loop->num],
				      inner_loop);
3507 | } |
3508 | |
3509 | /* Compute the global information needed by the loop invariant motion pass. */ |
3510 | |
3511 | static void |
3512 | tree_ssa_lim_initialize (bool store_motion) |
3513 | { |
3514 | unsigned i; |
3515 | |
3516 | bitmap_obstack_initialize (&lim_bitmap_obstack); |
3517 | gcc_obstack_init (&mem_ref_obstack); |
3518 | lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>; |
3519 | |
3520 | if (flag_tm) |
3521 | compute_transaction_bits (); |
3522 | |
3523 | memory_accesses.refs = new hash_table<mem_ref_hasher> (100); |
  memory_accesses.refs_list.create (100);
3525 | /* Allocate a special, unanalyzable mem-ref with ID zero. */ |
  memory_accesses.refs_list.quick_push
    (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));
3528 | |
  memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_loaded_in_loop.quick_grow_cleared (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.quick_grow_cleared (number_of_loops (cfun));
3533 | if (store_motion) |
3534 | { |
      memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
      memory_accesses.all_refs_stored_in_loop.quick_grow_cleared
	(number_of_loops (cfun));
3538 | } |
3539 | |
3540 | for (i = 0; i < number_of_loops (cfun); i++) |
3541 | { |
      bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i],
			 &lim_bitmap_obstack);
      bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
			 &lim_bitmap_obstack);
      if (store_motion)
	bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
			   &lim_bitmap_obstack);
3549 | } |
3550 | |
3551 | memory_accesses.ttae_cache = NULL; |
3552 | |
3553 | /* Initialize bb_loop_postorder with a mapping from loop->num to |
3554 | its postorder index. */ |
3555 | i = 0; |
3556 | bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun)); |
3557 | for (auto loop : loops_list (cfun, LI_FROM_INNERMOST)) |
3558 | bb_loop_postorder[loop->num] = i++; |
3559 | } |
3560 | |
3561 | /* Cleans up after the invariant motion pass. */ |
3562 | |
3563 | static void |
3564 | tree_ssa_lim_finalize (void) |
3565 | { |
3566 | basic_block bb; |
3567 | unsigned i; |
3568 | im_mem_ref *ref; |
3569 | |
3570 | FOR_EACH_BB_FN (bb, cfun) |
3571 | SET_ALWAYS_EXECUTED_IN (bb, NULL); |
3572 | |
3573 | bitmap_obstack_release (&lim_bitmap_obstack); |
3574 | delete lim_aux_data_map; |
3575 | |
3576 | delete memory_accesses.refs; |
3577 | memory_accesses.refs = NULL; |
3578 | |
3579 | FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref) |
    memref_free (ref);
3581 | memory_accesses.refs_list.release (); |
3582 | obstack_free (&mem_ref_obstack, NULL); |
3583 | |
3584 | memory_accesses.refs_loaded_in_loop.release (); |
3585 | memory_accesses.refs_stored_in_loop.release (); |
3586 | memory_accesses.all_refs_stored_in_loop.release (); |
3587 | |
3588 | if (memory_accesses.ttae_cache) |
3589 | free_affine_expand_cache (&memory_accesses.ttae_cache); |
3590 | |
  free (bb_loop_postorder);
3592 | |
3593 | coldest_outermost_loop.release (); |
3594 | hotter_than_inner_loop.release (); |
3595 | } |
3596 | |
/* Moves invariants from loops.  Only "expensive" invariants are moved out --
   i.e. those that are likely to be a win regardless of the register
   pressure.  Only perform store motion if STORE_MOTION is true.  */
3600 | |
3601 | unsigned int |
3602 | loop_invariant_motion_in_fun (function *fun, bool store_motion) |
3603 | { |
3604 | unsigned int todo = 0; |
3605 | |
3606 | tree_ssa_lim_initialize (store_motion); |
3607 | |
3608 | mark_ssa_maybe_undefs (); |
3609 | |
3610 | /* Gathers information about memory accesses in the loops. */ |
3611 | analyze_memory_references (store_motion); |
3612 | |
3613 | /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */ |
3614 | fill_always_executed_in (); |
3615 | |
  /* Pre-compute the coldest outermost loop and nearest hotter loop of
     each loop.  */
3618 | class loop *loop; |
  coldest_outermost_loop.create (number_of_loops (cfun));
  coldest_outermost_loop.safe_grow_cleared (number_of_loops (cfun));
  hotter_than_inner_loop.create (number_of_loops (cfun));
  hotter_than_inner_loop.safe_grow_cleared (number_of_loops (cfun));
3623 | for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next) |
    fill_coldest_and_hotter_out_loop (loop, NULL, loop);
3625 | |
3626 | int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun)); |
3627 | int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false); |
3628 | |
  /* For each statement determine the outermost loop in which it is
     invariant and the cost of computing the invariant.  */
3631 | for (int i = 0; i < n; ++i) |
3632 | compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i])); |
3633 | |
3634 | /* Execute store motion. Force the necessary invariants to be moved |
3635 | out of the loops as well. */ |
3636 | if (store_motion) |
3637 | do_store_motion (); |
3638 | |
  free (rpo);
3640 | rpo = XNEWVEC (int, last_basic_block_for_fn (fun)); |
3641 | n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false); |
3642 | |
3643 | /* Move the expressions that are expensive enough. */ |
3644 | for (int i = 0; i < n; ++i) |
3645 | todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (fun, rpo[i])); |
3646 | |
  free (rpo);
3648 | |
3649 | gsi_commit_edge_inserts (); |
3650 | if (need_ssa_update_p (fun)) |
3651 | rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); |
3652 | |
3653 | tree_ssa_lim_finalize (); |
3654 | |
3655 | return todo; |
3656 | } |
3657 | |
3658 | /* Loop invariant motion pass. */ |
3659 | |
3660 | namespace { |
3661 | |
3662 | const pass_data pass_data_lim = |
3663 | { |
  GIMPLE_PASS, /* type */
  "lim", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LIM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
3673 | }; |
3674 | |
3675 | class pass_lim : public gimple_opt_pass |
3676 | { |
3677 | public: |
3678 | pass_lim (gcc::context *ctxt) |
3679 | : gimple_opt_pass (pass_data_lim, ctxt) |
3680 | {} |
3681 | |
3682 | /* opt_pass methods: */ |
3683 | opt_pass * clone () final override { return new pass_lim (m_ctxt); } |
3684 | bool gate (function *) final override { return flag_tree_loop_im != 0; } |
3685 | unsigned int execute (function *) final override; |
3686 | |
3687 | }; // class pass_lim |
3688 | |
3689 | unsigned int |
3690 | pass_lim::execute (function *fun) |
3691 | { |
3692 | bool in_loop_pipeline = scev_initialized_p (); |
3693 | if (!in_loop_pipeline) |
3694 | loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); |
3695 | |
  if (number_of_loops (fun) <= 1)
3697 | return 0; |
3698 | unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores); |
3699 | |
3700 | if (!in_loop_pipeline) |
3701 | loop_optimizer_finalize (); |
3702 | else |
3703 | scev_reset (); |
3704 | return todo; |
3705 | } |
3706 | |
3707 | } // anon namespace |
3708 | |
3709 | gimple_opt_pass * |
3710 | make_pass_lim (gcc::context *ctxt) |
3711 | { |
3712 | return new pass_lim (ctxt); |
3713 | } |
3714 | |
3715 | |
3716 | |