1 | /* RTL-based forward propagation pass for GNU compiler. |
2 | Copyright (C) 2005-2024 Free Software Foundation, Inc. |
3 | Contributed by Paolo Bonzini and Steven Bosscher. |
4 | |
5 | This file is part of GCC. |
6 | |
7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free |
9 | Software Foundation; either version 3, or (at your option) any later |
10 | version. |
11 | |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
15 | for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | #define INCLUDE_ALGORITHM |
22 | #define INCLUDE_FUNCTIONAL |
23 | #include "config.h" |
24 | #include "system.h" |
25 | #include "coretypes.h" |
26 | #include "backend.h" |
27 | #include "rtl.h" |
28 | #include "rtlanal.h" |
29 | #include "df.h" |
30 | #include "rtl-ssa.h" |
31 | |
32 | #include "predict.h" |
33 | #include "cfgrtl.h" |
34 | #include "cfgcleanup.h" |
35 | #include "cfgloop.h" |
36 | #include "tree-pass.h" |
37 | #include "rtl-iter.h" |
38 | #include "target.h" |
39 | |
40 | /* This pass does simple forward propagation and simplification when an |
41 | operand of an insn can only come from a single def. This pass uses |
42 | RTL SSA, so it is global. However, we only do limited analysis of |
43 | available expressions. |
44 | |
45 | 1) The pass tries to propagate the source of the def into the use, |
46 | and checks if the result is independent of the substituted value. |
47 | For example, the high word of a (zero_extend:DI (reg:SI M)) is always |
48 | zero, independent of the source register. |
49 | |
50 | In particular, we propagate constants into the use site. Sometimes |
51 | RTL expansion did not put the constant in the same insn on purpose, |
52 | to satisfy a predicate, and the result will fail to be recognized; |
53 | but this happens rarely and in this case we can still create a |
54 | REG_EQUAL note. For multi-word operations, this |
55 | |
56 | (set (subreg:SI (reg:DI 120) 0) (const_int 0)) |
57 | (set (subreg:SI (reg:DI 120) 4) (const_int -1)) |
58 | (set (subreg:SI (reg:DI 122) 0) |
59 | (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0))) |
60 | (set (subreg:SI (reg:DI 122) 4) |
61 | (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4))) |
62 | |
63 | can be simplified to the much simpler |
64 | |
65 | (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119))) |
66 | (set (subreg:SI (reg:DI 122) 4) (const_int -1)) |
67 | |
68 | This particular propagation is also effective at putting together |
69 | complex addressing modes. We are more aggressive inside MEMs, in |
70 | that all definitions are propagated if the use is in a MEM; if the |
71 | result is a valid memory address we check address_cost to decide |
72 | whether the substitution is worthwhile. |
73 | |
74 | 2) The pass propagates register copies. This is not as effective as |
75 | the copy propagation done by CSE's canon_reg, which works by walking |
76 | the instruction chain, it can help the other transformations. |
77 | |
78 | We should consider removing this optimization, and instead reorder the |
79 | RTL passes, because GCSE does this transformation too. With some luck, |
80 | the CSE pass at the end of rest_of_handle_gcse could also go away. |
81 | |
82 | 3) The pass looks for paradoxical subregs that are actually unnecessary. |
83 | Things like this: |
84 | |
85 | (set (reg:QI 120) (subreg:QI (reg:SI 118) 0)) |
86 | (set (reg:QI 121) (subreg:QI (reg:SI 119) 0)) |
87 | (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0) |
88 | (subreg:SI (reg:QI 121) 0))) |
89 | |
90 | are very common on machines that can only do word-sized operations. |
91 | For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0), |
92 | if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0), |
93 | we can replace the paradoxical subreg with simply (reg:WIDE M). The |
94 | above will simplify this to |
95 | |
96 | (set (reg:QI 120) (subreg:QI (reg:SI 118) 0)) |
97 | (set (reg:QI 121) (subreg:QI (reg:SI 119) 0)) |
98 | (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119))) |
99 | |
100 | where the first two insns are now dead. */ |
101 | |
102 | using namespace rtl_ssa; |
103 | |
104 | static int num_changes; |
105 | |
106 | /* Do not try to replace constant addresses or addresses of local and |
107 | argument slots. These MEM expressions are made only once and inserted |
108 | in many instructions, as well as being used to control symbol table |
109 | output. It is not safe to clobber them. |
110 | |
111 | There are some uncommon cases where the address is already in a register |
112 | for some reason, but we cannot take advantage of that because we have |
113 | no easy way to unshare the MEM. In addition, looking up all stack |
114 | addresses is costly. */ |
115 | |
116 | static bool |
117 | can_simplify_addr (rtx addr) |
118 | { |
119 | rtx reg; |
120 | |
121 | if (CONSTANT_ADDRESS_P (addr)) |
122 | return false; |
123 | |
124 | if (GET_CODE (addr) == PLUS) |
125 | reg = XEXP (addr, 0); |
126 | else |
127 | reg = addr; |
128 | |
129 | return (!REG_P (reg) |
130 | || (REGNO (reg) != FRAME_POINTER_REGNUM |
131 | && REGNO (reg) != HARD_FRAME_POINTER_REGNUM |
132 | && REGNO (reg) != ARG_POINTER_REGNUM)); |
133 | } |
134 | |
135 | /* MEM is the result of an address simplification, and temporarily |
136 | undoing changes OLD_NUM_CHANGES onwards restores the original address. |
137 | Return whether it is good to use the new address instead of the |
138 | old one. INSN is the containing instruction. */ |
139 | |
140 | static bool |
141 | should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn) |
142 | { |
143 | int gain; |
144 | |
145 | /* Prefer the new address if it is less expensive. */ |
146 | bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); |
147 | temporarily_undo_changes (old_num_changes); |
148 | gain = address_cost (XEXP (mem, 0), GET_MODE (mem), |
149 | MEM_ADDR_SPACE (mem), speed); |
150 | redo_changes (old_num_changes); |
151 | gain -= address_cost (XEXP (mem, 0), GET_MODE (mem), |
152 | MEM_ADDR_SPACE (mem), speed); |
153 | |
154 | /* If the addresses have equivalent cost, prefer the new address |
155 | if it has the highest `set_src_cost'. That has the potential of |
156 | eliminating the most insns without additional costs, and it |
157 | is the same that cse.cc used to do. */ |
158 | if (gain == 0) |
159 | { |
160 | gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed_p: speed); |
161 | temporarily_undo_changes (old_num_changes); |
162 | gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed_p: speed); |
163 | redo_changes (old_num_changes); |
164 | } |
165 | |
166 | return (gain > 0); |
167 | } |
168 | |
169 | |
170 | namespace |
171 | { |
172 | class fwprop_propagation : public insn_propagation |
173 | { |
174 | public: |
175 | static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT; |
176 | static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1; |
177 | static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2; |
178 | |
179 | fwprop_propagation (insn_info *, set_info *, rtx, rtx); |
180 | |
181 | bool changed_mem_p () const { return result_flags & CHANGED_MEM; } |
182 | bool folded_to_constants_p () const; |
183 | bool likely_profitable_p () const; |
184 | |
185 | bool check_mem (int, rtx) final override; |
186 | void note_simplification (int, uint16_t, rtx, rtx) final override; |
187 | uint16_t classify_result (rtx, rtx); |
188 | |
189 | private: |
190 | const bool single_use_p; |
191 | const bool single_ebb_p; |
192 | }; |
193 | } |
194 | |
195 | /* Prepare to replace FROM with TO in USE_INSN. */ |
196 | |
197 | fwprop_propagation::fwprop_propagation (insn_info *use_insn, |
198 | set_info *def, rtx from, rtx to) |
199 | : insn_propagation (use_insn->rtl (), from, to), |
200 | single_use_p (def->single_nondebug_use ()), |
201 | single_ebb_p (use_insn->ebb () == def->ebb ()) |
202 | { |
203 | should_check_mems = true; |
204 | should_note_simplifications = true; |
205 | } |
206 | |
207 | /* MEM is the result of an address simplification, and temporarily |
208 | undoing changes OLD_NUM_CHANGES onwards restores the original address. |
209 | Return true if the propagation should continue, false if it has failed. */ |
210 | |
211 | bool |
212 | fwprop_propagation::check_mem (int old_num_changes, rtx mem) |
213 | { |
214 | if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, 0), |
215 | MEM_ADDR_SPACE (mem))) |
216 | { |
217 | failure_reason = "would create an invalid MEM" ; |
218 | return false; |
219 | } |
220 | |
221 | temporarily_undo_changes (old_num_changes); |
222 | bool can_simplify = can_simplify_addr (XEXP (mem, 0)); |
223 | redo_changes (old_num_changes); |
224 | if (!can_simplify) |
225 | { |
226 | failure_reason = "would replace a frame address" ; |
227 | return false; |
228 | } |
229 | |
230 | /* Copy propagations are always ok. Otherwise check the costs. */ |
231 | if (!(REG_P (from) && REG_P (to)) |
232 | && !should_replace_address (old_num_changes, mem, insn)) |
233 | { |
234 | failure_reason = "would increase the cost of a MEM" ; |
235 | return false; |
236 | } |
237 | |
238 | result_flags |= CHANGED_MEM; |
239 | return true; |
240 | } |
241 | |
242 | /* OLDX has been simplified to NEWX. Describe the change in terms of |
243 | result_flags. */ |
244 | |
245 | uint16_t |
246 | fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx) |
247 | { |
248 | if (CONSTANT_P (new_rtx)) |
249 | { |
250 | /* If OLD_RTX is a LO_SUM, then it presumably exists for a reason, |
251 | and NEW_RTX is likely not a legitimate address. We want it to |
252 | disappear if it is invalid. |
253 | |
254 | ??? Using the mode of the LO_SUM as the mode of the address |
255 | seems odd, but it was what the pre-SSA code did. */ |
256 | if (GET_CODE (old_rtx) == LO_SUM |
257 | && !memory_address_p (GET_MODE (old_rtx), new_rtx)) |
258 | return CONSTANT; |
259 | return CONSTANT | PROFITABLE; |
260 | } |
261 | |
262 | /* Allow replacements that simplify operations on a vector or complex |
263 | value to a component. The most prominent case is |
264 | (subreg ([vec_]concat ...)). */ |
265 | if (REG_P (new_rtx) |
266 | && !HARD_REGISTER_P (new_rtx) |
267 | && (VECTOR_MODE_P (GET_MODE (from)) |
268 | || COMPLEX_MODE_P (GET_MODE (from))) |
269 | && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from))) |
270 | return PROFITABLE; |
271 | |
272 | /* Allow (subreg (mem)) -> (mem) simplifications with the following |
273 | exceptions: |
274 | 1) Propagating (mem)s into multiple uses is not profitable. |
275 | 2) Propagating (mem)s across EBBs may not be profitable if the source EBB |
276 | runs less frequently. |
277 | 3) Propagating (mem)s into paradoxical (subreg)s is not profitable. |
278 | 4) Creating new (mem/v)s is not correct, since DCE will not remove the old |
279 | ones. */ |
280 | if (single_use_p |
281 | && single_ebb_p |
282 | && SUBREG_P (old_rtx) |
283 | && !paradoxical_subreg_p (x: old_rtx) |
284 | && MEM_P (new_rtx) |
285 | && !MEM_VOLATILE_P (new_rtx)) |
286 | return PROFITABLE; |
287 | |
288 | return 0; |
289 | } |
290 | |
291 | /* Record that OLD_RTX has been simplified to NEW_RTX. OLD_NUM_CHANGES |
292 | is the number of unrelated changes that had been made before processing |
293 | OLD_RTX and its subrtxes. OLD_RESULT_FLAGS is the value that result_flags |
294 | had at that point. */ |
295 | |
296 | void |
297 | fwprop_propagation::note_simplification (int old_num_changes, |
298 | uint16_t old_result_flags, |
299 | rtx old_rtx, rtx new_rtx) |
300 | { |
301 | result_flags &= ~(CONSTANT | PROFITABLE); |
302 | uint16_t new_flags = classify_result (old_rtx, new_rtx); |
303 | if (old_num_changes) |
304 | new_flags &= old_result_flags; |
305 | result_flags |= new_flags; |
306 | } |
307 | |
308 | /* Return true if all substitutions eventually folded to constants. */ |
309 | |
310 | bool |
311 | fwprop_propagation::folded_to_constants_p () const |
312 | { |
313 | /* If we're propagating a HIGH, require it to be folded with a |
314 | partnering LO_SUM. For example, a REG_EQUAL note with a register |
315 | replaced by an unfolded HIGH is not useful. */ |
316 | if (CONSTANT_P (to) && GET_CODE (to) != HIGH) |
317 | return true; |
318 | return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT); |
319 | } |
320 | |
321 | |
322 | /* Return true if it is worth keeping the result of the propagation, |
323 | false if it would increase the complexity of the pattern too much. */ |
324 | |
325 | bool |
326 | fwprop_propagation::likely_profitable_p () const |
327 | { |
328 | if (changed_mem_p ()) |
329 | return true; |
330 | |
331 | if (!(result_flags & UNSIMPLIFIED) |
332 | && (result_flags & PROFITABLE)) |
333 | return true; |
334 | |
335 | if (REG_P (to)) |
336 | return true; |
337 | |
338 | if (GET_CODE (to) == SUBREG |
339 | && REG_P (SUBREG_REG (to)) |
340 | && !paradoxical_subreg_p (x: to)) |
341 | return true; |
342 | |
343 | if (CONSTANT_P (to)) |
344 | return true; |
345 | |
346 | return false; |
347 | } |
348 | |
349 | /* Check that X has a single def. */ |
350 | |
351 | static bool |
352 | reg_single_def_p (rtx x) |
353 | { |
354 | return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x)); |
355 | } |
356 | |
357 | /* Try to substitute (set DEST SRC), which defines DEF, into note NOTE of |
358 | USE_INSN. Return the number of substitutions on success, otherwise return |
359 | -1 and leave USE_INSN unchanged. |
360 | |
361 | If REQUIRE_CONSTANT is true, require all substituted occurrences of SRC |
362 | to fold to a constant, so that the note does not use any more registers |
363 | than it did previously. If REQUIRE_CONSTANT is false, also allow the |
364 | substitution if it's something we'd normally allow for the main |
365 | instruction pattern. */ |
366 | |
367 | static int |
368 | try_fwprop_subst_note (insn_info *use_insn, set_info *def, |
369 | rtx note, rtx dest, rtx src, bool require_constant) |
370 | { |
371 | rtx_insn *use_rtl = use_insn->rtl (); |
372 | insn_info *def_insn = def->insn (); |
373 | |
374 | insn_change_watermark watermark; |
375 | fwprop_propagation prop (use_insn, def, dest, src); |
376 | if (!prop.apply_to_rvalue (&XEXP (note, 0))) |
377 | { |
378 | if (dump_file && (dump_flags & TDF_DETAILS)) |
379 | fprintf (stream: dump_file, format: "cannot propagate from insn %d into" |
380 | " notes of insn %d: %s\n" , def_insn->uid (), |
381 | use_insn->uid (), prop.failure_reason); |
382 | return -1; |
383 | } |
384 | |
385 | if (prop.num_replacements == 0) |
386 | return 0; |
387 | |
388 | if (require_constant) |
389 | { |
390 | if (!prop.folded_to_constants_p ()) |
391 | { |
392 | if (dump_file && (dump_flags & TDF_DETAILS)) |
393 | fprintf (stream: dump_file, format: "cannot propagate from insn %d into" |
394 | " notes of insn %d: %s\n" , def_insn->uid (), |
395 | use_insn->uid (), "wouldn't fold to constants" ); |
396 | return -1; |
397 | } |
398 | } |
399 | else |
400 | { |
401 | if (!prop.folded_to_constants_p () && !prop.likely_profitable_p ()) |
402 | { |
403 | if (dump_file && (dump_flags & TDF_DETAILS)) |
404 | fprintf (stream: dump_file, format: "cannot propagate from insn %d into" |
405 | " notes of insn %d: %s\n" , def_insn->uid (), |
406 | use_insn->uid (), "would increase complexity of node" ); |
407 | return -1; |
408 | } |
409 | } |
410 | |
411 | if (dump_file && (dump_flags & TDF_DETAILS)) |
412 | { |
413 | fprintf (stream: dump_file, format: "\nin notes of insn %d, replacing:\n " , |
414 | INSN_UID (insn: use_rtl)); |
415 | temporarily_undo_changes (0); |
416 | print_inline_rtx (dump_file, note, 2); |
417 | redo_changes (0); |
418 | fprintf (stream: dump_file, format: "\n with:\n " ); |
419 | print_inline_rtx (dump_file, note, 2); |
420 | fprintf (stream: dump_file, format: "\n" ); |
421 | } |
422 | watermark.keep (); |
423 | return prop.num_replacements; |
424 | } |
425 | |
426 | /* Try to substitute (set DEST SRC), which defines DEF, into location LOC of |
427 | USE_INSN's pattern. Return true on success, otherwise leave USE_INSN |
428 | unchanged. */ |
429 | |
430 | static bool |
431 | try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change, |
432 | set_info *def, rtx *loc, rtx dest, rtx src) |
433 | { |
434 | insn_info *use_insn = use_change.insn (); |
435 | rtx_insn *use_rtl = use_insn->rtl (); |
436 | insn_info *def_insn = def->insn (); |
437 | |
438 | insn_change_watermark watermark; |
439 | fwprop_propagation prop (use_insn, def, dest, src); |
440 | if (!prop.apply_to_pattern (loc)) |
441 | { |
442 | if (dump_file && (dump_flags & TDF_DETAILS)) |
443 | fprintf (stream: dump_file, format: "cannot propagate from insn %d into" |
444 | " insn %d: %s\n" , def_insn->uid (), use_insn->uid (), |
445 | prop.failure_reason); |
446 | return false; |
447 | } |
448 | |
449 | if (prop.num_replacements == 0) |
450 | return false; |
451 | |
452 | if (!prop.likely_profitable_p () |
453 | && (prop.changed_mem_p () |
454 | || contains_mem_rtx_p (x: src) |
455 | || use_insn->is_asm () |
456 | || !single_set (insn: use_rtl))) |
457 | { |
458 | if (dump_file && (dump_flags & TDF_DETAILS)) |
459 | fprintf (stream: dump_file, format: "cannot propagate from insn %d into" |
460 | " insn %d: %s\n" , def_insn->uid (), use_insn->uid (), |
461 | "would increase complexity of pattern" ); |
462 | return false; |
463 | } |
464 | |
465 | if (dump_file && (dump_flags & TDF_DETAILS)) |
466 | { |
467 | fprintf (stream: dump_file, format: "\npropagating insn %d into insn %d, replacing:\n" , |
468 | def_insn->uid (), use_insn->uid ()); |
469 | temporarily_undo_changes (0); |
470 | print_rtl_single (dump_file, PATTERN (insn: use_rtl)); |
471 | redo_changes (0); |
472 | } |
473 | |
474 | /* ??? In theory, it should be better to use insn costs rather than |
475 | set_src_costs here. That would involve replacing this code with |
476 | change_is_worthwhile. */ |
477 | bool ok = recog (watermark&: attempt, change&: use_change); |
478 | if (ok && !prop.changed_mem_p () && !use_insn->is_asm ()) |
479 | if (rtx use_set = single_set (insn: use_rtl)) |
480 | { |
481 | bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn: use_rtl)); |
482 | temporarily_undo_changes (0); |
483 | auto old_cost = set_src_cost (SET_SRC (use_set), |
484 | GET_MODE (SET_DEST (use_set)), speed_p: speed); |
485 | redo_changes (0); |
486 | auto new_cost = set_src_cost (SET_SRC (use_set), |
487 | GET_MODE (SET_DEST (use_set)), speed_p: speed); |
488 | if (new_cost > old_cost |
489 | || (new_cost == old_cost && !prop.likely_profitable_p ())) |
490 | { |
491 | if (dump_file) |
492 | fprintf (stream: dump_file, format: "change not profitable" |
493 | " (cost %d -> cost %d)\n" , old_cost, new_cost); |
494 | ok = false; |
495 | } |
496 | } |
497 | |
498 | if (!ok) |
499 | { |
500 | /* The pattern didn't match, but if all uses of SRC folded to |
501 | constants, we can add a REG_EQUAL note for the result, if there |
502 | isn't one already. */ |
503 | if (!prop.folded_to_constants_p ()) |
504 | return false; |
505 | |
506 | /* Test this first to avoid creating an unnecessary copy of SRC. */ |
507 | if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX)) |
508 | return false; |
509 | |
510 | rtx set = set_for_reg_notes (use_rtl); |
511 | if (!set || !REG_P (SET_DEST (set))) |
512 | return false; |
513 | |
514 | rtx value = copy_rtx (SET_SRC (set)); |
515 | cancel_changes (0); |
516 | |
517 | /* If there are any paradoxical SUBREGs, drop the REG_EQUAL note, |
518 | because the bits in there can be anything and so might not |
519 | match the REG_EQUAL note content. See PR70574. */ |
520 | if (contains_paradoxical_subreg_p (SET_SRC (set))) |
521 | return false; |
522 | |
523 | if (dump_file && (dump_flags & TDF_DETAILS)) |
524 | fprintf (stream: dump_file, format: " Setting REG_EQUAL note\n" ); |
525 | |
526 | return set_unique_reg_note (use_rtl, REG_EQUAL, value); |
527 | } |
528 | |
529 | rtx *note_ptr = ®_NOTES (use_rtl); |
530 | while (rtx note = *note_ptr) |
531 | { |
532 | if ((REG_NOTE_KIND (note) == REG_EQUAL |
533 | || REG_NOTE_KIND (note) == REG_EQUIV) |
534 | && try_fwprop_subst_note (use_insn, def, note, dest, src, require_constant: false) < 0) |
535 | { |
536 | *note_ptr = XEXP (note, 1); |
537 | free_EXPR_LIST_node (note); |
538 | } |
539 | else |
540 | note_ptr = &XEXP (note, 1); |
541 | } |
542 | |
543 | confirm_change_group (); |
544 | crtl->ssa->change_insn (change&: use_change); |
545 | num_changes++; |
546 | return true; |
547 | } |
548 | |
549 | /* Try to substitute (set DEST SRC), which defines DEF, into USE_INSN's notes, |
550 | given that it was not possible to do this for USE_INSN's main pattern. |
551 | Return true on success, otherwise leave USE_INSN unchanged. */ |
552 | |
553 | static bool |
554 | try_fwprop_subst_notes (insn_info *use_insn, set_info *def, |
555 | rtx dest, rtx src) |
556 | { |
557 | rtx_insn *use_rtl = use_insn->rtl (); |
558 | for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, 1)) |
559 | if ((REG_NOTE_KIND (note) == REG_EQUAL |
560 | || REG_NOTE_KIND (note) == REG_EQUIV) |
561 | && try_fwprop_subst_note (use_insn, def, note, dest, src, require_constant: true) > 0) |
562 | { |
563 | confirm_change_group (); |
564 | return true; |
565 | } |
566 | |
567 | return false; |
568 | } |
569 | |
570 | /* Check whether we could validly substitute (set DEST SRC), which defines DEF, |
571 | into USE. If so, first try performing the substitution in location LOC |
572 | of USE->insn ()'s pattern. If that fails, try instead to substitute |
573 | into the notes. |
574 | |
575 | Return true on success, otherwise leave USE_INSN unchanged. */ |
576 | |
577 | static bool |
578 | try_fwprop_subst (use_info *use, set_info *def, |
579 | rtx *loc, rtx dest, rtx src) |
580 | { |
581 | insn_info *use_insn = use->insn (); |
582 | insn_info *def_insn = def->insn (); |
583 | |
584 | auto attempt = crtl->ssa->new_change_attempt (); |
585 | use_array src_uses = remove_note_accesses (watermark&: attempt, accesses: def_insn->uses ()); |
586 | |
587 | /* ??? Not really a meaningful test: it means we can propagate arithmetic |
588 | involving hard registers but not bare references to them. A better |
589 | test would be to iterate over src_uses looking for hard registers |
590 | that are not fixed. */ |
591 | if (REG_P (src) && HARD_REGISTER_P (src)) |
592 | return false; |
593 | |
594 | /* ??? It would be better to make this EBB-based instead. That would |
595 | involve checking for equal EBBs rather than equal BBs and trying |
596 | to make the uses available at use_insn->ebb ()->first_bb (). */ |
597 | if (def_insn->bb () != use_insn->bb ()) |
598 | { |
599 | src_uses = crtl->ssa->make_uses_available (watermark&: attempt, uses: src_uses, |
600 | bb: use_insn->bb (), |
601 | will_be_debug_uses: use_insn->is_debug_insn ()); |
602 | if (!src_uses.is_valid ()) |
603 | return false; |
604 | } |
605 | |
606 | insn_change use_change (use_insn); |
607 | use_change.new_uses = merge_access_arrays (watermark&: attempt, accesses1: use_change.new_uses, |
608 | accesses2: src_uses); |
609 | if (!use_change.new_uses.is_valid ()) |
610 | return false; |
611 | |
612 | /* ??? We could allow movement within the EBB by adding: |
613 | |
614 | use_change.move_range = use_insn->ebb ()->insn_range (); */ |
615 | if (!restrict_movement (change&: use_change)) |
616 | return false; |
617 | |
618 | return (try_fwprop_subst_pattern (attempt, use_change, def, loc, dest, src) |
619 | || try_fwprop_subst_notes (use_insn, def, dest, src)); |
620 | } |
621 | |
622 | /* For the given single_set INSN, containing SRC known to be a |
623 | ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN |
624 | is redundant due to the register being set by a LOAD_EXTEND_OP |
625 | load from memory. */ |
626 | |
627 | static bool |
628 | free_load_extend (rtx src, insn_info *insn) |
629 | { |
630 | rtx reg = XEXP (src, 0); |
631 | if (load_extend_op (GET_MODE (reg)) != GET_CODE (src)) |
632 | return false; |
633 | |
634 | def_info *def = nullptr; |
635 | for (use_info *use : insn->uses ()) |
636 | if (use->regno () == REGNO (reg)) |
637 | { |
638 | def = use->def (); |
639 | break; |
640 | } |
641 | |
642 | if (!def) |
643 | return false; |
644 | |
645 | insn_info *def_insn = def->insn (); |
646 | if (def_insn->is_artificial ()) |
647 | return false; |
648 | |
649 | rtx_insn *def_rtl = def_insn->rtl (); |
650 | if (NONJUMP_INSN_P (def_rtl)) |
651 | { |
652 | rtx patt = PATTERN (insn: def_rtl); |
653 | |
654 | if (GET_CODE (patt) == SET |
655 | && GET_CODE (SET_SRC (patt)) == MEM |
656 | && rtx_equal_p (SET_DEST (patt), reg)) |
657 | return true; |
658 | } |
659 | return false; |
660 | } |
661 | |
662 | /* Subroutine of forward_propagate_subreg that handles a use of DEST |
663 | in REF. The other parameters are the same. */ |
664 | |
665 | static bool |
666 | forward_propagate_subreg (use_info *use, set_info *def, |
667 | rtx dest, rtx src, df_ref ref) |
668 | { |
669 | scalar_int_mode int_use_mode, src_mode; |
670 | |
671 | /* Only consider subregs... */ |
672 | rtx use_reg = DF_REF_REG (ref); |
673 | machine_mode use_mode = GET_MODE (use_reg); |
674 | if (GET_CODE (use_reg) != SUBREG |
675 | || GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest)) |
676 | return false; |
677 | |
678 | /* ??? Replacing throughout the pattern would help for match_dups. */ |
679 | rtx *loc = DF_REF_LOC (ref); |
680 | if (paradoxical_subreg_p (x: use_reg)) |
681 | { |
682 | /* If this is a paradoxical SUBREG, we have no idea what value the |
683 | extra bits would have. However, if the operand is equivalent to |
684 | a SUBREG whose operand is the same as our mode, and all the modes |
685 | are within a word, we can just use the inner operand because |
686 | these SUBREGs just say how to treat the register. */ |
687 | if (GET_CODE (src) == SUBREG |
688 | && REG_P (SUBREG_REG (src)) |
689 | && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER |
690 | && GET_MODE (SUBREG_REG (src)) == use_mode |
691 | && subreg_lowpart_p (src)) |
692 | return try_fwprop_subst (use, def, loc, dest: use_reg, SUBREG_REG (src)); |
693 | } |
694 | |
695 | /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG |
696 | is the low part of the reg being extended then just use the inner |
697 | operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will |
698 | be removed due to it matching a LOAD_EXTEND_OP load from memory, |
699 | or due to the operation being a no-op when applied to registers. |
700 | For example, if we have: |
701 | |
702 | A: (set (reg:DI X) (sign_extend:DI (reg:SI Y))) |
703 | B: (... (subreg:SI (reg:DI X)) ...) |
704 | |
705 | and mode_rep_extended says that Y is already sign-extended, |
706 | the backend will typically allow A to be combined with the |
707 | definition of Y or, failing that, allow A to be deleted after |
708 | reload through register tying. Introducing more uses of Y |
709 | prevents both optimisations. */ |
710 | else if (is_a <scalar_int_mode> (m: use_mode, result: &int_use_mode) |
711 | && subreg_lowpart_p (use_reg)) |
712 | { |
713 | if ((GET_CODE (src) == ZERO_EXTEND |
714 | || GET_CODE (src) == SIGN_EXTEND) |
715 | && is_a <scalar_int_mode> (GET_MODE (src), result: &src_mode) |
716 | && REG_P (XEXP (src, 0)) |
717 | && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER |
718 | && GET_MODE (XEXP (src, 0)) == use_mode |
719 | && !free_load_extend (src, insn: def->insn ()) |
720 | && (targetm.mode_rep_extended (int_use_mode, src_mode) |
721 | != (int) GET_CODE (src))) |
722 | return try_fwprop_subst (use, def, loc, dest: use_reg, XEXP (src, 0)); |
723 | } |
724 | |
725 | return false; |
726 | } |
727 | |
728 | /* Try to substitute (set DEST SRC), which defines DEF, into USE and simplify |
729 | the result, handling cases where DEST is used in a subreg and where |
730 | applying that subreg to SRC results in a useful simplification. */ |
731 | |
732 | static bool |
733 | forward_propagate_subreg (use_info *use, set_info *def, rtx dest, rtx src) |
734 | { |
735 | if (!use->includes_subregs () || !REG_P (dest)) |
736 | return false; |
737 | |
738 | if (GET_CODE (src) != SUBREG |
739 | && GET_CODE (src) != ZERO_EXTEND |
740 | && GET_CODE (src) != SIGN_EXTEND) |
741 | return false; |
742 | |
743 | rtx_insn *use_rtl = use->insn ()->rtl (); |
744 | df_ref ref; |
745 | |
746 | FOR_EACH_INSN_USE (ref, use_rtl) |
747 | if (DF_REF_REGNO (ref) == use->regno () |
748 | && forward_propagate_subreg (use, def, dest, src, ref)) |
749 | return true; |
750 | |
751 | FOR_EACH_INSN_EQ_USE (ref, use_rtl) |
752 | if (DF_REF_REGNO (ref) == use->regno () |
753 | && forward_propagate_subreg (use, def, dest, src, ref)) |
754 | return true; |
755 | |
756 | return false; |
757 | } |
758 | |
759 | /* Try to substitute (set DEST SRC), which defines DEF, into USE and |
760 | simplify the result. */ |
761 | |
762 | static bool |
763 | forward_propagate_and_simplify (use_info *use, set_info *def, |
764 | rtx dest, rtx src) |
765 | { |
766 | insn_info *use_insn = use->insn (); |
767 | rtx_insn *use_rtl = use_insn->rtl (); |
768 | insn_info *def_insn = def->insn (); |
769 | |
770 | /* ??? This check seems unnecessary. We should be able to propagate |
771 | into any kind of instruction, regardless of whether it's a single set. |
772 | It seems odd to be more permissive with asms than normal instructions. */ |
773 | bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ()); |
774 | rtx use_set = single_set (insn: use_rtl); |
775 | if (need_single_set && !use_set) |
776 | return false; |
777 | |
778 | /* Do not propagate into PC etc. |
779 | |
780 | ??? This too seems unnecessary. The current code should work correctly |
781 | without it, including cases where jumps become unconditional. */ |
782 | if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode) |
783 | return false; |
784 | |
785 | /* In __asm don't replace if src might need more registers than |
786 | reg, as that could increase register pressure on the __asm. */ |
787 | if (use_insn->is_asm () && def_insn->uses ().size () > 1) |
788 | return false; |
789 | |
790 | /* Check if the def is loading something from the constant pool; in this |
791 | case we would undo optimization such as compress_float_constant. |
792 | Still, we can set a REG_EQUAL note. */ |
793 | if (MEM_P (src) && MEM_READONLY_P (src)) |
794 | { |
795 | rtx x = avoid_constant_pool_reference (src); |
796 | rtx note_set; |
797 | if (x != src |
798 | && (note_set = set_for_reg_notes (use_rtl)) |
799 | && REG_P (SET_DEST (note_set)) |
800 | && !contains_paradoxical_subreg_p (SET_SRC (note_set))) |
801 | { |
802 | rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX); |
803 | rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (note_set); |
804 | rtx new_rtx = simplify_replace_rtx (old_rtx, src, x); |
805 | if (old_rtx != new_rtx) |
806 | set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx)); |
807 | } |
808 | return false; |
809 | } |
810 | |
811 | /* ??? Unconditionally propagating into PATTERN would work better |
812 | for instructions that have match_dups. */ |
813 | rtx *loc = need_single_set ? &use_set : &PATTERN (insn: use_rtl); |
814 | return try_fwprop_subst (use, def, loc, dest, src); |
815 | } |
816 | |
817 | /* Given a use USE of an insn, if it has a single reaching |
818 | definition, try to forward propagate it into that insn. |
819 | Return true if something changed. |
820 | |
821 | REG_PROP_ONLY is true if we should only propagate register copies. */ |
822 | |
823 | static bool |
824 | forward_propagate_into (use_info *use, bool reg_prop_only = false) |
825 | { |
826 | if (use->includes_read_writes ()) |
827 | return false; |
828 | |
829 | /* Disregard uninitialized uses. */ |
830 | set_info *def = use->def (); |
831 | if (!def) |
832 | return false; |
833 | |
834 | /* Only consider single-register definitions. This could be relaxed, |
835 | but it should rarely be needed before RA. */ |
836 | def = look_through_degenerate_phi (access: def); |
837 | if (def->includes_multiregs ()) |
838 | return false; |
839 | |
840 | /* Only consider uses whose definition comes from a real instruction. */ |
841 | insn_info *def_insn = def->insn (); |
842 | if (def_insn->is_artificial ()) |
843 | return false; |
844 | |
845 | rtx_insn *def_rtl = def_insn->rtl (); |
846 | if (!NONJUMP_INSN_P (def_rtl)) |
847 | return false; |
848 | /* ??? This seems an unnecessary restriction. We can easily tell |
849 | which set the definition comes from. */ |
850 | if (multiple_sets (def_rtl)) |
851 | return false; |
852 | rtx def_set = simple_regno_set (PATTERN (insn: def_rtl), def->regno ()); |
853 | if (!def_set) |
854 | return false; |
855 | |
856 | rtx dest = SET_DEST (def_set); |
857 | rtx src = SET_SRC (def_set); |
858 | if (volatile_refs_p (src)) |
859 | return false; |
860 | |
861 | /* Allow propagations into a loop only for reg-to-reg copies, since |
862 | replacing one register by another shouldn't increase the cost. |
863 | Propagations from inner loop to outer loop should also be ok. */ |
864 | struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father; |
865 | struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father; |
866 | if ((reg_prop_only |
867 | || (def_loop != use_loop |
868 | && !flow_loop_nested_p (use_loop, def_loop))) |
869 | && (!reg_single_def_p (x: dest) || !reg_single_def_p (x: src))) |
870 | return false; |
871 | |
872 | /* Don't substitute into a non-local goto, this confuses CFG. */ |
873 | insn_info *use_insn = use->insn (); |
874 | rtx_insn *use_rtl = use_insn->rtl (); |
875 | if (JUMP_P (use_rtl) |
876 | && find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX)) |
877 | return false; |
878 | |
879 | if (forward_propagate_and_simplify (use, def, dest, src) |
880 | || forward_propagate_subreg (use, def, dest, src)) |
881 | return true; |
882 | |
883 | return false; |
884 | } |
885 | |
886 | static void |
887 | fwprop_init (void) |
888 | { |
889 | num_changes = 0; |
890 | calculate_dominance_info (CDI_DOMINATORS); |
891 | |
892 | /* We do not always want to propagate into loops, so we have to find |
893 | loops and be careful about them. Avoid CFG modifications so that |
894 | we don't have to update dominance information afterwards for |
895 | build_single_def_use_links. */ |
896 | loop_optimizer_init (AVOID_CFG_MODIFICATIONS); |
897 | |
898 | df_analyze (); |
899 | crtl->ssa = new rtl_ssa::function_info (cfun); |
900 | } |
901 | |
902 | static void |
903 | fwprop_done (void) |
904 | { |
905 | loop_optimizer_finalize (); |
906 | |
907 | crtl->ssa->perform_pending_updates (); |
908 | free_dominance_info (CDI_DOMINATORS); |
909 | cleanup_cfg (0); |
910 | |
911 | delete crtl->ssa; |
912 | crtl->ssa = nullptr; |
913 | |
914 | delete_trivially_dead_insns (get_insns (), max_reg_num ()); |
915 | |
916 | if (dump_file) |
917 | fprintf (stream: dump_file, |
918 | format: "\nNumber of successful forward propagations: %d\n\n" , |
919 | num_changes); |
920 | } |
921 | |
922 | /* Try to optimize INSN, returning true if something changes. |
923 | FWPROP_ADDR_P is true if we are running fwprop_addr rather than |
924 | the full fwprop. */ |
925 | |
926 | static bool |
927 | fwprop_insn (insn_info *insn, bool fwprop_addr_p) |
928 | { |
929 | for (use_info *use : insn->uses ()) |
930 | { |
931 | if (use->is_mem ()) |
932 | continue; |
933 | /* ??? The choices here follow those in the pre-SSA code. */ |
934 | if (!use->includes_address_uses ()) |
935 | { |
936 | if (forward_propagate_into (use, reg_prop_only: fwprop_addr_p)) |
937 | return true; |
938 | } |
939 | else |
940 | { |
941 | struct loop *loop = insn->bb ()->cfg_bb ()->loop_father; |
942 | /* The outermost loop is not really a loop. */ |
943 | if (loop == NULL || loop_outer (loop) == NULL) |
944 | { |
945 | if (forward_propagate_into (use, reg_prop_only: fwprop_addr_p)) |
946 | return true; |
947 | } |
948 | else if (fwprop_addr_p) |
949 | { |
950 | if (forward_propagate_into (use, reg_prop_only: false)) |
951 | return true; |
952 | } |
953 | } |
954 | } |
955 | return false; |
956 | } |
957 | |
958 | /* Main entry point. */ |
959 | |
960 | static bool |
961 | gate_fwprop (void) |
962 | { |
963 | return optimize > 0 && flag_forward_propagate; |
964 | } |
965 | |
966 | static unsigned int |
967 | fwprop (bool fwprop_addr_p) |
968 | { |
969 | fwprop_init (); |
970 | |
971 | /* Go through all the instructions (including debug instructions) looking |
972 | for uses that we could propagate into. |
973 | |
974 | Do not forward propagate addresses into loops until after unrolling. |
975 | CSE did so because it was able to fix its own mess, but we are not. */ |
976 | |
977 | insn_info *next; |
978 | |
979 | /* ??? This code uses a worklist in order to preserve the behavior |
980 | of the pre-SSA implementation. It would be better to instead |
981 | iterate on each instruction until no more propagations are |
982 | possible, then move on to the next. */ |
983 | auto_vec<insn_info *> worklist; |
984 | for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next) |
985 | { |
986 | next = insn->next_any_insn (); |
987 | if (insn->can_be_optimized () || insn->is_debug_insn ()) |
988 | if (fwprop_insn (insn, fwprop_addr_p)) |
989 | worklist.safe_push (obj: insn); |
990 | } |
991 | for (unsigned int i = 0; i < worklist.length (); ++i) |
992 | { |
993 | insn_info *insn = worklist[i]; |
994 | if (fwprop_insn (insn, fwprop_addr_p)) |
995 | worklist.safe_push (obj: insn); |
996 | } |
997 | |
998 | fwprop_done (); |
999 | return 0; |
1000 | } |
1001 | |
1002 | namespace { |
1003 | |
1004 | const pass_data pass_data_rtl_fwprop = |
1005 | { |
1006 | .type: RTL_PASS, /* type */ |
1007 | .name: "fwprop1" , /* name */ |
1008 | .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */ |
1009 | .tv_id: TV_FWPROP, /* tv_id */ |
1010 | .properties_required: 0, /* properties_required */ |
1011 | .properties_provided: 0, /* properties_provided */ |
1012 | .properties_destroyed: 0, /* properties_destroyed */ |
1013 | .todo_flags_start: 0, /* todo_flags_start */ |
1014 | TODO_df_finish, /* todo_flags_finish */ |
1015 | }; |
1016 | |
1017 | class pass_rtl_fwprop : public rtl_opt_pass |
1018 | { |
1019 | public: |
1020 | pass_rtl_fwprop (gcc::context *ctxt) |
1021 | : rtl_opt_pass (pass_data_rtl_fwprop, ctxt) |
1022 | {} |
1023 | |
1024 | /* opt_pass methods: */ |
1025 | bool gate (function *) final override { return gate_fwprop (); } |
1026 | unsigned int execute (function *) final override { return fwprop (fwprop_addr_p: false); } |
1027 | |
1028 | }; // class pass_rtl_fwprop |
1029 | |
1030 | } // anon namespace |
1031 | |
1032 | rtl_opt_pass * |
1033 | make_pass_rtl_fwprop (gcc::context *ctxt) |
1034 | { |
1035 | return new pass_rtl_fwprop (ctxt); |
1036 | } |
1037 | |
1038 | namespace { |
1039 | |
1040 | const pass_data pass_data_rtl_fwprop_addr = |
1041 | { |
1042 | .type: RTL_PASS, /* type */ |
1043 | .name: "fwprop2" , /* name */ |
1044 | .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */ |
1045 | .tv_id: TV_FWPROP, /* tv_id */ |
1046 | .properties_required: 0, /* properties_required */ |
1047 | .properties_provided: 0, /* properties_provided */ |
1048 | .properties_destroyed: 0, /* properties_destroyed */ |
1049 | .todo_flags_start: 0, /* todo_flags_start */ |
1050 | TODO_df_finish, /* todo_flags_finish */ |
1051 | }; |
1052 | |
1053 | class pass_rtl_fwprop_addr : public rtl_opt_pass |
1054 | { |
1055 | public: |
1056 | pass_rtl_fwprop_addr (gcc::context *ctxt) |
1057 | : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt) |
1058 | {} |
1059 | |
1060 | /* opt_pass methods: */ |
1061 | bool gate (function *) final override { return gate_fwprop (); } |
1062 | unsigned int execute (function *) final override { return fwprop (fwprop_addr_p: true); } |
1063 | |
1064 | }; // class pass_rtl_fwprop_addr |
1065 | |
1066 | } // anon namespace |
1067 | |
1068 | rtl_opt_pass * |
1069 | make_pass_rtl_fwprop_addr (gcc::context *ctxt) |
1070 | { |
1071 | return new pass_rtl_fwprop_addr (ctxt); |
1072 | } |
1073 | |