1/* RTL-based forward propagation pass for GNU compiler.
2 Copyright (C) 2005-2024 Free Software Foundation, Inc.
3 Contributed by Paolo Bonzini and Steven Bosscher.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21#define INCLUDE_ALGORITHM
22#define INCLUDE_FUNCTIONAL
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "backend.h"
27#include "rtl.h"
28#include "rtlanal.h"
29#include "df.h"
30#include "rtl-ssa.h"
31
32#include "predict.h"
33#include "cfgrtl.h"
34#include "cfgcleanup.h"
35#include "cfgloop.h"
36#include "tree-pass.h"
37#include "rtl-iter.h"
38#include "target.h"
39
40/* This pass does simple forward propagation and simplification when an
41 operand of an insn can only come from a single def. This pass uses
42 RTL SSA, so it is global. However, we only do limited analysis of
43 available expressions.
44
45 1) The pass tries to propagate the source of the def into the use,
46 and checks if the result is independent of the substituted value.
47 For example, the high word of a (zero_extend:DI (reg:SI M)) is always
48 zero, independent of the source register.
49
50 In particular, we propagate constants into the use site. Sometimes
51 RTL expansion did not put the constant in the same insn on purpose,
52 to satisfy a predicate, and the result will fail to be recognized;
53 but this happens rarely and in this case we can still create a
54 REG_EQUAL note. For multi-word operations, this
55
56 (set (subreg:SI (reg:DI 120) 0) (const_int 0))
57 (set (subreg:SI (reg:DI 120) 4) (const_int -1))
58 (set (subreg:SI (reg:DI 122) 0)
59 (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
60 (set (subreg:SI (reg:DI 122) 4)
61 (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
62
63 can be simplified to the much simpler
64
65 (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
66 (set (subreg:SI (reg:DI 122) 4) (const_int -1))
67
68 This particular propagation is also effective at putting together
69 complex addressing modes. We are more aggressive inside MEMs, in
70 that all definitions are propagated if the use is in a MEM; if the
71 result is a valid memory address we check address_cost to decide
72 whether the substitution is worthwhile.
73
74 2) The pass propagates register copies. This is not as effective as
75 the copy propagation done by CSE's canon_reg, which works by walking
76 the instruction chain, it can help the other transformations.
77
78 We should consider removing this optimization, and instead reorder the
79 RTL passes, because GCSE does this transformation too. With some luck,
80 the CSE pass at the end of rest_of_handle_gcse could also go away.
81
82 3) The pass looks for paradoxical subregs that are actually unnecessary.
83 Things like this:
84
85 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
86 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
87 (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
88 (subreg:SI (reg:QI 121) 0)))
89
90 are very common on machines that can only do word-sized operations.
91 For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
92 if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
93 we can replace the paradoxical subreg with simply (reg:WIDE M). The
94 above will simplify this to
95
96 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
97 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
98 (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
99
100 where the first two insns are now dead. */
101
102using namespace rtl_ssa;
103
104static int num_changes;
105
106/* Do not try to replace constant addresses or addresses of local and
107 argument slots. These MEM expressions are made only once and inserted
108 in many instructions, as well as being used to control symbol table
109 output. It is not safe to clobber them.
110
111 There are some uncommon cases where the address is already in a register
112 for some reason, but we cannot take advantage of that because we have
113 no easy way to unshare the MEM. In addition, looking up all stack
114 addresses is costly. */
115
116static bool
117can_simplify_addr (rtx addr)
118{
119 rtx reg;
120
121 if (CONSTANT_ADDRESS_P (addr))
122 return false;
123
124 if (GET_CODE (addr) == PLUS)
125 reg = XEXP (addr, 0);
126 else
127 reg = addr;
128
129 return (!REG_P (reg)
130 || (REGNO (reg) != FRAME_POINTER_REGNUM
131 && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
132 && REGNO (reg) != ARG_POINTER_REGNUM));
133}
134
135/* MEM is the result of an address simplification, and temporarily
136 undoing changes OLD_NUM_CHANGES onwards restores the original address.
137 Return whether it is good to use the new address instead of the
138 old one. INSN is the containing instruction. */
139
140static bool
141should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn)
142{
143 int gain;
144
145 /* Prefer the new address if it is less expensive. */
146 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
147 temporarily_undo_changes (old_num_changes);
148 gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
149 MEM_ADDR_SPACE (mem), speed);
150 redo_changes (old_num_changes);
151 gain -= address_cost (XEXP (mem, 0), GET_MODE (mem),
152 MEM_ADDR_SPACE (mem), speed);
153
154 /* If the addresses have equivalent cost, prefer the new address
155 if it has the highest `set_src_cost'. That has the potential of
156 eliminating the most insns without additional costs, and it
157 is the same that cse.cc used to do. */
158 if (gain == 0)
159 {
160 gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed_p: speed);
161 temporarily_undo_changes (old_num_changes);
162 gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed_p: speed);
163 redo_changes (old_num_changes);
164 }
165
166 return (gain > 0);
167}
168
169
170namespace
171{
172 class fwprop_propagation : public insn_propagation
173 {
174 public:
175 static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT;
176 static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1;
177 static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2;
178
179 fwprop_propagation (insn_info *, set_info *, rtx, rtx);
180
181 bool changed_mem_p () const { return result_flags & CHANGED_MEM; }
182 bool folded_to_constants_p () const;
183 bool likely_profitable_p () const;
184
185 bool check_mem (int, rtx) final override;
186 void note_simplification (int, uint16_t, rtx, rtx) final override;
187 uint16_t classify_result (rtx, rtx);
188
189 private:
190 const bool single_use_p;
191 const bool single_ebb_p;
192 };
193}
194
195/* Prepare to replace FROM with TO in USE_INSN. */
196
197fwprop_propagation::fwprop_propagation (insn_info *use_insn,
198 set_info *def, rtx from, rtx to)
199 : insn_propagation (use_insn->rtl (), from, to),
200 single_use_p (def->single_nondebug_use ()),
201 single_ebb_p (use_insn->ebb () == def->ebb ())
202{
203 should_check_mems = true;
204 should_note_simplifications = true;
205}
206
207/* MEM is the result of an address simplification, and temporarily
208 undoing changes OLD_NUM_CHANGES onwards restores the original address.
209 Return true if the propagation should continue, false if it has failed. */
210
211bool
212fwprop_propagation::check_mem (int old_num_changes, rtx mem)
213{
214 if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, 0),
215 MEM_ADDR_SPACE (mem)))
216 {
217 failure_reason = "would create an invalid MEM";
218 return false;
219 }
220
221 temporarily_undo_changes (old_num_changes);
222 bool can_simplify = can_simplify_addr (XEXP (mem, 0));
223 redo_changes (old_num_changes);
224 if (!can_simplify)
225 {
226 failure_reason = "would replace a frame address";
227 return false;
228 }
229
230 /* Copy propagations are always ok. Otherwise check the costs. */
231 if (!(REG_P (from) && REG_P (to))
232 && !should_replace_address (old_num_changes, mem, insn))
233 {
234 failure_reason = "would increase the cost of a MEM";
235 return false;
236 }
237
238 result_flags |= CHANGED_MEM;
239 return true;
240}
241
242/* OLDX has been simplified to NEWX. Describe the change in terms of
243 result_flags. */
244
245uint16_t
246fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx)
247{
248 if (CONSTANT_P (new_rtx))
249 {
250 /* If OLD_RTX is a LO_SUM, then it presumably exists for a reason,
251 and NEW_RTX is likely not a legitimate address. We want it to
252 disappear if it is invalid.
253
254 ??? Using the mode of the LO_SUM as the mode of the address
255 seems odd, but it was what the pre-SSA code did. */
256 if (GET_CODE (old_rtx) == LO_SUM
257 && !memory_address_p (GET_MODE (old_rtx), new_rtx))
258 return CONSTANT;
259 return CONSTANT | PROFITABLE;
260 }
261
262 /* Allow replacements that simplify operations on a vector or complex
263 value to a component. The most prominent case is
264 (subreg ([vec_]concat ...)). */
265 if (REG_P (new_rtx)
266 && !HARD_REGISTER_P (new_rtx)
267 && (VECTOR_MODE_P (GET_MODE (from))
268 || COMPLEX_MODE_P (GET_MODE (from)))
269 && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from)))
270 return PROFITABLE;
271
272 /* Allow (subreg (mem)) -> (mem) simplifications with the following
273 exceptions:
274 1) Propagating (mem)s into multiple uses is not profitable.
275 2) Propagating (mem)s across EBBs may not be profitable if the source EBB
276 runs less frequently.
277 3) Propagating (mem)s into paradoxical (subreg)s is not profitable.
278 4) Creating new (mem/v)s is not correct, since DCE will not remove the old
279 ones. */
280 if (single_use_p
281 && single_ebb_p
282 && SUBREG_P (old_rtx)
283 && !paradoxical_subreg_p (x: old_rtx)
284 && MEM_P (new_rtx)
285 && !MEM_VOLATILE_P (new_rtx))
286 return PROFITABLE;
287
288 return 0;
289}
290
291/* Record that OLD_RTX has been simplified to NEW_RTX. OLD_NUM_CHANGES
292 is the number of unrelated changes that had been made before processing
293 OLD_RTX and its subrtxes. OLD_RESULT_FLAGS is the value that result_flags
294 had at that point. */
295
296void
297fwprop_propagation::note_simplification (int old_num_changes,
298 uint16_t old_result_flags,
299 rtx old_rtx, rtx new_rtx)
300{
301 result_flags &= ~(CONSTANT | PROFITABLE);
302 uint16_t new_flags = classify_result (old_rtx, new_rtx);
303 if (old_num_changes)
304 new_flags &= old_result_flags;
305 result_flags |= new_flags;
306}
307
308/* Return true if all substitutions eventually folded to constants. */
309
310bool
311fwprop_propagation::folded_to_constants_p () const
312{
313 /* If we're propagating a HIGH, require it to be folded with a
314 partnering LO_SUM. For example, a REG_EQUAL note with a register
315 replaced by an unfolded HIGH is not useful. */
316 if (CONSTANT_P (to) && GET_CODE (to) != HIGH)
317 return true;
318 return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT);
319}
320
321
322/* Return true if it is worth keeping the result of the propagation,
323 false if it would increase the complexity of the pattern too much. */
324
325bool
326fwprop_propagation::likely_profitable_p () const
327{
328 if (changed_mem_p ())
329 return true;
330
331 if (!(result_flags & UNSIMPLIFIED)
332 && (result_flags & PROFITABLE))
333 return true;
334
335 if (REG_P (to))
336 return true;
337
338 if (GET_CODE (to) == SUBREG
339 && REG_P (SUBREG_REG (to))
340 && !paradoxical_subreg_p (x: to))
341 return true;
342
343 if (CONSTANT_P (to))
344 return true;
345
346 return false;
347}
348
349/* Check that X has a single def. */
350
351static bool
352reg_single_def_p (rtx x)
353{
354 return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
355}
356
357/* Try to substitute (set DEST SRC), which defines DEF, into note NOTE of
358 USE_INSN. Return the number of substitutions on success, otherwise return
359 -1 and leave USE_INSN unchanged.
360
361 If REQUIRE_CONSTANT is true, require all substituted occurrences of SRC
362 to fold to a constant, so that the note does not use any more registers
363 than it did previously. If REQUIRE_CONSTANT is false, also allow the
364 substitution if it's something we'd normally allow for the main
365 instruction pattern. */
366
367static int
368try_fwprop_subst_note (insn_info *use_insn, set_info *def,
369 rtx note, rtx dest, rtx src, bool require_constant)
370{
371 rtx_insn *use_rtl = use_insn->rtl ();
372 insn_info *def_insn = def->insn ();
373
374 insn_change_watermark watermark;
375 fwprop_propagation prop (use_insn, def, dest, src);
376 if (!prop.apply_to_rvalue (&XEXP (note, 0)))
377 {
378 if (dump_file && (dump_flags & TDF_DETAILS))
379 fprintf (stream: dump_file, format: "cannot propagate from insn %d into"
380 " notes of insn %d: %s\n", def_insn->uid (),
381 use_insn->uid (), prop.failure_reason);
382 return -1;
383 }
384
385 if (prop.num_replacements == 0)
386 return 0;
387
388 if (require_constant)
389 {
390 if (!prop.folded_to_constants_p ())
391 {
392 if (dump_file && (dump_flags & TDF_DETAILS))
393 fprintf (stream: dump_file, format: "cannot propagate from insn %d into"
394 " notes of insn %d: %s\n", def_insn->uid (),
395 use_insn->uid (), "wouldn't fold to constants");
396 return -1;
397 }
398 }
399 else
400 {
401 if (!prop.folded_to_constants_p () && !prop.likely_profitable_p ())
402 {
403 if (dump_file && (dump_flags & TDF_DETAILS))
404 fprintf (stream: dump_file, format: "cannot propagate from insn %d into"
405 " notes of insn %d: %s\n", def_insn->uid (),
406 use_insn->uid (), "would increase complexity of node");
407 return -1;
408 }
409 }
410
411 if (dump_file && (dump_flags & TDF_DETAILS))
412 {
413 fprintf (stream: dump_file, format: "\nin notes of insn %d, replacing:\n ",
414 INSN_UID (insn: use_rtl));
415 temporarily_undo_changes (0);
416 print_inline_rtx (dump_file, note, 2);
417 redo_changes (0);
418 fprintf (stream: dump_file, format: "\n with:\n ");
419 print_inline_rtx (dump_file, note, 2);
420 fprintf (stream: dump_file, format: "\n");
421 }
422 watermark.keep ();
423 return prop.num_replacements;
424}
425
426/* Try to substitute (set DEST SRC), which defines DEF, into location LOC of
427 USE_INSN's pattern. Return true on success, otherwise leave USE_INSN
428 unchanged. */
429
430static bool
431try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change,
432 set_info *def, rtx *loc, rtx dest, rtx src)
433{
434 insn_info *use_insn = use_change.insn ();
435 rtx_insn *use_rtl = use_insn->rtl ();
436 insn_info *def_insn = def->insn ();
437
438 insn_change_watermark watermark;
439 fwprop_propagation prop (use_insn, def, dest, src);
440 if (!prop.apply_to_pattern (loc))
441 {
442 if (dump_file && (dump_flags & TDF_DETAILS))
443 fprintf (stream: dump_file, format: "cannot propagate from insn %d into"
444 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
445 prop.failure_reason);
446 return false;
447 }
448
449 if (prop.num_replacements == 0)
450 return false;
451
452 if (!prop.likely_profitable_p ()
453 && (prop.changed_mem_p ()
454 || contains_mem_rtx_p (x: src)
455 || use_insn->is_asm ()
456 || !single_set (insn: use_rtl)))
457 {
458 if (dump_file && (dump_flags & TDF_DETAILS))
459 fprintf (stream: dump_file, format: "cannot propagate from insn %d into"
460 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
461 "would increase complexity of pattern");
462 return false;
463 }
464
465 if (dump_file && (dump_flags & TDF_DETAILS))
466 {
467 fprintf (stream: dump_file, format: "\npropagating insn %d into insn %d, replacing:\n",
468 def_insn->uid (), use_insn->uid ());
469 temporarily_undo_changes (0);
470 print_rtl_single (dump_file, PATTERN (insn: use_rtl));
471 redo_changes (0);
472 }
473
474 /* ??? In theory, it should be better to use insn costs rather than
475 set_src_costs here. That would involve replacing this code with
476 change_is_worthwhile. */
477 bool ok = recog (watermark&: attempt, change&: use_change);
478 if (ok && !prop.changed_mem_p () && !use_insn->is_asm ())
479 if (rtx use_set = single_set (insn: use_rtl))
480 {
481 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn: use_rtl));
482 temporarily_undo_changes (0);
483 auto old_cost = set_src_cost (SET_SRC (use_set),
484 GET_MODE (SET_DEST (use_set)), speed_p: speed);
485 redo_changes (0);
486 auto new_cost = set_src_cost (SET_SRC (use_set),
487 GET_MODE (SET_DEST (use_set)), speed_p: speed);
488 if (new_cost > old_cost
489 || (new_cost == old_cost && !prop.likely_profitable_p ()))
490 {
491 if (dump_file)
492 fprintf (stream: dump_file, format: "change not profitable"
493 " (cost %d -> cost %d)\n", old_cost, new_cost);
494 ok = false;
495 }
496 }
497
498 if (!ok)
499 {
500 /* The pattern didn't match, but if all uses of SRC folded to
501 constants, we can add a REG_EQUAL note for the result, if there
502 isn't one already. */
503 if (!prop.folded_to_constants_p ())
504 return false;
505
506 /* Test this first to avoid creating an unnecessary copy of SRC. */
507 if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX))
508 return false;
509
510 rtx set = set_for_reg_notes (use_rtl);
511 if (!set || !REG_P (SET_DEST (set)))
512 return false;
513
514 rtx value = copy_rtx (SET_SRC (set));
515 cancel_changes (0);
516
517 /* If there are any paradoxical SUBREGs, drop the REG_EQUAL note,
518 because the bits in there can be anything and so might not
519 match the REG_EQUAL note content. See PR70574. */
520 if (contains_paradoxical_subreg_p (SET_SRC (set)))
521 return false;
522
523 if (dump_file && (dump_flags & TDF_DETAILS))
524 fprintf (stream: dump_file, format: " Setting REG_EQUAL note\n");
525
526 return set_unique_reg_note (use_rtl, REG_EQUAL, value);
527 }
528
529 rtx *note_ptr = &REG_NOTES (use_rtl);
530 while (rtx note = *note_ptr)
531 {
532 if ((REG_NOTE_KIND (note) == REG_EQUAL
533 || REG_NOTE_KIND (note) == REG_EQUIV)
534 && try_fwprop_subst_note (use_insn, def, note, dest, src, require_constant: false) < 0)
535 {
536 *note_ptr = XEXP (note, 1);
537 free_EXPR_LIST_node (note);
538 }
539 else
540 note_ptr = &XEXP (note, 1);
541 }
542
543 confirm_change_group ();
544 crtl->ssa->change_insn (change&: use_change);
545 num_changes++;
546 return true;
547}
548
549/* Try to substitute (set DEST SRC), which defines DEF, into USE_INSN's notes,
550 given that it was not possible to do this for USE_INSN's main pattern.
551 Return true on success, otherwise leave USE_INSN unchanged. */
552
553static bool
554try_fwprop_subst_notes (insn_info *use_insn, set_info *def,
555 rtx dest, rtx src)
556{
557 rtx_insn *use_rtl = use_insn->rtl ();
558 for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, 1))
559 if ((REG_NOTE_KIND (note) == REG_EQUAL
560 || REG_NOTE_KIND (note) == REG_EQUIV)
561 && try_fwprop_subst_note (use_insn, def, note, dest, src, require_constant: true) > 0)
562 {
563 confirm_change_group ();
564 return true;
565 }
566
567 return false;
568}
569
570/* Check whether we could validly substitute (set DEST SRC), which defines DEF,
571 into USE. If so, first try performing the substitution in location LOC
572 of USE->insn ()'s pattern. If that fails, try instead to substitute
573 into the notes.
574
575 Return true on success, otherwise leave USE_INSN unchanged. */
576
577static bool
578try_fwprop_subst (use_info *use, set_info *def,
579 rtx *loc, rtx dest, rtx src)
580{
581 insn_info *use_insn = use->insn ();
582 insn_info *def_insn = def->insn ();
583
584 auto attempt = crtl->ssa->new_change_attempt ();
585 use_array src_uses = remove_note_accesses (watermark&: attempt, accesses: def_insn->uses ());
586
587 /* ??? Not really a meaningful test: it means we can propagate arithmetic
588 involving hard registers but not bare references to them. A better
589 test would be to iterate over src_uses looking for hard registers
590 that are not fixed. */
591 if (REG_P (src) && HARD_REGISTER_P (src))
592 return false;
593
594 /* ??? It would be better to make this EBB-based instead. That would
595 involve checking for equal EBBs rather than equal BBs and trying
596 to make the uses available at use_insn->ebb ()->first_bb (). */
597 if (def_insn->bb () != use_insn->bb ())
598 {
599 src_uses = crtl->ssa->make_uses_available (watermark&: attempt, uses: src_uses,
600 bb: use_insn->bb (),
601 will_be_debug_uses: use_insn->is_debug_insn ());
602 if (!src_uses.is_valid ())
603 return false;
604 }
605
606 insn_change use_change (use_insn);
607 use_change.new_uses = merge_access_arrays (watermark&: attempt, accesses1: use_change.new_uses,
608 accesses2: src_uses);
609 if (!use_change.new_uses.is_valid ())
610 return false;
611
612 /* ??? We could allow movement within the EBB by adding:
613
614 use_change.move_range = use_insn->ebb ()->insn_range (); */
615 if (!restrict_movement (change&: use_change))
616 return false;
617
618 return (try_fwprop_subst_pattern (attempt, use_change, def, loc, dest, src)
619 || try_fwprop_subst_notes (use_insn, def, dest, src));
620}
621
622/* For the given single_set INSN, containing SRC known to be a
623 ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
624 is redundant due to the register being set by a LOAD_EXTEND_OP
625 load from memory. */
626
627static bool
628free_load_extend (rtx src, insn_info *insn)
629{
630 rtx reg = XEXP (src, 0);
631 if (load_extend_op (GET_MODE (reg)) != GET_CODE (src))
632 return false;
633
634 def_info *def = nullptr;
635 for (use_info *use : insn->uses ())
636 if (use->regno () == REGNO (reg))
637 {
638 def = use->def ();
639 break;
640 }
641
642 if (!def)
643 return false;
644
645 insn_info *def_insn = def->insn ();
646 if (def_insn->is_artificial ())
647 return false;
648
649 rtx_insn *def_rtl = def_insn->rtl ();
650 if (NONJUMP_INSN_P (def_rtl))
651 {
652 rtx patt = PATTERN (insn: def_rtl);
653
654 if (GET_CODE (patt) == SET
655 && GET_CODE (SET_SRC (patt)) == MEM
656 && rtx_equal_p (SET_DEST (patt), reg))
657 return true;
658 }
659 return false;
660}
661
662/* Subroutine of forward_propagate_subreg that handles a use of DEST
663 in REF. The other parameters are the same. */
664
665static bool
666forward_propagate_subreg (use_info *use, set_info *def,
667 rtx dest, rtx src, df_ref ref)
668{
669 scalar_int_mode int_use_mode, src_mode;
670
671 /* Only consider subregs... */
672 rtx use_reg = DF_REF_REG (ref);
673 machine_mode use_mode = GET_MODE (use_reg);
674 if (GET_CODE (use_reg) != SUBREG
675 || GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest))
676 return false;
677
678 /* ??? Replacing throughout the pattern would help for match_dups. */
679 rtx *loc = DF_REF_LOC (ref);
680 if (paradoxical_subreg_p (x: use_reg))
681 {
682 /* If this is a paradoxical SUBREG, we have no idea what value the
683 extra bits would have. However, if the operand is equivalent to
684 a SUBREG whose operand is the same as our mode, and all the modes
685 are within a word, we can just use the inner operand because
686 these SUBREGs just say how to treat the register. */
687 if (GET_CODE (src) == SUBREG
688 && REG_P (SUBREG_REG (src))
689 && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
690 && GET_MODE (SUBREG_REG (src)) == use_mode
691 && subreg_lowpart_p (src))
692 return try_fwprop_subst (use, def, loc, dest: use_reg, SUBREG_REG (src));
693 }
694
695 /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
696 is the low part of the reg being extended then just use the inner
697 operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
698 be removed due to it matching a LOAD_EXTEND_OP load from memory,
699 or due to the operation being a no-op when applied to registers.
700 For example, if we have:
701
702 A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
703 B: (... (subreg:SI (reg:DI X)) ...)
704
705 and mode_rep_extended says that Y is already sign-extended,
706 the backend will typically allow A to be combined with the
707 definition of Y or, failing that, allow A to be deleted after
708 reload through register tying. Introducing more uses of Y
709 prevents both optimisations. */
710 else if (is_a <scalar_int_mode> (m: use_mode, result: &int_use_mode)
711 && subreg_lowpart_p (use_reg))
712 {
713 if ((GET_CODE (src) == ZERO_EXTEND
714 || GET_CODE (src) == SIGN_EXTEND)
715 && is_a <scalar_int_mode> (GET_MODE (src), result: &src_mode)
716 && REG_P (XEXP (src, 0))
717 && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
718 && GET_MODE (XEXP (src, 0)) == use_mode
719 && !free_load_extend (src, insn: def->insn ())
720 && (targetm.mode_rep_extended (int_use_mode, src_mode)
721 != (int) GET_CODE (src)))
722 return try_fwprop_subst (use, def, loc, dest: use_reg, XEXP (src, 0));
723 }
724
725 return false;
726}
727
728/* Try to substitute (set DEST SRC), which defines DEF, into USE and simplify
729 the result, handling cases where DEST is used in a subreg and where
730 applying that subreg to SRC results in a useful simplification. */
731
732static bool
733forward_propagate_subreg (use_info *use, set_info *def, rtx dest, rtx src)
734{
735 if (!use->includes_subregs () || !REG_P (dest))
736 return false;
737
738 if (GET_CODE (src) != SUBREG
739 && GET_CODE (src) != ZERO_EXTEND
740 && GET_CODE (src) != SIGN_EXTEND)
741 return false;
742
743 rtx_insn *use_rtl = use->insn ()->rtl ();
744 df_ref ref;
745
746 FOR_EACH_INSN_USE (ref, use_rtl)
747 if (DF_REF_REGNO (ref) == use->regno ()
748 && forward_propagate_subreg (use, def, dest, src, ref))
749 return true;
750
751 FOR_EACH_INSN_EQ_USE (ref, use_rtl)
752 if (DF_REF_REGNO (ref) == use->regno ()
753 && forward_propagate_subreg (use, def, dest, src, ref))
754 return true;
755
756 return false;
757}
758
759/* Try to substitute (set DEST SRC), which defines DEF, into USE and
760 simplify the result. */
761
762static bool
763forward_propagate_and_simplify (use_info *use, set_info *def,
764 rtx dest, rtx src)
765{
766 insn_info *use_insn = use->insn ();
767 rtx_insn *use_rtl = use_insn->rtl ();
768 insn_info *def_insn = def->insn ();
769
770 /* ??? This check seems unnecessary. We should be able to propagate
771 into any kind of instruction, regardless of whether it's a single set.
772 It seems odd to be more permissive with asms than normal instructions. */
773 bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ());
774 rtx use_set = single_set (insn: use_rtl);
775 if (need_single_set && !use_set)
776 return false;
777
778 /* Do not propagate into PC etc.
779
780 ??? This too seems unnecessary. The current code should work correctly
781 without it, including cases where jumps become unconditional. */
782 if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
783 return false;
784
785 /* In __asm don't replace if src might need more registers than
786 reg, as that could increase register pressure on the __asm. */
787 if (use_insn->is_asm () && def_insn->uses ().size () > 1)
788 return false;
789
790 /* Check if the def is loading something from the constant pool; in this
791 case we would undo optimization such as compress_float_constant.
792 Still, we can set a REG_EQUAL note. */
793 if (MEM_P (src) && MEM_READONLY_P (src))
794 {
795 rtx x = avoid_constant_pool_reference (src);
796 rtx note_set;
797 if (x != src
798 && (note_set = set_for_reg_notes (use_rtl))
799 && REG_P (SET_DEST (note_set))
800 && !contains_paradoxical_subreg_p (SET_SRC (note_set)))
801 {
802 rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX);
803 rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (note_set);
804 rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
805 if (old_rtx != new_rtx)
806 set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx));
807 }
808 return false;
809 }
810
811 /* ??? Unconditionally propagating into PATTERN would work better
812 for instructions that have match_dups. */
813 rtx *loc = need_single_set ? &use_set : &PATTERN (insn: use_rtl);
814 return try_fwprop_subst (use, def, loc, dest, src);
815}
816
817/* Given a use USE of an insn, if it has a single reaching
818 definition, try to forward propagate it into that insn.
819 Return true if something changed.
820
821 REG_PROP_ONLY is true if we should only propagate register copies. */
822
823static bool
824forward_propagate_into (use_info *use, bool reg_prop_only = false)
825{
826 if (use->includes_read_writes ())
827 return false;
828
829 /* Disregard uninitialized uses. */
830 set_info *def = use->def ();
831 if (!def)
832 return false;
833
834 /* Only consider single-register definitions. This could be relaxed,
835 but it should rarely be needed before RA. */
836 def = look_through_degenerate_phi (access: def);
837 if (def->includes_multiregs ())
838 return false;
839
840 /* Only consider uses whose definition comes from a real instruction. */
841 insn_info *def_insn = def->insn ();
842 if (def_insn->is_artificial ())
843 return false;
844
845 rtx_insn *def_rtl = def_insn->rtl ();
846 if (!NONJUMP_INSN_P (def_rtl))
847 return false;
848 /* ??? This seems an unnecessary restriction. We can easily tell
849 which set the definition comes from. */
850 if (multiple_sets (def_rtl))
851 return false;
852 rtx def_set = simple_regno_set (PATTERN (insn: def_rtl), def->regno ());
853 if (!def_set)
854 return false;
855
856 rtx dest = SET_DEST (def_set);
857 rtx src = SET_SRC (def_set);
858 if (volatile_refs_p (src))
859 return false;
860
861 /* Allow propagations into a loop only for reg-to-reg copies, since
862 replacing one register by another shouldn't increase the cost.
863 Propagations from inner loop to outer loop should also be ok. */
864 struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
865 struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
866 if ((reg_prop_only
867 || (def_loop != use_loop
868 && !flow_loop_nested_p (use_loop, def_loop)))
869 && (!reg_single_def_p (x: dest) || !reg_single_def_p (x: src)))
870 return false;
871
872 /* Don't substitute into a non-local goto, this confuses CFG. */
873 insn_info *use_insn = use->insn ();
874 rtx_insn *use_rtl = use_insn->rtl ();
875 if (JUMP_P (use_rtl)
876 && find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX))
877 return false;
878
879 if (forward_propagate_and_simplify (use, def, dest, src)
880 || forward_propagate_subreg (use, def, dest, src))
881 return true;
882
883 return false;
884}
885
886static void
887fwprop_init (void)
888{
889 num_changes = 0;
890 calculate_dominance_info (CDI_DOMINATORS);
891
892 /* We do not always want to propagate into loops, so we have to find
893 loops and be careful about them. Avoid CFG modifications so that
894 we don't have to update dominance information afterwards for
895 build_single_def_use_links. */
896 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
897
898 df_analyze ();
899 crtl->ssa = new rtl_ssa::function_info (cfun);
900}
901
902static void
903fwprop_done (void)
904{
905 loop_optimizer_finalize ();
906
907 crtl->ssa->perform_pending_updates ();
908 free_dominance_info (CDI_DOMINATORS);
909 cleanup_cfg (0);
910
911 delete crtl->ssa;
912 crtl->ssa = nullptr;
913
914 delete_trivially_dead_insns (get_insns (), max_reg_num ());
915
916 if (dump_file)
917 fprintf (stream: dump_file,
918 format: "\nNumber of successful forward propagations: %d\n\n",
919 num_changes);
920}
921
922/* Try to optimize INSN, returning true if something changes.
923 FWPROP_ADDR_P is true if we are running fwprop_addr rather than
924 the full fwprop. */
925
926static bool
927fwprop_insn (insn_info *insn, bool fwprop_addr_p)
928{
929 for (use_info *use : insn->uses ())
930 {
931 if (use->is_mem ())
932 continue;
933 /* ??? The choices here follow those in the pre-SSA code. */
934 if (!use->includes_address_uses ())
935 {
936 if (forward_propagate_into (use, reg_prop_only: fwprop_addr_p))
937 return true;
938 }
939 else
940 {
941 struct loop *loop = insn->bb ()->cfg_bb ()->loop_father;
942 /* The outermost loop is not really a loop. */
943 if (loop == NULL || loop_outer (loop) == NULL)
944 {
945 if (forward_propagate_into (use, reg_prop_only: fwprop_addr_p))
946 return true;
947 }
948 else if (fwprop_addr_p)
949 {
950 if (forward_propagate_into (use, reg_prop_only: false))
951 return true;
952 }
953 }
954 }
955 return false;
956}
957
958/* Main entry point. */
959
960static bool
961gate_fwprop (void)
962{
963 return optimize > 0 && flag_forward_propagate;
964}
965
966static unsigned int
967fwprop (bool fwprop_addr_p)
968{
969 fwprop_init ();
970
971 /* Go through all the instructions (including debug instructions) looking
972 for uses that we could propagate into.
973
974 Do not forward propagate addresses into loops until after unrolling.
975 CSE did so because it was able to fix its own mess, but we are not. */
976
977 insn_info *next;
978
979 /* ??? This code uses a worklist in order to preserve the behavior
980 of the pre-SSA implementation. It would be better to instead
981 iterate on each instruction until no more propagations are
982 possible, then move on to the next. */
983 auto_vec<insn_info *> worklist;
984 for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
985 {
986 next = insn->next_any_insn ();
987 if (insn->can_be_optimized () || insn->is_debug_insn ())
988 if (fwprop_insn (insn, fwprop_addr_p))
989 worklist.safe_push (obj: insn);
990 }
991 for (unsigned int i = 0; i < worklist.length (); ++i)
992 {
993 insn_info *insn = worklist[i];
994 if (fwprop_insn (insn, fwprop_addr_p))
995 worklist.safe_push (obj: insn);
996 }
997
998 fwprop_done ();
999 return 0;
1000}
1001
1002namespace {
1003
1004const pass_data pass_data_rtl_fwprop =
1005{
1006 .type: RTL_PASS, /* type */
1007 .name: "fwprop1", /* name */
1008 .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
1009 .tv_id: TV_FWPROP, /* tv_id */
1010 .properties_required: 0, /* properties_required */
1011 .properties_provided: 0, /* properties_provided */
1012 .properties_destroyed: 0, /* properties_destroyed */
1013 .todo_flags_start: 0, /* todo_flags_start */
1014 TODO_df_finish, /* todo_flags_finish */
1015};
1016
1017class pass_rtl_fwprop : public rtl_opt_pass
1018{
1019public:
1020 pass_rtl_fwprop (gcc::context *ctxt)
1021 : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
1022 {}
1023
1024 /* opt_pass methods: */
1025 bool gate (function *) final override { return gate_fwprop (); }
1026 unsigned int execute (function *) final override { return fwprop (fwprop_addr_p: false); }
1027
1028}; // class pass_rtl_fwprop
1029
1030} // anon namespace
1031
1032rtl_opt_pass *
1033make_pass_rtl_fwprop (gcc::context *ctxt)
1034{
1035 return new pass_rtl_fwprop (ctxt);
1036}
1037
1038namespace {
1039
1040const pass_data pass_data_rtl_fwprop_addr =
1041{
1042 .type: RTL_PASS, /* type */
1043 .name: "fwprop2", /* name */
1044 .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
1045 .tv_id: TV_FWPROP, /* tv_id */
1046 .properties_required: 0, /* properties_required */
1047 .properties_provided: 0, /* properties_provided */
1048 .properties_destroyed: 0, /* properties_destroyed */
1049 .todo_flags_start: 0, /* todo_flags_start */
1050 TODO_df_finish, /* todo_flags_finish */
1051};
1052
1053class pass_rtl_fwprop_addr : public rtl_opt_pass
1054{
1055public:
1056 pass_rtl_fwprop_addr (gcc::context *ctxt)
1057 : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
1058 {}
1059
1060 /* opt_pass methods: */
1061 bool gate (function *) final override { return gate_fwprop (); }
1062 unsigned int execute (function *) final override { return fwprop (fwprop_addr_p: true); }
1063
1064}; // class pass_rtl_fwprop_addr
1065
1066} // anon namespace
1067
1068rtl_opt_pass *
1069make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1070{
1071 return new pass_rtl_fwprop_addr (ctxt);
1072}
1073

source code of gcc/fwprop.cc