fwprop.cc source code [gcc/fwprop.cc]

/* RTL-based forward propagation pass for GNU compiler.
   Copyright (C) 2005-2024 Free Software Foundation, Inc.
   Contributed by Paolo Bonzini and Steven Bosscher.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
20
21	#define INCLUDE_ALGORITHM
22	#define INCLUDE_FUNCTIONAL
23	#include "config.h"
24	#include "system.h"
25	#include "coretypes.h"
26	#include "backend.h"
27	#include "rtl.h"
28	#include "rtlanal.h"
29	#include "df.h"
30	#include "rtl-ssa.h"
31
32	#include "predict.h"
33	#include "cfgrtl.h"
34	#include "cfgcleanup.h"
35	#include "cfgloop.h"
36	#include "tree-pass.h"
37	#include "rtl-iter.h"
38	#include "target.h"
39
/* This pass does simple forward propagation and simplification when an
   operand of an insn can only come from a single def.  This pass uses
   RTL SSA, so it is global.  However, we only do limited analysis of
   available expressions.

   1) The pass tries to propagate the source of the def into the use,
   and checks if the result is independent of the substituted value.
   For example, the high word of a (zero_extend:DI (reg:SI M)) is always
   zero, independent of the source register.

   In particular, we propagate constants into the use site.  Sometimes
   RTL expansion did not put the constant in the same insn on purpose,
   to satisfy a predicate, and the result will fail to be recognized;
   but this happens rarely and in this case we can still create a
   REG_EQUAL note.  For multi-word operations, this

      (set (subreg:SI (reg:DI 120) 0) (const_int 0))
      (set (subreg:SI (reg:DI 120) 4) (const_int -1))
      (set (subreg:SI (reg:DI 122) 0)
         (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
      (set (subreg:SI (reg:DI 122) 4)
         (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))

   can be simplified to the much simpler

      (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119) 0))
      (set (subreg:SI (reg:DI 122) 4) (const_int -1))

   This particular propagation is also effective at putting together
   complex addressing modes.  We are more aggressive inside MEMs, in
   that all definitions are propagated if the use is in a MEM; if the
   result is a valid memory address we check address_cost to decide
   whether the substitution is worthwhile.

   2) The pass propagates register copies.  This is not as effective as
   the copy propagation done by CSE's canon_reg, which works by walking
   the instruction chain, but it can help the other transformations.

   We should consider removing this optimization, and instead reorder the
   RTL passes, because GCSE does this transformation too.  With some luck,
   the CSE pass at the end of rest_of_handle_gcse could also go away.

   3) The pass looks for paradoxical subregs that are actually unnecessary.
   Things like this:

     (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
     (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
     (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
                                (subreg:SI (reg:QI 121) 0)))

   are very common on machines that can only do word-sized operations.
   For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
   if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
   we can replace the paradoxical subreg with simply (reg:WIDE M).  The
   above will simplify this to

     (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
     (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
     (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))

   where the first two insns are now dead.  */
101
102	using namespace rtl_ssa;
103
104	static int num_changes;
105
106	/ Do not try to replace constant addresses or addresses of local and*
107	argument slots. These MEM expressions are made only once and inserted
108	in many instructions, as well as being used to control symbol table
109	output. It is not safe to clobber them.
110
111	There are some uncommon cases where the address is already in a register
112	for some reason, but we cannot take advantage of that because we have
113	no easy way to unshare the MEM. In addition, looking up all stack
114	addresses is costly. /*
115
116	static bool
117	can_simplify_addr (rtx addr)
118	{
119	rtx reg;
120
121	if (CONSTANT_ADDRESS_P (addr))
122	return false;
123
124	if (GET_CODE (addr) == PLUS)
125	reg = XEXP (addr, `0`);
126	else
127	reg = addr;
128
129	return (!REG_P (reg)
130	\|\| (REGNO (reg) != FRAME_POINTER_REGNUM
131	&& REGNO (reg) != HARD_FRAME_POINTER_REGNUM
132	&& REGNO (reg) != ARG_POINTER_REGNUM));
133	}
134
135	/ MEM is the result of an address simplification, and temporarily*
136	undoing changes OLD_NUM_CHANGES onwards restores the original address.
137	Return whether it is good to use the new address instead of the
138	old one. INSN is the containing instruction. /*
139
140	static bool
141	should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn)
142	{
143	int gain;
144
145	/ Prefer the new address if it is less expensive. /
146	bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
147	temporarily_undo_changes (old_num_changes);
148	gain = address_cost (XEXP (mem, `0`), GET_MODE (mem),
149	MEM_ADDR_SPACE (mem), speed);
150	redo_changes (old_num_changes);
151	gain -= address_cost (XEXP (mem, `0`), GET_MODE (mem),
152	MEM_ADDR_SPACE (mem), speed);
153
154	/ If the addresses have equivalent cost, prefer the new address*
155	if it has the highest `set_src_cost'. That has the potential of
156	eliminating the most insns without additional costs, and it
157	is the same that cse.cc used to do. /*
158	if (gain == `0`)
159	{
160	gain = set_src_cost (XEXP (mem, `0`), VOIDmode, speed_p: speed);
161	temporarily_undo_changes (old_num_changes);
162	gain -= set_src_cost (XEXP (mem, `0`), VOIDmode, speed_p: speed);
163	redo_changes (old_num_changes);
164	}
165
166	return (gain > `0`);
167	}
168
169
170	namespace
171	{
172	class fwprop_propagation : public insn_propagation
173	{
174	public:
175	static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT;
176	static const uint16_t CONSTANT = FIRST_SPARE_RESULT << `1`;
177	static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << `2`;
178
179	fwprop_propagation (insn_info , set_info , rtx, rtx);
180
181	bool changed_mem_p () const { return result_flags & CHANGED_MEM; }
182	bool folded_to_constants_p () const;
183	bool likely_profitable_p () const;
184
185	bool check_mem (int, rtx) final override;
186	void note_simplification (int, uint16_t, rtx, rtx) final override;
187	uint16_t classify_result (rtx, rtx);
188
189	private:
190	const bool single_use_p;
191	const bool single_ebb_p;
192	};
193	}
194
195	/ Prepare to replace FROM with TO in USE_INSN. /
196
197	fwprop_propagation::fwprop_propagation (insn_info *use_insn,
198	set_info *def, rtx from, rtx to)
199	: insn_propagation (use_insn->rtl (), from, to),
200	single_use_p (def->single_nondebug_use ()),
201	single_ebb_p (use_insn->ebb () == def->ebb ())
202	{
203	should_check_mems = true;
204	should_note_simplifications = true;
205	}
206
207	/ MEM is the result of an address simplification, and temporarily*
208	undoing changes OLD_NUM_CHANGES onwards restores the original address.
209	Return true if the propagation should continue, false if it has failed. /*
210
211	bool
212	fwprop_propagation::check_mem (int old_num_changes, rtx mem)
213	{
214	if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, `0`),
215	MEM_ADDR_SPACE (mem)))
216	{
217	failure_reason = "would create an invalid MEM";
218	return false;
219	}
220
221	temporarily_undo_changes (old_num_changes);
222	bool can_simplify = can_simplify_addr (XEXP (mem, `0`));
223	redo_changes (old_num_changes);
224	if (!can_simplify)
225	{
226	failure_reason = "would replace a frame address";
227	return false;
228	}
229
230	/ Copy propagations are always ok. Otherwise check the costs. /
231	if (!(REG_P (from) && REG_P (to))
232	&& !should_replace_address (old_num_changes, mem, insn))
233	{
234	failure_reason = "would increase the cost of a MEM";
235	return false;
236	}
237
238	result_flags \|= CHANGED_MEM;
239	return true;
240	}
241
242	/ OLDX has been simplified to NEWX. Describe the change in terms of*
243	result_flags. /*
244
245	uint16_t
246	fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx)
247	{
248	if (CONSTANT_P (new_rtx))
249	{
250	/ If OLD_RTX is a LO_SUM, then it presumably exists for a reason,*
251	and NEW_RTX is likely not a legitimate address. We want it to
252	disappear if it is invalid.
253
254	??? Using the mode of the LO_SUM as the mode of the address
255	seems odd, but it was what the pre-SSA code did. /*
256	if (GET_CODE (old_rtx) == LO_SUM
257	&& !memory_address_p (GET_MODE (old_rtx), new_rtx))
258	return CONSTANT;
259	return CONSTANT \| PROFITABLE;
260	}
261
262	/ Allow replacements that simplify operations on a vector or complex*
263	value to a component. The most prominent case is
264	(subreg ([vec_]concat ...)). /*
265	if (REG_P (new_rtx)
266	&& !HARD_REGISTER_P (new_rtx)
267	&& (VECTOR_MODE_P (GET_MODE (from))
268	\|\| COMPLEX_MODE_P (GET_MODE (from)))
269	&& GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from)))
270	return PROFITABLE;
271
272	/ Allow (subreg (mem)) -> (mem) simplifications with the following*
273	exceptions:
274	1) Propagating (mem)s into multiple uses is not profitable.
275	2) Propagating (mem)s across EBBs may not be profitable if the source EBB
276	runs less frequently.
277	3) Propagating (mem)s into paradoxical (subreg)s is not profitable.
278	4) Creating new (mem/v)s is not correct, since DCE will not remove the old
279	ones. /*
280	if (single_use_p
281	&& single_ebb_p
282	&& SUBREG_P (old_rtx)
283	&& !paradoxical_subreg_p (x: old_rtx)
284	&& MEM_P (new_rtx)
285	&& !MEM_VOLATILE_P (new_rtx))
286	return PROFITABLE;
287
288	return `0`;
289	}
290
291	/ Record that OLD_RTX has been simplified to NEW_RTX. OLD_NUM_CHANGES*
292	is the number of unrelated changes that had been made before processing
293	OLD_RTX and its subrtxes. OLD_RESULT_FLAGS is the value that result_flags
294	had at that point. /*
295
296	void
297	fwprop_propagation::note_simplification (int old_num_changes,
298	uint16_t old_result_flags,
299	rtx old_rtx, rtx new_rtx)
300	{
301	result_flags &= ~(CONSTANT \| PROFITABLE);
302	uint16_t new_flags = classify_result (old_rtx, new_rtx);
303	if (old_num_changes)
304	new_flags &= old_result_flags;
305	result_flags \|= new_flags;
306	}
307
308	/ Return true if all substitutions eventually folded to constants. /
309
310	bool
311	fwprop_propagation::folded_to_constants_p () const
312	{
313	/ If we're propagating a HIGH, require it to be folded with a*
314	partnering LO_SUM. For example, a REG_EQUAL note with a register
315	replaced by an unfolded HIGH is not useful. /*
316	if (CONSTANT_P (to) && GET_CODE (to) != HIGH)
317	return true;
318	return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT);
319	}
320
321
322	/ Return true if it is worth keeping the result of the propagation,*
323	false if it would increase the complexity of the pattern too much. /*
324
325	bool
326	fwprop_propagation::likely_profitable_p () const
327	{
328	if (changed_mem_p ())
329	return true;
330
331	if (!(result_flags & UNSIMPLIFIED)
332	&& (result_flags & PROFITABLE))
333	return true;
334
335	if (REG_P (to))
336	return true;
337
338	if (GET_CODE (to) == SUBREG
339	&& REG_P (SUBREG_REG (to))
340	&& !paradoxical_subreg_p (x: to))
341	return true;
342
343	if (CONSTANT_P (to))
344	return true;
345
346	return false;
347	}
348
349	/ Check that X has a single def. /
350
351	static bool
352	reg_single_def_p (rtx x)
353	{
354	return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
355	}
356
357	/ Try to substitute (set DEST SRC), which defines DEF, into note NOTE of*
358	USE_INSN. Return the number of substitutions on success, otherwise return
359	-1 and leave USE_INSN unchanged.
360
361	If REQUIRE_CONSTANT is true, require all substituted occurrences of SRC
362	to fold to a constant, so that the note does not use any more registers
363	than it did previously. If REQUIRE_CONSTANT is false, also allow the
364	substitution if it's something we'd normally allow for the main
365	instruction pattern. /*
366
367	static int
368	try_fwprop_subst_note (insn_info use_insn, set_info def,
369	rtx note, rtx dest, rtx src, bool require_constant)
370	{
371	rtx_insn *use_rtl = use_insn->rtl ();
372	insn_info *def_insn = def->insn ();
373
374	insn_change_watermark watermark;
375	fwprop_propagation prop (use_insn, def, dest, src);
376	if (!prop.apply_to_rvalue (&XEXP (note, `0`)))
377	{
378	if (dump_file && (dump_flags & TDF_DETAILS))
379	fprintf (stream: dump_file, format: "cannot propagate from insn %d into"
380	" notes of insn %d: %s\n", def_insn->uid (),
381	use_insn->uid (), prop.failure_reason);
382	return -`1`;
383	}
384
385	if (prop.num_replacements == `0`)
386	return `0`;
387
388	if (require_constant)
389	{
390	if (!prop.folded_to_constants_p ())
391	{
392	if (dump_file && (dump_flags & TDF_DETAILS))
393	fprintf (stream: dump_file, format: "cannot propagate from insn %d into"
394	" notes of insn %d: %s\n", def_insn->uid (),
395	use_insn->uid (), "wouldn't fold to constants");
396	return -`1`;
397	}
398	}
399	else
400	{
401	if (!prop.folded_to_constants_p () && !prop.likely_profitable_p ())
402	{
403	if (dump_file && (dump_flags & TDF_DETAILS))
404	fprintf (stream: dump_file, format: "cannot propagate from insn %d into"
405	" notes of insn %d: %s\n", def_insn->uid (),
406	use_insn->uid (), "would increase complexity of node");
407	return -`1`;
408	}
409	}
410
411	if (dump_file && (dump_flags & TDF_DETAILS))
412	{
413	fprintf (stream: dump_file, format: "\nin notes of insn %d, replacing:\n ",
414	INSN_UID (insn: use_rtl));
415	temporarily_undo_changes (`0`);
416	print_inline_rtx (dump_file, note, `2`);
417	redo_changes (`0`);
418	fprintf (stream: dump_file, format: "\n with:\n ");
419	print_inline_rtx (dump_file, note, `2`);
420	fprintf (stream: dump_file, format: "\n");
421	}
422	watermark.keep ();
423	return prop.num_replacements;
424	}
425
426	/ Try to substitute (set DEST SRC), which defines DEF, into location LOC of*
427	USE_INSN's pattern. Return true on success, otherwise leave USE_INSN
428	unchanged. /*
429
430	static bool
431	try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change,
432	set_info def, rtx loc, rtx dest, rtx src)
433	{
434	insn_info *use_insn = use_change.insn ();
435	rtx_insn *use_rtl = use_insn->rtl ();
436	insn_info *def_insn = def->insn ();
437
438	insn_change_watermark watermark;
439	fwprop_propagation prop (use_insn, def, dest, src);
440	if (!prop.apply_to_pattern (loc))
441	{
442	if (dump_file && (dump_flags & TDF_DETAILS))
443	fprintf (stream: dump_file, format: "cannot propagate from insn %d into"
444	" insn %d: %s\n", def_insn->uid (), use_insn->uid (),
445	prop.failure_reason);
446	return false;
447	}
448
449	if (prop.num_replacements == `0`)
450	return false;
451
452	if (!prop.likely_profitable_p ()
453	&& (prop.changed_mem_p ()
454	\|\| contains_mem_rtx_p (x: src)
455	\|\| use_insn->is_asm ()
456	\|\| !single_set (insn: use_rtl)))
457	{
458	if (dump_file && (dump_flags & TDF_DETAILS))
459	fprintf (stream: dump_file, format: "cannot propagate from insn %d into"
460	" insn %d: %s\n", def_insn->uid (), use_insn->uid (),
461	"would increase complexity of pattern");
462	return false;
463	}
464
465	if (dump_file && (dump_flags & TDF_DETAILS))
466	{
467	fprintf (stream: dump_file, format: "\npropagating insn %d into insn %d, replacing:\n",
468	def_insn->uid (), use_insn->uid ());
469	temporarily_undo_changes (`0`);
470	print_rtl_single (dump_file, PATTERN (insn: use_rtl));
471	redo_changes (`0`);
472	}
473
474	/ ??? In theory, it should be better to use insn costs rather than*
475	set_src_costs here. That would involve replacing this code with
476	change_is_worthwhile. /*
477	bool ok = recog (watermark&: attempt, change&: use_change);
478	if (ok && !prop.changed_mem_p () && !use_insn->is_asm ())
479	if (rtx use_set = single_set (insn: use_rtl))
480	{
481	bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn: use_rtl));
482	temporarily_undo_changes (`0`);
483	auto old_cost = set_src_cost (SET_SRC (use_set),
484	GET_MODE (SET_DEST (use_set)), speed_p: speed);
485	redo_changes (`0`);
486	auto new_cost = set_src_cost (SET_SRC (use_set),
487	GET_MODE (SET_DEST (use_set)), speed_p: speed);
488	if (new_cost > old_cost
489	\|\| (new_cost == old_cost && !prop.likely_profitable_p ()))
490	{
491	if (dump_file)
492	fprintf (stream: dump_file, format: "change not profitable"
493	" (cost %d -> cost %d)\n", old_cost, new_cost);
494	ok = false;
495	}
496	}
497
498	if (!ok)
499	{
500	/ The pattern didn't match, but if all uses of SRC folded to*
501	constants, we can add a REG_EQUAL note for the result, if there
502	isn't one already. /*
503	if (!prop.folded_to_constants_p ())
504	return false;
505
506	/ Test this first to avoid creating an unnecessary copy of SRC. /
507	if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX))
508	return false;
509
510	rtx set = set_for_reg_notes (use_rtl);
511	if (!set \|\| !REG_P (SET_DEST (set)))
512	return false;
513
514	rtx value = copy_rtx (SET_SRC (set));
515	cancel_changes (`0`);
516
517	/ If there are any paradoxical SUBREGs, drop the REG_EQUAL note,*
518	because the bits in there can be anything and so might not
519	match the REG_EQUAL note content. See PR70574. /*
520	if (contains_paradoxical_subreg_p (SET_SRC (set)))
521	return false;
522
523	if (dump_file && (dump_flags & TDF_DETAILS))
524	fprintf (stream: dump_file, format: " Setting REG_EQUAL note\n");
525
526	return set_unique_reg_note (use_rtl, REG_EQUAL, value);
527	}
528
529	rtx *note_ptr = &REG_NOTES (use_rtl);
530	while (rtx note = *note_ptr)
531	{
532	if ((REG_NOTE_KIND (note) == REG_EQUAL
533	\|\| REG_NOTE_KIND (note) == REG_EQUIV)
534	&& try_fwprop_subst_note (use_insn, def, note, dest, src, require_constant: false) < `0`)
535	{
536	*note_ptr = XEXP (note, `1`);
537	free_EXPR_LIST_node (note);
538	}
539	else
540	note_ptr = &XEXP (note, `1`);
541	}
542
543	confirm_change_group ();
544	crtl->ssa->change_insn (change&: use_change);
545	num_changes++;
546	return true;
547	}
548
549	/ Try to substitute (set DEST SRC), which defines DEF, into USE_INSN's notes,*
550	given that it was not possible to do this for USE_INSN's main pattern.
551	Return true on success, otherwise leave USE_INSN unchanged. /*
552
553	static bool
554	try_fwprop_subst_notes (insn_info use_insn, set_info def,
555	rtx dest, rtx src)
556	{
557	rtx_insn *use_rtl = use_insn->rtl ();
558	for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, `1`))
559	if ((REG_NOTE_KIND (note) == REG_EQUAL
560	\|\| REG_NOTE_KIND (note) == REG_EQUIV)
561	&& try_fwprop_subst_note (use_insn, def, note, dest, src, require_constant: true) > `0`)
562	{
563	confirm_change_group ();
564	return true;
565	}
566
567	return false;
568	}
569
570	/ Check whether we could validly substitute (set DEST SRC), which defines DEF,*
571	into USE. If so, first try performing the substitution in location LOC
572	of USE->insn ()'s pattern. If that fails, try instead to substitute
573	into the notes.
574
575	Return true on success, otherwise leave USE_INSN unchanged. /*
576
577	static bool
578	try_fwprop_subst (use_info use, set_info def,
579	rtx *loc, rtx dest, rtx src)
580	{
581	insn_info *use_insn = use->insn ();
582	insn_info *def_insn = def->insn ();
583
584	auto attempt = crtl->ssa->new_change_attempt ();
585	use_array src_uses = remove_note_accesses (watermark&: attempt, accesses: def_insn->uses ());
586
587	/ ??? Not really a meaningful test: it means we can propagate arithmetic*
588	involving hard registers but not bare references to them. A better
589	test would be to iterate over src_uses looking for hard registers
590	that are not fixed. /*
591	if (REG_P (src) && HARD_REGISTER_P (src))
592	return false;
593
594	/ ??? It would be better to make this EBB-based instead. That would*
595	involve checking for equal EBBs rather than equal BBs and trying
596	to make the uses available at use_insn->ebb ()->first_bb (). /*
597	if (def_insn->bb () != use_insn->bb ())
598	{
599	src_uses = crtl->ssa->make_uses_available (watermark&: attempt, uses: src_uses,
600	bb: use_insn->bb (),
601	will_be_debug_uses: use_insn->is_debug_insn ());
602	if (!src_uses.is_valid ())
603	return false;
604	}
605
606	insn_change use_change (use_insn);
607	use_change.new_uses = merge_access_arrays (watermark&: attempt, accesses1: use_change.new_uses,
608	accesses2: src_uses);
609	if (!use_change.new_uses.is_valid ())
610	return false;
611
612	/ ??? We could allow movement within the EBB by adding:*
613
614	use_change.move_range = use_insn->ebb ()->insn_range (); /*
615	if (!restrict_movement (change&: use_change))
616	return false;
617
618	return (try_fwprop_subst_pattern (attempt, use_change, def, loc, dest, src)
619	\|\| try_fwprop_subst_notes (use_insn, def, dest, src));
620	}
621
622	/ For the given single_set INSN, containing SRC known to be a*
623	ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
624	is redundant due to the register being set by a LOAD_EXTEND_OP
625	load from memory. /*
626
627	static bool
628	free_load_extend (rtx src, insn_info *insn)
629	{
630	rtx reg = XEXP (src, `0`);
631	if (load_extend_op (GET_MODE (reg)) != GET_CODE (src))
632	return false;
633
634	def_info def = nullptr*;
635	for (use_info *use : insn->uses ())
636	if (use->regno () == REGNO (reg))
637	{
638	def = use->def ();
639	break;
640	}
641
642	if (!def)
643	return false;
644
645	insn_info *def_insn = def->insn ();
646	if (def_insn->is_artificial ())
647	return false;
648
649	rtx_insn *def_rtl = def_insn->rtl ();
650	if (NONJUMP_INSN_P (def_rtl))
651	{
652	rtx patt = PATTERN (insn: def_rtl);
653
654	if (GET_CODE (patt) == SET
655	&& GET_CODE (SET_SRC (patt)) == MEM
656	&& rtx_equal_p (SET_DEST (patt), reg))
657	return true;
658	}
659	return false;
660	}
661
662	/ Subroutine of forward_propagate_subreg that handles a use of DEST*
663	in REF. The other parameters are the same. /*
664
665	static bool
666	forward_propagate_subreg (use_info use, set_info def,
667	rtx dest, rtx src, df_ref ref)
668	{
669	scalar_int_mode int_use_mode, src_mode;
670
671	/ Only consider subregs... /
672	rtx use_reg = DF_REF_REG (ref);
673	machine_mode use_mode = GET_MODE (use_reg);
674	if (GET_CODE (use_reg) != SUBREG
675	\|\| GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest))
676	return false;
677
678	/ ??? Replacing throughout the pattern would help for match_dups. /
679	rtx *loc = DF_REF_LOC (ref);
680	if (paradoxical_subreg_p (x: use_reg))
681	{
682	/ If this is a paradoxical SUBREG, we have no idea what value the*
683	extra bits would have. However, if the operand is equivalent to
684	a SUBREG whose operand is the same as our mode, and all the modes
685	are within a word, we can just use the inner operand because
686	these SUBREGs just say how to treat the register. /*
687	if (GET_CODE (src) == SUBREG
688	&& REG_P (SUBREG_REG (src))
689	&& REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
690	&& GET_MODE (SUBREG_REG (src)) == use_mode
691	&& subreg_lowpart_p (src))
692	return try_fwprop_subst (use, def, loc, dest: use_reg, SUBREG_REG (src));
693	}
694
695	/ If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG*
696	is the low part of the reg being extended then just use the inner
697	operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
698	be removed due to it matching a LOAD_EXTEND_OP load from memory,
699	or due to the operation being a no-op when applied to registers.
700	For example, if we have:
701
702	A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
703	B: (... (subreg:SI (reg:DI X)) ...)
704
705	and mode_rep_extended says that Y is already sign-extended,
706	the backend will typically allow A to be combined with the
707	definition of Y or, failing that, allow A to be deleted after
708	reload through register tying. Introducing more uses of Y
709	prevents both optimisations. /*
710	else if (is_a <scalar_int_mode> (m: use_mode, result: &int_use_mode)
711	&& subreg_lowpart_p (use_reg))
712	{
713	if ((GET_CODE (src) == ZERO_EXTEND
714	\|\| GET_CODE (src) == SIGN_EXTEND)
715	&& is_a <scalar_int_mode> (GET_MODE (src), result: &src_mode)
716	&& REG_P (XEXP (src, `0`))
717	&& REGNO (XEXP (src, `0`)) >= FIRST_PSEUDO_REGISTER
718	&& GET_MODE (XEXP (src, `0`)) == use_mode
719	&& !free_load_extend (src, insn: def->insn ())
720	&& (targetm.mode_rep_extended (int_use_mode, src_mode)
721	!= (int) GET_CODE (src)))
722	return try_fwprop_subst (use, def, loc, dest: use_reg, XEXP (src, `0`));
723	}
724
725	return false;
726	}
727
728	/ Try to substitute (set DEST SRC), which defines DEF, into USE and simplify*
729	the result, handling cases where DEST is used in a subreg and where
730	applying that subreg to SRC results in a useful simplification. /*
731
732	static bool
733	forward_propagate_subreg (use_info use, set_info def, rtx dest, rtx src)
734	{
735	if (!use->includes_subregs () \|\| !REG_P (dest))
736	return false;
737
738	if (GET_CODE (src) != SUBREG
739	&& GET_CODE (src) != ZERO_EXTEND
740	&& GET_CODE (src) != SIGN_EXTEND)
741	return false;
742
743	rtx_insn *use_rtl = use->insn ()->rtl ();
744	df_ref ref;
745
746	FOR_EACH_INSN_USE (ref, use_rtl)
747	if (DF_REF_REGNO (ref) == use->regno ()
748	&& forward_propagate_subreg (use, def, dest, src, ref))
749	return true;
750
751	FOR_EACH_INSN_EQ_USE (ref, use_rtl)
752	if (DF_REF_REGNO (ref) == use->regno ()
753	&& forward_propagate_subreg (use, def, dest, src, ref))
754	return true;
755
756	return false;
757	}
758
759	/ Try to substitute (set DEST SRC), which defines DEF, into USE and*
760	simplify the result. /*
761
762	static bool
763	forward_propagate_and_simplify (use_info use, set_info def,
764	rtx dest, rtx src)
765	{
766	insn_info *use_insn = use->insn ();
767	rtx_insn *use_rtl = use_insn->rtl ();
768	insn_info *def_insn = def->insn ();
769
770	/ ??? This check seems unnecessary. We should be able to propagate*
771	into any kind of instruction, regardless of whether it's a single set.
772	It seems odd to be more permissive with asms than normal instructions. /*
773	bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ());
774	rtx use_set = single_set (insn: use_rtl);
775	if (need_single_set && !use_set)
776	return false;
777
778	/ Do not propagate into PC etc.*
779
780	??? This too seems unnecessary. The current code should work correctly
781	without it, including cases where jumps become unconditional. /*
782	if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
783	return false;
784
785	/ In __asm don't replace if src might need more registers than*
786	reg, as that could increase register pressure on the __asm. /*
787	if (use_insn->is_asm () && def_insn->uses ().size () > `1`)
788	return false;
789
790	/ Check if the def is loading something from the constant pool; in this*
791	case we would undo optimization such as compress_float_constant.
792	Still, we can set a REG_EQUAL note. /*
793	if (MEM_P (src) && MEM_READONLY_P (src))
794	{
795	rtx x = avoid_constant_pool_reference (src);
796	rtx note_set;
797	if (x != src
798	&& (note_set = set_for_reg_notes (use_rtl))
799	&& REG_P (SET_DEST (note_set))
800	&& !contains_paradoxical_subreg_p (SET_SRC (note_set)))
801	{
802	rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX);
803	rtx old_rtx = note ? XEXP (note, `0`) : SET_SRC (note_set);
804	rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
805	if (old_rtx != new_rtx)
806	set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx));
807	}
808	return false;
809	}
810
811	/ ??? Unconditionally propagating into PATTERN would work better*
812	for instructions that have match_dups. /*
813	rtx *loc = need_single_set ? &use_set : &PATTERN (insn: use_rtl);
814	return try_fwprop_subst (use, def, loc, dest, src);
815	}
816
817	/ Given a use USE of an insn, if it has a single reaching*
818	definition, try to forward propagate it into that insn.
819	Return true if something changed.
820
821	REG_PROP_ONLY is true if we should only propagate register copies. /*
822
823	static bool
824	forward_propagate_into (use_info use, bool* reg_prop_only = false)
825	{
826	if (use->includes_read_writes ())
827	return false;
828
829	/ Disregard uninitialized uses. /
830	set_info *def = use->def ();
831	if (!def)
832	return false;
833
834	/ Only consider single-register definitions. This could be relaxed,*
835	but it should rarely be needed before RA. /*
836	def = look_through_degenerate_phi (access: def);
837	if (def->includes_multiregs ())
838	return false;
839
840	/ Only consider uses whose definition comes from a real instruction. /
841	insn_info *def_insn = def->insn ();
842	if (def_insn->is_artificial ())
843	return false;
844
845	rtx_insn *def_rtl = def_insn->rtl ();
846	if (!NONJUMP_INSN_P (def_rtl))
847	return false;
848	/ ??? This seems an unnecessary restriction. We can easily tell*
849	which set the definition comes from. /*
850	if (multiple_sets (def_rtl))
851	return false;
852	rtx def_set = simple_regno_set (PATTERN (insn: def_rtl), def->regno ());
853	if (!def_set)
854	return false;
855
856	rtx dest = SET_DEST (def_set);
857	rtx src = SET_SRC (def_set);
858	if (volatile_refs_p (src))
859	return false;
860
861	/ Allow propagations into a loop only for reg-to-reg copies, since*
862	replacing one register by another shouldn't increase the cost.
863	Propagations from inner loop to outer loop should also be ok. /*
864	struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
865	struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
866	if ((reg_prop_only
867	\|\| (def_loop != use_loop
868	&& !flow_loop_nested_p (use_loop, def_loop)))
869	&& (!reg_single_def_p (x: dest) \|\| !reg_single_def_p (x: src)))
870	return false;
871
872	/ Don't substitute into a non-local goto, this confuses CFG. /
873	insn_info *use_insn = use->insn ();
874	rtx_insn *use_rtl = use_insn->rtl ();
875	if (JUMP_P (use_rtl)
876	&& find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX))
877	return false;
878
879	if (forward_propagate_and_simplify (use, def, dest, src)
880	\|\| forward_propagate_subreg (use, def, dest, src))
881	return true;
882
883	return false;
884	}
885
886	static void
887	fwprop_init (void)
888	{
889	num_changes = `0`;
890	calculate_dominance_info (CDI_DOMINATORS);
891
892	/ We do not always want to propagate into loops, so we have to find*
893	loops and be careful about them. Avoid CFG modifications so that
894	we don't have to update dominance information afterwards for
895	build_single_def_use_links. /*
896	loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
897
898	df_analyze ();
899	crtl->ssa = new rtl_ssa::function_info (cfun);
900	}
901
902	static void
903	fwprop_done (void)
904	{
905	loop_optimizer_finalize ();
906
907	crtl->ssa->perform_pending_updates ();
908	free_dominance_info (CDI_DOMINATORS);
909	cleanup_cfg (`0`);
910
911	delete crtl->ssa;
912	crtl->ssa = nullptr;
913
914	delete_trivially_dead_insns (get_insns (), max_reg_num ());
915
916	if (dump_file)
917	fprintf (stream: dump_file,
918	format: "\nNumber of successful forward propagations: %d\n\n",
919	num_changes);
920	}
921
922	/ Try to optimize INSN, returning true if something changes.*
923	FWPROP_ADDR_P is true if we are running fwprop_addr rather than
924	the full fwprop. /*
925
926	static bool
927	fwprop_insn (insn_info insn, bool* fwprop_addr_p)
928	{
929	for (use_info *use : insn->uses ())
930	{
931	if (use->is_mem ())
932	continue;
933	/ ??? The choices here follow those in the pre-SSA code. /
934	if (!use->includes_address_uses ())
935	{
936	if (forward_propagate_into (use, reg_prop_only: fwprop_addr_p))
937	return true;
938	}
939	else
940	{
941	struct loop *loop = insn->bb ()->cfg_bb ()->loop_father;
942	/ The outermost loop is not really a loop. /
943	if (loop == NULL \|\| loop_outer (loop) == NULL)
944	{
945	if (forward_propagate_into (use, reg_prop_only: fwprop_addr_p))
946	return true;
947	}
948	else if (fwprop_addr_p)
949	{
950	if (forward_propagate_into (use, reg_prop_only: false))
951	return true;
952	}
953	}
954	}
955	return false;
956	}
957
958	/ Main entry point. /
959
960	static bool
961	gate_fwprop (void)
962	{
963	return optimize > `0` && flag_forward_propagate;
964	}
965
966	static unsigned int
967	fwprop (bool fwprop_addr_p)
968	{
969	fwprop_init ();
970
971	/ Go through all the instructions (including debug instructions) looking*
972	for uses that we could propagate into.
973
974	Do not forward propagate addresses into loops until after unrolling.
975	CSE did so because it was able to fix its own mess, but we are not. /*
976
977	insn_info *next;
978
979	/ ??? This code uses a worklist in order to preserve the behavior*
980	of the pre-SSA implementation. It would be better to instead
981	iterate on each instruction until no more propagations are
982	possible, then move on to the next. /*
983	auto_vec<insn_info *> worklist;
984	for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
985	{
986	next = insn->next_any_insn ();
987	if (insn->can_be_optimized () \|\| insn->is_debug_insn ())
988	if (fwprop_insn (insn, fwprop_addr_p))
989	worklist.safe_push (obj: insn);
990	}
991	for (unsigned int i = `0`; i < worklist.length (); ++i)
992	{
993	insn_info *insn = worklist [i];
994	if (fwprop_insn (insn, fwprop_addr_p))
995	worklist.safe_push (obj: insn);
996	}
997
998	fwprop_done ();
999	return `0`;
1000	}
1001
1002	namespace {
1003
1004	const pass_data pass_data_rtl_fwprop =
1005	{
1006	.type: RTL_PASS, / type /
1007	.name: "fwprop1", / name /
1008	.optinfo_flags: OPTGROUP_NONE, / optinfo_flags /
1009	.tv_id: TV_FWPROP, / tv_id /
1010	.properties_required: `0`, / properties_required /
1011	.properties_provided: `0`, / properties_provided /
1012	.properties_destroyed: `0`, / properties_destroyed /
1013	.todo_flags_start: `0`, / todo_flags_start /
1014	TODO_df_finish, / todo_flags_finish /
1015	};
1016
1017	class pass_rtl_fwprop : public rtl_opt_pass
1018	{
1019	public:
1020	pass_rtl_fwprop (gcc::context *ctxt)
1021	: rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
1022	{}
1023
1024	/ opt_pass methods: /
1025	bool gate (function ) final override { return* gate_fwprop (); }
1026	unsigned int execute (function ) final override { return* fwprop (fwprop_addr_p: false); }
1027
1028	}; // class pass_rtl_fwprop
1029
1030	} // anon namespace
1031
1032	rtl_opt_pass *
1033	make_pass_rtl_fwprop (gcc::context *ctxt)
1034	{
1035	return new pass_rtl_fwprop (ctxt);
1036	}
1037
1038	namespace {
1039
1040	const pass_data pass_data_rtl_fwprop_addr =
1041	{
1042	.type: RTL_PASS, / type /
1043	.name: "fwprop2", / name /
1044	.optinfo_flags: OPTGROUP_NONE, / optinfo_flags /
1045	.tv_id: TV_FWPROP, / tv_id /
1046	.properties_required: `0`, / properties_required /
1047	.properties_provided: `0`, / properties_provided /
1048	.properties_destroyed: `0`, / properties_destroyed /
1049	.todo_flags_start: `0`, / todo_flags_start /
1050	TODO_df_finish, / todo_flags_finish /
1051	};
1052
1053	class pass_rtl_fwprop_addr : public rtl_opt_pass
1054	{
1055	public:
1056	pass_rtl_fwprop_addr (gcc::context *ctxt)
1057	: rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
1058	{}
1059
1060	/ opt_pass methods: /
1061	bool gate (function ) final override { return* gate_fwprop (); }
1062	unsigned int execute (function ) final override { return* fwprop (fwprop_addr_p: true); }
1063
1064	}; // class pass_rtl_fwprop_addr
1065
1066	} // anon namespace
1067
1068	rtl_opt_pass *
1069	make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1070	{
1071	return new pass_rtl_fwprop_addr (ctxt);
1072	}
1073

source code of gcc/fwprop.cc