1 | /* Coalesce spilled pseudos. |
2 | Copyright (C) 2010-2024 Free Software Foundation, Inc. |
3 | Contributed by Vladimir Makarov <vmakarov@redhat.com>. |
4 | |
5 | This file is part of GCC. |
6 | |
7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free |
9 | Software Foundation; either version 3, or (at your option) any later |
10 | version. |
11 | |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
15 | for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | |
22 | /* This file contains a pass making some simple RTL code |
23 | transformations by coalescing pseudos to remove some move insns. |
24 | |
25 | Spilling pseudos in LRA can create memory-memory moves. We should |
26 | remove potential memory-memory moves before the next constraint |
27 | pass because the constraint pass will generate additional insns for |
28 | such moves and all these insns will be hard to remove afterwards. |
29 | |
30 | Here we coalesce only spilled pseudos. Coalescing non-spilled |
31 | pseudos (with different hard regs) might result in spilling |
32 | additional pseudos because of possible conflicts with other |
33 | non-spilled pseudos and, as a consequence, in more constraint |
34 | passes and even LRA infinite cycling. Trivial the same hard |
35 | register moves will be removed by subsequent compiler passes. |
36 | |
37 | We don't coalesce special reload pseudos. It complicates LRA code |
38 | a lot without visible generated code improvement. |
39 | |
40 | The pseudo live-ranges are used to find conflicting pseudos during |
41 | coalescing. |
42 | |
43 | Most frequently executed moves is tried to be coalesced first. */ |
44 | |
45 | #include "config.h" |
46 | #include "system.h" |
47 | #include "coretypes.h" |
48 | #include "backend.h" |
49 | #include "rtl.h" |
50 | #include "predict.h" |
51 | #include "df.h" |
52 | #include "insn-config.h" |
53 | #include "regs.h" |
54 | #include "memmodel.h" |
55 | #include "ira.h" |
56 | #include "recog.h" |
57 | #include "lra-int.h" |
58 | |
59 | /* Arrays whose elements represent the first and the next pseudo |
60 | (regno) in the coalesced pseudos group to which given pseudo (its |
61 | regno is the index) belongs. The next of the last pseudo in the |
62 | group refers to the first pseudo in the group, in other words the |
63 | group is represented by a cyclic list. */ |
64 | static int *first_coalesced_pseudo, *next_coalesced_pseudo; |
65 | |
66 | /* The function is used to sort moves according to their execution |
67 | frequencies. */ |
68 | static int |
69 | move_freq_compare_func (const void *v1p, const void *v2p) |
70 | { |
71 | rtx_insn *mv1 = *(rtx_insn * const *) v1p; |
72 | rtx_insn *mv2 = *(rtx_insn * const *) v2p; |
73 | int pri1, pri2; |
74 | |
75 | pri1 = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (mv1)); |
76 | pri2 = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (mv2)); |
77 | if (pri2 - pri1) |
78 | return pri2 - pri1; |
79 | |
80 | /* If frequencies are equal, sort by moves, so that the results of |
81 | qsort leave nothing to chance. */ |
82 | return (int) INSN_UID (insn: mv1) - (int) INSN_UID (insn: mv2); |
83 | } |
84 | |
85 | /* Pseudos which go away after coalescing. */ |
86 | static bitmap_head coalesced_pseudos_bitmap; |
87 | |
88 | /* Merge two sets of coalesced pseudos given correspondingly by |
89 | pseudos REGNO1 and REGNO2 (more accurately merging REGNO2 group |
90 | into REGNO1 group). Set up COALESCED_PSEUDOS_BITMAP. */ |
91 | static void |
92 | merge_pseudos (int regno1, int regno2) |
93 | { |
94 | int regno, first, first2, last, next; |
95 | |
96 | first = first_coalesced_pseudo[regno1]; |
97 | if ((first2 = first_coalesced_pseudo[regno2]) == first) |
98 | return; |
99 | for (last = regno2, regno = next_coalesced_pseudo[regno2];; |
100 | regno = next_coalesced_pseudo[regno]) |
101 | { |
102 | first_coalesced_pseudo[regno] = first; |
103 | bitmap_set_bit (&coalesced_pseudos_bitmap, regno); |
104 | if (regno == regno2) |
105 | break; |
106 | last = regno; |
107 | } |
108 | next = next_coalesced_pseudo[first]; |
109 | next_coalesced_pseudo[first] = regno2; |
110 | next_coalesced_pseudo[last] = next; |
111 | lra_reg_info[first].live_ranges |
112 | = (lra_merge_live_ranges |
113 | (lra_reg_info[first].live_ranges, |
114 | lra_copy_live_range_list (lra_reg_info[first2].live_ranges))); |
115 | lra_update_biggest_mode (regno: first, mode: lra_reg_info[first2].biggest_mode); |
116 | } |
117 | |
118 | /* Change pseudos in *LOC on their coalescing group |
119 | representatives. */ |
120 | static bool |
121 | substitute (rtx *loc) |
122 | { |
123 | int i, regno; |
124 | const char *fmt; |
125 | enum rtx_code code; |
126 | bool res; |
127 | |
128 | if (*loc == NULL_RTX) |
129 | return false; |
130 | code = GET_CODE (*loc); |
131 | if (code == REG) |
132 | { |
133 | regno = REGNO (*loc); |
134 | if (regno < FIRST_PSEUDO_REGISTER |
135 | || first_coalesced_pseudo[regno] == regno) |
136 | return false; |
137 | *loc = regno_reg_rtx[first_coalesced_pseudo[regno]]; |
138 | return true; |
139 | } |
140 | |
141 | res = false; |
142 | fmt = GET_RTX_FORMAT (code); |
143 | for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) |
144 | { |
145 | if (fmt[i] == 'e') |
146 | { |
147 | if (substitute (loc: &XEXP (*loc, i))) |
148 | res = true; |
149 | } |
150 | else if (fmt[i] == 'E') |
151 | { |
152 | int j; |
153 | |
154 | for (j = XVECLEN (*loc, i) - 1; j >= 0; j--) |
155 | if (substitute (loc: &XVECEXP (*loc, i, j))) |
156 | res = true; |
157 | } |
158 | } |
159 | return res; |
160 | } |
161 | |
162 | /* Specialize "substitute" for use on an insn. This can't change |
163 | the insn ptr, just the contents of the insn. */ |
164 | |
165 | static bool |
166 | substitute_within_insn (rtx_insn *insn) |
167 | { |
168 | rtx loc = insn; |
169 | return substitute (loc: &loc); |
170 | } |
171 | |
172 | /* The current iteration (1, 2, ...) of the coalescing pass. */ |
173 | int lra_coalesce_iter; |
174 | |
175 | /* Return true if the move involving REGNO1 and REGNO2 is a potential |
176 | memory-memory move. */ |
177 | static bool |
178 | mem_move_p (int regno1, int regno2) |
179 | { |
180 | return reg_renumber[regno1] < 0 && reg_renumber[regno2] < 0; |
181 | } |
182 | |
183 | /* Pseudos used instead of the coalesced pseudos. */ |
184 | static bitmap_head used_pseudos_bitmap; |
185 | |
186 | /* Set up USED_PSEUDOS_BITMAP, and update LR_BITMAP (a BB live info |
187 | bitmap). */ |
188 | static void |
189 | update_live_info (bitmap lr_bitmap) |
190 | { |
191 | unsigned int j; |
192 | bitmap_iterator bi; |
193 | |
194 | bitmap_clear (&used_pseudos_bitmap); |
195 | EXECUTE_IF_AND_IN_BITMAP (&coalesced_pseudos_bitmap, lr_bitmap, |
196 | FIRST_PSEUDO_REGISTER, j, bi) |
197 | bitmap_set_bit (&used_pseudos_bitmap, first_coalesced_pseudo[j]); |
198 | if (! bitmap_empty_p (map: &used_pseudos_bitmap)) |
199 | { |
200 | bitmap_and_compl_into (lr_bitmap, &coalesced_pseudos_bitmap); |
201 | bitmap_ior_into (lr_bitmap, &used_pseudos_bitmap); |
202 | } |
203 | } |
204 | |
205 | /* Return true if pseudo REGNO can be potentially coalesced. */ |
206 | static bool |
207 | coalescable_pseudo_p (int regno) |
208 | { |
209 | lra_assert (regno >= FIRST_PSEUDO_REGISTER); |
210 | return (/* We don't want to coalesce regnos with equivalences, at |
211 | least without updating this info. */ |
212 | ira_reg_equiv[regno].constant == NULL_RTX |
213 | && ira_reg_equiv[regno].memory == NULL_RTX |
214 | && ira_reg_equiv[regno].invariant == NULL_RTX); |
215 | } |
216 | |
217 | /* The major function for aggressive pseudo coalescing of moves only |
218 | if the both pseudos were spilled and not special reload pseudos. */ |
219 | bool |
220 | lra_coalesce (void) |
221 | { |
222 | basic_block bb; |
223 | rtx_insn *mv, *insn, *next, **sorted_moves; |
224 | rtx set; |
225 | int i, mv_num, sregno, dregno; |
226 | int coalesced_moves; |
227 | int max_regno = max_reg_num (); |
228 | bitmap_head involved_insns_bitmap; |
229 | |
230 | timevar_push (tv: TV_LRA_COALESCE); |
231 | |
232 | if (lra_dump_file != NULL) |
233 | fprintf (stream: lra_dump_file, |
234 | format: "\n********** Pseudos coalescing #%d: **********\n\n" , |
235 | ++lra_coalesce_iter); |
236 | first_coalesced_pseudo = XNEWVEC (int, max_regno); |
237 | next_coalesced_pseudo = XNEWVEC (int, max_regno); |
238 | for (i = 0; i < max_regno; i++) |
239 | first_coalesced_pseudo[i] = next_coalesced_pseudo[i] = i; |
240 | sorted_moves = XNEWVEC (rtx_insn *, get_max_uid ()); |
241 | mv_num = 0; |
242 | /* Collect moves. */ |
243 | coalesced_moves = 0; |
244 | FOR_EACH_BB_FN (bb, cfun) |
245 | { |
246 | FOR_BB_INSNS_SAFE (bb, insn, next) |
247 | if (INSN_P (insn) |
248 | && (set = single_set (insn)) != NULL_RTX |
249 | && REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)) |
250 | && (sregno = REGNO (SET_SRC (set))) >= FIRST_PSEUDO_REGISTER |
251 | && (dregno = REGNO (SET_DEST (set))) >= FIRST_PSEUDO_REGISTER |
252 | && mem_move_p (regno1: sregno, regno2: dregno) |
253 | && coalescable_pseudo_p (regno: sregno) && coalescable_pseudo_p (regno: dregno) |
254 | && ! side_effects_p (set) |
255 | && !(lra_intersected_live_ranges_p |
256 | (lra_reg_info[sregno].live_ranges, |
257 | lra_reg_info[dregno].live_ranges))) |
258 | sorted_moves[mv_num++] = insn; |
259 | } |
260 | qsort (sorted_moves, mv_num, sizeof (rtx), move_freq_compare_func); |
261 | /* Coalesced copies, most frequently executed first. */ |
262 | bitmap_initialize (head: &coalesced_pseudos_bitmap, obstack: ®_obstack); |
263 | bitmap_initialize (head: &involved_insns_bitmap, obstack: ®_obstack); |
264 | for (i = 0; i < mv_num; i++) |
265 | { |
266 | mv = sorted_moves[i]; |
267 | set = single_set (insn: mv); |
268 | lra_assert (set != NULL && REG_P (SET_SRC (set)) |
269 | && REG_P (SET_DEST (set))); |
270 | sregno = REGNO (SET_SRC (set)); |
271 | dregno = REGNO (SET_DEST (set)); |
272 | if (first_coalesced_pseudo[sregno] == first_coalesced_pseudo[dregno]) |
273 | { |
274 | coalesced_moves++; |
275 | if (lra_dump_file != NULL) |
276 | fprintf |
277 | (stream: lra_dump_file, format: " Coalescing move %i:r%d-r%d (freq=%d)\n" , |
278 | INSN_UID (insn: mv), sregno, dregno, |
279 | REG_FREQ_FROM_BB (BLOCK_FOR_INSN (mv))); |
280 | /* We updated involved_insns_bitmap when doing the merge. */ |
281 | } |
282 | else if (!(lra_intersected_live_ranges_p |
283 | (lra_reg_info[first_coalesced_pseudo[sregno]].live_ranges, |
284 | lra_reg_info[first_coalesced_pseudo[dregno]].live_ranges))) |
285 | { |
286 | coalesced_moves++; |
287 | if (lra_dump_file != NULL) |
288 | fprintf |
289 | (stream: lra_dump_file, |
290 | format: " Coalescing move %i:r%d(%d)-r%d(%d) (freq=%d)\n" , |
291 | INSN_UID (insn: mv), sregno, ORIGINAL_REGNO (SET_SRC (set)), |
292 | dregno, ORIGINAL_REGNO (SET_DEST (set)), |
293 | REG_FREQ_FROM_BB (BLOCK_FOR_INSN (mv))); |
294 | bitmap_ior_into (&involved_insns_bitmap, |
295 | &lra_reg_info[sregno].insn_bitmap); |
296 | bitmap_ior_into (&involved_insns_bitmap, |
297 | &lra_reg_info[dregno].insn_bitmap); |
298 | merge_pseudos (regno1: sregno, regno2: dregno); |
299 | } |
300 | } |
301 | bitmap_initialize (head: &used_pseudos_bitmap, obstack: ®_obstack); |
302 | FOR_EACH_BB_FN (bb, cfun) |
303 | { |
304 | update_live_info (lr_bitmap: df_get_live_in (bb)); |
305 | update_live_info (lr_bitmap: df_get_live_out (bb)); |
306 | FOR_BB_INSNS_SAFE (bb, insn, next) |
307 | if (INSN_P (insn) |
308 | && bitmap_bit_p (&involved_insns_bitmap, INSN_UID (insn))) |
309 | { |
310 | if (! substitute_within_insn (insn)) |
311 | continue; |
312 | lra_update_insn_regno_info (insn); |
313 | if ((set = single_set (insn)) != NULL_RTX && set_noop_p (set)) |
314 | { |
315 | /* Coalesced move. */ |
316 | if (lra_dump_file != NULL) |
317 | fprintf (stream: lra_dump_file, format: " Removing move %i (freq=%d)\n" , |
318 | INSN_UID (insn), |
319 | REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn))); |
320 | lra_set_insn_deleted (insn); |
321 | } |
322 | } |
323 | } |
324 | /* If we have situation after inheritance pass: |
325 | |
326 | r1 <- p1 insn originally setting p1 |
327 | i1 <- r1 setting inheritance i1 from reload r1 |
328 | ... |
329 | ... <- ... p2 ... dead p2 |
330 | .. |
331 | p1 <- i1 |
332 | r2 <- i1 |
333 | ...<- ... r2 ... |
334 | |
335 | And we are coalescing p1 and p2 using p1. In this case i1 and p1 |
336 | should have different values, otherwise they can get the same |
337 | hard reg and this is wrong for insn using p2 before coalescing. |
338 | The situation even can be more complicated when new reload |
339 | pseudos occur after the inheriatnce. So invalidate the result |
340 | pseudos. */ |
341 | for (i = 0; i < max_regno; i++) |
342 | if (first_coalesced_pseudo[i] == i |
343 | && first_coalesced_pseudo[i] != next_coalesced_pseudo[i]) |
344 | { |
345 | lra_set_regno_unique_value (i); |
346 | if (lra_dump_file != NULL) |
347 | fprintf (stream: lra_dump_file, |
348 | format: " Make unique value for coalescing result r%d\n" , i); |
349 | } |
350 | bitmap_clear (&used_pseudos_bitmap); |
351 | bitmap_clear (&involved_insns_bitmap); |
352 | bitmap_clear (&coalesced_pseudos_bitmap); |
353 | if (lra_dump_file != NULL && coalesced_moves != 0) |
354 | fprintf (stream: lra_dump_file, format: "Coalesced Moves = %d\n" , coalesced_moves); |
355 | free (ptr: sorted_moves); |
356 | free (ptr: next_coalesced_pseudo); |
357 | free (ptr: first_coalesced_pseudo); |
358 | timevar_pop (tv: TV_LRA_COALESCE); |
359 | return coalesced_moves != 0; |
360 | } |
361 | |