1 | /* IRA conflict builder. |
2 | Copyright (C) 2006-2024 Free Software Foundation, Inc. |
3 | Contributed by Vladimir Makarov <vmakarov@redhat.com>. |
4 | |
5 | This file is part of GCC. |
6 | |
7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free |
9 | Software Foundation; either version 3, or (at your option) any later |
10 | version. |
11 | |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
15 | for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | #include "config.h" |
22 | #include "system.h" |
23 | #include "coretypes.h" |
24 | #include "backend.h" |
25 | #include "target.h" |
26 | #include "rtl.h" |
27 | #include "predict.h" |
28 | #include "memmodel.h" |
29 | #include "tm_p.h" |
30 | #include "insn-config.h" |
31 | #include "regs.h" |
32 | #include "ira.h" |
33 | #include "ira-int.h" |
34 | #include "sparseset.h" |
35 | #include "addresses.h" |
36 | |
37 | /* This file contains code responsible for allocno conflict creation, |
38 | allocno copy creation and allocno info accumulation on upper level |
39 | regions. */ |
40 | |
41 | /* ira_allocnos_num array of arrays of bits, recording whether two |
42 | allocno's conflict (can't go in the same hardware register). |
43 | |
44 | Some arrays will be used as conflict bit vector of the |
45 | corresponding allocnos see function build_object_conflicts. */ |
46 | static IRA_INT_TYPE **conflicts; |
47 | |
48 | /* Macro to test a conflict of C1 and C2 in `conflicts'. */ |
49 | #define OBJECTS_CONFLICT_P(C1, C2) \ |
50 | (OBJECT_MIN (C1) <= OBJECT_CONFLICT_ID (C2) \ |
51 | && OBJECT_CONFLICT_ID (C2) <= OBJECT_MAX (C1) \ |
52 | && TEST_MINMAX_SET_BIT (conflicts[OBJECT_CONFLICT_ID (C1)], \ |
53 | OBJECT_CONFLICT_ID (C2), \ |
54 | OBJECT_MIN (C1), OBJECT_MAX (C1))) |
55 | |
56 | |
57 | /* Record a conflict between objects OBJ1 and OBJ2. If necessary, |
58 | canonicalize the conflict by recording it for lower-order subobjects |
59 | of the corresponding allocnos. */ |
60 | static void |
61 | record_object_conflict (ira_object_t obj1, ira_object_t obj2) |
62 | { |
63 | ira_allocno_t a1 = OBJECT_ALLOCNO (obj1); |
64 | ira_allocno_t a2 = OBJECT_ALLOCNO (obj2); |
65 | int w1 = OBJECT_SUBWORD (obj1); |
66 | int w2 = OBJECT_SUBWORD (obj2); |
67 | int id1, id2; |
68 | |
69 | /* Canonicalize the conflict. If two identically-numbered words |
70 | conflict, always record this as a conflict between words 0. That |
71 | is the only information we need, and it is easier to test for if |
72 | it is collected in each allocno's lowest-order object. */ |
73 | if (w1 == w2 && w1 > 0) |
74 | { |
75 | obj1 = ALLOCNO_OBJECT (a1, 0); |
76 | obj2 = ALLOCNO_OBJECT (a2, 0); |
77 | } |
78 | id1 = OBJECT_CONFLICT_ID (obj1); |
79 | id2 = OBJECT_CONFLICT_ID (obj2); |
80 | |
81 | SET_MINMAX_SET_BIT (conflicts[id1], id2, OBJECT_MIN (obj1), |
82 | OBJECT_MAX (obj1)); |
83 | SET_MINMAX_SET_BIT (conflicts[id2], id1, OBJECT_MIN (obj2), |
84 | OBJECT_MAX (obj2)); |
85 | } |
86 | |
/* Build allocno conflict table by processing allocno live ranges.
   Return true if the table was built.  The table is not built if it
   is too big.  */
static bool
build_conflict_bit_table (void)
{
  int i;
  unsigned int j;
  enum reg_class aclass;
  int object_set_words, allocated_words_num, conflict_bit_vec_words_num;
  live_range_t r;
  ira_allocno_t allocno;
  ira_allocno_iterator ai;
  sparseset objects_live;
  ira_object_t obj;
  ira_allocno_object_iterator aoi;

  /* First pass: sum the sizes of all per-object conflict bit vectors
     and bail out early if the total would exceed the limit given by
     param_ira_max_conflict_table_size (in megabytes).  */
  allocated_words_num = 0;
  FOR_EACH_ALLOCNO (allocno, ai)
    FOR_EACH_ALLOCNO_OBJECT (allocno, obj, aoi)
      {
	/* An empty [min, max] conflict-id range needs no vector.  */
	if (OBJECT_MAX (obj) < OBJECT_MIN (obj))
	  continue;
	/* Number of IRA_INT_TYPE words covering MAX - MIN + 1 bits,
	   rounded up.  */
	conflict_bit_vec_words_num
	  = ((OBJECT_MAX (obj) - OBJECT_MIN (obj) + IRA_INT_BITS)
	     / IRA_INT_BITS);
	allocated_words_num += conflict_bit_vec_words_num;
	if ((uint64_t) allocated_words_num * sizeof (IRA_INT_TYPE)
	    > (uint64_t) param_ira_max_conflict_table_size * 1024 * 1024)
	  {
	    if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL)
	      fprintf (stream: ira_dump_file,
		       format: "+++Conflict table will be too big(>%dMB) "
		       "-- don't use it\n",
		       param_ira_max_conflict_table_size);
	    return false;
	  }
      }

  /* Second pass: allocate and zero the per-object bit vectors.  */
  conflicts = (IRA_INT_TYPE **) ira_allocate (sizeof (IRA_INT_TYPE *)
					      * ira_objects_num);
  allocated_words_num = 0;
  FOR_EACH_ALLOCNO (allocno, ai)
    FOR_EACH_ALLOCNO_OBJECT (allocno, obj, aoi)
      {
	int id = OBJECT_CONFLICT_ID (obj);
	if (OBJECT_MAX (obj) < OBJECT_MIN (obj))
	  {
	    conflicts[id] = NULL;
	    continue;
	  }
	conflict_bit_vec_words_num
	  = ((OBJECT_MAX (obj) - OBJECT_MIN (obj) + IRA_INT_BITS)
	     / IRA_INT_BITS);
	allocated_words_num += conflict_bit_vec_words_num;
	conflicts[id]
	  = (IRA_INT_TYPE *) ira_allocate (sizeof (IRA_INT_TYPE)
					   * conflict_bit_vec_words_num);
	memset (s: conflicts[id], c: 0,
		n: sizeof (IRA_INT_TYPE) * conflict_bit_vec_words_num);
      }

  object_set_words = (ira_objects_num + IRA_INT_BITS - 1) / IRA_INT_BITS;
  if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL)
    fprintf (stream: ira_dump_file,
	     format: "+++Allocating " HOST_SIZE_T_PRINT_UNSIGNED
	     " bytes for conflict table (uncompressed size "
	     HOST_SIZE_T_PRINT_UNSIGNED ")\n",
	     (fmt_size_t) (sizeof (IRA_INT_TYPE) * allocated_words_num),
	     (fmt_size_t) (sizeof (IRA_INT_TYPE) * object_set_words
			   * ira_objects_num));

  /* Sweep over all program points.  At each range start, record a
     conflict between the starting object and every currently-live
     object whose register class intersects with it.  */
  objects_live = sparseset_alloc (n_elms: ira_objects_num);
  for (i = 0; i < ira_max_point; i++)
    {
      for (r = ira_start_point_ranges[i]; r != NULL; r = r->start_next)
	{
	  ira_object_t obj = r->object;
	  ira_allocno_t allocno = OBJECT_ALLOCNO (obj);
	  int id = OBJECT_CONFLICT_ID (obj);

	  gcc_assert (id < ira_objects_num);

	  aclass = ALLOCNO_CLASS (allocno);
	  EXECUTE_IF_SET_IN_SPARSESET (objects_live, j)
	    {
	      ira_object_t live_obj = ira_object_id_map[j];
	      ira_allocno_t live_a = OBJECT_ALLOCNO (live_obj);
	      enum reg_class live_aclass = ALLOCNO_CLASS (live_a);

	      /* Objects whose classes share no hard registers can
		 never conflict for allocation purposes.  */
	      if (ira_reg_classes_intersect_p[aclass][live_aclass]
		  /* Don't set up conflict for the allocno with itself.  */
		  && live_a != allocno)
		{
		  record_object_conflict (obj1: obj, obj2: live_obj);
		}
	    }
	  sparseset_set_bit (s: objects_live, e: id);
	}

      /* Ranges finishing at this point make their objects dead.  */
      for (r = ira_finish_point_ranges[i]; r != NULL; r = r->finish_next)
	sparseset_clear_bit (objects_live, OBJECT_CONFLICT_ID (r->object));
    }
  sparseset_free (objects_live);
  return true;
}
193 | |
194 | /* Return true iff allocnos A1 and A2 cannot be allocated to the same |
195 | register due to conflicts. */ |
196 | |
197 | static bool |
198 | allocnos_conflict_for_copy_p (ira_allocno_t a1, ira_allocno_t a2) |
199 | { |
200 | /* Due to the fact that we canonicalize conflicts (see |
201 | record_object_conflict), we only need to test for conflicts of |
202 | the lowest order words. */ |
203 | ira_object_t obj1 = ALLOCNO_OBJECT (a1, 0); |
204 | ira_object_t obj2 = ALLOCNO_OBJECT (a2, 0); |
205 | |
206 | return OBJECTS_CONFLICT_P (obj1, obj2); |
207 | } |
208 | |
209 | /* Check that X is REG or SUBREG of REG. */ |
210 | #define REG_SUBREG_P(x) \ |
211 | (REG_P (x) || (GET_CODE (x) == SUBREG && REG_P (SUBREG_REG (x)))) |
212 | |
/* Return X if X is a REG, otherwise it should be SUBREG of REG and
   the function returns the reg in this case.  *OFFSET will be set to
   0 in the first case or the regno offset in the second case.  */
static rtx
go_through_subreg (rtx x, int *offset)
{
  rtx reg;

  *offset = 0;
  if (REG_P (x))
    return x;
  ira_assert (GET_CODE (x) == SUBREG);
  reg = SUBREG_REG (x);
  ira_assert (REG_P (reg));
  /* For a hard register the register-number offset of the subreg is
     known exactly.  */
  if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
    *offset = subreg_regno_offset (REGNO (reg), GET_MODE (reg),
				   SUBREG_BYTE (x), GET_MODE (x));
  /* For a pseudo, derive the offset from the subreg byte and the
     natural register size of the outer mode.  */
  else if (!can_div_trunc_p (SUBREG_BYTE (x),
			     REGMODE_NATURAL_SIZE (GET_MODE (x)), quotient: offset))
    /* Checked by validate_subreg.  We must know at compile time which
       inner hard registers are being accessed.  */
    gcc_unreachable ();
  return reg;
}
237 | |
/* Return the recomputed frequency for this shuffle copy or its similar
   case; since it is not for a real move insn, make it smaller.  */

static int
get_freq_for_shuffle_copy (int freq)
{
  /* Scale the frequency down by a factor of 8, but never below 1.  */
  if (freq < 8)
    return 1;
  return freq / 8;
}
246 | |
/* Process registers REG1 and REG2 in move INSN with execution
   frequency FREQ.  The function also processes the registers in a
   potential move insn (INSN == NULL in this case) with frequency
   FREQ.  The function can modify hard register costs of the
   corresponding allocnos or create a copy involving the corresponding
   allocnos.  The function does nothing if the both registers are hard
   registers.  When nothing is changed, the function returns FALSE.
   SINGLE_INPUT_OP_HAS_CSTR_P is only meaningful when constraint_p
   is true, see function ira_get_dup_out_num for its meaning.  */
static bool
process_regs_for_copy (rtx reg1, rtx reg2, bool constraint_p, rtx_insn *insn,
		       int freq, bool single_input_op_has_cstr_p = true)
{
  int allocno_preferenced_hard_regno, index, offset1, offset2;
  int cost, conflict_cost, move_cost;
  bool only_regs_p;
  ira_allocno_t a;
  reg_class_t rclass, aclass;
  machine_mode mode;
  ira_copy_t cp;

  gcc_assert (REG_SUBREG_P (reg1) && REG_SUBREG_P (reg2));
  only_regs_p = REG_P (reg1) && REG_P (reg2);
  /* Strip any subregs, remembering the subword offsets.  */
  reg1 = go_through_subreg (x: reg1, offset: &offset1);
  reg2 = go_through_subreg (x: reg2, offset: &offset2);
  /* Set up hard regno preferenced by allocno.  If allocno gets the
     hard regno the copy (or potential move) insn will be removed.  */
  if (HARD_REGISTER_P (reg1))
    {
      if (HARD_REGISTER_P (reg2))
	/* Two hard registers: nothing for the allocator to decide.  */
	return false;
      allocno_preferenced_hard_regno = REGNO (reg1) + offset1 - offset2;
      a = ira_curr_regno_allocno_map[REGNO (reg2)];
    }
  else if (HARD_REGISTER_P (reg2))
    {
      allocno_preferenced_hard_regno = REGNO (reg2) + offset2 - offset1;
      a = ira_curr_regno_allocno_map[REGNO (reg1)];
    }
  else
    {
      /* Both are pseudos: try to record an allocno copy instead of a
	 hard register preference.  */
      ira_allocno_t a1 = ira_curr_regno_allocno_map[REGNO (reg1)];
      ira_allocno_t a2 = ira_curr_regno_allocno_map[REGNO (reg2)];

      /* A copy is only useful if the allocnos do not conflict and
	 refer to matching subwords of comparable precision.  */
      if (!allocnos_conflict_for_copy_p (a1, a2)
	  && offset1 == offset2
	  && ordered_p (a: GET_MODE_PRECISION (ALLOCNO_MODE (a1)),
			b: GET_MODE_PRECISION (ALLOCNO_MODE (a2))))
	{
	  cp = ira_add_allocno_copy (a1, a2, freq, constraint_p, insn,
				     ira_curr_loop_tree_node);
	  bitmap_set_bit (ira_curr_loop_tree_node->local_copies, cp->num);
	  return true;
	}
      else
	return false;
    }

  if (! IN_RANGE (allocno_preferenced_hard_regno,
		  0, FIRST_PSEUDO_REGISTER - 1))
    /* Cannot be tied.  */
    return false;
  rclass = REGNO_REG_CLASS (allocno_preferenced_hard_regno);
  mode = ALLOCNO_MODE (a);
  aclass = ALLOCNO_CLASS (a);
  if (only_regs_p && insn != NULL_RTX
      && reg_class_size[rclass] <= ira_reg_class_max_nregs [rclass][mode])
    /* It is already taken into account in ira-costs.cc.  */
    return false;
  index = ira_class_hard_reg_index[aclass][allocno_preferenced_hard_regno];
  if (index < 0)
    /* Cannot be tied.  It is not in the allocno class.  */
    return false;
  ira_init_register_move_cost_if_necessary (mode);
  if (HARD_REGISTER_P (reg1))
    move_cost = ira_register_move_cost[mode][aclass][rclass];
  else
    move_cost = ira_register_move_cost[mode][rclass][aclass];

  if (!single_input_op_has_cstr_p)
    {
      /* When this is a constraint copy and the matching constraint
	 doesn't only exist for this given operand but also for some
	 other operand(s), it means saving the possible move cost does
	 NOT need to require reg1 and reg2 to use the same hardware
	 register, so this hardware preference isn't required to be
	 fixed.  To avoid it to over prefer this hardware register,
	 and over disparage this hardware register on conflicted
	 objects, we need some cost tweaking here, similar to what
	 we do for shuffle copy.  */
      gcc_assert (constraint_p);
      int reduced_freq = get_freq_for_shuffle_copy (freq);
      if (HARD_REGISTER_P (reg1))
	/* For reg2 = opcode(reg1, reg3 ...), assume that reg3 is a
	   pseudo register which has matching constraint on reg2,
	   even if reg2 isn't assigned by reg1, it's still possible
	   not to have register moves if reg2 and reg3 use the same
	   hardware register.  So to avoid the allocation to over
	   prefer reg1, we can just take it as a shuffle copy.  */
	cost = conflict_cost = move_cost * reduced_freq;
      else
	{
	  /* For reg1 = opcode(reg2, reg3 ...), assume that reg3 is a
	     pseudo register which has matching constraint on reg2,
	     to save the register move, it's better to assign reg1
	     to either of reg2 and reg3 (or one of other pseudos like
	     reg3), it's reasonable to use freq for the cost.  But
	     for conflict_cost, since reg2 and reg3 conflicts with
	     each other, both of them has the chance to be assigned
	     by reg1, assume reg3 has one copy which also conflicts
	     with reg2, we shouldn't make it less preferred on reg1
	     since reg3 has the same chance to be assigned by reg1.
	     So it adjusts the conflic_cost to make it same as what
	     we use for shuffle copy.  */
	  cost = move_cost * freq;
	  conflict_cost = move_cost * reduced_freq;
	}
    }
  else
    cost = conflict_cost = move_cost * freq;

  /* Propagate the cost adjustment and the preference to A and to all
     of its parent/cap allocnos on the upper region levels.  */
  do
    {
      ira_allocate_and_set_costs
	(vec: &ALLOCNO_HARD_REG_COSTS (a), aclass,
	 ALLOCNO_CLASS_COST (a));
      ira_allocate_and_set_costs
	(vec: &ALLOCNO_CONFLICT_HARD_REG_COSTS (a), aclass, val: 0);
      ALLOCNO_HARD_REG_COSTS (a)[index] -= cost;
      ALLOCNO_CONFLICT_HARD_REG_COSTS (a)[index] -= conflict_cost;
      /* Keep the class cost as the minimum over hard register costs.  */
      if (ALLOCNO_HARD_REG_COSTS (a)[index] < ALLOCNO_CLASS_COST (a))
	ALLOCNO_CLASS_COST (a) = ALLOCNO_HARD_REG_COSTS (a)[index];
      ira_add_allocno_pref (a, allocno_preferenced_hard_regno, freq);
      a = ira_parent_or_cap_allocno (a);
    }
  while (a != NULL);
  return true;
}
385 | |
386 | /* Return true if output operand OUTPUT and input operand INPUT of |
387 | INSN can use the same register class for at least one alternative. |
388 | INSN is already described in recog_data and recog_op_alt. */ |
389 | static bool |
390 | can_use_same_reg_p (rtx_insn *insn, int output, int input) |
391 | { |
392 | alternative_mask preferred = get_preferred_alternatives (insn); |
393 | for (int nalt = 0; nalt < recog_data.n_alternatives; nalt++) |
394 | { |
395 | if (!TEST_BIT (preferred, nalt)) |
396 | continue; |
397 | |
398 | const operand_alternative *op_alt |
399 | = &recog_op_alt[nalt * recog_data.n_operands]; |
400 | if (op_alt[input].matches == output) |
401 | return true; |
402 | |
403 | if (op_alt[output].earlyclobber) |
404 | continue; |
405 | |
406 | if (ira_reg_class_intersect[op_alt[input].cl][op_alt[output].cl] |
407 | != NO_REGS) |
408 | return true; |
409 | } |
410 | return false; |
411 | } |
412 | |
/* Process all of the output registers of the current insn (INSN) which
   are not bound (BOUND_P) and the input register REG (its operand number
   OP_NUM) which dies in the insn as if there were a move insn between
   them with frequency FREQ.  */
static void
process_reg_shuffles (rtx_insn *insn, rtx reg, int op_num, int freq,
		      bool *bound_p)
{
  int i;
  rtx another_reg;

  gcc_assert (REG_SUBREG_P (reg));
  for (i = 0; i < recog_data.n_operands; i++)
    {
      another_reg = recog_data.operand[i];

      /* Consider only unbound output register operands that could
	 share a register with REG in some alternative.  For a
	 commutative input (leading '%' in the constraint of this or
	 the previous operand) also try the other operand of the
	 commutative pair.  */
      if (!REG_SUBREG_P (another_reg) || op_num == i
	  || recog_data.operand_type[i] != OP_OUT
	  || bound_p[i]
	  || (!can_use_same_reg_p (insn, output: i, input: op_num)
	      && (recog_data.constraints[op_num][0] != '%'
		  || !can_use_same_reg_p (insn, output: i, input: op_num + 1))
	      && (op_num == 0
		  || recog_data.constraints[op_num - 1][0] != '%'
		  || !can_use_same_reg_p (insn, output: i, input: op_num - 1))))
	continue;

      /* Treat REG and the output as a potential move (INSN == NULL
	 means a potential rather than a real move).  */
      process_regs_for_copy (reg1: reg, reg2: another_reg, constraint_p: false, NULL, freq);
    }
}
443 | |
/* Process INSN and create allocno copies if necessary.  For example,
   it might be because INSN is a pseudo-register move or INSN is two
   operand insn.  */
static void
add_insn_allocno_copies (rtx_insn *insn)
{
  rtx set, operand, dup;
  bool bound_p[MAX_RECOG_OPERANDS];
  int i, n, freq;
  alternative_mask alts;

  freq = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn));
  if (freq == 0)
    freq = 1;
  /* A simple reg-to-reg move whose source dies here: record it as a
     real copy and we are done.  */
  if ((set = single_set (insn)) != NULL_RTX
      && REG_SUBREG_P (SET_DEST (set)) && REG_SUBREG_P (SET_SRC (set))
      && ! side_effects_p (set)
      && find_reg_note (insn, REG_DEAD,
			REG_P (SET_SRC (set))
			? SET_SRC (set)
			: SUBREG_REG (SET_SRC (set))) != NULL_RTX)
    {
      process_regs_for_copy (SET_SRC (set), SET_DEST (set),
			     constraint_p: false, insn, freq);
      return;
    }
  /* Fast check of possibility of constraint or shuffle copies.  If
     there are no dead registers, there will be no such copies.  */
  if (! find_reg_note (insn, REG_DEAD, NULL_RTX))
    return;
  alts = ira_setup_alts (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    bound_p[i] = false;
  /* First pass: handle operand pairs tied by matching constraints,
     marking the matched outputs as bound.  */
  for (i = 0; i < recog_data.n_operands; i++)
    {
      operand = recog_data.operand[i];
      if (! REG_SUBREG_P (operand))
	continue;
      bool single_input_op_has_cstr_p;
      if ((n = ira_get_dup_out_num (i, alts, single_input_op_has_cstr_p)) >= 0)
	{
	  bound_p[n] = true;
	  dup = recog_data.operand[n];
	  if (REG_SUBREG_P (dup)
	      && find_reg_note (insn, REG_DEAD,
				REG_P (operand)
				? operand
				: SUBREG_REG (operand)) != NULL_RTX)
	    process_regs_for_copy (reg1: operand, reg2: dup, constraint_p: true, NULL, freq,
				   single_input_op_has_cstr_p);
	}
    }
  /* Second pass: for each dying input operand, create shuffle copies
     to the remaining unbound outputs.  */
  for (i = 0; i < recog_data.n_operands; i++)
    {
      operand = recog_data.operand[i];
      if (REG_SUBREG_P (operand)
	  && find_reg_note (insn, REG_DEAD,
			    REG_P (operand)
			    ? operand : SUBREG_REG (operand)) != NULL_RTX)
	{
	  /* If an operand dies, prefer its hard register for the output
	     operands by decreasing the hard register cost or creating
	     the corresponding allocno copies.  The cost will not
	     correspond to a real move insn cost, so make the frequency
	     smaller.  */
	  int new_freq = get_freq_for_shuffle_copy (freq);
	  process_reg_shuffles (insn, reg: operand, op_num: i, freq: new_freq, bound_p);
	}
    }
}
514 | |
515 | /* Add copies originated from BB given by LOOP_TREE_NODE. */ |
516 | static void |
517 | add_copies (ira_loop_tree_node_t loop_tree_node) |
518 | { |
519 | basic_block bb; |
520 | rtx_insn *insn; |
521 | |
522 | bb = loop_tree_node->bb; |
523 | if (bb == NULL) |
524 | return; |
525 | FOR_BB_INSNS (bb, insn) |
526 | if (NONDEBUG_INSN_P (insn)) |
527 | add_insn_allocno_copies (insn); |
528 | } |
529 | |
530 | /* Propagate copies the corresponding allocnos on upper loop tree |
531 | level. */ |
532 | static void |
533 | propagate_copies (void) |
534 | { |
535 | ira_copy_t cp; |
536 | ira_copy_iterator ci; |
537 | ira_allocno_t a1, a2, parent_a1, parent_a2; |
538 | |
539 | FOR_EACH_COPY (cp, ci) |
540 | { |
541 | a1 = cp->first; |
542 | a2 = cp->second; |
543 | if (ALLOCNO_LOOP_TREE_NODE (a1) == ira_loop_tree_root) |
544 | continue; |
545 | ira_assert ((ALLOCNO_LOOP_TREE_NODE (a2) != ira_loop_tree_root)); |
546 | parent_a1 = ira_parent_or_cap_allocno (a1); |
547 | parent_a2 = ira_parent_or_cap_allocno (a2); |
548 | ira_assert (parent_a1 != NULL && parent_a2 != NULL); |
549 | if (! allocnos_conflict_for_copy_p (a1: parent_a1, a2: parent_a2)) |
550 | ira_add_allocno_copy (parent_a1, parent_a2, cp->freq, |
551 | cp->constraint_p, cp->insn, cp->loop_tree_node); |
552 | } |
553 | } |
554 | |
555 | /* Array used to collect all conflict allocnos for given allocno. */ |
556 | static ira_object_t *collected_conflict_objects; |
557 | |
558 | /* Build conflict vectors or bit conflict vectors (whatever is more |
559 | profitable) for object OBJ from the conflict table. */ |
560 | static void |
561 | build_object_conflicts (ira_object_t obj) |
562 | { |
563 | int i, px, parent_num; |
564 | ira_allocno_t parent_a, another_parent_a; |
565 | ira_object_t parent_obj; |
566 | ira_allocno_t a = OBJECT_ALLOCNO (obj); |
567 | IRA_INT_TYPE *object_conflicts; |
568 | minmax_set_iterator asi; |
569 | int parent_min, parent_max ATTRIBUTE_UNUSED; |
570 | |
571 | object_conflicts = conflicts[OBJECT_CONFLICT_ID (obj)]; |
572 | px = 0; |
573 | FOR_EACH_BIT_IN_MINMAX_SET (object_conflicts, |
574 | OBJECT_MIN (obj), OBJECT_MAX (obj), i, asi) |
575 | { |
576 | ira_object_t another_obj = ira_object_id_map[i]; |
577 | ira_allocno_t another_a = OBJECT_ALLOCNO (obj); |
578 | |
579 | ira_assert (ira_reg_classes_intersect_p |
580 | [ALLOCNO_CLASS (a)][ALLOCNO_CLASS (another_a)]); |
581 | collected_conflict_objects[px++] = another_obj; |
582 | } |
583 | if (ira_conflict_vector_profitable_p (obj, px)) |
584 | { |
585 | ira_object_t *vec; |
586 | ira_allocate_conflict_vec (obj, px); |
587 | vec = OBJECT_CONFLICT_VEC (obj); |
588 | memcpy (dest: vec, src: collected_conflict_objects, n: sizeof (ira_object_t) * px); |
589 | vec[px] = NULL; |
590 | OBJECT_NUM_CONFLICTS (obj) = px; |
591 | } |
592 | else |
593 | { |
594 | int conflict_bit_vec_words_num; |
595 | |
596 | OBJECT_CONFLICT_ARRAY (obj) = object_conflicts; |
597 | if (OBJECT_MAX (obj) < OBJECT_MIN (obj)) |
598 | conflict_bit_vec_words_num = 0; |
599 | else |
600 | conflict_bit_vec_words_num |
601 | = ((OBJECT_MAX (obj) - OBJECT_MIN (obj) + IRA_INT_BITS) |
602 | / IRA_INT_BITS); |
603 | OBJECT_CONFLICT_ARRAY_SIZE (obj) |
604 | = conflict_bit_vec_words_num * sizeof (IRA_INT_TYPE); |
605 | } |
606 | |
607 | parent_a = ira_parent_or_cap_allocno (a); |
608 | if (parent_a == NULL) |
609 | return; |
610 | ira_assert (ALLOCNO_CLASS (a) == ALLOCNO_CLASS (parent_a)); |
611 | ira_assert (ALLOCNO_NUM_OBJECTS (a) == ALLOCNO_NUM_OBJECTS (parent_a)); |
612 | parent_obj = ALLOCNO_OBJECT (parent_a, OBJECT_SUBWORD (obj)); |
613 | parent_num = OBJECT_CONFLICT_ID (parent_obj); |
614 | parent_min = OBJECT_MIN (parent_obj); |
615 | parent_max = OBJECT_MAX (parent_obj); |
616 | FOR_EACH_BIT_IN_MINMAX_SET (object_conflicts, |
617 | OBJECT_MIN (obj), OBJECT_MAX (obj), i, asi) |
618 | { |
619 | ira_object_t another_obj = ira_object_id_map[i]; |
620 | ira_allocno_t another_a = OBJECT_ALLOCNO (another_obj); |
621 | int another_word = OBJECT_SUBWORD (another_obj); |
622 | |
623 | ira_assert (ira_reg_classes_intersect_p |
624 | [ALLOCNO_CLASS (a)][ALLOCNO_CLASS (another_a)]); |
625 | |
626 | another_parent_a = ira_parent_or_cap_allocno (another_a); |
627 | if (another_parent_a == NULL) |
628 | continue; |
629 | ira_assert (ALLOCNO_NUM (another_parent_a) >= 0); |
630 | ira_assert (ALLOCNO_CLASS (another_a) |
631 | == ALLOCNO_CLASS (another_parent_a)); |
632 | ira_assert (ALLOCNO_NUM_OBJECTS (another_a) |
633 | == ALLOCNO_NUM_OBJECTS (another_parent_a)); |
634 | SET_MINMAX_SET_BIT (conflicts[parent_num], |
635 | OBJECT_CONFLICT_ID (ALLOCNO_OBJECT (another_parent_a, |
636 | another_word)), |
637 | parent_min, parent_max); |
638 | } |
639 | } |
640 | |
/* Build conflict vectors or bit conflict vectors (whatever is more
   profitable) of all allocnos from the conflict table.  */
static void
build_conflicts (void)
{
  int i;
  ira_allocno_t a, cap;

  /* Scratch array shared by all build_object_conflicts calls for
     collecting each object's conflicting objects.  */
  collected_conflict_objects
    = (ira_object_t *) ira_allocate (sizeof (ira_object_t)
				     * ira_objects_num);
  /* Process every allocno of every pseudo, walking regnos from the
     highest down, and also every cap of each allocno.  */
  for (i = max_reg_num () - 1; i >= FIRST_PSEUDO_REGISTER; i--)
    for (a = ira_regno_allocno_map[i];
	 a != NULL;
	 a = ALLOCNO_NEXT_REGNO_ALLOCNO (a))
      {
	int j, nregs = ALLOCNO_NUM_OBJECTS (a);
	for (j = 0; j < nregs; j++)
	  {
	    ira_object_t obj = ALLOCNO_OBJECT (a, j);
	    build_object_conflicts (obj);
	    /* Also build conflicts for the corresponding subword
	       object of every cap of A.  */
	    for (cap = ALLOCNO_CAP (a); cap != NULL; cap = ALLOCNO_CAP (cap))
	      {
		ira_object_t cap_obj = ALLOCNO_OBJECT (cap, j);
		gcc_assert (ALLOCNO_NUM_OBJECTS (cap) == ALLOCNO_NUM_OBJECTS (a));
		build_object_conflicts (obj: cap_obj);
	      }
	  }
      }
  ira_free (addr: collected_conflict_objects);
}
672 | |
673 | |
674 | |
675 | /* Print hard reg set SET with TITLE to FILE. */ |
676 | static void |
677 | print_hard_reg_set (FILE *file, const char *title, HARD_REG_SET set) |
678 | { |
679 | int i, start, end; |
680 | |
681 | fputs (s: title, stream: file); |
682 | for (start = end = -1, i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
683 | { |
684 | bool reg_included = TEST_HARD_REG_BIT (set, bit: i); |
685 | |
686 | if (reg_included) |
687 | { |
688 | if (start == -1) |
689 | start = i; |
690 | end = i; |
691 | } |
692 | if (start >= 0 && (!reg_included || i == FIRST_PSEUDO_REGISTER - 1)) |
693 | { |
694 | if (start == end) |
695 | fprintf (stream: file, format: " %d" , start); |
696 | else if (start == end + 1) |
697 | fprintf (stream: file, format: " %d %d" , start, end); |
698 | else |
699 | fprintf (stream: file, format: " %d-%d" , start, end); |
700 | start = -1; |
701 | } |
702 | } |
703 | putc (c: '\n', stream: file); |
704 | } |
705 | |
/* Print the conflicts of allocno A to FILE.  If REG_P, identify
   allocnos only by their regnos; otherwise also print allocno numbers
   and their regions (basic block or loop).  */
static void
print_allocno_conflicts (FILE * file, bool reg_p, ira_allocno_t a)
{
  HARD_REG_SET conflicting_hard_regs;
  basic_block bb;
  int n, i;

  if (reg_p)
    fprintf (stream: file, format: ";; r%d", ALLOCNO_REGNO (a));
  else
    {
      fprintf (stream: file, format: ";; a%d(r%d,", ALLOCNO_NUM (a), ALLOCNO_REGNO (a));
      /* Print the allocno's region: a basic block or a loop.  */
      if ((bb = ALLOCNO_LOOP_TREE_NODE (a)->bb) != NULL)
	fprintf (stream: file, format: "b%d", bb->index);
      else
	fprintf (stream: file, format: "l%d", ALLOCNO_LOOP_TREE_NODE (a)->loop_num);
      putc (c: ')', stream: file);
    }

  fputs (s: " conflicts:", stream: file);
  n = ALLOCNO_NUM_OBJECTS (a);
  for (i = 0; i < n; i++)
    {
      ira_object_t obj = ALLOCNO_OBJECT (a, i);
      ira_object_t conflict_obj;
      ira_object_conflict_iterator oci;

      /* No conflict info was built for this object; print the two
	 headers empty and move on.  */
      if (OBJECT_CONFLICT_ARRAY (obj) == NULL)
	{
	  fprintf (stream: file, format: "\n;; total conflict hard regs:\n");
	  fprintf (stream: file, format: ";; conflict hard regs:\n\n");
	  continue;
	}

      if (n > 1)
	fprintf (stream: file, format: "\n;; subobject %d:", i);
      FOR_EACH_OBJECT_CONFLICT (obj, conflict_obj, oci)
	{
	  ira_allocno_t conflict_a = OBJECT_ALLOCNO (conflict_obj);
	  if (reg_p)
	    fprintf (stream: file, format: " r%d,", ALLOCNO_REGNO (conflict_a));
	  else
	    {
	      fprintf (stream: file, format: " a%d(r%d", ALLOCNO_NUM (conflict_a),
		       ALLOCNO_REGNO (conflict_a));
	      /* For multi-object allocnos also print the subword.  */
	      if (ALLOCNO_NUM_OBJECTS (conflict_a) > 1)
		fprintf (stream: file, format: ",w%d", OBJECT_SUBWORD (conflict_obj));
	      if ((bb = ALLOCNO_LOOP_TREE_NODE (conflict_a)->bb) != NULL)
		fprintf (stream: file, format: ",b%d", bb->index);
	      else
		fprintf (stream: file, format: ",l%d",
			 ALLOCNO_LOOP_TREE_NODE (conflict_a)->loop_num);
	      putc (c: ')', stream: file);
	    }
	}
      /* Restrict the printed hard reg sets to allocatable registers
	 of the allocno's class.  */
      conflicting_hard_regs = (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj)
			       & ~ira_no_alloc_regs
			       & reg_class_contents[ALLOCNO_CLASS (a)]);
      print_hard_reg_set (file, title: "\n;; total conflict hard regs:",
			  set: conflicting_hard_regs);

      conflicting_hard_regs = (OBJECT_CONFLICT_HARD_REGS (obj)
			       & ~ira_no_alloc_regs
			       & reg_class_contents[ALLOCNO_CLASS (a)]);
      print_hard_reg_set (file, title: ";; conflict hard regs:",
			  set: conflicting_hard_regs);
      putc (c: '\n', stream: file);
    }

}
776 | |
777 | /* Print information about allocno or only regno (if REG_P) conflicts |
778 | to FILE. */ |
779 | static void |
780 | print_conflicts (FILE *file, bool reg_p) |
781 | { |
782 | ira_allocno_t a; |
783 | ira_allocno_iterator ai; |
784 | |
785 | FOR_EACH_ALLOCNO (a, ai) |
786 | print_allocno_conflicts (file, reg_p, a); |
787 | putc (c: '\n', stream: file); |
788 | } |
789 | |
/* Print information about allocno or only regno (if REG_P) conflicts
   to stderr.  Intended to be called from a debugger.  */
void
ira_debug_conflicts (bool reg_p)
{
  print_conflicts (stderr, reg_p);
}
797 | |
798 | |
799 | |
/* Entry function which builds allocno conflicts and allocno copies
   and accumulate some allocno info on upper level regions.  */
void
ira_build_conflicts (void)
{
  enum reg_class base;
  ira_allocno_t a;
  ira_allocno_iterator ai;
  HARD_REG_SET temp_hard_reg_set;

  if (ira_conflicts_p)
    {
      /* Building may fail (return false) if the table would be too
	 big; in that case IRA continues without a conflict table.  */
      ira_conflicts_p = build_conflict_bit_table ();
      if (ira_conflicts_p)
	{
	  ira_object_t obj;
	  ira_object_iterator oi;

	  build_conflicts ();
	  ira_traverse_loop_tree (true, ira_loop_tree_root, add_copies, NULL);
	  /* We need finished conflict table for the subsequent call.  */
	  if (flag_ira_region == IRA_REGION_ALL
	      || flag_ira_region == IRA_REGION_MIXED)
	    propagate_copies ();

	  /* Now we can free memory for the conflict table (see function
	     build_object_conflicts for details).  */
	  FOR_EACH_OBJECT (obj, oi)
	    {
	      /* Bit vectors that build_object_conflicts adopted as the
		 object's conflict array must stay allocated.  */
	      if (OBJECT_CONFLICT_ARRAY (obj) != conflicts[OBJECT_CONFLICT_ID (obj)])
		ira_free (addr: conflicts[OBJECT_CONFLICT_ID (obj)]);
	    }
	  ira_free (addr: conflicts);
	}
    }
  /* Compute the allocatable registers of a likely-spilled base
     register class; they are treated specially for allocnos crossing
     calls below.  */
  base = base_reg_class (VOIDmode, ADDR_SPACE_GENERIC, outer_code: ADDRESS, index_code: SCRATCH);
  if (! targetm.class_likely_spilled_p (base))
    CLEAR_HARD_REG_SET (set&: temp_hard_reg_set);
  else
    temp_hard_reg_set = reg_class_contents[base] & ~ira_no_alloc_regs;
  FOR_EACH_ALLOCNO (a, ai)
    {
      int i, n = ALLOCNO_NUM_OBJECTS (a);

      for (i = 0; i < n; i++)
	{
	  ira_object_t obj = ALLOCNO_OBJECT (a, i);
	  rtx allocno_reg = regno_reg_rtx [ALLOCNO_REGNO (a)];

	  /* For debugging purposes don't put user defined variables in
	     callee-clobbered registers.  However, do allow parameters
	     in callee-clobbered registers to improve debugging.  This
	     is a bit of a fragile hack.  */
	  if (optimize == 0
	      && REG_USERVAR_P (allocno_reg)
	      && ! reg_is_parm_p (allocno_reg))
	    {
	      HARD_REG_SET new_conflict_regs = crtl->abi->full_reg_clobbers ();
	      OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= new_conflict_regs;
	      OBJECT_CONFLICT_HARD_REGS (obj) |= new_conflict_regs;
	    }

	  /* An allocno living across calls conflicts with the call-
	     clobbered registers it would need saved; with caller
	     saves, the conflicts are kept only for non-savable regs
	     and for the likely-spilled base class regs.  */
	  if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0)
	    {
	      HARD_REG_SET new_conflict_regs = ira_need_caller_save_regs (a);
	      if (flag_caller_saves)
		new_conflict_regs &= (~savable_regs | temp_hard_reg_set);
	      OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= new_conflict_regs;
	      OBJECT_CONFLICT_HARD_REGS (obj) |= new_conflict_regs;
	    }

	  /* Now we deal with paradoxical subreg cases where certain registers
	     cannot be accessed in the widest mode.  */
	  machine_mode outer_mode = ALLOCNO_WMODE (a);
	  machine_mode inner_mode = ALLOCNO_MODE (a);
	  if (paradoxical_subreg_p (outermode: outer_mode, innermode: inner_mode))
	    {
	      enum reg_class aclass = ALLOCNO_CLASS (a);
	      for (int j = ira_class_hard_regs_num[aclass] - 1; j >= 0; --j)
		{
		  int inner_regno = ira_class_hard_regs[aclass][j];
		  int outer_regno = simplify_subreg_regno (inner_regno,
							   inner_mode, 0,
							   outer_mode);
		  /* Exclude hard registers for which the widest-mode
		     access is not representable or leaves the class.  */
		  if (outer_regno < 0
		      || !in_hard_reg_set_p (reg_class_contents[aclass],
					     mode: outer_mode, regno: outer_regno))
		    {
		      SET_HARD_REG_BIT (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj),
					bit: inner_regno);
		      SET_HARD_REG_BIT (OBJECT_CONFLICT_HARD_REGS (obj),
					bit: inner_regno);
		    }
		}
	    }
	}
    }
  if (optimize && ira_conflicts_p
      && internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
    print_conflicts (file: ira_dump_file, reg_p: false);
}
901 | |