1 | /* Scheduler hooks for IA-32 which implement CPU specific logic. |
2 | Copyright (C) 1988-2024 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify |
7 | it under the terms of the GNU General Public License as published by |
8 | the Free Software Foundation; either version 3, or (at your option) |
9 | any later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | GNU General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #define IN_TARGET_CODE 1 |
21 | |
22 | #include "config.h" |
23 | #include "system.h" |
24 | #include "coretypes.h" |
25 | #include "backend.h" |
26 | #include "rtl.h" |
27 | #include "tree.h" |
28 | #include "cfghooks.h" |
29 | #include "tm_p.h" |
30 | #include "target.h" |
31 | #include "insn-config.h" |
32 | #include "insn-attr.h" |
33 | #include "insn-opinit.h" |
34 | #include "recog.h" |
35 | |
36 | /* Return the maximum number of instructions a cpu can issue. */ |
37 | |
38 | int |
39 | ix86_issue_rate (void) |
40 | { |
41 | switch (ix86_tune) |
42 | { |
43 | case PROCESSOR_PENTIUM: |
44 | case PROCESSOR_LAKEMONT: |
45 | case PROCESSOR_BONNELL: |
46 | case PROCESSOR_SILVERMONT: |
47 | case PROCESSOR_KNL: |
48 | case PROCESSOR_KNM: |
49 | case PROCESSOR_INTEL: |
50 | case PROCESSOR_K6: |
51 | case PROCESSOR_BTVER2: |
52 | case PROCESSOR_PENTIUM4: |
53 | case PROCESSOR_NOCONA: |
54 | return 2; |
55 | |
56 | case PROCESSOR_PENTIUMPRO: |
57 | case PROCESSOR_ATHLON: |
58 | case PROCESSOR_K8: |
59 | case PROCESSOR_AMDFAM10: |
60 | case PROCESSOR_BTVER1: |
61 | case PROCESSOR_LUJIAZUI: |
62 | return 3; |
63 | |
64 | case PROCESSOR_BDVER1: |
65 | case PROCESSOR_BDVER2: |
66 | case PROCESSOR_BDVER3: |
67 | case PROCESSOR_BDVER4: |
68 | case PROCESSOR_ZNVER1: |
69 | case PROCESSOR_ZNVER2: |
70 | case PROCESSOR_ZNVER3: |
71 | case PROCESSOR_ZNVER4: |
72 | case PROCESSOR_ZNVER5: |
73 | case PROCESSOR_CORE2: |
74 | case PROCESSOR_NEHALEM: |
75 | case PROCESSOR_SANDYBRIDGE: |
76 | case PROCESSOR_HASWELL: |
77 | case PROCESSOR_TREMONT: |
78 | case PROCESSOR_SKYLAKE: |
79 | case PROCESSOR_SKYLAKE_AVX512: |
80 | case PROCESSOR_CASCADELAKE: |
81 | case PROCESSOR_CANNONLAKE: |
82 | case PROCESSOR_ALDERLAKE: |
83 | case PROCESSOR_YONGFENG: |
84 | case PROCESSOR_GENERIC: |
85 | return 4; |
86 | |
87 | case PROCESSOR_ICELAKE_CLIENT: |
88 | case PROCESSOR_ICELAKE_SERVER: |
89 | case PROCESSOR_TIGERLAKE: |
90 | case PROCESSOR_COOPERLAKE: |
91 | case PROCESSOR_ROCKETLAKE: |
92 | return 5; |
93 | |
94 | case PROCESSOR_SAPPHIRERAPIDS: |
95 | return 6; |
96 | |
97 | default: |
98 | return 1; |
99 | } |
100 | } |
101 | |
102 | /* Return true iff USE_INSN has a memory address with operands set by |
103 | SET_INSN. */ |
104 | |
105 | bool |
106 | ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn) |
107 | { |
108 | int i; |
109 | extract_insn_cached (use_insn); |
110 | for (i = recog_data.n_operands - 1; i >= 0; --i) |
111 | if (MEM_P (recog_data.operand[i])) |
112 | { |
113 | rtx addr = XEXP (recog_data.operand[i], 0); |
114 | if (modified_in_p (addr, set_insn) != 0) |
115 | { |
116 | /* No AGI stall if SET_INSN is a push or pop and USE_INSN |
117 | has SP based memory (unless index reg is modified in a pop). */ |
118 | rtx set = single_set (insn: set_insn); |
119 | if (set |
120 | && (push_operand (SET_DEST (set), GET_MODE (SET_DEST (set))) |
121 | || pop_operand (SET_SRC (set), GET_MODE (SET_SRC (set))))) |
122 | { |
123 | struct ix86_address parts; |
124 | if (ix86_decompose_address (addr, &parts) |
125 | && parts.base == stack_pointer_rtx |
126 | && (parts.index == NULL_RTX |
127 | || MEM_P (SET_DEST (set)) |
128 | || !modified_in_p (parts.index, set_insn))) |
129 | return false; |
130 | } |
131 | return true; |
132 | } |
133 | return false; |
134 | } |
135 | return false; |
136 | } |
137 | |
138 | /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set |
139 | by DEP_INSN and nothing set by DEP_INSN. */ |
140 | |
141 | static bool |
142 | ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type) |
143 | { |
144 | rtx set, set2; |
145 | |
146 | /* Simplify the test for uninteresting insns. */ |
147 | if (insn_type != TYPE_SETCC |
148 | && insn_type != TYPE_ICMOV |
149 | && insn_type != TYPE_FCMOV |
150 | && insn_type != TYPE_IBR) |
151 | return false; |
152 | |
153 | if ((set = single_set (insn: dep_insn)) != 0) |
154 | { |
155 | set = SET_DEST (set); |
156 | set2 = NULL_RTX; |
157 | } |
158 | else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL |
159 | && XVECLEN (PATTERN (dep_insn), 0) == 2 |
160 | && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET |
161 | && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET) |
162 | { |
163 | set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); |
164 | set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); |
165 | } |
166 | else |
167 | return false; |
168 | |
169 | if (!REG_P (set) || REGNO (set) != FLAGS_REG) |
170 | return false; |
171 | |
172 | /* This test is true if the dependent insn reads the flags but |
173 | not any other potentially set register. */ |
174 | if (!reg_overlap_mentioned_p (set, PATTERN (insn))) |
175 | return false; |
176 | |
177 | if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn))) |
178 | return false; |
179 | |
180 | return true; |
181 | } |
182 | |
183 | /* Helper function for exact_store_load_dependency. |
184 | Return true if addr is found in insn. */ |
185 | static bool |
186 | exact_dependency_1 (rtx addr, rtx insn) |
187 | { |
188 | enum rtx_code code; |
189 | const char *format_ptr; |
190 | int i, j; |
191 | |
192 | code = GET_CODE (insn); |
193 | switch (code) |
194 | { |
195 | case MEM: |
196 | if (rtx_equal_p (addr, insn)) |
197 | return true; |
198 | break; |
199 | case REG: |
200 | CASE_CONST_ANY: |
201 | case SYMBOL_REF: |
202 | case CODE_LABEL: |
203 | case PC: |
204 | case EXPR_LIST: |
205 | return false; |
206 | default: |
207 | break; |
208 | } |
209 | |
210 | format_ptr = GET_RTX_FORMAT (code); |
211 | for (i = 0; i < GET_RTX_LENGTH (code); i++) |
212 | { |
213 | switch (*format_ptr++) |
214 | { |
215 | case 'e': |
216 | if (exact_dependency_1 (addr, XEXP (insn, i))) |
217 | return true; |
218 | break; |
219 | case 'E': |
220 | for (j = 0; j < XVECLEN (insn, i); j++) |
221 | if (exact_dependency_1 (addr, XVECEXP (insn, i, j))) |
222 | return true; |
223 | break; |
224 | } |
225 | } |
226 | return false; |
227 | } |
228 | |
229 | /* Return true if there exists exact dependency for store & load, i.e. |
230 | the same memory address is used in them. */ |
231 | static bool |
232 | exact_store_load_dependency (rtx_insn *store, rtx_insn *load) |
233 | { |
234 | rtx set1, set2; |
235 | |
236 | set1 = single_set (insn: store); |
237 | if (!set1) |
238 | return false; |
239 | if (!MEM_P (SET_DEST (set1))) |
240 | return false; |
241 | set2 = single_set (insn: load); |
242 | if (!set2) |
243 | return false; |
244 | if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2))) |
245 | return true; |
246 | return false; |
247 | } |
248 | |
249 | |
/* This function corrects the value of COST (latency) based on the relationship
   between INSN and DEP_INSN through a dependence of type DEP_TYPE, and strength
   DW.  It should return the new value.

   On x86 CPUs this is most commonly used to model the fact that values of
   registers used to compute the address of a memory operand need to be ready
   earlier than values of registers used in the actual operation.  */
257 | |
258 | int |
259 | ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, |
260 | unsigned int) |
261 | { |
262 | enum attr_type insn_type, dep_insn_type; |
263 | enum attr_memory memory; |
264 | rtx set, set2; |
265 | int dep_insn_code_number; |
266 | |
267 | /* Anti and output dependencies have zero cost on all CPUs. */ |
268 | if (dep_type != 0) |
269 | return 0; |
270 | |
271 | dep_insn_code_number = recog_memoized (insn: dep_insn); |
272 | |
273 | /* If we can't recognize the insns, we can't really do anything. */ |
274 | if (dep_insn_code_number < 0 || recog_memoized (insn) < 0) |
275 | return cost; |
276 | |
277 | insn_type = get_attr_type (insn); |
278 | dep_insn_type = get_attr_type (dep_insn); |
279 | |
280 | switch (ix86_tune) |
281 | { |
282 | case PROCESSOR_PENTIUM: |
283 | case PROCESSOR_LAKEMONT: |
284 | /* Address Generation Interlock adds a cycle of latency. */ |
285 | if (insn_type == TYPE_LEA) |
286 | { |
287 | rtx addr = PATTERN (insn); |
288 | |
289 | if (GET_CODE (addr) == PARALLEL) |
290 | addr = XVECEXP (addr, 0, 0); |
291 | |
292 | gcc_assert (GET_CODE (addr) == SET); |
293 | |
294 | addr = SET_SRC (addr); |
295 | if (modified_in_p (addr, dep_insn)) |
296 | cost += 1; |
297 | } |
298 | else if (ix86_agi_dependent (set_insn: dep_insn, use_insn: insn)) |
299 | cost += 1; |
300 | |
301 | /* ??? Compares pair with jump/setcc. */ |
302 | if (ix86_flags_dependent (insn, dep_insn, insn_type)) |
303 | cost = 0; |
304 | |
305 | /* Floating point stores require value to be ready one cycle earlier. */ |
306 | if (insn_type == TYPE_FMOV |
307 | && get_attr_memory (insn) == MEMORY_STORE |
308 | && !ix86_agi_dependent (set_insn: dep_insn, use_insn: insn)) |
309 | cost += 1; |
310 | break; |
311 | |
312 | case PROCESSOR_PENTIUMPRO: |
313 | /* INT->FP conversion is expensive. */ |
314 | if (get_attr_fp_int_src (dep_insn)) |
315 | cost += 5; |
316 | |
317 | /* There is one cycle extra latency between an FP op and a store. */ |
318 | if (insn_type == TYPE_FMOV |
319 | && (set = single_set (insn: dep_insn)) != NULL_RTX |
320 | && (set2 = single_set (insn)) != NULL_RTX |
321 | && rtx_equal_p (SET_DEST (set), SET_SRC (set2)) |
322 | && MEM_P (SET_DEST (set2))) |
323 | cost += 1; |
324 | |
325 | memory = get_attr_memory (insn); |
326 | |
327 | /* Show ability of reorder buffer to hide latency of load by executing |
328 | in parallel with previous instruction in case |
329 | previous instruction is not needed to compute the address. */ |
330 | if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) |
331 | && !ix86_agi_dependent (set_insn: dep_insn, use_insn: insn)) |
332 | { |
333 | /* Claim moves to take one cycle, as core can issue one load |
334 | at time and the next load can start cycle later. */ |
335 | if (dep_insn_type == TYPE_IMOV |
336 | || dep_insn_type == TYPE_FMOV) |
337 | cost = 1; |
338 | else if (cost > 1) |
339 | cost--; |
340 | } |
341 | break; |
342 | |
343 | case PROCESSOR_K6: |
344 | /* The esp dependency is resolved before |
345 | the instruction is really finished. */ |
346 | if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) |
347 | && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) |
348 | return 1; |
349 | |
350 | /* INT->FP conversion is expensive. */ |
351 | if (get_attr_fp_int_src (dep_insn)) |
352 | cost += 5; |
353 | |
354 | memory = get_attr_memory (insn); |
355 | |
356 | /* Show ability of reorder buffer to hide latency of load by executing |
357 | in parallel with previous instruction in case |
358 | previous instruction is not needed to compute the address. */ |
359 | if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) |
360 | && !ix86_agi_dependent (set_insn: dep_insn, use_insn: insn)) |
361 | { |
362 | /* Claim moves to take one cycle, as core can issue one load |
363 | at time and the next load can start cycle later. */ |
364 | if (dep_insn_type == TYPE_IMOV |
365 | || dep_insn_type == TYPE_FMOV) |
366 | cost = 1; |
367 | else if (cost > 2) |
368 | cost -= 2; |
369 | else |
370 | cost = 1; |
371 | } |
372 | break; |
373 | |
374 | case PROCESSOR_AMDFAM10: |
375 | case PROCESSOR_BDVER1: |
376 | case PROCESSOR_BDVER2: |
377 | case PROCESSOR_BDVER3: |
378 | case PROCESSOR_BDVER4: |
379 | case PROCESSOR_BTVER1: |
380 | case PROCESSOR_BTVER2: |
381 | /* Stack engine allows to execute push&pop instructions in parall. */ |
382 | if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) |
383 | && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) |
384 | return 0; |
385 | /* FALLTHRU */ |
386 | |
387 | case PROCESSOR_ATHLON: |
388 | case PROCESSOR_K8: |
389 | memory = get_attr_memory (insn); |
390 | |
391 | /* Show ability of reorder buffer to hide latency of load by executing |
392 | in parallel with previous instruction in case |
393 | previous instruction is not needed to compute the address. */ |
394 | if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) |
395 | && !ix86_agi_dependent (set_insn: dep_insn, use_insn: insn)) |
396 | { |
397 | enum attr_unit unit = get_attr_unit (insn); |
398 | int loadcost = 3; |
399 | |
400 | /* Because of the difference between the length of integer and |
401 | floating unit pipeline preparation stages, the memory operands |
402 | for floating point are cheaper. |
403 | |
404 | ??? For Athlon it the difference is most probably 2. */ |
405 | if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN) |
406 | loadcost = 3; |
407 | else |
408 | loadcost = TARGET_CPU_P (ATHLON) ? 2 : 0; |
409 | |
410 | if (cost >= loadcost) |
411 | cost -= loadcost; |
412 | else |
413 | cost = 0; |
414 | } |
415 | break; |
416 | |
417 | case PROCESSOR_ZNVER1: |
418 | case PROCESSOR_ZNVER2: |
419 | case PROCESSOR_ZNVER3: |
420 | case PROCESSOR_ZNVER4: |
421 | case PROCESSOR_ZNVER5: |
422 | /* Stack engine allows to execute push&pop instructions in parall. */ |
423 | if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) |
424 | && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) |
425 | return 0; |
426 | |
427 | memory = get_attr_memory (insn); |
428 | |
429 | /* Show ability of reorder buffer to hide latency of load by executing |
430 | in parallel with previous instruction in case |
431 | previous instruction is not needed to compute the address. */ |
432 | if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) |
433 | && !ix86_agi_dependent (set_insn: dep_insn, use_insn: insn)) |
434 | { |
435 | enum attr_unit unit = get_attr_unit (insn); |
436 | int loadcost; |
437 | |
438 | if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN) |
439 | loadcost = 4; |
440 | else |
441 | loadcost = 7; |
442 | |
443 | if (cost >= loadcost) |
444 | cost -= loadcost; |
445 | else |
446 | cost = 0; |
447 | } |
448 | break; |
449 | |
450 | case PROCESSOR_YONGFENG: |
451 | /* Stack engine allows to execute push&pop instructions in parallel. */ |
452 | if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) |
453 | && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) |
454 | return 0; |
455 | /* FALLTHRU */ |
456 | |
457 | case PROCESSOR_LUJIAZUI: |
458 | memory = get_attr_memory (insn); |
459 | |
460 | /* Show ability of reorder buffer to hide latency of load by executing |
461 | in parallel with previous instruction in case |
462 | previous instruction is not needed to compute the address. */ |
463 | if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) |
464 | && !ix86_agi_dependent (set_insn: dep_insn, use_insn: insn)) |
465 | { |
466 | int loadcost = 4; |
467 | |
468 | if (cost >= loadcost) |
469 | cost -= loadcost; |
470 | else |
471 | cost = 0; |
472 | } |
473 | break; |
474 | |
475 | case PROCESSOR_CORE2: |
476 | case PROCESSOR_NEHALEM: |
477 | case PROCESSOR_SANDYBRIDGE: |
478 | case PROCESSOR_HASWELL: |
479 | case PROCESSOR_TREMONT: |
480 | case PROCESSOR_ALDERLAKE: |
481 | case PROCESSOR_GENERIC: |
482 | /* Stack engine allows to execute push&pop instructions in parall. */ |
483 | if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) |
484 | && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) |
485 | return 0; |
486 | |
487 | memory = get_attr_memory (insn); |
488 | |
489 | /* Show ability of reorder buffer to hide latency of load by executing |
490 | in parallel with previous instruction in case |
491 | previous instruction is not needed to compute the address. */ |
492 | if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) |
493 | && !ix86_agi_dependent (set_insn: dep_insn, use_insn: insn)) |
494 | { |
495 | if (cost >= 4) |
496 | cost -= 4; |
497 | else |
498 | cost = 0; |
499 | } |
500 | break; |
501 | |
502 | case PROCESSOR_SILVERMONT: |
503 | case PROCESSOR_KNL: |
504 | case PROCESSOR_KNM: |
505 | case PROCESSOR_INTEL: |
506 | if (!reload_completed) |
507 | return cost; |
508 | |
509 | /* Increase cost of integer loads. */ |
510 | memory = get_attr_memory (dep_insn); |
511 | if (memory == MEMORY_LOAD || memory == MEMORY_BOTH) |
512 | { |
513 | enum attr_unit unit = get_attr_unit (dep_insn); |
514 | if (unit == UNIT_INTEGER && cost == 1) |
515 | { |
516 | if (memory == MEMORY_LOAD) |
517 | cost = 3; |
518 | else |
519 | { |
520 | /* Increase cost of ld/st for short int types only |
521 | because of store forwarding issue. */ |
522 | rtx set = single_set (insn: dep_insn); |
523 | if (set && (GET_MODE (SET_DEST (set)) == QImode |
524 | || GET_MODE (SET_DEST (set)) == HImode)) |
525 | { |
526 | /* Increase cost of store/load insn if exact |
527 | dependence exists and it is load insn. */ |
528 | enum attr_memory insn_memory = get_attr_memory (insn); |
529 | if (insn_memory == MEMORY_LOAD |
530 | && exact_store_load_dependency (store: dep_insn, load: insn)) |
531 | cost = 3; |
532 | } |
533 | } |
534 | } |
535 | } |
536 | |
537 | default: |
538 | break; |
539 | } |
540 | |
541 | return cost; |
542 | } |
543 | |
544 | /* How many alternative schedules to try. This should be as wide as the |
545 | scheduling freedom in the DFA, but no wider. Making this value too |
546 | large results extra work for the scheduler. */ |
547 | |
548 | int |
549 | ia32_multipass_dfa_lookahead (void) |
550 | { |
551 | /* Generally, we want haifa-sched:max_issue() to look ahead as far |
552 | as many instructions can be executed on a cycle, i.e., |
553 | issue_rate. */ |
554 | if (reload_completed) |
555 | return ix86_issue_rate (); |
556 | /* Don't use lookahead for pre-reload schedule to save compile time. */ |
557 | return 0; |
558 | } |
559 | |
/* Return true if target platform supports macro-fusion.  */

bool
ix86_macro_fusion_p ()
{
  /* Fused compare-and-branch is the gating tuning flag; the detailed
     per-pair check is done in ix86_macro_fusion_pair_p.  */
  return TARGET_FUSE_CMP_AND_BRANCH;
}
567 | |
568 | /* Check whether current microarchitecture support macro fusion |
569 | for insn pair "CONDGEN + CONDJMP". Refer to |
570 | "Intel Architectures Optimization Reference Manual". */ |
571 | |
572 | bool |
573 | ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp) |
574 | { |
575 | rtx src, dest; |
576 | enum rtx_code ccode; |
577 | rtx compare_set = NULL_RTX, test_if, cond; |
578 | rtx alu_set = NULL_RTX, addr = NULL_RTX; |
579 | enum attr_type condgen_type; |
580 | |
581 | if (!any_condjump_p (condjmp)) |
582 | return false; |
583 | |
584 | unsigned int condreg1, condreg2; |
585 | rtx cc_reg_1; |
586 | targetm.fixed_condition_code_regs (&condreg1, &condreg2); |
587 | cc_reg_1 = gen_rtx_REG (CCmode, condreg1); |
588 | if (!reg_referenced_p (cc_reg_1, PATTERN (insn: condjmp)) |
589 | || !condgen |
590 | || !modified_in_p (cc_reg_1, condgen)) |
591 | return false; |
592 | |
593 | condgen_type = get_attr_type (condgen); |
594 | if (condgen_type == TYPE_MULTI |
595 | && INSN_CODE (condgen) == code_for_stack_protect_test_1 (arg0: ptr_mode) |
596 | && TARGET_FUSE_ALU_AND_BRANCH) |
597 | { |
598 | /* stack_protect_test_<mode> ends with a sub, which subtracts |
599 | a non-rip special memory operand from a GPR. */ |
600 | src = NULL_RTX; |
601 | alu_set = XVECEXP (PATTERN (condgen), 0, 1); |
602 | goto handle_stack_protect_test; |
603 | } |
604 | else if (condgen_type != TYPE_TEST |
605 | && condgen_type != TYPE_ICMP |
606 | && condgen_type != TYPE_INCDEC |
607 | && condgen_type != TYPE_ALU) |
608 | return false; |
609 | |
610 | compare_set = single_set (insn: condgen); |
611 | if (compare_set == NULL_RTX && !TARGET_FUSE_ALU_AND_BRANCH) |
612 | return false; |
613 | |
614 | if (compare_set == NULL_RTX) |
615 | { |
616 | int i; |
617 | rtx pat = PATTERN (insn: condgen); |
618 | for (i = 0; i < XVECLEN (pat, 0); i++) |
619 | if (GET_CODE (XVECEXP (pat, 0, i)) == SET) |
620 | { |
621 | rtx set_src = SET_SRC (XVECEXP (pat, 0, i)); |
622 | if (GET_CODE (set_src) == COMPARE) |
623 | compare_set = XVECEXP (pat, 0, i); |
624 | else |
625 | alu_set = XVECEXP (pat, 0, i); |
626 | } |
627 | } |
628 | if (compare_set == NULL_RTX) |
629 | return false; |
630 | src = SET_SRC (compare_set); |
631 | if (GET_CODE (src) != COMPARE) |
632 | return false; |
633 | |
634 | /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not |
635 | supported. */ |
636 | if ((MEM_P (XEXP (src, 0)) && CONST_INT_P (XEXP (src, 1))) |
637 | || (MEM_P (XEXP (src, 1)) && CONST_INT_P (XEXP (src, 0)))) |
638 | return false; |
639 | |
640 | /* No fusion for RIP-relative address. */ |
641 | if (MEM_P (XEXP (src, 0))) |
642 | addr = XEXP (XEXP (src, 0), 0); |
643 | else if (MEM_P (XEXP (src, 1))) |
644 | addr = XEXP (XEXP (src, 1), 0); |
645 | |
646 | if (addr) |
647 | { |
648 | ix86_address parts; |
649 | int ok = ix86_decompose_address (addr, &parts); |
650 | gcc_assert (ok); |
651 | |
652 | if (ix86_rip_relative_addr_p (parts: &parts)) |
653 | return false; |
654 | } |
655 | |
656 | handle_stack_protect_test: |
657 | test_if = SET_SRC (pc_set (condjmp)); |
658 | cond = XEXP (test_if, 0); |
659 | ccode = GET_CODE (cond); |
660 | /* Check whether conditional jump use Sign or Overflow Flags. */ |
661 | if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS |
662 | && (ccode == GE || ccode == GT || ccode == LE || ccode == LT)) |
663 | return false; |
664 | |
665 | /* Return true for TYPE_TEST and TYPE_ICMP. */ |
666 | if (condgen_type == TYPE_TEST || condgen_type == TYPE_ICMP) |
667 | return true; |
668 | |
669 | /* The following is the case that macro-fusion for alu + jmp. */ |
670 | if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set) |
671 | return false; |
672 | |
673 | /* No fusion for alu op with memory destination operand. */ |
674 | dest = SET_DEST (alu_set); |
675 | if (MEM_P (dest)) |
676 | return false; |
677 | |
678 | /* Macro-fusion for inc/dec + unsigned conditional jump is not |
679 | supported. */ |
680 | if (condgen_type == TYPE_INCDEC |
681 | && (ccode == GEU || ccode == GTU || ccode == LEU || ccode == LTU)) |
682 | return false; |
683 | |
684 | return true; |
685 | } |
686 | |
687 | |