1 | /* Subroutines used for code generation on IA-32. |
2 | Copyright (C) 1988-2024 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify |
7 | it under the terms of the GNU General Public License as published by |
8 | the Free Software Foundation; either version 3, or (at your option) |
9 | any later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | GNU General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #define INCLUDE_STRING |
21 | #define IN_TARGET_CODE 1 |
22 | |
23 | #include "config.h" |
24 | #include "system.h" |
25 | #include "coretypes.h" |
26 | #include "backend.h" |
27 | #include "rtl.h" |
28 | #include "tree.h" |
29 | #include "memmodel.h" |
30 | #include "gimple.h" |
31 | #include "cfghooks.h" |
32 | #include "cfgloop.h" |
33 | #include "df.h" |
34 | #include "tm_p.h" |
35 | #include "stringpool.h" |
36 | #include "expmed.h" |
37 | #include "optabs.h" |
38 | #include "regs.h" |
39 | #include "emit-rtl.h" |
40 | #include "recog.h" |
41 | #include "cgraph.h" |
42 | #include "diagnostic.h" |
43 | #include "cfgbuild.h" |
44 | #include "alias.h" |
45 | #include "fold-const.h" |
46 | #include "attribs.h" |
47 | #include "calls.h" |
48 | #include "stor-layout.h" |
49 | #include "varasm.h" |
50 | #include "output.h" |
51 | #include "insn-attr.h" |
52 | #include "flags.h" |
53 | #include "except.h" |
54 | #include "explow.h" |
55 | #include "expr.h" |
56 | #include "cfgrtl.h" |
57 | #include "common/common-target.h" |
58 | #include "langhooks.h" |
59 | #include "reload.h" |
60 | #include "gimplify.h" |
61 | #include "dwarf2.h" |
62 | #include "tm-constrs.h" |
63 | #include "cselib.h" |
64 | #include "sched-int.h" |
65 | #include "opts.h" |
66 | #include "tree-pass.h" |
67 | #include "context.h" |
68 | #include "pass_manager.h" |
69 | #include "target-globals.h" |
70 | #include "gimple-iterator.h" |
71 | #include "gimple-fold.h" |
72 | #include "tree-vectorizer.h" |
73 | #include "shrink-wrap.h" |
74 | #include "builtins.h" |
75 | #include "rtl-iter.h" |
76 | #include "tree-iterator.h" |
77 | #include "dbgcnt.h" |
78 | #include "case-cfn-macros.h" |
79 | #include "dojump.h" |
80 | #include "fold-const-call.h" |
81 | #include "tree-vrp.h" |
82 | #include "tree-ssanames.h" |
83 | #include "selftest.h" |
84 | #include "selftest-rtl.h" |
85 | #include "print-rtl.h" |
86 | #include "intl.h" |
87 | #include "ifcvt.h" |
88 | #include "symbol-summary.h" |
89 | #include "sreal.h" |
90 | #include "ipa-cp.h" |
91 | #include "ipa-prop.h" |
92 | #include "ipa-fnsummary.h" |
93 | #include "wide-int-bitmask.h" |
94 | #include "tree-vector-builder.h" |
95 | #include "debug.h" |
96 | #include "dwarf2out.h" |
97 | #include "i386-options.h" |
98 | #include "i386-builtins.h" |
99 | #include "i386-expand.h" |
100 | #include "i386-features.h" |
101 | #include "function-abi.h" |
102 | #include "rtl-error.h" |
103 | |
104 | /* This file should be included last. */ |
105 | #include "target-def.h" |
106 | |
/* Forward declarations for static helpers defined later in this file.
   Note the C++ default argument on ix86_emit_restore_reg_using_pop.  */
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
static void ix86_emit_restore_reg_using_pop (rtx, bool = false);
111 | |
112 | |
/* Fallback when the target headers do not define CHECK_STACK_LIMIT.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.
   Index 4 is the catch-all for every mode other than QI/HI/SI/DImode.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
124 | |
125 | |
/* Set by -mtune.  */
const struct processor_costs *ix86_tune_cost = NULL;

/* Set by -mtune or -Os.  This is the cost table actually consulted;
   it may differ from ix86_tune_cost when optimizing for size.  */
const struct processor_costs *ix86_cost = NULL;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.
   The initializer macros come from i386.h.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
141 | |
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.
   Entry order must match the hard register numbering in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer, flags, fpsr, frame */
  NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  /* REX registers */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* AVX-512 SSE registers */
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  /* Mask registers.  Note the first entry is ALL_MASK_REGS while the
     rest are MASK_REGS -- presumably because k0 is special; confirm
     against the class definitions in i386.h.  */
  ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  /* REX2 registers */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
};
182 | |
/* The "default" register map used in 32bit mode.  Indexed by GCC hard
   register number; entries are debugger register numbers.  Registers
   that have no 32-bit debugger encoding use IGNORED_DWARF_REGNUM or
   INVALID_REGNUM.  */

int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 2, 1, 3, 6, 7, 4, 5,
  /* fp regs */
  12, 13, 14, 15, 16, 17, 18, 19,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE */
  21, 22, 23, 24, 25, 26, 27, 28,
  /* MMX */
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* Mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};
213 | |
/* The "default" register map used in 64bit mode.  Indexed by GCC hard
   register number; entries are debugger register numbers.  */

int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 1, 2, 3, 4, 5, 6, 7,
  /* fp regs */
  33, 34, 35, 36, 37, 38, 39, 40,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE */
  17, 18, 19, 20, 21, 22, 23, 24,
  /* MMX */
  41, 42, 43, 44, 45, 46, 47, 48,
  /* extended integer registers */
  8, 9, 10, 11, 12, 13, 14, 15,
  /* extended SSE registers */
  25, 26, 27, 28, 29, 30, 31, 32,
  /* AVX-512 registers 16-23 */
  67, 68, 69, 70, 71, 72, 73, 74,
  /* AVX-512 registers 24-31 */
  75, 76, 77, 78, 79, 80, 81, 82,
  /* Mask registers */
  118, 119, 120, 121, 122, 123, 124, 125,
  /* REX2 extended integer registers */
  130, 131, 132, 133, 134, 135, 136, 137,
  138, 139, 140, 141, 142, 143, 144, 145
};
243 | |
244 | /* Define the register numbers to be used in Dwarf debugging information. |
245 | The SVR4 reference port C compiler uses the following register numbers |
246 | in its Dwarf output code: |
247 | 0 for %eax (gcc regno = 0) |
248 | 1 for %ecx (gcc regno = 2) |
249 | 2 for %edx (gcc regno = 1) |
250 | 3 for %ebx (gcc regno = 3) |
251 | 4 for %esp (gcc regno = 7) |
252 | 5 for %ebp (gcc regno = 6) |
253 | 6 for %esi (gcc regno = 4) |
254 | 7 for %edi (gcc regno = 5) |
255 | The following three DWARF register numbers are never generated by |
256 | the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 |
257 | believed these numbers have these meanings. |
258 | 8 for %eip (no gcc equivalent) |
259 | 9 for %eflags (gcc regno = 17) |
260 | 10 for %trapno (no gcc equivalent) |
261 | It is not at all clear how we should number the FP stack registers |
262 | for the x86 architecture. If the version of SDB on x86/svr4 were |
263 | a bit less brain dead with respect to floating-point then we would |
264 | have a precedent to follow with respect to DWARF register numbers |
265 | for x86 FP registers, but the SDB on x86/svr4 was so completely |
266 | broken with respect to FP registers that it is hardly worth thinking |
267 | of it as something to strive for compatibility with. |
268 | The version of x86/svr4 SDB I had does (partially) |
269 | seem to believe that DWARF register number 11 is associated with |
270 | the x86 register %st(0), but that's about all. Higher DWARF |
271 | register numbers don't seem to be associated with anything in |
272 | particular, and even for DWARF regno 11, SDB only seemed to under- |
273 | stand that it should say that a variable lives in %st(0) (when |
274 | asked via an `=' command) if we said it was in DWARF regno 11, |
275 | but SDB still printed garbage when asked for the value of the |
276 | variable in question (via a `/' command). |
277 | (Also note that the labels SDB printed for various FP stack regs |
278 | when doing an `x' command were all wrong.) |
279 | Note that these problems generally don't affect the native SVR4 |
280 | C compiler because it doesn't allow the use of -O with -g and |
281 | because when it is *not* optimizing, it allocates a memory |
282 | location for each floating-point variable, and the memory |
283 | location is what gets described in the DWARF AT_location |
284 | attribute for the variable in question. |
285 | Regardless of the severe mental illness of the x86/svr4 SDB, we |
286 | do something sensible here and we use the following DWARF |
287 | register numbers. Note that these are all stack-top-relative |
288 | numbers. |
289 | 11 for %st(0) (gcc regno = 8) |
290 | 12 for %st(1) (gcc regno = 9) |
291 | 13 for %st(2) (gcc regno = 10) |
292 | 14 for %st(3) (gcc regno = 11) |
293 | 15 for %st(4) (gcc regno = 12) |
294 | 16 for %st(5) (gcc regno = 13) |
295 | 17 for %st(6) (gcc regno = 14) |
296 | 18 for %st(7) (gcc regno = 15) |
297 | */ |
/* DWARF register numbers for 32-bit code, following the SVR4
   conventions described in the comment above.  */
int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 2, 1, 3, 6, 7, 5, 4,
  /* fp regs */
  11, 12, 13, 14, 15, 16, 17, 18,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, 9,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE registers */
  21, 22, 23, 24, 25, 26, 27, 28,
  /* MMX registers */
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* Mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};
326 | |
/* Define parameter passing and return registers.  */

/* Integer argument registers for the SysV x86-64 ABI, in argument
   order.  */
static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

/* Integer argument registers for the Microsoft x64 ABI, in argument
   order.  */
static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

/* Integer value-return registers.  */
static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
343 | |
/* Define the structure for the machine field in struct function.
   One entry per (mode, n) stack slot; entries form a singly linked
   list.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;	/* machine mode of the slot, stored narrow.  */
  unsigned short n;	/* slot number distinguishing same-mode slots.  */
  rtx rtl;		/* the MEM rtx for the slot.  */
  struct stack_local_entry *next;
};
352 | |
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char ix86_prefetch_sse;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* True if there is no direct access to extern symbols.  */
bool ix86_has_no_direct_extern_access;

/* Calling abi specific va_list type nodes.  */
tree sysv_va_list_type_node;
tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL, and its length in
   characters.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt.  */
tree x86_mfence;
391 | |
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
{
  X86_64_NO_CLASS,
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_SSE_CLASS,
  X86_64_SSEHF_CLASS,
  X86_64_SSESF_CLASS,
  X86_64_SSEDF_CLASS,
  X86_64_SSEUP_CLASS,
  X86_64_X87_CLASS,
  X86_64_X87UP_CLASS,
  X86_64_COMPLEX_X87_CLASS,
  X86_64_MEMORY_CLASS
};

/* Upper bound on the number of eightbyte classes a single argument can
   occupy -- NOTE(review): presumably matches the classification code;
   confirm against classify_argument.  */
#define MAX_CLASSES 8

/* Table of constants used by fldpi, fldln2, etc....  Lazily filled in;
   ext_80387_constants_init records whether that has happened.  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init;
420 | |
421 | |
/* Forward declarations for target hooks and helpers defined later in
   this file.  */
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (void);
static tree ix86_canonical_va_list_type (tree);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

static bool ix86_can_inline_p (tree, tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);
435 | |
436 | |
/* Whether -mtune= or -march= were specified.
   NOTE(review): set by option handling outside this file -- values
   appear to be boolean flags; confirm in i386-options.cc.  */
int ix86_tune_defaulted;
int ix86_arch_specified;
440 | |
441 | /* Return true if a red-zone is in use. We can't use red-zone when |
442 | there are local indirect jumps, like "indirect_jump" or "tablejump", |
443 | which jumps to another place in the function, since "call" in the |
444 | indirect thunk pushes the return address onto stack, destroying |
445 | red-zone. |
446 | |
447 | TODO: If we can reserve the first 2 WORDs, for PUSH and, another |
448 | for CALL, in red-zone, we can allow local indirect jumps with |
449 | indirect thunk. */ |
450 | |
451 | bool |
452 | ix86_using_red_zone (void) |
453 | { |
454 | return (TARGET_RED_ZONE |
455 | && !TARGET_64BIT_MS_ABI |
456 | && (!cfun->machine->has_local_indirect_jump |
457 | || cfun->machine->indirect_branch_type == indirect_branch_keep)); |
458 | } |
459 | |
460 | /* Return true, if profiling code should be emitted before |
461 | prologue. Otherwise it returns false. |
462 | Note: For x86 with "hotfix" it is sorried. */ |
463 | static bool |
464 | ix86_profile_before_prologue (void) |
465 | { |
466 | return flag_fentry != 0; |
467 | } |
468 | |
/* Update register usage after having seen the compiler flags.  Adjusts
   the global call_used_regs, accessible_reg_set and reg_class_contents
   to reflect the selected ISA, ABI and per-function attributes.  */

static void
ix86_conditional_register_usage (void)
{
  int i, c_mask;

  /* If there are no caller-saved registers, preserve all registers.
     except fixed_regs and registers used for function return value
     since aggregate_value_p checks call_used_regs[regno] on return
     value.  */
  if (cfun
      && (cfun->machine->call_saved_registers
	  == TYPE_NO_CALLER_SAVED_REGISTERS))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
	call_used_regs[i] = 0;

  /* For 32-bit targets, disable the REX registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i);
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i);
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i);
    }

  /* See the definition of CALL_USED_REGISTERS in i386.h.  */
  c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);

  /* CLOBBERED_REGS is recomputed from scratch in the loop below.  */
  CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* Set/reset conditionally defined registers from
	 CALL_USED_REGISTERS initializer.  */
      if (call_used_regs[i] > 1)
	call_used_regs[i] = !!(call_used_regs[i] & c_mask);

      /* Calculate registers of CLOBBERED_REGS register set
	 as call used registers from GENERAL_REGS register set.  */
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], bit: i)
	  && call_used_regs[i])
	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], bit: i);
    }

  /* If MMX is disabled, disable the registers.  */
  if (! TARGET_MMX)
    accessible_reg_set &= ~reg_class_contents[MMX_REGS];

  /* If SSE is disabled, disable the registers.  */
  if (! TARGET_SSE)
    accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];

  /* If the FPU is disabled, disable the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];

  /* If AVX512F is disabled, disable the registers.  */
  if (! TARGET_AVX512F)
    {
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i);

      accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
    }

  /* If APX is disabled, disable the registers.  */
  if (! (TARGET_APX_EGPR && TARGET_64BIT))
    {
      for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i);
    }
}
545 | |
/* Canonicalize a comparison from one we don't have to one we do have.
   CODE is an rtx comparison code passed by reference as int; OP0/OP1
   are the operands.  When OP0_PRESERVE_VALUE is true OP0 must not be
   swapped.  */

static void
ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
{
  /* The order of operands in x87 ficom compare is forced by combine in
     simplify_comparison () function.  Float operator is treated as RTX_OBJ
     with a precedence over other operators and is always put in the first
     place.  Swap condition and operands to match ficom instruction.  */
  if (!op0_preserve_value
      && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
    {
      enum rtx_code scode = swap_condition ((enum rtx_code) *code);

      /* We are called only for compares that are split to SAHF instruction.
	 Ensure that we have setcc/jcc insn for the swapped condition.  */
      if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
	{
	  std::swap (a&: *op0, b&: *op1);
	  *code = (int) scode;
	}
    }
}
570 | |
571 | |
/* Hook to determine if one function can safely inline another.
   CALLER and CALLEE are FUNCTION_DECLs; returns true when CALLEE's
   target options are compatible with CALLER's.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* Changes of those flags can be tolerated for always inlines. Lets hope
     user knows what he is doing.  */
  unsigned HOST_WIDE_INT always_inline_safe_mask
    = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
       | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
       | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
       | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
       | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
       | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
       | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);


  /* A missing target node means the default target options apply.  */
  if (!callee_tree)
    callee_tree = target_option_default_node;
  if (!caller_tree)
    caller_tree = target_option_default_node;
  if (callee_tree == caller_tree)
    return true;

  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  bool ret = false;
  bool always_inline
    = (DECL_DISREGARD_INLINE_LIMITS (callee)
       && lookup_attribute (attr_name: "always_inline" ,
			    DECL_ATTRIBUTES (callee)));

  /* If callee only uses GPRs, ignore MASK_80387.  */
  if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
    always_inline_safe_mask |= MASK_80387;

  cgraph_node *callee_node = cgraph_node::get (decl: callee);
  /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
     function can inline a SSE2 function but a SSE2 function can't inline
     a SSE4 function.  */
  if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
       != callee_opts->x_ix86_isa_flags)
      || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
	  != callee_opts->x_ix86_isa_flags2))
    ret = false;

  /* See if we have the same non-isa options.  */
  else if ((!always_inline
	    && caller_opts->x_target_flags != callee_opts->x_target_flags)
	   || (caller_opts->x_target_flags & ~always_inline_safe_mask)
	      != (callee_opts->x_target_flags & ~always_inline_safe_mask))
    ret = false;

  else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
	   /* If the calle doesn't use FP expressions differences in
	      ix86_fpmath can be ignored.  We are called from FEs
	      for multi-versioning call optimization, so beware of
	      ipa_fn_summaries not available.  */
	   && (! ipa_fn_summaries
	       || ipa_fn_summaries->get (node: callee_node) == NULL
	       || ipa_fn_summaries->get (node: callee_node)->fp_expressions))
    ret = false;

  /* At this point we cannot identify whether arch or tune setting
     comes from target attribute or not. So the most conservative way
     is to allow the callee that uses default arch and tune string to
     be inlined.  */
  else if (!strcmp (s1: callee_opts->x_ix86_arch_string, s2: "x86-64" )
	   && !strcmp (s1: callee_opts->x_ix86_tune_string, s2: "generic" ))
    ret = true;

  /* See if arch, tune, etc. are the same.  As previous ISA flags already
     checks if callee's ISA is subset of caller's, do not block
     always_inline attribute for callee even it has different arch.  */
  else if (!always_inline && caller_opts->arch != callee_opts->arch)
    ret = false;

  else if (!always_inline && caller_opts->tune != callee_opts->tune)
    ret = false;

  else if (!always_inline
	   && caller_opts->branch_cost != callee_opts->branch_cost)
    ret = false;

  else
    ret = true;

  return ret;
}
664 | |
665 | /* Return true if this goes in large data/bss. */ |
666 | |
667 | static bool |
668 | ix86_in_large_data_p (tree exp) |
669 | { |
670 | if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC |
671 | && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC) |
672 | return false; |
673 | |
674 | if (exp == NULL_TREE) |
675 | return false; |
676 | |
677 | /* Functions are never large data. */ |
678 | if (TREE_CODE (exp) == FUNCTION_DECL) |
679 | return false; |
680 | |
681 | /* Automatic variables are never large data. */ |
682 | if (VAR_P (exp) && !is_global_var (t: exp)) |
683 | return false; |
684 | |
685 | if (VAR_P (exp) && DECL_SECTION_NAME (exp)) |
686 | { |
687 | const char *section = DECL_SECTION_NAME (exp); |
688 | if (strcmp (s1: section, s2: ".ldata" ) == 0 |
689 | || strcmp (s1: section, s2: ".lbss" ) == 0) |
690 | return true; |
691 | return false; |
692 | } |
693 | else |
694 | { |
695 | HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); |
696 | |
697 | /* If this is an incomplete type with size 0, then we can't put it |
698 | in data because it might be too big when completed. Also, |
699 | int_size_in_bytes returns -1 if size can vary or is larger than |
700 | an integer in which case also it is safer to assume that it goes in |
701 | large data. */ |
702 | if (size <= 0 || size > ix86_section_threshold) |
703 | return true; |
704 | } |
705 | |
706 | return false; |
707 | } |
708 | |
/* i386-specific section flag to mark large sections.  */
#define SECTION_LARGE SECTION_MACH_DEP

/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  Large-model data gets ".l"-prefixed section
   names; everything else falls through to the default ELF logic.  */

ATTRIBUTE_UNUSED static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if (ix86_in_large_data_p (exp: decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE | SECTION_LARGE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata" ;
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel" ;
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local" ;
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro" ;
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local" ;
	  break;
	case SECCAT_BSS:
	  sname = ".lbss" ;
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata" ;
	  flags &= ~SECTION_WRITE;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
776 | |
777 | /* Select a set of attributes for section NAME based on the properties |
778 | of DECL and whether or not RELOC indicates that DECL's initializer |
779 | might contain runtime relocations. */ |
780 | |
781 | static unsigned int ATTRIBUTE_UNUSED |
782 | x86_64_elf_section_type_flags (tree decl, const char *name, int reloc) |
783 | { |
784 | unsigned int flags = default_section_type_flags (decl, name, reloc); |
785 | |
786 | if (ix86_in_large_data_p (exp: decl)) |
787 | flags |= SECTION_LARGE; |
788 | |
789 | if (decl == NULL_TREE |
790 | && (strcmp (s1: name, s2: ".ldata.rel.ro" ) == 0 |
791 | || strcmp (s1: name, s2: ".ldata.rel.ro.local" ) == 0)) |
792 | flags |= SECTION_RELRO; |
793 | |
794 | if (strcmp (s1: name, s2: ".lbss" ) == 0 |
795 | || startswith (str: name, prefix: ".lbss." ) |
796 | || startswith (str: name, prefix: ".gnu.linkonce.lb." )) |
797 | flags |= SECTION_BSS; |
798 | |
799 | return flags; |
800 | } |
801 | |
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  Large-model data gets ".l"-prefixed
   (or ".gnu.linkonce.l*") names; otherwise defer to the default.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if (ix86_in_large_data_p (exp: decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata" ;
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss" ;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata" ;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "" ;

	  string = ACONCAT ((linkonce, prefix, "." , name, NULL));

	  set_decl_section_name (decl, string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
865 | |
866 | #ifdef COMMON_ASM_OP |
867 | |
868 | #ifndef LARGECOMM_SECTION_ASM_OP |
869 | #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t" |
870 | #endif |
871 | |
872 | /* This says how to output assembler code to declare an |
873 | uninitialized external linkage data object. |
874 | |
875 | For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for |
876 | large objects. */ |
877 | void |
878 | x86_elf_aligned_decl_common (FILE *file, tree decl, |
879 | const char *name, unsigned HOST_WIDE_INT size, |
880 | unsigned align) |
881 | { |
882 | if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC |
883 | || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) |
884 | && size > (unsigned int)ix86_section_threshold) |
885 | { |
886 | switch_to_section (get_named_section (decl, ".lbss" , 0)); |
887 | fputs (LARGECOMM_SECTION_ASM_OP, stream: file); |
888 | } |
889 | else |
890 | fputs (COMMON_ASM_OP, stream: file); |
891 | assemble_name (file, name); |
892 | fprintf (stream: file, format: "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n" , |
893 | size, align / BITS_PER_UNIT); |
894 | } |
895 | #endif |
896 | |
897 | /* Utility function for targets to use in implementing |
898 | ASM_OUTPUT_ALIGNED_BSS. */ |
899 | |
900 | void |
901 | x86_output_aligned_bss (FILE *file, tree decl, const char *name, |
902 | unsigned HOST_WIDE_INT size, unsigned align) |
903 | { |
904 | if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC |
905 | || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) |
906 | && size > (unsigned int)ix86_section_threshold) |
907 | switch_to_section (get_named_section (decl, ".lbss" , 0)); |
908 | else |
909 | switch_to_section (bss_section); |
910 | ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); |
911 | #ifdef ASM_DECLARE_OBJECT_NAME |
912 | last_assemble_variable_decl = decl; |
913 | ASM_DECLARE_OBJECT_NAME (file, name, decl); |
914 | #else |
915 | /* Standard thing is just output label for the object. */ |
916 | ASM_OUTPUT_LABEL (file, name); |
917 | #endif /* ASM_DECLARE_OBJECT_NAME */ |
918 | ASM_OUTPUT_SKIP (file, size ? size : 1); |
919 | } |
920 | |
921 | /* Decide whether we must probe the stack before any space allocation |
922 | on this target. It's essentially TARGET_STACK_PROBE except when |
923 | -fstack-check causes the stack to be already probed differently. */ |
924 | |
925 | bool |
926 | ix86_target_stack_probe (void) |
927 | { |
928 | /* Do not probe the stack twice if static stack checking is enabled. */ |
929 | if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) |
930 | return false; |
931 | |
932 | return TARGET_STACK_PROBE; |
933 | } |
934 | |
935 | /* Decide whether we can make a sibling call to a function. DECL is the |
936 | declaration of the function being targeted by the call and EXP is the |
937 | CALL_EXPR representing the call. */ |
938 | |
939 | static bool |
940 | ix86_function_ok_for_sibcall (tree decl, tree exp) |
941 | { |
942 | tree type, decl_or_type; |
943 | rtx a, b; |
944 | bool bind_global = decl && !targetm.binds_local_p (decl); |
945 | |
946 | if (ix86_function_naked (fn: current_function_decl)) |
947 | return false; |
948 | |
949 | /* Sibling call isn't OK if there are no caller-saved registers |
950 | since all registers must be preserved before return. */ |
951 | if (cfun->machine->call_saved_registers |
952 | == TYPE_NO_CALLER_SAVED_REGISTERS) |
953 | return false; |
954 | |
955 | /* If we are generating position-independent code, we cannot sibcall |
956 | optimize direct calls to global functions, as the PLT requires |
957 | %ebx be live. (Darwin does not have a PLT.) */ |
958 | if (!TARGET_MACHO |
959 | && !TARGET_64BIT |
960 | && flag_pic |
961 | && flag_plt |
962 | && bind_global) |
963 | return false; |
964 | |
965 | /* If we need to align the outgoing stack, then sibcalling would |
966 | unalign the stack, which may break the called function. */ |
967 | if (ix86_minimum_incoming_stack_boundary (true) |
968 | < PREFERRED_STACK_BOUNDARY) |
969 | return false; |
970 | |
971 | if (decl) |
972 | { |
973 | decl_or_type = decl; |
974 | type = TREE_TYPE (decl); |
975 | } |
976 | else |
977 | { |
978 | /* We're looking at the CALL_EXPR, we need the type of the function. */ |
979 | type = CALL_EXPR_FN (exp); /* pointer expression */ |
980 | type = TREE_TYPE (type); /* pointer type */ |
981 | type = TREE_TYPE (type); /* function type */ |
982 | decl_or_type = type; |
983 | } |
984 | |
985 | /* Sibling call isn't OK if callee has no callee-saved registers |
986 | and the calling function has callee-saved registers. */ |
987 | if (cfun->machine->call_saved_registers != TYPE_NO_CALLEE_SAVED_REGISTERS |
988 | && (cfun->machine->call_saved_registers |
989 | != TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP) |
990 | && lookup_attribute (attr_name: "no_callee_saved_registers" , |
991 | TYPE_ATTRIBUTES (type))) |
992 | return false; |
993 | |
994 | /* If outgoing reg parm stack space changes, we cannot do sibcall. */ |
995 | if ((OUTGOING_REG_PARM_STACK_SPACE (type) |
996 | != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl))) |
997 | || (REG_PARM_STACK_SPACE (decl_or_type) |
998 | != REG_PARM_STACK_SPACE (current_function_decl))) |
999 | { |
1000 | maybe_complain_about_tail_call (exp, |
1001 | "inconsistent size of stack space" |
1002 | " allocated for arguments which are" |
1003 | " passed in registers" ); |
1004 | return false; |
1005 | } |
1006 | |
1007 | /* Check that the return value locations are the same. Like |
1008 | if we are returning floats on the 80387 register stack, we cannot |
1009 | make a sibcall from a function that doesn't return a float to a |
1010 | function that does or, conversely, from a function that does return |
1011 | a float to a function that doesn't; the necessary stack adjustment |
1012 | would not be executed. This is also the place we notice |
1013 | differences in the return value ABI. Note that it is ok for one |
1014 | of the functions to have void return type as long as the return |
1015 | value of the other is passed in a register. */ |
1016 | a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false); |
1017 | b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), |
1018 | cfun->decl, false); |
1019 | if (STACK_REG_P (a) || STACK_REG_P (b)) |
1020 | { |
1021 | if (!rtx_equal_p (a, b)) |
1022 | return false; |
1023 | } |
1024 | else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) |
1025 | ; |
1026 | else if (!rtx_equal_p (a, b)) |
1027 | return false; |
1028 | |
1029 | if (TARGET_64BIT) |
1030 | { |
1031 | /* The SYSV ABI has more call-clobbered registers; |
1032 | disallow sibcalls from MS to SYSV. */ |
1033 | if (cfun->machine->call_abi == MS_ABI |
1034 | && ix86_function_type_abi (type) == SYSV_ABI) |
1035 | return false; |
1036 | } |
1037 | else |
1038 | { |
1039 | /* If this call is indirect, we'll need to be able to use a |
1040 | call-clobbered register for the address of the target function. |
1041 | Make sure that all such registers are not used for passing |
1042 | parameters. Note that DLLIMPORT functions and call to global |
1043 | function via GOT slot are indirect. */ |
1044 | if (!decl |
1045 | || (bind_global && flag_pic && !flag_plt) |
1046 | || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)) |
1047 | || flag_force_indirect_call) |
1048 | { |
1049 | /* Check if regparm >= 3 since arg_reg_available is set to |
1050 | false if regparm == 0. If regparm is 1 or 2, there is |
1051 | always a call-clobbered register available. |
1052 | |
1053 | ??? The symbol indirect call doesn't need a call-clobbered |
1054 | register. But we don't know if this is a symbol indirect |
1055 | call or not here. */ |
1056 | if (ix86_function_regparm (type, decl) >= 3 |
1057 | && !cfun->machine->arg_reg_available) |
1058 | return false; |
1059 | } |
1060 | } |
1061 | |
1062 | if (decl && ix86_use_pseudo_pic_reg ()) |
1063 | { |
1064 | /* When PIC register is used, it must be restored after ifunc |
1065 | function returns. */ |
1066 | cgraph_node *node = cgraph_node::get (decl); |
1067 | if (node && node->ifunc_resolver) |
1068 | return false; |
1069 | } |
1070 | |
1071 | /* Disable sibcall if callee has indirect_return attribute and |
1072 | caller doesn't since callee will return to the caller's caller |
1073 | via an indirect jump. */ |
1074 | if (((flag_cf_protection & (CF_RETURN | CF_BRANCH)) |
1075 | == (CF_RETURN | CF_BRANCH)) |
1076 | && lookup_attribute (attr_name: "indirect_return" , TYPE_ATTRIBUTES (type)) |
1077 | && !lookup_attribute (attr_name: "indirect_return" , |
1078 | TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))) |
1079 | return false; |
1080 | |
1081 | /* Otherwise okay. That also includes certain types of indirect calls. */ |
1082 | return true; |
1083 | } |
1084 | |
1085 | /* This function determines from TYPE the calling-convention. */ |
1086 | |
1087 | unsigned int |
1088 | ix86_get_callcvt (const_tree type) |
1089 | { |
1090 | unsigned int ret = 0; |
1091 | bool is_stdarg; |
1092 | tree attrs; |
1093 | |
1094 | if (TARGET_64BIT) |
1095 | return IX86_CALLCVT_CDECL; |
1096 | |
1097 | attrs = TYPE_ATTRIBUTES (type); |
1098 | if (attrs != NULL_TREE) |
1099 | { |
1100 | if (lookup_attribute (attr_name: "cdecl" , list: attrs)) |
1101 | ret |= IX86_CALLCVT_CDECL; |
1102 | else if (lookup_attribute (attr_name: "stdcall" , list: attrs)) |
1103 | ret |= IX86_CALLCVT_STDCALL; |
1104 | else if (lookup_attribute (attr_name: "fastcall" , list: attrs)) |
1105 | ret |= IX86_CALLCVT_FASTCALL; |
1106 | else if (lookup_attribute (attr_name: "thiscall" , list: attrs)) |
1107 | ret |= IX86_CALLCVT_THISCALL; |
1108 | |
1109 | /* Regparam isn't allowed for thiscall and fastcall. */ |
1110 | if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0) |
1111 | { |
1112 | if (lookup_attribute (attr_name: "regparm" , list: attrs)) |
1113 | ret |= IX86_CALLCVT_REGPARM; |
1114 | if (lookup_attribute (attr_name: "sseregparm" , list: attrs)) |
1115 | ret |= IX86_CALLCVT_SSEREGPARM; |
1116 | } |
1117 | |
1118 | if (IX86_BASE_CALLCVT(ret) != 0) |
1119 | return ret; |
1120 | } |
1121 | |
1122 | is_stdarg = stdarg_p (type); |
1123 | if (TARGET_RTD && !is_stdarg) |
1124 | return IX86_CALLCVT_STDCALL | ret; |
1125 | |
1126 | if (ret != 0 |
1127 | || is_stdarg |
1128 | || TREE_CODE (type) != METHOD_TYPE |
1129 | || ix86_function_type_abi (type) != MS_ABI) |
1130 | return IX86_CALLCVT_CDECL | ret; |
1131 | |
1132 | return IX86_CALLCVT_THISCALL; |
1133 | } |
1134 | |
1135 | /* Return 0 if the attributes for two types are incompatible, 1 if they |
1136 | are compatible, and 2 if they are nearly compatible (which causes a |
1137 | warning to be generated). */ |
1138 | |
1139 | static int |
1140 | ix86_comp_type_attributes (const_tree type1, const_tree type2) |
1141 | { |
1142 | unsigned int ccvt1, ccvt2; |
1143 | |
1144 | if (TREE_CODE (type1) != FUNCTION_TYPE |
1145 | && TREE_CODE (type1) != METHOD_TYPE) |
1146 | return 1; |
1147 | |
1148 | ccvt1 = ix86_get_callcvt (type: type1); |
1149 | ccvt2 = ix86_get_callcvt (type: type2); |
1150 | if (ccvt1 != ccvt2) |
1151 | return 0; |
1152 | if (ix86_function_regparm (type1, NULL) |
1153 | != ix86_function_regparm (type2, NULL)) |
1154 | return 0; |
1155 | |
1156 | if (lookup_attribute (attr_name: "no_callee_saved_registers" , |
1157 | TYPE_ATTRIBUTES (type1)) |
1158 | != lookup_attribute (attr_name: "no_callee_saved_registers" , |
1159 | TYPE_ATTRIBUTES (type2))) |
1160 | return 0; |
1161 | |
1162 | return 1; |
1163 | } |
1164 | |
1165 | /* Return the regparm value for a function with the indicated TYPE and DECL. |
1166 | DECL may be NULL when calling function indirectly |
1167 | or considering a libcall. */ |
1168 | |
1169 | static int |
1170 | ix86_function_regparm (const_tree type, const_tree decl) |
1171 | { |
1172 | tree attr; |
1173 | int regparm; |
1174 | unsigned int ccvt; |
1175 | |
1176 | if (TARGET_64BIT) |
1177 | return (ix86_function_type_abi (type) == SYSV_ABI |
1178 | ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX); |
1179 | ccvt = ix86_get_callcvt (type); |
1180 | regparm = ix86_regparm; |
1181 | |
1182 | if ((ccvt & IX86_CALLCVT_REGPARM) != 0) |
1183 | { |
1184 | attr = lookup_attribute (attr_name: "regparm" , TYPE_ATTRIBUTES (type)); |
1185 | if (attr) |
1186 | { |
1187 | regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); |
1188 | return regparm; |
1189 | } |
1190 | } |
1191 | else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) |
1192 | return 2; |
1193 | else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) |
1194 | return 1; |
1195 | |
1196 | /* Use register calling convention for local functions when possible. */ |
1197 | if (decl |
1198 | && TREE_CODE (decl) == FUNCTION_DECL) |
1199 | { |
1200 | cgraph_node *target = cgraph_node::get (decl); |
1201 | if (target) |
1202 | target = target->function_symbol (); |
1203 | |
1204 | /* Caller and callee must agree on the calling convention, so |
1205 | checking here just optimize means that with |
1206 | __attribute__((optimize (...))) caller could use regparm convention |
1207 | and callee not, or vice versa. Instead look at whether the callee |
1208 | is optimized or not. */ |
1209 | if (target && opt_for_fn (target->decl, optimize) |
1210 | && !(profile_flag && !flag_fentry)) |
1211 | { |
1212 | if (target->local && target->can_change_signature) |
1213 | { |
1214 | int local_regparm, globals = 0, regno; |
1215 | |
1216 | /* Make sure no regparm register is taken by a |
1217 | fixed register variable. */ |
1218 | for (local_regparm = 0; local_regparm < REGPARM_MAX; |
1219 | local_regparm++) |
1220 | if (fixed_regs[local_regparm]) |
1221 | break; |
1222 | |
1223 | /* We don't want to use regparm(3) for nested functions as |
1224 | these use a static chain pointer in the third argument. */ |
1225 | if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl)) |
1226 | local_regparm = 2; |
1227 | |
1228 | /* Save a register for the split stack. */ |
1229 | if (flag_split_stack) |
1230 | { |
1231 | if (local_regparm == 3) |
1232 | local_regparm = 2; |
1233 | else if (local_regparm == 2 |
1234 | && DECL_STATIC_CHAIN (target->decl)) |
1235 | local_regparm = 1; |
1236 | } |
1237 | |
1238 | /* Each fixed register usage increases register pressure, |
1239 | so less registers should be used for argument passing. |
1240 | This functionality can be overriden by an explicit |
1241 | regparm value. */ |
1242 | for (regno = AX_REG; regno <= DI_REG; regno++) |
1243 | if (fixed_regs[regno]) |
1244 | globals++; |
1245 | |
1246 | local_regparm |
1247 | = globals < local_regparm ? local_regparm - globals : 0; |
1248 | |
1249 | if (local_regparm > regparm) |
1250 | regparm = local_regparm; |
1251 | } |
1252 | } |
1253 | } |
1254 | |
1255 | return regparm; |
1256 | } |
1257 | |
1258 | /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and |
1259 | DFmode (2) arguments in SSE registers for a function with the |
1260 | indicated TYPE and DECL. DECL may be NULL when calling function |
1261 | indirectly or considering a libcall. Return -1 if any FP parameter |
1262 | should be rejected by error. This is used in siutation we imply SSE |
1263 | calling convetion but the function is called from another function with |
1264 | SSE disabled. Otherwise return 0. */ |
1265 | |
1266 | static int |
1267 | ix86_function_sseregparm (const_tree type, const_tree decl, bool warn) |
1268 | { |
1269 | gcc_assert (!TARGET_64BIT); |
1270 | |
1271 | /* Use SSE registers to pass SFmode and DFmode arguments if requested |
1272 | by the sseregparm attribute. */ |
1273 | if (TARGET_SSEREGPARM |
1274 | || (type && lookup_attribute (attr_name: "sseregparm" , TYPE_ATTRIBUTES (type)))) |
1275 | { |
1276 | if (!TARGET_SSE) |
1277 | { |
1278 | if (warn) |
1279 | { |
1280 | if (decl) |
1281 | error ("calling %qD with attribute sseregparm without " |
1282 | "SSE/SSE2 enabled" , decl); |
1283 | else |
1284 | error ("calling %qT with attribute sseregparm without " |
1285 | "SSE/SSE2 enabled" , type); |
1286 | } |
1287 | return 0; |
1288 | } |
1289 | |
1290 | return 2; |
1291 | } |
1292 | |
1293 | if (!decl) |
1294 | return 0; |
1295 | |
1296 | cgraph_node *target = cgraph_node::get (decl); |
1297 | if (target) |
1298 | target = target->function_symbol (); |
1299 | |
1300 | /* For local functions, pass up to SSE_REGPARM_MAX SFmode |
1301 | (and DFmode for SSE2) arguments in SSE registers. */ |
1302 | if (target |
1303 | /* TARGET_SSE_MATH */ |
1304 | && (target_opts_for_fn (fndecl: target->decl)->x_ix86_fpmath & FPMATH_SSE) |
1305 | && opt_for_fn (target->decl, optimize) |
1306 | && !(profile_flag && !flag_fentry)) |
1307 | { |
1308 | if (target->local && target->can_change_signature) |
1309 | { |
1310 | /* Refuse to produce wrong code when local function with SSE enabled |
1311 | is called from SSE disabled function. |
1312 | FIXME: We need a way to detect these cases cross-ltrans partition |
1313 | and avoid using SSE calling conventions on local functions called |
1314 | from function with SSE disabled. For now at least delay the |
1315 | warning until we know we are going to produce wrong code. |
1316 | See PR66047 */ |
1317 | if (!TARGET_SSE && warn) |
1318 | return -1; |
1319 | return TARGET_SSE2_P (target_opts_for_fn (target->decl) |
1320 | ->x_ix86_isa_flags) ? 2 : 1; |
1321 | } |
1322 | } |
1323 | |
1324 | return 0; |
1325 | } |
1326 | |
1327 | /* Return true if EAX is live at the start of the function. Used by |
1328 | ix86_expand_prologue to determine if we need special help before |
1329 | calling allocate_stack_worker. */ |
1330 | |
1331 | static bool |
1332 | ix86_eax_live_at_start_p (void) |
1333 | { |
1334 | /* Cheat. Don't bother working forward from ix86_function_regparm |
1335 | to the function type to whether an actual argument is located in |
1336 | eax. Instead just look at cfg info, which is still close enough |
1337 | to correct at this point. This gives false positives for broken |
1338 | functions that might use uninitialized data that happens to be |
1339 | allocated in eax, but who cares? */ |
1340 | return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0); |
1341 | } |
1342 | |
1343 | static bool |
1344 | ix86_keep_aggregate_return_pointer (tree fntype) |
1345 | { |
1346 | tree attr; |
1347 | |
1348 | if (!TARGET_64BIT) |
1349 | { |
1350 | attr = lookup_attribute (attr_name: "callee_pop_aggregate_return" , |
1351 | TYPE_ATTRIBUTES (fntype)); |
1352 | if (attr) |
1353 | return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0); |
1354 | |
1355 | /* For 32-bit MS-ABI the default is to keep aggregate |
1356 | return pointer. */ |
1357 | if (ix86_function_type_abi (fntype) == MS_ABI) |
1358 | return true; |
1359 | } |
1360 | return KEEP_AGGREGATE_RETURN_POINTER != 0; |
1361 | } |
1362 | |
1363 | /* Value is the number of bytes of arguments automatically |
1364 | popped when returning from a subroutine call. |
1365 | FUNDECL is the declaration node of the function (as a tree), |
1366 | FUNTYPE is the data type of the function (as a tree), |
1367 | or for a library call it is an identifier node for the subroutine name. |
1368 | SIZE is the number of bytes of arguments passed on the stack. |
1369 | |
1370 | On the 80386, the RTD insn may be used to pop them if the number |
1371 | of args is fixed, but if the number is variable then the caller |
1372 | must pop them all. RTD can't be used for library calls now |
1373 | because the library is compiled with the Unix compiler. |
1374 | Use of RTD is a selectable option, since it is incompatible with |
1375 | standard Unix calling sequences. If the option is not selected, |
1376 | the caller must always pop the args. |
1377 | |
1378 | The attribute stdcall is equivalent to RTD on a per module basis. */ |
1379 | |
1380 | static poly_int64 |
1381 | ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size) |
1382 | { |
1383 | unsigned int ccvt; |
1384 | |
1385 | /* None of the 64-bit ABIs pop arguments. */ |
1386 | if (TARGET_64BIT) |
1387 | return 0; |
1388 | |
1389 | ccvt = ix86_get_callcvt (type: funtype); |
1390 | |
1391 | if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL |
1392 | | IX86_CALLCVT_THISCALL)) != 0 |
1393 | && ! stdarg_p (funtype)) |
1394 | return size; |
1395 | |
1396 | /* Lose any fake structure return argument if it is passed on the stack. */ |
1397 | if (aggregate_value_p (TREE_TYPE (funtype), fundecl) |
1398 | && !ix86_keep_aggregate_return_pointer (fntype: funtype)) |
1399 | { |
1400 | int nregs = ix86_function_regparm (type: funtype, decl: fundecl); |
1401 | if (nregs == 0) |
1402 | return GET_MODE_SIZE (Pmode); |
1403 | } |
1404 | |
1405 | return 0; |
1406 | } |
1407 | |
/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  Return true if
   INSN, as produced by combine, has operands that satisfy at least one
   preferred alternative of its constraints.  */

static bool
ix86_legitimate_combined_insn (rtx_insn *insn)
{
  int i;

  /* Check operand constraints in case hard registers were propagated
     into insn pattern.  This check prevents combine pass from
     generating insn patterns with invalid hard register operands.
     These invalid insns can eventually confuse reload to error out
     with a spill failure.  See also PRs 46829 and 46843.  */

  gcc_assert (INSN_CODE (insn) >= 0);

  extract_insn (insn);
  preprocess_constraints (insn);

  int n_operands = recog_data.n_operands;
  int n_alternatives = recog_data.n_alternatives;
  for (i = 0; i < n_operands; i++)
    {
      rtx op = recog_data.operand[i];
      machine_mode mode = GET_MODE (op);
      const operand_alternative *op_alt;
      int offset = 0;
      bool win;
      int j;

      /* A unary operator may be accepted by the predicate, but it
	 is irrelevant for matching constraints.  */
      if (UNARY_P (op))
	op = XEXP (op, 0);

      /* Look through a subreg of a hard register; record the register
	 offset it selects so reg_fits_class_p checks the right regs.  */
      if (SUBREG_P (op))
	{
	  if (REG_P (SUBREG_REG (op))
	      && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
	    offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
					  GET_MODE (SUBREG_REG (op)),
					  SUBREG_BYTE (op),
					  GET_MODE (op));
	  op = SUBREG_REG (op);
	}

      /* Only hard-register operands can invalidate the insn here;
	 pseudos will be assigned a fitting class later.  */
      if (!(REG_P (op) && HARD_REGISTER_P (op)))
	continue;

      op_alt = recog_op_alt;

      /* Operand has no constraints, anything is OK.  */
      win = !n_alternatives;

      alternative_mask preferred = get_preferred_alternatives (insn);
      for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
	{
	  if (!TEST_BIT (preferred, j))
	    continue;
	  /* The operand is acceptable in alternative J if the constraint
	     allows anything, it matches another operand, or the hard
	     register fits the constraint's register class.  */
	  if (op_alt[i].anything_ok
	      || (op_alt[i].matches != -1
		  && operands_match_p
		  (recog_data.operand[i],
		   recog_data.operand[op_alt[i].matches]))
	      || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
	    {
	      win = true;
	      break;
	    }
	}

      /* No preferred alternative accepts this hard register: reject.  */
      if (!win)
	return false;
    }

  return true;
}
1484 | |
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  Return the constant
   offset AddressSanitizer adds to a shifted address to locate its
   shadow memory; the value is subtarget-defined.  */

static unsigned HOST_WIDE_INT
ix86_asan_shadow_offset (void)
{
  return SUBTARGET_SHADOW_OFFSET;
}
1492 | |
1493 | /* Argument support functions. */ |
1494 | |
1495 | /* Return true when register may be used to pass function parameters. */ |
1496 | bool |
1497 | ix86_function_arg_regno_p (int regno) |
1498 | { |
1499 | int i; |
1500 | enum calling_abi call_abi; |
1501 | const int *parm_regs; |
1502 | |
1503 | if (TARGET_SSE && SSE_REGNO_P (regno) |
1504 | && regno < FIRST_SSE_REG + SSE_REGPARM_MAX) |
1505 | return true; |
1506 | |
1507 | if (!TARGET_64BIT) |
1508 | return (regno < REGPARM_MAX |
1509 | || (TARGET_MMX && MMX_REGNO_P (regno) |
1510 | && regno < FIRST_MMX_REG + MMX_REGPARM_MAX)); |
1511 | |
1512 | /* TODO: The function should depend on current function ABI but |
1513 | builtins.cc would need updating then. Therefore we use the |
1514 | default ABI. */ |
1515 | call_abi = ix86_cfun_abi (); |
1516 | |
1517 | /* RAX is used as hidden argument to va_arg functions. */ |
1518 | if (call_abi == SYSV_ABI && regno == AX_REG) |
1519 | return true; |
1520 | |
1521 | if (call_abi == MS_ABI) |
1522 | parm_regs = x86_64_ms_abi_int_parameter_registers; |
1523 | else |
1524 | parm_regs = x86_64_int_parameter_registers; |
1525 | |
1526 | for (i = 0; i < (call_abi == MS_ABI |
1527 | ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++) |
1528 | if (regno == parm_regs[i]) |
1529 | return true; |
1530 | return false; |
1531 | } |
1532 | |
1533 | /* Return if we do not know how to pass ARG solely in registers. */ |
1534 | |
1535 | static bool |
1536 | ix86_must_pass_in_stack (const function_arg_info &arg) |
1537 | { |
1538 | if (must_pass_in_stack_var_size_or_pad (arg)) |
1539 | return true; |
1540 | |
1541 | /* For 32-bit, we want TImode aggregates to go on the stack. But watch out! |
1542 | The layout_type routine is crafty and tries to trick us into passing |
1543 | currently unsupported vector types on the stack by using TImode. */ |
1544 | return (!TARGET_64BIT && arg.mode == TImode |
1545 | && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE); |
1546 | } |
1547 | |
1548 | /* It returns the size, in bytes, of the area reserved for arguments passed |
1549 | in registers for the function represented by fndecl dependent to the used |
1550 | abi format. */ |
1551 | int |
1552 | ix86_reg_parm_stack_space (const_tree fndecl) |
1553 | { |
1554 | enum calling_abi call_abi = SYSV_ABI; |
1555 | if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL) |
1556 | call_abi = ix86_function_abi (fndecl); |
1557 | else |
1558 | call_abi = ix86_function_type_abi (fndecl); |
1559 | if (TARGET_64BIT && call_abi == MS_ABI) |
1560 | return 32; |
1561 | return 0; |
1562 | } |
1563 | |
/* We add this as a workaround in order to use libc_has_function
   hook in i386.md.  Simply forwards to the target hook with a null
   type argument.  */
bool
ix86_libc_has_function (enum function_class fn_class)
{
  return targetm.libc_has_function (fn_class, NULL_TREE);
}
1571 | |
1572 | /* Returns value SYSV_ABI, MS_ABI dependent on fntype, |
1573 | specifying the call abi used. */ |
1574 | enum calling_abi |
1575 | ix86_function_type_abi (const_tree fntype) |
1576 | { |
1577 | enum calling_abi abi = ix86_abi; |
1578 | |
1579 | if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE) |
1580 | return abi; |
1581 | |
1582 | if (abi == SYSV_ABI |
1583 | && lookup_attribute (attr_name: "ms_abi" , TYPE_ATTRIBUTES (fntype))) |
1584 | { |
1585 | static int warned; |
1586 | if (TARGET_X32 && !warned) |
1587 | { |
1588 | error ("X32 does not support %<ms_abi%> attribute" ); |
1589 | warned = 1; |
1590 | } |
1591 | |
1592 | abi = MS_ABI; |
1593 | } |
1594 | else if (abi == MS_ABI |
1595 | && lookup_attribute (attr_name: "sysv_abi" , TYPE_ATTRIBUTES (fntype))) |
1596 | abi = SYSV_ABI; |
1597 | |
1598 | return abi; |
1599 | } |
1600 | |
1601 | enum calling_abi |
1602 | ix86_function_abi (const_tree fndecl) |
1603 | { |
1604 | return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi; |
1605 | } |
1606 | |
1607 | /* Returns value SYSV_ABI, MS_ABI dependent on cfun, |
1608 | specifying the call abi used. */ |
1609 | enum calling_abi |
1610 | ix86_cfun_abi (void) |
1611 | { |
1612 | return cfun ? cfun->machine->call_abi : ix86_abi; |
1613 | } |
1614 | |
1615 | bool |
1616 | ix86_function_ms_hook_prologue (const_tree fn) |
1617 | { |
1618 | if (fn && lookup_attribute (attr_name: "ms_hook_prologue" , DECL_ATTRIBUTES (fn))) |
1619 | { |
1620 | if (decl_function_context (fn) != NULL_TREE) |
1621 | error_at (DECL_SOURCE_LOCATION (fn), |
1622 | "%<ms_hook_prologue%> attribute is not compatible " |
1623 | "with nested function" ); |
1624 | else |
1625 | return true; |
1626 | } |
1627 | return false; |
1628 | } |
1629 | |
1630 | bool |
1631 | ix86_function_naked (const_tree fn) |
1632 | { |
1633 | if (fn && lookup_attribute (attr_name: "naked" , DECL_ATTRIBUTES (fn))) |
1634 | return true; |
1635 | |
1636 | return false; |
1637 | } |
1638 | |
1639 | /* Write the extra assembler code needed to declare a function properly. */ |
1640 | |
1641 | void |
1642 | ix86_asm_output_function_label (FILE *out_file, const char *fname, |
1643 | tree decl) |
1644 | { |
1645 | bool is_ms_hook = ix86_function_ms_hook_prologue (fn: decl); |
1646 | |
1647 | if (cfun) |
1648 | cfun->machine->function_label_emitted = true; |
1649 | |
1650 | if (is_ms_hook) |
1651 | { |
1652 | int i, filler_count = (TARGET_64BIT ? 32 : 16); |
1653 | unsigned int filler_cc = 0xcccccccc; |
1654 | |
1655 | for (i = 0; i < filler_count; i += 4) |
1656 | fprintf (stream: out_file, ASM_LONG " %#x\n" , filler_cc); |
1657 | } |
1658 | |
1659 | #ifdef SUBTARGET_ASM_UNWIND_INIT |
1660 | SUBTARGET_ASM_UNWIND_INIT (out_file); |
1661 | #endif |
1662 | |
1663 | assemble_function_label_raw (out_file, fname); |
1664 | |
1665 | /* Output magic byte marker, if hot-patch attribute is set. */ |
1666 | if (is_ms_hook) |
1667 | { |
1668 | if (TARGET_64BIT) |
1669 | { |
1670 | /* leaq [%rsp + 0], %rsp */ |
1671 | fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n" , |
1672 | stream: out_file); |
1673 | } |
1674 | else |
1675 | { |
1676 | /* movl.s %edi, %edi |
1677 | push %ebp |
1678 | movl.s %esp, %ebp */ |
1679 | fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n" , stream: out_file); |
1680 | } |
1681 | } |
1682 | } |
1683 | |
/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  Records FNDECL's
   calling ABI into the per-function machine state.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  cfun->machine->call_abi = ix86_function_abi (fndecl);
}
1692 | |
1693 | /* Return 1 if pseudo register should be created and used to hold |
1694 | GOT address for PIC code. */ |
1695 | bool |
1696 | ix86_use_pseudo_pic_reg (void) |
1697 | { |
1698 | if ((TARGET_64BIT |
1699 | && (ix86_cmodel == CM_SMALL_PIC |
1700 | || TARGET_PECOFF)) |
1701 | || !flag_pic) |
1702 | return false; |
1703 | return true; |
1704 | } |
1705 | |
/* Initialize large model PIC register.  Emits, at the current insn
   position, a local label plus the lea/add sequence that materializes
   the GOT address into pic_offset_table_rtx, using hard register
   TMP_REGNO as scratch.  */

static void
ix86_init_large_pic_reg (unsigned int tmp_regno)
{
  rtx_code_label *label;
  rtx tmp_reg;

  gcc_assert (Pmode == DImode);
  label = gen_label_rtx ();
  emit_label (label);
  /* Keep the label: the set_rip pattern refers to it.  */
  LABEL_PRESERVE_P (label) = 1;
  tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
  gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
  /* pic_reg = &label; tmp = _GLOBAL_OFFSET_TABLE_ - label;
     pic_reg += tmp.  */
  emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
				label));
  emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
  emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
  /* Demote the label to a deleted-label note so later passes do not
     treat it as a jump target, but its name survives for the pattern.  */
  const char *name = LABEL_NAME (label);
  PUT_CODE (label, NOTE);
  NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
  NOTE_DELETED_LABEL_NAME (label) = name;
}
1729 | |
/* Create and initialize the PIC register if required.

   Builds the GOT-setup insn sequence appropriate for the target
   (large model, 64-bit small model, or 32-bit set_got) and inserts it
   on the edge leaving the entry block, so it executes exactly once on
   function entry.  Does nothing unless ix86_use_pseudo_pic_reg says a
   pseudo PIC register is in use.  */
static void
ix86_init_pic_reg (void)
{
  edge entry_edge;
  rtx_insn *seq;

  if (!ix86_use_pseudo_pic_reg ())
    return;

  start_sequence ();

  if (TARGET_64BIT)
    {
      if (ix86_cmodel == CM_LARGE_PIC)
	ix86_init_large_pic_reg (R11_REG);
      else
	emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
    }
  else
    {
      /* If there is future mcount call in the function it is more profitable
	 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
      rtx reg = crtl->profile
		? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
		: pic_offset_table_rtx;
      rtx_insn *insn = emit_insn (gen_set_got (reg));
      RTX_FRAME_RELATED_P (insn) = 1;
      /* When profiling, copy the ABI-mandated register into the
	 pseudo actually used by the rest of the function.  */
      if (crtl->profile)
	emit_move_insn (pic_offset_table_rtx, reg);
      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
    }

  seq = get_insns ();
  end_sequence ();

  /* Place the sequence on the single edge out of the entry block and
     commit it immediately.  */
  entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  insert_insn_on_edge (seq, entry_edge);
  commit_one_edge_insertion (e: entry_edge);
}
1770 | |
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.

   FNDECL is the callee's declaration if known, LIBNAME the SYMBOL_REF
   of a library routine's name (or 0), and CALLER is recorded in
   CUM->caller.  Sets up per-ABI register counts (integer, SSE, MMX),
   per-call-convention register usage for 32-bit, and the various
   -Wpsabi/-Wabi warning flags.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl,
		      int caller)
{
  struct cgraph_node *local_info_node = NULL;
  struct cgraph_node *target = NULL;

  /* Set silent_p to false to raise an error for invalid calls when
     expanding function body.  */
  cfun->machine->silent_p = false;

  memset (s: cum, c: 0, n: sizeof (*cum));

  /* Determine the call ABI: prefer the (possibly aliased-through)
     target decl known to the cgraph, then the bare decl, then the
     function type.  */
  if (fndecl)
    {
      target = cgraph_node::get (decl: fndecl);
      if (target)
	{
	  target = target->function_symbol ();
	  local_info_node = cgraph_node::local_info_node (decl: target->decl);
	  cum->call_abi = ix86_function_abi (fndecl: target->decl);
	}
      else
	cum->call_abi = ix86_function_abi (fndecl);
    }
  else
    cum->call_abi = ix86_function_type_abi (fntype);

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      cum->nregs = (cum->call_abi == SYSV_ABI
		    ? X86_64_REGPARM_MAX
		    : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
	{
	  cum->sse_nregs = (cum->call_abi == SYSV_ABI
			    ? X86_64_SSE_REGPARM_MAX
			    : X86_64_MS_SSE_REGPARM_MAX);
	}
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  /* Enable all ABI-change warnings by default; they are selectively
     cleared for stdarg functions below.  */
  cum->warn_avx512f = true;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive
     helping K&R code.
     FIXME: once type system is fixed, we won't need this code anymore.  */
  if (local_info_node && local_info_node->local
      && local_info_node->can_change_signature)
    fntype = TREE_TYPE (target->decl);
  cum->stdarg = stdarg_p (fntype);
  /* maybe_vaarg: unprototyped or stdarg function types may take
     variable arguments; for libcalls assume varargs only when no
     LIBNAME was supplied.  */
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  cum->decl = fndecl;

  /* -Wabi warning about empty-class parameters: only relevant when an
     empty type is followed by a non-empty one.  */
  cum->warn_empty = !warn_abi || cum->stdarg;
  if (!cum->warn_empty && fntype)
    {
      function_args_iterator iter;
      tree argtype;
      bool seen_empty_type = false;
      FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
	{
	  if (argtype == error_mark_node || VOID_TYPE_P (argtype))
	    break;
	  if (TYPE_EMPTY_P (argtype))
	    seen_empty_type = true;
	  else if (seen_empty_type)
	    {
	      cum->warn_empty = true;
	      break;
	    }
	}
    }

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
	{
	  cum->nregs = 0;
	  /* Since in 32-bit, variable arguments are always passed on
	     stack, there is scratch register available for indirect
	     sibcall.  */
	  cfun->machine->arg_reg_available = true;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	  cum->warn_avx512f = false;
	  cum->warn_avx = false;
	  cum->warn_sse = false;
	  cum->warn_mmx = false;
	  return;
	}

      /* Use ecx and edx registers if function has fastcall attribute,
	 else look for regparm information.  */
      if (fntype)
	{
	  unsigned int ccvt = ix86_get_callcvt (type: fntype);
	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	    {
	      cum->nregs = 1;
	      cum->fastcall = 1; /* Same first register as in fastcall.  */
	    }
	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	    {
	      cum->nregs = 2;
	      cum->fastcall = 1;
	    }
	  else
	    cum->nregs = ix86_function_regparm (type: fntype, decl: fndecl);
	}

      /* Set up the number of SSE registers used for passing SFmode
	 and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (type: fntype, decl: fndecl, warn: true);
    }

  cfun->machine->arg_reg_available = (cum->nregs > 0);
}
1915 | |
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The middle-end can't deal with the vector types > 16 bytes.  In this
   case, we return the original mode and warn ABI change if CUM isn't
   NULL.

   If IN_RETURN is true, warn ABI change if the vector mode isn't
   available for function return value.  */

static machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
		   bool in_return)
{
  machine_mode mode = TYPE_MODE (type);

  /* Only reconstruct a vector mode when the middle-end picked a
     non-vector mode for a vector type.  */
  if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32 || size == 64)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (node: type) > 1)
	{
	  machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  /* There are no XFmode vector modes ...  */
	  if (innermode == XFmode)
	    return mode;

	  /* ... and no decimal float vector modes.  */
	  if (DECIMAL_FLOAT_MODE_P (innermode))
	    return mode;

	  /* Start the mode search at the first vector mode of the
	     right class.  */
	  if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  FOR_EACH_MODE_FROM (mode, mode)
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (node: type)
		&& GET_MODE_INNER (mode) == innermode)
	      {
		/* For each vector width, if the required ISA is
		   disabled, fall back to TYPE_MODE and issue a
		   once-per-compilation -Wpsabi note (separate static
		   flags for argument vs return).  */
		if (size == 64 && (!TARGET_AVX512F || !TARGET_EVEX512)
		    && !TARGET_IAMCU)
		  {
		    static bool warnedavx512f;
		    static bool warnedavx512f_ret;

		    if (cum && cum->warn_avx512f && !warnedavx512f)
		      {
			if (warning (OPT_Wpsabi, "AVX512F vector argument "
				     "without AVX512F enabled changes the ABI"))
			  warnedavx512f = true;
		      }
		    else if (in_return && !warnedavx512f_ret)
		      {
			if (warning (OPT_Wpsabi, "AVX512F vector return "
				     "without AVX512F enabled changes the ABI"))
			  warnedavx512f_ret = true;
		      }

		    return TYPE_MODE (type);
		  }
		else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
		  {
		    static bool warnedavx;
		    static bool warnedavx_ret;

		    if (cum && cum->warn_avx && !warnedavx)
		      {
			if (warning (OPT_Wpsabi, "AVX vector argument "
				     "without AVX enabled changes the ABI"))
			  warnedavx = true;
		      }
		    else if (in_return && !warnedavx_ret)
		      {
			if (warning (OPT_Wpsabi, "AVX vector return "
				     "without AVX enabled changes the ABI"))
			  warnedavx_ret = true;
		      }

		    return TYPE_MODE (type);
		  }
		else if (((size == 8 && TARGET_64BIT) || size == 16)
			 && !TARGET_SSE
			 && !TARGET_IAMCU)
		  {
		    static bool warnedsse;
		    static bool warnedsse_ret;

		    if (cum && cum->warn_sse && !warnedsse)
		      {
			if (warning (OPT_Wpsabi, "SSE vector argument "
				     "without SSE enabled changes the ABI"))
			  warnedsse = true;
		      }
		    else if (!TARGET_64BIT && in_return && !warnedsse_ret)
		      {
			if (warning (OPT_Wpsabi, "SSE vector return "
				     "without SSE enabled changes the ABI"))
			  warnedsse_ret = true;
		      }
		  }
		else if ((size == 8 && !TARGET_64BIT)
			 && (!cfun
			     || cfun->machine->func_type == TYPE_NORMAL)
			 && !TARGET_MMX
			 && !TARGET_IAMCU)
		  {
		    static bool warnedmmx;
		    static bool warnedmmx_ret;

		    if (cum && cum->warn_mmx && !warnedmmx)
		      {
			if (warning (OPT_Wpsabi, "MMX vector argument "
				     "without MMX enabled changes the ABI"))
			  warnedmmx = true;
		      }
		    else if (in_return && !warnedmmx_ret)
		      {
			if (warning (OPT_Wpsabi, "MMX vector return "
				     "without MMX enabled changes the ABI"))
			  warnedmmx_ret = true;
		      }
		  }
		/* Note: the SSE and MMX cases above fall through to
		   return the reconstructed vector MODE, unlike the
		   AVX/AVX512F cases which return TYPE_MODE.  */
		return mode;
	      }

	  gcc_unreachable ();
	}
    }

  return mode;
}
2058 | |
2059 | /* We want to pass a value in REGNO whose "natural" mode is MODE. However, |
2060 | this may not agree with the mode that the type system has chosen for the |
2061 | register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can |
2062 | go ahead and use it. Otherwise we have to build a PARALLEL instead. */ |
2063 | |
2064 | static rtx |
2065 | gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode, |
2066 | unsigned int regno) |
2067 | { |
2068 | rtx tmp; |
2069 | |
2070 | if (orig_mode != BLKmode) |
2071 | tmp = gen_rtx_REG (orig_mode, regno); |
2072 | else |
2073 | { |
2074 | tmp = gen_rtx_REG (mode, regno); |
2075 | tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx); |
2076 | tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp)); |
2077 | } |
2078 | |
2079 | return tmp; |
2080 | } |
2081 | |
2082 | /* x86-64 register passing implementation. See x86-64 ABI for details. Goal |
2083 | of this code is to classify each 8bytes of incoming argument by the register |
2084 | class and assign registers accordingly. */ |
2085 | |
2086 | /* Return the union class of CLASS1 and CLASS2. |
2087 | See the x86-64 PS ABI for details. */ |
2088 | |
2089 | static enum x86_64_reg_class |
2090 | merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) |
2091 | { |
2092 | /* Rule #1: If both classes are equal, this is the resulting class. */ |
2093 | if (class1 == class2) |
2094 | return class1; |
2095 | |
2096 | /* Rule #2: If one of the classes is NO_CLASS, the resulting class is |
2097 | the other class. */ |
2098 | if (class1 == X86_64_NO_CLASS) |
2099 | return class2; |
2100 | if (class2 == X86_64_NO_CLASS) |
2101 | return class1; |
2102 | |
2103 | /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ |
2104 | if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) |
2105 | return X86_64_MEMORY_CLASS; |
2106 | |
2107 | /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ |
2108 | if ((class1 == X86_64_INTEGERSI_CLASS |
2109 | && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS)) |
2110 | || (class2 == X86_64_INTEGERSI_CLASS |
2111 | && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS))) |
2112 | return X86_64_INTEGERSI_CLASS; |
2113 | if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS |
2114 | || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) |
2115 | return X86_64_INTEGER_CLASS; |
2116 | |
2117 | /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, |
2118 | MEMORY is used. */ |
2119 | if (class1 == X86_64_X87_CLASS |
2120 | || class1 == X86_64_X87UP_CLASS |
2121 | || class1 == X86_64_COMPLEX_X87_CLASS |
2122 | || class2 == X86_64_X87_CLASS |
2123 | || class2 == X86_64_X87UP_CLASS |
2124 | || class2 == X86_64_COMPLEX_X87_CLASS) |
2125 | return X86_64_MEMORY_CLASS; |
2126 | |
2127 | /* Rule #6: Otherwise class SSE is used. */ |
2128 | return X86_64_SSE_CLASS; |
2129 | } |
2130 | |
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 512 to avoid overflow cases.

   ZERO_WIDTH_BITFIELDS is an in/out flag for diagnosing the GCC 12
   ABI change for C zero-width bit-fields: on entry 0 or 2 (2 means
   classify zero-width bit-fields as integers instead of skipping
   them); set to 1 when a skipped zero-width bit-field was seen.

   See the x86-64 PS ABI for details.
*/

static int
classify_argument (machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
		   int &zero_width_bitfields)
{
  HOST_WIDE_INT bytes
    = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode)
    {
      /* The value of "named" doesn't matter.  */
      function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
      if (targetm.calls.must_pass_in_stack (arg))
	return 0;
    }

  if (type && (AGGREGATE_TYPE_P (type)
	       || (TREE_CODE (type) == BITINT_TYPE && words > 1)))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 64 bytes on the stack.  */
      if (bytes > 64)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal memory class, so handle it as special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      /* Zero-width bit-fields: C++ ones are always
			 ignored; C ones are ignored unless the caller
			 requested the post-GCC-12 classification
			 (ZERO_WIDTH_BITFIELDS == 2), and their
			 occurrence is recorded for the -Wpsabi
			 diagnostic.  */
		      if (integer_zerop (DECL_SIZE (field)))
			{
			  if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
			    continue;
			  if (zero_width_bitfields != 2)
			    {
			      zero_width_bitfields = 1;
			      continue;
			    }
			}
		      /* Merge INTEGER into every eightbyte the
			 bit-field overlaps.  */
		      for (i = (int_bit_position (field)
				+ (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_to_shwi (DECL_SIZE (field))
				+ 63) / 8 / 8; i++)
			classes[i]
			  = merge_classes (class1: X86_64_INTEGER_CLASS, class2: classes[i]);
		    }
		  else
		    {
		      int pos;

		      type = TREE_TYPE (field);

		      /* Flexible array member is ignored.  */
		      if (TYPE_MODE (type) == BLKmode
			  && TREE_CODE (type) == ARRAY_TYPE
			  && TYPE_SIZE (type) == NULL_TREE
			  && TYPE_DOMAIN (type) != NULL_TREE
			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
			      == NULL_TREE))
			{
			  static bool warned;

			  if (!warned && warn_psabi)
			    {
			      warned = true;
			      inform (input_location,
				      "the ABI of passing struct with"
				      " a flexible array member has"
				      " changed in GCC 4.4");
			    }
			  continue;
			}
		      /* Recurse on the field at its bit position
			 (mod 512) and merge the resulting classes at
			 the field's eightbyte position.  */
		      num = classify_argument (TYPE_MODE (type), type,
					       classes: subclasses,
					       bit_offset: (int_bit_position (field)
					       + bit_offset) % 512,
					       zero_width_bitfields);
		      if (!num)
			return 0;
		      pos = (int_bit_position (field)
			     + (bit_offset % 64)) / 8 / 8;
		      for (i = 0; i < num && (i + pos) < words; i++)
			classes[i + pos]
			  = merge_classes (class1: subclasses[i], class2: classes[i + pos]);
		    }
		}
	    }
	  break;

	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), classes: subclasses, bit_offset,
				     zero_width_bitfields);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
		&& !((bit_offset % 64) == 0 && bytes == 4))
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    /* Replicate the element classes across all words of the
	       array.  */
	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];

	    break;
	  }
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.
	     */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), classes: subclasses,
					   bit_offset, zero_width_bitfields);
		  if (!num)
		    return 0;
		  for (i = 0; i < num && i < words; i++)
		    classes[i] = merge_classes (class1: subclasses[i], class2: classes[i]);
		}
	    }
	  break;

	case BITINT_TYPE:
	  /* _BitInt(N) for N > 64 is passed as structure containing
	     (N + 63) / 64 64-bit elements.  */
	  if (words > 2)
	    return 0;
	  classes[0] = classes[1] = X86_64_INTEGER_CLASS;
	  return 2;

	default:
	  gcc_unreachable ();
	}

      if (words > 2)
	{
	  /* When size > 16 bytes, if the first one isn't
	     X86_64_SSE_CLASS or any other ones aren't
	     X86_64_SSEUP_CLASS, everything should be passed in
	     memory.  */
	  if (classes[0] != X86_64_SSE_CLASS)
	    return 0;

	  for (i = 1; i < words; i++)
	    if (classes[i] != X86_64_SSEUP_CLASS)
	      return 0;
	}

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && classes[i - 1] != X86_64_SSE_CLASS
	      && classes[i - 1] != X86_64_SSEUP_CLASS)
	    {
	      /* The first one should never be X86_64_SSEUP_CLASS.  */
	      gcc_assert (i != 0);
	      classes[i] = X86_64_SSE_CLASS;
	    }

	  /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
	     everything should be passed in memory.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (classes[i - 1] != X86_64_X87_CLASS))
	    {
	      static bool warned;

	      /* The first one should never be X86_64_X87UP_CLASS.  */
	      gcc_assert (i != 0);
	      if (!warned && warn_psabi)
		{
		  warned = true;
		  inform (input_location,
			  "the ABI of passing union with %<long double%>"
			  " has changed in GCC 4.4");
		}
	      return 0;
	    }
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
      && GET_MODE_UNIT_SIZE (mode) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case E_SDmode:
    case E_DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case E_TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
    case E_CSImode:
    case E_CHImode:
    case E_CQImode:
      {
	int size = bit_offset + (int) GET_MODE_BITSIZE (mode);

	/* Analyze last 128 bits only.  */
	size = (size - 1) & 0x7f;

	if (size < 32)
	  {
	    classes[0] = X86_64_INTEGERSI_CLASS;
	    return 1;
	  }
	else if (size < 64)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    return 1;
	  }
	else if (size < 64+32)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    classes[1] = X86_64_INTEGERSI_CLASS;
	    return 2;
	  }
	else if (size < 64+64)
	  {
	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
	    return 2;
	  }
	else
	  gcc_unreachable ();
      }
    case E_CDImode:
    case E_TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case E_COImode:
    case E_OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();
    case E_CTImode:
      return 0;
    case E_HFmode:
    case E_BFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSEHF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case E_SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case E_DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case E_XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case E_TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case E_HCmode:
    case E_BCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
	return 1;
      else
	{
	  classes[1] = X86_64_SSEHF_CLASS;
	  return 2;
	}
    case E_SCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
	return 1;
      else
	{
	  static bool warned;

	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the ABI of passing structure with %<complex float%>"
		      " member has changed in GCC 4.4");
	    }
	  classes[1] = X86_64_SSESF_CLASS;
	  return 2;
	}
    case E_DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case E_XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case E_TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HFmode:
    case E_V16BFmode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
      /* 256-bit vectors: one SSE eightbyte plus three SSEUP.  */
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      return 4;
    case E_V8DFmode:
    case E_V16SFmode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V8DImode:
    case E_V16SImode:
    case E_V32HImode:
    case E_V64QImode:
      /* 512-bit vectors: one SSE eightbyte plus seven SSEUP.  */
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      classes[4] = X86_64_SSEUP_CLASS;
      classes[5] = X86_64_SSEUP_CLASS;
      classes[6] = X86_64_SSEUP_CLASS;
      classes[7] = X86_64_SSEUP_CLASS;
      return 8;
    case E_V4SFmode:
    case E_V4SImode:
    case E_V16QImode:
    case E_V8HImode:
    case E_V8HFmode:
    case E_V8BFmode:
    case E_V2DFmode:
    case E_V2DImode:
      /* 128-bit vectors.  */
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case E_V1TImode:
    case E_V1DImode:
    case E_V2SFmode:
    case E_V2SImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2HFmode:
    case E_V2BFmode:
    case E_V8QImode:
      /* 64-bit and smaller vectors fit in a single SSE eightbyte.  */
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case E_BLKmode:
    case E_VOIDmode:
      return 0;
    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      /* Remaining integer vectors are classified as integers.  */
      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
2595 | |
2596 | /* Wrapper around classify_argument with the extra zero_width_bitfields |
2597 | argument, to diagnose GCC 12.1 ABI differences for C. */ |
2598 | |
2599 | static int |
2600 | classify_argument (machine_mode mode, const_tree type, |
2601 | enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) |
2602 | { |
2603 | int zero_width_bitfields = 0; |
2604 | static bool warned = false; |
2605 | int n = classify_argument (mode, type, classes, bit_offset, |
2606 | zero_width_bitfields); |
2607 | if (!zero_width_bitfields || warned || !warn_psabi) |
2608 | return n; |
2609 | enum x86_64_reg_class alt_classes[MAX_CLASSES]; |
2610 | zero_width_bitfields = 2; |
2611 | if (classify_argument (mode, type, classes: alt_classes, bit_offset, |
2612 | zero_width_bitfields) != n) |
2613 | zero_width_bitfields = 3; |
2614 | else |
2615 | for (int i = 0; i < n; i++) |
2616 | if (classes[i] != alt_classes[i]) |
2617 | { |
2618 | zero_width_bitfields = 3; |
2619 | break; |
2620 | } |
2621 | if (zero_width_bitfields == 3) |
2622 | { |
2623 | warned = true; |
2624 | const char *url |
2625 | = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields" ; |
2626 | |
2627 | inform (input_location, |
2628 | "the ABI of passing C structures with zero-width bit-fields" |
2629 | " has changed in GCC %{12.1%}" , url); |
2630 | } |
2631 | return n; |
2632 | } |
2633 | |
2634 | /* Examine the argument and return set number of register required in each |
2635 | class. Return true iff parameter should be passed in memory. */ |
2636 | |
2637 | static bool |
2638 | examine_argument (machine_mode mode, const_tree type, int in_return, |
2639 | int *int_nregs, int *sse_nregs) |
2640 | { |
2641 | enum x86_64_reg_class regclass[MAX_CLASSES]; |
2642 | int n = classify_argument (mode, type, classes: regclass, bit_offset: 0); |
2643 | |
2644 | *int_nregs = 0; |
2645 | *sse_nregs = 0; |
2646 | |
2647 | if (!n) |
2648 | return true; |
2649 | for (n--; n >= 0; n--) |
2650 | switch (regclass[n]) |
2651 | { |
2652 | case X86_64_INTEGER_CLASS: |
2653 | case X86_64_INTEGERSI_CLASS: |
2654 | (*int_nregs)++; |
2655 | break; |
2656 | case X86_64_SSE_CLASS: |
2657 | case X86_64_SSEHF_CLASS: |
2658 | case X86_64_SSESF_CLASS: |
2659 | case X86_64_SSEDF_CLASS: |
2660 | (*sse_nregs)++; |
2661 | break; |
2662 | case X86_64_NO_CLASS: |
2663 | case X86_64_SSEUP_CLASS: |
2664 | break; |
2665 | case X86_64_X87_CLASS: |
2666 | case X86_64_X87UP_CLASS: |
2667 | case X86_64_COMPLEX_X87_CLASS: |
2668 | if (!in_return) |
2669 | return true; |
2670 | break; |
2671 | case X86_64_MEMORY_CLASS: |
2672 | gcc_unreachable (); |
2673 | } |
2674 | |
2675 | return false; |
2676 | } |
2677 | |
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.

   MODE/ORIG_MODE and TYPE describe the argument; IN_RETURN is nonzero
   when building a function return value.  NINTREGS and NSSEREGS are the
   numbers of integer resp. SSE registers still free, INTREG points at
   the next available integer register number and SSE_REGNO is the index
   of the next available SSE register.  Returns NULL when the value must
   be passed in memory (or, for empty types, not passed at all).  */

static rtx
construct_container (machine_mode mode, machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state, so each
     diagnostic below is emitted at most once per compilation.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  machine_mode tmpmode;
  int bytes
    = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  /* Classify each eightbyte of the argument per the psABI; a count of
     zero means the value cannot be passed in registers.  */
  n = classify_argument (mode, type, classes: regclass, bit_offset: 0);
  if (!n)
    return NULL;
  if (examine_argument (mode, type, in_return, int_nregs: &needed_intregs,
			sse_nregs: &needed_sseregs))
    return NULL;
  /* Not enough registers of the required kind left — pass in memory.  */
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs
      && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
    {
      /* Return early if we shouldn't raise an error for invalid
	 calls.  */
      if (cfun != NULL && cfun->machine->silent_p)
	return NULL;
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      if (VALID_SSE2_TYPE_MODE (mode))
		error ("SSE register return with SSE2 disabled" );
	      else
		error ("SSE register return with SSE disabled" );
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  if (VALID_SSE2_TYPE_MODE (mode))
	    error ("SSE register argument with SSE2 disabled" );
	  else
	    error ("SSE register argument with SSE disabled" );
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  /* Return early if we shouldn't raise an error for invalid
	     calls.  */
	  if (cfun != NULL && cfun->machine->silent_p)
	    return NULL;
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled" );
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode && mode != HCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSEHF_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	if (mode != BLKmode)
	  return gen_reg_or_parallel (mode, orig_mode,
				      GET_SSE_REGNO (sse_regno));
	break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  /* An SSE eightbyte followed only by SSEUP eightbytes occupies one
     single 128/256/512-bit vector register.  */
  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  if (n == 8
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && regclass[4] == X86_64_SSEUP_CLASS
      && regclass[5] == X86_64_SSEUP_CLASS
      && regclass[6] == X86_64_SSEUP_CLASS
      && regclass[7] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  /* An X87/X87UP pair is a single long double in %st(0).  */
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);

  /* Two consecutive integer registers holding a 128-bit value.  */
  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == BLKmode)
      && intreg[0] + 1 == intreg[1])
    {
      if (mode == BLKmode)
	{
	  /* Use TImode for BLKmode values in 2 integer registers.  */
	  exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (TImode, intreg[0]),
				      GEN_INT (0));
	  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
	  XVECEXP (ret, 0, 0) = exp[0];
	  return ret;
	}
      else
	return gen_rtx_REG (mode, intreg[0]);
    }

  /* Otherwise figure out the entries of the PARALLEL.  Each EXPR_LIST
     pairs a register with its byte offset within the argument.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
	{
	  case X86_64_NO_CLASS:
	    break;
	  case X86_64_INTEGER_CLASS:
	  case X86_64_INTEGERSI_CLASS:
	    /* Merge TImodes on aligned occasions here too.  */
	    if (i * 8 + 8 > bytes)
	      {
		/* Partial trailing eightbyte: pick the smallest integer
		   mode that covers the remaining bytes.  */
		unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
		if (!int_mode_for_size (size: tmpbits, limit: 0).exists (mode: &tmpmode))
		  /* We've requested 24 bytes we
		     don't have mode for.  Use DImode.  */
		  tmpmode = DImode;
	      }
	    else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	      tmpmode = SImode;
	    else
	      tmpmode = DImode;
	    exp [nexps++]
	      = gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (tmpmode, *intreg),
				   GEN_INT (i*8));
	    intreg++;
	    break;
	  case X86_64_SSEHF_CLASS:
	    /* Keep __bf16 in BFmode; everything else half-float uses
	       HFmode.  */
	    tmpmode = (mode == BFmode ? BFmode : HFmode);
	    exp [nexps++]
	      = gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (tmpmode,
						GET_SSE_REGNO (sse_regno)),
				   GEN_INT (i*8));
	    sse_regno++;
	    break;
	  case X86_64_SSESF_CLASS:
	    exp [nexps++]
	      = gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (SFmode,
						GET_SSE_REGNO (sse_regno)),
				   GEN_INT (i*8));
	    sse_regno++;
	    break;
	  case X86_64_SSEDF_CLASS:
	    exp [nexps++]
	      = gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (DFmode,
						GET_SSE_REGNO (sse_regno)),
				   GEN_INT (i*8));
	    sse_regno++;
	    break;
	  case X86_64_SSE_CLASS:
	    /* Fold any following SSEUP eightbytes into one wide vector
	       register; I is advanced past the absorbed entries.  */
	    pos = i;
	    switch (n)
	      {
	      case 1:
		tmpmode = DImode;
		break;
	      case 2:
		if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
		  {
		    tmpmode = TImode;
		    i++;
		  }
		else
		  tmpmode = DImode;
		break;
	      case 4:
		gcc_assert (i == 0
			    && regclass[1] == X86_64_SSEUP_CLASS
			    && regclass[2] == X86_64_SSEUP_CLASS
			    && regclass[3] == X86_64_SSEUP_CLASS);
		tmpmode = OImode;
		i += 3;
		break;
	      case 8:
		gcc_assert (i == 0
			    && regclass[1] == X86_64_SSEUP_CLASS
			    && regclass[2] == X86_64_SSEUP_CLASS
			    && regclass[3] == X86_64_SSEUP_CLASS
			    && regclass[4] == X86_64_SSEUP_CLASS
			    && regclass[5] == X86_64_SSEUP_CLASS
			    && regclass[6] == X86_64_SSEUP_CLASS
			    && regclass[7] == X86_64_SSEUP_CLASS);
		tmpmode = XImode;
		i += 7;
		break;
	      default:
		gcc_unreachable ();
	      }
	    exp [nexps++]
	      = gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (tmpmode,
						GET_SSE_REGNO (sse_regno)),
				   GEN_INT (pos*8));
	    sse_regno++;
	    break;
	  default:
	    gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}
2953 | |
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)

   BYTES is the argument's promoted size in bytes and WORDS the same
   rounded up to whole words.  This is the 32-bit (ia32) variant.

   Return a number of integer regsiters advanced over.  */

static int
function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  int res = 0;
  bool error_p = false;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return res;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      /* BYTES < 0 means a variable-sized BLKmode value; never in
	 registers.  */
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
    pass_in_reg:
      /* Consume WORDS register slots.  cum->nregs may go negative here,
	 meaning the argument (partially) spilled to the stack.  */
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;
      if (cum->nregs >= 0)
	res = words;
      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cfun->machine->arg_reg_available = false;
	  cum->regno = 0;
	}
      break;

    case E_OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_DFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V16SImode:
    case E_V8DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V8DFmode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_TImode:
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V8HFmode:
    case E_V8BFmode:
    case E_V4SFmode:
    case E_V2DFmode:
      /* SSE-register candidates; aggregates never go in SSE regs.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case E_V8QImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SImode:
    case E_V2SFmode:
    case E_V1TImode:
    case E_V1DImode:
      /* MMX-register candidates; same aggregate restriction.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
  /* float_in_sse == -1 marks an SSE-convention call compiled without
     SSE/SSE2; diagnose it once per argument here.  */
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled" , cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called" );
    }

  return res;
}
3092 | |
3093 | static int |
3094 | function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode, |
3095 | const_tree type, HOST_WIDE_INT words, bool named) |
3096 | { |
3097 | int int_nregs, sse_nregs; |
3098 | |
3099 | /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */ |
3100 | if (!named && (VALID_AVX512F_REG_MODE (mode) |
3101 | || VALID_AVX256_REG_MODE (mode))) |
3102 | return 0; |
3103 | |
3104 | if (!examine_argument (mode, type, in_return: 0, int_nregs: &int_nregs, sse_nregs: &sse_nregs) |
3105 | && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) |
3106 | { |
3107 | cum->nregs -= int_nregs; |
3108 | cum->sse_nregs -= sse_nregs; |
3109 | cum->regno += int_nregs; |
3110 | cum->sse_regno += sse_nregs; |
3111 | return int_nregs; |
3112 | } |
3113 | else |
3114 | { |
3115 | int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD; |
3116 | cum->words = ROUND_UP (cum->words, align); |
3117 | cum->words += words; |
3118 | return 0; |
3119 | } |
3120 | } |
3121 | |
3122 | static int |
3123 | function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, |
3124 | HOST_WIDE_INT words) |
3125 | { |
3126 | /* Otherwise, this should be passed indirect. */ |
3127 | gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8); |
3128 | |
3129 | cum->words += words; |
3130 | if (cum->nregs > 0) |
3131 | { |
3132 | cum->nregs -= 1; |
3133 | cum->regno += 1; |
3134 | return 1; |
3135 | } |
3136 | return 0; |
3137 | } |
3138 | |
3139 | /* Update the data in CUM to advance over argument ARG. */ |
3140 | |
3141 | static void |
3142 | ix86_function_arg_advance (cumulative_args_t cum_v, |
3143 | const function_arg_info &arg) |
3144 | { |
3145 | CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v); |
3146 | machine_mode mode = arg.mode; |
3147 | HOST_WIDE_INT bytes, words; |
3148 | int nregs; |
3149 | |
3150 | /* The argument of interrupt handler is a special case and is |
3151 | handled in ix86_function_arg. */ |
3152 | if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL) |
3153 | return; |
3154 | |
3155 | bytes = arg.promoted_size_in_bytes (); |
3156 | words = CEIL (bytes, UNITS_PER_WORD); |
3157 | |
3158 | if (arg.type) |
3159 | mode = type_natural_mode (type: arg.type, NULL, in_return: false); |
3160 | |
3161 | if (TARGET_64BIT) |
3162 | { |
3163 | enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; |
3164 | |
3165 | if (call_abi == MS_ABI) |
3166 | nregs = function_arg_advance_ms_64 (cum, bytes, words); |
3167 | else |
3168 | nregs = function_arg_advance_64 (cum, mode, type: arg.type, words, |
3169 | named: arg.named); |
3170 | } |
3171 | else |
3172 | nregs = function_arg_advance_32 (cum, mode, type: arg.type, bytes, words); |
3173 | |
3174 | if (!nregs) |
3175 | { |
3176 | /* Track if there are outgoing arguments on stack. */ |
3177 | if (cum->caller) |
3178 | cfun->machine->outgoing_args_on_stack = true; |
3179 | } |
3180 | } |
3181 | |
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).

   This is the 32-bit (ia32) variant; BYTES and WORDS are the
   argument's promoted size in bytes resp. words.  */

static rtx
function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
		 machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  bool error_p = false;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return NULL_RTX;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      /* Variable-sized BLKmode values never go in registers.  */
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
    pass_in_reg:
      /* Integer-register candidates: only if the whole argument fits
	 in the remaining register slots.  */
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	    smaller arguments to ECX and EDX if it isn't an
	    aggregate type .  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
		break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case E_DFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case E_TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V8HFmode:
    case E_V8BFmode:
    case E_V4SFmode:
    case E_V2DFmode:
      /* 128-bit SSE-register candidates; aggregates are excluded.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					regno: cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case E_OImode:
    case E_XImode:
      /* OImode and XImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_V64QImode:
    case E_V32HImode:
    case E_V16SImode:
    case E_V8DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V8DFmode:
    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
      /* 256/512-bit vectors also go in SSE registers when available.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					regno: cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case E_V8QImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SImode:
    case E_V2SFmode:
    case E_V1TImode:
    case E_V1DImode:
      /* 64-bit vectors go in MMX registers.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					regno: cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }
  /* float_in_sse == -1 marks an SSE-convention call compiled without
     SSE/SSE2; diagnose it once here.  */
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled" , cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called" );
    }

  return NULL_RTX;
}
3337 | |
3338 | static rtx |
3339 | function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, |
3340 | machine_mode orig_mode, const_tree type, bool named) |
3341 | { |
3342 | /* Handle a hidden AL argument containing number of registers |
3343 | for varargs x86-64 functions. */ |
3344 | if (mode == VOIDmode) |
3345 | return GEN_INT (cum->maybe_vaarg |
3346 | ? (cum->sse_nregs < 0 |
3347 | ? X86_64_SSE_REGPARM_MAX |
3348 | : cum->sse_regno) |
3349 | : -1); |
3350 | |
3351 | switch (mode) |
3352 | { |
3353 | default: |
3354 | break; |
3355 | |
3356 | case E_V16HFmode: |
3357 | case E_V16BFmode: |
3358 | case E_V8SFmode: |
3359 | case E_V8SImode: |
3360 | case E_V32QImode: |
3361 | case E_V16HImode: |
3362 | case E_V4DFmode: |
3363 | case E_V4DImode: |
3364 | case E_V32HFmode: |
3365 | case E_V32BFmode: |
3366 | case E_V16SFmode: |
3367 | case E_V16SImode: |
3368 | case E_V64QImode: |
3369 | case E_V32HImode: |
3370 | case E_V8DFmode: |
3371 | case E_V8DImode: |
3372 | /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */ |
3373 | if (!named) |
3374 | return NULL; |
3375 | break; |
3376 | } |
3377 | |
3378 | return construct_container (mode, orig_mode, type, in_return: 0, nintregs: cum->nregs, |
3379 | nsseregs: cum->sse_nregs, |
3380 | intreg: &x86_64_int_parameter_registers [cum->regno], |
3381 | sse_regno: cum->sse_regno); |
3382 | } |
3383 | |
3384 | static rtx |
3385 | function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, |
3386 | machine_mode orig_mode, bool named, const_tree type, |
3387 | HOST_WIDE_INT bytes) |
3388 | { |
3389 | unsigned int regno; |
3390 | |
3391 | /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call. |
3392 | We use value of -2 to specify that current function call is MSABI. */ |
3393 | if (mode == VOIDmode) |
3394 | return GEN_INT (-2); |
3395 | |
3396 | /* If we've run out of registers, it goes on the stack. */ |
3397 | if (cum->nregs == 0) |
3398 | return NULL_RTX; |
3399 | |
3400 | regno = x86_64_ms_abi_int_parameter_registers[cum->regno]; |
3401 | |
3402 | /* Only floating point modes are passed in anything but integer regs. */ |
3403 | if (TARGET_SSE && (mode == SFmode || mode == DFmode)) |
3404 | { |
3405 | if (named) |
3406 | { |
3407 | if (type == NULL_TREE || !AGGREGATE_TYPE_P (type)) |
3408 | regno = cum->regno + FIRST_SSE_REG; |
3409 | } |
3410 | else |
3411 | { |
3412 | rtx t1, t2; |
3413 | |
3414 | /* Unnamed floating parameters are passed in both the |
3415 | SSE and integer registers. */ |
3416 | t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG); |
3417 | t2 = gen_rtx_REG (mode, regno); |
3418 | t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx); |
3419 | t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx); |
3420 | return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2)); |
3421 | } |
3422 | } |
3423 | /* Handle aggregated types passed in register. */ |
3424 | if (orig_mode == BLKmode) |
3425 | { |
3426 | if (bytes > 0 && bytes <= 8) |
3427 | mode = (bytes > 4 ? DImode : SImode); |
3428 | if (mode == BLKmode) |
3429 | mode = DImode; |
3430 | } |
3431 | |
3432 | return gen_reg_or_parallel (mode, orig_mode, regno); |
3433 | } |
3434 | |
3435 | /* Return where to put the arguments to a function. |
3436 | Return zero to push the argument on the stack, or a hard register in which to store the argument. |
3437 | |
3438 | ARG describes the argument while CUM gives information about the |
3439 | preceding args and about the function being called. */ |
3440 | |
3441 | static rtx |
3442 | ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg) |
3443 | { |
3444 | CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v); |
3445 | machine_mode mode = arg.mode; |
3446 | HOST_WIDE_INT bytes, words; |
3447 | rtx reg; |
3448 | |
3449 | if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL) |
3450 | { |
3451 | gcc_assert (arg.type != NULL_TREE); |
3452 | if (POINTER_TYPE_P (arg.type)) |
3453 | { |
3454 | /* This is the pointer argument. */ |
3455 | gcc_assert (TYPE_MODE (arg.type) == ptr_mode); |
3456 | /* It is at -WORD(AP) in the current frame in interrupt and |
3457 | exception handlers. */ |
3458 | reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD); |
3459 | } |
3460 | else |
3461 | { |
3462 | gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION |
3463 | && TREE_CODE (arg.type) == INTEGER_TYPE |
3464 | && TYPE_MODE (arg.type) == word_mode); |
3465 | /* The error code is the word-mode integer argument at |
3466 | -2 * WORD(AP) in the current frame of the exception |
3467 | handler. */ |
3468 | reg = gen_rtx_MEM (word_mode, |
3469 | plus_constant (Pmode, |
3470 | arg_pointer_rtx, |
3471 | -2 * UNITS_PER_WORD)); |
3472 | } |
3473 | return reg; |
3474 | } |
3475 | |
3476 | bytes = arg.promoted_size_in_bytes (); |
3477 | words = CEIL (bytes, UNITS_PER_WORD); |
3478 | |
3479 | /* To simplify the code below, represent vector types with a vector mode |
3480 | even if MMX/SSE are not active. */ |
3481 | if (arg.type && VECTOR_TYPE_P (arg.type)) |
3482 | mode = type_natural_mode (type: arg.type, cum, in_return: false); |
3483 | |
3484 | if (TARGET_64BIT) |
3485 | { |
3486 | enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; |
3487 | |
3488 | if (call_abi == MS_ABI) |
3489 | reg = function_arg_ms_64 (cum, mode, orig_mode: arg.mode, named: arg.named, |
3490 | type: arg.type, bytes); |
3491 | else |
3492 | reg = function_arg_64 (cum, mode, orig_mode: arg.mode, type: arg.type, named: arg.named); |
3493 | } |
3494 | else |
3495 | reg = function_arg_32 (cum, mode, orig_mode: arg.mode, type: arg.type, bytes, words); |
3496 | |
3497 | /* Track if there are outgoing arguments on stack. */ |
3498 | if (reg == NULL_RTX && cum->caller) |
3499 | cfun->machine->outgoing_args_on_stack = true; |
3500 | |
3501 | return reg; |
3502 | } |
3503 | |
3504 | /* A C expression that indicates when an argument must be passed by |
3505 | reference. If nonzero for an argument, a copy of that argument is |
3506 | made in memory and a pointer to the argument is passed instead of |
3507 | the argument itself. The pointer is passed in whatever way is |
3508 | appropriate for passing a pointer to that type. */ |
3509 | |
3510 | static bool |
3511 | ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg) |
3512 | { |
3513 | CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v); |
3514 | |
3515 | if (TARGET_64BIT) |
3516 | { |
3517 | enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; |
3518 | |
3519 | /* See Windows x64 Software Convention. */ |
3520 | if (call_abi == MS_ABI) |
3521 | { |
3522 | HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode); |
3523 | |
3524 | if (tree type = arg.type) |
3525 | { |
3526 | /* Arrays are passed by reference. */ |
3527 | if (TREE_CODE (type) == ARRAY_TYPE) |
3528 | return true; |
3529 | |
3530 | if (RECORD_OR_UNION_TYPE_P (type)) |
3531 | { |
3532 | /* Structs/unions of sizes other than 8, 16, 32, or 64 bits |
3533 | are passed by reference. */ |
3534 | msize = int_size_in_bytes (type); |
3535 | } |
3536 | } |
3537 | |
3538 | /* __m128 is passed by reference. */ |
3539 | return msize != 1 && msize != 2 && msize != 4 && msize != 8; |
3540 | } |
3541 | else if (arg.type && int_size_in_bytes (arg.type) == -1) |
3542 | return true; |
3543 | } |
3544 | |
3545 | return false; |
3546 | } |
3547 | |
3548 | /* Return true when TYPE should be 128bit aligned for 32bit argument |
3549 | passing ABI. XXX: This function is obsolete and is only used for |
3550 | checking psABI compatibility with previous versions of GCC. */ |
3551 | |
3552 | static bool |
3553 | ix86_compat_aligned_value_p (const_tree type) |
3554 | { |
3555 | machine_mode mode = TYPE_MODE (type); |
3556 | if (((TARGET_SSE && SSE_REG_MODE_P (mode)) |
3557 | || mode == TDmode |
3558 | || mode == TFmode |
3559 | || mode == TCmode) |
3560 | && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) |
3561 | return true; |
3562 | if (TYPE_ALIGN (type) < 128) |
3563 | return false; |
3564 | |
3565 | if (AGGREGATE_TYPE_P (type)) |
3566 | { |
3567 | /* Walk the aggregates recursively. */ |
3568 | switch (TREE_CODE (type)) |
3569 | { |
3570 | case RECORD_TYPE: |
3571 | case UNION_TYPE: |
3572 | case QUAL_UNION_TYPE: |
3573 | { |
3574 | tree field; |
3575 | |
3576 | /* Walk all the structure fields. */ |
3577 | for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
3578 | { |
3579 | if (TREE_CODE (field) == FIELD_DECL |
3580 | && ix86_compat_aligned_value_p (TREE_TYPE (field))) |
3581 | return true; |
3582 | } |
3583 | break; |
3584 | } |
3585 | |
3586 | case ARRAY_TYPE: |
3587 | /* Just for use if some languages passes arrays by value. */ |
3588 | if (ix86_compat_aligned_value_p (TREE_TYPE (type))) |
3589 | return true; |
3590 | break; |
3591 | |
3592 | default: |
3593 | gcc_unreachable (); |
3594 | } |
3595 | } |
3596 | return false; |
3597 | } |
3598 | |
3599 | /* Return the alignment boundary for MODE and TYPE with alignment ALIGN. |
3600 | XXX: This function is obsolete and is only used for checking psABI |
3601 | compatibility with previous versions of GCC. */ |
3602 | |
3603 | static unsigned int |
3604 | ix86_compat_function_arg_boundary (machine_mode mode, |
3605 | const_tree type, unsigned int align) |
3606 | { |
3607 | /* In 32bit, only _Decimal128 and __float128 are aligned to their |
3608 | natural boundaries. */ |
3609 | if (!TARGET_64BIT && mode != TDmode && mode != TFmode) |
3610 | { |
3611 | /* i386 ABI defines all arguments to be 4 byte aligned. We have to |
3612 | make an exception for SSE modes since these require 128bit |
3613 | alignment. |
3614 | |
3615 | The handling here differs from field_alignment. ICC aligns MMX |
3616 | arguments to 4 byte boundaries, while structure fields are aligned |
3617 | to 8 byte boundaries. */ |
3618 | if (!type) |
3619 | { |
3620 | if (!(TARGET_SSE && SSE_REG_MODE_P (mode))) |
3621 | align = PARM_BOUNDARY; |
3622 | } |
3623 | else |
3624 | { |
3625 | if (!ix86_compat_aligned_value_p (type)) |
3626 | align = PARM_BOUNDARY; |
3627 | } |
3628 | } |
3629 | if (align > BIGGEST_ALIGNMENT) |
3630 | align = BIGGEST_ALIGNMENT; |
3631 | return align; |
3632 | } |
3633 | |
3634 | /* Return true when TYPE should be 128bit aligned for 32bit argument |
3635 | passing ABI. */ |
3636 | |
3637 | static bool |
3638 | ix86_contains_aligned_value_p (const_tree type) |
3639 | { |
3640 | machine_mode mode = TYPE_MODE (type); |
3641 | |
3642 | if (mode == XFmode || mode == XCmode) |
3643 | return false; |
3644 | |
3645 | if (TYPE_ALIGN (type) < 128) |
3646 | return false; |
3647 | |
3648 | if (AGGREGATE_TYPE_P (type)) |
3649 | { |
3650 | /* Walk the aggregates recursively. */ |
3651 | switch (TREE_CODE (type)) |
3652 | { |
3653 | case RECORD_TYPE: |
3654 | case UNION_TYPE: |
3655 | case QUAL_UNION_TYPE: |
3656 | { |
3657 | tree field; |
3658 | |
3659 | /* Walk all the structure fields. */ |
3660 | for (field = TYPE_FIELDS (type); |
3661 | field; |
3662 | field = DECL_CHAIN (field)) |
3663 | { |
3664 | if (TREE_CODE (field) == FIELD_DECL |
3665 | && ix86_contains_aligned_value_p (TREE_TYPE (field))) |
3666 | return true; |
3667 | } |
3668 | break; |
3669 | } |
3670 | |
3671 | case ARRAY_TYPE: |
3672 | /* Just for use if some languages passes arrays by value. */ |
3673 | if (ix86_contains_aligned_value_p (TREE_TYPE (type))) |
3674 | return true; |
3675 | break; |
3676 | |
3677 | default: |
3678 | gcc_unreachable (); |
3679 | } |
3680 | } |
3681 | else |
3682 | return TYPE_ALIGN (type) >= 128; |
3683 | |
3684 | return false; |
3685 | } |
3686 | |
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  Returns at least PARM_BOUNDARY and at
   most the alignment the ABI actually requires for the argument.  */

static unsigned int
ix86_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
      /* Empty types take no space and need no extra alignment.  */
      if (TYPE_EMPTY_P (type))
	return PARM_BOUNDARY;
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      /* Emit the GCC 4.6 ABI-change note at most once per translation
	 unit.  */
      static bool warned;
      /* Remember the pre-adjustment alignment so we can detect whether
	 the old (pre-4.6) computation would have differed.  */
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  /* On ia32 only 128-bit (or larger) alignment is honored;
	     anything smaller collapses to the 4-byte parameter
	     boundary.  */
	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 align: saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "the ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6" ,
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
3742 | |
3743 | /* Return true if N is a possible register number of function value. */ |
3744 | |
3745 | static bool |
3746 | ix86_function_value_regno_p (const unsigned int regno) |
3747 | { |
3748 | switch (regno) |
3749 | { |
3750 | case AX_REG: |
3751 | return true; |
3752 | case DX_REG: |
3753 | return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI); |
3754 | case DI_REG: |
3755 | case SI_REG: |
3756 | return TARGET_64BIT && ix86_cfun_abi () != MS_ABI; |
3757 | |
3758 | /* Complex values are returned in %st(0)/%st(1) pair. */ |
3759 | case ST0_REG: |
3760 | case ST1_REG: |
3761 | /* TODO: The function should depend on current function ABI but |
3762 | builtins.cc would need updating then. Therefore we use the |
3763 | default ABI. */ |
3764 | if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI) |
3765 | return false; |
3766 | return TARGET_FLOAT_RETURNS_IN_80387; |
3767 | |
3768 | /* Complex values are returned in %xmm0/%xmm1 pair. */ |
3769 | case XMM0_REG: |
3770 | case XMM1_REG: |
3771 | return TARGET_SSE; |
3772 | |
3773 | case MM0_REG: |
3774 | if (TARGET_MACHO || TARGET_64BIT) |
3775 | return false; |
3776 | return TARGET_MMX; |
3777 | } |
3778 | |
3779 | return false; |
3780 | } |
3781 | |
3782 | /* Check whether the register REGNO should be zeroed on X86. |
3783 | When ALL_SSE_ZEROED is true, all SSE registers have been zeroed |
3784 | together, no need to zero it again. |
3785 | When NEED_ZERO_MMX is true, MMX registers should be cleared. */ |
3786 | |
3787 | static bool |
3788 | zero_call_used_regno_p (const unsigned int regno, |
3789 | bool all_sse_zeroed, |
3790 | bool need_zero_mmx) |
3791 | { |
3792 | return GENERAL_REGNO_P (regno) |
3793 | || (!all_sse_zeroed && SSE_REGNO_P (regno)) |
3794 | || MASK_REGNO_P (regno) |
3795 | || (need_zero_mmx && MMX_REGNO_P (regno)); |
3796 | } |
3797 | |
3798 | /* Return the machine_mode that is used to zero register REGNO. */ |
3799 | |
3800 | static machine_mode |
3801 | zero_call_used_regno_mode (const unsigned int regno) |
3802 | { |
3803 | /* NB: We only need to zero the lower 32 bits for integer registers |
3804 | and the lower 128 bits for vector registers since destination are |
3805 | zero-extended to the full register width. */ |
3806 | if (GENERAL_REGNO_P (regno)) |
3807 | return SImode; |
3808 | else if (SSE_REGNO_P (regno)) |
3809 | return V4SFmode; |
3810 | else if (MASK_REGNO_P (regno)) |
3811 | return HImode; |
3812 | else if (MMX_REGNO_P (regno)) |
3813 | return V2SImode; |
3814 | else |
3815 | gcc_unreachable (); |
3816 | } |
3817 | |
/* Generate a rtx to zero all vector registers together if possible,
   otherwise, return NULL.  A single vzeroall is only usable when every
   SSE register the target has is actually in NEED_ZEROED_HARDREGS;
   otherwise it would clobber live registers.  */

static rtx
zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
{
  /* vzeroall is an AVX instruction.  */
  if (!TARGET_AVX)
    return NULL;

  /* Bail out if any SSE register the ISA provides (legacy xmm0-7,
     plus xmm8-15 in 64-bit mode, plus xmm16-31 with AVX512F) is NOT
     requested to be zeroed — vzeroall would wrongly clear it.  */
  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if ((LEGACY_SSE_REGNO_P (regno)
	 || (TARGET_64BIT
	     && (REX_SSE_REGNO_P (regno)
		 || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
	&& !TEST_HARD_REG_BIT (set: need_zeroed_hardregs, bit: regno))
      return NULL;

  return gen_avx_vzeroall ();
}
3837 | |
3838 | /* Generate insns to zero all st registers together. |
3839 | Return true when zeroing instructions are generated. |
3840 | Assume the number of st registers that are zeroed is num_of_st, |
3841 | we will emit the following sequence to zero them together: |
3842 | fldz; \ |
3843 | fldz; \ |
3844 | ... |
3845 | fldz; \ |
3846 | fstp %%st(0); \ |
3847 | fstp %%st(0); \ |
3848 | ... |
3849 | fstp %%st(0); |
3850 | i.e., num_of_st fldz followed by num_of_st fstp to clear the stack |
3851 | mark stack slots empty. |
3852 | |
3853 | How to compute the num_of_st: |
3854 | There is no direct mapping from stack registers to hard register |
3855 | numbers. If one stack register needs to be cleared, we don't know |
3856 | where in the stack the value remains. So, if any stack register |
3857 | needs to be cleared, the whole stack should be cleared. However, |
3858 | x87 stack registers that hold the return value should be excluded. |
3859 | x87 returns in the top (two for complex values) register, so |
3860 | num_of_st should be 7/6 when x87 returns, otherwise it will be 8. |
3861 | return the value of num_of_st. */ |
3862 | |
3863 | |
3864 | static int |
3865 | zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs) |
3866 | { |
3867 | |
3868 | /* If the FPU is disabled, no need to zero all st registers. */ |
3869 | if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387)) |
3870 | return 0; |
3871 | |
3872 | unsigned int num_of_st = 0; |
3873 | for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
3874 | if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno)) |
3875 | && TEST_HARD_REG_BIT (set: need_zeroed_hardregs, bit: regno)) |
3876 | { |
3877 | num_of_st++; |
3878 | break; |
3879 | } |
3880 | |
3881 | if (num_of_st == 0) |
3882 | return 0; |
3883 | |
3884 | bool return_with_x87 = false; |
3885 | return_with_x87 = (crtl->return_rtx |
3886 | && (STACK_REG_P (crtl->return_rtx))); |
3887 | |
3888 | bool complex_return = false; |
3889 | complex_return = (crtl->return_rtx |
3890 | && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx))); |
3891 | |
3892 | if (return_with_x87) |
3893 | if (complex_return) |
3894 | num_of_st = 6; |
3895 | else |
3896 | num_of_st = 7; |
3897 | else |
3898 | num_of_st = 8; |
3899 | |
3900 | rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG); |
3901 | for (unsigned int i = 0; i < num_of_st; i++) |
3902 | emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode))); |
3903 | |
3904 | for (unsigned int i = 0; i < num_of_st; i++) |
3905 | { |
3906 | rtx insn; |
3907 | insn = emit_insn (gen_rtx_SET (st_reg, st_reg)); |
3908 | add_reg_note (insn, REG_DEAD, st_reg); |
3909 | } |
3910 | return num_of_st; |
3911 | } |
3912 | |
3913 | |
3914 | /* When the routine exit in MMX mode, if any ST register needs |
3915 | to be zeroed, we should clear all MMX registers except the |
3916 | RET_MMX_REGNO that holds the return value. */ |
3917 | static bool |
3918 | zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs, |
3919 | unsigned int ret_mmx_regno) |
3920 | { |
3921 | bool need_zero_all_mm = false; |
3922 | for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
3923 | if (STACK_REGNO_P (regno) |
3924 | && TEST_HARD_REG_BIT (set: need_zeroed_hardregs, bit: regno)) |
3925 | { |
3926 | need_zero_all_mm = true; |
3927 | break; |
3928 | } |
3929 | |
3930 | if (!need_zero_all_mm) |
3931 | return false; |
3932 | |
3933 | machine_mode mode = V2SImode; |
3934 | for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++) |
3935 | if (regno != ret_mmx_regno) |
3936 | { |
3937 | rtx reg = gen_rtx_REG (mode, regno); |
3938 | emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode))); |
3939 | } |
3940 | return true; |
3941 | } |
3942 | |
/* TARGET_ZERO_CALL_USED_REGS.  */
/* Generate a sequence of instructions that zero registers specified by
   NEED_ZEROED_HARDREGS.  Return the ZEROED_HARDREGS that are actually
   zeroed.  */
static HARD_REG_SET
ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
{
  HARD_REG_SET zeroed_hardregs;
  bool all_sse_zeroed = false;
  int all_st_zeroed_num = 0;
  bool all_mm_zeroed = false;

  CLEAR_HARD_REG_SET (set&: zeroed_hardregs);

  /* first, let's see whether we can zero all vector registers together.  */
  rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
  if (zero_all_vec_insn)
    {
      emit_insn (zero_all_vec_insn);
      all_sse_zeroed = true;
    }

  /* mm/st registers are shared registers set, we should follow the following
     rules to clear them:
			MMX exit mode	      x87 exit mode
	-------------|----------------------|---------------
	uses x87 reg | clear all MMX	    | clear all x87
	uses MMX reg | clear individual MMX | clear all x87
	x87 + MMX    | clear all MMX	    | clear all x87

     first, we should decide which mode (MMX mode or x87 mode) the function
     exit with.  */

  bool exit_with_mmx_mode = (crtl->return_rtx
			     && (MMX_REG_P (crtl->return_rtx)));

  if (!exit_with_mmx_mode)
    /* x87 exit mode, we should zero all st registers together.  */
    {
      all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);

      if (all_st_zeroed_num > 0)
	for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++)
	  /* x87 stack registers that hold the return value should be excluded.
	     x87 returns in the top (two for complex values) register.  */
	  if (all_st_zeroed_num == 8
	      || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx))
		   || (all_st_zeroed_num == 6
		       && (regno == (REGNO (crtl->return_rtx) + 1)))))
	    SET_HARD_REG_BIT (set&: zeroed_hardregs, bit: regno);
    }
  else
    /* MMX exit mode, check whether we can zero all mm registers.  */
    {
      unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
      all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
					     ret_mmx_regno: exit_mmx_regno);
      if (all_mm_zeroed)
	for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
	  if (regno != exit_mmx_regno)
	    SET_HARD_REG_BIT (set&: zeroed_hardregs, bit: regno);
    }

  /* Now, generate instructions to zero all the other registers.  */

  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    {
      if (!TEST_HARD_REG_BIT (set: need_zeroed_hardregs, bit: regno))
	continue;
      /* Skip registers already handled above (SSE via vzeroall, MMX
	 via zero_all_mm_registers) or not individually zeroable.  */
      if (!zero_call_used_regno_p (regno, all_sse_zeroed,
				   need_zero_mmx: exit_with_mmx_mode && !all_mm_zeroed))
	continue;

      SET_HARD_REG_BIT (set&: zeroed_hardregs, bit: regno);

      machine_mode mode = zero_call_used_regno_mode (regno);

      rtx reg = gen_rtx_REG (mode, regno);
      rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));

      switch (mode)
	{
	case E_SImode:
	  /* xor clobbers the flags; wrap the SET in a PARALLEL with a
	     FLAGS_REG clobber unless a flag-free mov $0 is preferred.  */
	  if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
	    {
	      rtx clob = gen_rtx_CLOBBER (VOIDmode,
					  gen_rtx_REG (CCmode,
						       FLAGS_REG));
	      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
							   tmp,
							   clob));
	    }
	  /* FALLTHRU.  */

	case E_V4SFmode:
	case E_HImode:
	case E_V2SImode:
	  emit_insn (tmp);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return zeroed_hardregs;
}
4049 | |
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (machine_mode orig_mode, machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.   */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* 64-byte vector modes in %zmm0.   */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Return __bf16/ _Float16/_Complex _Float16 by sse register.  */
  if (mode == HFmode || mode == BFmode)
    {
      if (!TARGET_SSE2)
	{
	  error ("SSE register return with SSE2 disabled" );
	  regno = AX_REG;
	}
      else
	regno = FIRST_SSE_REG;
    }

  if (mode == HCmode)
    {
      if (!TARGET_SSE2)
	error ("SSE register return with SSE2 disabled" );

      /* _Complex _Float16 is returned as a single SImode-sized piece
	 in %xmm0 (or %eax if SSE2 is unavailable), wrapped in a
	 one-element PARALLEL.  */
      rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
      XVECEXP (ret, 0, 0)
	= gen_rtx_EXPR_LIST (VOIDmode,
			     gen_rtx_REG (SImode,
					  TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
			     GEN_INT (0));
      return ret;
    }

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (type: fntype, decl: fn, warn: false);
      if (sse_level == -1)
	{
	  error ("calling %qD with SSE calling convention without "
		 "SSE/SSE2 enabled" , fn);
	  sorry ("this is a GCC bug that can be worked around by adding "
		 "attribute used to function called" );
	}
      else if ((sse_level >= 1 && mode == SFmode)
	       || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
4137 | |
/* Like function_value_32, but for the 64-bit SYSV ABI.  VALTYPE is
   NULL for libcalls, which are classified purely by MODE.  */

static rtx
function_value_64 (machine_mode orig_mode, machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
	{
	/* Scalar and complex FP, and decimal FP, return in %xmm0.  */
	case E_BFmode:
	case E_HFmode:
	case E_HCmode:
	case E_SFmode:
	case E_SCmode:
	case E_DFmode:
	case E_DCmode:
	case E_TFmode:
	case E_SDmode:
	case E_DDmode:
	case E_TDmode:
	  regno = FIRST_SSE_REG;
	  break;
	/* 80-bit FP returns on the x87 stack.  */
	case E_XFmode:
	case E_XCmode:
	  regno = FIRST_FLOAT_REG;
	  break;
	/* _Complex long double libcalls are not handled here.  */
	case E_TCmode:
	  return NULL;
	default:
	  regno = AX_REG;
	}

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, type: valtype, in_return: 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     intreg: x86_64_int_return_registers, sse_regno: 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
4193 | |
4194 | static rtx |
4195 | function_value_ms_32 (machine_mode orig_mode, machine_mode mode, |
4196 | const_tree fntype, const_tree fn, const_tree valtype) |
4197 | { |
4198 | unsigned int regno; |
4199 | |
4200 | /* Floating point return values in %st(0) |
4201 | (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */ |
4202 | if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387 |
4203 | && (GET_MODE_SIZE (mode) > 8 |
4204 | || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype))) |
4205 | { |
4206 | regno = FIRST_FLOAT_REG; |
4207 | return gen_rtx_REG (orig_mode, regno); |
4208 | } |
4209 | else |
4210 | return function_value_32(orig_mode, mode, fntype,fn); |
4211 | } |
4212 | |
4213 | static rtx |
4214 | function_value_ms_64 (machine_mode orig_mode, machine_mode mode, |
4215 | const_tree valtype) |
4216 | { |
4217 | unsigned int regno = AX_REG; |
4218 | |
4219 | if (TARGET_SSE) |
4220 | { |
4221 | switch (GET_MODE_SIZE (mode)) |
4222 | { |
4223 | case 16: |
4224 | if (valtype != NULL_TREE |
4225 | && !VECTOR_INTEGER_TYPE_P (valtype) |
4226 | && !VECTOR_INTEGER_TYPE_P (valtype) |
4227 | && !INTEGRAL_TYPE_P (valtype) |
4228 | && !VECTOR_FLOAT_TYPE_P (valtype)) |
4229 | break; |
4230 | if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) |
4231 | && !COMPLEX_MODE_P (mode)) |
4232 | regno = FIRST_SSE_REG; |
4233 | break; |
4234 | case 8: |
4235 | case 4: |
4236 | if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype)) |
4237 | break; |
4238 | if (mode == SFmode || mode == DFmode) |
4239 | regno = FIRST_SSE_REG; |
4240 | break; |
4241 | default: |
4242 | break; |
4243 | } |
4244 | } |
4245 | return gen_rtx_REG (orig_mode, regno); |
4246 | } |
4247 | |
4248 | static rtx |
4249 | ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl, |
4250 | machine_mode orig_mode, machine_mode mode) |
4251 | { |
4252 | const_tree fn, fntype; |
4253 | |
4254 | fn = NULL_TREE; |
4255 | if (fntype_or_decl && DECL_P (fntype_or_decl)) |
4256 | fn = fntype_or_decl; |
4257 | fntype = fn ? TREE_TYPE (fn) : fntype_or_decl; |
4258 | |
4259 | if (ix86_function_type_abi (fntype) == MS_ABI) |
4260 | { |
4261 | if (TARGET_64BIT) |
4262 | return function_value_ms_64 (orig_mode, mode, valtype); |
4263 | else |
4264 | return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype); |
4265 | } |
4266 | else if (TARGET_64BIT) |
4267 | return function_value_64 (orig_mode, mode, valtype); |
4268 | else |
4269 | return function_value_32 (orig_mode, mode, fntype, fn); |
4270 | } |
4271 | |
/* Define how to find the value returned by a function; VALTYPE is the
   value's type, FNTYPE_OR_DECL the called function's type or decl
   (the trailing bool parameter is unused here).  */

static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
{
  machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  /* Classify the type to its natural (possibly vector) mode first.  */
  mode = type_natural_mode (type: valtype, NULL, in_return: true);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
4281 | |
4282 | /* Pointer function arguments and return values are promoted to |
4283 | word_mode for normal functions. */ |
4284 | |
4285 | static machine_mode |
4286 | ix86_promote_function_mode (const_tree type, machine_mode mode, |
4287 | int *punsignedp, const_tree fntype, |
4288 | int for_return) |
4289 | { |
4290 | if (cfun->machine->func_type == TYPE_NORMAL |
4291 | && type != NULL_TREE |
4292 | && POINTER_TYPE_P (type)) |
4293 | { |
4294 | *punsignedp = POINTERS_EXTEND_UNSIGNED; |
4295 | return word_mode; |
4296 | } |
4297 | return default_promote_function_mode (type, mode, punsignedp, fntype, |
4298 | for_return); |
4299 | } |
4300 | |
4301 | /* Return true if a structure, union or array with MODE containing FIELD |
4302 | should be accessed using BLKmode. */ |
4303 | |
4304 | static bool |
4305 | ix86_member_type_forces_blk (const_tree field, machine_mode mode) |
4306 | { |
4307 | /* Union with XFmode must be in BLKmode. */ |
4308 | return (mode == XFmode |
4309 | && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE |
4310 | || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE)); |
4311 | } |
4312 | |
/* Define how to find the value returned by a library function, given
   only its MODE (libcalls have no type node, hence the NULL type and
   decl).  */

rtx
ix86_libcall_value (machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, orig_mode: mode, mode);
}
4318 | |
/* Return true iff type is returned in memory.  */

static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  const machine_mode mode = type_natural_mode (type, NULL, in_return: true);
  HOST_WIDE_INT size;

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	{
	  size = int_size_in_bytes (type);

	  /* __m128 is returned in xmm0.  */
	  if ((!type || VECTOR_INTEGER_TYPE_P (type)
	       || INTEGRAL_TYPE_P (type)
	       || VECTOR_FLOAT_TYPE_P (type))
	      && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode)
	      && (GET_MODE_SIZE (mode) == 16 || size == 16))
	    return false;

	  /* Otherwise, the size must be exactly in [1248].  */
	  return size != 1 && size != 2 && size != 4 && size != 8;
	}
      else
	{
	  /* SYSV 64-bit: in memory iff classification needs no
	     registers at all (examine_argument returns false).  */
	  int needed_intregs, needed_sseregs;

	  return examine_argument (mode, type, in_return: 1,
				   int_nregs: &needed_intregs, sse_nregs: &needed_sseregs);
	}
    }
  else
    {
      size = int_size_in_bytes (type);

      /* Intel MCU psABI returns scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (TARGET_IAMCU)
	return VECTOR_MODE_P (mode) || size < 0 || size > 8;

      if (mode == BLKmode)
	return true;

      if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
	return false;

      if (VECTOR_MODE_P (mode) || mode == TImode)
	{
	  /* User-created vectors small enough to fit in EAX.  */
	  if (size < 8)
	    return false;

	  /* Unless ABI prescribes otherwise,
	     MMX/3dNow values are returned in MM0 if available.  */

	  if (size == 8)
	    return TARGET_VECT8_RETURNS || !TARGET_MMX;

	  /* SSE values are returned in XMM0 if available.  */
	  if (size == 16)
	    return !TARGET_SSE;

	  /* AVX values are returned in YMM0 if available.  */
	  if (size == 32)
	    return !TARGET_AVX;

	  /* AVX512F values are returned in ZMM0 if available.  */
	  if (size == 64)
	    return !TARGET_AVX512F || !TARGET_EVEX512;
	}

      if (mode == XFmode)
	return false;

      if (size > 12)
	return true;

      /* OImode shouldn't be used directly.  */
      gcc_assert (mode != OImode);

      return false;
    }
}
4405 | |
4406 | /* Implement TARGET_PUSH_ARGUMENT. */ |
4407 | |
4408 | static bool |
4409 | ix86_push_argument (unsigned int npush) |
4410 | { |
4411 | /* If SSE2 is available, use vector move to put large argument onto |
4412 | stack. NB: In 32-bit mode, use 8-byte vector move. */ |
4413 | return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8)) |
4414 | && TARGET_PUSH_ARGS |
4415 | && !ACCUMULATE_OUTGOING_ARGS); |
4416 | } |
4417 | |
4418 | |
4419 | /* Create the va_list data type. */ |
4420 | |
4421 | static tree |
4422 | ix86_build_builtin_va_list_64 (void) |
4423 | { |
4424 | tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; |
4425 | |
4426 | record = lang_hooks.types.make_type (RECORD_TYPE); |
4427 | type_decl = build_decl (BUILTINS_LOCATION, |
4428 | TYPE_DECL, get_identifier ("__va_list_tag" ), record); |
4429 | |
4430 | f_gpr = build_decl (BUILTINS_LOCATION, |
4431 | FIELD_DECL, get_identifier ("gp_offset" ), |
4432 | unsigned_type_node); |
4433 | f_fpr = build_decl (BUILTINS_LOCATION, |
4434 | FIELD_DECL, get_identifier ("fp_offset" ), |
4435 | unsigned_type_node); |
4436 | f_ovf = build_decl (BUILTINS_LOCATION, |
4437 | FIELD_DECL, get_identifier ("overflow_arg_area" ), |
4438 | ptr_type_node); |
4439 | f_sav = build_decl (BUILTINS_LOCATION, |
4440 | FIELD_DECL, get_identifier ("reg_save_area" ), |
4441 | ptr_type_node); |
4442 | |
4443 | va_list_gpr_counter_field = f_gpr; |
4444 | va_list_fpr_counter_field = f_fpr; |
4445 | |
4446 | DECL_FIELD_CONTEXT (f_gpr) = record; |
4447 | DECL_FIELD_CONTEXT (f_fpr) = record; |
4448 | DECL_FIELD_CONTEXT (f_ovf) = record; |
4449 | DECL_FIELD_CONTEXT (f_sav) = record; |
4450 | |
4451 | TYPE_STUB_DECL (record) = type_decl; |
4452 | TYPE_NAME (record) = type_decl; |
4453 | TYPE_FIELDS (record) = f_gpr; |
4454 | DECL_CHAIN (f_gpr) = f_fpr; |
4455 | DECL_CHAIN (f_fpr) = f_ovf; |
4456 | DECL_CHAIN (f_ovf) = f_sav; |
4457 | |
4458 | layout_type (record); |
4459 | |
4460 | TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list" ), |
4461 | NULL_TREE, TYPE_ATTRIBUTES (record)); |
4462 | |
4463 | /* The correct type is an array type of one element. */ |
4464 | return build_array_type (record, build_index_type (size_zero_node)); |
4465 | } |
4466 | |
4467 | /* Setup the builtin va_list data type and for 64-bit the additional |
4468 | calling convention specific va_list data types. */ |
4469 | |
4470 | static tree |
4471 | ix86_build_builtin_va_list (void) |
4472 | { |
4473 | if (TARGET_64BIT) |
4474 | { |
4475 | /* Initialize ABI specific va_list builtin types. |
4476 | |
4477 | In lto1, we can encounter two va_list types: |
4478 | - one as a result of the type-merge across TUs, and |
4479 | - the one constructed here. |
4480 | These two types will not have the same TYPE_MAIN_VARIANT, and therefore |
4481 | a type identity check in canonical_va_list_type based on |
4482 | TYPE_MAIN_VARIANT (which we used to have) will not work. |
4483 | Instead, we tag each va_list_type_node with its unique attribute, and |
4484 | look for the attribute in the type identity check in |
4485 | canonical_va_list_type. |
4486 | |
4487 | Tagging sysv_va_list_type_node directly with the attribute is |
4488 | problematic since it's a array of one record, which will degrade into a |
4489 | pointer to record when used as parameter (see build_va_arg comments for |
4490 | an example), dropping the attribute in the process. So we tag the |
4491 | record instead. */ |
4492 | |
4493 | /* For SYSV_ABI we use an array of one record. */ |
4494 | sysv_va_list_type_node = ix86_build_builtin_va_list_64 (); |
4495 | |
4496 | /* For MS_ABI we use plain pointer to argument area. */ |
4497 | tree char_ptr_type = build_pointer_type (char_type_node); |
4498 | tree attr = tree_cons (get_identifier ("ms_abi va_list" ), NULL_TREE, |
4499 | TYPE_ATTRIBUTES (char_ptr_type)); |
4500 | ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr); |
4501 | |
4502 | return ((ix86_abi == MS_ABI) |
4503 | ? ms_va_list_type_node |
4504 | : sysv_va_list_type_node); |
4505 | } |
4506 | else |
4507 | { |
4508 | /* For i386 we use plain pointer to argument area. */ |
4509 | return build_pointer_type (char_type_node); |
4510 | } |
4511 | } |
4512 | |
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  Spill the
   remaining unnamed integer and SSE parameter registers into the
   register save area so va_arg can find them.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  /* Save only registers not already consumed by named arguments
     (CUM->regno onwards).  */
  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
		      gen_rtx_REG (word_mode,
				   x86_64_int_parameter_registers[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      machine_mode smode;
      rtx_code_label *label;
      rtx test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function, though all we
	 actually check here is the zero/non-zero status.  */

      /* Skip the SSE saves entirely when %al is zero.  */
      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
				      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
	 be if we could determine the real mode of the data, via a hook
	 into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
	max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
	{
	  /* SSE slots follow the GPR save area, 16 bytes each.  */
	  mem = plus_constant (Pmode, save_area,
			       i * 16 + ix86_varargs_gpr_size);
	  mem = gen_rtx_MEM (smode, mem);
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

	  emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
	}

      emit_label (label);
    }
}
4598 | |
4599 | static void |
4600 | setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum) |
4601 | { |
4602 | alias_set_type set = get_varargs_alias_set (); |
4603 | int i; |
4604 | |
4605 | /* Reset to zero, as there might be a sysv vaarg used |
4606 | before. */ |
4607 | ix86_varargs_gpr_size = 0; |
4608 | ix86_varargs_fpr_size = 0; |
4609 | |
4610 | for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++) |
4611 | { |
4612 | rtx reg, mem; |
4613 | |
4614 | mem = gen_rtx_MEM (Pmode, |
4615 | plus_constant (Pmode, virtual_incoming_args_rtx, |
4616 | i * UNITS_PER_WORD)); |
4617 | MEM_NOTRAP_P (mem) = 1; |
4618 | set_mem_alias_set (mem, set); |
4619 | |
4620 | reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]); |
4621 | emit_move_insn (mem, reg); |
4622 | } |
4623 | } |
4624 | |
/* Implement TARGET_SETUP_INCOMING_VARARGS.  Emit the code that dumps
   unnamed argument registers to the register save area (SysV ABI) or
   to the shadow space (MS ABI).  CUM_V describes the named arguments
   already processed; ARG is the last named argument.  The unused int*
   is the pretend-args-size out parameter, which this port ignores.  */

static void
ix86_setup_incoming_varargs (cumulative_args_t cum_v,
			     const function_arg_info &arg,
			     int *, int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  /* Only the 64-bit ABIs keep varargs in a register save area.  */
  if (!TARGET_64BIT)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if ((!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
       || arg.type != NULL_TREE)
      && stdarg_p (fntype))
    ix86_function_arg_advance (cum_v: pack_cumulative_args (arg: &next_cum), arg);

  /* Dispatch on the calling convention of this function.  */
  if (cum->call_abi == MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}
4656 | |
4657 | /* Checks if TYPE is of kind va_list char *. */ |
4658 | |
4659 | static bool |
4660 | is_va_list_char_pointer (tree type) |
4661 | { |
4662 | tree canonic; |
4663 | |
4664 | /* For 32-bit it is always true. */ |
4665 | if (!TARGET_64BIT) |
4666 | return true; |
4667 | canonic = ix86_canonical_va_list_type (type); |
4668 | return (canonic == ms_va_list_type_node |
4669 | || (ix86_abi == MS_ABI && canonic == va_list_type_node)); |
4670 | } |
4671 | |
/* Implement va_start.  Initialize the va_list VALIST so that it points
   at NEXTARG.  For the SysV x86-64 ABI this fills in the four fields of
   the struct va_list: gp_offset, fp_offset, overflow_arg_area and
   reg_save_area.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
	 arguments using internal_arg_pointer, because they may be on
	 the old stack.  The split stack prologue will arrange to
	 leave a pointer to the old stack arguments in a scratch
	 register, which we here copy to a pseudo-register.  The split
	 stack prologue can't set the pseudo-register directly because
	 it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
	{
	  rtx reg;
	  rtx_insn *seq;

	  reg = gen_reg_rtx (Pmode);
	  cfun->machine->split_stack_varargs_pointer = reg;

	  /* Build the copy as a detached sequence, then insert it
	     right after the function entry so the pseudo is set
	     before any use.  */
	  start_sequence ();
	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
	  seq = get_insns ();
	  end_sequence ();

	  push_topmost_sequence ();
	  emit_insn_after (seq, entry_of_function ());
	  pop_topmost_sequence ();
	}
    }

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
	std_expand_builtin_va_start (valist, nextarg);
      else
	{
	  rtx va_r, next;

	  /* Split-stack case: the va_list pointer is the saved old-stack
	     argument pointer plus the offset of the first unnamed arg.  */
	  va_r = expand_expr (exp: valist, NULL_RTX, VOIDmode, modifier: EXPAND_WRITE);
	  next = expand_binop (ptr_mode, add_optab,
			       cfun->machine->split_stack_varargs_pointer,
			       crtl->args.arg_offset_rtx,
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
	  convert_move (va_r, next, 0);
	}
      return;
    }

  /* Pick apart the four fields of the SysV struct va_list.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
		f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      /* gp_offset: each GP register slot is 8 bytes.  */
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
		  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      /* fp_offset: SSE slots are 16 bytes each and follow the
	 8 * X86_64_REGPARM_MAX bytes of GP slots.  */
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);

  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
	 Prologue of the function save it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
	/* Only FP registers were saved; bias the pointer back so the
	   fp_offset computed above still lands on the FP slots.  */
	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);

      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL);
    }
}
4803 | |
/* Implement va_arg.  Gimplify the code to fetch the next argument of
   type TYPE from the va_list VALIST, appending setup statements to
   PRE_P (and POST_P).  Returns a tree for the fetched value.  For the
   SysV x86-64 ABI this means choosing between the register save area
   and the stack overflow area at run time.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  /* Identity map for GP argument registers passed to
     construct_container.  */
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  machine_mode nat_mode;
  unsigned int arg_boundary;
  unsigned int type_align;

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  /* Pick apart the four fields of the SysV struct va_list.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
		valist, f_gpr, NULL_TREE);

  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Arguments passed by reference are fetched as a pointer and
     dereferenced at the end.  */
  indirect_p = pass_va_arg_by_reference (type);
  if (indirect_p)
    type = build_pointer_type (type);
  size = arg_int_size_in_bytes (type);
  rsize = CEIL (size, UNITS_PER_WORD);

  nat_mode = type_natural_mode (type, NULL, in_return: false);
  switch (nat_mode)
    {
    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V16SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V8DFmode:
    case E_V8DImode:
      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
	{
	  container = NULL;
	  break;
	}
      /* FALLTHRU */

    default:
      container = construct_container (mode: nat_mode, TYPE_MODE (type),
				       type, in_return: 0, X86_64_REGPARM_MAX,
				       X86_64_SSE_REGPARM_MAX, intreg,
				       sse_regno: 0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr" );
  type_align = TYPE_ALIGN (type);

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      /* lab_false: take the stack path; lab_over: join point.  */
      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (mode: nat_mode, type, in_return: 0, int_nregs: &needed_intregs, sse_nregs: &needed_sseregs);

      /* Over-aligned types cannot be read directly from the save area,
	 which only guarantees 8-byte (GP) / 16-byte (SSE) alignment.  */
      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
         on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = true;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = true;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr" );
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr" );
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + X86_64_REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_build_pointer_plus (sav, gpr);
	  gimplify_assign (int_addr, t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_build_pointer_plus (sav, fpr);
	  gimplify_assign (sse_addr, t, pre_p);
	}
      if (need_temp)
	{
	  int i, prev_size = 0;
	  tree temp = create_tmp_var (type, "va_arg_tmp" );
	  TREE_ADDRESSABLE (temp) = 1;

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  gimplify_assign (addr, t, pre_p);

	  /* Copy each register piece of the value into TEMP, one slot
	     of CONTAINER at a time.  */
	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      machine_mode mode = GET_MODE (reg);
	      tree piece_type;
	      tree addr_type;
	      tree daddr_type;
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;
	      int cur_size = GET_MODE_SIZE (mode);

	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
	      prev_size = INTVAL (XEXP (slot, 1));
	      if (prev_size + cur_size > size)
		{
		  /* Trailing piece is smaller than a full register;
		     narrow the copy mode to the remaining bytes.  */
		  cur_size = size - prev_size;
		  unsigned int nbits = cur_size * BITS_PER_UNIT;
		  if (!int_mode_for_size (size: nbits, limit: 1).exists (mode: &mode))
		    mode = QImode;
		}
	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      if (mode == GET_MODE (reg))
		addr_type = build_pointer_type (piece_type);
	      else
		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							 true);
	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							true);

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

	      dest_addr = fold_convert (daddr_type, addr);
	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
	      if (cur_size == GET_MODE_SIZE (mode))
		{
		  src = build_va_arg_indirect_ref (src_addr);
		  dest = build_va_arg_indirect_ref (dest_addr);

		  gimplify_assign (dest, src, pre_p);
		}
	      else
		{
		  /* Partial piece: copy byte-wise via memcpy.  */
		  tree copy
		    = build_call_expr (builtin_decl_implicit (fncode: BUILT_IN_MEMCPY),
				       3, dest_addr, src_addr,
				       size_int (cur_size));
		  gimplify_and_add (copy, pre_p);
		}
	      prev_size += cur_size;
	    }
	}

      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  gimplify_assign (gpr, t, pre_p);
	  /* The GPR save area guarantees only 8-byte alignment.  */
	  if (!need_temp)
	    type_align = MIN (type_align, 64);
	}

      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  gimplify_assign (unshare_expr (fpr), t, pre_p);
	}

      gimple_seq_add_stmt (pre_p, gimple_build_goto (dest: lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (label: lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      /* Round the overflow pointer up to the argument boundary.  */
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  /* Advance the overflow pointer past this argument.  */
  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (label: lab_over));

  /* Dereference ADDR with the alignment actually guaranteed above.  */
  type = build_aligned_type (type, type_align);
  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
5110 | |
5111 | /* Return true if OPNUM's MEM should be matched |
5112 | in movabs* patterns. */ |
5113 | |
5114 | bool |
5115 | ix86_check_movabs (rtx insn, int opnum) |
5116 | { |
5117 | rtx set, mem; |
5118 | |
5119 | set = PATTERN (insn); |
5120 | if (GET_CODE (set) == PARALLEL) |
5121 | set = XVECEXP (set, 0, 0); |
5122 | gcc_assert (GET_CODE (set) == SET); |
5123 | mem = XEXP (set, opnum); |
5124 | while (SUBREG_P (mem)) |
5125 | mem = SUBREG_REG (mem); |
5126 | gcc_assert (MEM_P (mem)); |
5127 | return volatile_ok || !MEM_VOLATILE_P (mem); |
5128 | } |
5129 | |
5130 | /* Return false if INSN contains a MEM with a non-default address space. */ |
5131 | bool |
5132 | ix86_check_no_addr_space (rtx insn) |
5133 | { |
5134 | subrtx_var_iterator::array_type array; |
5135 | FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL) |
5136 | { |
5137 | rtx x = *iter; |
5138 | if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))) |
5139 | return false; |
5140 | } |
5141 | return true; |
5142 | } |
5143 | |
5144 | /* Initialize the table of extra 80387 mathematical constants. */ |
5145 | |
5146 | static void |
5147 | init_ext_80387_constants (void) |
5148 | { |
5149 | static const char * cst[5] = |
5150 | { |
5151 | "0.3010299956639811952256464283594894482" , /* 0: fldlg2 */ |
5152 | "0.6931471805599453094286904741849753009" , /* 1: fldln2 */ |
5153 | "1.4426950408889634073876517827983434472" , /* 2: fldl2e */ |
5154 | "3.3219280948873623478083405569094566090" , /* 3: fldl2t */ |
5155 | "3.1415926535897932385128089594061862044" , /* 4: fldpi */ |
5156 | }; |
5157 | int i; |
5158 | |
5159 | for (i = 0; i < 5; i++) |
5160 | { |
5161 | real_from_string (&ext_80387_constants_table[i], cst[i]); |
5162 | /* Ensure each constant is rounded to XFmode precision. */ |
5163 | real_convert (&ext_80387_constants_table[i], |
5164 | XFmode, &ext_80387_constants_table[i]); |
5165 | } |
5166 | |
5167 | ext_80387_constants_init = 1; |
5168 | } |
5169 | |
5170 | /* Return non-zero if the constant is something that |
5171 | can be loaded with a special instruction. */ |
5172 | |
5173 | int |
5174 | standard_80387_constant_p (rtx x) |
5175 | { |
5176 | machine_mode mode = GET_MODE (x); |
5177 | |
5178 | const REAL_VALUE_TYPE *r; |
5179 | |
5180 | if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode))) |
5181 | return -1; |
5182 | |
5183 | if (x == CONST0_RTX (mode)) |
5184 | return 1; |
5185 | if (x == CONST1_RTX (mode)) |
5186 | return 2; |
5187 | |
5188 | r = CONST_DOUBLE_REAL_VALUE (x); |
5189 | |
5190 | /* For XFmode constants, try to find a special 80387 instruction when |
5191 | optimizing for size or on those CPUs that benefit from them. */ |
5192 | if (mode == XFmode |
5193 | && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS) |
5194 | && !flag_rounding_math) |
5195 | { |
5196 | int i; |
5197 | |
5198 | if (! ext_80387_constants_init) |
5199 | init_ext_80387_constants (); |
5200 | |
5201 | for (i = 0; i < 5; i++) |
5202 | if (real_identical (r, &ext_80387_constants_table[i])) |
5203 | return i + 3; |
5204 | } |
5205 | |
5206 | /* Load of the constant -0.0 or -1.0 will be split as |
5207 | fldz;fchs or fld1;fchs sequence. */ |
5208 | if (real_isnegzero (r)) |
5209 | return 8; |
5210 | if (real_identical (r, &dconstm1)) |
5211 | return 9; |
5212 | |
5213 | return 0; |
5214 | } |
5215 | |
5216 | /* Return the opcode of the special instruction to be used to load |
5217 | the constant X. */ |
5218 | |
5219 | const char * |
5220 | standard_80387_constant_opcode (rtx x) |
5221 | { |
5222 | switch (standard_80387_constant_p (x)) |
5223 | { |
5224 | case 1: |
5225 | return "fldz" ; |
5226 | case 2: |
5227 | return "fld1" ; |
5228 | case 3: |
5229 | return "fldlg2" ; |
5230 | case 4: |
5231 | return "fldln2" ; |
5232 | case 5: |
5233 | return "fldl2e" ; |
5234 | case 6: |
5235 | return "fldl2t" ; |
5236 | case 7: |
5237 | return "fldpi" ; |
5238 | case 8: |
5239 | case 9: |
5240 | return "#" ; |
5241 | default: |
5242 | gcc_unreachable (); |
5243 | } |
5244 | } |
5245 | |
5246 | /* Return the CONST_DOUBLE representing the 80387 constant that is |
5247 | loaded by the specified special instruction. The argument IDX |
5248 | matches the return value from standard_80387_constant_p. */ |
5249 | |
5250 | rtx |
5251 | standard_80387_constant_rtx (int idx) |
5252 | { |
5253 | int i; |
5254 | |
5255 | if (! ext_80387_constants_init) |
5256 | init_ext_80387_constants (); |
5257 | |
5258 | switch (idx) |
5259 | { |
5260 | case 3: |
5261 | case 4: |
5262 | case 5: |
5263 | case 6: |
5264 | case 7: |
5265 | i = idx - 3; |
5266 | break; |
5267 | |
5268 | default: |
5269 | gcc_unreachable (); |
5270 | } |
5271 | |
5272 | return const_double_from_real_value (ext_80387_constants_table[i], |
5273 | XFmode); |
5274 | } |
5275 | |
/* Return 1 if X is all bits 0, 2 if X is all bits 1
   and 3 if X is all bits 1 with zero extend
   in supported SSE/AVX vector mode.  PRED_MODE supplies the mode when
   X is a VOIDmode integer constant.  Return 0 when X is none of these
   or the required ISA for the vector width is not enabled.  */

int
standard_sse_constant_p (rtx x, machine_mode pred_mode)
{
  machine_mode mode;

  if (!TARGET_SSE)
    return 0;

  mode = GET_MODE (x);

  /* All-zeros can always be materialized with a single xor.  */
  if (x == const0_rtx || const0_operand (x, mode))
    return 1;

  if (x == constm1_rtx
      || vector_all_ones_operand (x, mode)
      || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	   || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
	  && float_vector_all_ones_operand (x, mode)))
    {
      /* VOIDmode integer constant, get mode from the predicate.  */
      if (mode == VOIDmode)
	mode = pred_mode;

      /* All-ones needs a compare/ternlog instruction, which exists
	 only at the widths the enabled ISA supports.  */
      switch (GET_MODE_SIZE (mode))
	{
	case 64:
	  if (TARGET_AVX512F && TARGET_EVEX512)
	    return 2;
	  break;
	case 32:
	  if (TARGET_AVX2)
	    return 2;
	  break;
	case 16:
	  if (TARGET_SSE2)
	    return 2;
	  break;
	case 0:
	  /* VOIDmode */
	  gcc_unreachable ();
	default:
	  break;
	}
    }

  /* All-ones in the low half or quarter with the rest zero.  */
  if (vector_all_ones_zero_extend_half_operand (x, mode)
      || vector_all_ones_zero_extend_quarter_operand (x, mode))
    return 3;

  return 0;
}
5331 | |
/* Return the opcode of the special instruction to be used to load
   the constant operands[1] into operands[0].  INSN supplies the
   instruction's mode attribute, which selects the vector width and
   int/float flavor of the zeroing or all-ones idiom.  */

const char *
standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
  machine_mode mode;
  rtx x = operands[1];

  gcc_assert (TARGET_SSE);

  mode = GET_MODE (x);

  if (x == const0_rtx || const0_operand (x, mode))
    {
      /* Zeroing: pick an xor idiom.  xmm16+ (EXT_REX) registers need
	 EVEX-encoded forms, which require AVX512VL for the narrow
	 variants or TARGET_EVEX512 to fall back to the zmm form.  */
      switch (get_attr_mode (insn))
	{
	case MODE_TI:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vpxor\t%0, %d0";
	  /* FALLTHRU */
	case MODE_XI:
	case MODE_OI:
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      if (TARGET_AVX512VL)
		return "vpxord\t%x0, %x0, %x0";
	      else if (TARGET_EVEX512)
		return "vpxord\t%g0, %g0, %g0";
	      else
		gcc_unreachable ();
	    }
	  return "vpxor\t%x0, %x0, %x0";

	case MODE_V2DF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vxorpd\t%0, %d0";
	  /* FALLTHRU */
	case MODE_V8DF:
	case MODE_V4DF:
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      /* EVEX xorpd exists only with AVX512DQ; otherwise use
		 the integer vpxorq equivalent.  */
	      if (TARGET_AVX512DQ)
		{
		  if (TARGET_AVX512VL)
		    return "vxorpd\t%x0, %x0, %x0";
		  else if (TARGET_EVEX512)
		    return "vxorpd\t%g0, %g0, %g0";
		  else
		    gcc_unreachable ();
		}
	      else
		{
		  if (TARGET_AVX512VL)
		    return "vpxorq\t%x0, %x0, %x0";
		  else if (TARGET_EVEX512)
		    return "vpxorq\t%g0, %g0, %g0";
		  else
		    gcc_unreachable ();
		}
	    }
	  return "vxorpd\t%x0, %x0, %x0";

	case MODE_V4SF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vxorps\t%0, %d0";
	  /* FALLTHRU */
	case MODE_V16SF:
	case MODE_V8SF:
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      /* Same AVX512DQ consideration for xorps.  */
	      if (TARGET_AVX512DQ)
		{
		  if (TARGET_AVX512VL)
		    return "vxorps\t%x0, %x0, %x0";
		  else if (TARGET_EVEX512)
		    return "vxorps\t%g0, %g0, %g0";
		  else
		    gcc_unreachable ();
		}
	      else
		{
		  if (TARGET_AVX512VL)
		    return "vpxord\t%x0, %x0, %x0";
		  else if (TARGET_EVEX512)
		    return "vpxord\t%g0, %g0, %g0";
		  else
		    gcc_unreachable ();
		}
	    }
	  return "vxorps\t%x0, %x0, %x0";

	default:
	  gcc_unreachable ();
	}
    }
  else if (x == constm1_rtx
	   || vector_all_ones_operand (x, mode)
	   || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	       && float_vector_all_ones_operand (x, mode)))
    {
      /* All-ones: use pcmpeqd (reg == reg) or, for EVEX-only
	 registers/widths, vpternlogd with immediate 0xFF.  */
      enum attr_mode insn_mode = get_attr_mode (insn);

      switch (insn_mode)
	{
	case MODE_XI:
	case MODE_V8DF:
	case MODE_V16SF:
	  gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
	  return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";

	case MODE_OI:
	case MODE_V4DF:
	case MODE_V8SF:
	  gcc_assert (TARGET_AVX2);
	  /* FALLTHRU */
	case MODE_TI:
	case MODE_V2DF:
	case MODE_V4SF:
	  gcc_assert (TARGET_SSE2);
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      if (TARGET_AVX512VL)
		return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
	      else if (TARGET_EVEX512)
		return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
	      else
		gcc_unreachable ();
	    }
	  return (TARGET_AVX
		  ? "vpcmpeqd\t%0, %0, %0"
		  : "pcmpeqd\t%0, %0");

	default:
	  gcc_unreachable ();
	}
    }
  else if (vector_all_ones_zero_extend_half_operand (x, mode))
    {
      /* All-ones in the low half: compare-equal on the half-width
	 register; the upper half is implicitly zeroed.  */
      if (GET_MODE_SIZE (mode) == 64)
	{
	  gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
	  return "vpcmpeqd\t%t0, %t0, %t0";
	}
      else if (GET_MODE_SIZE (mode) == 32)
	{
	  gcc_assert (TARGET_AVX);
	  return "vpcmpeqd\t%x0, %x0, %x0";
	}
      gcc_unreachable ();
    }
  else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
    {
      /* All-ones in the low quarter of a 512-bit vector.  */
      gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
      return "vpcmpeqd\t%x0, %x0, %x0";
    }

  gcc_unreachable ();
}
5491 | |
/* Returns true if INSN can be transformed from a memory load
   to a supported FP constant load.  DST is the register that would
   receive the value; the constant must be loadable into DST's
   register class (all-zeros for SSE regs -- except for xmm16+
   without AVX512VL -- or one of the special x87 constants for
   stack regs).  */

bool
ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
{
  rtx src = find_constant_src (insn);

  gcc_assert (REG_P (dst));

  if (src == NULL
      || (SSE_REGNO_P (REGNO (dst))
	  && standard_sse_constant_p (x: src, GET_MODE (dst)) != 1)
      || (!TARGET_AVX512VL
	  && EXT_REX_SSE_REGNO_P (REGNO (dst))
	  && standard_sse_constant_p (x: src, GET_MODE (dst)) == 1)
      || (STACK_REGNO_P (REGNO (dst))
	  && standard_80387_constant_p (x: src) < 1))
    return false;

  return true;
}
5514 | |
5515 | /* Predicate for pre-reload splitters with associated instructions, |
5516 | which can match any time before the split1 pass (usually combine), |
5517 | then are unconditionally split in that pass and should not be |
5518 | matched again afterwards. */ |
5519 | |
5520 | bool |
5521 | ix86_pre_reload_split (void) |
5522 | { |
5523 | return (can_create_pseudo_p () |
5524 | && !(cfun->curr_properties & PROP_rtl_split_insns)); |
5525 | } |
5526 | |
5527 | /* Return the opcode of the TYPE_SSEMOV instruction. To move from |
5528 | or to xmm16-xmm31/ymm16-ymm31 registers, we either require |
5529 | TARGET_AVX512VL or it is a register to register move which can |
5530 | be done with zmm register move. */ |
5531 | |
static const char *
ix86_get_ssemov (rtx *operands, unsigned size,
		 enum attr_mode insn_mode, machine_mode mode)
{
  char buf[128];
  /* Misaligned operands require the unaligned ("...u...") move forms.  */
  bool misaligned_p = (misaligned_operand (operands[0], mode)
		       || misaligned_operand (operands[1], mode));
  /* True when the move must be EVEX-encodable: a full 64-byte move or
     an operand in xmm16-xmm31/ymm16-ymm31.  */
  bool evex_reg_p = (size == 64
		     || EXT_REX_SSE_REG_P (operands[0])
		     || EXT_REX_SSE_REG_P (operands[1]));

  /* APX: a mention of an extended GPR (rex2) in either operand also
     restricts which encodings are usable.  */
  bool egpr_p = (TARGET_APX_EGPR
		 && (x86_extended_rex2reg_mentioned_p (operands[0])
		     || x86_extended_rex2reg_mentioned_p (operands[1])));
  bool egpr_vl = egpr_p && TARGET_AVX512VL;

  machine_mode scalar_mode;

  const char *opcode = NULL;
  /* Flavor of mnemonic to emit, derived from INSN_MODE below.  */
  enum
  {
    opcode_int,
    opcode_float,
    opcode_double
  } type = opcode_int;

  /* Derive the element mode and the mnemonic flavor from the insn's
     mode attribute; every arm sets scalar_mode.  */
  switch (insn_mode)
    {
    case MODE_V16SF:
    case MODE_V8SF:
    case MODE_V4SF:
      scalar_mode = E_SFmode;
      type = opcode_float;
      break;
    case MODE_V8DF:
    case MODE_V4DF:
    case MODE_V2DF:
      scalar_mode = E_DFmode;
      type = opcode_double;
      break;
    case MODE_XI:
    case MODE_OI:
    case MODE_TI:
      scalar_mode = GET_MODE_INNER (mode);
      break;
    default:
      gcc_unreachable ();
    }

  /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
     we can only use zmm register move without memory operand.  */
  if (evex_reg_p
      && !TARGET_AVX512VL
      && GET_MODE_SIZE (mode) < 64)
    {
      /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
	 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
	 AVX512VL is disabled, LRA can still generate reg to
	 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
	 modes.  */
      if (memory_operand (operands[0], mode)
	  || memory_operand (operands[1], mode))
	gcc_unreachable ();
      /* Widen to a zmm move; SIZE drives the operand modifiers below.  */
      size = 64;
      /* We need TARGET_EVEX512 to move into zmm register.  */
      gcc_assert (TARGET_EVEX512);
      switch (type)
	{
	case opcode_int:
	  /* HF/BF elements need vmovdqu16 (AVX512BW) when unaligned.  */
	  if (scalar_mode == E_HFmode || scalar_mode == E_BFmode)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64" )
		      : "vmovdqa64" );
	  else
	    opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32" ;
	  break;
	case opcode_float:
	  opcode = misaligned_p ? "vmovups" : "vmovaps" ;
	  break;
	case opcode_double:
	  opcode = misaligned_p ? "vmovupd" : "vmovapd" ;
	  break;
	}
    }
  else if (SCALAR_FLOAT_MODE_P (scalar_mode))
    {
      /* Float element modes: EVEX registers (or egpr with AVX512VL) get
	 the AVX512 integer-move spellings; plain egpr falls back to
	 vmovups/vmovaps; otherwise the legacy SSE/AVX forms.  */
      switch (scalar_mode)
	{
	case E_HFmode:
	case E_BFmode:
	  if (evex_reg_p || egpr_vl)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu16"
			 : "vmovdqu64" )
		      : "vmovdqa64" );
	  else if (egpr_p)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu16"
			 : "%vmovups" )
		      : "%vmovaps" );
	  else
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu16"
			 : "%vmovdqu" )
		      : "%vmovdqa" );
	  break;
	case E_SFmode:
	  opcode = misaligned_p ? "%vmovups" : "%vmovaps" ;
	  break;
	case E_DFmode:
	  opcode = misaligned_p ? "%vmovupd" : "%vmovapd" ;
	  break;
	case E_TFmode:
	  if (evex_reg_p || egpr_vl)
	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64" ;
	  else if (egpr_p)
	    opcode = misaligned_p ? "%vmovups" : "%vmovaps" ;
	  else
	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa" ;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else if (SCALAR_INT_MODE_P (scalar_mode))
    {
      /* Integer element modes, same three-way policy as above; byte and
	 word elements need AVX512BW for the element-exact unaligned
	 forms (vmovdqu8/vmovdqu16).  */
      switch (scalar_mode)
	{
	case E_QImode:
	  if (evex_reg_p || egpr_vl)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu8"
			 : "vmovdqu64" )
		      : "vmovdqa64" );
	  else if (egpr_p)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu8"
			 : "%vmovups" )
		      : "%vmovaps" );
	  else
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu8"
			 : "%vmovdqu" )
		      : "%vmovdqa" );
	  break;
	case E_HImode:
	  if (evex_reg_p || egpr_vl)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu16"
			 : "vmovdqu64" )
		      : "vmovdqa64" );
	  else if (egpr_p)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu16"
			 : "%vmovups" )
		      : "%vmovaps" );
	  else
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu16"
			 : "%vmovdqu" )
		      : "%vmovdqa" );
	  break;
	case E_SImode:
	  if (evex_reg_p || egpr_vl)
	    opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32" ;
	  else if (egpr_p)
	    opcode = misaligned_p ? "%vmovups" : "%vmovaps" ;
	  else
	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa" ;
	  break;
	case E_DImode:
	case E_TImode:
	case E_OImode:
	  if (evex_reg_p || egpr_vl)
	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64" ;
	  else if (egpr_p)
	    opcode = misaligned_p ? "%vmovups" : "%vmovaps" ;
	  else
	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa" ;
	  break;
	case E_XImode:
	  opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64" ;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    gcc_unreachable ();

  /* Glue the opcode to the operand template; %g/%t/%x print the
     operands as zmm/ymm/xmm respectively.  */
  switch (size)
    {
    case 64:
      snprintf (s: buf, maxlen: sizeof (buf), format: "%s\t{%%g1, %%g0|%%g0, %%g1}",
		opcode);
      break;
    case 32:
      snprintf (s: buf, maxlen: sizeof (buf), format: "%s\t{%%t1, %%t0|%%t0, %%t1}",
		opcode);
      break;
    case 16:
      snprintf (s: buf, maxlen: sizeof (buf), format: "%s\t{%%x1, %%x0|%%x0, %%x1}",
		opcode);
      break;
    default:
      gcc_unreachable ();
    }
  output_asm_insn (buf, operands);
  return "";
}
5751 | |
5752 | /* Return the template of the TYPE_SSEMOV instruction to move |
5753 | operands[1] into operands[0]. */ |
5754 | |
const char *
ix86_output_ssemov (rtx_insn *insn, rtx *operands)
{
  machine_mode mode = GET_MODE (operands[0]);
  /* Only TYPE_SSEMOV insns whose operands agree in mode are valid.  */
  if (get_attr_type (insn) != TYPE_SSEMOV
      || mode != GET_MODE (operands[1]))
    gcc_unreachable ();

  enum attr_mode insn_mode = get_attr_mode (insn);

  /* Full-vector moves are routed through ix86_get_ssemov with the
     vector width in bytes; scalar moves return a template directly.  */
  switch (insn_mode)
    {
    case MODE_XI:
    case MODE_V8DF:
    case MODE_V16SF:
      return ix86_get_ssemov (operands, size: 64, insn_mode, mode);

    case MODE_OI:
    case MODE_V4DF:
    case MODE_V8SF:
      return ix86_get_ssemov (operands, size: 32, insn_mode, mode);

    case MODE_TI:
    case MODE_V2DF:
    case MODE_V4SF:
      return ix86_get_ssemov (operands, size: 16, insn_mode, mode);

    case MODE_DI:
      /* Handle broken assemblers that require movd instead of movq.  */
      if (GENERAL_REG_P (operands[0]))
	{
	  if (HAVE_AS_IX86_INTERUNIT_MOVQ)
	    return "%vmovq\t{%1, %q0|%q0, %1}";
	  else
	    return "%vmovd\t{%1, %q0|%q0, %1}";
	}
      else if (GENERAL_REG_P (operands[1]))
	{
	  if (HAVE_AS_IX86_INTERUNIT_MOVQ)
	    return "%vmovq\t{%q1, %0|%0, %q1}";
	  else
	    return "%vmovd\t{%q1, %0|%0, %q1}";
	}
      else
	return "%vmovq\t{%1, %0|%0, %1}";

    case MODE_SI:
      /* %k prints the 32-bit name of a general register.  */
      if (GENERAL_REG_P (operands[0]))
	return "%vmovd\t{%1, %k0|%k0, %1}";
      else if (GENERAL_REG_P (operands[1]))
	return "%vmovd\t{%k1, %0|%0, %k1}";
      else
	return "%vmovd\t{%1, %0|%0, %1}";

    case MODE_HI:
      if (GENERAL_REG_P (operands[0]))
	return "vmovw\t{%1, %k0|%k0, %1}";
      else if (GENERAL_REG_P (operands[1]))
	return "vmovw\t{%k1, %0|%0, %k1}";
      else
	return "vmovw\t{%1, %0|%0, %1}";

    case MODE_DF:
      /* NOTE(review): reg-to-reg AVX moves use the %d1 form — appears
	 to duplicate operand 1 for the 3-operand vmovsd; confirm
	 against ix86_print_operand.  */
      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
	return "vmovsd\t{%d1, %0|%0, %d1}";
      else
	return "%vmovsd\t{%1, %0|%0, %1}";

    case MODE_SF:
      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
	return "vmovss\t{%d1, %0|%0, %d1}";
      else
	return "%vmovss\t{%1, %0|%0, %1}";

    case MODE_HF:
    case MODE_BF:
      if (REG_P (operands[0]) && REG_P (operands[1]))
	return "vmovsh\t{%d1, %0|%0, %d1}";
      else
	return "vmovsh\t{%1, %0|%0, %1}";

    case MODE_V1DF:
      gcc_assert (!TARGET_AVX);
      return "movlpd\t{%1, %0|%0, %1}";

    case MODE_V2SF:
      if (TARGET_AVX && REG_P (operands[0]))
	return "vmovlps\t{%1, %d0|%d0, %1}";
      else
	return "%vmovlps\t{%1, %0|%0, %1}";

    default:
      gcc_unreachable ();
    }
}
5850 | |
5851 | /* Returns true if OP contains a symbol reference */ |
5852 | |
5853 | bool |
5854 | symbolic_reference_mentioned_p (rtx op) |
5855 | { |
5856 | const char *fmt; |
5857 | int i; |
5858 | |
5859 | if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) |
5860 | return true; |
5861 | |
5862 | fmt = GET_RTX_FORMAT (GET_CODE (op)); |
5863 | for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) |
5864 | { |
5865 | if (fmt[i] == 'E') |
5866 | { |
5867 | int j; |
5868 | |
5869 | for (j = XVECLEN (op, i) - 1; j >= 0; j--) |
5870 | if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) |
5871 | return true; |
5872 | } |
5873 | |
5874 | else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) |
5875 | return true; |
5876 | } |
5877 | |
5878 | return false; |
5879 | } |
5880 | |
5881 | /* Return true if it is appropriate to emit `ret' instructions in the |
5882 | body of a function. Do this only if the epilogue is simple, needing a |
5883 | couple of insns. Prior to reloading, we can't tell how many registers |
5884 | must be saved, so return false then. Return false if there is no frame |
5885 | marker to de-allocate. */ |
5886 | |
5887 | bool |
5888 | ix86_can_use_return_insn_p (void) |
5889 | { |
5890 | if (ix86_function_ms_hook_prologue (fn: current_function_decl)) |
5891 | return false; |
5892 | |
5893 | if (ix86_function_naked (fn: current_function_decl)) |
5894 | return false; |
5895 | |
5896 | /* Don't use `ret' instruction in interrupt handler. */ |
5897 | if (! reload_completed |
5898 | || frame_pointer_needed |
5899 | || cfun->machine->func_type != TYPE_NORMAL) |
5900 | return 0; |
5901 | |
5902 | /* Don't allow more than 32k pop, since that's all we can do |
5903 | with one instruction. */ |
5904 | if (crtl->args.pops_args && crtl->args.size >= 32768) |
5905 | return 0; |
5906 | |
5907 | struct ix86_frame &frame = cfun->machine->frame; |
5908 | return (frame.stack_pointer_offset == UNITS_PER_WORD |
5909 | && (frame.nregs + frame.nsseregs) == 0); |
5910 | } |
5911 | |
5912 | /* Return stack frame size. get_frame_size () returns used stack slots |
5913 | during compilation, which may be optimized out later. If stack frame |
5914 | is needed, stack_frame_required should be true. */ |
5915 | |
5916 | static HOST_WIDE_INT |
5917 | ix86_get_frame_size (void) |
5918 | { |
5919 | if (cfun->machine->stack_frame_required) |
5920 | return get_frame_size (); |
5921 | else |
5922 | return 0; |
5923 | } |
5924 | |
5925 | /* Value should be nonzero if functions must have frame pointers. |
5926 | Zero means the frame pointer need not be set up (and parms may |
5927 | be accessed via the stack pointer) in functions that seem suitable. */ |
5928 | |
5929 | static bool |
5930 | ix86_frame_pointer_required (void) |
5931 | { |
5932 | /* If we accessed previous frames, then the generated code expects |
5933 | to be able to access the saved ebp value in our frame. */ |
5934 | if (cfun->machine->accesses_prev_frame) |
5935 | return true; |
5936 | |
5937 | /* Several x86 os'es need a frame pointer for other reasons, |
5938 | usually pertaining to setjmp. */ |
5939 | if (SUBTARGET_FRAME_POINTER_REQUIRED) |
5940 | return true; |
5941 | |
5942 | /* For older 32-bit runtimes setjmp requires valid frame-pointer. */ |
5943 | if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp) |
5944 | return true; |
5945 | |
5946 | /* Win64 SEH, very large frames need a frame-pointer as maximum stack |
5947 | allocation is 4GB. */ |
5948 | if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE) |
5949 | return true; |
5950 | |
5951 | /* SSE saves require frame-pointer when stack is misaligned. */ |
5952 | if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128) |
5953 | return true; |
5954 | |
5955 | /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER |
5956 | turns off the frame pointer by default. Turn it back on now if |
5957 | we've not got a leaf function. */ |
5958 | if (TARGET_OMIT_LEAF_FRAME_POINTER |
5959 | && (!crtl->is_leaf |
5960 | || ix86_current_function_calls_tls_descriptor)) |
5961 | return true; |
5962 | |
5963 | /* Several versions of mcount for the x86 assumes that there is a |
5964 | frame, so we cannot allow profiling without a frame pointer. */ |
5965 | if (crtl->profile && !flag_fentry) |
5966 | return true; |
5967 | |
5968 | return false; |
5969 | } |
5970 | |
5971 | /* Record that the current function accesses previous call frames. */ |
5972 | |
void
ix86_setup_frame_addresses (void)
{
  /* Consulted by ix86_frame_pointer_required, which then forces a
     frame pointer for this function.  */
  cfun->machine->accesses_prev_frame = 1;
}
5978 | |
#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

/* Label count for call and return thunks.  It is used to make unique
   labels in call and return thunks.  */
static int indirectlabelno;

/* True if call thunk function is needed.  The thunks recorded by the
   flags below are emitted at end of compilation by ix86_code_end.  */
static bool indirect_thunk_needed = false;

/* Bit masks of integer registers, which contain branch target, used
   by call thunk functions.  */
static HARD_REG_SET indirect_thunks_used;

/* True if return thunk function is needed.  */
static bool indirect_return_needed = false;

/* True if return thunk function via CX is needed.  */
static bool indirect_return_via_cx;

/* Prefix used for the internal labels emitted inside a thunk body.  */
#ifndef INDIRECT_LABEL
# define INDIRECT_LABEL "LIND"
#endif

/* Indicate what prefix is needed for an indirect branch.  */
enum indirect_thunk_prefix
{
  indirect_thunk_prefix_none,
  indirect_thunk_prefix_nt
};
6014 | |
6015 | /* Return the prefix needed for an indirect branch INSN. */ |
6016 | |
6017 | enum indirect_thunk_prefix |
6018 | indirect_thunk_need_prefix (rtx_insn *insn) |
6019 | { |
6020 | enum indirect_thunk_prefix need_prefix; |
6021 | if ((cfun->machine->indirect_branch_type |
6022 | == indirect_branch_thunk_extern) |
6023 | && ix86_notrack_prefixed_insn_p (insn)) |
6024 | { |
6025 | /* NOTRACK prefix is only used with external thunk so that it |
6026 | can be properly updated to support CET at run-time. */ |
6027 | need_prefix = indirect_thunk_prefix_nt; |
6028 | } |
6029 | else |
6030 | need_prefix = indirect_thunk_prefix_none; |
6031 | return need_prefix; |
6032 | } |
6033 | |
6034 | /* Fills in the label name that should be used for the indirect thunk. */ |
6035 | |
static void
indirect_thunk_name (char name[32], unsigned int regno,
		     enum indirect_thunk_prefix need_prefix,
		     bool ret_p)
{
  /* Return thunks are only ever generated unnamed or via CX (see
     ix86_code_end), never through another register.  */
  if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
    gcc_unreachable ();

  if (USE_HIDDEN_LINKONCE)
    {
      const char *prefix;

      if (need_prefix == indirect_thunk_prefix_nt
	  && regno != INVALID_REGNUM)
	{
	  /* NOTRACK prefix is only used with external thunk via
	     register so that NOTRACK prefix can be added to indirect
	     branch via register to support CET at run-time.  */
	  prefix = "_nt";
	}
      else
	prefix = "";

      const char *ret = ret_p ? "return" : "indirect";

      if (regno != INVALID_REGNUM)
	{
	  /* Legacy integer registers get their "e"/"r" spelling prefix
	     so the name matches the assembly register name.  */
	  const char *reg_prefix;
	  if (LEGACY_INT_REGNO_P (regno))
	    reg_prefix = TARGET_64BIT ? "r" : "e";
	  else
	    reg_prefix = "";
	  sprintf (s: name, format: "__x86_%s_thunk%s_%s%s",
		   ret, prefix, reg_prefix, reg_names[regno]);
	}
      else
	sprintf (s: name, format: "__x86_%s_thunk%s", ret, prefix);
    }
  else
    {
      /* Without hidden linkonce support, fall back to per-TU internal
	 labels.  */
      if (regno != INVALID_REGNUM)
	ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
      else
	{
	  if (ret_p)
	    ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
	  else
	    ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
	}
    }
}
6087 | |
6088 | /* Output a call and return thunk for indirect branch. If REGNO != -1, |
6089 | the function address is in REGNO and the call and return thunk looks like: |
6090 | |
6091 | call L2 |
6092 | L1: |
6093 | pause |
6094 | lfence |
6095 | jmp L1 |
6096 | L2: |
6097 | mov %REG, (%sp) |
6098 | ret |
6099 | |
6100 | Otherwise, the function address is on the top of stack and the |
6101 | call and return thunk looks like: |
6102 | |
6103 | call L2 |
6104 | L1: |
6105 | pause |
6106 | lfence |
6107 | jmp L1 |
6108 | L2: |
6109 | lea WORD_SIZE(%sp), %sp |
6110 | ret |
6111 | */ |
6112 | |
static void
output_indirect_thunk (unsigned int regno)
{
  char indirectlabel1[32];
  char indirectlabel2[32];

  /* Two fresh internal labels: L1 for the pause/lfence capture loop,
     L2 for the thunk body (see the diagram above).  */
  ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
			       indirectlabelno++);
  ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
			       indirectlabelno++);

  /* Call */
  fputs (s: "\tcall\t", stream: asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel2);
  fputc (c: '\n', stream: asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

  /* AMD and Intel CPUs prefer each a different instruction as loop filler.
     Usage of both pause + lfence is compromise solution.  */
  fprintf (stream: asm_out_file, format: "\tpause\n\tlfence\n");

  /* Jump.  */
  fputs (s: "\tjmp\t", stream: asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel1);
  fputc (c: '\n', stream: asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

  /* The above call insn pushed a word to stack.  Adjust CFI info.  */
  if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
    {
      if (! dwarf2out_do_cfi_asm ())
	{
	  dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
	  xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
	  xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
	  vec_safe_push (cfun->fde->dw_fde_cfi, obj: xcfi);
	}
      dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
      xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
      xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
      vec_safe_push (cfun->fde->dw_fde_cfi, obj: xcfi);
      dwarf2out_emit_cfi (cfi: xcfi);
    }

  if (regno != INVALID_REGNUM)
    {
      /* MOV: overwrite the word pushed by the call above with the
	 branch target held in REGNO, so the following ret jumps there
	 (see the function comment).  */
      rtx xops[2];
      xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
      xops[1] = gen_rtx_REG (word_mode, regno);
      output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
    }
  else
    {
      /* LEA: drop the word pushed by the call above, exposing the
	 function address on the stack top for the following ret.  */
      rtx xops[2];
      xops[0] = stack_pointer_rtx;
      xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
    }

  fputs (s: "\tret\n", stream: asm_out_file);
  /* -mharden-sls=return: stop straight-line speculation past the ret.  */
  if ((ix86_harden_sls & harden_sls_return))
    fputs (s: "\tint3\n", stream: asm_out_file);
}
6180 | |
6181 | /* Output a funtion with a call and return thunk for indirect branch. |
6182 | If REGNO != INVALID_REGNUM, the function address is in REGNO. |
6183 | Otherwise, the function address is on the top of stack. Thunk is |
6184 | used for function return if RET_P is true. */ |
6185 | |
static void
output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
				unsigned int regno, bool ret_p)
{
  char name[32];
  tree decl;

  /* Create __x86_indirect_thunk.  */
  indirect_thunk_name (name, regno, need_prefix, ret_p);
  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
		     get_identifier (name),
		     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;
  DECL_IGNORED_P (decl) = 1;

  /* Pick the output section and emit the label; Mach-O, hidden
     linkonce and plain text section each have their own protocol.  */
#if TARGET_MACHO
  if (TARGET_MACHO)
    {
      switch_to_section (darwin_sections[picbase_thunk_section]);
      fputs ("\t.weak_definition\t", asm_out_file);
      assemble_name (asm_out_file, name);
      fputs ("\n\t.private_extern\t", asm_out_file);
      assemble_name (asm_out_file, name);
      putc ('\n', asm_out_file);
      ASM_OUTPUT_LABEL (asm_out_file, name);
      DECL_WEAK (decl) = 1;
    }
  else
#endif
    if (USE_HIDDEN_LINKONCE)
      {
	cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

	targetm.asm_out.unique_section (decl, 0);
	switch_to_section (get_named_section (decl, NULL, 0));

	targetm.asm_out.globalize_label (asm_out_file, name);
	fputs (s: "\t.hidden\t", stream: asm_out_file);
	assemble_name (asm_out_file, name);
	putc (c: '\n', stream: asm_out_file);
	ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
      }
    else
      {
	switch_to_section (text_section);
	ASM_OUTPUT_LABEL (asm_out_file, name);
      }

  /* Set up a temporary function context so final_* can emit the thunk
     body, then tear it down again.  */
  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  /* We're about to hide the function body from callees of final_* by
     emitting it directly; tell them we're a thunk, if they care.  */
  cfun->is_thunk = true;
  first_function_block_is_cold = false;
  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), asm_out_file, 1);

  output_indirect_thunk (regno);

  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);
  set_cfun (NULL);
  current_function_decl = NULL;
}
6256 | |
/* Bit mask of integer registers for which a pc thunk was requested by
   output_set_got; the thunks are emitted by ix86_code_end.  */
static int pic_labels_used;
6258 | |
6259 | /* Fills in the label name that should be used for a pc thunk for |
6260 | the given register. */ |
6261 | |
6262 | static void |
6263 | get_pc_thunk_name (char name[32], unsigned int regno) |
6264 | { |
6265 | gcc_assert (!TARGET_64BIT); |
6266 | |
6267 | if (USE_HIDDEN_LINKONCE) |
6268 | sprintf (s: name, format: "__x86.get_pc_thunk.%s" , reg_names[regno]); |
6269 | else |
6270 | ASM_GENERATE_INTERNAL_LABEL (name, "LPR" , regno); |
6271 | } |
6272 | |
6273 | |
6274 | /* This function generates code for -fpic that loads %ebx with |
6275 | the return address of the caller and then returns. */ |
6276 | |
static void
ix86_code_end (void)
{
  rtx xops[2];
  unsigned int regno;

  /* First emit the return/indirect thunks that were requested while
     compiling individual functions (flags set elsewhere, consumed
     here at end of compilation).  */
  if (indirect_return_needed)
    output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
				    INVALID_REGNUM, ret_p: true);
  if (indirect_return_via_cx)
    output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
				    CX_REG, ret_p: true);
  if (indirect_thunk_needed)
    output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
				    INVALID_REGNUM, ret_p: false);

  /* Per-register thunks for r8-r15.  */
  for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (set: indirect_thunks_used, bit: regno))
	output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
					regno, ret_p: false);
    }

  /* Per-register thunks for the APX extended GPRs.  */
  for (regno = FIRST_REX2_INT_REG; regno <= LAST_REX2_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (set: indirect_thunks_used, bit: regno))
	output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
					regno, ret_p: false);
    }

  /* Legacy integer registers: indirect thunks, plus the -fpic pc
     thunks recorded in pic_labels_used by output_set_got.  */
  for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
    {
      char name[32];
      tree decl;

      if (TEST_HARD_REG_BIT (set: indirect_thunks_used, bit: regno))
	output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
					regno, ret_p: false);

      if (!(pic_labels_used & (1 << regno)))
	continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			 get_identifier (name),
			 build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

      /* Section/label protocol mirrors output_indirect_thunk_function.  */
#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  switch_to_section (darwin_sections[picbase_thunk_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	  DECL_WEAK (decl) = 1;
	}
      else
#endif
	if (USE_HIDDEN_LINKONCE)
	  {
	    cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

	    targetm.asm_out.unique_section (decl, 0);
	    switch_to_section (get_named_section (decl, NULL, 0));

	    targetm.asm_out.globalize_label (asm_out_file, name);
	    fputs (s: "\t.hidden\t", stream: asm_out_file);
	    assemble_name (asm_out_file, name);
	    putc (c: '\n', stream: asm_out_file);
	    ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	  }
	else
	  {
	    switch_to_section (text_section);
	    ASM_OUTPUT_LABEL (asm_out_file, name);
	  }

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      allocate_struct_function (decl, false);
      init_function_start (decl);
      /* We're about to hide the function body from callees of final_* by
	 emitting it directly; tell them we're a thunk, if they care.  */
      cfun->is_thunk = true;
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
	 as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
	{
	  int i = 8;

	  while (i--)
	    fputs (s: "\tnop\n", stream: asm_out_file);
	}

      /* Thunk body: load the caller's return address (on the stack
	 top) into REGNO, then return — see the function comment.  */
      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs (s: "\tret\n", stream: asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
6398 | |
6399 | /* Emit code for the SET_GOT patterns. */ |
6400 | |
const char *
output_set_got (rtx dest, rtx label)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
	 an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (flag_pic)
    {
      /* Call the pc thunk for DEST's register and record in
	 pic_labels_used that ix86_code_end must emit it.  */
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("%!call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
	 This is what will be referenced by the Mach-O PIC subsystem.  */
      if (machopic_should_output_picbase_label () || !label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);

      /* When we are restoring the pic base at the site of a nonlocal label,
	 and we decided to emit the pic base above, we will still output a
	 local label used for calculating the correction offset (even though
	 the offset will be 0 in that case).  */
      if (label)
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));
#endif
    }
  else
    {
      if (TARGET_MACHO)
	/* We don't need a pic base, we're not producing pic.  */
	gcc_unreachable ();

      /* Non-PIC: load the address of a local label placed right after
	 the move.  */
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
      targetm.asm_out.internal_label (asm_out_file, "L",
				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }

  /* Add the GOT symbol offset to form the final GOT pointer.  */
  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
6468 | |
6469 | /* Generate an "push" pattern for input ARG. */ |
6470 | |
6471 | rtx |
6472 | gen_push (rtx arg, bool ppx_p) |
6473 | { |
6474 | struct machine_function *m = cfun->machine; |
6475 | |
6476 | if (m->fs.cfa_reg == stack_pointer_rtx) |
6477 | m->fs.cfa_offset += UNITS_PER_WORD; |
6478 | m->fs.sp_offset += UNITS_PER_WORD; |
6479 | |
6480 | if (REG_P (arg) && GET_MODE (arg) != word_mode) |
6481 | arg = gen_rtx_REG (word_mode, REGNO (arg)); |
6482 | |
6483 | rtx stack = gen_rtx_MEM (word_mode, |
6484 | gen_rtx_PRE_DEC (Pmode, |
6485 | stack_pointer_rtx)); |
6486 | return ppx_p ? gen_pushp_di (stack, arg) : gen_rtx_SET (stack, arg); |
6487 | } |
6488 | |
6489 | rtx |
6490 | gen_pushfl (void) |
6491 | { |
6492 | struct machine_function *m = cfun->machine; |
6493 | rtx flags, mem; |
6494 | |
6495 | if (m->fs.cfa_reg == stack_pointer_rtx) |
6496 | m->fs.cfa_offset += UNITS_PER_WORD; |
6497 | m->fs.sp_offset += UNITS_PER_WORD; |
6498 | |
6499 | flags = gen_rtx_REG (CCmode, FLAGS_REG); |
6500 | |
6501 | mem = gen_rtx_MEM (word_mode, |
6502 | gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx)); |
6503 | |
6504 | return gen_pushfl2 (arg0: word_mode, x0: mem, x1: flags); |
6505 | } |
6506 | |
6507 | /* Generate an "pop" pattern for input ARG. */ |
6508 | |
6509 | rtx |
6510 | gen_pop (rtx arg, bool ppx_p) |
6511 | { |
6512 | if (REG_P (arg) && GET_MODE (arg) != word_mode) |
6513 | arg = gen_rtx_REG (word_mode, REGNO (arg)); |
6514 | |
6515 | rtx stack = gen_rtx_MEM (word_mode, |
6516 | gen_rtx_POST_INC (Pmode, |
6517 | stack_pointer_rtx)); |
6518 | |
6519 | return ppx_p ? gen_popp_di (arg, stack) : gen_rtx_SET (arg, stack); |
6520 | } |
6521 | |
6522 | rtx |
6523 | gen_popfl (void) |
6524 | { |
6525 | rtx flags, mem; |
6526 | |
6527 | flags = gen_rtx_REG (CCmode, FLAGS_REG); |
6528 | |
6529 | mem = gen_rtx_MEM (word_mode, |
6530 | gen_rtx_POST_INC (Pmode, stack_pointer_rtx)); |
6531 | |
6532 | return gen_popfl1 (arg0: word_mode, x0: flags, x1: mem); |
6533 | } |
6534 | |
6535 | /* Generate a "push2" pattern for input ARG. */ |
6536 | rtx |
6537 | gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false) |
6538 | { |
6539 | struct machine_function *m = cfun->machine; |
6540 | const int offset = UNITS_PER_WORD * 2; |
6541 | |
6542 | if (m->fs.cfa_reg == stack_pointer_rtx) |
6543 | m->fs.cfa_offset += offset; |
6544 | m->fs.sp_offset += offset; |
6545 | |
6546 | if (REG_P (reg1) && GET_MODE (reg1) != word_mode) |
6547 | reg1 = gen_rtx_REG (word_mode, REGNO (reg1)); |
6548 | |
6549 | if (REG_P (reg2) && GET_MODE (reg2) != word_mode) |
6550 | reg2 = gen_rtx_REG (word_mode, REGNO (reg2)); |
6551 | |
6552 | return ppx_p ? gen_push2p_di (mem, reg1, reg2): |
6553 | gen_push2_di (mem, reg1, reg2); |
6554 | } |
6555 | |
6556 | /* Return >= 0 if there is an unused call-clobbered register available |
6557 | for the entire function. */ |
6558 | |
6559 | static unsigned int |
6560 | ix86_select_alt_pic_regnum (void) |
6561 | { |
6562 | if (ix86_use_pseudo_pic_reg ()) |
6563 | return INVALID_REGNUM; |
6564 | |
6565 | if (crtl->is_leaf |
6566 | && !crtl->profile |
6567 | && !ix86_current_function_calls_tls_descriptor) |
6568 | { |
6569 | int i, drap; |
6570 | /* Can't use the same register for both PIC and DRAP. */ |
6571 | if (crtl->drap_reg) |
6572 | drap = REGNO (crtl->drap_reg); |
6573 | else |
6574 | drap = -1; |
6575 | for (i = 2; i >= 0; --i) |
6576 | if (i != drap && !df_regs_ever_live_p (i)) |
6577 | return i; |
6578 | } |
6579 | |
6580 | return INVALID_REGNUM; |
6581 | } |
6582 | |
6583 | /* Return true if REGNO is used by the epilogue. */ |
6584 | |
6585 | bool |
6586 | ix86_epilogue_uses (int regno) |
6587 | { |
6588 | /* If there are no caller-saved registers, we preserve all registers, |
6589 | except for MMX and x87 registers which aren't supported when saving |
6590 | and restoring registers. Don't explicitly save SP register since |
6591 | it is always preserved. */ |
6592 | return (epilogue_completed |
6593 | && (cfun->machine->call_saved_registers |
6594 | == TYPE_NO_CALLER_SAVED_REGISTERS) |
6595 | && !fixed_regs[regno] |
6596 | && !STACK_REGNO_P (regno) |
6597 | && !MMX_REGNO_P (regno)); |
6598 | } |
6599 | |
6600 | /* Return nonzero if register REGNO can be used as a scratch register |
6601 | in peephole2. */ |
6602 | |
6603 | static bool |
6604 | ix86_hard_regno_scratch_ok (unsigned int regno) |
6605 | { |
6606 | /* If there are no caller-saved registers, we can't use any register |
6607 | as a scratch register after epilogue and use REGNO as scratch |
6608 | register only if it has been used before to avoid saving and |
6609 | restoring it. */ |
6610 | return ((cfun->machine->call_saved_registers |
6611 | != TYPE_NO_CALLER_SAVED_REGISTERS) |
6612 | || (!epilogue_completed |
6613 | && df_regs_ever_live_p (regno))); |
6614 | } |
6615 | |
/* Return TRUE if we need to save REGNO.  MAYBE_EH_RETURN means the EH
   return data registers must be treated as live; IGNORE_OUTLINED means
   registers saved/restored by an ms2sysv out-of-line stub are reported
   as not needing a save here.  */

bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
{
  rtx reg;

  switch (cfun->machine->call_saved_registers)
    {
    case TYPE_DEFAULT_CALL_SAVED_REGISTERS:
      break;

    case TYPE_NO_CALLER_SAVED_REGISTERS:
      /* If there are no caller-saved registers, we preserve all
	 registers, except for MMX and x87 registers which aren't
	 supported when saving and restoring registers.  Don't
	 explicitly save SP register since it is always preserved.

	 Don't preserve registers used for function return value.  */
      reg = crtl->return_rtx;
      if (reg)
	{
	  /* Exclude every hard register that carries part of the
	     return value.  */
	  unsigned int i = REGNO (reg);
	  unsigned int nregs = REG_NREGS (reg);
	  while (nregs-- > 0)
	    if ((i + nregs) == regno)
	      return false;
	}

      return (df_regs_ever_live_p (regno)
	      && !fixed_regs[regno]
	      && !STACK_REGNO_P (regno)
	      && !MMX_REGNO_P (regno)
	      && (regno != HARD_FRAME_POINTER_REGNUM
		  || !frame_pointer_needed));

    case TYPE_NO_CALLEE_SAVED_REGISTERS:
      return false;

    case TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP:
      if (regno != HARD_FRAME_POINTER_REGNUM)
	return false;
      break;
    }

  /* The PIC register needs saving when it is live across the function
     and cannot be replaced by an alternate call-clobbered register.  */
  if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && pic_offset_table_rtx)
    {
      if (ix86_use_pseudo_pic_reg ())
	{
	  /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
	  _mcount in prologue.  */
	  if (!TARGET_64BIT && flag_pic && crtl->profile)
	    return true;
	}
      else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	       || crtl->profile
	       || crtl->calls_eh_return
	       || crtl->uses_const_pool
	       || cfun->has_nonlocal_label)
	return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
    }

  /* EH return data registers must be saved when requested.  */
  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return true;
	}
    }

  /* Registers handled by the ms2sysv out-of-line stubs need no inline
     save when the caller asked us to ignore them.  */
  if (ignore_outlined && cfun->machine->call_ms2sysv)
    {
      unsigned count = cfun->machine->call_ms2sysv_extra_regs
		       + xlogue_layout::MIN_REGS;
      if (xlogue_layout::is_stub_managed_reg (regno, count))
	return false;
    }

  /* The DRAP register must normally be preserved across the function.  */
  if (crtl->drap_reg
      && regno == REGNO (crtl->drap_reg)
      && !cfun->machine->no_drap_save_restore)
    return true;

  /* Default rule: save live, call-saved, non-fixed registers, except a
     hard frame pointer that the prologue establishes itself.  */
  return (df_regs_ever_live_p (regno)
	  && !call_used_or_fixed_reg_p (regno)
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
6709 | |
6710 | /* Return number of saved general prupose registers. */ |
6711 | |
6712 | static int |
6713 | ix86_nsaved_regs (void) |
6714 | { |
6715 | int nregs = 0; |
6716 | int regno; |
6717 | |
6718 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
6719 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true)) |
6720 | nregs ++; |
6721 | return nregs; |
6722 | } |
6723 | |
6724 | /* Return number of saved SSE registers. */ |
6725 | |
6726 | static int |
6727 | ix86_nsaved_sseregs (void) |
6728 | { |
6729 | int nregs = 0; |
6730 | int regno; |
6731 | |
6732 | if (!TARGET_64BIT_MS_ABI) |
6733 | return 0; |
6734 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
6735 | if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true)) |
6736 | nregs ++; |
6737 | return nregs; |
6738 | } |
6739 | |
6740 | /* Given FROM and TO register numbers, say whether this elimination is |
6741 | allowed. If stack alignment is needed, we can only replace argument |
6742 | pointer with hard frame pointer, or replace frame pointer with stack |
6743 | pointer. Otherwise, frame pointer elimination is automatically |
6744 | handled and all other eliminations are valid. */ |
6745 | |
6746 | static bool |
6747 | ix86_can_eliminate (const int from, const int to) |
6748 | { |
6749 | if (stack_realign_fp) |
6750 | return ((from == ARG_POINTER_REGNUM |
6751 | && to == HARD_FRAME_POINTER_REGNUM) |
6752 | || (from == FRAME_POINTER_REGNUM |
6753 | && to == STACK_POINTER_REGNUM)); |
6754 | else |
6755 | return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true; |
6756 | } |
6757 | |
6758 | /* Return the offset between two registers, one to be eliminated, and the other |
6759 | its replacement, at the start of a routine. */ |
6760 | |
6761 | HOST_WIDE_INT |
6762 | ix86_initial_elimination_offset (int from, int to) |
6763 | { |
6764 | struct ix86_frame &frame = cfun->machine->frame; |
6765 | |
6766 | if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) |
6767 | return frame.hard_frame_pointer_offset; |
6768 | else if (from == FRAME_POINTER_REGNUM |
6769 | && to == HARD_FRAME_POINTER_REGNUM) |
6770 | return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; |
6771 | else |
6772 | { |
6773 | gcc_assert (to == STACK_POINTER_REGNUM); |
6774 | |
6775 | if (from == ARG_POINTER_REGNUM) |
6776 | return frame.stack_pointer_offset; |
6777 | |
6778 | gcc_assert (from == FRAME_POINTER_REGNUM); |
6779 | return frame.stack_pointer_offset - frame.frame_pointer_offset; |
6780 | } |
6781 | } |
6782 | |
6783 | /* Emits a warning for unsupported msabi to sysv pro/epilogues. */ |
6784 | void |
6785 | warn_once_call_ms2sysv_xlogues (const char *feature) |
6786 | { |
6787 | static bool warned_once = false; |
6788 | if (!warned_once) |
6789 | { |
6790 | warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s" , |
6791 | feature); |
6792 | warned_once = true; |
6793 | } |
6794 | } |
6795 | |
6796 | /* Return the probing interval for -fstack-clash-protection. */ |
6797 | |
6798 | static HOST_WIDE_INT |
6799 | get_probe_interval (void) |
6800 | { |
6801 | if (flag_stack_clash_protection) |
6802 | return (HOST_WIDE_INT_1U |
6803 | << param_stack_clash_protection_probe_interval); |
6804 | else |
6805 | return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP); |
6806 | } |
6807 | |
6808 | /* When using -fsplit-stack, the allocation routines set a field in |
6809 | the TCB to the bottom of the stack plus this much space, measured |
6810 | in bytes. */ |
6811 | |
6812 | #define SPLIT_STACK_AVAILABLE 256 |
6813 | |
6814 | /* Return true if push2/pop2 can be generated. */ |
6815 | |
6816 | static bool |
6817 | ix86_can_use_push2pop2 (void) |
6818 | { |
6819 | /* Use push2/pop2 only if the incoming stack is 16-byte aligned. */ |
6820 | unsigned int incoming_stack_boundary |
6821 | = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary |
6822 | ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary); |
6823 | return incoming_stack_boundary % 128 == 0; |
6824 | } |
6825 | |
6826 | /* Helper function to determine whether push2/pop2 can be used in prologue or |
6827 | epilogue for register save/restore. */ |
6828 | static bool |
6829 | ix86_pro_and_epilogue_can_use_push2pop2 (int nregs) |
6830 | { |
6831 | if (!ix86_can_use_push2pop2 ()) |
6832 | return false; |
6833 | int aligned = cfun->machine->fs.sp_offset % 16 == 0; |
6834 | return TARGET_APX_PUSH2POP2 |
6835 | && !cfun->machine->frame.save_regs_using_mov |
6836 | && cfun->machine->func_type == TYPE_NORMAL |
6837 | && (nregs + aligned) >= 3; |
6838 | } |
6839 | |
/* Fill structure ix86_frame about frame of currently computed function.
   Computes all the save-area offsets (GPR, SSE, va-arg), the frame and
   stack pointer offsets, red-zone usage, and whether registers are saved
   with moves or pushes.  Must be idempotent for identical frame
   requirements (see the comment about the RA reaching a fixed point).  */

static void
ix86_compute_frame_layout (void)
{
  struct ix86_frame *frame = &cfun->machine->frame;
  struct machine_function *m = cfun->machine;
  unsigned HOST_WIDE_INT stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned HOST_WIDE_INT preferred_alignment;
  HOST_WIDE_INT size = ix86_get_frame_size ();
  HOST_WIDE_INT to_allocate;

  /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
   * ms_abi functions that call a sysv function.  We now need to prune away
   * cases where it should be disabled.  */
  if (TARGET_64BIT && m->call_ms2sysv)
    {
      gcc_assert (TARGET_64BIT_MS_ABI);
      gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
      gcc_assert (!TARGET_SEH);
      gcc_assert (TARGET_SSE);
      gcc_assert (!ix86_using_red_zone ());

      if (crtl->calls_eh_return)
	{
	  gcc_assert (!reload_completed);
	  m->call_ms2sysv = false;
	  warn_once_call_ms2sysv_xlogues (feature: "__builtin_eh_return" );
	}

      else if (ix86_static_chain_on_stack)
	{
	  gcc_assert (!reload_completed);
	  m->call_ms2sysv = false;
	  warn_once_call_ms2sysv_xlogues (feature: "static call chains" );
	}

      /* Finally, compute which registers the stub will manage.  */
      else
	{
	  unsigned count = xlogue_layout::count_stub_managed_regs ();
	  m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
	  m->call_ms2sysv_pad_in = 0;
	}
    }

  /* Number of GPRs and SSE registers the prologue must save.  */
  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  /* 64-bit MS ABI seems to require stack alignment to be always 16,
     except for function prologues, leaf functions and when the default
     incoming stack boundary is overridden at command line or via
     force_align_arg_pointer attribute.

     Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
     at call sites, including profile function calls.

     For APX push2/pop2, the stack also requires 128b alignment.  */
  if ((ix86_pro_and_epilogue_can_use_push2pop2 (nregs: frame->nregs)
       && crtl->preferred_stack_boundary < 128)
      || (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
	   && crtl->preferred_stack_boundary < 128)
	  && (!crtl->is_leaf || cfun->calls_alloca != 0
	      || ix86_current_function_calls_tls_descriptor
	      || (TARGET_MACHO && crtl->profile)
	      || ix86_incoming_stack_boundary < 128)))
    {
      crtl->preferred_stack_boundary = 128;
      if (crtl->stack_alignment_needed < 128)
	crtl->stack_alignment_needed = 128;
    }

  /* Convert alignment requirements from bits to bytes.  */
  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* The only ABI saving SSE regs should be 64-bit ms_abi.  */
  gcc_assert (TARGET_64BIT || !frame->nsseregs);
  if (TARGET_64BIT && m->call_ms2sysv)
    {
      gcc_assert (stack_alignment_needed >= 16);
      gcc_assert (!frame->nsseregs);
    }

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    m->use_fast_prologue_epilogue = false;
  else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_node::get (decl: current_function_decl);

      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.

	 Calling this hook multiple times with the same frame requirements
	 must produce the same layout, since the RA might otherwise be
	 unable to reach a fixed point or might fail its final sanity checks.
	 This means that once we've assumed that a function does or doesn't
	 have a particular size, we have to stick to that assumption
	 regardless of how the function has changed since.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && node->frequency < NODE_FREQUENCY_HOT))
	m->use_fast_prologue_epilogue = false;
      else
	{
	  /* Cache the expensive_function_p result so repeated calls with
	     the same count produce the same layout decision.  */
	  if (count != frame->expensive_count)
	    {
	      frame->expensive_count = count;
	      frame->expensive_p = expensive_function_p (count);
	    }
	  m->use_fast_prologue_epilogue = !frame->expensive_p;
	}
    }

  frame->save_regs_using_mov
    = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;

  /* Skip return address and error code in exception handler.  */
  offset = INCOMING_FRAME_SP_OFFSET;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* Calculate the size of the va-arg area (not including padding, if any).  */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;

  /* Also adjust stack_realign_offset for the largest alignment of
     stack slot actually used.  */
  if (stack_realign_fp
      || (cfun->machine->max_used_stack_alignment != 0
	  && (offset % cfun->machine->max_used_stack_alignment) != 0))
    {
      /* We may need a 16-byte aligned stack for the remainder of the
	 register save area, but the stack frame for the local function
	 may require a greater alignment if using AVX/2/512.  In order
	 to avoid wasting space, we first calculate the space needed for
	 the rest of the register saves, add that to the stack pointer,
	 and then realign the stack to the boundary of the start of the
	 frame for the local function.  */
      HOST_WIDE_INT space_needed = 0;
      HOST_WIDE_INT sse_reg_space_needed = 0;

      if (TARGET_64BIT)
	{
	  if (m->call_ms2sysv)
	    {
	      m->call_ms2sysv_pad_in = 0;
	      space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
	    }

	  else if (frame->nsseregs)
	    /* The only ABI that has saved SSE registers (Win64) also has a
	       16-byte aligned default stack.  However, many programs violate
	       the ABI, and Wine64 forces stack realignment to compensate.  */
	    space_needed = frame->nsseregs * 16;

	  sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);

	  /* 64-bit frame->va_arg_size should always be a multiple of 16, but
	     rounding to be pedantic.  */
	  space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
	}
      else
	space_needed = frame->va_arg_size;

      /* Record the allocation size required prior to the realignment AND.  */
      frame->stack_realign_allocate = space_needed;

      /* The re-aligned stack starts at frame->stack_realign_offset.  Values
	 before this point are not directly comparable with values below
	 this point.  Use sp_valid_at to determine if the stack pointer is
	 valid for a given offset, fp_valid_at for the frame pointer, or
	 choose_baseaddr to have a base register chosen for you.

	 Note that the result of (frame->stack_realign_offset
	 & (stack_alignment_needed - 1)) may not equal zero.  */
      offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
      frame->stack_realign_offset = offset - space_needed;
      frame->sse_reg_save_offset = frame->stack_realign_offset
				   + sse_reg_space_needed;
    }
  else
    {
      frame->stack_realign_offset = offset;

      if (TARGET_64BIT && m->call_ms2sysv)
	{
	  m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
	  offset += xlogue_layout::get_instance ().get_stack_space_used ();
	}

      /* Align and set SSE register save area.  */
      else if (frame->nsseregs)
	{
	  /* If the incoming stack boundary is at least 16 bytes, or DRAP is
	     required and the DRAP re-alignment boundary is at least 16 bytes,
	     then we want the SSE register save area properly aligned.  */
	  if (ix86_incoming_stack_boundary >= 128
	      || (stack_realign_drap && stack_alignment_needed >= 16))
	    offset = ROUND_UP (offset, 16);
	  offset += frame->nsseregs * 16;
	}
      frame->sse_reg_save_offset = offset;
      offset += frame->va_arg_size;
    }

  /* Align start of frame for local function.  When a function call
     is removed, it may become a leaf function.  But if argument may
     be passed on stack, we need to align the stack when there is no
     tail call.  */
  if (m->call_ms2sysv
      || frame->va_arg_size != 0
      || size != 0
      || !crtl->is_leaf
      || (!crtl->tail_call_emit
	  && cfun->machine->outgoing_args_on_stack)
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = ROUND_UP (offset, stack_alignment_needed);

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!crtl->is_leaf || cfun->calls_alloca
	  || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!crtl->is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = ROUND_UP (offset, preferred_alignment);

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
      /* If static stack checking is enabled and done with probes,
	 the registers need to be saved before allocating the frame.  */
      || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
      /* If stack clash probing needs a loop, then it needs a
	 scratch register.  But the returned register is only guaranteed
	 to be safe to use after register saves are complete.  So if
	 stack clash protections are enabled and the allocated frame is
	 larger than the probe interval, then use pushes to save
	 callee saved registers.  */
      || (flag_stack_clash_protection
	  && !ix86_target_stack_probe ()
	  && to_allocate > get_probe_interval ()))
    frame->save_regs_using_mov = false;

  /* Use the red zone for small leaf frames when it is available, capped
     at the ABI-defined size less the reserved portion.  */
  if (ix86_using_red_zone ()
      && crtl->sp_is_unchanging
      && crtl->is_leaf
      && !ix86_pc_thunk_call_expanded
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      /* Force the frame pointer to point at or below the lowest register save
	 area, see the SEH code in config/i386/winnt.cc for the rationale.  */
      frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;

      /* If we can leave the frame pointer where it is, do so; however return
	 the establisher frame for __builtin_frame_address (0) or else if the
	 frame overflows the SEH maximum frame size.

	 Note that the value returned by __builtin_frame_address (0) is quite
	 constrained, because setjmp is piggybacked on the SEH machinery with
	 recent versions of MinGW:

	  # elif defined(__SEH__)
	  # if defined(__aarch64__) || defined(_ARM64_)
	  # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
	  # elif (__MINGW_GCC_VERSION < 40702)
	  # define setjmp(BUF) _setjmp((BUF), mingw_getsp())
	  # else
	  # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
	  # endif

	 and the second argument passed to _setjmp, if not null, is forwarded
	 to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
	 built an ExceptionRecord on the fly describing the setjmp buffer).  */
      const HOST_WIDE_INT diff
	= frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff <= 255 && !crtl->accesses_prior_frames)
	{
	  /* The resulting diff will be a multiple of 16 lower than 255,
	     i.e. at most 240 as required by the unwind data structure.  */
	  frame->hard_frame_pointer_offset += (diff & 15);
	}
      else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
	{
	  /* Ideally we'd determine what portion of the local stack frame
	     (within the constraint of the lowest 240) is most heavily used.
	     But without that complication, simply bias the frame pointer
	     by 128 bytes so as to maximize the amount of the local stack
	     frame that is addressable with 8-bit offsets.  */
	  frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
	}
      else
	frame->hard_frame_pointer_offset = frame->hfp_save_offset;
    }
}
7207 | |
7208 | /* This is semi-inlined memory_address_length, but simplified |
7209 | since we know that we're always dealing with reg+offset, and |
7210 | to avoid having to create and discard all that rtl. */ |
7211 | |
7212 | static inline int |
7213 | choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset) |
7214 | { |
7215 | int len = 4; |
7216 | |
7217 | if (offset == 0) |
7218 | { |
7219 | /* EBP and R13 cannot be encoded without an offset. */ |
7220 | len = (regno == BP_REG || regno == R13_REG); |
7221 | } |
7222 | else if (IN_RANGE (offset, -128, 127)) |
7223 | len = 1; |
7224 | |
7225 | /* ESP and R12 must be encoded with a SIB byte. */ |
7226 | if (regno == SP_REG || regno == R12_REG) |
7227 | len++; |
7228 | |
7229 | return len; |
7230 | } |
7231 | |
7232 | /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in |
7233 | the frame save area. The register is saved at CFA - CFA_OFFSET. */ |
7234 | |
7235 | static bool |
7236 | sp_valid_at (HOST_WIDE_INT cfa_offset) |
7237 | { |
7238 | const struct machine_frame_state &fs = cfun->machine->fs; |
7239 | if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset) |
7240 | { |
7241 | /* Validate that the cfa_offset isn't in a "no-man's land". */ |
7242 | gcc_assert (cfa_offset <= fs.sp_realigned_fp_last); |
7243 | return false; |
7244 | } |
7245 | return fs.sp_valid; |
7246 | } |
7247 | |
7248 | /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in |
7249 | the frame save area. The register is saved at CFA - CFA_OFFSET. */ |
7250 | |
7251 | static inline bool |
7252 | fp_valid_at (HOST_WIDE_INT cfa_offset) |
7253 | { |
7254 | const struct machine_frame_state &fs = cfun->machine->fs; |
7255 | if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last) |
7256 | { |
7257 | /* Validate that the cfa_offset isn't in a "no-man's land". */ |
7258 | gcc_assert (cfa_offset >= fs.sp_realigned_offset); |
7259 | return false; |
7260 | } |
7261 | return fs.fp_valid; |
7262 | } |
7263 | |
/* Choose a base register based upon alignment requested, speed and/or
   size.  On return BASE_REG and BASE_OFFSET are set so that BASE_REG +
   BASE_OFFSET addresses CFA - CFA_OFFSET; if ALIGN is non-null it
   receives the alignment (in bits) of the chosen base register.
   ALIGN_REQESTED, when nonzero, filters out candidate registers whose
   alignment is insufficient.  BASE_REG is left unchanged if no valid
   candidate exists.  */

static void
choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
		HOST_WIDE_INT &base_offset,
		unsigned int align_reqested, unsigned int *align)
{
  const struct machine_function *m = cfun->machine;
  unsigned int hfp_align;
  unsigned int drap_align;
  unsigned int sp_align;
  bool hfp_ok = fp_valid_at (cfa_offset);
  bool drap_ok = m->fs.drap_valid;
  bool sp_ok = sp_valid_at (cfa_offset);

  hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;

  /* Filter out any registers that don't meet the requested alignment
     criteria.  */
  if (align_reqested)
    {
      if (m->fs.realigned)
	hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
      /* SEH unwind code does not currently support REG_CFA_EXPRESSION
	 notes (which we would need to use a realigned stack pointer),
	 so disable on SEH targets.  */
      else if (m->fs.sp_realigned)
	sp_align = crtl->stack_alignment_needed;

      hfp_ok = hfp_ok && hfp_align >= align_reqested;
      drap_ok = drap_ok && drap_align >= align_reqested;
      sp_ok = sp_ok && sp_align >= align_reqested;
    }

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
         opportunities.  Generally FP is valid throughout the function,
         while DRAP must be reloaded within the epilogue.  But choose either
         over the SP due to increased encoding size.  */

      if (hfp_ok)
	{
	  base_reg = hard_frame_pointer_rtx;
	  base_offset = m->fs.fp_offset - cfa_offset;
	}
      else if (drap_ok)
	{
	  base_reg = crtl->drap_reg;
	  base_offset = 0 - cfa_offset;
	}
      else if (sp_ok)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	}
    }
  else
    {
      HOST_WIDE_INT toffset;
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
         With a tie, choose FP > DRAP > SP.  */
      if (sp_ok)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
          len = choose_baseaddr_len (STACK_POINTER_REGNUM, offset: base_offset);
	}
      if (drap_ok)
	{
	  toffset = 0 - cfa_offset;
	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), offset: toffset);
	  if (tlen <= len)
	    {
	      base_reg = crtl->drap_reg;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
      if (hfp_ok)
	{
	  toffset = m->fs.fp_offset - cfa_offset;
	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, offset: toffset);
	  if (tlen <= len)
	    {
	      /* LEN is deliberately not updated here: HFP is the last
		 candidate considered, so the value would be unused.  */
	      base_reg = hard_frame_pointer_rtx;
	      base_offset = toffset;
	    }
	}
    }

  /* Set the align return value.  */
  if (align)
    {
      if (base_reg == stack_pointer_rtx)
	*align = sp_align;
      else if (base_reg == crtl->drap_reg)
	*align = drap_align;
      else if (base_reg == hard_frame_pointer_rtx)
	*align = hfp_align;
    }
}
7369 | |
7370 | /* Return an RTX that points to CFA_OFFSET within the stack frame and |
7371 | the alignment of address. If ALIGN is non-null, it should point to |
7372 | an alignment value (in bits) that is preferred or zero and will |
7373 | recieve the alignment of the base register that was selected, |
7374 | irrespective of rather or not CFA_OFFSET is a multiple of that |
7375 | alignment value. If it is possible for the base register offset to be |
7376 | non-immediate then SCRATCH_REGNO should specify a scratch register to |
7377 | use. |
7378 | |
7379 | The valid base registers are taken from CFUN->MACHINE->FS. */ |
7380 | |
7381 | static rtx |
7382 | choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align, |
7383 | unsigned int scratch_regno = INVALID_REGNUM) |
7384 | { |
7385 | rtx base_reg = NULL; |
7386 | HOST_WIDE_INT base_offset = 0; |
7387 | |
7388 | /* If a specific alignment is requested, try to get a base register |
7389 | with that alignment first. */ |
7390 | if (align && *align) |
7391 | choose_basereg (cfa_offset, base_reg, base_offset, align_reqested: *align, align); |
7392 | |
7393 | if (!base_reg) |
7394 | choose_basereg (cfa_offset, base_reg, base_offset, align_reqested: 0, align); |
7395 | |
7396 | gcc_assert (base_reg != NULL); |
7397 | |
7398 | rtx base_offset_rtx = GEN_INT (base_offset); |
7399 | |
7400 | if (!x86_64_immediate_operand (base_offset_rtx, Pmode)) |
7401 | { |
7402 | gcc_assert (scratch_regno != INVALID_REGNUM); |
7403 | |
7404 | rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno); |
7405 | emit_move_insn (scratch_reg, base_offset_rtx); |
7406 | |
7407 | return gen_rtx_PLUS (Pmode, base_reg, scratch_reg); |
7408 | } |
7409 | |
7410 | return plus_constant (Pmode, base_reg, base_offset); |
7411 | } |
7412 | |
7413 | /* Emit code to save registers in the prologue. */ |
7414 | |
7415 | static void |
7416 | ix86_emit_save_regs (void) |
7417 | { |
7418 | int regno; |
7419 | rtx_insn *insn; |
7420 | |
7421 | if (!TARGET_APX_PUSH2POP2 |
7422 | || !ix86_can_use_push2pop2 () |
7423 | || cfun->machine->func_type != TYPE_NORMAL) |
7424 | { |
7425 | for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) |
7426 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true)) |
7427 | { |
7428 | insn = emit_insn (gen_push (arg: gen_rtx_REG (word_mode, regno), |
7429 | TARGET_APX_PPX)); |
7430 | RTX_FRAME_RELATED_P (insn) = 1; |
7431 | } |
7432 | } |
7433 | else |
7434 | { |
7435 | int regno_list[2]; |
7436 | regno_list[0] = regno_list[1] = -1; |
7437 | int loaded_regnum = 0; |
7438 | bool aligned = cfun->machine->fs.sp_offset % 16 == 0; |
7439 | |
7440 | for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) |
7441 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true)) |
7442 | { |
7443 | if (aligned) |
7444 | { |
7445 | regno_list[loaded_regnum++] = regno; |
7446 | if (loaded_regnum == 2) |
7447 | { |
7448 | gcc_assert (regno_list[0] != -1 |
7449 | && regno_list[1] != -1 |
7450 | && regno_list[0] != regno_list[1]); |
7451 | const int offset = UNITS_PER_WORD * 2; |
7452 | rtx mem = gen_rtx_MEM (TImode, |
7453 | gen_rtx_PRE_DEC (Pmode, |
7454 | stack_pointer_rtx)); |
7455 | insn = emit_insn (gen_push2 (mem, |
7456 | reg1: gen_rtx_REG (word_mode, |
7457 | regno_list[0]), |
7458 | reg2: gen_rtx_REG (word_mode, |
7459 | regno_list[1]), |
7460 | TARGET_APX_PPX)); |
7461 | RTX_FRAME_RELATED_P (insn) = 1; |
7462 | rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3)); |
7463 | |
7464 | for (int i = 0; i < 2; i++) |
7465 | { |
7466 | rtx dwarf_reg = gen_rtx_REG (word_mode, |
7467 | regno_list[i]); |
7468 | rtx sp_offset = plus_constant (Pmode, |
7469 | stack_pointer_rtx, |
7470 | + UNITS_PER_WORD |
7471 | * (1 - i)); |
7472 | rtx tmp = gen_rtx_SET (gen_frame_mem (DImode, |
7473 | sp_offset), |
7474 | dwarf_reg); |
7475 | RTX_FRAME_RELATED_P (tmp) = 1; |
7476 | XVECEXP (dwarf, 0, i + 1) = tmp; |
7477 | } |
7478 | rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx, |
7479 | plus_constant (Pmode, |
7480 | stack_pointer_rtx, |
7481 | -offset)); |
7482 | RTX_FRAME_RELATED_P (sp_tmp) = 1; |
7483 | XVECEXP (dwarf, 0, 0) = sp_tmp; |
7484 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); |
7485 | |
7486 | loaded_regnum = 0; |
7487 | regno_list[0] = regno_list[1] = -1; |
7488 | } |
7489 | } |
7490 | else |
7491 | { |
7492 | insn = emit_insn (gen_push (arg: gen_rtx_REG (word_mode, regno), |
7493 | TARGET_APX_PPX)); |
7494 | RTX_FRAME_RELATED_P (insn) = 1; |
7495 | aligned = true; |
7496 | } |
7497 | } |
7498 | if (loaded_regnum == 1) |
7499 | { |
7500 | insn = emit_insn (gen_push (arg: gen_rtx_REG (word_mode, |
7501 | regno_list[0]), |
7502 | TARGET_APX_PPX)); |
7503 | RTX_FRAME_RELATED_P (insn) = 1; |
7504 | } |
7505 | } |
7506 | } |
7507 | |
7508 | /* Emit a single register save at CFA - CFA_OFFSET. */ |
7509 | |
7510 | static void |
7511 | ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno, |
7512 | HOST_WIDE_INT cfa_offset) |
7513 | { |
7514 | struct machine_function *m = cfun->machine; |
7515 | rtx reg = gen_rtx_REG (mode, regno); |
7516 | rtx mem, addr, base, insn; |
7517 | unsigned int align = GET_MODE_ALIGNMENT (mode); |
7518 | |
7519 | addr = choose_baseaddr (cfa_offset, align: &align); |
7520 | mem = gen_frame_mem (mode, addr); |
7521 | |
7522 | /* The location aligment depends upon the base register. */ |
7523 | align = MIN (GET_MODE_ALIGNMENT (mode), align); |
7524 | gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1))); |
7525 | set_mem_align (mem, align); |
7526 | |
7527 | insn = emit_insn (gen_rtx_SET (mem, reg)); |
7528 | RTX_FRAME_RELATED_P (insn) = 1; |
7529 | |
7530 | base = addr; |
7531 | if (GET_CODE (base) == PLUS) |
7532 | base = XEXP (base, 0); |
7533 | gcc_checking_assert (REG_P (base)); |
7534 | |
7535 | /* When saving registers into a re-aligned local stack frame, avoid |
7536 | any tricky guessing by dwarf2out. */ |
7537 | if (m->fs.realigned) |
7538 | { |
7539 | gcc_checking_assert (stack_realign_drap); |
7540 | |
7541 | if (regno == REGNO (crtl->drap_reg)) |
7542 | { |
7543 | /* A bit of a hack. We force the DRAP register to be saved in |
7544 | the re-aligned stack frame, which provides us with a copy |
7545 | of the CFA that will last past the prologue. Install it. */ |
7546 | gcc_checking_assert (cfun->machine->fs.fp_valid); |
7547 | addr = plus_constant (Pmode, hard_frame_pointer_rtx, |
7548 | cfun->machine->fs.fp_offset - cfa_offset); |
7549 | mem = gen_rtx_MEM (mode, addr); |
7550 | add_reg_note (insn, REG_CFA_DEF_CFA, mem); |
7551 | } |
7552 | else |
7553 | { |
7554 | /* The frame pointer is a stable reference within the |
7555 | aligned frame. Use it. */ |
7556 | gcc_checking_assert (cfun->machine->fs.fp_valid); |
7557 | addr = plus_constant (Pmode, hard_frame_pointer_rtx, |
7558 | cfun->machine->fs.fp_offset - cfa_offset); |
7559 | mem = gen_rtx_MEM (mode, addr); |
7560 | add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); |
7561 | } |
7562 | } |
7563 | |
7564 | else if (base == stack_pointer_rtx && m->fs.sp_realigned |
7565 | && cfa_offset >= m->fs.sp_realigned_offset) |
7566 | { |
7567 | gcc_checking_assert (stack_realign_fp); |
7568 | add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); |
7569 | } |
7570 | |
7571 | /* The memory may not be relative to the current CFA register, |
7572 | which means that we may need to generate a new pattern for |
7573 | use by the unwind info. */ |
7574 | else if (base != m->fs.cfa_reg) |
7575 | { |
7576 | addr = plus_constant (Pmode, m->fs.cfa_reg, |
7577 | m->fs.cfa_offset - cfa_offset); |
7578 | mem = gen_rtx_MEM (mode, addr); |
7579 | add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg)); |
7580 | } |
7581 | } |
7582 | |
7583 | /* Emit code to save registers using MOV insns. |
7584 | First register is stored at CFA - CFA_OFFSET. */ |
7585 | static void |
7586 | ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset) |
7587 | { |
7588 | unsigned int regno; |
7589 | |
7590 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
7591 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true)) |
7592 | { |
7593 | ix86_emit_save_reg_using_mov (mode: word_mode, regno, cfa_offset); |
7594 | cfa_offset -= UNITS_PER_WORD; |
7595 | } |
7596 | } |
7597 | |
7598 | /* Emit code to save SSE registers using MOV insns. |
7599 | First register is stored at CFA - CFA_OFFSET. */ |
7600 | static void |
7601 | ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset) |
7602 | { |
7603 | unsigned int regno; |
7604 | |
7605 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
7606 | if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true)) |
7607 | { |
7608 | ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset); |
7609 | cfa_offset -= GET_MODE_SIZE (V4SFmode); |
7610 | } |
7611 | } |
7612 | |
/* REG_CFA_RESTORE notes queued by ix86_add_cfa_restore_note, to be
   attached to the next stack manipulation insn (GTY: GC root, since
   reg notes are garbage-collected rtx).  */
static GTY(()) rtx queued_cfa_restores;
7614 | |
7615 | /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack |
7616 | manipulation insn. The value is on the stack at CFA - CFA_OFFSET. |
7617 | Don't add the note if the previously saved value will be left untouched |
7618 | within stack red-zone till return, as unwinders can find the same value |
7619 | in the register and on the stack. */ |
7620 | |
7621 | static void |
7622 | ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset) |
7623 | { |
7624 | if (!crtl->shrink_wrapped |
7625 | && cfa_offset <= cfun->machine->fs.red_zone_offset) |
7626 | return; |
7627 | |
7628 | if (insn) |
7629 | { |
7630 | add_reg_note (insn, REG_CFA_RESTORE, reg); |
7631 | RTX_FRAME_RELATED_P (insn) = 1; |
7632 | } |
7633 | else |
7634 | queued_cfa_restores |
7635 | = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores); |
7636 | } |
7637 | |
7638 | /* Add queued REG_CFA_RESTORE notes if any to INSN. */ |
7639 | |
7640 | static void |
7641 | ix86_add_queued_cfa_restore_notes (rtx insn) |
7642 | { |
7643 | rtx last; |
7644 | if (!queued_cfa_restores) |
7645 | return; |
7646 | for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1)) |
7647 | ; |
7648 | XEXP (last, 1) = REG_NOTES (insn); |
7649 | REG_NOTES (insn) = queued_cfa_restores; |
7650 | queued_cfa_restores = NULL_RTX; |
7651 | RTX_FRAME_RELATED_P (insn) = 1; |
7652 | } |
7653 | |
7654 | /* Expand prologue or epilogue stack adjustment. |
7655 | The pattern exist to put a dependency on all ebp-based memory accesses. |
7656 | STYLE should be negative if instructions should be marked as frame related, |
7657 | zero if %r11 register is live and cannot be freely used and positive |
7658 | otherwise. */ |
7659 | |
7660 | static rtx |
7661 | pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, |
7662 | int style, bool set_cfa) |
7663 | { |
7664 | struct machine_function *m = cfun->machine; |
7665 | rtx addend = offset; |
7666 | rtx insn; |
7667 | bool add_frame_related_expr = false; |
7668 | |
7669 | if (!x86_64_immediate_operand (offset, Pmode)) |
7670 | { |
7671 | /* r11 is used by indirect sibcall return as well, set before the |
7672 | epilogue and used after the epilogue. */ |
7673 | if (style) |
7674 | addend = gen_rtx_REG (Pmode, R11_REG); |
7675 | else |
7676 | { |
7677 | gcc_assert (src != hard_frame_pointer_rtx |
7678 | && dest != hard_frame_pointer_rtx); |
7679 | addend = hard_frame_pointer_rtx; |
7680 | } |
7681 | emit_insn (gen_rtx_SET (addend, offset)); |
7682 | if (style < 0) |
7683 | add_frame_related_expr = true; |
7684 | } |
7685 | |
7686 | insn = emit_insn (gen_pro_epilogue_adjust_stack_add |
7687 | (Pmode, x0: dest, x1: src, x2: addend)); |
7688 | if (style >= 0) |
7689 | ix86_add_queued_cfa_restore_notes (insn); |
7690 | |
7691 | if (set_cfa) |
7692 | { |
7693 | rtx r; |
7694 | |
7695 | gcc_assert (m->fs.cfa_reg == src); |
7696 | m->fs.cfa_offset += INTVAL (offset); |
7697 | m->fs.cfa_reg = dest; |
7698 | |
7699 | r = gen_rtx_PLUS (Pmode, src, offset); |
7700 | r = gen_rtx_SET (dest, r); |
7701 | add_reg_note (insn, REG_CFA_ADJUST_CFA, r); |
7702 | RTX_FRAME_RELATED_P (insn) = 1; |
7703 | } |
7704 | else if (style < 0) |
7705 | { |
7706 | RTX_FRAME_RELATED_P (insn) = 1; |
7707 | if (add_frame_related_expr) |
7708 | { |
7709 | rtx r = gen_rtx_PLUS (Pmode, src, offset); |
7710 | r = gen_rtx_SET (dest, r); |
7711 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, r); |
7712 | } |
7713 | } |
7714 | |
7715 | if (dest == stack_pointer_rtx) |
7716 | { |
7717 | HOST_WIDE_INT ooffset = m->fs.sp_offset; |
7718 | bool valid = m->fs.sp_valid; |
7719 | bool realigned = m->fs.sp_realigned; |
7720 | |
7721 | if (src == hard_frame_pointer_rtx) |
7722 | { |
7723 | valid = m->fs.fp_valid; |
7724 | realigned = false; |
7725 | ooffset = m->fs.fp_offset; |
7726 | } |
7727 | else if (src == crtl->drap_reg) |
7728 | { |
7729 | valid = m->fs.drap_valid; |
7730 | realigned = false; |
7731 | ooffset = 0; |
7732 | } |
7733 | else |
7734 | { |
7735 | /* Else there are two possibilities: SP itself, which we set |
7736 | up as the default above. Or EH_RETURN_STACKADJ_RTX, which is |
7737 | taken care of this by hand along the eh_return path. */ |
7738 | gcc_checking_assert (src == stack_pointer_rtx |
7739 | || offset == const0_rtx); |
7740 | } |
7741 | |
7742 | m->fs.sp_offset = ooffset - INTVAL (offset); |
7743 | m->fs.sp_valid = valid; |
7744 | m->fs.sp_realigned = realigned; |
7745 | } |
7746 | return insn; |
7747 | } |
7748 | |
/* Find an available register to be used as dynamic realign argument
   pointer register.  Such a register will be written in prologue and
   used in begin of body, so it must not be
	1. parameter passing register.
	2. GOT pointer.
   We reuse static-chain register if it is available.  On x86-64 we
   normally use R10, falling back to the callee-saved R13 when the
   function needs the static chain, must preserve caller-saved
   registers, or emits tail calls.  On i386 we use ECX when the static
   chain register is free, and EDI otherwise.

   Return: the regno of chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  /* Always use callee-saved register if there are no caller-saved
     registers.  */
  if (TARGET_64BIT)
    {
      /* Use R13 for nested function or function need static chain.
	 Since function with tail call may use any caller-saved
	 registers in epilogue, DRAP must not use caller-saved
	 register in such case.  */
      if (DECL_STATIC_CHAIN (decl)
	  || (cfun->machine->call_saved_registers
	      == TYPE_NO_CALLER_SAVED_REGISTERS)
	  || crtl->tail_call_emit)
	return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for nested function or function need static chain.
	 Since function with tail call may use any caller-saved
	 registers in epilogue, DRAP must not use caller-saved
	 register in such case.  */
      if (DECL_STATIC_CHAIN (decl)
	  || (cfun->machine->call_saved_registers
	      == TYPE_NO_CALLER_SAVED_REGISTERS)
	  || crtl->tail_call_emit
	  || crtl->calls_eh_return)
	return DI_REG;

      /* Reuse static chain register if it isn't used for parameter
	 passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
	    return CX_REG;
	}
      return DI_REG;
    }
}
7805 | |
7806 | /* Return minimum incoming stack alignment. */ |
7807 | |
7808 | static unsigned int |
7809 | ix86_minimum_incoming_stack_boundary (bool sibcall) |
7810 | { |
7811 | unsigned int incoming_stack_boundary; |
7812 | |
7813 | /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */ |
7814 | if (cfun->machine->func_type != TYPE_NORMAL) |
7815 | incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY; |
7816 | /* Prefer the one specified at command line. */ |
7817 | else if (ix86_user_incoming_stack_boundary) |
7818 | incoming_stack_boundary = ix86_user_incoming_stack_boundary; |
7819 | /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary |
7820 | if -mstackrealign is used, it isn't used for sibcall check and |
7821 | estimated stack alignment is 128bit. */ |
7822 | else if (!sibcall |
7823 | && ix86_force_align_arg_pointer |
7824 | && crtl->stack_alignment_estimated == 128) |
7825 | incoming_stack_boundary = MIN_STACK_BOUNDARY; |
7826 | else |
7827 | incoming_stack_boundary = ix86_default_incoming_stack_boundary; |
7828 | |
7829 | /* Incoming stack alignment can be changed on individual functions |
7830 | via force_align_arg_pointer attribute. We use the smallest |
7831 | incoming stack boundary. */ |
7832 | if (incoming_stack_boundary > MIN_STACK_BOUNDARY |
7833 | && lookup_attribute (attr_name: "force_align_arg_pointer" , |
7834 | TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) |
7835 | incoming_stack_boundary = MIN_STACK_BOUNDARY; |
7836 | |
7837 | /* The incoming stack frame has to be aligned at least at |
7838 | parm_stack_boundary. */ |
7839 | if (incoming_stack_boundary < crtl->parm_stack_boundary) |
7840 | incoming_stack_boundary = crtl->parm_stack_boundary; |
7841 | |
7842 | /* Stack at entrance of main is aligned by runtime. We use the |
7843 | smallest incoming stack boundary. */ |
7844 | if (incoming_stack_boundary > MAIN_STACK_BOUNDARY |
7845 | && DECL_NAME (current_function_decl) |
7846 | && MAIN_NAME_P (DECL_NAME (current_function_decl)) |
7847 | && DECL_FILE_SCOPE_P (current_function_decl)) |
7848 | incoming_stack_boundary = MAIN_STACK_BOUNDARY; |
7849 | |
7850 | return incoming_stack_boundary; |
7851 | } |
7852 | |
7853 | /* Update incoming stack boundary and estimated stack alignment. */ |
7854 | |
7855 | static void |
7856 | ix86_update_stack_boundary (void) |
7857 | { |
7858 | ix86_incoming_stack_boundary |
7859 | = ix86_minimum_incoming_stack_boundary (sibcall: false); |
7860 | |
7861 | /* x86_64 vararg needs 16byte stack alignment for register save area. */ |
7862 | if (TARGET_64BIT |
7863 | && cfun->stdarg |
7864 | && crtl->stack_alignment_estimated < 128) |
7865 | crtl->stack_alignment_estimated = 128; |
7866 | |
7867 | /* __tls_get_addr needs to be called with 16-byte aligned stack. */ |
7868 | if (ix86_tls_descriptor_calls_expanded_in_cfun |
7869 | && crtl->preferred_stack_boundary < 128) |
7870 | crtl->preferred_stack_boundary = 128; |
7871 | } |
7872 | |
7873 | /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is |
7874 | needed or an rtx for DRAP otherwise. */ |
7875 | |
7876 | static rtx |
7877 | ix86_get_drap_rtx (void) |
7878 | { |
7879 | /* We must use DRAP if there are outgoing arguments on stack or |
7880 | the stack pointer register is clobbered by asm statment and |
7881 | ACCUMULATE_OUTGOING_ARGS is false. */ |
7882 | if (ix86_force_drap |
7883 | || ((cfun->machine->outgoing_args_on_stack |
7884 | || crtl->sp_is_clobbered_by_asm) |
7885 | && !ACCUMULATE_OUTGOING_ARGS)) |
7886 | crtl->need_drap = true; |
7887 | |
7888 | if (stack_realign_drap) |
7889 | { |
7890 | /* Assign DRAP to vDRAP and returns vDRAP */ |
7891 | unsigned int regno = find_drap_reg (); |
7892 | rtx drap_vreg; |
7893 | rtx arg_ptr; |
7894 | rtx_insn *seq, *insn; |
7895 | |
7896 | arg_ptr = gen_rtx_REG (Pmode, regno); |
7897 | crtl->drap_reg = arg_ptr; |
7898 | |
7899 | start_sequence (); |
7900 | drap_vreg = copy_to_reg (arg_ptr); |
7901 | seq = get_insns (); |
7902 | end_sequence (); |
7903 | |
7904 | insn = emit_insn_before (seq, NEXT_INSN (insn: entry_of_function ())); |
7905 | if (!optimize) |
7906 | { |
7907 | add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg); |
7908 | RTX_FRAME_RELATED_P (insn) = 1; |
7909 | } |
7910 | return drap_vreg; |
7911 | } |
7912 | else |
7913 | return NULL; |
7914 | } |
7915 | |
/* Handle the TARGET_INTERNAL_ARG_POINTER hook: the rtx used to access
   incoming arguments is the virtual incoming-args pointer.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}
7923 | |
/* Bookkeeping for a scratch register obtained on function entry.  */
struct scratch_reg {
  rtx reg;	/* The scratch hard register.  */
  bool saved;	/* True if it was live and had to be pushed first.  */
};
7928 | |
7929 | /* Return a short-lived scratch register for use on function entry. |
7930 | In 32-bit mode, it is valid only after the registers are saved |
7931 | in the prologue. This register must be released by means of |
7932 | release_scratch_register_on_entry once it is dead. */ |
7933 | |
7934 | static void |
7935 | get_scratch_register_on_entry (struct scratch_reg *sr) |
7936 | { |
7937 | int regno; |
7938 | |
7939 | sr->saved = false; |
7940 | |
7941 | if (TARGET_64BIT) |
7942 | { |
7943 | /* We always use R11 in 64-bit mode. */ |
7944 | regno = R11_REG; |
7945 | } |
7946 | else |
7947 | { |
7948 | tree decl = current_function_decl, fntype = TREE_TYPE (decl); |
7949 | bool fastcall_p |
7950 | = lookup_attribute (attr_name: "fastcall" , TYPE_ATTRIBUTES (fntype)) != NULL_TREE; |
7951 | bool thiscall_p |
7952 | = lookup_attribute (attr_name: "thiscall" , TYPE_ATTRIBUTES (fntype)) != NULL_TREE; |
7953 | bool static_chain_p = DECL_STATIC_CHAIN (decl); |
7954 | int regparm = ix86_function_regparm (type: fntype, decl); |
7955 | int drap_regno |
7956 | = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM; |
7957 | |
7958 | /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax |
7959 | for the static chain register. */ |
7960 | if ((regparm < 1 || (fastcall_p && !static_chain_p)) |
7961 | && drap_regno != AX_REG) |
7962 | regno = AX_REG; |
7963 | /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx |
7964 | for the static chain register. */ |
7965 | else if (thiscall_p && !static_chain_p && drap_regno != AX_REG) |
7966 | regno = AX_REG; |
7967 | else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG) |
7968 | regno = DX_REG; |
7969 | /* ecx is the static chain register. */ |
7970 | else if (regparm < 3 && !fastcall_p && !thiscall_p |
7971 | && !static_chain_p |
7972 | && drap_regno != CX_REG) |
7973 | regno = CX_REG; |
7974 | else if (ix86_save_reg (BX_REG, maybe_eh_return: true, ignore_outlined: false)) |
7975 | regno = BX_REG; |
7976 | /* esi is the static chain register. */ |
7977 | else if (!(regparm == 3 && static_chain_p) |
7978 | && ix86_save_reg (SI_REG, maybe_eh_return: true, ignore_outlined: false)) |
7979 | regno = SI_REG; |
7980 | else if (ix86_save_reg (DI_REG, maybe_eh_return: true, ignore_outlined: false)) |
7981 | regno = DI_REG; |
7982 | else |
7983 | { |
7984 | regno = (drap_regno == AX_REG ? DX_REG : AX_REG); |
7985 | sr->saved = true; |
7986 | } |
7987 | } |
7988 | |
7989 | sr->reg = gen_rtx_REG (Pmode, regno); |
7990 | if (sr->saved) |
7991 | { |
7992 | rtx_insn *insn = emit_insn (gen_push (arg: sr->reg)); |
7993 | RTX_FRAME_RELATED_P (insn) = 1; |
7994 | } |
7995 | } |
7996 | |
7997 | /* Release a scratch register obtained from the preceding function. |
7998 | |
7999 | If RELEASE_VIA_POP is true, we just pop the register off the stack |
8000 | to release it. This is what non-Linux systems use with -fstack-check. |
8001 | |
8002 | Otherwise we use OFFSET to locate the saved register and the |
8003 | allocated stack space becomes part of the local frame and is |
8004 | deallocated by the epilogue. */ |
8005 | |
8006 | static void |
8007 | release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset, |
8008 | bool release_via_pop) |
8009 | { |
8010 | if (sr->saved) |
8011 | { |
8012 | if (release_via_pop) |
8013 | { |
8014 | struct machine_function *m = cfun->machine; |
8015 | rtx x, insn = emit_insn (gen_pop (arg: sr->reg)); |
8016 | |
8017 | /* The RX FRAME_RELATED_P mechanism doesn't know about pop. */ |
8018 | RTX_FRAME_RELATED_P (insn) = 1; |
8019 | x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); |
8020 | x = gen_rtx_SET (stack_pointer_rtx, x); |
8021 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, x); |
8022 | m->fs.sp_offset -= UNITS_PER_WORD; |
8023 | } |
8024 | else |
8025 | { |
8026 | rtx x = plus_constant (Pmode, stack_pointer_rtx, offset); |
8027 | x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x)); |
8028 | emit_insn (x); |
8029 | } |
8030 | } |
8031 | } |
8032 | |
8033 | /* Emit code to adjust the stack pointer by SIZE bytes while probing it. |
8034 | |
8035 | If INT_REGISTERS_SAVED is true, then integer registers have already been |
8036 | pushed on the stack. |
8037 | |
8038 | If PROTECTION AREA is true, then probe PROBE_INTERVAL plus a small dope |
8039 | beyond SIZE bytes. |
8040 | |
8041 | This assumes no knowledge of the current probing state, i.e. it is never |
8042 | allowed to allocate more than PROBE_INTERVAL bytes of stack space without |
8043 | a suitable probe. */ |
8044 | |
8045 | static void |
8046 | ix86_adjust_stack_and_probe (HOST_WIDE_INT size, |
8047 | const bool int_registers_saved, |
8048 | const bool protection_area) |
8049 | { |
8050 | struct machine_function *m = cfun->machine; |
8051 | |
8052 | /* If this function does not statically allocate stack space, then |
8053 | no probes are needed. */ |
8054 | if (!size) |
8055 | { |
8056 | /* However, the allocation of space via pushes for register |
8057 | saves could be viewed as allocating space, but without the |
8058 | need to probe. */ |
8059 | if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed) |
8060 | dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); |
8061 | else |
8062 | dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); |
8063 | return; |
8064 | } |
8065 | |
8066 | /* If we are a noreturn function, then we have to consider the |
8067 | possibility that we're called via a jump rather than a call. |
8068 | |
8069 | Thus we don't have the implicit probe generated by saving the |
8070 | return address into the stack at the call. Thus, the stack |
8071 | pointer could be anywhere in the guard page. The safe thing |
8072 | to do is emit a probe now. |
8073 | |
8074 | The probe can be avoided if we have already emitted any callee |
8075 | register saves into the stack or have a frame pointer (which will |
8076 | have been saved as well). Those saves will function as implicit |
8077 | probes. |
8078 | |
8079 | ?!? This should be revamped to work like aarch64 and s390 where |
8080 | we track the offset from the most recent probe. Normally that |
8081 | offset would be zero. For a noreturn function we would reset |
8082 | it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then |
8083 | we just probe when we cross PROBE_INTERVAL. */ |
8084 | if (TREE_THIS_VOLATILE (cfun->decl) |
8085 | && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)) |
8086 | { |
8087 | /* We can safely use any register here since we're just going to push |
8088 | its value and immediately pop it back. But we do try and avoid |
8089 | argument passing registers so as not to introduce dependencies in |
8090 | the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */ |
8091 | rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG); |
8092 | rtx_insn *insn_push = emit_insn (gen_push (arg: dummy_reg)); |
8093 | rtx_insn *insn_pop = emit_insn (gen_pop (arg: dummy_reg)); |
8094 | m->fs.sp_offset -= UNITS_PER_WORD; |
8095 | if (m->fs.cfa_reg == stack_pointer_rtx) |
8096 | { |
8097 | m->fs.cfa_offset -= UNITS_PER_WORD; |
8098 | rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD); |
8099 | x = gen_rtx_SET (stack_pointer_rtx, x); |
8100 | add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x); |
8101 | RTX_FRAME_RELATED_P (insn_push) = 1; |
8102 | x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); |
8103 | x = gen_rtx_SET (stack_pointer_rtx, x); |
8104 | add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x); |
8105 | RTX_FRAME_RELATED_P (insn_pop) = 1; |
8106 | } |
8107 | emit_insn (gen_blockage ()); |
8108 | } |
8109 | |
8110 | const HOST_WIDE_INT probe_interval = get_probe_interval (); |
8111 | const int dope = 4 * UNITS_PER_WORD; |
8112 | |
8113 | /* If there is protection area, take it into account in the size. */ |
8114 | if (protection_area) |
8115 | size += probe_interval + dope; |
8116 | |
8117 | /* If we allocate less than the size of the guard statically, |
8118 | then no probing is necessary, but we do need to allocate |
8119 | the stack. */ |
8120 | else if (size < (1 << param_stack_clash_protection_guard_size)) |
8121 | { |
8122 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
8123 | GEN_INT (-size), style: -1, |
8124 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
8125 | dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); |
8126 | return; |
8127 | } |
8128 | |
8129 | /* We're allocating a large enough stack frame that we need to |
8130 | emit probes. Either emit them inline or in a loop depending |
8131 | on the size. */ |
8132 | if (size <= 4 * probe_interval) |
8133 | { |
8134 | HOST_WIDE_INT i; |
8135 | for (i = probe_interval; i <= size; i += probe_interval) |
8136 | { |
8137 | /* Allocate PROBE_INTERVAL bytes. */ |
8138 | rtx insn |
8139 | = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
8140 | GEN_INT (-probe_interval), style: -1, |
8141 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
8142 | add_reg_note (insn, REG_STACK_CHECK, const0_rtx); |
8143 | |
8144 | /* And probe at *sp. */ |
8145 | emit_stack_probe (stack_pointer_rtx); |
8146 | emit_insn (gen_blockage ()); |
8147 | } |
8148 | |
8149 | /* We need to allocate space for the residual, but we do not need |
8150 | to probe the residual... */ |
8151 | HOST_WIDE_INT residual = (i - probe_interval - size); |
8152 | if (residual) |
8153 | { |
8154 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
8155 | GEN_INT (residual), style: -1, |
8156 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
8157 | |
8158 | /* ...except if there is a protection area to maintain. */ |
8159 | if (protection_area) |
8160 | emit_stack_probe (stack_pointer_rtx); |
8161 | } |
8162 | |
8163 | dump_stack_clash_frame_info (PROBE_INLINE, residual != 0); |
8164 | } |
8165 | else |
8166 | { |
8167 | /* We expect the GP registers to be saved when probes are used |
8168 | as the probing sequences might need a scratch register and |
8169 | the routine to allocate one assumes the integer registers |
8170 | have already been saved. */ |
8171 | gcc_assert (int_registers_saved); |
8172 | |
8173 | struct scratch_reg sr; |
8174 | get_scratch_register_on_entry (sr: &sr); |
8175 | |
8176 | /* If we needed to save a register, then account for any space |
8177 | that was pushed (we are not going to pop the register when |
8178 | we do the restore). */ |
8179 | if (sr.saved) |
8180 | size -= UNITS_PER_WORD; |
8181 | |
8182 | /* Step 1: round SIZE down to a multiple of the interval. */ |
8183 | HOST_WIDE_INT rounded_size = size & -probe_interval; |
8184 | |
8185 | /* Step 2: compute final value of the loop counter. Use lea if |
8186 | possible. */ |
8187 | rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size); |
8188 | rtx insn; |
8189 | if (address_no_seg_operand (addr, Pmode)) |
8190 | insn = emit_insn (gen_rtx_SET (sr.reg, addr)); |
8191 | else |
8192 | { |
8193 | emit_move_insn (sr.reg, GEN_INT (-rounded_size)); |
8194 | insn = emit_insn (gen_rtx_SET (sr.reg, |
8195 | gen_rtx_PLUS (Pmode, sr.reg, |
8196 | stack_pointer_rtx))); |
8197 | } |
8198 | if (m->fs.cfa_reg == stack_pointer_rtx) |
8199 | { |
8200 | add_reg_note (insn, REG_CFA_DEF_CFA, |
8201 | plus_constant (Pmode, sr.reg, |
8202 | m->fs.cfa_offset + rounded_size)); |
8203 | RTX_FRAME_RELATED_P (insn) = 1; |
8204 | } |
8205 | |
8206 | /* Step 3: the loop. */ |
8207 | rtx size_rtx = GEN_INT (rounded_size); |
8208 | insn = emit_insn (gen_adjust_stack_and_probe (Pmode, x0: sr.reg, x1: sr.reg, |
8209 | x2: size_rtx)); |
8210 | if (m->fs.cfa_reg == stack_pointer_rtx) |
8211 | { |
8212 | m->fs.cfa_offset += rounded_size; |
8213 | add_reg_note (insn, REG_CFA_DEF_CFA, |
8214 | plus_constant (Pmode, stack_pointer_rtx, |
8215 | m->fs.cfa_offset)); |
8216 | RTX_FRAME_RELATED_P (insn) = 1; |
8217 | } |
8218 | m->fs.sp_offset += rounded_size; |
8219 | emit_insn (gen_blockage ()); |
8220 | |
8221 | /* Step 4: adjust SP if we cannot assert at compile-time that SIZE |
8222 | is equal to ROUNDED_SIZE. */ |
8223 | |
8224 | if (size != rounded_size) |
8225 | { |
8226 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
8227 | GEN_INT (rounded_size - size), style: -1, |
8228 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
8229 | |
8230 | if (protection_area) |
8231 | emit_stack_probe (stack_pointer_rtx); |
8232 | } |
8233 | |
8234 | dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size); |
8235 | |
8236 | /* This does not deallocate the space reserved for the scratch |
8237 | register. That will be deallocated in the epilogue. */ |
8238 | release_scratch_register_on_entry (sr: &sr, offset: size, release_via_pop: false); |
8239 | } |
8240 | |
8241 | /* Adjust back to account for the protection area. */ |
8242 | if (protection_area) |
8243 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
8244 | GEN_INT (probe_interval + dope), style: -1, |
8245 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
8246 | |
8247 | /* Make sure nothing is scheduled before we are done. */ |
8248 | emit_insn (gen_blockage ()); |
8249 | } |
8250 | |
/* Adjust the stack pointer up to REG while probing it.

   Emit the assembly for a loop that repeatedly moves the stack pointer
   down by one probe interval and touches the newly exposed word, until
   the stack pointer reaches REG.  Returns "" because all output is
   written to the assembly file directly.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;	/* Makes each emitted loop label unique.  */
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL" , labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* SP = SP - PROBE_INTERVAL (the emitted insn is a SUB: the stack
     grows downwards, so allocating means decreasing SP).  */
  xops[0] = stack_pointer_rtx;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}" , xops);

  /* Probe at SP.  An OR with zero writes the word at (SP) without
     changing its value.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}" , xops);

  /* Test if SP == LAST_ADDR.  An equality test (rather than an ordered
     comparison) keeps the loop correct even near the ends of the
     address space.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}" , xops);

  /* Branch.  */
  fputs (s: "\tjne\t" , stream: asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc (c: '\n', stream: asm_out_file);

  return "" ;
}
8286 | |
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.

   INT_REGISTERS_SAVED is true if integer registers have already been
   pushed on the stack.  Unlike the moving-SP variant, this probes below
   SP without adjusting it; hence the probe addresses are negated
   offsets from the stack pointer.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
			     const bool int_registers_saved)
{
  const HOST_WIDE_INT probe_interval = get_probe_interval ();

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 6 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 6 * probe_interval)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
	 it exceeds SIZE.  If only one probe is needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = probe_interval; i < size; i += probe_interval)
	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
					 -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
				       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      /* We expect the GP registers to be saved when probes are used
	 as the probing sequences might need a scratch register and
	 the routine to allocate one assumes the integer registers
	 have already been saved.  */
      gcc_assert (int_registers_saved);

      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (sr: &sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = ROUND_DOWN (size, probe_interval);


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  The offset is negated because the probes
	 walk downwards from the stack pointer.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn
	(gen_probe_stack_range (Pmode, x0: sr.reg, x1: sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode,
					 gen_rtx_PLUS (Pmode,
						       stack_pointer_rtx,
						       sr.reg),
					 rounded_size - size));

      /* SP was never moved, so the scratch register can be restored
	 with a simple pop.  */
      release_scratch_register_on_entry (sr: &sr, offset: size, release_via_pop: true);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
8383 | |
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.

   REG holds a running (negated) offset and END the final (negated)
   offset — see ix86_emit_probe_stack_range, which loads -FIRST into the
   scratch register and passes -LAST as END.  The emitted loop decrements
   REG by one probe interval and touches the word at SP+REG until REG
   equals END.  Returns "" because all output is written directly.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;	/* Makes each emitted loop label unique.  */
  char loop_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL" , labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR - PROBE_INTERVAL (the emitted insn is a SUB:
     the offset in REG walks downwards).  */
  xops[0] = reg;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}" , xops);

  /* Probe at TEST_ADDR.  An OR with zero writes the word at SP+REG
     without changing its value.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}" , xops);

  /* Test if TEST_ADDR == LAST_ADDR.  An equality test keeps the loop
     correct even near the ends of the address space.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}" , xops);

  /* Branch.  */
  fputs (s: "\tjne\t" , stream: asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc (c: '\n', stream: asm_out_file);

  return "" ;
}
8422 | |
8423 | /* Set stack_frame_required to false if stack frame isn't required. |
8424 | Update STACK_ALIGNMENT to the largest alignment, in bits, of stack |
8425 | slot used if stack frame is required and CHECK_STACK_SLOT is true. */ |
8426 | |
8427 | static void |
8428 | ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, |
8429 | bool check_stack_slot) |
8430 | { |
8431 | HARD_REG_SET set_up_by_prologue, prologue_used; |
8432 | basic_block bb; |
8433 | |
8434 | CLEAR_HARD_REG_SET (set&: prologue_used); |
8435 | CLEAR_HARD_REG_SET (set&: set_up_by_prologue); |
8436 | add_to_hard_reg_set (regs: &set_up_by_prologue, Pmode, STACK_POINTER_REGNUM); |
8437 | add_to_hard_reg_set (regs: &set_up_by_prologue, Pmode, ARG_POINTER_REGNUM); |
8438 | add_to_hard_reg_set (regs: &set_up_by_prologue, Pmode, |
8439 | HARD_FRAME_POINTER_REGNUM); |
8440 | |
8441 | /* The preferred stack alignment is the minimum stack alignment. */ |
8442 | if (stack_alignment > crtl->preferred_stack_boundary) |
8443 | stack_alignment = crtl->preferred_stack_boundary; |
8444 | |
8445 | bool require_stack_frame = false; |
8446 | |
8447 | FOR_EACH_BB_FN (bb, cfun) |
8448 | { |
8449 | rtx_insn *insn; |
8450 | FOR_BB_INSNS (bb, insn) |
8451 | if (NONDEBUG_INSN_P (insn) |
8452 | && requires_stack_frame_p (insn, prologue_used, |
8453 | set_up_by_prologue)) |
8454 | { |
8455 | require_stack_frame = true; |
8456 | |
8457 | if (check_stack_slot) |
8458 | { |
8459 | /* Find the maximum stack alignment. */ |
8460 | subrtx_iterator::array_type array; |
8461 | FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) |
8462 | if (MEM_P (*iter) |
8463 | && (reg_mentioned_p (stack_pointer_rtx, |
8464 | *iter) |
8465 | || reg_mentioned_p (frame_pointer_rtx, |
8466 | *iter))) |
8467 | { |
8468 | unsigned int alignment = MEM_ALIGN (*iter); |
8469 | if (alignment > stack_alignment) |
8470 | stack_alignment = alignment; |
8471 | } |
8472 | } |
8473 | } |
8474 | } |
8475 | |
8476 | cfun->machine->stack_frame_required = require_stack_frame; |
8477 | } |
8478 | |
/* Finalize stack_realign_needed and frame_pointer_needed flags, which
   will guide prologue/epilogue to be generated in correct form.  */

static void
ix86_finalize_stack_frame_flags (void)
{
  /* Check if stack realign is really needed after reload, and
     stores result in cfun */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  /* Leaf functions that don't call a TLS descriptor only need to honor
     the largest alignment actually used by a stack slot; everything
     else must honor the full stack_alignment_needed.  */
  unsigned int stack_alignment
    = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
       ? crtl->max_used_stack_slot_alignment
       : crtl->stack_alignment_needed);
  unsigned int stack_realign
    = (incoming_stack_boundary < stack_alignment);
  bool recompute_frame_layout_p = false;

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
	 change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* It is always safe to compute max_used_stack_alignment.  We
     compute it only if 128-bit aligned load/store may be generated
     on misaligned stack slot which will lead to segfault.  */
  bool check_stack_slot
    = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
  ix86_find_max_used_stack_alignment (stack_alignment,
				      check_stack_slot);

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed or -fno-omit-frame-pointer
     is used, but in the end nothing that needed the stack alignment had
     been spilled nor stack access, clear frame_pointer_needed and say we
     don't need stack realignment.

     When vector register is used for piecewise move and store, we don't
     increase stack_alignment_needed as there is no register spill for
     piecewise move and store.  Since stack_realign_needed is set to true
     by checking stack_alignment_estimated which is updated by pseudo
     vector register usage, we also need to check stack_realign_needed to
     eliminate frame pointer.  */
  if ((stack_realign
       || (!flag_omit_frame_pointer && optimize)
       || crtl->stack_realign_needed)
      && frame_pointer_needed
      && crtl->is_leaf
      && crtl->sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      /* See ira_setup_eliminable_regset for the rationale.  */
      && !(STACK_CHECK_MOVING_SP
	   && flag_stack_check
	   && flag_exceptions
	   && cfun->can_throw_non_call_exceptions)
      && !ix86_frame_pointer_required ()
      && ix86_get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      if (cfun->machine->stack_frame_required)
	{
	  /* Stack frame is required.  If stack alignment needed is less
	     than incoming stack boundary, don't realign stack.  */
	  stack_realign = incoming_stack_boundary < stack_alignment;
	  if (!stack_realign)
	    {
	      crtl->max_used_stack_slot_alignment
		= incoming_stack_boundary;
	      crtl->stack_alignment_needed
		= incoming_stack_boundary;
	      /* Also update preferred_stack_boundary for leaf
	         functions.  */
	      crtl->preferred_stack_boundary
		= incoming_stack_boundary;
	    }
	}
      else
	{
	  /* If drap has been set, but it actually isn't live at the
	     start of the function, there is no reason to set it up.  */
	  if (crtl->drap_reg)
	    {
	      basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
	      if (! REGNO_REG_SET_P (DF_LR_IN (bb),
				     REGNO (crtl->drap_reg)))
		{
		  crtl->drap_reg = NULL_RTX;
		  crtl->need_drap = false;
		}
	    }
	  else
	    cfun->machine->no_drap_save_restore = true;

	  frame_pointer_needed = false;
	  stack_realign = false;
	  crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
	  crtl->stack_alignment_needed = incoming_stack_boundary;
	  crtl->stack_alignment_estimated = incoming_stack_boundary;
	  if (crtl->preferred_stack_boundary > incoming_stack_boundary)
	    crtl->preferred_stack_boundary = incoming_stack_boundary;
	  /* Eliminating the frame pointer invalidates the dataflow
	     information, so rebuild it from scratch.  */
	  df_finish_pass (true);
	  df_scan_alloc (NULL);
	  df_scan_blocks ();
	  df_compute_regs_ever_live (true);
	  df_analyze ();

	  if (flag_var_tracking)
	    {
	      /* Since frame pointer is no longer available, replace it with
		 stack pointer - UNITS_PER_WORD in debug insns.  */
	      df_ref ref, next;
	      for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
		   ref; ref = next)
		{
		  next = DF_REF_NEXT_REG (ref);
		  if (!DF_REF_INSN_INFO (ref))
		    continue;

		  /* Make sure the next ref is for a different instruction,
		     so that we're not affected by the rescan.  */
		  rtx_insn *insn = DF_REF_INSN (ref);
		  while (next && DF_REF_INSN (next) == insn)
		    next = DF_REF_NEXT_REG (next);

		  if (DEBUG_INSN_P (insn))
		    {
		      bool changed = false;
		      for (; ref != next; ref = DF_REF_NEXT_REG (ref))
			{
			  rtx *loc = DF_REF_LOC (ref);
			  if (*loc == hard_frame_pointer_rtx)
			    {
			      *loc = plus_constant (Pmode,
						    stack_pointer_rtx,
						    -UNITS_PER_WORD);
			      changed = true;
			    }
			}
		      if (changed)
			df_insn_rescan (insn);
		    }
		}
	    }

	  recompute_frame_layout_p = true;
	}
    }
  else if (crtl->max_used_stack_slot_alignment >= 128
	   && cfun->machine->stack_frame_required)
    {
      /* We don't need to realign stack.  max_used_stack_alignment is
	 used to decide how stack frame should be aligned.  This is
	 independent of any psABIs nor 32-bit vs 64-bit.  */
      cfun->machine->max_used_stack_alignment
	= stack_alignment / BITS_PER_UNIT;
    }

  if (crtl->stack_realign_needed != stack_realign)
    recompute_frame_layout_p = true;
  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
  if (recompute_frame_layout_p)
    ix86_compute_frame_layout ();
}
8651 | |
8652 | /* Delete SET_GOT right after entry block if it is allocated to reg. */ |
8653 | |
8654 | static void |
8655 | ix86_elim_entry_set_got (rtx reg) |
8656 | { |
8657 | basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; |
8658 | rtx_insn *c_insn = BB_HEAD (bb); |
8659 | if (!NONDEBUG_INSN_P (c_insn)) |
8660 | c_insn = next_nonnote_nondebug_insn (c_insn); |
8661 | if (c_insn && NONJUMP_INSN_P (c_insn)) |
8662 | { |
8663 | rtx pat = PATTERN (insn: c_insn); |
8664 | if (GET_CODE (pat) == PARALLEL) |
8665 | { |
8666 | rtx set = XVECEXP (pat, 0, 0); |
8667 | if (GET_CODE (set) == SET |
8668 | && GET_CODE (SET_SRC (set)) == UNSPEC |
8669 | && XINT (SET_SRC (set), 1) == UNSPEC_SET_GOT |
8670 | && REGNO (SET_DEST (set)) == REGNO (reg)) |
8671 | delete_insn (c_insn); |
8672 | } |
8673 | } |
8674 | } |
8675 | |
8676 | static rtx |
8677 | gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store) |
8678 | { |
8679 | rtx addr, mem; |
8680 | |
8681 | if (offset) |
8682 | addr = plus_constant (Pmode, frame_reg, offset); |
8683 | mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg); |
8684 | return gen_rtx_SET (store ? mem : reg, store ? reg : mem); |
8685 | } |
8686 | |
8687 | static inline rtx |
8688 | gen_frame_load (rtx reg, rtx frame_reg, int offset) |
8689 | { |
8690 | return gen_frame_set (reg, frame_reg, offset, store: false); |
8691 | } |
8692 | |
8693 | static inline rtx |
8694 | gen_frame_store (rtx reg, rtx frame_reg, int offset) |
8695 | { |
8696 | return gen_frame_set (reg, frame_reg, offset, store: true); |
8697 | } |
8698 | |
/* Emit the out-of-line save of the MS-to-SysV clobbered registers via a
   call to the matching xlogue save stub.  FRAME describes the current
   frame layout; RAX is set up as the stub's base pointer.  */

static void
ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  /* One vector element per saved register, plus one for the USE of the
     stub symbol.  */
  rtvec v = rtvec_alloc (ncregs + 1);
  unsigned int align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, addr;
  rtx rax = gen_rtx_REG (word_mode, AX_REG);
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();

  /* AL should only be live with sysv_abi.  */
  gcc_assert (!ix86_eax_live_at_start_p ());
  gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);

  /* Setup RAX as the stub's base pointer.  The address is formed from
     frame.stack_realign_offset, which is valid whether or not we've
     actually realigned the stack.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  addr = choose_baseaddr (cfa_offset: frame.stack_realign_offset
			  + xlogue.get_stub_ptr_offset (), align: &align, AX_REG);
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rax, addr));

  /* Get the stub symbol.  */
  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
			     : XLOGUE_STUB_SAVE);
  RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);

  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (reg: i);
      /* SSE registers are saved in V4SFmode, GP registers in word mode.  */
      rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
			     r.regno);
      RTVEC_ELT (v, vi++) = gen_frame_store (reg, frame_reg: rax, offset: -r.offset);
    }

  gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));

  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
  RTX_FRAME_RELATED_P (insn) = true;
}
8743 | |
8744 | /* Generate and return an insn body to AND X with Y. */ |
8745 | |
8746 | static rtx_insn * |
8747 | gen_and2_insn (rtx x, rtx y) |
8748 | { |
8749 | enum insn_code icode = optab_handler (op: and_optab, GET_MODE (x)); |
8750 | |
8751 | gcc_assert (insn_operand_matches (icode, 0, x)); |
8752 | gcc_assert (insn_operand_matches (icode, 1, x)); |
8753 | gcc_assert (insn_operand_matches (icode, 2, y)); |
8754 | |
8755 | return GEN_FCN (icode) (x, x, y); |
8756 | } |
8757 | |
8758 | /* Expand the prologue into a bunch of separate insns. */ |
8759 | |
8760 | void |
8761 | ix86_expand_prologue (void) |
8762 | { |
8763 | struct machine_function *m = cfun->machine; |
8764 | rtx insn, t; |
8765 | HOST_WIDE_INT allocate; |
8766 | bool int_registers_saved; |
8767 | bool sse_registers_saved; |
8768 | bool save_stub_call_needed; |
8769 | rtx static_chain = NULL_RTX; |
8770 | |
8771 | ix86_last_zero_store_uid = 0; |
8772 | if (ix86_function_naked (fn: current_function_decl)) |
8773 | { |
8774 | if (flag_stack_usage_info) |
8775 | current_function_static_stack_size = 0; |
8776 | return; |
8777 | } |
8778 | |
8779 | ix86_finalize_stack_frame_flags (); |
8780 | |
8781 | /* DRAP should not coexist with stack_realign_fp */ |
8782 | gcc_assert (!(crtl->drap_reg && stack_realign_fp)); |
8783 | |
8784 | memset (s: &m->fs, c: 0, n: sizeof (m->fs)); |
8785 | |
8786 | /* Initialize CFA state for before the prologue. */ |
8787 | m->fs.cfa_reg = stack_pointer_rtx; |
8788 | m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET; |
8789 | |
8790 | /* Track SP offset to the CFA. We continue tracking this after we've |
8791 | swapped the CFA register away from SP. In the case of re-alignment |
8792 | this is fudged; we're interested to offsets within the local frame. */ |
8793 | m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; |
8794 | m->fs.sp_valid = true; |
8795 | m->fs.sp_realigned = false; |
8796 | |
8797 | const struct ix86_frame &frame = cfun->machine->frame; |
8798 | |
8799 | if (!TARGET_64BIT && ix86_function_ms_hook_prologue (fn: current_function_decl)) |
8800 | { |
8801 | /* We should have already generated an error for any use of |
8802 | ms_hook on a nested function. */ |
8803 | gcc_checking_assert (!ix86_static_chain_on_stack); |
8804 | |
8805 | /* Check if profiling is active and we shall use profiling before |
8806 | prologue variant. If so sorry. */ |
8807 | if (crtl->profile && flag_fentry != 0) |
8808 | sorry ("%<ms_hook_prologue%> attribute is not compatible " |
8809 | "with %<-mfentry%> for 32-bit" ); |
8810 | |
8811 | /* In ix86_asm_output_function_label we emitted: |
8812 | 8b ff movl.s %edi,%edi |
8813 | 55 push %ebp |
8814 | 8b ec movl.s %esp,%ebp |
8815 | |
8816 | This matches the hookable function prologue in Win32 API |
8817 | functions in Microsoft Windows XP Service Pack 2 and newer. |
8818 | Wine uses this to enable Windows apps to hook the Win32 API |
8819 | functions provided by Wine. |
8820 | |
8821 | What that means is that we've already set up the frame pointer. */ |
8822 | |
8823 | if (frame_pointer_needed |
8824 | && !(crtl->drap_reg && crtl->stack_realign_needed)) |
8825 | { |
8826 | rtx push, mov; |
8827 | |
8828 | /* We've decided to use the frame pointer already set up. |
8829 | Describe this to the unwinder by pretending that both |
8830 | push and mov insns happen right here. |
8831 | |
8832 | Putting the unwind info here at the end of the ms_hook |
8833 | is done so that we can make absolutely certain we get |
8834 | the required byte sequence at the start of the function, |
8835 | rather than relying on an assembler that can produce |
8836 | the exact encoding required. |
8837 | |
8838 | However it does mean (in the unpatched case) that we have |
8839 | a 1 insn window where the asynchronous unwind info is |
8840 | incorrect. However, if we placed the unwind info at |
8841 | its correct location we would have incorrect unwind info |
8842 | in the patched case. Which is probably all moot since |
8843 | I don't expect Wine generates dwarf2 unwind info for the |
8844 | system libraries that use this feature. */ |
8845 | |
8846 | insn = emit_insn (gen_blockage ()); |
8847 | |
8848 | push = gen_push (hard_frame_pointer_rtx); |
8849 | mov = gen_rtx_SET (hard_frame_pointer_rtx, |
8850 | stack_pointer_rtx); |
8851 | RTX_FRAME_RELATED_P (push) = 1; |
8852 | RTX_FRAME_RELATED_P (mov) = 1; |
8853 | |
8854 | RTX_FRAME_RELATED_P (insn) = 1; |
8855 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, |
8856 | gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov))); |
8857 | |
8858 | /* Note that gen_push incremented m->fs.cfa_offset, even |
8859 | though we didn't emit the push insn here. */ |
8860 | m->fs.cfa_reg = hard_frame_pointer_rtx; |
8861 | m->fs.fp_offset = m->fs.cfa_offset; |
8862 | m->fs.fp_valid = true; |
8863 | } |
8864 | else |
8865 | { |
8866 | /* The frame pointer is not needed so pop %ebp again. |
8867 | This leaves us with a pristine state. */ |
8868 | emit_insn (gen_pop (hard_frame_pointer_rtx)); |
8869 | } |
8870 | } |
8871 | |
8872 | /* The first insn of a function that accepts its static chain on the |
8873 | stack is to push the register that would be filled in by a direct |
8874 | call. This insn will be skipped by the trampoline. */ |
8875 | else if (ix86_static_chain_on_stack) |
8876 | { |
8877 | static_chain = ix86_static_chain (cfun->decl, false); |
8878 | insn = emit_insn (gen_push (arg: static_chain)); |
8879 | emit_insn (gen_blockage ()); |
8880 | |
8881 | /* We don't want to interpret this push insn as a register save, |
8882 | only as a stack adjustment. The real copy of the register as |
8883 | a save will be done later, if needed. */ |
8884 | t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD); |
8885 | t = gen_rtx_SET (stack_pointer_rtx, t); |
8886 | add_reg_note (insn, REG_CFA_ADJUST_CFA, t); |
8887 | RTX_FRAME_RELATED_P (insn) = 1; |
8888 | } |
8889 | |
8890 | /* Emit prologue code to adjust stack alignment and setup DRAP, in case |
8891 | of DRAP is needed and stack realignment is really needed after reload */ |
8892 | if (stack_realign_drap) |
8893 | { |
8894 | int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; |
8895 | |
8896 | /* Can't use DRAP in interrupt function. */ |
8897 | if (cfun->machine->func_type != TYPE_NORMAL) |
8898 | sorry ("Dynamic Realign Argument Pointer (DRAP) not supported " |
8899 | "in interrupt service routine. This may be worked " |
8900 | "around by avoiding functions with aggregate return." ); |
8901 | |
8902 | /* Only need to push parameter pointer reg if it is caller saved. */ |
8903 | if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) |
8904 | { |
8905 | /* Push arg pointer reg */ |
8906 | insn = emit_insn (gen_push (crtl->drap_reg)); |
8907 | RTX_FRAME_RELATED_P (insn) = 1; |
8908 | } |
8909 | |
8910 | /* Grab the argument pointer. */ |
8911 | t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset); |
8912 | insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t)); |
8913 | RTX_FRAME_RELATED_P (insn) = 1; |
8914 | m->fs.cfa_reg = crtl->drap_reg; |
8915 | m->fs.cfa_offset = 0; |
8916 | |
8917 | /* Align the stack. */ |
8918 | insn = emit_insn (gen_and2_insn (stack_pointer_rtx, |
8919 | GEN_INT (-align_bytes))); |
8920 | RTX_FRAME_RELATED_P (insn) = 1; |
8921 | |
8922 | /* Replicate the return address on the stack so that return |
8923 | address can be reached via (argp - 1) slot. This is needed |
8924 | to implement macro RETURN_ADDR_RTX and intrinsic function |
8925 | expand_builtin_return_addr etc. */ |
8926 | t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD); |
8927 | t = gen_frame_mem (word_mode, t); |
8928 | insn = emit_insn (gen_push (arg: t)); |
8929 | RTX_FRAME_RELATED_P (insn) = 1; |
8930 | |
8931 | /* For the purposes of frame and register save area addressing, |
8932 | we've started over with a new frame. */ |
8933 | m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; |
8934 | m->fs.realigned = true; |
8935 | |
8936 | if (static_chain) |
8937 | { |
8938 | /* Replicate static chain on the stack so that static chain |
8939 | can be reached via (argp - 2) slot. This is needed for |
8940 | nested function with stack realignment. */ |
8941 | insn = emit_insn (gen_push (arg: static_chain)); |
8942 | RTX_FRAME_RELATED_P (insn) = 1; |
8943 | } |
8944 | } |
8945 | |
8946 | int_registers_saved = (frame.nregs == 0); |
8947 | sse_registers_saved = (frame.nsseregs == 0); |
8948 | save_stub_call_needed = (m->call_ms2sysv); |
8949 | gcc_assert (sse_registers_saved || !save_stub_call_needed); |
8950 | |
8951 | if (frame_pointer_needed && !m->fs.fp_valid) |
8952 | { |
8953 | /* Note: AT&T enter does NOT have reversed args. Enter is probably |
8954 | slower on all targets. Also sdb didn't like it. */ |
8955 | insn = emit_insn (gen_push (hard_frame_pointer_rtx)); |
8956 | RTX_FRAME_RELATED_P (insn) = 1; |
8957 | |
8958 | if (m->fs.sp_offset == frame.hard_frame_pointer_offset) |
8959 | { |
8960 | insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); |
8961 | RTX_FRAME_RELATED_P (insn) = 1; |
8962 | |
8963 | if (m->fs.cfa_reg == stack_pointer_rtx) |
8964 | m->fs.cfa_reg = hard_frame_pointer_rtx; |
8965 | m->fs.fp_offset = m->fs.sp_offset; |
8966 | m->fs.fp_valid = true; |
8967 | } |
8968 | } |
8969 | |
8970 | if (!int_registers_saved) |
8971 | { |
8972 | /* If saving registers via PUSH, do so now. */ |
8973 | if (!frame.save_regs_using_mov) |
8974 | { |
8975 | ix86_emit_save_regs (); |
8976 | int_registers_saved = true; |
8977 | gcc_assert (m->fs.sp_offset == frame.reg_save_offset); |
8978 | } |
8979 | |
8980 | /* When using red zone we may start register saving before allocating |
8981 | the stack frame saving one cycle of the prologue. However, avoid |
8982 | doing this if we have to probe the stack; at least on x86_64 the |
8983 | stack probe can turn into a call that clobbers a red zone location. */ |
8984 | else if (ix86_using_red_zone () |
8985 | && (! TARGET_STACK_PROBE |
8986 | || frame.stack_pointer_offset < CHECK_STACK_LIMIT)) |
8987 | { |
8988 | ix86_emit_save_regs_using_mov (cfa_offset: frame.reg_save_offset); |
8989 | cfun->machine->red_zone_used = true; |
8990 | int_registers_saved = true; |
8991 | } |
8992 | } |
8993 | |
8994 | if (frame.red_zone_size != 0) |
8995 | cfun->machine->red_zone_used = true; |
8996 | |
8997 | if (stack_realign_fp) |
8998 | { |
8999 | int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; |
9000 | gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT); |
9001 | |
9002 | /* Record last valid frame pointer offset. */ |
9003 | m->fs.sp_realigned_fp_last = frame.reg_save_offset; |
9004 | |
9005 | /* The computation of the size of the re-aligned stack frame means |
9006 | that we must allocate the size of the register save area before |
9007 | performing the actual alignment. Otherwise we cannot guarantee |
9008 | that there's enough storage above the realignment point. */ |
9009 | allocate = frame.reg_save_offset - m->fs.sp_offset |
9010 | + frame.stack_realign_allocate; |
9011 | if (allocate) |
9012 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
9013 | GEN_INT (-allocate), style: -1, set_cfa: false); |
9014 | |
9015 | /* Align the stack. */ |
9016 | emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes))); |
9017 | m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes); |
9018 | m->fs.sp_realigned_offset = m->fs.sp_offset |
9019 | - frame.stack_realign_allocate; |
9020 | /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset. |
9021 | Beyond this point, stack access should be done via choose_baseaddr or |
9022 | by using sp_valid_at and fp_valid_at to determine the correct base |
9023 | register. Henceforth, any CFA offset should be thought of as logical |
9024 | and not physical. */ |
9025 | gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last); |
9026 | gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset); |
9027 | m->fs.sp_realigned = true; |
9028 | |
9029 | /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which |
9030 | is needed to describe where a register is saved using a realigned |
9031 | stack pointer, so we need to invalidate the stack pointer for that |
9032 | target. */ |
9033 | if (TARGET_SEH) |
9034 | m->fs.sp_valid = false; |
9035 | |
9036 | /* If SP offset is non-immediate after allocation of the stack frame, |
9037 | then emit SSE saves or stub call prior to allocating the rest of the |
9038 | stack frame. This is less efficient for the out-of-line stub because |
9039 | we can't combine allocations across the call barrier, but it's better |
9040 | than using a scratch register. */ |
9041 | else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset |
9042 | - m->fs.sp_realigned_offset), |
9043 | Pmode)) |
9044 | { |
9045 | if (!sse_registers_saved) |
9046 | { |
9047 | ix86_emit_save_sse_regs_using_mov (cfa_offset: frame.sse_reg_save_offset); |
9048 | sse_registers_saved = true; |
9049 | } |
9050 | else if (save_stub_call_needed) |
9051 | { |
9052 | ix86_emit_outlined_ms2sysv_save (frame); |
9053 | save_stub_call_needed = false; |
9054 | } |
9055 | } |
9056 | } |
9057 | |
9058 | allocate = frame.stack_pointer_offset - m->fs.sp_offset; |
9059 | |
9060 | if (flag_stack_usage_info) |
9061 | { |
9062 | /* We start to count from ARG_POINTER. */ |
9063 | HOST_WIDE_INT stack_size = frame.stack_pointer_offset; |
9064 | |
9065 | /* If it was realigned, take into account the fake frame. */ |
9066 | if (stack_realign_drap) |
9067 | { |
9068 | if (ix86_static_chain_on_stack) |
9069 | stack_size += UNITS_PER_WORD; |
9070 | |
9071 | if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) |
9072 | stack_size += UNITS_PER_WORD; |
9073 | |
9074 | /* This over-estimates by 1 minimal-stack-alignment-unit but |
9075 | mitigates that by counting in the new return address slot. */ |
9076 | current_function_dynamic_stack_size |
9077 | += crtl->stack_alignment_needed / BITS_PER_UNIT; |
9078 | } |
9079 | |
9080 | current_function_static_stack_size = stack_size; |
9081 | } |
9082 | |
9083 | /* On SEH target with very large frame size, allocate an area to save |
9084 | SSE registers (as the very large allocation won't be described). */ |
9085 | if (TARGET_SEH |
9086 | && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE |
9087 | && !sse_registers_saved) |
9088 | { |
9089 | HOST_WIDE_INT sse_size |
9090 | = frame.sse_reg_save_offset - frame.reg_save_offset; |
9091 | |
9092 | gcc_assert (int_registers_saved); |
9093 | |
9094 | /* No need to do stack checking as the area will be immediately |
9095 | written. */ |
9096 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
9097 | GEN_INT (-sse_size), style: -1, |
9098 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
9099 | allocate -= sse_size; |
9100 | ix86_emit_save_sse_regs_using_mov (cfa_offset: frame.sse_reg_save_offset); |
9101 | sse_registers_saved = true; |
9102 | } |
9103 | |
9104 | /* If stack clash protection is requested, then probe the stack, unless it |
9105 | is already probed on the target. */ |
9106 | if (allocate >= 0 |
9107 | && flag_stack_clash_protection |
9108 | && !ix86_target_stack_probe ()) |
9109 | { |
9110 | ix86_adjust_stack_and_probe (size: allocate, int_registers_saved, protection_area: false); |
9111 | allocate = 0; |
9112 | } |
9113 | |
9114 | /* The stack has already been decremented by the instruction calling us |
9115 | so probe if the size is non-negative to preserve the protection area. */ |
9116 | else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK) |
9117 | { |
9118 | const HOST_WIDE_INT probe_interval = get_probe_interval (); |
9119 | |
9120 | if (STACK_CHECK_MOVING_SP) |
9121 | { |
9122 | if (crtl->is_leaf |
9123 | && !cfun->calls_alloca |
9124 | && allocate <= probe_interval) |
9125 | ; |
9126 | |
9127 | else |
9128 | { |
9129 | ix86_adjust_stack_and_probe (size: allocate, int_registers_saved, protection_area: true); |
9130 | allocate = 0; |
9131 | } |
9132 | } |
9133 | |
9134 | else |
9135 | { |
9136 | HOST_WIDE_INT size = allocate; |
9137 | |
9138 | if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000)) |
9139 | size = 0x80000000 - get_stack_check_protect () - 1; |
9140 | |
9141 | if (TARGET_STACK_PROBE) |
9142 | { |
9143 | if (crtl->is_leaf && !cfun->calls_alloca) |
9144 | { |
9145 | if (size > probe_interval) |
9146 | ix86_emit_probe_stack_range (first: 0, size, int_registers_saved); |
9147 | } |
9148 | else |
9149 | ix86_emit_probe_stack_range (first: 0, |
9150 | size: size + get_stack_check_protect (), |
9151 | int_registers_saved); |
9152 | } |
9153 | else |
9154 | { |
9155 | if (crtl->is_leaf && !cfun->calls_alloca) |
9156 | { |
9157 | if (size > probe_interval |
9158 | && size > get_stack_check_protect ()) |
9159 | ix86_emit_probe_stack_range (first: get_stack_check_protect (), |
9160 | size: (size |
9161 | - get_stack_check_protect ()), |
9162 | int_registers_saved); |
9163 | } |
9164 | else |
9165 | ix86_emit_probe_stack_range (first: get_stack_check_protect (), size, |
9166 | int_registers_saved); |
9167 | } |
9168 | } |
9169 | } |
9170 | |
9171 | if (allocate == 0) |
9172 | ; |
9173 | else if (!ix86_target_stack_probe () |
9174 | || frame.stack_pointer_offset < CHECK_STACK_LIMIT) |
9175 | { |
9176 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
9177 | GEN_INT (-allocate), style: -1, |
9178 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
9179 | } |
9180 | else |
9181 | { |
9182 | rtx eax = gen_rtx_REG (Pmode, AX_REG); |
9183 | rtx r10 = NULL; |
9184 | const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx); |
9185 | bool eax_live = ix86_eax_live_at_start_p (); |
9186 | bool r10_live = false; |
9187 | |
9188 | if (TARGET_64BIT) |
9189 | r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0); |
9190 | |
9191 | if (eax_live) |
9192 | { |
9193 | insn = emit_insn (gen_push (arg: eax)); |
9194 | allocate -= UNITS_PER_WORD; |
9195 | /* Note that SEH directives need to continue tracking the stack |
9196 | pointer even after the frame pointer has been set up. */ |
9197 | if (sp_is_cfa_reg || TARGET_SEH) |
9198 | { |
9199 | if (sp_is_cfa_reg) |
9200 | m->fs.cfa_offset += UNITS_PER_WORD; |
9201 | RTX_FRAME_RELATED_P (insn) = 1; |
9202 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, |
9203 | gen_rtx_SET (stack_pointer_rtx, |
9204 | plus_constant (Pmode, |
9205 | stack_pointer_rtx, |
9206 | -UNITS_PER_WORD))); |
9207 | } |
9208 | } |
9209 | |
9210 | if (r10_live) |
9211 | { |
9212 | r10 = gen_rtx_REG (Pmode, R10_REG); |
9213 | insn = emit_insn (gen_push (arg: r10)); |
9214 | allocate -= UNITS_PER_WORD; |
9215 | if (sp_is_cfa_reg || TARGET_SEH) |
9216 | { |
9217 | if (sp_is_cfa_reg) |
9218 | m->fs.cfa_offset += UNITS_PER_WORD; |
9219 | RTX_FRAME_RELATED_P (insn) = 1; |
9220 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, |
9221 | gen_rtx_SET (stack_pointer_rtx, |
9222 | plus_constant (Pmode, |
9223 | stack_pointer_rtx, |
9224 | -UNITS_PER_WORD))); |
9225 | } |
9226 | } |
9227 | |
9228 | emit_move_insn (eax, GEN_INT (allocate)); |
9229 | emit_insn (gen_allocate_stack_worker_probe (Pmode, x0: eax, x1: eax)); |
9230 | |
9231 | /* Use the fact that AX still contains ALLOCATE. */ |
9232 | insn = emit_insn (gen_pro_epilogue_adjust_stack_sub |
9233 | (Pmode, stack_pointer_rtx, stack_pointer_rtx, x2: eax)); |
9234 | |
9235 | if (sp_is_cfa_reg || TARGET_SEH) |
9236 | { |
9237 | if (sp_is_cfa_reg) |
9238 | m->fs.cfa_offset += allocate; |
9239 | RTX_FRAME_RELATED_P (insn) = 1; |
9240 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, |
9241 | gen_rtx_SET (stack_pointer_rtx, |
9242 | plus_constant (Pmode, stack_pointer_rtx, |
9243 | -allocate))); |
9244 | } |
9245 | m->fs.sp_offset += allocate; |
9246 | |
9247 | /* Use stack_pointer_rtx for relative addressing so that code works for |
9248 | realigned stack. But this means that we need a blockage to prevent |
9249 | stores based on the frame pointer from being scheduled before. */ |
9250 | if (r10_live && eax_live) |
9251 | { |
9252 | t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); |
9253 | emit_move_insn (gen_rtx_REG (word_mode, R10_REG), |
9254 | gen_frame_mem (word_mode, t)); |
9255 | t = plus_constant (Pmode, t, UNITS_PER_WORD); |
9256 | emit_move_insn (gen_rtx_REG (word_mode, AX_REG), |
9257 | gen_frame_mem (word_mode, t)); |
9258 | emit_insn (gen_memory_blockage ()); |
9259 | } |
9260 | else if (eax_live || r10_live) |
9261 | { |
9262 | t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); |
9263 | emit_move_insn (gen_rtx_REG (word_mode, |
9264 | (eax_live ? AX_REG : R10_REG)), |
9265 | gen_frame_mem (word_mode, t)); |
9266 | emit_insn (gen_memory_blockage ()); |
9267 | } |
9268 | } |
9269 | gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset); |
9270 | |
9271 | /* If we havn't already set up the frame pointer, do so now. */ |
9272 | if (frame_pointer_needed && !m->fs.fp_valid) |
9273 | { |
9274 | insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, |
9275 | GEN_INT (frame.stack_pointer_offset |
9276 | - frame.hard_frame_pointer_offset)); |
9277 | insn = emit_insn (insn); |
9278 | RTX_FRAME_RELATED_P (insn) = 1; |
9279 | add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL); |
9280 | |
9281 | if (m->fs.cfa_reg == stack_pointer_rtx) |
9282 | m->fs.cfa_reg = hard_frame_pointer_rtx; |
9283 | m->fs.fp_offset = frame.hard_frame_pointer_offset; |
9284 | m->fs.fp_valid = true; |
9285 | } |
9286 | |
9287 | if (!int_registers_saved) |
9288 | ix86_emit_save_regs_using_mov (cfa_offset: frame.reg_save_offset); |
9289 | if (!sse_registers_saved) |
9290 | ix86_emit_save_sse_regs_using_mov (cfa_offset: frame.sse_reg_save_offset); |
9291 | else if (save_stub_call_needed) |
9292 | ix86_emit_outlined_ms2sysv_save (frame); |
9293 | |
9294 | /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT |
9295 | in PROLOGUE. */ |
9296 | if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry) |
9297 | { |
9298 | rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM); |
9299 | insn = emit_insn (gen_set_got (pic)); |
9300 | RTX_FRAME_RELATED_P (insn) = 1; |
9301 | add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); |
9302 | emit_insn (gen_prologue_use (pic)); |
9303 | /* Deleting already emmitted SET_GOT if exist and allocated to |
9304 | REAL_PIC_OFFSET_TABLE_REGNUM. */ |
9305 | ix86_elim_entry_set_got (reg: pic); |
9306 | } |
9307 | |
9308 | if (crtl->drap_reg && !crtl->stack_realign_needed) |
9309 | { |
9310 | /* vDRAP is setup but after reload it turns out stack realign |
9311 | isn't necessary, here we will emit prologue to setup DRAP |
9312 | without stack realign adjustment */ |
9313 | t = choose_baseaddr (cfa_offset: 0, NULL); |
9314 | emit_insn (gen_rtx_SET (crtl->drap_reg, t)); |
9315 | } |
9316 | |
9317 | /* Prevent instructions from being scheduled into register save push |
9318 | sequence when access to the redzone area is done through frame pointer. |
9319 | The offset between the frame pointer and the stack pointer is calculated |
9320 | relative to the value of the stack pointer at the end of the function |
9321 | prologue, and moving instructions that access redzone area via frame |
9322 | pointer inside push sequence violates this assumption. */ |
9323 | if (frame_pointer_needed && frame.red_zone_size) |
9324 | emit_insn (gen_memory_blockage ()); |
9325 | |
9326 | /* SEH requires that the prologue end within 256 bytes of the start of |
9327 | the function. Prevent instruction schedules that would extend that. |
9328 | Further, prevent alloca modifications to the stack pointer from being |
9329 | combined with prologue modifications. */ |
9330 | if (TARGET_SEH) |
9331 | emit_insn (gen_prologue_use (stack_pointer_rtx)); |
9332 | } |
9333 | |
/* Emit code to restore REG using a POP or POPP insn.  PPX_P selects the
   APX POPP form.  Updates the frame-state tracking in cfun->machine and
   attaches the CFI notes required to keep the unwind info correct.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p)
{
  struct machine_function *m = cfun->machine;
  rtx_insn *insn = emit_insn (gen_pop (arg: reg, ppx_p));

  /* Queue a CFA-restore note for REG at the current SP offset, then
     account for the word the pop removed from the stack.  */
  ix86_add_cfa_restore_note (insn, reg, cfa_offset: m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      /* SP is the CFA register, so the pop moved the CFA itself by one
	 word; emit an explicit CFA-adjust note reflecting that.  */
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
9391 | |
/* Emit code to restore REG1 and REG2 using a single POP2 insn (or the
   APX POP2P form when PPX_P).  POP2 pops two words at once from a
   16-byte-aligned stack location; the CFA bookkeeping mirrors
   ix86_emit_restore_reg_using_pop but moves by two words.  */
static void
ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false)
{
  struct machine_function *m = cfun->machine;
  /* POP2 removes two words from the stack at once.  */
  const int offset = UNITS_PER_WORD * 2;
  rtx_insn *insn;

  /* The memory operand is a TImode post-increment load through SP.  */
  rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));

  if (ppx_p)
    insn = emit_insn (gen_pop2p_di (reg1, mem, reg2));
  else
    insn = emit_insn (gen_pop2_di (reg1, mem, reg2));

  RTX_FRAME_RELATED_P (insn) = 1;

  /* One insn restores two registers, so attach a CFA-restore note
     for each of them.  */
  rtx dwarf = NULL_RTX;
  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg1, dwarf);
  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg2, dwarf);
  REG_NOTES (insn) = dwarf;
  m->fs.sp_offset -= offset;

  if (m->fs.cfa_reg == crtl->drap_reg
      && (REGNO (reg1) == REGNO (crtl->drap_reg)
	  || REGNO (reg2) == REGNO (crtl->drap_reg)))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA,
		    REGNO (reg1) == REGNO (crtl->drap_reg) ? reg1 : reg2);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      /* SP is the CFA register, so this pop moved the CFA by two
	 words; emit the matching CFA-adjust note.  */
      rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= offset;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg1 == hard_frame_pointer_rtx || reg2 == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= offset;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
9464 | |
9465 | /* Emit code to restore saved registers using POP insns. */ |
9466 | |
9467 | static void |
9468 | ix86_emit_restore_regs_using_pop (bool ppx_p) |
9469 | { |
9470 | unsigned int regno; |
9471 | |
9472 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
9473 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: false, ignore_outlined: true)) |
9474 | ix86_emit_restore_reg_using_pop (reg: gen_rtx_REG (word_mode, regno), ppx_p); |
9475 | } |
9476 | |
9477 | /* Emit code to restore saved registers using POP2 insns. */ |
9478 | |
9479 | static void |
9480 | ix86_emit_restore_regs_using_pop2 (void) |
9481 | { |
9482 | int regno; |
9483 | int regno_list[2]; |
9484 | regno_list[0] = regno_list[1] = -1; |
9485 | int loaded_regnum = 0; |
9486 | bool aligned = cfun->machine->fs.sp_offset % 16 == 0; |
9487 | |
9488 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
9489 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: false, ignore_outlined: true)) |
9490 | { |
9491 | if (aligned) |
9492 | { |
9493 | regno_list[loaded_regnum++] = regno; |
9494 | if (loaded_regnum == 2) |
9495 | { |
9496 | gcc_assert (regno_list[0] != -1 |
9497 | && regno_list[1] != -1 |
9498 | && regno_list[0] != regno_list[1]); |
9499 | |
9500 | ix86_emit_restore_reg_using_pop2 (reg1: gen_rtx_REG (word_mode, |
9501 | regno_list[0]), |
9502 | reg2: gen_rtx_REG (word_mode, |
9503 | regno_list[1]), |
9504 | TARGET_APX_PPX); |
9505 | loaded_regnum = 0; |
9506 | regno_list[0] = regno_list[1] = -1; |
9507 | } |
9508 | } |
9509 | else |
9510 | { |
9511 | ix86_emit_restore_reg_using_pop (reg: gen_rtx_REG (word_mode, regno), |
9512 | TARGET_APX_PPX); |
9513 | aligned = true; |
9514 | } |
9515 | } |
9516 | |
9517 | if (loaded_regnum == 1) |
9518 | ix86_emit_restore_reg_using_pop (reg: gen_rtx_REG (word_mode, regno_list[0]), |
9519 | TARGET_APX_PPX); |
9520 | } |
9521 | |
9522 | /* Emit code and notes for the LEAVE instruction. If insn is non-null, |
9523 | omits the emit and only attaches the notes. */ |
9524 | |
9525 | static void |
9526 | ix86_emit_leave (rtx_insn *insn) |
9527 | { |
9528 | struct machine_function *m = cfun->machine; |
9529 | |
9530 | if (!insn) |
9531 | insn = emit_insn (gen_leave (arg0: word_mode)); |
9532 | |
9533 | ix86_add_queued_cfa_restore_notes (insn); |
9534 | |
9535 | gcc_assert (m->fs.fp_valid); |
9536 | m->fs.sp_valid = true; |
9537 | m->fs.sp_realigned = false; |
9538 | m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD; |
9539 | m->fs.fp_valid = false; |
9540 | |
9541 | if (m->fs.cfa_reg == hard_frame_pointer_rtx) |
9542 | { |
9543 | m->fs.cfa_reg = stack_pointer_rtx; |
9544 | m->fs.cfa_offset = m->fs.sp_offset; |
9545 | |
9546 | add_reg_note (insn, REG_CFA_DEF_CFA, |
9547 | plus_constant (Pmode, stack_pointer_rtx, |
9548 | m->fs.sp_offset)); |
9549 | RTX_FRAME_RELATED_P (insn) = 1; |
9550 | } |
9551 | ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, |
9552 | cfa_offset: m->fs.fp_offset); |
9553 | } |
9554 | |
9555 | /* Emit code to restore saved registers using MOV insns. |
9556 | First register is restored from CFA - CFA_OFFSET. */ |
9557 | static void |
9558 | ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset, |
9559 | bool maybe_eh_return) |
9560 | { |
9561 | struct machine_function *m = cfun->machine; |
9562 | unsigned int regno; |
9563 | |
9564 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
9565 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, ignore_outlined: true)) |
9566 | { |
9567 | rtx reg = gen_rtx_REG (word_mode, regno); |
9568 | rtx mem; |
9569 | rtx_insn *insn; |
9570 | |
9571 | mem = choose_baseaddr (cfa_offset, NULL); |
9572 | mem = gen_frame_mem (word_mode, mem); |
9573 | insn = emit_move_insn (reg, mem); |
9574 | |
9575 | if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg)) |
9576 | { |
9577 | /* Previously we'd represented the CFA as an expression |
9578 | like *(%ebp - 8). We've just popped that value from |
9579 | the stack, which means we need to reset the CFA to |
9580 | the drap register. This will remain until we restore |
9581 | the stack pointer. */ |
9582 | add_reg_note (insn, REG_CFA_DEF_CFA, reg); |
9583 | RTX_FRAME_RELATED_P (insn) = 1; |
9584 | |
9585 | /* This means that the DRAP register is valid for addressing. */ |
9586 | m->fs.drap_valid = true; |
9587 | } |
9588 | else |
9589 | ix86_add_cfa_restore_note (NULL, reg, cfa_offset); |
9590 | |
9591 | cfa_offset -= UNITS_PER_WORD; |
9592 | } |
9593 | } |
9594 | |
9595 | /* Emit code to restore saved registers using MOV insns. |
9596 | First register is restored from CFA - CFA_OFFSET. */ |
9597 | static void |
9598 | ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset, |
9599 | bool maybe_eh_return) |
9600 | { |
9601 | unsigned int regno; |
9602 | |
9603 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
9604 | if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, ignore_outlined: true)) |
9605 | { |
9606 | rtx reg = gen_rtx_REG (V4SFmode, regno); |
9607 | rtx mem; |
9608 | unsigned int align = GET_MODE_ALIGNMENT (V4SFmode); |
9609 | |
9610 | mem = choose_baseaddr (cfa_offset, align: &align); |
9611 | mem = gen_rtx_MEM (V4SFmode, mem); |
9612 | |
9613 | /* The location aligment depends upon the base register. */ |
9614 | align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align); |
9615 | gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1))); |
9616 | set_mem_align (mem, align); |
9617 | emit_insn (gen_rtx_SET (reg, mem)); |
9618 | |
9619 | ix86_add_cfa_restore_note (NULL, reg, cfa_offset); |
9620 | |
9621 | cfa_offset -= GET_MODE_SIZE (V4SFmode); |
9622 | } |
9623 | } |
9624 | |
/* Emit the out-of-line ms2sysv epilogue stub invocation that restores
   the MS-ABI clobbered registers described by FRAME.  When USE_CALL the
   stub is called and control returns here; otherwise the stub is
   jumped to as the function's tail and performs the return itself.
   STYLE is passed through to the final stack adjustment.  */
static void
ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
				    bool use_call, int style)
{
  struct machine_function *m = cfun->machine;
  /* Number of registers the stub restores.  */
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  rtvec v;
  unsigned int elems_needed, align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, tmp;
  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
  rtx r10 = NULL_RTX;
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
  HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
  HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
  rtx rsi_frame_load = NULL_RTX;
  HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
  enum xlogue_stub stub;

  gcc_assert (!m->fs.fp_valid || frame_pointer_needed);

  /* If using a realigned stack, we should never start with padding.  */
  gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());

  /* Setup RSI as the stub's base pointer.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  tmp = choose_baseaddr (cfa_offset: rsi_offset, align: &align, SI_REG);
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rsi, tmp));

  /* Get a symbol for the stub.  The HFP variants expect a valid hard
     frame pointer; the TAIL variants perform the function return.  */
  if (frame_pointer_needed)
    stub = use_call ? XLOGUE_STUB_RESTORE_HFP
		    : XLOGUE_STUB_RESTORE_HFP_TAIL;
  else
    stub = use_call ? XLOGUE_STUB_RESTORE
		    : XLOGUE_STUB_RESTORE_TAIL;
  sym = xlogue.get_stub_rtx (stub);

  /* Size the PARALLEL: one frame load per register, plus the stub USE,
     and for the tail-call form the return and SP-restore elements.  */
  elems_needed = ncregs;
  if (use_call)
    elems_needed += 1;
  else
    elems_needed += frame_pointer_needed ? 5 : 3;
  v = rtvec_alloc (elems_needed);

  /* We call the epilogue stub when we need to pop incoming args or we are
     doing a sibling call as the tail.  Otherwise, we will emit a jmp to the
     epilogue stub and it is the tail-call.  */
  if (use_call)
    RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
  else
    {
      RTVEC_ELT (v, vi++) = ret_rtx;
      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
      if (frame_pointer_needed)
	{
	  /* The stub restores SP from RBP + 8 and reloads RBP from the
	     slot it points at (i.e. it performs the LEAVE).  */
	  rtx rbp = gen_rtx_REG (DImode, BP_REG);
	  gcc_assert (m->fs.fp_valid);
	  gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);

	  tmp = plus_constant (DImode, rbp, 8);
	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
	  RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
	  tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
	  RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
	}
      else
	{
	  /* If no hard frame pointer, we set R10 to the SP restore value.  */
	  gcc_assert (!m->fs.fp_valid);
	  gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	  gcc_assert (m->fs.sp_valid);

	  r10 = gen_rtx_REG (DImode, R10_REG);
	  tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
	  emit_insn (gen_rtx_SET (r10, tmp));

	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
	}
    }

  /* Generate frame load insns and restore notes.  */
  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (reg: i);
      machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
      rtx reg, frame_load;

      reg = gen_rtx_REG (mode, r.regno);
      frame_load = gen_frame_load (reg, frame_reg: rsi, offset: r.offset);

      /* Save RSI frame load insn & note to add last.  RSI is the base
	 register for all the other loads, so it must be reloaded after
	 every other register has been read through it.  */
      if (r.regno == SI_REG)
	{
	  gcc_assert (!rsi_frame_load);
	  rsi_frame_load = frame_load;
	  rsi_restore_offset = r.offset;
	}
      else
	{
	  RTVEC_ELT (v, vi++) = frame_load;
	  ix86_add_cfa_restore_note (NULL, reg, cfa_offset: r.offset);
	}
    }

  /* Add RSI frame load & restore note at the end.  */
  gcc_assert (rsi_frame_load);
  gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
  RTVEC_ELT (v, vi++) = rsi_frame_load;
  ix86_add_cfa_restore_note (NULL, reg: gen_rtx_REG (DImode, SI_REG),
			     cfa_offset: rsi_restore_offset);

  /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
  if (!use_call && !frame_pointer_needed)
    {
      gcc_assert (m->fs.sp_valid);
      gcc_assert (!m->fs.sp_realigned);

      /* At this point, R10 should point to frame.stack_realign_offset.  */
      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
      m->fs.sp_offset = frame.stack_realign_offset;
    }

  gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
  tmp = gen_rtx_PARALLEL (VOIDmode, v);
  if (use_call)
    insn = emit_insn (tmp);
  else
    {
      insn = emit_jump_insn (tmp);
      JUMP_LABEL (insn) = ret_rtx;

      /* The tail-call stub pattern already contains the LEAVE
	 semantics; ix86_emit_leave only attaches notes here.  */
      if (frame_pointer_needed)
	ix86_emit_leave (insn);
      else
	{
	  /* Need CFA adjust note.  */
	  tmp = gen_rtx_SET (stack_pointer_rtx, r10);
	  add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
	}
    }

  RTX_FRAME_RELATED_P (insn) = true;
  ix86_add_queued_cfa_restore_notes (insn);

  /* If we're not doing a tail-call, we need to adjust the stack.  */
  if (use_call && m->fs.sp_valid)
    {
      HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (dealloc), style,
				 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
    }
}
9783 | |
9784 | /* Restore function stack, frame, and registers. */ |
9785 | |
9786 | void |
9787 | ix86_expand_epilogue (int style) |
9788 | { |
9789 | struct machine_function *m = cfun->machine; |
9790 | struct machine_frame_state frame_state_save = m->fs; |
9791 | bool restore_regs_via_mov; |
9792 | bool using_drap; |
9793 | bool restore_stub_is_tail = false; |
9794 | |
9795 | if (ix86_function_naked (fn: current_function_decl)) |
9796 | { |
9797 | /* The program should not reach this point. */ |
9798 | emit_insn (gen_ud2 ()); |
9799 | return; |
9800 | } |
9801 | |
9802 | ix86_finalize_stack_frame_flags (); |
9803 | const struct ix86_frame &frame = cfun->machine->frame; |
9804 | |
9805 | m->fs.sp_realigned = stack_realign_fp; |
9806 | m->fs.sp_valid = stack_realign_fp |
9807 | || !frame_pointer_needed |
9808 | || crtl->sp_is_unchanging; |
9809 | gcc_assert (!m->fs.sp_valid |
9810 | || m->fs.sp_offset == frame.stack_pointer_offset); |
9811 | |
9812 | /* The FP must be valid if the frame pointer is present. */ |
9813 | gcc_assert (frame_pointer_needed == m->fs.fp_valid); |
9814 | gcc_assert (!m->fs.fp_valid |
9815 | || m->fs.fp_offset == frame.hard_frame_pointer_offset); |
9816 | |
9817 | /* We must have *some* valid pointer to the stack frame. */ |
9818 | gcc_assert (m->fs.sp_valid || m->fs.fp_valid); |
9819 | |
9820 | /* The DRAP is never valid at this point. */ |
9821 | gcc_assert (!m->fs.drap_valid); |
9822 | |
9823 | /* See the comment about red zone and frame |
9824 | pointer usage in ix86_expand_prologue. */ |
9825 | if (frame_pointer_needed && frame.red_zone_size) |
9826 | emit_insn (gen_memory_blockage ()); |
9827 | |
9828 | using_drap = crtl->drap_reg && crtl->stack_realign_needed; |
9829 | gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg); |
9830 | |
9831 | /* Determine the CFA offset of the end of the red-zone. */ |
9832 | m->fs.red_zone_offset = 0; |
9833 | if (ix86_using_red_zone () && crtl->args.pops_args < 65536) |
9834 | { |
9835 | /* The red-zone begins below return address and error code in |
9836 | exception handler. */ |
9837 | m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET; |
9838 | |
9839 | /* When the register save area is in the aligned portion of |
9840 | the stack, determine the maximum runtime displacement that |
9841 | matches up with the aligned frame. */ |
9842 | if (stack_realign_drap) |
9843 | m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT |
9844 | + UNITS_PER_WORD); |
9845 | } |
9846 | |
9847 | HOST_WIDE_INT reg_save_offset = frame.reg_save_offset; |
9848 | |
9849 | /* Special care must be taken for the normal return case of a function |
9850 | using eh_return: the eax and edx registers are marked as saved, but |
9851 | not restored along this path. Adjust the save location to match. */ |
9852 | if (crtl->calls_eh_return && style != 2) |
9853 | reg_save_offset -= 2 * UNITS_PER_WORD; |
9854 | |
9855 | /* EH_RETURN requires the use of moves to function properly. */ |
9856 | if (crtl->calls_eh_return) |
9857 | restore_regs_via_mov = true; |
9858 | /* SEH requires the use of pops to identify the epilogue. */ |
9859 | else if (TARGET_SEH) |
9860 | restore_regs_via_mov = false; |
9861 | /* If we're only restoring one register and sp cannot be used then |
9862 | using a move instruction to restore the register since it's |
9863 | less work than reloading sp and popping the register. */ |
9864 | else if (!sp_valid_at (cfa_offset: frame.hfp_save_offset) && frame.nregs <= 1) |
9865 | restore_regs_via_mov = true; |
9866 | else if (TARGET_EPILOGUE_USING_MOVE |
9867 | && cfun->machine->use_fast_prologue_epilogue |
9868 | && (frame.nregs > 1 |
9869 | || m->fs.sp_offset != reg_save_offset)) |
9870 | restore_regs_via_mov = true; |
9871 | else if (frame_pointer_needed |
9872 | && !frame.nregs |
9873 | && m->fs.sp_offset != reg_save_offset) |
9874 | restore_regs_via_mov = true; |
9875 | else if (frame_pointer_needed |
9876 | && TARGET_USE_LEAVE |
9877 | && cfun->machine->use_fast_prologue_epilogue |
9878 | && frame.nregs == 1) |
9879 | restore_regs_via_mov = true; |
9880 | else |
9881 | restore_regs_via_mov = false; |
9882 | |
9883 | if (restore_regs_via_mov || frame.nsseregs) |
9884 | { |
9885 | /* Ensure that the entire register save area is addressable via |
9886 | the stack pointer, if we will restore SSE regs via sp. */ |
9887 | if (TARGET_64BIT |
9888 | && m->fs.sp_offset > 0x7fffffff |
9889 | && sp_valid_at (cfa_offset: frame.stack_realign_offset + 1) |
9890 | && (frame.nsseregs + frame.nregs) != 0) |
9891 | { |
9892 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
9893 | GEN_INT (m->fs.sp_offset |
9894 | - frame.sse_reg_save_offset), |
9895 | style, |
9896 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
9897 | } |
9898 | } |
9899 | |
9900 | /* If there are any SSE registers to restore, then we have to do it |
9901 | via moves, since there's obviously no pop for SSE regs. */ |
9902 | if (frame.nsseregs) |
9903 | ix86_emit_restore_sse_regs_using_mov (cfa_offset: frame.sse_reg_save_offset, |
9904 | maybe_eh_return: style == 2); |
9905 | |
9906 | if (m->call_ms2sysv) |
9907 | { |
9908 | int pop_incoming_args = crtl->args.pops_args && crtl->args.size; |
9909 | |
9910 | /* We cannot use a tail-call for the stub if: |
9911 | 1. We have to pop incoming args, |
9912 | 2. We have additional int regs to restore, or |
9913 | 3. A sibling call will be the tail-call, or |
9914 | 4. We are emitting an eh_return_internal epilogue. |
9915 | |
	 TODO: Item 4 has not yet been tested!
9917 | |
9918 | If any of the above are true, we will call the stub rather than |
9919 | jump to it. */ |
9920 | restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1); |
9921 | ix86_emit_outlined_ms2sysv_restore (frame, use_call: !restore_stub_is_tail, style); |
9922 | } |
9923 | |
9924 | /* If using out-of-line stub that is a tail-call, then...*/ |
9925 | if (m->call_ms2sysv && restore_stub_is_tail) |
9926 | { |
      /* TODO: paranoid tests. (remove eventually) */
9928 | gcc_assert (m->fs.sp_valid); |
9929 | gcc_assert (!m->fs.sp_realigned); |
9930 | gcc_assert (!m->fs.fp_valid); |
9931 | gcc_assert (!m->fs.realigned); |
9932 | gcc_assert (m->fs.sp_offset == UNITS_PER_WORD); |
9933 | gcc_assert (!crtl->drap_reg); |
9934 | gcc_assert (!frame.nregs); |
9935 | } |
9936 | else if (restore_regs_via_mov) |
9937 | { |
9938 | rtx t; |
9939 | |
9940 | if (frame.nregs) |
9941 | ix86_emit_restore_regs_using_mov (cfa_offset: reg_save_offset, maybe_eh_return: style == 2); |
9942 | |
9943 | /* eh_return epilogues need %ecx added to the stack pointer. */ |
9944 | if (style == 2) |
9945 | { |
9946 | rtx sa = EH_RETURN_STACKADJ_RTX; |
9947 | rtx_insn *insn; |
9948 | |
9949 | /* Stack realignment doesn't work with eh_return. */ |
9950 | if (crtl->stack_realign_needed) |
9951 | sorry ("Stack realignment not supported with " |
9952 | "%<__builtin_eh_return%>" ); |
9953 | |
9954 | /* regparm nested functions don't work with eh_return. */ |
9955 | if (ix86_static_chain_on_stack) |
9956 | sorry ("regparm nested function not supported with " |
9957 | "%<__builtin_eh_return%>" ); |
9958 | |
9959 | if (frame_pointer_needed) |
9960 | { |
9961 | t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); |
9962 | t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD); |
9963 | emit_insn (gen_rtx_SET (sa, t)); |
9964 | |
9965 | /* NB: eh_return epilogues must restore the frame pointer |
9966 | in word_mode since the upper 32 bits of RBP register |
9967 | can have any values. */ |
9968 | t = gen_frame_mem (word_mode, hard_frame_pointer_rtx); |
9969 | rtx frame_reg = gen_rtx_REG (word_mode, |
9970 | HARD_FRAME_POINTER_REGNUM); |
9971 | insn = emit_move_insn (frame_reg, t); |
9972 | |
9973 | /* Note that we use SA as a temporary CFA, as the return |
9974 | address is at the proper place relative to it. We |
9975 | pretend this happens at the FP restore insn because |
9976 | prior to this insn the FP would be stored at the wrong |
9977 | offset relative to SA, and after this insn we have no |
9978 | other reasonable register to use for the CFA. We don't |
9979 | bother resetting the CFA to the SP for the duration of |
9980 | the return insn, unless the control flow instrumentation |
9981 | is done. In this case the SP is used later and we have |
9982 | to reset CFA to SP. */ |
9983 | add_reg_note (insn, REG_CFA_DEF_CFA, |
9984 | plus_constant (Pmode, sa, UNITS_PER_WORD)); |
9985 | ix86_add_queued_cfa_restore_notes (insn); |
9986 | add_reg_note (insn, REG_CFA_RESTORE, frame_reg); |
9987 | RTX_FRAME_RELATED_P (insn) = 1; |
9988 | |
9989 | m->fs.cfa_reg = sa; |
9990 | m->fs.cfa_offset = UNITS_PER_WORD; |
9991 | m->fs.fp_valid = false; |
9992 | |
9993 | pro_epilogue_adjust_stack (stack_pointer_rtx, src: sa, |
9994 | const0_rtx, style, |
9995 | flag_cf_protection); |
9996 | } |
9997 | else |
9998 | { |
9999 | t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); |
10000 | t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD); |
10001 | insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t)); |
10002 | ix86_add_queued_cfa_restore_notes (insn); |
10003 | |
10004 | gcc_assert (m->fs.cfa_reg == stack_pointer_rtx); |
10005 | if (m->fs.cfa_offset != UNITS_PER_WORD) |
10006 | { |
10007 | m->fs.cfa_offset = UNITS_PER_WORD; |
10008 | add_reg_note (insn, REG_CFA_DEF_CFA, |
10009 | plus_constant (Pmode, stack_pointer_rtx, |
10010 | UNITS_PER_WORD)); |
10011 | RTX_FRAME_RELATED_P (insn) = 1; |
10012 | } |
10013 | } |
10014 | m->fs.sp_offset = UNITS_PER_WORD; |
10015 | m->fs.sp_valid = true; |
10016 | m->fs.sp_realigned = false; |
10017 | } |
10018 | } |
10019 | else |
10020 | { |
10021 | /* SEH requires that the function end with (1) a stack adjustment |
10022 | if necessary, (2) a sequence of pops, and (3) a return or |
10023 | jump instruction. Prevent insns from the function body from |
10024 | being scheduled into this sequence. */ |
10025 | if (TARGET_SEH) |
10026 | { |
10027 | /* Prevent a catch region from being adjacent to the standard |
10028 | epilogue sequence. Unfortunately neither crtl->uses_eh_lsda |
10029 | nor several other flags that would be interesting to test are |
10030 | set up yet. */ |
10031 | if (flag_non_call_exceptions) |
10032 | emit_insn (gen_nops (const1_rtx)); |
10033 | else |
10034 | emit_insn (gen_blockage ()); |
10035 | } |
10036 | |
10037 | /* First step is to deallocate the stack frame so that we can |
10038 | pop the registers. If the stack pointer was realigned, it needs |
10039 | to be restored now. Also do it on SEH target for very large |
10040 | frame as the emitted instructions aren't allowed by the ABI |
10041 | in epilogues. */ |
10042 | if (!m->fs.sp_valid || m->fs.sp_realigned |
10043 | || (TARGET_SEH |
10044 | && (m->fs.sp_offset - reg_save_offset |
10045 | >= SEH_MAX_FRAME_SIZE))) |
10046 | { |
10047 | pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx, |
10048 | GEN_INT (m->fs.fp_offset |
10049 | - reg_save_offset), |
10050 | style, set_cfa: false); |
10051 | } |
10052 | else if (m->fs.sp_offset != reg_save_offset) |
10053 | { |
10054 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
10055 | GEN_INT (m->fs.sp_offset |
10056 | - reg_save_offset), |
10057 | style, |
10058 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
10059 | } |
10060 | |
10061 | if (TARGET_APX_PUSH2POP2 |
10062 | && ix86_can_use_push2pop2 () |
10063 | && m->func_type == TYPE_NORMAL) |
10064 | ix86_emit_restore_regs_using_pop2 (); |
10065 | else |
10066 | ix86_emit_restore_regs_using_pop (TARGET_APX_PPX); |
10067 | } |
10068 | |
10069 | /* If we used a stack pointer and haven't already got rid of it, |
10070 | then do so now. */ |
10071 | if (m->fs.fp_valid) |
10072 | { |
10073 | /* If the stack pointer is valid and pointing at the frame |
10074 | pointer store address, then we only need a pop. */ |
10075 | if (sp_valid_at (cfa_offset: frame.hfp_save_offset) |
10076 | && m->fs.sp_offset == frame.hfp_save_offset) |
10077 | ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); |
10078 | /* Leave results in shorter dependency chains on CPUs that are |
10079 | able to grok it fast. */ |
10080 | else if (TARGET_USE_LEAVE |
10081 | || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun)) |
10082 | || !cfun->machine->use_fast_prologue_epilogue) |
10083 | ix86_emit_leave (NULL); |
10084 | else |
10085 | { |
10086 | pro_epilogue_adjust_stack (stack_pointer_rtx, |
10087 | hard_frame_pointer_rtx, |
10088 | const0_rtx, style, set_cfa: !using_drap); |
10089 | ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); |
10090 | } |
10091 | } |
10092 | |
10093 | if (using_drap) |
10094 | { |
10095 | int param_ptr_offset = UNITS_PER_WORD; |
10096 | rtx_insn *insn; |
10097 | |
10098 | gcc_assert (stack_realign_drap); |
10099 | |
10100 | if (ix86_static_chain_on_stack) |
10101 | param_ptr_offset += UNITS_PER_WORD; |
10102 | if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) |
10103 | param_ptr_offset += UNITS_PER_WORD; |
10104 | |
10105 | insn = emit_insn (gen_rtx_SET |
10106 | (stack_pointer_rtx, |
10107 | plus_constant (Pmode, crtl->drap_reg, |
10108 | -param_ptr_offset))); |
10109 | m->fs.cfa_reg = stack_pointer_rtx; |
10110 | m->fs.cfa_offset = param_ptr_offset; |
10111 | m->fs.sp_offset = param_ptr_offset; |
10112 | m->fs.realigned = false; |
10113 | |
10114 | add_reg_note (insn, REG_CFA_DEF_CFA, |
10115 | plus_constant (Pmode, stack_pointer_rtx, |
10116 | param_ptr_offset)); |
10117 | RTX_FRAME_RELATED_P (insn) = 1; |
10118 | |
10119 | if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) |
10120 | ix86_emit_restore_reg_using_pop (crtl->drap_reg); |
10121 | } |
10122 | |
10123 | /* At this point the stack pointer must be valid, and we must have |
10124 | restored all of the registers. We may not have deallocated the |
10125 | entire stack frame. We've delayed this until now because it may |
10126 | be possible to merge the local stack deallocation with the |
10127 | deallocation forced by ix86_static_chain_on_stack. */ |
10128 | gcc_assert (m->fs.sp_valid); |
10129 | gcc_assert (!m->fs.sp_realigned); |
10130 | gcc_assert (!m->fs.fp_valid); |
10131 | gcc_assert (!m->fs.realigned); |
10132 | if (m->fs.sp_offset != UNITS_PER_WORD) |
10133 | { |
10134 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
10135 | GEN_INT (m->fs.sp_offset - UNITS_PER_WORD), |
10136 | style, set_cfa: true); |
10137 | } |
10138 | else |
10139 | ix86_add_queued_cfa_restore_notes (insn: get_last_insn ()); |
10140 | |
10141 | /* Sibcall epilogues don't want a return instruction. */ |
10142 | if (style == 0) |
10143 | { |
10144 | m->fs = frame_state_save; |
10145 | return; |
10146 | } |
10147 | |
10148 | if (cfun->machine->func_type != TYPE_NORMAL) |
10149 | emit_jump_insn (gen_interrupt_return ()); |
10150 | else if (crtl->args.pops_args && crtl->args.size) |
10151 | { |
10152 | rtx popc = GEN_INT (crtl->args.pops_args); |
10153 | |
10154 | /* i386 can only pop 64K bytes. If asked to pop more, pop return |
10155 | address, do explicit add, and jump indirectly to the caller. */ |
10156 | |
10157 | if (crtl->args.pops_args >= 65536) |
10158 | { |
10159 | rtx ecx = gen_rtx_REG (SImode, CX_REG); |
10160 | rtx_insn *insn; |
10161 | |
10162 | /* There is no "pascal" calling convention in any 64bit ABI. */ |
10163 | gcc_assert (!TARGET_64BIT); |
10164 | |
10165 | insn = emit_insn (gen_pop (arg: ecx)); |
10166 | m->fs.cfa_offset -= UNITS_PER_WORD; |
10167 | m->fs.sp_offset -= UNITS_PER_WORD; |
10168 | |
10169 | rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); |
10170 | x = gen_rtx_SET (stack_pointer_rtx, x); |
10171 | add_reg_note (insn, REG_CFA_ADJUST_CFA, x); |
10172 | add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); |
10173 | RTX_FRAME_RELATED_P (insn) = 1; |
10174 | |
10175 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
10176 | offset: popc, style: -1, set_cfa: true); |
10177 | emit_jump_insn (gen_simple_return_indirect_internal (ecx)); |
10178 | } |
10179 | else |
10180 | emit_jump_insn (gen_simple_return_pop_internal (popc)); |
10181 | } |
10182 | else if (!m->call_ms2sysv || !restore_stub_is_tail) |
10183 | { |
10184 | /* In case of return from EH a simple return cannot be used |
10185 | as a return address will be compared with a shadow stack |
10186 | return address. Use indirect jump instead. */ |
10187 | if (style == 2 && flag_cf_protection) |
10188 | { |
10189 | /* Register used in indirect jump must be in word_mode. But |
10190 | Pmode may not be the same as word_mode for x32. */ |
10191 | rtx ecx = gen_rtx_REG (word_mode, CX_REG); |
10192 | rtx_insn *insn; |
10193 | |
10194 | insn = emit_insn (gen_pop (arg: ecx)); |
10195 | m->fs.cfa_offset -= UNITS_PER_WORD; |
10196 | m->fs.sp_offset -= UNITS_PER_WORD; |
10197 | |
10198 | rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); |
10199 | x = gen_rtx_SET (stack_pointer_rtx, x); |
10200 | add_reg_note (insn, REG_CFA_ADJUST_CFA, x); |
10201 | add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); |
10202 | RTX_FRAME_RELATED_P (insn) = 1; |
10203 | |
10204 | emit_jump_insn (gen_simple_return_indirect_internal (ecx)); |
10205 | } |
10206 | else |
10207 | emit_jump_insn (gen_simple_return_internal ()); |
10208 | } |
10209 | |
10210 | /* Restore the state back to the state from the prologue, |
10211 | so that it's correct for the next epilogue. */ |
10212 | m->fs = frame_state_save; |
10213 | } |
10214 | |
/* Reset from the function's potential modifications.

   Implements TARGET_ASM_FUNCTION_EPILOGUE.  Restores the PIC register
   number and, on Mach-O, ensures the function body does not end with a
   label (Mach-O does not support labels at the end of objects).  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
{
  /* Restore the hard PIC register number; it may have been renumbered
     while compiling this function (see ix86_use_pseudo_pic_reg).  */
  if (pic_offset_table_rtx
      && !ix86_use_pseudo_pic_reg ())
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);

  if (TARGET_MACHO)
    {
      rtx_insn *insn = get_last_insn ();
      rtx_insn *deleted_debug_label = NULL;

      /* Mach-O doesn't support labels at the end of objects, so if
         it looks like we might want one, take special action.
        First, collect any sequence of deleted debug labels.  */
      while (insn
	     && NOTE_P (insn)
	     && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
	{
	  /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
	     notes only, instead set their CODE_LABEL_NUMBER to -1,
	     otherwise there would be code generation differences
	     in between -g and -g0.  */
	  if (NOTE_P (insn) && NOTE_KIND (insn)
	      == NOTE_INSN_DELETED_DEBUG_LABEL)
	    deleted_debug_label = insn;
	  insn = PREV_INSN (insn);
	}

      /* If we have:
	 label:
	    barrier
	  then this needs to be detected, so skip past the barrier.  */

      if (insn && BARRIER_P (insn))
	insn = PREV_INSN (insn);

      /* Up to now we've only seen notes or barriers.  */
      if (insn)
	{
	  if (LABEL_P (insn)
	      || (NOTE_P (insn)
		  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
	    /* Trailing label.  Emit a nop so the label is not at the
	       very end of the object.  */
	    fputs (s: "\tnop\n" , stream: file);
	  else if (cfun && ! cfun->is_thunk)
	    {
	      /* See if we have a completely empty function body, skipping
	         the special case of the picbase thunk emitted as asm.  */
	      while (insn && ! INSN_P (insn))
		insn = PREV_INSN (insn);
	      /* If we don't find any insns, we've got an empty function body;
		 I.e. completely empty - without a return or branch.  This is
		 taken as the case where a function body has been removed
		 because it contains an inline __builtin_unreachable().  GCC
		 declares that reaching __builtin_unreachable() means UB so
		 we're not obliged to do anything special; however, we want
		 non-zero-sized function bodies.  To meet this, and help the
		 user out, let's trap the case.  */
	      if (insn == NULL)
		fputs (s: "\tud2\n" , stream: file);
	    }
	}
      else if (deleted_debug_label)
	/* Only notes/barriers were seen; neutralize the collected deleted
	   debug labels so -g and -g0 emit identical code.  */
	for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
	  if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	    CODE_LABEL_NUMBER (insn) = -1;
    }
}
10286 | |
10287 | /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */ |
10288 | |
10289 | void |
10290 | ix86_print_patchable_function_entry (FILE *file, |
10291 | unsigned HOST_WIDE_INT patch_area_size, |
10292 | bool record_p) |
10293 | { |
10294 | if (cfun->machine->function_label_emitted) |
10295 | { |
10296 | /* NB: When ix86_print_patchable_function_entry is called after |
10297 | function table has been emitted, we have inserted or queued |
10298 | a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper |
10299 | place. There is nothing to do here. */ |
10300 | return; |
10301 | } |
10302 | |
10303 | default_print_patchable_function_entry (file, patch_area_size, |
10304 | record_p); |
10305 | } |
10306 | |
10307 | /* Output patchable area. NB: default_print_patchable_function_entry |
10308 | isn't available in i386.md. */ |
10309 | |
10310 | void |
10311 | ix86_output_patchable_area (unsigned int patch_area_size, |
10312 | bool record_p) |
10313 | { |
10314 | default_print_patchable_function_entry (asm_out_file, |
10315 | patch_area_size, |
10316 | record_p); |
10317 | } |
10318 | |
10319 | /* Return a scratch register to use in the split stack prologue. The |
10320 | split stack prologue is used for -fsplit-stack. It is the first |
10321 | instructions in the function, even before the regular prologue. |
10322 | The scratch register can be any caller-saved register which is not |
10323 | used for parameters or for the static chain. */ |
10324 | |
10325 | static unsigned int |
10326 | split_stack_prologue_scratch_regno (void) |
10327 | { |
10328 | if (TARGET_64BIT) |
10329 | return R11_REG; |
10330 | else |
10331 | { |
10332 | bool is_fastcall, is_thiscall; |
10333 | int regparm; |
10334 | |
10335 | is_fastcall = (lookup_attribute (attr_name: "fastcall" , |
10336 | TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) |
10337 | != NULL); |
10338 | is_thiscall = (lookup_attribute (attr_name: "thiscall" , |
10339 | TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) |
10340 | != NULL); |
10341 | regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl); |
10342 | |
10343 | if (is_fastcall) |
10344 | { |
10345 | if (DECL_STATIC_CHAIN (cfun->decl)) |
10346 | { |
10347 | sorry ("%<-fsplit-stack%> does not support fastcall with " |
10348 | "nested function" ); |
10349 | return INVALID_REGNUM; |
10350 | } |
10351 | return AX_REG; |
10352 | } |
10353 | else if (is_thiscall) |
10354 | { |
10355 | if (!DECL_STATIC_CHAIN (cfun->decl)) |
10356 | return DX_REG; |
10357 | return AX_REG; |
10358 | } |
10359 | else if (regparm < 3) |
10360 | { |
10361 | if (!DECL_STATIC_CHAIN (cfun->decl)) |
10362 | return CX_REG; |
10363 | else |
10364 | { |
10365 | if (regparm >= 2) |
10366 | { |
10367 | sorry ("%<-fsplit-stack%> does not support 2 register " |
10368 | "parameters for a nested function" ); |
10369 | return INVALID_REGNUM; |
10370 | } |
10371 | return DX_REG; |
10372 | } |
10373 | } |
10374 | else |
10375 | { |
10376 | /* FIXME: We could make this work by pushing a register |
10377 | around the addition and comparison. */ |
10378 | sorry ("%<-fsplit-stack%> does not support 3 register parameters" ); |
10379 | return INVALID_REGNUM; |
10380 | } |
10381 | } |
10382 | } |
10383 | |
/* A SYMBOL_REF for the function which allocates new stack space for
   -fsplit-stack (__morestack).  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more-stack function (__morestack_large_model)
   used when compiling for the large code model.  */

static GTY(()) rtx split_stack_fn_large;
10393 | |
10394 | /* Return location of the stack guard value in the TLS block. */ |
10395 | |
10396 | rtx |
10397 | ix86_split_stack_guard (void) |
10398 | { |
10399 | int offset; |
10400 | addr_space_t as = DEFAULT_TLS_SEG_REG; |
10401 | rtx r; |
10402 | |
10403 | gcc_assert (flag_split_stack); |
10404 | |
10405 | #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET |
10406 | offset = TARGET_THREAD_SPLIT_STACK_OFFSET; |
10407 | #else |
10408 | gcc_unreachable (); |
10409 | #endif |
10410 | |
10411 | r = GEN_INT (offset); |
10412 | r = gen_const_mem (Pmode, r); |
10413 | set_mem_addr_space (r, as); |
10414 | |
10415 | return r; |
10416 | } |
10417 | |
/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.

   Emits a comparison of the would-be stack pointer against the TLS
   stack guard; if there is not enough room, __morestack (or
   __morestack_large_model) is called to allocate a new stack segment,
   otherwise control branches past the call.  */

void
ix86_expand_split_stack_prologue (void)
{
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx_code_label *label;
  rtx limit, current, allocate_rtx, call_fusage;
  rtx_insn *call_insn;
  unsigned int scratch_regno = INVALID_REGNUM;
  rtx scratch_reg = NULL_RTX;
  rtx_code_label *varargs_label = NULL;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_frame_flags ();
  struct ix86_frame &frame = cfun->machine->frame;
  /* Total stack space this function's prologue will allocate.  */
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = ix86_split_stack_guard ();

  /* A scratch register is needed either for the explicit sp-minus-frame
     computation or to hold the callee address for an indirect call.
     If no scratch register is available a sorry was already issued.  */
  if (allocate >= SPLIT_STACK_AVAILABLE
      || flag_force_indirect_call)
    {
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
	return;
    }

  if (allocate >= SPLIT_STACK_AVAILABLE)
    {
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
	 the required frame size.  Since this is the very start of the
	 function, the scratch register can be any caller-saved
	 register which is not used for parameters.  */
      offset = GEN_INT (- allocate);

      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
	{
	  /* We don't use gen_add in this case because it will
	     want to split to lea, but when not optimizing the insn
	     will not be split after this point.  */
	  emit_insn (gen_rtx_SET (scratch_reg,
				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
						offset)));
	}
      else
	{
	  /* The offset does not fit in a 32-bit immediate: load it
	     first, then add the stack pointer.  */
	  emit_move_insn (scratch_reg, offset);
	  emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
	}
      current = scratch_reg;
    }
  else
    current = stack_pointer_rtx;

  ix86_expand_branch (GEU, current, limit, label);
  rtx_insn *jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());

  if (split_stack_fn == NULL_RTX)
    {
      split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack" );
      SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
    }
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
  call_fusage = NULL_RTX;
  rtx pop = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (DImode, R10_REG);
      reg11 = gen_rtx_REG (DImode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
	 Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
	{
	  rtx rax;

	  rax = gen_rtx_REG (word_mode, AX_REG);
	  emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
	  use_reg (fusage: &call_fusage, reg: rax);
	}

      if (flag_force_indirect_call
	  || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
	{
	  HOST_WIDE_INT argval;

	  if (split_stack_fn_large == NULL_RTX)
	    {
	      split_stack_fn_large
		= gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model" );
	      SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
	    }

	  fn = split_stack_fn_large;

	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      /* Materialize the GOT base (rip-relative label address
		 plus GOT offset) and load the callee address from its
		 GOT slot.  */
	      rtx_code_label *label;
	      rtx x;

	      gcc_assert (Pmode == DImode);

	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      emit_insn (gen_set_rip_rex64 (reg10, label));
	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
	      emit_insn (gen_add2_insn (reg10, reg11));
	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fn), UNSPEC_GOT);
	      x = gen_rtx_CONST (Pmode, x);
	      emit_move_insn (reg11, x);
	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
	      x = gen_const_mem (Pmode, x);
	      fn = copy_to_suggested_reg (x, reg11, Pmode);
	    }
	  else if (ix86_cmodel == CM_LARGE)
	    fn = copy_to_suggested_reg (fn, reg11, Pmode);

	  /* When using the large model we need to load the address
	     into a register, and we've run out of registers.  So we
	     switch to a different calling convention, and we call a
	     different function: __morestack_large.  We pass the
	     argument size in the upper 32 bits of r10 and pass the
	     frame size in the lower 32 bits.  */
	  gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
	  gcc_assert ((args_size & 0xffffffff) == args_size);

	  argval = ((args_size << 16) << 16) + allocate;
	  emit_move_insn (reg10, GEN_INT (argval));
	}
      else
	{
	  emit_move_insn (reg10, allocate_rtx);
	  emit_move_insn (reg11, GEN_INT (args_size));
	  use_reg (fusage: &call_fusage, reg: reg11);
	}

      use_reg (fusage: &call_fusage, reg: reg10);
    }
  else
    {
      if (flag_force_indirect_call && flag_pic)
	{
	  /* PIC indirect call: load the callee address from the GOT
	     via the scratch register.  */
	  rtx x;

	  gcc_assert (Pmode == SImode);

	  scratch_reg = gen_rtx_REG (Pmode, scratch_regno);

	  emit_insn (gen_set_got (scratch_reg));
	  x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn),
			      UNSPEC_GOT);
	  x = gen_rtx_CONST (Pmode, x);
	  x = gen_rtx_PLUS (Pmode, scratch_reg, x);
	  x = gen_const_mem (Pmode, x);
	  fn = copy_to_suggested_reg (x, scratch_reg, Pmode);
	}

      /* 32-bit mode: push the two arguments and let the call pop them
	 (POP below), with REG_ARGS_SIZE notes tracking the pushes.  */
      rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
      insn = emit_insn (gen_push (arg: allocate_rtx));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
      pop = GEN_INT (2 * UNITS_PER_WORD);
    }

  /* A forced indirect call needs the callee address in a register.  */
  if (flag_force_indirect_call && !register_operand (fn, VOIDmode))
    {
      scratch_reg = gen_rtx_REG (word_mode, scratch_regno);

      if (GET_MODE (fn) != word_mode)
	fn = gen_rtx_ZERO_EXTEND (word_mode, fn);

      fn = copy_to_suggested_reg (fn, scratch_reg, word_mode);
    }

  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
				GEN_INT (UNITS_PER_WORD), constm1_rtx,
				pop, false);
  add_function_usage_to (call_insn, call_fusage);
  if (!TARGET_64BIT)
    add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
  /* Indicate that this function can't jump to non-local gotos.  */
  make_reg_eh_region_note_nothrow_nononlocal (call_insn);

  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  rtx_insn *ret_insn
    = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  if ((flag_cf_protection & CF_BRANCH))
    {
      /* Insert ENDBR since __morestack will jump back here via indirect
	 call.  */
      rtx cet_eb = gen_nop_endbr ();
      emit_insn_after (cet_eb, ret_insn);
    }

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling __morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
		    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
	 fp -> old fp value
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp value
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.
      */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (scratch_reg,
			      plus_constant (Pmode, frame_reg,
					     words * UNITS_PER_WORD)));

      /* Jump over the label that the no-call path will fall through
	 to; the two paths rejoin after setting the scratch reg.  */
      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;

      emit_barrier ();
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (scratch_reg,
			      plus_constant (Pmode, stack_pointer_rtx,
					     UNITS_PER_WORD)));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
10720 | |
10721 | /* We may have to tell the dataflow pass that the split stack prologue |
10722 | is initializing a scratch register. */ |
10723 | |
10724 | static void |
10725 | ix86_live_on_entry (bitmap regs) |
10726 | { |
10727 | if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) |
10728 | { |
10729 | gcc_assert (flag_split_stack); |
10730 | bitmap_set_bit (regs, split_stack_prologue_scratch_regno ()); |
10731 | } |
10732 | } |
10733 | |
10734 | /* Extract the parts of an RTL expression that is a valid memory address |
10735 | for an instruction. Return false if the structure of the address is |
10736 | grossly off. */ |
10737 | |
10738 | bool |
10739 | ix86_decompose_address (rtx addr, struct ix86_address *out) |
10740 | { |
10741 | rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; |
10742 | rtx base_reg, index_reg; |
10743 | HOST_WIDE_INT scale = 1; |
10744 | rtx scale_rtx = NULL_RTX; |
10745 | rtx tmp; |
10746 | addr_space_t seg = ADDR_SPACE_GENERIC; |
10747 | |
10748 | /* Allow zero-extended SImode addresses, |
10749 | they will be emitted with addr32 prefix. */ |
10750 | if (TARGET_64BIT && GET_MODE (addr) == DImode) |
10751 | { |
10752 | if (GET_CODE (addr) == ZERO_EXTEND |
10753 | && GET_MODE (XEXP (addr, 0)) == SImode) |
10754 | { |
10755 | addr = XEXP (addr, 0); |
10756 | if (CONST_INT_P (addr)) |
10757 | return false; |
10758 | } |
10759 | else if (GET_CODE (addr) == AND |
10760 | && const_32bit_mask (XEXP (addr, 1), DImode)) |
10761 | { |
10762 | addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode); |
10763 | if (addr == NULL_RTX) |
10764 | return false; |
10765 | |
10766 | if (CONST_INT_P (addr)) |
10767 | return false; |
10768 | } |
10769 | else if (GET_CODE (addr) == AND) |
10770 | { |
10771 | /* For ASHIFT inside AND, combine will not generate |
10772 | canonical zero-extend. Merge mask for AND and shift_count |
10773 | to check if it is canonical zero-extend. */ |
10774 | tmp = XEXP (addr, 0); |
10775 | rtx mask = XEXP (addr, 1); |
10776 | if (tmp && GET_CODE(tmp) == ASHIFT) |
10777 | { |
10778 | rtx shift_val = XEXP (tmp, 1); |
10779 | if (CONST_INT_P (mask) && CONST_INT_P (shift_val) |
10780 | && (((unsigned HOST_WIDE_INT) INTVAL(mask) |
10781 | | ((HOST_WIDE_INT_1U << INTVAL(shift_val)) - 1)) |
10782 | == 0xffffffff)) |
10783 | { |
10784 | addr = lowpart_subreg (SImode, XEXP (addr, 0), |
10785 | DImode); |
10786 | } |
10787 | } |
10788 | |
10789 | } |
10790 | } |
10791 | |
10792 | /* Allow SImode subregs of DImode addresses, |
10793 | they will be emitted with addr32 prefix. */ |
10794 | if (TARGET_64BIT && GET_MODE (addr) == SImode) |
10795 | { |
10796 | if (SUBREG_P (addr) |
10797 | && GET_MODE (SUBREG_REG (addr)) == DImode) |
10798 | { |
10799 | addr = SUBREG_REG (addr); |
10800 | if (CONST_INT_P (addr)) |
10801 | return false; |
10802 | } |
10803 | } |
10804 | |
10805 | if (REG_P (addr)) |
10806 | base = addr; |
10807 | else if (SUBREG_P (addr)) |
10808 | { |
10809 | if (REG_P (SUBREG_REG (addr))) |
10810 | base = addr; |
10811 | else |
10812 | return false; |
10813 | } |
10814 | else if (GET_CODE (addr) == PLUS) |
10815 | { |
10816 | rtx addends[4], op; |
10817 | int n = 0, i; |
10818 | |
10819 | op = addr; |
10820 | do |
10821 | { |
10822 | if (n >= 4) |
10823 | return false; |
10824 | addends[n++] = XEXP (op, 1); |
10825 | op = XEXP (op, 0); |
10826 | } |
10827 | while (GET_CODE (op) == PLUS); |
10828 | if (n >= 4) |
10829 | return false; |
10830 | addends[n] = op; |
10831 | |
10832 | for (i = n; i >= 0; --i) |
10833 | { |
10834 | op = addends[i]; |
10835 | switch (GET_CODE (op)) |
10836 | { |
10837 | case MULT: |
10838 | if (index) |
10839 | return false; |
10840 | index = XEXP (op, 0); |
10841 | scale_rtx = XEXP (op, 1); |
10842 | break; |
10843 | |
10844 | case ASHIFT: |
10845 | if (index) |
10846 | return false; |
10847 | index = XEXP (op, 0); |
10848 | tmp = XEXP (op, 1); |
10849 | if (!CONST_INT_P (tmp)) |
10850 | return false; |
10851 | scale = INTVAL (tmp); |
10852 | if ((unsigned HOST_WIDE_INT) scale > 3) |
10853 | return false; |
10854 | scale = 1 << scale; |
10855 | break; |
10856 | |
10857 | case ZERO_EXTEND: |
10858 | op = XEXP (op, 0); |
10859 | if (GET_CODE (op) != UNSPEC) |
10860 | return false; |
10861 | /* FALLTHRU */ |
10862 | |
10863 | case UNSPEC: |
10864 | if (XINT (op, 1) == UNSPEC_TP |
10865 | && TARGET_TLS_DIRECT_SEG_REFS |
10866 | && seg == ADDR_SPACE_GENERIC) |
10867 | seg = DEFAULT_TLS_SEG_REG; |
10868 | else |
10869 | return false; |
10870 | break; |
10871 | |
10872 | case SUBREG: |
10873 | if (!REG_P (SUBREG_REG (op))) |
10874 | return false; |
10875 | /* FALLTHRU */ |
10876 | |
10877 | case REG: |
10878 | if (!base) |
10879 | base = op; |
10880 | else if (!index) |
10881 | index = op; |
10882 | else |
10883 | return false; |
10884 | break; |
10885 | |
10886 | case CONST: |
10887 | case CONST_INT: |
10888 | case SYMBOL_REF: |
10889 | case LABEL_REF: |
10890 | if (disp) |
10891 | return false; |
10892 | disp = op; |
10893 | break; |
10894 | |
10895 | default: |
10896 | return false; |
10897 | } |
10898 | } |
10899 | } |
10900 | else if (GET_CODE (addr) == MULT) |
10901 | { |
10902 | index = XEXP (addr, 0); /* index*scale */ |
10903 | scale_rtx = XEXP (addr, 1); |
10904 | } |
10905 | else if (GET_CODE (addr) == ASHIFT) |
10906 | { |
10907 | /* We're called for lea too, which implements ashift on occasion. */ |
10908 | index = XEXP (addr, 0); |
10909 | tmp = XEXP (addr, 1); |
10910 | if (!CONST_INT_P (tmp)) |
10911 | return false; |
10912 | scale = INTVAL (tmp); |
10913 | if ((unsigned HOST_WIDE_INT) scale > 3) |
10914 | return false; |
10915 | scale = 1 << scale; |
10916 | } |
10917 | else |
10918 | disp = addr; /* displacement */ |
10919 | |
10920 | if (index) |
10921 | { |
10922 | if (REG_P (index)) |
10923 | ; |
10924 | else if (SUBREG_P (index) |
10925 | && REG_P (SUBREG_REG (index))) |
10926 | ; |
10927 | else |
10928 | return false; |
10929 | } |
10930 | |
10931 | /* Extract the integral value of scale. */ |
10932 | if (scale_rtx) |
10933 | { |
10934 | if (!CONST_INT_P (scale_rtx)) |
10935 | return false; |
10936 | scale = INTVAL (scale_rtx); |
10937 | } |
10938 | |
10939 | base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base; |
10940 | index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index; |
10941 | |
10942 | /* Avoid useless 0 displacement. */ |
10943 | if (disp == const0_rtx && (base || index)) |
10944 | disp = NULL_RTX; |
10945 | |
10946 | /* Allow arg pointer and stack pointer as index if there is not scaling. */ |
10947 | if (base_reg && index_reg && scale == 1 |
10948 | && (REGNO (index_reg) == ARG_POINTER_REGNUM |
10949 | || REGNO (index_reg) == FRAME_POINTER_REGNUM |
10950 | || REGNO (index_reg) == SP_REG)) |
10951 | { |
10952 | std::swap (a&: base, b&: index); |
10953 | std::swap (a&: base_reg, b&: index_reg); |
10954 | } |
10955 | |
10956 | /* Special case: %ebp cannot be encoded as a base without a displacement. |
10957 | Similarly %r13. */ |
10958 | if (!disp && base_reg |
10959 | && (REGNO (base_reg) == ARG_POINTER_REGNUM |
10960 | || REGNO (base_reg) == FRAME_POINTER_REGNUM |
10961 | || REGNO (base_reg) == BP_REG |
10962 | || REGNO (base_reg) == R13_REG)) |
10963 | disp = const0_rtx; |
10964 | |
10965 | /* Special case: on K6, [%esi] makes the instruction vector decoded. |
10966 | Avoid this by transforming to [%esi+0]. |
10967 | Reload calls address legitimization without cfun defined, so we need |
10968 | to test cfun for being non-NULL. */ |
10969 | if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun) |
10970 | && base_reg && !index_reg && !disp |
10971 | && REGNO (base_reg) == SI_REG) |
10972 | disp = const0_rtx; |
10973 | |
10974 | /* Special case: encode reg+reg instead of reg*2. */ |
10975 | if (!base && index && scale == 2) |
10976 | base = index, base_reg = index_reg, scale = 1; |
10977 | |
10978 | /* Special case: scaling cannot be encoded without base or displacement. */ |
10979 | if (!base && !disp && index && scale != 1) |
10980 | disp = const0_rtx; |
10981 | |
10982 | out->base = base; |
10983 | out->index = index; |
10984 | out->disp = disp; |
10985 | out->scale = scale; |
10986 | out->seg = seg; |
10987 | |
10988 | return true; |
10989 | } |
10990 | |
10991 | /* Return cost of the memory address x. |
10992 | For i386, it is better to use a complex address than let gcc copy |
10993 | the address into a reg and make a new pseudo. But not if the address |
10994 | requires to two regs - that would mean more pseudos with longer |
10995 | lifetimes. */ |
10996 | static int |
10997 | ix86_address_cost (rtx x, machine_mode, addr_space_t, bool) |
10998 | { |
10999 | struct ix86_address parts; |
11000 | int cost = 1; |
11001 | int ok = ix86_decompose_address (addr: x, out: &parts); |
11002 | |
11003 | gcc_assert (ok); |
11004 | |
11005 | if (parts.base && SUBREG_P (parts.base)) |
11006 | parts.base = SUBREG_REG (parts.base); |
11007 | if (parts.index && SUBREG_P (parts.index)) |
11008 | parts.index = SUBREG_REG (parts.index); |
11009 | |
11010 | /* Attempt to minimize number of registers in the address by increasing |
11011 | address cost for each used register. We don't increase address cost |
11012 | for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx" |
11013 | is not invariant itself it most likely means that base or index is not |
11014 | invariant. Therefore only "pic_offset_table_rtx" could be hoisted out, |
11015 | which is not profitable for x86. */ |
11016 | if (parts.base |
11017 | && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) |
11018 | && (current_pass->type == GIMPLE_PASS |
11019 | || !pic_offset_table_rtx |
11020 | || !REG_P (parts.base) |
11021 | || REGNO (pic_offset_table_rtx) != REGNO (parts.base))) |
11022 | cost++; |
11023 | |
11024 | if (parts.index |
11025 | && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) |
11026 | && (current_pass->type == GIMPLE_PASS |
11027 | || !pic_offset_table_rtx |
11028 | || !REG_P (parts.index) |
11029 | || REGNO (pic_offset_table_rtx) != REGNO (parts.index))) |
11030 | cost++; |
11031 | |
11032 | /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b, |
11033 | since it's predecode logic can't detect the length of instructions |
11034 | and it degenerates to vector decoded. Increase cost of such |
11035 | addresses here. The penalty is minimally 2 cycles. It may be worthwhile |
11036 | to split such addresses or even refuse such addresses at all. |
11037 | |
11038 | Following addressing modes are affected: |
11039 | [base+scale*index] |
11040 | [scale*index+disp] |
11041 | [base+index] |
11042 | |
11043 | The first and last case may be avoidable by explicitly coding the zero in |
11044 | memory address, but I don't have AMD-K6 machine handy to check this |
11045 | theory. */ |
11046 | |
11047 | if (TARGET_CPU_P (K6) |
11048 | && ((!parts.disp && parts.base && parts.index && parts.scale != 1) |
11049 | || (parts.disp && !parts.base && parts.index && parts.scale != 1) |
11050 | || (!parts.disp && parts.base && parts.index && parts.scale == 1))) |
11051 | cost += 10; |
11052 | |
11053 | return cost; |
11054 | } |
11055 | |
11056 | /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as |
11057 | this is used for to form addresses to local data when -fPIC is in |
11058 | use. */ |
11059 | |
11060 | static bool |
11061 | darwin_local_data_pic (rtx disp) |
11062 | { |
11063 | return (GET_CODE (disp) == UNSPEC |
11064 | && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET); |
11065 | } |
11066 | |
11067 | /* True if the function symbol operand X should be loaded from GOT. |
11068 | If CALL_P is true, X is a call operand. |
11069 | |
11070 | NB: -mno-direct-extern-access doesn't force load from GOT for |
11071 | call. |
11072 | |
11073 | NB: In 32-bit mode, only non-PIC is allowed in inline assembly |
11074 | statements, since a PIC register could not be available at the |
11075 | call site. */ |
11076 | |
11077 | bool |
11078 | ix86_force_load_from_GOT_p (rtx x, bool call_p) |
11079 | { |
11080 | return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X)) |
11081 | && !TARGET_PECOFF && !TARGET_MACHO |
11082 | && (!flag_pic || this_is_asm_operands) |
11083 | && ix86_cmodel != CM_LARGE |
11084 | && ix86_cmodel != CM_LARGE_PIC |
11085 | && GET_CODE (x) == SYMBOL_REF |
11086 | && ((!call_p |
11087 | && (!ix86_direct_extern_access |
11088 | || (SYMBOL_REF_DECL (x) |
11089 | && lookup_attribute (attr_name: "nodirect_extern_access" , |
11090 | DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))) |
11091 | || (SYMBOL_REF_FUNCTION_P (x) |
11092 | && (!flag_plt |
11093 | || (SYMBOL_REF_DECL (x) |
11094 | && lookup_attribute (attr_name: "noplt" , |
11095 | DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))) |
11096 | && !SYMBOL_REF_LOCAL_P (x)); |
11097 | } |
11098 | |
11099 | /* Determine if a given RTX is a valid constant. We already know this |
11100 | satisfies CONSTANT_P. */ |
11101 | |
11102 | static bool |
11103 | ix86_legitimate_constant_p (machine_mode mode, rtx x) |
11104 | { |
11105 | switch (GET_CODE (x)) |
11106 | { |
11107 | case CONST: |
11108 | x = XEXP (x, 0); |
11109 | |
11110 | if (GET_CODE (x) == PLUS) |
11111 | { |
11112 | if (!CONST_INT_P (XEXP (x, 1))) |
11113 | return false; |
11114 | x = XEXP (x, 0); |
11115 | } |
11116 | |
11117 | if (TARGET_MACHO && darwin_local_data_pic (disp: x)) |
11118 | return true; |
11119 | |
11120 | /* Only some unspecs are valid as "constants". */ |
11121 | if (GET_CODE (x) == UNSPEC) |
11122 | switch (XINT (x, 1)) |
11123 | { |
11124 | case UNSPEC_GOT: |
11125 | case UNSPEC_GOTOFF: |
11126 | case UNSPEC_PLTOFF: |
11127 | return TARGET_64BIT; |
11128 | case UNSPEC_TPOFF: |
11129 | case UNSPEC_NTPOFF: |
11130 | x = XVECEXP (x, 0, 0); |
11131 | return (GET_CODE (x) == SYMBOL_REF |
11132 | && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); |
11133 | case UNSPEC_DTPOFF: |
11134 | x = XVECEXP (x, 0, 0); |
11135 | return (GET_CODE (x) == SYMBOL_REF |
11136 | && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC); |
11137 | default: |
11138 | return false; |
11139 | } |
11140 | |
11141 | /* We must have drilled down to a symbol. */ |
11142 | if (GET_CODE (x) == LABEL_REF) |
11143 | return true; |
11144 | if (GET_CODE (x) != SYMBOL_REF) |
11145 | return false; |
11146 | /* FALLTHRU */ |
11147 | |
11148 | case SYMBOL_REF: |
11149 | /* TLS symbols are never valid. */ |
11150 | if (SYMBOL_REF_TLS_MODEL (x)) |
11151 | return false; |
11152 | |
11153 | /* DLLIMPORT symbols are never valid. */ |
11154 | if (TARGET_DLLIMPORT_DECL_ATTRIBUTES |
11155 | && SYMBOL_REF_DLLIMPORT_P (x)) |
11156 | return false; |
11157 | |
11158 | #if TARGET_MACHO |
11159 | /* mdynamic-no-pic */ |
11160 | if (MACHO_DYNAMIC_NO_PIC_P) |
11161 | return machopic_symbol_defined_p (x); |
11162 | #endif |
11163 | |
11164 | /* External function address should be loaded |
11165 | via the GOT slot to avoid PLT. */ |
11166 | if (ix86_force_load_from_GOT_p (x)) |
11167 | return false; |
11168 | |
11169 | break; |
11170 | |
11171 | CASE_CONST_SCALAR_INT: |
11172 | if (ix86_endbr_immediate_operand (x, VOIDmode)) |
11173 | return false; |
11174 | |
11175 | switch (mode) |
11176 | { |
11177 | case E_TImode: |
11178 | if (TARGET_64BIT) |
11179 | return true; |
11180 | /* FALLTHRU */ |
11181 | case E_OImode: |
11182 | case E_XImode: |
11183 | if (!standard_sse_constant_p (x, pred_mode: mode) |
11184 | && GET_MODE_SIZE (TARGET_AVX512F && TARGET_EVEX512 |
11185 | ? XImode |
11186 | : (TARGET_AVX |
11187 | ? OImode |
11188 | : (TARGET_SSE2 |
11189 | ? TImode : DImode))) < GET_MODE_SIZE (mode)) |
11190 | return false; |
11191 | default: |
11192 | break; |
11193 | } |
11194 | break; |
11195 | |
11196 | case CONST_VECTOR: |
11197 | if (!standard_sse_constant_p (x, pred_mode: mode)) |
11198 | return false; |
11199 | break; |
11200 | |
11201 | case CONST_DOUBLE: |
11202 | if (mode == E_BFmode) |
11203 | return false; |
11204 | |
11205 | default: |
11206 | break; |
11207 | } |
11208 | |
11209 | /* Otherwise we handle everything else in the move patterns. */ |
11210 | return true; |
11211 | } |
11212 | |
11213 | /* Determine if it's legal to put X into the constant pool. This |
11214 | is not possible for the address of thread-local symbols, which |
11215 | is checked above. */ |
11216 | |
11217 | static bool |
11218 | ix86_cannot_force_const_mem (machine_mode mode, rtx x) |
11219 | { |
11220 | /* We can put any immediate constant in memory. */ |
11221 | switch (GET_CODE (x)) |
11222 | { |
11223 | CASE_CONST_ANY: |
11224 | return false; |
11225 | |
11226 | default: |
11227 | break; |
11228 | } |
11229 | |
11230 | return !ix86_legitimate_constant_p (mode, x); |
11231 | } |
11232 | |
11233 | /* Nonzero if the symbol is marked as dllimport, or as stub-variable, |
11234 | otherwise zero. */ |
11235 | |
11236 | static bool |
11237 | is_imported_p (rtx x) |
11238 | { |
11239 | if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES |
11240 | || GET_CODE (x) != SYMBOL_REF) |
11241 | return false; |
11242 | |
11243 | return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x); |
11244 | } |
11245 | |
11246 | |
11247 | /* Nonzero if the constant value X is a legitimate general operand |
11248 | when generating PIC code. It is given that flag_pic is on and |
11249 | that X satisfies CONSTANT_P. */ |
11250 | |
11251 | bool |
11252 | legitimate_pic_operand_p (rtx x) |
11253 | { |
11254 | rtx inner; |
11255 | |
11256 | switch (GET_CODE (x)) |
11257 | { |
11258 | case CONST: |
11259 | inner = XEXP (x, 0); |
11260 | if (GET_CODE (inner) == PLUS |
11261 | && CONST_INT_P (XEXP (inner, 1))) |
11262 | inner = XEXP (inner, 0); |
11263 | |
11264 | /* Only some unspecs are valid as "constants". */ |
11265 | if (GET_CODE (inner) == UNSPEC) |
11266 | switch (XINT (inner, 1)) |
11267 | { |
11268 | case UNSPEC_GOT: |
11269 | case UNSPEC_GOTOFF: |
11270 | case UNSPEC_PLTOFF: |
11271 | return TARGET_64BIT; |
11272 | case UNSPEC_TPOFF: |
11273 | x = XVECEXP (inner, 0, 0); |
11274 | return (GET_CODE (x) == SYMBOL_REF |
11275 | && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); |
11276 | case UNSPEC_MACHOPIC_OFFSET: |
11277 | return legitimate_pic_address_disp_p (x); |
11278 | default: |
11279 | return false; |
11280 | } |
11281 | /* FALLTHRU */ |
11282 | |
11283 | case SYMBOL_REF: |
11284 | case LABEL_REF: |
11285 | return legitimate_pic_address_disp_p (x); |
11286 | |
11287 | default: |
11288 | return true; |
11289 | } |
11290 | } |
11291 | |
11292 | /* Determine if a given CONST RTX is a valid memory displacement |
11293 | in PIC mode. */ |
11294 | |
11295 | bool |
11296 | legitimate_pic_address_disp_p (rtx disp) |
11297 | { |
11298 | bool saw_plus; |
11299 | |
11300 | /* In 64bit mode we can allow direct addresses of symbols and labels |
11301 | when they are not dynamic symbols. */ |
11302 | if (TARGET_64BIT) |
11303 | { |
11304 | rtx op0 = disp, op1; |
11305 | |
11306 | switch (GET_CODE (disp)) |
11307 | { |
11308 | case LABEL_REF: |
11309 | return true; |
11310 | |
11311 | case CONST: |
11312 | if (GET_CODE (XEXP (disp, 0)) != PLUS) |
11313 | break; |
11314 | op0 = XEXP (XEXP (disp, 0), 0); |
11315 | op1 = XEXP (XEXP (disp, 0), 1); |
11316 | if (!CONST_INT_P (op1)) |
11317 | break; |
11318 | if (GET_CODE (op0) == UNSPEC |
11319 | && (XINT (op0, 1) == UNSPEC_DTPOFF |
11320 | || XINT (op0, 1) == UNSPEC_NTPOFF) |
11321 | && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1)) |
11322 | return true; |
11323 | if (INTVAL (op1) >= 16*1024*1024 |
11324 | || INTVAL (op1) < -16*1024*1024) |
11325 | break; |
11326 | if (GET_CODE (op0) == LABEL_REF) |
11327 | return true; |
11328 | if (GET_CODE (op0) == CONST |
11329 | && GET_CODE (XEXP (op0, 0)) == UNSPEC |
11330 | && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL) |
11331 | return true; |
11332 | if (GET_CODE (op0) == UNSPEC |
11333 | && XINT (op0, 1) == UNSPEC_PCREL) |
11334 | return true; |
11335 | if (GET_CODE (op0) != SYMBOL_REF) |
11336 | break; |
11337 | /* FALLTHRU */ |
11338 | |
11339 | case SYMBOL_REF: |
11340 | /* TLS references should always be enclosed in UNSPEC. |
11341 | The dllimported symbol needs always to be resolved. */ |
11342 | if (SYMBOL_REF_TLS_MODEL (op0) |
11343 | || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0))) |
11344 | return false; |
11345 | |
11346 | if (TARGET_PECOFF) |
11347 | { |
11348 | if (is_imported_p (x: op0)) |
11349 | return true; |
11350 | |
11351 | if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0)) |
11352 | break; |
11353 | |
11354 | /* Non-external-weak function symbols need to be resolved only |
11355 | for the large model. Non-external symbols don't need to be |
11356 | resolved for large and medium models. For the small model, |
11357 | we don't need to resolve anything here. */ |
11358 | if ((ix86_cmodel != CM_LARGE_PIC |
11359 | && SYMBOL_REF_FUNCTION_P (op0) |
11360 | && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0))) |
11361 | || !SYMBOL_REF_EXTERNAL_P (op0) |
11362 | || ix86_cmodel == CM_SMALL_PIC) |
11363 | return true; |
11364 | } |
11365 | else if (!SYMBOL_REF_FAR_ADDR_P (op0) |
11366 | && (SYMBOL_REF_LOCAL_P (op0) |
11367 | || ((ix86_direct_extern_access |
11368 | && !(SYMBOL_REF_DECL (op0) |
11369 | && lookup_attribute (attr_name: "nodirect_extern_access" , |
11370 | DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0))))) |
11371 | && HAVE_LD_PIE_COPYRELOC |
11372 | && flag_pie |
11373 | && !SYMBOL_REF_WEAK (op0) |
11374 | && !SYMBOL_REF_FUNCTION_P (op0))) |
11375 | && ix86_cmodel != CM_LARGE_PIC) |
11376 | return true; |
11377 | break; |
11378 | |
11379 | default: |
11380 | break; |
11381 | } |
11382 | } |
11383 | if (GET_CODE (disp) != CONST) |
11384 | return false; |
11385 | disp = XEXP (disp, 0); |
11386 | |
11387 | if (TARGET_64BIT) |
11388 | { |
11389 | /* We are unsafe to allow PLUS expressions. This limit allowed distance |
11390 | of GOT tables. We should not need these anyway. */ |
11391 | if (GET_CODE (disp) != UNSPEC |
11392 | || (XINT (disp, 1) != UNSPEC_GOTPCREL |
11393 | && XINT (disp, 1) != UNSPEC_GOTOFF |
11394 | && XINT (disp, 1) != UNSPEC_PCREL |
11395 | && XINT (disp, 1) != UNSPEC_PLTOFF)) |
11396 | return false; |
11397 | |
11398 | if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF |
11399 | && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF) |
11400 | return false; |
11401 | return true; |
11402 | } |
11403 | |
11404 | saw_plus = false; |
11405 | if (GET_CODE (disp) == PLUS) |
11406 | { |
11407 | if (!CONST_INT_P (XEXP (disp, 1))) |
11408 | return false; |
11409 | disp = XEXP (disp, 0); |
11410 | saw_plus = true; |
11411 | } |
11412 | |
11413 | if (TARGET_MACHO && darwin_local_data_pic (disp)) |
11414 | return true; |
11415 | |
11416 | if (GET_CODE (disp) != UNSPEC) |
11417 | return false; |
11418 | |
11419 | switch (XINT (disp, 1)) |
11420 | { |
11421 | case UNSPEC_GOT: |
11422 | if (saw_plus) |
11423 | return false; |
11424 | /* We need to check for both symbols and labels because VxWorks loads |
11425 | text labels with @GOT rather than @GOTOFF. See gotoff_operand for |
11426 | details. */ |
11427 | return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF |
11428 | || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF); |
11429 | case UNSPEC_GOTOFF: |
11430 | /* Refuse GOTOFF in 64bit mode since it is always 64bit when used. |
11431 | While ABI specify also 32bit relocation but we don't produce it in |
11432 | small PIC model at all. */ |
11433 | if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF |
11434 | || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF) |
11435 | && !TARGET_64BIT) |
11436 | return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode); |
11437 | return false; |
11438 | case UNSPEC_GOTTPOFF: |
11439 | case UNSPEC_GOTNTPOFF: |
11440 | case UNSPEC_INDNTPOFF: |
11441 | if (saw_plus) |
11442 | return false; |
11443 | disp = XVECEXP (disp, 0, 0); |
11444 | return (GET_CODE (disp) == SYMBOL_REF |
11445 | && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC); |
11446 | case UNSPEC_NTPOFF: |
11447 | disp = XVECEXP (disp, 0, 0); |
11448 | return (GET_CODE (disp) == SYMBOL_REF |
11449 | && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC); |
11450 | case UNSPEC_DTPOFF: |
11451 | disp = XVECEXP (disp, 0, 0); |
11452 | return (GET_CODE (disp) == SYMBOL_REF |
11453 | && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC); |
11454 | } |
11455 | |
11456 | return false; |
11457 | } |
11458 | |
11459 | /* Determine if op is suitable RTX for an address register. |
11460 | Return naked register if a register or a register subreg is |
11461 | found, otherwise return NULL_RTX. */ |
11462 | |
11463 | static rtx |
11464 | ix86_validate_address_register (rtx op) |
11465 | { |
11466 | machine_mode mode = GET_MODE (op); |
11467 | |
11468 | /* Only SImode or DImode registers can form the address. */ |
11469 | if (mode != SImode && mode != DImode) |
11470 | return NULL_RTX; |
11471 | |
11472 | if (REG_P (op)) |
11473 | return op; |
11474 | else if (SUBREG_P (op)) |
11475 | { |
11476 | rtx reg = SUBREG_REG (op); |
11477 | |
11478 | if (!REG_P (reg)) |
11479 | return NULL_RTX; |
11480 | |
11481 | mode = GET_MODE (reg); |
11482 | |
11483 | /* Don't allow SUBREGs that span more than a word. It can |
11484 | lead to spill failures when the register is one word out |
11485 | of a two word structure. */ |
11486 | if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
11487 | return NULL_RTX; |
11488 | |
11489 | /* Allow only SUBREGs of non-eliminable hard registers. */ |
11490 | if (register_no_elim_operand (reg, mode)) |
11491 | return reg; |
11492 | } |
11493 | |
11494 | /* Op is not a register. */ |
11495 | return NULL_RTX; |
11496 | } |
11497 | |
/* Determine which memory address register set INSN can use.  Returns
   the most restrictive ADDR_* attribute value across the insn's
   enabled alternatives (or for the current alternative if one is
   selected).  */

static enum attr_addr
ix86_memory_address_reg_class (rtx_insn* insn)
{
  /* LRA can do some initialization with NULL insn,
     return maximum register class in this case.  */
  enum attr_addr addr_rclass = ADDR_GPR32;

  if (!insn)
    return addr_rclass;

  /* Inline asm gets the class chosen by -mapx-inline-asm-use-gpr32.  */
  if (asm_noperands (PATTERN (insn)) >= 0
      || GET_CODE (PATTERN (insn)) == ASM_INPUT)
    return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;

  /* Return maximum register class for unrecognized instructions.  */
  if (INSN_CODE (insn) < 0)
    return addr_rclass;

  /* Try to recognize the insn before calling get_attr_addr.
     Save current recog_data and current alternative.  */
  struct recog_data_d saved_recog_data = recog_data;
  int saved_alternative = which_alternative;

  /* Update recog_data for processing of alternatives.  */
  extract_insn_cached (insn);

  /* If current alternative is not set, loop through enabled
     alternatives and get the most limited register class.  */
  if (saved_alternative == -1)
    {
      alternative_mask enabled = get_enabled_alternatives (insn);

      for (int i = 0; i < recog_data.n_alternatives; i++)
	{
	  if (!TEST_BIT (enabled, i))
	    continue;

	  /* get_attr_addr reads the global which_alternative.  */
	  which_alternative = i;
	  addr_rclass = MIN (addr_rclass, get_attr_addr (insn));
	}
    }
  else
    {
      which_alternative = saved_alternative;
      addr_rclass = get_attr_addr (insn);
    }

  /* Restore the recognizer state clobbered above.  */
  recog_data = saved_recog_data;
  which_alternative = saved_alternative;

  return addr_rclass;
}
11552 | |
11553 | /* Return memory address register class insn can use. */ |
11554 | |
11555 | enum reg_class |
11556 | ix86_insn_base_reg_class (rtx_insn* insn) |
11557 | { |
11558 | switch (ix86_memory_address_reg_class (insn)) |
11559 | { |
11560 | case ADDR_GPR8: |
11561 | return LEGACY_GENERAL_REGS; |
11562 | case ADDR_GPR16: |
11563 | return GENERAL_GPR16; |
11564 | case ADDR_GPR32: |
11565 | break; |
11566 | default: |
11567 | gcc_unreachable (); |
11568 | } |
11569 | |
11570 | return BASE_REG_CLASS; |
11571 | } |
11572 | |
11573 | bool |
11574 | ix86_regno_ok_for_insn_base_p (int regno, rtx_insn* insn) |
11575 | { |
11576 | switch (ix86_memory_address_reg_class (insn)) |
11577 | { |
11578 | case ADDR_GPR8: |
11579 | return LEGACY_INT_REGNO_P (regno); |
11580 | case ADDR_GPR16: |
11581 | return GENERAL_GPR16_REGNO_P (regno); |
11582 | case ADDR_GPR32: |
11583 | break; |
11584 | default: |
11585 | gcc_unreachable (); |
11586 | } |
11587 | |
11588 | return GENERAL_REGNO_P (regno); |
11589 | } |
11590 | |
11591 | enum reg_class |
11592 | ix86_insn_index_reg_class (rtx_insn* insn) |
11593 | { |
11594 | switch (ix86_memory_address_reg_class (insn)) |
11595 | { |
11596 | case ADDR_GPR8: |
11597 | return LEGACY_INDEX_REGS; |
11598 | case ADDR_GPR16: |
11599 | return INDEX_GPR16; |
11600 | case ADDR_GPR32: |
11601 | break; |
11602 | default: |
11603 | gcc_unreachable (); |
11604 | } |
11605 | |
11606 | return INDEX_REG_CLASS; |
11607 | } |
11608 | |
11609 | /* Recognizes RTL expressions that are valid memory addresses for an |
11610 | instruction. The MODE argument is the machine mode for the MEM |
11611 | expression that wants to use this address. |
11612 | |
11613 | It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should |
11614 | convert common non-canonical forms to canonical form so that they will |
11615 | be recognized. */ |
11616 | |
static bool
ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
			   code_helper = ERROR_MARK)
{
  /* The machine_mode and code_helper arguments are unnamed: address
     validity on this target does not depend on them.  */
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  addr_space_t seg;

  if (ix86_decompose_address (addr, out: &parts) == 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;
  seg = parts.seg;

  /* Validate base register.  */
  if (base)
    {
      rtx reg = ix86_validate_address_register (op: base);

      if (reg == NULL_RTX)
	return false;

      /* STRICT checking accepts only registers valid after reload;
	 nonstrict checking also accepts pseudos.  */
      unsigned int regno = REGNO (reg);
      if ((strict && !REGNO_OK_FOR_BASE_P (regno))
	  || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
	/* Base is not valid.  */
	return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg = ix86_validate_address_register (op: index);

      if (reg == NULL_RTX)
	return false;

      unsigned int regno = REGNO (reg);
      if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
	  || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
	/* Index is not valid.  */
	return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Address override works only on the (%reg) part of %fs:(%reg).  */
  if (seg != ADDR_SPACE_GENERIC
      && ((base && GET_MODE (base) != word_mode)
	  || (index && GET_MODE (index) != word_mode)))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
	/* Scale without index.  */
	return false;

      if (scale != 2 && scale != 4 && scale != 8)
	/* Scale is not a valid multiplier.  */
	return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      /* ENDBR-encoding immediates are rejected so they can never
	 appear in an address and form a gadget.  */
      if (ix86_endbr_immediate_operand (disp, VOIDmode))
	return false;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	     when used.  While ABI specify also 32bit relocations, we
	     don't produce them at all and use IP relative instead.
	     Allow GOT in 32bit mode for both PIC and non-PIC if symbol
	     should be loaded via GOT.  */
	  case UNSPEC_GOT:
	    if (!TARGET_64BIT
		&& ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	      goto is_legitimate_pic;
	    /* FALLTHRU */
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    if (!TARGET_64BIT)
	      goto is_legitimate_pic;

	    /* 64bit address unspec.  */
	    return false;

	  case UNSPEC_GOTPCREL:
	    if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	      goto is_legitimate_pic;
	    /* FALLTHRU */
	  case UNSPEC_PCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  /* TLS-related unspecs are valid displacements as-is.  */
	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  default:
	    /* Invalid address unspec.  */
	    return false;
	  }

      else if (SYMBOLIC_CONST (disp)
	       && (flag_pic
#if TARGET_MACHO
		   || (MACHOPIC_INDIRECT
		       && !machopic_operand_p (disp))
#endif
		   ))
	{

	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		/* Non-constant pic memory reference.  */
		return false;
	    }
	  else if ((!TARGET_MACHO || flag_pic)
		   && ! legitimate_pic_address_disp_p (disp))
	    /* Displacement is an invalid pic construct.  */
	    return false;
#if TARGET_MACHO
	  else if (MACHO_DYNAMIC_NO_PIC_P
		   && !ix86_legitimate_constant_p (Pmode, disp))
	    /* displacment must be referenced via non_lazy_pointer */
	    return false;
#endif

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && !CONST_INT_P (disp)
	       && (GET_CODE (disp) != CONST
		   || !ix86_legitimate_constant_p (Pmode, x: disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !ix86_legitimate_constant_p (Pmode, x: disp)))
	/* Displacement is not constant.  */
	return false;
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	/* Displacement is out of range.  */
	return false;
      /* In x32 mode, constant addresses are sign extended to 64bit, so
	 we have to prevent addresses from 0x80000000 to 0xffffffff.  */
      else if (TARGET_X32 && !(index || base)
	       && CONST_INT_P (disp)
	       && val_signbit_known_set_p (SImode, INTVAL (disp)))
	return false;
    }

  /* Everything looks valid.  */
  return true;
}
11816 | |
11817 | /* Determine if a given RTX is a valid constant address. */ |
11818 | |
11819 | bool |
11820 | constant_address_p (rtx x) |
11821 | { |
11822 | return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, addr: x, strict: 1); |
11823 | } |
11824 | |
11825 | /* Return a unique alias set for the GOT. */ |
11826 | |
11827 | alias_set_type |
11828 | ix86_GOT_alias_set (void) |
11829 | { |
11830 | static alias_set_type set = -1; |
11831 | if (set == -1) |
11832 | set = new_alias_set (); |
11833 | return set; |
11834 | } |
11835 | |
11836 | /* Return a legitimate reference for ORIG (an address) using the |
11837 | register REG. If REG is 0, a new pseudo is generated. |
11838 | |
11839 | There are two types of references that must be handled: |
11840 | |
11841 | 1. Global data references must load the address from the GOT, via |
11842 | the PIC reg. An insn is emitted to do this load, and the reg is |
11843 | returned. |
11844 | |
11845 | 2. Static data references, constant pool addresses, and code labels |
11846 | compute the address as an offset from the GOT, whose base is in |
11847 | the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to |
11848 | differentiate them from global data objects. The returned |
11849 | address is the PIC reg + an unspec constant. |
11850 | |
11851 | TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC |
11852 | reg also appears in the address. */ |
11853 | |
rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  /* dllimport symbols on 64-bit PE-COFF are resolved through the
     __imp_ indirection instead of the GOT.  */
  if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, inreg: true);
      if (tmp)
	return tmp;
    }

  if (TARGET_64BIT && legitimate_pic_address_disp_p (disp: addr))
    new_rtx = addr;
  else if ((!TARGET_64BIT
	    || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
	   && !TARGET_PECOFF
	   && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement
	 from the PIC base address (@GOTOFF).  */
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);

      /* Wrap the symbol part (and re-attach any constant offset)
	 in an UNSPEC_GOTOFF.  */
      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);

      new_rtx = gen_rtx_CONST (Pmode, new_rtx);

      if (TARGET_64BIT)
	new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);

      /* Add in the PIC base, either into REG (emitting the addition)
	 or as a bare PLUS expression.  */
      if (reg != 0)
	{
	  gcc_assert (REG_P (reg));
	  new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
					 new_rtx, reg, 1, OPTAB_DIRECT);
	}
      else
	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
	   /* We can't always use @GOTOFF for text labels
	      on VxWorks, see gotoff_operand.  */
	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, inreg: true);
      if (tmp)
	return tmp;

      /* For x64 PE-COFF there is no GOT table,
	 so we use address directly.  */
      if (TARGET_64BIT && TARGET_PECOFF)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	}
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
	{
	  /* RIP-relative load from the GOT (@GOTPCREL).  */
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
				    UNSPEC_GOTPCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
	}
      else
	{
	  /* This symbol must be referenced via a load
	     from the Global Offset Table (@GOT).  */
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

	  if (TARGET_64BIT)
	    new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);

	  if (reg != 0)
	    {
	      gcc_assert (REG_P (reg));
	      new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
					     new_rtx, reg, 1, OPTAB_DIRECT);
	    }
	  else
	    new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
	}

      new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
    }
  else
    {
      if (CONST_INT_P (addr)
	  && !x86_64_immediate_operand (addr, VOIDmode))
	new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
      else if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  gcc_assert (GET_CODE (addr) == PLUS);
	}

      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant
	     offset from a @GOTOFF symbol reference.  */
	  if (!TARGET_PECOFF
	      && gotoff_operand (op0, Pmode)
	      && CONST_INT_P (op1))
	    {
	      if (!TARGET_64BIT)
		{
		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					    UNSPEC_GOTOFF);
		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

		  if (reg != 0)
		    {
		      gcc_assert (REG_P (reg));
		      new_rtx = expand_simple_binop (Pmode, PLUS,
						     pic_offset_table_rtx,
						     new_rtx, reg, 1,
						     OPTAB_DIRECT);
		    }
		  else
		    new_rtx
		      = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
		}
	      else
		{
		  /* Offsets outside +-16MB do not fit the @GOTOFF
		     addressing form; force operands into registers.  */
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);

		      new_rtx
			= gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
		    }
		}
	    }
	  else
	    {
	      /* Legitimize both halves of the PLUS recursively and
		 recombine them.  */
	      rtx base = legitimize_pic_address (orig: op0, reg);
	      machine_mode mode = GET_MODE (base);
	      new_rtx
		= legitimize_pic_address (orig: op1, reg: base == reg ? NULL_RTX : reg);

	      if (CONST_INT_P (new_rtx))
		{
		  if (INTVAL (new_rtx) < -16*1024*1024
		      || INTVAL (new_rtx) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (new_rtx, mode))
			new_rtx = force_reg (mode, new_rtx);

		      new_rtx
			= gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
		    }
		  else
		    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
		}
	      else
		{
		  /* For %rip addressing, we have to use
		     just disp32, not base nor index.  */
		  if (TARGET_64BIT
		      && (GET_CODE (base) == SYMBOL_REF
			  || GET_CODE (base) == LABEL_REF))
		    base = force_reg (mode, base);
		  if (GET_CODE (new_rtx) == PLUS
		      && CONSTANT_P (XEXP (new_rtx, 1)))
		    {
		      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
		      new_rtx = XEXP (new_rtx, 1);
		    }
		  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
		}
	    }
	}
    }
  return new_rtx;
}
12064 | |
12065 | /* Load the thread pointer. If TO_REG is true, force it into a register. */ |
12066 | |
12067 | static rtx |
12068 | get_thread_pointer (machine_mode tp_mode, bool to_reg) |
12069 | { |
12070 | rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP); |
12071 | |
12072 | if (GET_MODE (tp) != tp_mode) |
12073 | { |
12074 | gcc_assert (GET_MODE (tp) == SImode); |
12075 | gcc_assert (tp_mode == DImode); |
12076 | |
12077 | tp = gen_rtx_ZERO_EXTEND (tp_mode, tp); |
12078 | } |
12079 | |
12080 | if (to_reg) |
12081 | tp = copy_to_mode_reg (tp_mode, tp); |
12082 | |
12083 | return tp; |
12084 | } |
12085 | |
12086 | /* Construct the SYMBOL_REF for the tls_get_addr function. */ |
12087 | |
/* Cached SYMBOL_REF for the TLS runtime helper; created lazily below.  */
static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      /* 32-bit GNU TLS uses "___tls_get_addr"; everything else uses
	 "__tls_get_addr".  */
      const char *sym
	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
	   ? "___tls_get_addr" : "__tls_get_addr" );

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
    {
      /* In the large PIC model the helper is addressed as
	 PIC register + symbol@PLTOFF rather than as a bare symbol.  */
      rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
				   UNSPEC_PLTOFF);
      return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
			   gen_rtx_CONST (Pmode, unspec));
    }

  return ix86_tls_symbol;
}
12112 | |
12113 | /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */ |
12114 | |
/* Cached SYMBOL_REF for _TLS_MODULE_BASE_; created lazily below.  */
static GTY(()) rtx ix86_tls_module_base_symbol;

rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
	= gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_" );

      /* Tag the symbol with the global-dynamic TLS model so it is
	 handled like other GD-model TLS references.  */
      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}
12131 | |
12132 | /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is |
12133 | false if we expect this to be used for a memory address and true if |
12134 | we expect to load the address into a register. */ |
12135 | |
rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  machine_mode tp_mode = Pmode;
  int type;

  /* Fall back to global dynamic model if tool chain cannot support local
     dynamic.  */
  if (TARGET_SUN_TLS && !TARGET_64BIT
      && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
      && model == TLS_MODEL_LOCAL_DYNAMIC)
    model = TLS_MODEL_GLOBAL_DYNAMIC;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      /* 32-bit GD needs the PIC register (or a freshly computed GOT
	 base when not compiling PIC).  */
      if (!TARGET_64BIT)
	{
	  if (flag_pic && !TARGET_PECOFF)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  /* GNU2 (TLS descriptor) sequence: descriptor call yields the
	     offset, which is added to the thread pointer.  */
	  dest = gen_reg_rtx (ptr_mode);
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (arg0: ptr_mode, x0: dest, x1: x));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

	  tp = get_thread_pointer (tp_mode: ptr_mode, to_reg: true);
	  dest = gen_rtx_PLUS (ptr_mode, tp, dest);
	  if (GET_MODE (dest) != Pmode)
	     dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
	  dest = force_reg (Pmode, dest);

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      else
	{
	  /* Classic GD: call __tls_get_addr; on 64-bit the call is
	     wrapped in a const-call libcall block returning in %rax.  */
	  rtx caddr = ix86_tls_get_addr ();

	  dest = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx_insn *insns;

	      start_sequence ();
	      emit_call_insn
		(gen_tls_global_dynamic_64 (Pmode, x0: rax, x1: x, x2: caddr));
	      insns = get_insns ();
	      end_sequence ();

	      if (GET_MODE (x) != Pmode)
		x = gen_rtx_ZERO_EXTEND (Pmode, x);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
	}
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  /* GNU2 LD: compute the module base via a descriptor call on
	     _TLS_MODULE_BASE_.  */
	  rtx tmp = ix86_tls_module_base ();

	  base = gen_reg_rtx (ptr_mode);
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (arg0: ptr_mode, x0: base, x1: tmp));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

	  tp = get_thread_pointer (tp_mode: ptr_mode, to_reg: true);
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_ZERO_EXTEND (Pmode, base);
	  base = force_reg (Pmode, base);
	}
      else
	{
	  /* Classic LD: call the LD-base helper once; the result can be
	     shared between all LD accesses in the function.  */
	  rtx caddr = ix86_tls_get_addr ();

	  base = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx_insn *insns;
	      rtx eqv;

	      start_sequence ();
	      emit_call_insn
		(gen_tls_local_dynamic_base_64 (Pmode, x0: rax, x1: caddr));
	      insns = get_insns ();
	      end_sequence ();

	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
		 share the LD_BASE result with other LD model accesses.  */
	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				    UNSPEC_TLS_LD_BASE);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, base, rax, eqv);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
	}

      /* The variable's address is module base + its @DTPOFF offset.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  if (GET_MODE (tp) != Pmode)
	    {
	      dest = lowpart_subreg (outermode: ptr_mode, op: dest, Pmode);
	      dest = gen_rtx_PLUS (ptr_mode, tp, dest);
	      dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
	    }
	  else
	    dest = gen_rtx_PLUS (Pmode, tp, dest);
	  dest = force_reg (Pmode, dest);

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  if (TARGET_SUN_TLS && !TARGET_X32)
	    {
	      /* The Sun linker took the AMD64 TLS spec literally
		 and can only handle %rax as destination of the
		 initial executable code sequence.  */

	      dest = gen_reg_rtx (DImode);
	      emit_insn (gen_tls_initial_exec_64_sun (a: dest, b: x));
	      return dest;
	    }

	  /* Generate DImode references to avoid %fs:(%reg32)
	     problems and linker IE->LE relaxation bug.  */
	  tp_mode = DImode;
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      /* Load the TP offset for X from the GOT.  */
      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
	off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (tp_mode,
				     to_reg: for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (tp_mode, off);
	  dest = gen_rtx_PLUS (tp_mode, base, off);
	  if (tp_mode != Pmode)
	    dest = convert_to_mode (Pmode, dest, 1);
	}
      else
	{
	  /* Non-GNU TLS offsets are subtracted from the thread
	     pointer rather than added.  */
	  base = get_thread_pointer (Pmode, to_reg: true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_sub3_insn (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (Pmode,
				     to_reg: for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, to_reg: true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_sub3_insn (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
12378 | |
12379 | /* Return true if the TLS address requires insn using integer registers. |
12380 | It's used to prevent KMOV/VMOV in TLS code sequences which require integer |
12381 | MOV instructions, refer to PR103275. */ |
12382 | bool |
12383 | ix86_gpr_tls_address_pattern_p (rtx mem) |
12384 | { |
12385 | gcc_assert (MEM_P (mem)); |
12386 | |
12387 | rtx addr = XEXP (mem, 0); |
12388 | subrtx_var_iterator::array_type array; |
12389 | FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL) |
12390 | { |
12391 | rtx op = *iter; |
12392 | if (GET_CODE (op) == UNSPEC) |
12393 | switch (XINT (op, 1)) |
12394 | { |
12395 | case UNSPEC_GOTNTPOFF: |
12396 | return true; |
12397 | case UNSPEC_TPOFF: |
12398 | if (!TARGET_64BIT) |
12399 | return true; |
12400 | break; |
12401 | default: |
12402 | break; |
12403 | } |
12404 | } |
12405 | |
12406 | return false; |
12407 | } |
12408 | |
12409 | /* Return true if OP refers to a TLS address. */ |
12410 | bool |
12411 | ix86_tls_address_pattern_p (rtx op) |
12412 | { |
12413 | subrtx_var_iterator::array_type array; |
12414 | FOR_EACH_SUBRTX_VAR (iter, array, op, ALL) |
12415 | { |
12416 | rtx op = *iter; |
12417 | if (MEM_P (op)) |
12418 | { |
12419 | rtx *x = &XEXP (op, 0); |
12420 | while (GET_CODE (*x) == PLUS) |
12421 | { |
12422 | int i; |
12423 | for (i = 0; i < 2; i++) |
12424 | { |
12425 | rtx u = XEXP (*x, i); |
12426 | if (GET_CODE (u) == ZERO_EXTEND) |
12427 | u = XEXP (u, 0); |
12428 | if (GET_CODE (u) == UNSPEC |
12429 | && XINT (u, 1) == UNSPEC_TP) |
12430 | return true; |
12431 | } |
12432 | x = &XEXP (*x, 0); |
12433 | } |
12434 | |
12435 | iter.skip_subrtxes (); |
12436 | } |
12437 | } |
12438 | |
12439 | return false; |
12440 | } |
12441 | |
12442 | /* Rewrite *LOC so that it refers to a default TLS address space. */ |
void
ix86_rewrite_tls_address_1 (rtx *loc)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
    {
      rtx *loc = *iter;	/* NOTE: intentionally shadows the parameter.  */
      if (MEM_P (*loc))
	{
	  rtx addr = XEXP (*loc, 0);
	  rtx *x = &addr;
	  /* Walk the PLUS chain of the address looking for the
	     UNSPEC_TP term (possibly zero-extended).  */
	  while (GET_CODE (*x) == PLUS)
	    {
	      int i;
	      for (i = 0; i < 2; i++)
		{
		  rtx u = XEXP (*x, i);
		  if (GET_CODE (u) == ZERO_EXTEND)
		    u = XEXP (u, 0);
		  if (GET_CODE (u) == UNSPEC
		      && XINT (u, 1) == UNSPEC_TP)
		    {
		      addr_space_t as = DEFAULT_TLS_SEG_REG;

		      /* Drop the UNSPEC_TP term from the address...  */
		      *x = XEXP (*x, 1 - i);

		      /* ...and move it into the MEM's address space
			 (the default TLS segment register) instead.  */
		      *loc = replace_equiv_address_nv (*loc, addr, true);
		      set_mem_addr_space (*loc, as);
		      return;
		    }
		}
	      x = &XEXP (*x, 0);
	    }

	  iter.skip_subrtxes ();
	}
    }
}
12481 | |
12482 | /* Rewrite instruction pattern involvning TLS address |
12483 | so that it refers to a default TLS address space. */ |
12484 | rtx |
12485 | ix86_rewrite_tls_address (rtx pattern) |
12486 | { |
12487 | pattern = copy_insn (pattern); |
12488 | ix86_rewrite_tls_address_1 (loc: &pattern); |
12489 | return pattern; |
12490 | } |
12491 | |
12492 | /* Create or return the unique __imp_DECL dllimport symbol corresponding |
12493 | to symbol DECL if BEIMPORT is true. Otherwise create or return the |
12494 | unique refptr-DECL symbol corresponding to symbol DECL. */ |
12495 | |
/* Hash traits for dllimport_map: a GC'ed cache of tree_map entries
   keyed on the original decl pointer.  */

struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
{
  /* The hash is precomputed and stored in the map node.  */
  static inline hashval_t hash (tree_map *m) { return m->hash; }
  static inline bool
  equal (tree_map *a, tree_map *b)
  {
    return a->base.from == b->base.from;
  }

  /* During GC, retain an entry only while its source decl is live.  */
  static int
  keep_cache_entry (tree_map *&m)
  {
    return ggc_marked_p (m->base.from);
  }
};
12511 | |
/* Cache mapping a decl to its __imp_/refptr indirection VAR_DECL.  */
static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;

static tree
get_dllimport_decl (tree decl, bool beimport)
{
  struct tree_map *h, in;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  if (!dllimport_map)
    dllimport_map = hash_table<dllimport_hasher>::create_ggc (n: 512);

  /* Return the cached indirection decl if we have already built one.  */
  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  tree_map **loc = dllimport_map->find_slot_with_hash (comparable: &in, hash: in.hash, insert: INSERT);
  h = *loc;
  if (h)
    return h->to;

  /* Build a new artificial, external, read-only pointer VAR_DECL and
     record it in the cache.  */
  *loc = h = ggc_alloc<tree_map> ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
			   VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  /* Compose the assembler name: "*__imp_" / "*__imp__" for dllimport,
     "*.refptr." / "*refptr." otherwise, depending on the target's
     user label prefix.  */
  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  if (beimport)
    prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
      ? "*__imp_" : "*__imp__" ;
  else
    prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr." ;
  namelen = strlen (s: name);
  prefixlen = strlen (s: prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (dest: imp_name, src: prefix, n: prefixlen);
  memcpy (dest: imp_name + prefixlen, src: name, n: namelen + 1);

  name = ggc_alloc_string (contents: imp_name, length: namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
  if (!beimport)
    {
      SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
#ifdef SUB_TARGET_RECORD_STUB
      SUB_TARGET_RECORD_STUB (name);
#endif
    }

  /* The decl's RTL is a load through the stub symbol, aliased with
     the GOT alias set.  */
  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}
12578 | |
12579 | /* Expand SYMBOL into its corresponding far-address symbol. |
12580 | WANT_REG is true if we require the result be a register. */ |
12581 | |
12582 | static rtx |
12583 | legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg) |
12584 | { |
12585 | tree imp_decl; |
12586 | rtx x; |
12587 | |
12588 | gcc_assert (SYMBOL_REF_DECL (symbol)); |
12589 | imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), beimport: false); |
12590 | |
12591 | x = DECL_RTL (imp_decl); |
12592 | if (want_reg) |
12593 | x = force_reg (Pmode, x); |
12594 | return x; |
12595 | } |
12596 | |
12597 | /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is |
12598 | true if we require the result be a register. */ |
12599 | |
12600 | static rtx |
12601 | legitimize_dllimport_symbol (rtx symbol, bool want_reg) |
12602 | { |
12603 | tree imp_decl; |
12604 | rtx x; |
12605 | |
12606 | gcc_assert (SYMBOL_REF_DECL (symbol)); |
12607 | imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), beimport: true); |
12608 | |
12609 | x = DECL_RTL (imp_decl); |
12610 | if (want_reg) |
12611 | x = force_reg (Pmode, x); |
12612 | return x; |
12613 | } |
12614 | |
/* Expand ADDR into its corresponding dllimport or refptr symbol.  INREG
   is true if we require the result be a register.  */
12617 | |
12618 | rtx |
12619 | legitimize_pe_coff_symbol (rtx addr, bool inreg) |
12620 | { |
12621 | if (!TARGET_PECOFF) |
12622 | return NULL_RTX; |
12623 | |
12624 | if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) |
12625 | { |
12626 | if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr)) |
12627 | return legitimize_dllimport_symbol (symbol: addr, want_reg: inreg); |
12628 | if (GET_CODE (addr) == CONST |
12629 | && GET_CODE (XEXP (addr, 0)) == PLUS |
12630 | && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF |
12631 | && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0))) |
12632 | { |
12633 | rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), want_reg: inreg); |
12634 | return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1)); |
12635 | } |
12636 | } |
12637 | |
12638 | if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC) |
12639 | return NULL_RTX; |
12640 | if (GET_CODE (addr) == SYMBOL_REF |
12641 | && !is_imported_p (x: addr) |
12642 | && SYMBOL_REF_EXTERNAL_P (addr) |
12643 | && SYMBOL_REF_DECL (addr)) |
12644 | return legitimize_pe_coff_extern_decl (symbol: addr, want_reg: inreg); |
12645 | |
12646 | if (GET_CODE (addr) == CONST |
12647 | && GET_CODE (XEXP (addr, 0)) == PLUS |
12648 | && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF |
12649 | && !is_imported_p (XEXP (XEXP (addr, 0), 0)) |
12650 | && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0)) |
12651 | && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0))) |
12652 | { |
12653 | rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), want_reg: inreg); |
12654 | return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1)); |
12655 | } |
12656 | return NULL_RTX; |
12657 | } |
12658 | |
12659 | /* Try machine-dependent ways of modifying an illegitimate address |
12660 | to be legitimate. If we find one, return the new, valid address. |
12661 | This macro is used in only one place: `memory_address' in explow.cc. |
12662 | |
12663 | OLDX is the address as it was before break_out_memory_refs was called. |
12664 | In some cases it is useful to look at this to decide what needs to be done. |
12665 | |
12666 | It is always safe for this macro to do nothing. It exists to recognize |
12667 | opportunities to optimize the output. |
12668 | |
12669 | For the 80386, we handle X+REG by loading X into a register R and |
12670 | using R+REG. R will go in a general reg and indexing will be used. |
12671 | However, if REG is a broken-out memory address or multiplication, |
12672 | nothing needs to be done because REG can certainly go in a general reg. |
12673 | |
12674 | When -fpic is used, special handling is needed for symbolic references. |
12675 | See comments by legitimize_pic_address in i386.cc for details. */ |
12676 | |
static rtx
ix86_legitimize_address (rtx x, rtx, machine_mode mode)
{
  bool changed = false;
  unsigned log;

  /* TLS symbol references need a model-specific access sequence.
     Handle a bare TLS SYMBOL_REF first ...  */
  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, model: (enum tls_model) log, for_mov: false);
  /* ... and then (const (plus (tls_symbol) (const_int))).  */
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      model: (enum tls_model) log, for_mov: false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  /* dllimport/refptr symbols must be accessed through their stub.  */
  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (addr: x, inreg: true);
      if (tmp)
        return tmp;
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (orig: x, reg: 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = true;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  std::swap (XEXP (x, 0), XEXP (x, 1));
	  changed = true;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = true;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  /* Pick whichever of the two addends is a CONST_INT; OTHER is
	     the remaining (possibly symbolic) addend.  */
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = true;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (Pmode, other,
					       INTVAL (constant)));
	    }
	}

      /* If canonicalization alone produced a valid address, we are done.  */
      if (changed && ix86_legitimate_address_p (mode, addr: x, strict: false))
	return x;

      /* Otherwise force any remaining MULT operand into a register.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = true;
	  XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = true;
	  XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
	}

      /* reg + reg is always a legitimate address.  */
      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      /* A symbolic displacement needs PIC legitimization too.  */
      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = true;
	  x = legitimize_pic_address (orig: x, reg: 0);
	}

      if (changed && ix86_legitimate_address_p (mode, addr: x, strict: false))
	return x;

      /* Last resort: force the non-register operand into a fresh Pmode
	 register (zero-extending if force_operand produced a value in a
	 narrower mode).  */
      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  /* Same as above, with the operands mirrored.  */
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
12863 | |
12864 | /* Print an integer constant expression in assembler syntax. Addition |
12865 | and subtraction are the only arithmetic that may appear in these |
12866 | expressions. FILE is the stdio stream to write to, X is the rtx, and |
12867 | CODE is the operand print code from the output string. */ |
12868 | |
static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      /* PC stands for the current location; only valid under PIC.  */
      gcc_assert (flag_pic);
      putc (c: '.', stream: file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      /* For the 'P' print code, annotate calls that go through the PLT.  */
      if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
	  && code == 'P' && ix86_call_use_plt_p (x))
	fputs (s: "@PLT" , stream: file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      /* Emit the compiler-internal local label for the CODE_LABEL.  */
      ASM_GENERATE_INTERNAL_LABEL (buf, "L" , CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    CASE_CONST_SCALAR_INT:
      output_addr_const (file, x);
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      /* We can't handle floating point constants;
	 TARGET_PRINT_OPERAND must handle them.  */
      output_operand_lossage ("floating constant misused" );
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc (c: '+', stream: file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc (c: '+', stream: file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      /* Wrap the difference in dialect-specific grouping characters,
	 except on Darwin which takes the bare expression.  */
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', stream: file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc (c: '-', stream: file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', stream: file);
      break;

    case UNSPEC:
      /* Print the wrapped operand followed by the relocation suffix
	 selected by the UNSPEC number.  */
      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs (s: "@GOT" , stream: file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs (s: "@GOTOFF" , stream: file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs (s: "@PLTOFF" , stream: file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]" , stream: file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]" , stream: file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs (s: "@gottpoff" , stream: file);
	  break;
	case UNSPEC_TPOFF:
	  fputs (s: "@tpoff" , stream: file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs (s: "@tpoff" , stream: file);
	  else
	    fputs (s: "@ntpoff" , stream: file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs (s: "@dtpoff" , stream: file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)" : "@gottpoff[rip]" , stream: file);
	  else
	    fputs (s: "@gotntpoff" , stream: file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs (s: "@indntpoff" , stream: file);
	  break;
#if TARGET_MACHO
	case UNSPEC_MACHOPIC_OFFSET:
	  putc ('-', file);
	  machopic_output_function_base_name (file);
	  break;
#endif
	default:
	  output_operand_lossage ("invalid UNSPEC as operand" );
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand" );
    }
}
13020 | |
13021 | /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL. |
13022 | We need to emit DTP-relative relocations. */ |
13023 | |
13024 | static void ATTRIBUTE_UNUSED |
13025 | i386_output_dwarf_dtprel (FILE *file, int size, rtx x) |
13026 | { |
13027 | fputs (ASM_LONG, stream: file); |
13028 | output_addr_const (file, x); |
13029 | fputs (s: "@dtpoff" , stream: file); |
13030 | switch (size) |
13031 | { |
13032 | case 4: |
13033 | break; |
13034 | case 8: |
13035 | fputs (s: ", 0" , stream: file); |
13036 | break; |
13037 | default: |
13038 | gcc_unreachable (); |
13039 | } |
13040 | } |
13041 | |
13042 | /* Return true if X is a representation of the PIC register. This copes |
13043 | with calls from ix86_find_base_term, where the register might have |
13044 | been replaced by a cselib value. */ |
13045 | |
13046 | static bool |
13047 | ix86_pic_register_p (rtx x) |
13048 | { |
13049 | if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x)) |
13050 | return (pic_offset_table_rtx |
13051 | && rtx_equal_for_cselib_p (x, pic_offset_table_rtx)); |
13052 | else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT) |
13053 | return true; |
13054 | else if (!REG_P (x)) |
13055 | return false; |
13056 | else if (pic_offset_table_rtx) |
13057 | { |
13058 | if (REGNO (x) == REGNO (pic_offset_table_rtx)) |
13059 | return true; |
13060 | if (HARD_REGISTER_P (x) |
13061 | && !HARD_REGISTER_P (pic_offset_table_rtx) |
13062 | && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx)) |
13063 | return true; |
13064 | return false; |
13065 | } |
13066 | else |
13067 | return REGNO (x) == PIC_OFFSET_TABLE_REGNUM; |
13068 | } |
13069 | |
13070 | /* Helper function for ix86_delegitimize_address. |
13071 | Attempt to delegitimize TLS local-exec accesses. */ |
13072 | |
static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  /* Segment-relative TLS accesses only exist with direct seg refs.  */
  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  /* Work on the address inside a MEM, if that is what we were given.  */
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  /* Require a decomposable address with the TLS segment override and a
     CONST displacement.  */
  if (ix86_decompose_address (addr: x, out: &addr) == 0
      || addr.seg != DEFAULT_TLS_SEG_REG
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  /* The displacement may be (plus (unspec ...) (const_int ...)).  */
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  /* Re-attach the constant addend that was stripped above, if any.  */
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  /* Rebuild base + index*scale around the bare symbol.  */
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  /* If the input was a MEM, return a MEM over the rebuilt address.  */
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
13112 | |
13113 | /* In the name of slightly smaller debug output, and to cater to |
13114 | general assembler lossage, recognize PIC+GOTOFF and turn it back |
13115 | into a direct symbol reference. |
13116 | |
13117 | On Darwin, this is necessary to avoid a crash, because Darwin |
13118 | has a different PIC label for each routine but the DWARF debugging |
13119 | information is not associated with any particular routine, so it's |
13120 | necessary to remove references to the PIC label from RTL stored by |
13121 | the DWARF output code. |
13122 | |
13123 | This helper is used in the normal ix86_delegitimize_address |
13124 | entrypoint (e.g. used in the target delegitimization hook) and |
13125 | in ix86_find_base_term. As compile time memory optimization, we |
13126 | avoid allocating rtxes that will not change anything on the outcome |
13127 | of the callers (find_base_value and find_base_term). */ |
13128 | |
static inline rtx
ix86_delegitimize_address_1 (rtx x, bool base_term_p)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  /* 64-bit: handle the %rip-relative UNSPEC forms directly.  */
  if (TARGET_64BIT)
    {
      /* (const (plus (unspec UNSPEC_PCREL) (const_int ...))).  */
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_MODE (XEXP (x, 0)) == Pmode
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
	{
	  /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
	     base.  A CONST can't be arg_pointer_rtx based.  */
	  if (base_term_p && MEM_P (orig_x))
	    return orig_x;
	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
	  if (MEM_P (orig_x))
	    x = replace_equiv_address_nv (orig_x, x);
	  return x;
	}

      /* (const (unspec UNSPEC_GOTPCREL/UNSPEC_PCREL)): strip the
	 unspec and return the wrapped symbol.  */
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == UNSPEC
	  && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
	      || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
	  && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
	{
	  x = XVECEXP (XEXP (x, 0), 0, 0);
	  if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
	    {
	      x = lowpart_subreg (GET_MODE (orig_x), op: x, GET_MODE (x));
	      if (x == NULL_RTX)
		return orig_x;
	    }
	  return x;
	}

      if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
	return ix86_delegitimize_tls_address (orig_x);

      /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
	 and -mcmodel=medium -fpic.  */
    }

  /* From here on we expect pic_reg-based forms: (plus ... (const ...)).  */
  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
	reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
	reg_addend = XEXP (reg_addend, 0);
      else
	{
	  /* Neither side is the PIC register; remember the whole term.  */
	  reg_addend = NULL_RTX;
	  addend = XEXP (x, 0);
	}
    }
  else
    addend = XEXP (x, 0);

  /* Strip the CONST wrapper and split off any constant addend.  */
  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  /* Accept only the GOT/GOTOFF/PLTOFF unspecs in contexts where they
     can legitimately be delegitimized.  */
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
	  || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
	      && !MEM_P (orig_x) && !addend)))
    result = XVECEXP (x, 0, 0);

  if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (disp: x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  /* For (PLUS something CONST_INT) both find_base_{value,term} just
     recurse on the first operand.  */
  if (const_addend && !base_term_p)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
	 for code like:
	 leal (%ebx, %ecx, 4), %ecx
	 ...
	 movl foo@GOTOFF(%ecx), %edx
	 in which case we return (%ecx - %ebx) + foo
	 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
	 and reload has completed.  Don't do the latter for debug,
	 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly.  */
      if (pic_offset_table_rtx
	  && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
	result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
						     pic_offset_table_rtx),
			       result);
      else if (base_term_p
	       && pic_offset_table_rtx
	       && !TARGET_MACHO
	       && !TARGET_VXWORKS_RTP)
	{
	  rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
	  tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
	  result = gen_rtx_PLUS (Pmode, tmp, result);
	}
      else
	return orig_x;
    }
  /* Match the mode of the original MEM, if it differs from Pmode.  */
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = lowpart_subreg (GET_MODE (orig_x), op: result, Pmode);
      if (result == NULL_RTX)
	return orig_x;
    }
  return result;
}
13280 | |
/* The normal (non-find_base_term) entry point for the helper above.  */
13282 | |
13283 | static rtx |
13284 | ix86_delegitimize_address (rtx x) |
13285 | { |
13286 | return ix86_delegitimize_address_1 (x, base_term_p: false); |
13287 | } |
13288 | |
13289 | /* If X is a machine specific address (i.e. a symbol or label being |
13290 | referenced as a displacement from the GOT implemented using an |
13291 | UNSPEC), then return the base term. Otherwise return X. */ |
13292 | |
13293 | rtx |
13294 | ix86_find_base_term (rtx x) |
13295 | { |
13296 | rtx term; |
13297 | |
13298 | if (TARGET_64BIT) |
13299 | { |
13300 | if (GET_CODE (x) != CONST) |
13301 | return x; |
13302 | term = XEXP (x, 0); |
13303 | if (GET_CODE (term) == PLUS |
13304 | && CONST_INT_P (XEXP (term, 1))) |
13305 | term = XEXP (term, 0); |
13306 | if (GET_CODE (term) != UNSPEC |
13307 | || (XINT (term, 1) != UNSPEC_GOTPCREL |
13308 | && XINT (term, 1) != UNSPEC_PCREL)) |
13309 | return x; |
13310 | |
13311 | return XVECEXP (term, 0, 0); |
13312 | } |
13313 | |
13314 | return ix86_delegitimize_address_1 (x, base_term_p: true); |
13315 | } |
13316 | |
13317 | /* Return true if X shouldn't be emitted into the debug info. |
13318 | Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_ |
13319 | symbol easily into the .debug_info section, so we need not to |
13320 | delegitimize, but instead assemble as @gotoff. |
13321 | Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically |
13322 | assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */ |
13323 | |
13324 | static bool |
13325 | ix86_const_not_ok_for_debug_p (rtx x) |
13326 | { |
13327 | if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF) |
13328 | return true; |
13329 | |
13330 | if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0) |
13331 | return true; |
13332 | |
13333 | return false; |
13334 | } |
13335 | |
/* Print to FILE the one- or two-letter condition-code suffix for
   comparison CODE performed in CC mode MODE (e.g. "e", "ne", "l").
   If REVERSE, print the suffix for the reversed condition.  FP is true
   when the suffix will be used in an fcmov/FP context, which changes
   the spelling of some suffixes (see the GTU/LTU/GEU and
   UNORDERED/ORDERED cases below).  */

static void
put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
		    bool fp, FILE *file)
{
  const char *suffix;

  /* FP compares are first mapped to the equivalent integer condition.  */
  if (mode == CCFPmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      gcc_assert (mode != CCGZmode);
      /* In the single-flag CC modes, EQ tests that one flag directly.  */
      switch (mode)
	{
	case E_CCAmode:
	  suffix = "a" ;
	  break;
	case E_CCCmode:
	  suffix = "c" ;
	  break;
	case E_CCOmode:
	  suffix = "o" ;
	  break;
	case E_CCPmode:
	  suffix = "p" ;
	  break;
	case E_CCSmode:
	  suffix = "s" ;
	  break;
	default:
	  suffix = "e" ;
	  break;
	}
      break;
    case NE:
      gcc_assert (mode != CCGZmode);
      /* NE of a single-flag CC mode is the negated flag test.  */
      switch (mode)
	{
	case E_CCAmode:
	  suffix = "na" ;
	  break;
	case E_CCCmode:
	  suffix = "nc" ;
	  break;
	case E_CCOmode:
	  suffix = "no" ;
	  break;
	case E_CCPmode:
	  suffix = "np" ;
	  break;
	case E_CCSmode:
	  suffix = "ns" ;
	  break;
	default:
	  suffix = "ne" ;
	  break;
	}
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g" ;
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "a" ;
      else
	gcc_unreachable ();
      break;
    case LT:
      switch (mode)
	{
	case E_CCNOmode:
	case E_CCGOCmode:
	  /* LT against zero reduces to a sign-flag test.  */
	  suffix = "s" ;
	  break;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGZmode:
	  suffix = "l" ;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case LTU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "b" ;
      else if (mode == CCCmode)
	suffix = fp ? "b" : "c" ;
      else
	gcc_unreachable ();
      break;
    case GE:
      switch (mode)
	{
	case E_CCNOmode:
	case E_CCGOCmode:
	  /* GE against zero reduces to a not-sign test.  */
	  suffix = "ns" ;
	  break;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGZmode:
	  suffix = "ge" ;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case GEU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "nb" ;
      else if (mode == CCCmode)
	suffix = fp ? "nb" : "nc" ;
      else
	gcc_unreachable ();
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le" ;
      break;
    case LEU:
      if (mode == CCmode)
	suffix = "be" ;
      else
	gcc_unreachable ();
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p" ;
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np" ;
      break;
    default:
      gcc_unreachable ();
    }
  fputs (s: suffix, stream: file);
}
13485 | |
13486 | /* Print the name of register X to FILE based on its machine mode and number. |
13487 | If CODE is 'w', pretend the mode is HImode. |
13488 | If CODE is 'b', pretend the mode is QImode. |
13489 | If CODE is 'k', pretend the mode is SImode. |
13490 | If CODE is 'q', pretend the mode is DImode. |
13491 | If CODE is 'x', pretend the mode is V4SFmode. |
13492 | If CODE is 't', pretend the mode is V8SFmode. |
13493 | If CODE is 'g', pretend the mode is V16SFmode. |
13494 | If CODE is 'h', pretend the reg is the 'high' byte register. |
13495 | If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. |
13496 | If CODE is 'd', duplicate the operand for AVX instruction. |
13497 | If CODE is 'V', print naked full integer register name without %. |
13498 | */ |
13499 | |
void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  int msize;
  unsigned int regno;
  bool duplicated;

  /* 'V' requests the naked register name, so suppress the AT&T '%'
     prefix in that one case.  */
  if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
    putc (c: '%', stream: file);

  /* The program counter prints as "rip"; only valid in 64-bit mode.  */
  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs (s: "rip", stream: file);
      return;
    }

  if (code == 'y' && STACK_TOP_P (x))
    {
      fputs (s: "st(0)", stream: file);
      return;
    }

  /* Translate the operand-size override code into a size in bytes.
     msize == 0 is special: it selects the "high" byte register (ah..dh).
     With no override, use the size of X's own mode.  */
  if (code == 'w')
    msize = 2;
  else if (code == 'b')
    msize = 1;
  else if (code == 'k')
    msize = 4;
  else if (code == 'q')
    msize = 8;
  else if (code == 'h')
    msize = 0;
  else if (code == 'x')
    msize = 16;
  else if (code == 't')
    msize = 32;
  else if (code == 'g')
    msize = 64;
  else
    msize = GET_MODE_SIZE (GET_MODE (x));

  regno = REGNO (x);

  /* These pseudo/internal registers must never survive into output.  */
  if (regno == ARG_POINTER_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == FPSR_REG)
    {
      output_operand_lossage
	("invalid use of register '%s'", reg_names[regno]);
      return;
    }
  else if (regno == FLAGS_REG)
    {
      output_operand_lossage ("invalid use of asm flag output");
      return;
    }

  /* 'V' is only meaningful for general (integer) registers and always
     prints the full word-sized name.  */
  if (code == 'V')
    {
      if (GENERAL_REGNO_P (regno))
	msize = GET_MODE_SIZE (word_mode);
      else
	error ("%<V%> modifier on non-integer register");
    }

  duplicated = code == 'd' && TARGET_AVX;

  switch (msize)
    {
    case 16:
    case 12:
    case 8:
      if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
	warning (0, "unsupported size for integer register");
      /* FALLTHRU */
    case 4:
      /* Legacy integer registers prepend 'e' (32-bit) or 'r' (64-bit)
	 to the 16-bit name looked up below.  */
      if (LEGACY_INT_REGNO_P (regno))
	putc (c: msize > 4 && TARGET_64BIT ? 'r' : 'e', stream: file);
      /* FALLTHRU */
    case 2:
    normal:
      reg = hi_reg_name[regno];
      break;
    case 1:
      if (regno >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      if (!ANY_QI_REGNO_P (regno))
	error ("unsupported size for integer register");
      reg = qi_reg_name[regno];
      break;
    case 0:
      /* High byte register (ah, bh, ch, dh), requested by 'h'.  */
      if (regno >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      reg = qi_high_reg_name[regno];
      break;
    case 32:
    case 64:
      /* 256-/512-bit vector register: print 'y' or 'z' followed by the
	 "mmN" tail of the xmm name (hence the "+ 1").  */
      if (SSE_REGNO_P (regno))
	{
	  gcc_assert (!duplicated);
	  putc (c: msize == 32 ? 'y' : 'z', stream: file);
	  reg = hi_reg_name[regno] + 1;
	  break;
	}
      goto normal;
    default:
      gcc_unreachable ();
    }

  fputs (s: reg, stream: file);

  /* Irritatingly, AMD extended registers use
     a different naming convention: "r%d[bwd]".  */
  if (REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
    {
      gcc_assert (TARGET_64BIT);
      switch (msize)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  putc (c: 'b', stream: file);
	  break;
	case 2:
	  putc (c: 'w', stream: file);
	  break;
	case 4:
	  putc (c: 'd', stream: file);
	  break;
	case 8:
	  /* no suffix */
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }

  /* 'd': print the register a second time, for AVX instructions built
     from two-operand patterns.  */
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (stream: file, format: ", %%%s", reg);
      else
	fprintf (stream: file, format: ", %s", reg);
    }
}
13650 | |
13651 | /* Meaning of CODE: |
13652 | L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. |
13653 | C -- print opcode suffix for set/cmov insn. |
13654 | c -- like C, but print reversed condition |
13655 | F,f -- likewise, but for floating-point. |
13656 | O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", |
13657 | otherwise nothing |
13658 | R -- print embedded rounding and sae. |
13659 | r -- print only sae. |
13660 | z -- print the opcode suffix for the size of the current operand. |
13661 | Z -- likewise, with special suffixes for x87 instructions. |
13662 | * -- print a star (in certain assembler syntax) |
13663 | A -- print an absolute memory reference. |
13664 | E -- print address with DImode register names if TARGET_64BIT. |
13665 | w -- print the operand as if it's a "word" (HImode) even if it isn't. |
   s -- print a shift double count, followed by the assembler's argument
	delimiter.
13668 | b -- print the QImode name of the register for the indicated operand. |
13669 | %b0 would print %al if operands[0] is reg 0. |
13670 | w -- likewise, print the HImode name of the register. |
13671 | k -- likewise, print the SImode name of the register. |
13672 | q -- likewise, print the DImode name of the register. |
13673 | x -- likewise, print the V4SFmode name of the register. |
13674 | t -- likewise, print the V8SFmode name of the register. |
13675 | g -- likewise, print the V16SFmode name of the register. |
13676 | h -- print the QImode name for a "high" register, either ah, bh, ch or dh. |
13677 | y -- print "st(0)" instead of "st" as a register. |
13678 | d -- print duplicated register operand for AVX instruction. |
13679 | D -- print condition for SSE cmp instruction. |
13680 | P -- if PIC, print an @PLT suffix. For -fno-plt, load function |
13681 | address from GOT. |
13682 | p -- print raw symbol name. |
13683 | X -- don't print any sort of PIC '@' suffix for a symbol. |
13684 | & -- print some in-use local-dynamic symbol name. |
13685 | H -- print a memory address offset by 8; used for sse high-parts |
13686 | Y -- print condition for XOP pcom* instruction. |
13687 | V -- print naked full integer register name without %. |
13688 | + -- print a branch hint as 'cs' or 'ds' prefix |
13689 | ; -- print a semicolon (after prefixes due to bug in older gas). |
13690 | ~ -- print "i" if TARGET_AVX2, "f" otherwise. |
13691 | ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode |
13692 | M -- print addr32 prefix for TARGET_X32 with VSIB address. |
13693 | ! -- print NOTRACK prefix for jxx/call/ret instructions if required. |
13694 | N -- print maskz if it's constant 0 operand. |
13695 | */ |
13696 | |
/* Implementation of the operand-printing hook: output operand X to FILE,
   applying the operand-code modifier CODE documented in the table above.
   CODE == 0 means "print the operand with no modifier".  */

void
ix86_print_operand (FILE *file, rtx x, int code)
{
  if (code)
    {
      switch (code)
	{
	case 'A':
	  switch (ASSEMBLER_DIALECT)
	    {
	    case ASM_ATT:
	      putc (c: '*', stream: file);
	      break;

	    case ASM_INTEL:
	      /* Intel syntax. For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (!REG_P (x))
		{
		  putc (c: '[', stream: file);
		  ix86_print_operand (file, x, code: 0);
		  putc (c: ']', stream: file);
		  return;
		}
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  ix86_print_operand (file, x, code: 0);
	  return;

	case 'E':
	  /* Wrap address in an UNSPEC to declare special handling.  */
	  if (TARGET_64BIT)
	    x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);

	  output_address (VOIDmode, x);
	  return;

	/* Explicit opcode-size suffixes; only AT&T syntax uses them.  */
	case 'L':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc (c: 'l', stream: file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc (c: 'w', stream: file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc (c: 'b', stream: file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc (c: 'l', stream: file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc (c: 's', stream: file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc (c: 't', stream: file);
	  return;

	case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT != ASM_ATT)
	    return;

	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
	      putc ('w', file);
	      break;

	    case 4:
	      putc ('l', file);
	      break;

	    case 8:
	      putc ('q', file);
	      break;

	    default:
	      output_operand_lossage ("invalid operand size for operand "
				      "code 'O'");
	      return;
	    }

	  putc ('.', file);
#endif
	  return;

	case 'z':
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    {
	      /* Opcodes don't get size suffixes if using Intel opcodes.  */
	      if (ASSEMBLER_DIALECT == ASM_INTEL)
		return;

	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 1:
		  putc (c: 'b', stream: file);
		  return;

		case 2:
		  putc (c: 'w', stream: file);
		  return;

		case 4:
		  putc (c: 'l', stream: file);
		  return;

		case 8:
		  putc (c: 'q', stream: file);
		  return;

		default:
		  output_operand_lossage ("invalid operand size for operand "
					  "code 'z'");
		  return;
		}
	    }

	  /* Non-integer operands with 'z' are diagnosed, then handled as
	     if 'Z' (x87 suffix rules) had been used.  */
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	    {
	      if (this_is_asm_operands)
		warning_for_asm (this_is_asm_operands,
				 "non-integer operand used with operand code %<z%>");
	      else
		warning (0, "non-integer operand used with operand code %<z%>");
	    }
	  /* FALLTHRU */

	case 'Z':
	  /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    return;

	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    {
	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 2:
#ifdef HAVE_AS_IX86_FILDS
		  putc (c: 's', stream: file);
#endif
		  return;

		case 4:
		  putc (c: 'l', stream: file);
		  return;

		case 8:
#ifdef HAVE_AS_IX86_FILDQ
		  putc (c: 'q', stream: file);
#else
		  fputs ("ll", file);
#endif
		  return;

		default:
		  break;
		}
	    }
	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	    {
	      /* 387 opcodes don't get size suffixes
		 if the operands are registers.  */
	      if (STACK_REG_P (x))
		return;

	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 4:
		  putc (c: 's', stream: file);
		  return;

		case 8:
		  putc (c: 'l', stream: file);
		  return;

		case 12:
		case 16:
		  putc (c: 't', stream: file);
		  return;

		default:
		  break;
		}
	    }
	  else
	    {
	      output_operand_lossage ("invalid operand type used with "
				      "operand code '%c'", code);
	      return;
	    }

	  output_operand_lossage ("invalid operand size for operand code '%c'",
				  code);
	  return;

	/* These modifiers are applied when the operand itself is printed
	   below (register sizing in print_reg, memory-size override, or
	   symbol handling).  */
	case 'd':
	case 'b':
	case 'w':
	case 'k':
	case 'q':
	case 'h':
	case 't':
	case 'g':
	case 'y':
	case 'x':
	case 'X':
	case 'P':
	case 'p':
	case 'V':
	  break;

	case 's':
	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      ix86_print_operand (file, x, code: 0);
	      fputs (s: ", ", stream: file);
	    }
	  return;

	case 'Y':
	  /* Condition name for XOP pcom* instructions.  */
	  switch (GET_CODE (x))
	    {
	    case NE:
	      fputs (s: "neq", stream: file);
	      break;
	    case EQ:
	      fputs (s: "eq", stream: file);
	      break;
	    case GE:
	    case GEU:
	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", stream: file);
	      break;
	    case GT:
	    case GTU:
	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", stream: file);
	      break;
	    case LE:
	    case LEU:
	      fputs (s: "le", stream: file);
	      break;
	    case LT:
	    case LTU:
	      fputs (s: "lt", stream: file);
	      break;
	    case UNORDERED:
	      fputs (s: "unord", stream: file);
	      break;
	    case ORDERED:
	      fputs (s: "ord", stream: file);
	      break;
	    case UNEQ:
	      fputs (s: "ueq", stream: file);
	      break;
	    case UNGE:
	      fputs (s: "nlt", stream: file);
	      break;
	    case UNGT:
	      fputs (s: "nle", stream: file);
	      break;
	    case UNLE:
	      fputs (s: "ule", stream: file);
	      break;
	    case UNLT:
	      fputs (s: "ult", stream: file);
	      break;
	    case LTGT:
	      fputs (s: "une", stream: file);
	      break;
	    default:
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code 'Y'");
	      return;
	    }
	  return;

	case 'D':
	  /* A little bit of braindamage here.  The SSE compare instructions
	     use completely different names for the comparisons than the
	     fp conditional moves do.  AVX additionally has distinct,
	     more precise names for some of them.  */
	  switch (GET_CODE (x))
	    {
	    case UNEQ:
	      if (TARGET_AVX)
		{
		  fputs (s: "eq_us", stream: file);
		  break;
		}
	      /* FALLTHRU */
	    case EQ:
	      fputs (s: "eq", stream: file);
	      break;
	    case UNLT:
	      if (TARGET_AVX)
		{
		  fputs (s: "nge", stream: file);
		  break;
		}
	      /* FALLTHRU */
	    case LT:
	      fputs (s: "lt", stream: file);
	      break;
	    case UNLE:
	      if (TARGET_AVX)
		{
		  fputs (s: "ngt", stream: file);
		  break;
		}
	      /* FALLTHRU */
	    case LE:
	      fputs (s: "le", stream: file);
	      break;
	    case UNORDERED:
	      fputs (s: "unord", stream: file);
	      break;
	    case LTGT:
	      if (TARGET_AVX)
		{
		  fputs (s: "neq_oq", stream: file);
		  break;
		}
	      /* FALLTHRU */
	    case NE:
	      fputs (s: "neq", stream: file);
	      break;
	    case GE:
	      if (TARGET_AVX)
		{
		  fputs (s: "ge", stream: file);
		  break;
		}
	      /* FALLTHRU */
	    case UNGE:
	      fputs (s: "nlt", stream: file);
	      break;
	    case GT:
	      if (TARGET_AVX)
		{
		  fputs (s: "gt", stream: file);
		  break;
		}
	      /* FALLTHRU */
	    case UNGT:
	      fputs (s: "nle", stream: file);
	      break;
	    case ORDERED:
	      fputs (s: "ord", stream: file);
	      break;
	    default:
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code 'D'");
	      return;
	    }
	  return;

	case 'F':
	case 'f':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
	  gcc_fallthrough ();
#endif

	case 'C':
	case 'c':
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code '%c'", code);
	      return;
	    }
	  /* Lowercase codes print the reversed condition; 'F'/'f' select
	     the floating-point condition names.  */
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
			      reverse: code == 'c' || code == 'f',
			      fp: code == 'F' || code == 'f',
			      file);
	  return;

	case 'H':
	  if (!offsettable_memref_p (x))
	    {
	      output_operand_lossage ("operand is not an offsettable memory "
				      "reference, invalid operand code 'H'");
	      return;
	    }
	  /* It doesn't actually matter what mode we use here, as we're
	     only going to use this for printing.  */
	  x = adjust_address_nv (x, DImode, 8);
	  /* Output 'qword ptr' for intel assembler dialect.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    code = 'q';
	  break;

	case 'K':
	  /* HLE prefix (xacquire/xrelease), encoded as raw bytes when the
	     assembler is too old to know the mnemonics.  */
	  if (!CONST_INT_P (x))
	    {
	      output_operand_lossage ("operand is not an integer, invalid "
				      "operand code 'K'");
	      return;
	    }

	  if (INTVAL (x) & IX86_HLE_ACQUIRE)
#ifdef HAVE_AS_IX86_HLE
	    fputs (s: "xacquire ", stream: file);
#else
	    fputs ("\n" ASM_BYTE "0xf2\n\t", file);
#endif
	  else if (INTVAL (x) & IX86_HLE_RELEASE)
#ifdef HAVE_AS_IX86_HLE
	    fputs (s: "xrelease ", stream: file);
#else
	    fputs ("\n" ASM_BYTE "0xf3\n\t", file);
#endif
	  /* We do not want to print value of the operand.  */
	  return;

	case 'N':
	  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
	    fputs (s: "{z}", stream: file);
	  return;

	case 'r':
	  if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
	    {
	      output_operand_lossage ("operand is not a specific integer, "
				      "invalid operand code 'r'");
	      return;
	    }

	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    fputs (s: ", ", stream: file);

	  fputs (s: "{sae}", stream: file);

	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    fputs (s: ", ", stream: file);

	  return;

	case 'R':
	  if (!CONST_INT_P (x))
	    {
	      output_operand_lossage ("operand is not an integer, invalid "
				      "operand code 'R'");
	      return;
	    }

	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    fputs (s: ", ", stream: file);

	  switch (INTVAL (x))
	    {
	    case ROUND_NEAREST_INT | ROUND_SAE:
	      fputs (s: "{rn-sae}", stream: file);
	      break;
	    case ROUND_NEG_INF | ROUND_SAE:
	      fputs (s: "{rd-sae}", stream: file);
	      break;
	    case ROUND_POS_INF | ROUND_SAE:
	      fputs (s: "{ru-sae}", stream: file);
	      break;
	    case ROUND_ZERO | ROUND_SAE:
	      fputs (s: "{rz-sae}", stream: file);
	      break;
	    default:
	      output_operand_lossage ("operand is not a specific integer, "
				      "invalid operand code 'R'");
	    }

	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    fputs (s: ", ", stream: file);

	  return;

	case '*':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc (c: '*', stream: file);
	  return;

	case '&':
	  {
	    const char *name = get_some_local_dynamic_name ();
	    if (name == NULL)
	      output_operand_lossage ("'%%&' used without any "
				      "local dynamic TLS references");
	    else
	      assemble_name (file, name);
	    return;
	  }

	case '+':
	  {
	    rtx x;

	    if (!optimize
		|| optimize_function_for_size_p (cfun)
		|| !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = profile_probability::from_reg_br_prob_note
				 (XINT (x, 0)).to_reg_br_prob_base ();

		/* Only emit a hint for predictions outside the 45%-55%
		   "don't know" band.  */
		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    bool taken = pred_val > REG_BR_PROB_BASE / 2;
		    bool cputaken
		      = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the case default branch prediction
		       heuristics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs (s: "ds ; ", stream: file);
			else
			  fputs (s: "cs ; ", stream: file);
		      }
		  }
	      }
	    return;
	  }

	case ';':
#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
	  putc (';', file);
#endif
	  return;

	case '~':
	  putc (TARGET_AVX2 ? 'i' : 'f', stream: file);
	  return;

	case 'M':
	  if (TARGET_X32)
	    {
	      /* NB: 32-bit indices in VSIB address are sign-extended
		 to 64 bits. In x32, if 32-bit address 0xf7fa3010 is
		 sign-extended to 0xfffffffff7fa3010 which is invalid
		 address. Add addr32 prefix if there is no base
		 register nor symbol.  */
	      bool ok;
	      struct ix86_address parts;
	      ok = ix86_decompose_address (addr: x, out: &parts);
	      gcc_assert (ok && parts.index == NULL_RTX);
	      if (parts.base == NULL_RTX
		  && (parts.disp == NULL_RTX
		      || !symbolic_operand (parts.disp,
					    GET_MODE (parts.disp))))
		fputs (s: "addr32 ", stream: file);
	    }
	  return;

	case '^':
	  if (TARGET_64BIT && Pmode != word_mode)
	    fputs (s: "addr32 ", stream: file);
	  return;

	case '!':
	  if (ix86_notrack_prefixed_insn_p (current_output_insn))
	    fputs (s: "notrack ", stream: file);
	  return;

	default:
	  output_operand_lossage ("invalid operand code '%c'", code);
	}
    }

  /* Modifier handling done (or CODE == 0): print the operand itself.  */
  if (REG_P (x))
    print_reg (x, code, file);

  else if (MEM_P (x))
    {
      rtx addr = XEXP (x, 0);

      /* No `byte ptr' prefix for call instructions ... */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
	{
	  machine_mode mode = GET_MODE (x);
	  const char *size;

	  /* Check for explicit size override codes.  */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";
	  else if (code == 'q')
	    size = "QWORD";
	  else if (code == 'x')
	    size = "XMMWORD";
	  else if (code == 't')
	    size = "YMMWORD";
	  else if (code == 'g')
	    size = "ZMMWORD";
	  else if (mode == BLKmode)
	    /* ... or BLKmode operands, when not overridden.  */
	    size = NULL;
	  else
	    switch (GET_MODE_SIZE (mode))
	      {
	      case 1: size = "BYTE"; break;
	      case 2: size = "WORD"; break;
	      case 4: size = "DWORD"; break;
	      case 8: size = "QWORD"; break;
	      case 12: size = "TBYTE"; break;
	      case 16:
		if (mode == XFmode)
		  size = "TBYTE";
		else
		  size = "XMMWORD";
		break;
	      case 32: size = "YMMWORD"; break;
	      case 64: size = "ZMMWORD"; break;
	      default:
		gcc_unreachable ();
	      }
	  if (size)
	    {
	      fputs (s: size, stream: file);
	      fputs (s: " PTR ", stream: file);
	    }
	}

      if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
      else
	ix86_print_operand_address_as
	  (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
    }

  /* Floating-point immediates are printed as hex bit patterns of the
     target representation.  */
  else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
    {
      long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
			       REAL_MODE_FORMAT (HFmode));
      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc (c: '$', stream: file);
      fprintf (stream: file, format: "0x%04x", (unsigned int) l);
    }

  else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
    {
      long l;

      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc (c: '$', stream: file);
      /* Sign extend 32bit SFmode immediate to 8 bytes.  */
      if (code == 'q')
	fprintf (stream: file, format: "0x%08" HOST_LONG_LONG_FORMAT "x",
		 (unsigned long long) (int) l);
      else
	fprintf (stream: file, format: "0x%08x", (unsigned int) l);
    }

  else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
    {
      long l[2];

      REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc (c: '$', stream: file);
      fprintf (stream: file, format: "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fputs (s: dstr, stream: file);
    }

  /* Print bcst_mem_operand: the memory operand followed by the AVX-512
     embedded-broadcast decorator {1toN}.  */
  else if (GET_CODE (x) == VEC_DUPLICATE)
    {
      machine_mode vmode = GET_MODE (x);
      /* Must be bcst_memory_operand.  */
      gcc_assert (bcst_mem_operand (x, vmode));

      rtx mem = XEXP (x,0);
      ix86_print_operand (file, x: mem, code: 0);

      switch (vmode)
	{
	case E_V2DImode:
	case E_V2DFmode:
	  fputs (s: "{1to2}", stream: file);
	  break;
	case E_V4SImode:
	case E_V4SFmode:
	case E_V4DImode:
	case E_V4DFmode:
	  fputs (s: "{1to4}", stream: file);
	  break;
	case E_V8SImode:
	case E_V8SFmode:
	case E_V8DFmode:
	case E_V8DImode:
	case E_V8HFmode:
	  fputs (s: "{1to8}", stream: file);
	  break;
	case E_V16SFmode:
	case E_V16SImode:
	case E_V16HFmode:
	  fputs (s: "{1to16}", stream: file);
	  break;
	case E_V32HFmode:
	  fputs (s: "{1to32}", stream: file);
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
	 In 64-bit mode, we should probably support all 8-byte vectors,
	 since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
	{
	  if (x != CONST0_RTX (GET_MODE (x)))
	    output_operand_lossage ("invalid vector immediate");
	  x = const0_rtx;
	}

      if (code == 'P')
	{
	  if (ix86_force_load_from_GOT_p (x, call_p: true))
	    {
	      /* For inline assembly statement, load function address
		 from GOT with 'P' operand modifier to avoid PLT.  */
	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
				  (TARGET_64BIT
				   ? UNSPEC_GOTPCREL
				   : UNSPEC_GOT));
	      x = gen_rtx_CONST (Pmode, x);
	      x = gen_const_mem (Pmode, x);
	      ix86_print_operand (file, x, code: 'A');
	      return;
	    }
	}
      else if (code != 'p')
	{
	  /* Immediate marker: '$' for AT&T, "OFFSET FLAT:" for Intel
	     symbolic operands.  */
	  if (CONST_INT_P (x))
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc (c: '$', stream: file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc (c: '$', stream: file);
	      else
		fputs (s: "OFFSET FLAT:", stream: file);
	    }
	}
      if (CONST_INT_P (x))
	fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic || MACHOPIC_INDIRECT)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
14486 | |
/* Return whether CODE is a punctuation character that ix86_print_operand
   accepts with no operand (see the '%' operand-code table above).  */

static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  switch (code)
    {
    case '*':
    case '+':
    case '&':
    case ';':
    case '~':
    case '^':
    case '!':
      return true;
    default:
      return false;
    }
}
14493 | |
14494 | /* Print a memory operand whose address is ADDR. */ |
14495 | |
14496 | static void |
14497 | ix86_print_operand_address_as (FILE *file, rtx addr, |
14498 | addr_space_t as, bool raw) |
14499 | { |
14500 | struct ix86_address parts; |
14501 | rtx base, index, disp; |
14502 | int scale; |
14503 | int ok; |
14504 | bool vsib = false; |
14505 | int code = 0; |
14506 | |
14507 | if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR) |
14508 | { |
14509 | ok = ix86_decompose_address (XVECEXP (addr, 0, 0), out: &parts); |
14510 | gcc_assert (parts.index == NULL_RTX); |
14511 | parts.index = XVECEXP (addr, 0, 1); |
14512 | parts.scale = INTVAL (XVECEXP (addr, 0, 2)); |
14513 | addr = XVECEXP (addr, 0, 0); |
14514 | vsib = true; |
14515 | } |
14516 | else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR) |
14517 | { |
14518 | gcc_assert (TARGET_64BIT); |
14519 | ok = ix86_decompose_address (XVECEXP (addr, 0, 0), out: &parts); |
14520 | code = 'q'; |
14521 | } |
14522 | else |
14523 | ok = ix86_decompose_address (addr, out: &parts); |
14524 | |
14525 | gcc_assert (ok); |
14526 | |
14527 | base = parts.base; |
14528 | index = parts.index; |
14529 | disp = parts.disp; |
14530 | scale = parts.scale; |
14531 | |
14532 | if (ADDR_SPACE_GENERIC_P (as)) |
14533 | as = parts.seg; |
14534 | else |
14535 | gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg)); |
14536 | |
14537 | if (!ADDR_SPACE_GENERIC_P (as) && !raw) |
14538 | { |
14539 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14540 | putc (c: '%', stream: file); |
14541 | |
14542 | switch (as) |
14543 | { |
14544 | case ADDR_SPACE_SEG_FS: |
14545 | fputs (s: "fs:" , stream: file); |
14546 | break; |
14547 | case ADDR_SPACE_SEG_GS: |
14548 | fputs (s: "gs:" , stream: file); |
14549 | break; |
14550 | default: |
14551 | gcc_unreachable (); |
14552 | } |
14553 | } |
14554 | |
14555 | /* Use one byte shorter RIP relative addressing for 64bit mode. */ |
14556 | if (TARGET_64BIT && !base && !index && !raw) |
14557 | { |
14558 | rtx symbol = disp; |
14559 | |
14560 | if (GET_CODE (disp) == CONST |
14561 | && GET_CODE (XEXP (disp, 0)) == PLUS |
14562 | && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) |
14563 | symbol = XEXP (XEXP (disp, 0), 0); |
14564 | |
14565 | if (GET_CODE (symbol) == LABEL_REF |
14566 | || (GET_CODE (symbol) == SYMBOL_REF |
14567 | && SYMBOL_REF_TLS_MODEL (symbol) == 0)) |
14568 | base = pc_rtx; |
14569 | } |
14570 | |
14571 | if (!base && !index) |
14572 | { |
14573 | /* Displacement only requires special attention. */ |
14574 | if (CONST_INT_P (disp)) |
14575 | { |
14576 | if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as)) |
14577 | fputs (s: "ds:" , stream: file); |
14578 | fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp)); |
14579 | } |
14580 | /* Load the external function address via the GOT slot to avoid PLT. */ |
14581 | else if (GET_CODE (disp) == CONST |
14582 | && GET_CODE (XEXP (disp, 0)) == UNSPEC |
14583 | && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL |
14584 | || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT) |
14585 | && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) |
14586 | output_pic_addr_const (file, x: disp, code: 0); |
14587 | else if (flag_pic) |
14588 | output_pic_addr_const (file, x: disp, code: 0); |
14589 | else |
14590 | output_addr_const (file, disp); |
14591 | } |
14592 | else |
14593 | { |
14594 | /* Print SImode register names to force addr32 prefix. */ |
14595 | if (SImode_address_operand (addr, VOIDmode)) |
14596 | { |
14597 | if (flag_checking) |
14598 | { |
14599 | gcc_assert (TARGET_64BIT); |
14600 | switch (GET_CODE (addr)) |
14601 | { |
14602 | case SUBREG: |
14603 | gcc_assert (GET_MODE (addr) == SImode); |
14604 | gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode); |
14605 | break; |
14606 | case ZERO_EXTEND: |
14607 | case AND: |
14608 | gcc_assert (GET_MODE (addr) == DImode); |
14609 | break; |
14610 | default: |
14611 | gcc_unreachable (); |
14612 | } |
14613 | } |
14614 | gcc_assert (!code); |
14615 | code = 'k'; |
14616 | } |
14617 | else if (code == 0 |
14618 | && TARGET_X32 |
14619 | && disp |
14620 | && CONST_INT_P (disp) |
14621 | && INTVAL (disp) < -16*1024*1024) |
14622 | { |
14623 | /* X32 runs in 64-bit mode, where displacement, DISP, in |
14624 | address DISP(%r64), is encoded as 32-bit immediate sign- |
14625 | extended from 32-bit to 64-bit. For -0x40000300(%r64), |
14626 | address is %r64 + 0xffffffffbffffd00. When %r64 < |
14627 | 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64, |
14628 | which is invalid for x32. The correct address is %r64 |
14629 | - 0x40000300 == 0xf7ffdd64. To properly encode |
14630 | -0x40000300(%r64) for x32, we zero-extend negative |
14631 | displacement by forcing addr32 prefix which truncates |
14632 | 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should |
14633 | zero-extend all negative displacements, including -1(%rsp). |
14634 | However, for small negative displacements, sign-extension |
14635 | won't cause overflow. We only zero-extend negative |
14636 | displacements if they < -16*1024*1024, which is also used |
14637 | to check legitimate address displacements for PIC. */ |
14638 | code = 'k'; |
14639 | } |
14640 | |
14641 | /* Since the upper 32 bits of RSP are always zero for x32, |
14642 | we can encode %esp as %rsp to avoid 0x67 prefix if |
14643 | there is no index register. */ |
14644 | if (TARGET_X32 && Pmode == SImode |
14645 | && !index && base && REG_P (base) && REGNO (base) == SP_REG) |
14646 | code = 'q'; |
14647 | |
14648 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14649 | { |
14650 | if (disp) |
14651 | { |
14652 | if (flag_pic) |
14653 | output_pic_addr_const (file, x: disp, code: 0); |
14654 | else if (GET_CODE (disp) == LABEL_REF) |
14655 | output_asm_label (disp); |
14656 | else |
14657 | output_addr_const (file, disp); |
14658 | } |
14659 | |
14660 | putc (c: '(', stream: file); |
14661 | if (base) |
14662 | print_reg (x: base, code, file); |
14663 | if (index) |
14664 | { |
14665 | putc (c: ',', stream: file); |
14666 | print_reg (x: index, code: vsib ? 0 : code, file); |
14667 | if (scale != 1 || vsib) |
14668 | fprintf (stream: file, format: ",%d" , scale); |
14669 | } |
14670 | putc (c: ')', stream: file); |
14671 | } |
14672 | else |
14673 | { |
14674 | rtx offset = NULL_RTX; |
14675 | |
14676 | if (disp) |
14677 | { |
14678 | /* Pull out the offset of a symbol; print any symbol itself. */ |
14679 | if (GET_CODE (disp) == CONST |
14680 | && GET_CODE (XEXP (disp, 0)) == PLUS |
14681 | && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) |
14682 | { |
14683 | offset = XEXP (XEXP (disp, 0), 1); |
14684 | disp = gen_rtx_CONST (VOIDmode, |
14685 | XEXP (XEXP (disp, 0), 0)); |
14686 | } |
14687 | |
14688 | if (flag_pic) |
14689 | output_pic_addr_const (file, x: disp, code: 0); |
14690 | else if (GET_CODE (disp) == LABEL_REF) |
14691 | output_asm_label (disp); |
14692 | else if (CONST_INT_P (disp)) |
14693 | offset = disp; |
14694 | else |
14695 | output_addr_const (file, disp); |
14696 | } |
14697 | |
14698 | putc (c: '[', stream: file); |
14699 | if (base) |
14700 | { |
14701 | print_reg (x: base, code, file); |
14702 | if (offset) |
14703 | { |
14704 | if (INTVAL (offset) >= 0) |
14705 | putc (c: '+', stream: file); |
14706 | fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); |
14707 | } |
14708 | } |
14709 | else if (offset) |
14710 | fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); |
14711 | else |
14712 | putc (c: '0', stream: file); |
14713 | |
14714 | if (index) |
14715 | { |
14716 | putc (c: '+', stream: file); |
14717 | print_reg (x: index, code: vsib ? 0 : code, file); |
14718 | if (scale != 1 || vsib) |
14719 | fprintf (stream: file, format: "*%d" , scale); |
14720 | } |
14721 | putc (c: ']', stream: file); |
14722 | } |
14723 | } |
14724 | } |
14725 | |
14726 | static void |
14727 | ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr) |
14728 | { |
14729 | if (this_is_asm_operands && ! address_operand (addr, VOIDmode)) |
14730 | output_operand_lossage ("invalid constraints for operand" ); |
14731 | else |
14732 | ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, raw: false); |
14733 | } |
14734 | |
14735 | /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */ |
14736 | |
14737 | static bool |
14738 | (FILE *file, rtx x) |
14739 | { |
14740 | rtx op; |
14741 | |
14742 | if (GET_CODE (x) != UNSPEC) |
14743 | return false; |
14744 | |
14745 | op = XVECEXP (x, 0, 0); |
14746 | switch (XINT (x, 1)) |
14747 | { |
14748 | case UNSPEC_GOTOFF: |
14749 | output_addr_const (file, op); |
14750 | fputs (s: "@gotoff" , stream: file); |
14751 | break; |
14752 | case UNSPEC_GOTTPOFF: |
14753 | output_addr_const (file, op); |
14754 | /* FIXME: This might be @TPOFF in Sun ld. */ |
14755 | fputs (s: "@gottpoff" , stream: file); |
14756 | break; |
14757 | case UNSPEC_TPOFF: |
14758 | output_addr_const (file, op); |
14759 | fputs (s: "@tpoff" , stream: file); |
14760 | break; |
14761 | case UNSPEC_NTPOFF: |
14762 | output_addr_const (file, op); |
14763 | if (TARGET_64BIT) |
14764 | fputs (s: "@tpoff" , stream: file); |
14765 | else |
14766 | fputs (s: "@ntpoff" , stream: file); |
14767 | break; |
14768 | case UNSPEC_DTPOFF: |
14769 | output_addr_const (file, op); |
14770 | fputs (s: "@dtpoff" , stream: file); |
14771 | break; |
14772 | case UNSPEC_GOTNTPOFF: |
14773 | output_addr_const (file, op); |
14774 | if (TARGET_64BIT) |
14775 | fputs (ASSEMBLER_DIALECT == ASM_ATT ? |
14776 | "@gottpoff(%rip)" : "@gottpoff[rip]" , stream: file); |
14777 | else |
14778 | fputs (s: "@gotntpoff" , stream: file); |
14779 | break; |
14780 | case UNSPEC_INDNTPOFF: |
14781 | output_addr_const (file, op); |
14782 | fputs (s: "@indntpoff" , stream: file); |
14783 | break; |
14784 | #if TARGET_MACHO |
14785 | case UNSPEC_MACHOPIC_OFFSET: |
14786 | output_addr_const (file, op); |
14787 | putc ('-', file); |
14788 | machopic_output_function_base_name (file); |
14789 | break; |
14790 | #endif |
14791 | |
14792 | default: |
14793 | return false; |
14794 | } |
14795 | |
14796 | return true; |
14797 | } |
14798 | |
14799 | |
14800 | /* Output code to perform a 387 binary operation in INSN, one of PLUS, |
14801 | MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] |
14802 | is the expression of the binary operation. The output may either be |
14803 | emitted here, or returned to the caller, like all output_* functions. |
14804 | |
14805 | There is no guarantee that the operands are the same mode, as they |
14806 | might be within FLOAT or FLOAT_EXTEND expressions. */ |
14807 | |
14808 | #ifndef SYSV386_COMPAT |
14809 | /* Set to 1 for compatibility with brain-damaged assemblers. No-one |
14810 | wants to fix the assemblers because that causes incompatibility |
14811 | with gcc. No-one wants to fix gcc because that causes |
14812 | incompatibility with assemblers... You can use the option of |
14813 | -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */ |
14814 | #define SYSV386_COMPAT 1 |
14815 | #endif |
14816 | |
14817 | const char * |
14818 | output_387_binary_op (rtx_insn *insn, rtx *operands) |
14819 | { |
14820 | static char buf[40]; |
14821 | const char *p; |
14822 | bool is_sse |
14823 | = (SSE_REG_P (operands[0]) |
14824 | || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2])); |
14825 | |
14826 | if (is_sse) |
14827 | p = "%v" ; |
14828 | else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT |
14829 | || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) |
14830 | p = "fi" ; |
14831 | else |
14832 | p = "f" ; |
14833 | |
14834 | strcpy (dest: buf, src: p); |
14835 | |
14836 | switch (GET_CODE (operands[3])) |
14837 | { |
14838 | case PLUS: |
14839 | p = "add" ; break; |
14840 | case MINUS: |
14841 | p = "sub" ; break; |
14842 | case MULT: |
14843 | p = "mul" ; break; |
14844 | case DIV: |
14845 | p = "div" ; break; |
14846 | default: |
14847 | gcc_unreachable (); |
14848 | } |
14849 | |
14850 | strcat (dest: buf, src: p); |
14851 | |
14852 | if (is_sse) |
14853 | { |
14854 | p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd" ; |
14855 | strcat (dest: buf, src: p); |
14856 | |
14857 | if (TARGET_AVX) |
14858 | p = "\t{%2, %1, %0|%0, %1, %2}" ; |
14859 | else |
14860 | p = "\t{%2, %0|%0, %2}" ; |
14861 | |
14862 | strcat (dest: buf, src: p); |
14863 | return buf; |
14864 | } |
14865 | |
14866 | /* Even if we do not want to check the inputs, this documents input |
14867 | constraints. Which helps in understanding the following code. */ |
14868 | if (flag_checking) |
14869 | { |
14870 | if (STACK_REG_P (operands[0]) |
14871 | && ((REG_P (operands[1]) |
14872 | && REGNO (operands[0]) == REGNO (operands[1]) |
14873 | && (STACK_REG_P (operands[2]) || MEM_P (operands[2]))) |
14874 | || (REG_P (operands[2]) |
14875 | && REGNO (operands[0]) == REGNO (operands[2]) |
14876 | && (STACK_REG_P (operands[1]) || MEM_P (operands[1])))) |
14877 | && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2]))) |
14878 | ; /* ok */ |
14879 | else |
14880 | gcc_unreachable (); |
14881 | } |
14882 | |
14883 | switch (GET_CODE (operands[3])) |
14884 | { |
14885 | case MULT: |
14886 | case PLUS: |
14887 | if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) |
14888 | std::swap (a&: operands[1], b&: operands[2]); |
14889 | |
14890 | /* know operands[0] == operands[1]. */ |
14891 | |
14892 | if (MEM_P (operands[2])) |
14893 | { |
14894 | p = "%Z2\t%2" ; |
14895 | break; |
14896 | } |
14897 | |
14898 | if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) |
14899 | { |
14900 | if (STACK_TOP_P (operands[0])) |
14901 | /* How is it that we are storing to a dead operand[2]? |
14902 | Well, presumably operands[1] is dead too. We can't |
14903 | store the result to st(0) as st(0) gets popped on this |
14904 | instruction. Instead store to operands[2] (which I |
14905 | think has to be st(1)). st(1) will be popped later. |
14906 | gcc <= 2.8.1 didn't have this check and generated |
14907 | assembly code that the Unixware assembler rejected. */ |
14908 | p = "p\t{%0, %2|%2, %0}" ; /* st(1) = st(0) op st(1); pop */ |
14909 | else |
14910 | p = "p\t{%2, %0|%0, %2}" ; /* st(r1) = st(r1) op st(0); pop */ |
14911 | break; |
14912 | } |
14913 | |
14914 | if (STACK_TOP_P (operands[0])) |
14915 | p = "\t{%y2, %0|%0, %y2}" ; /* st(0) = st(0) op st(r2) */ |
14916 | else |
14917 | p = "\t{%2, %0|%0, %2}" ; /* st(r1) = st(r1) op st(0) */ |
14918 | break; |
14919 | |
14920 | case MINUS: |
14921 | case DIV: |
14922 | if (MEM_P (operands[1])) |
14923 | { |
14924 | p = "r%Z1\t%1" ; |
14925 | break; |
14926 | } |
14927 | |
14928 | if (MEM_P (operands[2])) |
14929 | { |
14930 | p = "%Z2\t%2" ; |
14931 | break; |
14932 | } |
14933 | |
14934 | if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) |
14935 | { |
14936 | #if SYSV386_COMPAT |
14937 | /* The SystemV/386 SVR3.2 assembler, and probably all AT&T |
14938 | derived assemblers, confusingly reverse the direction of |
14939 | the operation for fsub{r} and fdiv{r} when the |
14940 | destination register is not st(0). The Intel assembler |
14941 | doesn't have this brain damage. Read !SYSV386_COMPAT to |
14942 | figure out what the hardware really does. */ |
14943 | if (STACK_TOP_P (operands[0])) |
14944 | p = "{p\t%0, %2|rp\t%2, %0}" ; |
14945 | else |
14946 | p = "{rp\t%2, %0|p\t%0, %2}" ; |
14947 | #else |
14948 | if (STACK_TOP_P (operands[0])) |
14949 | /* As above for fmul/fadd, we can't store to st(0). */ |
14950 | p = "rp\t{%0, %2|%2, %0}" ; /* st(1) = st(0) op st(1); pop */ |
14951 | else |
14952 | p = "p\t{%2, %0|%0, %2}" ; /* st(r1) = st(r1) op st(0); pop */ |
14953 | #endif |
14954 | break; |
14955 | } |
14956 | |
14957 | if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) |
14958 | { |
14959 | #if SYSV386_COMPAT |
14960 | if (STACK_TOP_P (operands[0])) |
14961 | p = "{rp\t%0, %1|p\t%1, %0}" ; |
14962 | else |
14963 | p = "{p\t%1, %0|rp\t%0, %1}" ; |
14964 | #else |
14965 | if (STACK_TOP_P (operands[0])) |
14966 | p = "p\t{%0, %1|%1, %0}" ; /* st(1) = st(1) op st(0); pop */ |
14967 | else |
14968 | p = "rp\t{%1, %0|%0, %1}" ; /* st(r2) = st(0) op st(r2); pop */ |
14969 | #endif |
14970 | break; |
14971 | } |
14972 | |
14973 | if (STACK_TOP_P (operands[0])) |
14974 | { |
14975 | if (STACK_TOP_P (operands[1])) |
14976 | p = "\t{%y2, %0|%0, %y2}" ; /* st(0) = st(0) op st(r2) */ |
14977 | else |
14978 | p = "r\t{%y1, %0|%0, %y1}" ; /* st(0) = st(r1) op st(0) */ |
14979 | break; |
14980 | } |
14981 | else if (STACK_TOP_P (operands[1])) |
14982 | { |
14983 | #if SYSV386_COMPAT |
14984 | p = "{\t%1, %0|r\t%0, %1}" ; |
14985 | #else |
14986 | p = "r\t{%1, %0|%0, %1}" ; /* st(r2) = st(0) op st(r2) */ |
14987 | #endif |
14988 | } |
14989 | else |
14990 | { |
14991 | #if SYSV386_COMPAT |
14992 | p = "{r\t%2, %0|\t%0, %2}" ; |
14993 | #else |
14994 | p = "\t{%2, %0|%0, %2}" ; /* st(r1) = st(r1) op st(0) */ |
14995 | #endif |
14996 | } |
14997 | break; |
14998 | |
14999 | default: |
15000 | gcc_unreachable (); |
15001 | } |
15002 | |
15003 | strcat (dest: buf, src: p); |
15004 | return buf; |
15005 | } |
15006 | |
15007 | /* Return needed mode for entity in optimize_mode_switching pass. */ |
15008 | |
15009 | static int |
15010 | ix86_dirflag_mode_needed (rtx_insn *insn) |
15011 | { |
15012 | if (CALL_P (insn)) |
15013 | { |
15014 | if (cfun->machine->func_type == TYPE_NORMAL) |
15015 | return X86_DIRFLAG_ANY; |
15016 | else |
15017 | /* No need to emit CLD in interrupt handler for TARGET_CLD. */ |
15018 | return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET; |
15019 | } |
15020 | |
15021 | if (recog_memoized (insn) < 0) |
15022 | return X86_DIRFLAG_ANY; |
15023 | |
15024 | if (get_attr_type (insn) == TYPE_STR) |
15025 | { |
15026 | /* Emit cld instruction if stringops are used in the function. */ |
15027 | if (cfun->machine->func_type == TYPE_NORMAL) |
15028 | return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY; |
15029 | else |
15030 | return X86_DIRFLAG_RESET; |
15031 | } |
15032 | |
15033 | return X86_DIRFLAG_ANY; |
15034 | } |
15035 | |
15036 | /* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */ |
15037 | |
15038 | static bool |
15039 | ix86_check_avx_upper_register (const_rtx exp) |
15040 | { |
15041 | return (SSE_REG_P (exp) |
15042 | && !EXT_REX_SSE_REG_P (exp) |
15043 | && GET_MODE_BITSIZE (GET_MODE (exp)) > 128); |
15044 | } |
15045 | |
15046 | /* Check if a 256bit or 512bit AVX register is referenced in stores. */ |
15047 | |
15048 | static void |
15049 | ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data) |
15050 | { |
15051 | if (ix86_check_avx_upper_register (exp: dest)) |
15052 | { |
15053 | bool *used = (bool *) data; |
15054 | *used = true; |
15055 | } |
15056 | } |
15057 | |
15058 | /* Return needed mode for entity in optimize_mode_switching pass. */ |
15059 | |
15060 | static int |
15061 | ix86_avx_u128_mode_needed (rtx_insn *insn) |
15062 | { |
15063 | if (DEBUG_INSN_P (insn)) |
15064 | return AVX_U128_ANY; |
15065 | |
15066 | if (CALL_P (insn)) |
15067 | { |
15068 | rtx link; |
15069 | |
15070 | /* Needed mode is set to AVX_U128_CLEAN if there are |
15071 | no 256bit or 512bit modes used in function arguments. */ |
15072 | for (link = CALL_INSN_FUNCTION_USAGE (insn); |
15073 | link; |
15074 | link = XEXP (link, 1)) |
15075 | { |
15076 | if (GET_CODE (XEXP (link, 0)) == USE) |
15077 | { |
15078 | rtx arg = XEXP (XEXP (link, 0), 0); |
15079 | |
15080 | if (ix86_check_avx_upper_register (exp: arg)) |
15081 | return AVX_U128_DIRTY; |
15082 | } |
15083 | } |
15084 | |
15085 | /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit |
15086 | nor 512bit registers used in the function return register. */ |
15087 | bool avx_upper_reg_found = false; |
15088 | note_stores (insn, ix86_check_avx_upper_stores, |
15089 | &avx_upper_reg_found); |
15090 | if (avx_upper_reg_found) |
15091 | return AVX_U128_DIRTY; |
15092 | |
15093 | /* If the function is known to preserve some SSE registers, |
15094 | RA and previous passes can legitimately rely on that for |
15095 | modes wider than 256 bits. It's only safe to issue a |
15096 | vzeroupper if all SSE registers are clobbered. */ |
15097 | const function_abi &abi = insn_callee_abi (insn); |
15098 | if (vzeroupper_pattern (PATTERN (insn), VOIDmode) |
15099 | /* Should be safe to issue an vzeroupper before sibling_call_p. |
15100 | Also there not mode_exit for sibling_call, so there could be |
15101 | missing vzeroupper for that. */ |
15102 | || !(SIBLING_CALL_P (insn) |
15103 | || hard_reg_set_subset_p (reg_class_contents[SSE_REGS], |
15104 | y: abi.mode_clobbers (V4DImode)))) |
15105 | return AVX_U128_ANY; |
15106 | |
15107 | return AVX_U128_CLEAN; |
15108 | } |
15109 | |
15110 | subrtx_iterator::array_type array; |
15111 | |
15112 | rtx set = single_set (insn); |
15113 | if (set) |
15114 | { |
15115 | rtx dest = SET_DEST (set); |
15116 | rtx src = SET_SRC (set); |
15117 | if (ix86_check_avx_upper_register (exp: dest)) |
15118 | { |
15119 | /* This is an YMM/ZMM load. Return AVX_U128_DIRTY if the |
15120 | source isn't zero. */ |
15121 | if (standard_sse_constant_p (x: src, GET_MODE (dest)) != 1) |
15122 | return AVX_U128_DIRTY; |
15123 | else |
15124 | return AVX_U128_ANY; |
15125 | } |
15126 | else |
15127 | { |
15128 | FOR_EACH_SUBRTX (iter, array, src, NONCONST) |
15129 | if (ix86_check_avx_upper_register (exp: *iter)) |
15130 | return AVX_U128_DIRTY; |
15131 | } |
15132 | |
15133 | /* This isn't YMM/ZMM load/store. */ |
15134 | return AVX_U128_ANY; |
15135 | } |
15136 | |
15137 | /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced. |
15138 | Hardware changes state only when a 256bit register is written to, |
15139 | but we need to prevent the compiler from moving optimal insertion |
15140 | point above eventual read from 256bit or 512 bit register. */ |
15141 | FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) |
15142 | if (ix86_check_avx_upper_register (exp: *iter)) |
15143 | return AVX_U128_DIRTY; |
15144 | |
15145 | return AVX_U128_ANY; |
15146 | } |
15147 | |
15148 | /* Return mode that i387 must be switched into |
15149 | prior to the execution of insn. */ |
15150 | |
15151 | static int |
15152 | ix86_i387_mode_needed (int entity, rtx_insn *insn) |
15153 | { |
15154 | enum attr_i387_cw mode; |
15155 | |
15156 | /* The mode UNINITIALIZED is used to store control word after a |
15157 | function call or ASM pattern. The mode ANY specify that function |
15158 | has no requirements on the control word and make no changes in the |
15159 | bits we are interested in. */ |
15160 | |
15161 | if (CALL_P (insn) |
15162 | || (NONJUMP_INSN_P (insn) |
15163 | && (asm_noperands (PATTERN (insn)) >= 0 |
15164 | || GET_CODE (PATTERN (insn)) == ASM_INPUT))) |
15165 | return I387_CW_UNINITIALIZED; |
15166 | |
15167 | if (recog_memoized (insn) < 0) |
15168 | return I387_CW_ANY; |
15169 | |
15170 | mode = get_attr_i387_cw (insn); |
15171 | |
15172 | switch (entity) |
15173 | { |
15174 | case I387_ROUNDEVEN: |
15175 | if (mode == I387_CW_ROUNDEVEN) |
15176 | return mode; |
15177 | break; |
15178 | |
15179 | case I387_TRUNC: |
15180 | if (mode == I387_CW_TRUNC) |
15181 | return mode; |
15182 | break; |
15183 | |
15184 | case I387_FLOOR: |
15185 | if (mode == I387_CW_FLOOR) |
15186 | return mode; |
15187 | break; |
15188 | |
15189 | case I387_CEIL: |
15190 | if (mode == I387_CW_CEIL) |
15191 | return mode; |
15192 | break; |
15193 | |
15194 | default: |
15195 | gcc_unreachable (); |
15196 | } |
15197 | |
15198 | return I387_CW_ANY; |
15199 | } |
15200 | |
15201 | /* Return mode that entity must be switched into |
15202 | prior to the execution of insn. */ |
15203 | |
15204 | static int |
15205 | ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET) |
15206 | { |
15207 | switch (entity) |
15208 | { |
15209 | case X86_DIRFLAG: |
15210 | return ix86_dirflag_mode_needed (insn); |
15211 | case AVX_U128: |
15212 | return ix86_avx_u128_mode_needed (insn); |
15213 | case I387_ROUNDEVEN: |
15214 | case I387_TRUNC: |
15215 | case I387_FLOOR: |
15216 | case I387_CEIL: |
15217 | return ix86_i387_mode_needed (entity, insn); |
15218 | default: |
15219 | gcc_unreachable (); |
15220 | } |
15221 | return 0; |
15222 | } |
15223 | |
15224 | /* Calculate mode of upper 128bit AVX registers after the insn. */ |
15225 | |
15226 | static int |
15227 | ix86_avx_u128_mode_after (int mode, rtx_insn *insn) |
15228 | { |
15229 | rtx pat = PATTERN (insn); |
15230 | |
15231 | if (vzeroupper_pattern (pat, VOIDmode) |
15232 | || vzeroall_pattern (pat, VOIDmode)) |
15233 | return AVX_U128_CLEAN; |
15234 | |
15235 | /* We know that state is clean after CALL insn if there are no |
15236 | 256bit or 512bit registers used in the function return register. */ |
15237 | if (CALL_P (insn)) |
15238 | { |
15239 | bool avx_upper_reg_found = false; |
15240 | note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found); |
15241 | |
15242 | if (avx_upper_reg_found) |
15243 | return AVX_U128_DIRTY; |
15244 | |
15245 | /* If the function desn't clobber any sse registers or only clobber |
15246 | 128-bit part, Then vzeroupper isn't issued before the function exit. |
15247 | the status not CLEAN but ANY after the function. */ |
15248 | const function_abi &abi = insn_callee_abi (insn); |
15249 | if (!(SIBLING_CALL_P (insn) |
15250 | || hard_reg_set_subset_p (reg_class_contents[SSE_REGS], |
15251 | y: abi.mode_clobbers (V4DImode)))) |
15252 | return AVX_U128_ANY; |
15253 | |
15254 | return AVX_U128_CLEAN; |
15255 | } |
15256 | |
15257 | /* Otherwise, return current mode. Remember that if insn |
15258 | references AVX 256bit or 512bit registers, the mode was already |
15259 | changed to DIRTY from MODE_NEEDED. */ |
15260 | return mode; |
15261 | } |
15262 | |
15263 | /* Return the mode that an insn results in. */ |
15264 | |
15265 | static int |
15266 | ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET) |
15267 | { |
15268 | switch (entity) |
15269 | { |
15270 | case X86_DIRFLAG: |
15271 | return mode; |
15272 | case AVX_U128: |
15273 | return ix86_avx_u128_mode_after (mode, insn); |
15274 | case I387_ROUNDEVEN: |
15275 | case I387_TRUNC: |
15276 | case I387_FLOOR: |
15277 | case I387_CEIL: |
15278 | return mode; |
15279 | default: |
15280 | gcc_unreachable (); |
15281 | } |
15282 | } |
15283 | |
15284 | static int |
15285 | ix86_dirflag_mode_entry (void) |
15286 | { |
15287 | /* For TARGET_CLD or in the interrupt handler we can't assume |
15288 | direction flag state at function entry. */ |
15289 | if (TARGET_CLD |
15290 | || cfun->machine->func_type != TYPE_NORMAL) |
15291 | return X86_DIRFLAG_ANY; |
15292 | |
15293 | return X86_DIRFLAG_RESET; |
15294 | } |
15295 | |
15296 | static int |
15297 | ix86_avx_u128_mode_entry (void) |
15298 | { |
15299 | tree arg; |
15300 | |
15301 | /* Entry mode is set to AVX_U128_DIRTY if there are |
15302 | 256bit or 512bit modes used in function arguments. */ |
15303 | for (arg = DECL_ARGUMENTS (current_function_decl); arg; |
15304 | arg = TREE_CHAIN (arg)) |
15305 | { |
15306 | rtx incoming = DECL_INCOMING_RTL (arg); |
15307 | |
15308 | if (incoming && ix86_check_avx_upper_register (exp: incoming)) |
15309 | return AVX_U128_DIRTY; |
15310 | } |
15311 | |
15312 | return AVX_U128_CLEAN; |
15313 | } |
15314 | |
15315 | /* Return a mode that ENTITY is assumed to be |
15316 | switched to at function entry. */ |
15317 | |
15318 | static int |
15319 | ix86_mode_entry (int entity) |
15320 | { |
15321 | switch (entity) |
15322 | { |
15323 | case X86_DIRFLAG: |
15324 | return ix86_dirflag_mode_entry (); |
15325 | case AVX_U128: |
15326 | return ix86_avx_u128_mode_entry (); |
15327 | case I387_ROUNDEVEN: |
15328 | case I387_TRUNC: |
15329 | case I387_FLOOR: |
15330 | case I387_CEIL: |
15331 | return I387_CW_ANY; |
15332 | default: |
15333 | gcc_unreachable (); |
15334 | } |
15335 | } |
15336 | |
15337 | static int |
15338 | ix86_avx_u128_mode_exit (void) |
15339 | { |
15340 | rtx reg = crtl->return_rtx; |
15341 | |
15342 | /* Exit mode is set to AVX_U128_DIRTY if there are 256bit |
15343 | or 512 bit modes used in the function return register. */ |
15344 | if (reg && ix86_check_avx_upper_register (exp: reg)) |
15345 | return AVX_U128_DIRTY; |
15346 | |
15347 | /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit |
15348 | modes used in function arguments, otherwise return AVX_U128_CLEAN. |
15349 | */ |
15350 | return ix86_avx_u128_mode_entry (); |
15351 | } |
15352 | |
15353 | /* Return a mode that ENTITY is assumed to be |
15354 | switched to at function exit. */ |
15355 | |
15356 | static int |
15357 | ix86_mode_exit (int entity) |
15358 | { |
15359 | switch (entity) |
15360 | { |
15361 | case X86_DIRFLAG: |
15362 | return X86_DIRFLAG_ANY; |
15363 | case AVX_U128: |
15364 | return ix86_avx_u128_mode_exit (); |
15365 | case I387_ROUNDEVEN: |
15366 | case I387_TRUNC: |
15367 | case I387_FLOOR: |
15368 | case I387_CEIL: |
15369 | return I387_CW_ANY; |
15370 | default: |
15371 | gcc_unreachable (); |
15372 | } |
15373 | } |
15374 | |
/* Implement TARGET_MODE_PRIORITY: the N-th most preferred mode for any
   entity is simply mode number N.  */

static int
ix86_mode_priority (int /* entity */, int n)
{
  return n;
}
15380 | |
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  MODE selects the required rounding behavior:
   the live control word is saved into SLOT_CW_STORED and a copy with
   the rounding-control bits (mask 0x0c00) rewritten is stored into
   the stack slot dedicated to MODE, from which the patterns can
   fldcw it.  */

static void
emit_i387_cw_initialization (int mode)
{
  /* Stack slot receiving the control word saved by fnstcw.  */
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  /* Save the current control word, then load it into REG so the
     rounding-control bits can be modified.  */
  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  switch (mode)
    {
    case I387_CW_ROUNDEVEN:
      /* round to nearest: RC bits 00  */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      slot = SLOT_CW_ROUNDEVEN;
      break;

    case I387_CW_TRUNC:
      /* round toward zero (truncate): RC bits 11  */
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
      slot = SLOT_CW_TRUNC;
      break;

    case I387_CW_FLOOR:
      /* round down toward -oo: RC bits 01  */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
      slot = SLOT_CW_FLOOR;
      break;

    case I387_CW_CEIL:
      /* round up toward +oo: RC bits 10  */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
      slot = SLOT_CW_CEIL;
      break;

    default:
      gcc_unreachable ();
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  /* Store the modified control word into its dedicated slot.  */
  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
15435 | |
15436 | /* Generate one or more insns to set ENTITY to MODE. */ |
15437 | |
15438 | static void |
15439 | ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED, |
15440 | HARD_REG_SET regs_live ATTRIBUTE_UNUSED) |
15441 | { |
15442 | switch (entity) |
15443 | { |
15444 | case X86_DIRFLAG: |
15445 | if (mode == X86_DIRFLAG_RESET) |
15446 | emit_insn (gen_cld ()); |
15447 | break; |
15448 | case AVX_U128: |
15449 | if (mode == AVX_U128_CLEAN) |
15450 | ix86_expand_avx_vzeroupper (); |
15451 | break; |
15452 | case I387_ROUNDEVEN: |
15453 | case I387_TRUNC: |
15454 | case I387_FLOOR: |
15455 | case I387_CEIL: |
15456 | if (mode != I387_CW_ANY |
15457 | && mode != I387_CW_UNINITIALIZED) |
15458 | emit_i387_cw_initialization (mode); |
15459 | break; |
15460 | default: |
15461 | gcc_unreachable (); |
15462 | } |
15463 | } |
15464 | |
15465 | /* Output code for INSN to convert a float to a signed int. OPERANDS |
15466 | are the insn operands. The output may be [HSD]Imode and the input |
15467 | operand may be [SDX]Fmode. */ |
15468 | |
15469 | const char * |
15470 | output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp) |
15471 | { |
15472 | bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG); |
15473 | bool dimode_p = GET_MODE (operands[0]) == DImode; |
15474 | int round_mode = get_attr_i387_cw (insn); |
15475 | |
15476 | static char buf[40]; |
15477 | const char *p; |
15478 | |
15479 | /* Jump through a hoop or two for DImode, since the hardware has no |
15480 | non-popping instruction. We used to do this a different way, but |
15481 | that was somewhat fragile and broke with post-reload splitters. */ |
15482 | if ((dimode_p || fisttp) && !stack_top_dies) |
15483 | output_asm_insn ("fld\t%y1" , operands); |
15484 | |
15485 | gcc_assert (STACK_TOP_P (operands[1])); |
15486 | gcc_assert (MEM_P (operands[0])); |
15487 | gcc_assert (GET_MODE (operands[1]) != TFmode); |
15488 | |
15489 | if (fisttp) |
15490 | return "fisttp%Z0\t%0" ; |
15491 | |
15492 | strcpy (dest: buf, src: "fist" ); |
15493 | |
15494 | if (round_mode != I387_CW_ANY) |
15495 | output_asm_insn ("fldcw\t%3" , operands); |
15496 | |
15497 | p = "p%Z0\t%0" ; |
15498 | strcat (dest: buf, src: p + !(stack_top_dies || dimode_p)); |
15499 | |
15500 | output_asm_insn (buf, operands); |
15501 | |
15502 | if (round_mode != I387_CW_ANY) |
15503 | output_asm_insn ("fldcw\t%2" , operands); |
15504 | |
15505 | return "" ; |
15506 | } |
15507 | |
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  Falls back to fstp when the target does
   not want ffreep.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      /* The assembler cannot spell ffreep, so emit the raw opcode
	 bytes instead.  */
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (STACK_REGNO_P (regno));

      /* Convert to an index relative to %st(0).  */
      regno -= FIRST_STACK_REG;

      /* NOTE(review): the emitted 16-bit word 0xc<regno>df is, in
	 little-endian byte order, DF C0+i — presumably the encoding of
	 "ffreep %st(i)"; confirm against the ISA reference.  */
      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
15534 | |
15535 | |
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx_insn *insn, rtx *operands,
		   bool eflags_p, bool unordered_p)
{
  /* For fcomi the compare operands start at operands[0]; otherwise
     operands[0] is the status-word output and the compare operands
     start at operands[1].  */
  rtx *xops = eflags_p ? &operands[0] : &operands[1];
  bool stack_top_dies;

  /* Static buffer: the returned template must outlive this call.  */
  static char buf[40];
  const char *p;

  gcc_assert (STACK_TOP_P (xops[0]));

  /* If %st(0) dies in INSN we can use a popping compare variant,
     i.e. append a 'p' suffix to the mnemonic.  */
  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);

  if (eflags_p)
    {
      p = unordered_p ? "fucomi" : "fcomi" ;
      strcpy (dest: buf, src: p);

      /* Skip the leading 'p' of the suffix string when the stack top
	 survives (no pop wanted).  */
      p = "p\t{%y1, %0|%0, %y1}" ;
      strcat (dest: buf, src: p + !stack_top_dies);

      return buf;
    }

  if (STACK_REG_P (xops[1])
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
    {
      gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);

      /* If both the top of the 387 stack die, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare.  */
      p = unordered_p ? "fucompp" : "fcompp" ;
      strcpy (dest: buf, src: p);
    }
  else if (const0_operand (xops[1], VOIDmode))
    {
      /* Compare against zero with ftst; it has no unordered form.  */
      gcc_assert (!unordered_p);
      strcpy (dest: buf, src: "ftst" );
    }
  else
    {
      if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
	{
	  /* Integer operand: use ficom, which likewise has no
	     unordered form.  */
	  gcc_assert (!unordered_p);
	  p = "ficom" ;
	}
      else
	p = unordered_p ? "fucom" : "fcom" ;

      strcpy (dest: buf, src: p);

      /* As above, drop the 'p' (pop) when %st(0) survives.  */
      p = "p%Z2\t%y2" ;
      strcat (dest: buf, src: p + !stack_top_dies);
    }

  /* Emit the compare now; the returned template then stores the FPU
     status word into the output operand.  */
  output_asm_insn (buf, operands);
  return "fnstsw\t%0" ;
}
15600 | |
15601 | void |
15602 | ix86_output_addr_vec_elt (FILE *file, int value) |
15603 | { |
15604 | const char *directive = ASM_LONG; |
15605 | |
15606 | #ifdef ASM_QUAD |
15607 | if (TARGET_LP64) |
15608 | directive = ASM_QUAD; |
15609 | #else |
15610 | gcc_assert (!TARGET_64BIT); |
15611 | #endif |
15612 | |
15613 | fprintf (stream: file, format: "%s%s%d\n" , directive, LPREFIX, value); |
15614 | } |
15615 | |
/* Output one element of a relative address vector: the difference
   between local label L<VALUE> and the vector's base label L<REL>
   (or an equivalent PIC-safe expression).  */

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  /* 64-bit entries only when the case vector mode really is DImode.  */
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    /* Plain label difference, resolved by the assembler.  */
    fprintf (stream: file, format: "%s%s%d-%s%d\n" ,
	     directive, LPREFIX, value, LPREFIX, rel);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      /* Darwin: entries are relative to the function's base symbol.  */
      fprintf (file, ASM_LONG "%s%d-" , LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else if (HAVE_AS_GOTOFF_IN_DATA)
    /* Entry relative to the GOT.  */
    fprintf (stream: file, ASM_LONG "%s%d@GOTOFF\n" , LPREFIX, value);
  else
    /* Fallback expression the linker can resolve.  */
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n" ,
		 GOT_SYMBOL_NAME, LPREFIX, value);
}
15645 | |
15646 | #define LEA_MAX_STALL (3) |
15647 | #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1) |
15648 | |
15649 | /* Increase given DISTANCE in half-cycles according to |
15650 | dependencies between PREV and NEXT instructions. |
15651 | Add 1 half-cycle if there is no dependency and |
15652 | go to next cycle if there is some dependecy. */ |
15653 | |
15654 | static unsigned int |
15655 | increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance) |
15656 | { |
15657 | df_ref def, use; |
15658 | |
15659 | if (!prev || !next) |
15660 | return distance + (distance & 1) + 2; |
15661 | |
15662 | if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev)) |
15663 | return distance + 1; |
15664 | |
15665 | FOR_EACH_INSN_USE (use, next) |
15666 | FOR_EACH_INSN_DEF (def, prev) |
15667 | if (!DF_REF_IS_ARTIFICIAL (def) |
15668 | && DF_REF_REGNO (use) == DF_REF_REGNO (def)) |
15669 | return distance + (distance & 1) + 2; |
15670 | |
15671 | return distance + 1; |
15672 | } |
15673 | |
15674 | /* Function checks if instruction INSN defines register number |
15675 | REGNO1 or REGNO2. */ |
15676 | |
15677 | bool |
15678 | insn_defines_reg (unsigned int regno1, unsigned int regno2, |
15679 | rtx_insn *insn) |
15680 | { |
15681 | df_ref def; |
15682 | |
15683 | FOR_EACH_INSN_DEF (def, insn) |
15684 | if (DF_REF_REG_DEF_P (def) |
15685 | && !DF_REF_IS_ARTIFICIAL (def) |
15686 | && (regno1 == DF_REF_REGNO (def) |
15687 | || regno2 == DF_REF_REGNO (def))) |
15688 | return true; |
15689 | |
15690 | return false; |
15691 | } |
15692 | |
15693 | /* Function checks if instruction INSN uses register number |
15694 | REGNO as a part of address expression. */ |
15695 | |
15696 | static bool |
15697 | insn_uses_reg_mem (unsigned int regno, rtx insn) |
15698 | { |
15699 | df_ref use; |
15700 | |
15701 | FOR_EACH_INSN_USE (use, insn) |
15702 | if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use)) |
15703 | return true; |
15704 | |
15705 | return false; |
15706 | } |
15707 | |
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
			       rtx_insn *insn, int distance,
			       rtx_insn *start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (insn: start) : NULL;
  rtx_insn *prev = start;
  rtx_insn *next = NULL;

  *found = false;

  while (prev
	 && prev != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      /* Only real (non-debug, non-jump) insns contribute to the
	 distance and can carry the definition we look for.  */
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
	{
	  distance = increase_distance (prev, next, distance);
	  if (insn_defines_reg (regno1, regno2, insn: prev))
	    {
	      /* A defining insn that is unrecognized or not an lea is
		 a non-AGU definition: report it and stop the walk.  */
	      if (recog_memoized (insn: prev) < 0
		  || get_attr_type (prev) != TYPE_LEA)
		{
		  *found = true;
		  return distance;
		}
	    }

	  next = prev;
	}
      /* Stop at the head of the basic block.  */
      if (prev == BB_HEAD (bb))
	break;

      prev = PREV_INSN (insn: prev);
    }

  return distance;
}
15756 | |
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbor BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
			 rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  /* First scan backward inside INSN's own basic block.  */
  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
					      distance, start: PREV_INSN (insn),
					      found: &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      /* A self-loop edge means the block can be its own predecessor;
	 continue the walk from its end.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_non_agu_define_in_bb (regno1, regno2,
						  insn, distance,
						  BB_END (bb), found: &found);
      else
	{
	  /* Otherwise scan every predecessor block and keep the
	     shortest distance among those where a definition was
	     found.  */
	  int shortest_dist = -1;
	  bool found_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      int bb_dist
		= distance_non_agu_define_in_bb (regno1, regno2,
						 insn, distance,
						 BB_END (e->src),
						 found: &found_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found)
    return -1;

  /* Distances are accumulated in half-cycles; report whole cycles.  */
  return distance >> 1;
}
15827 | |
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
			rtx_insn *insn, int distance, rtx_insn *start,
			bool *found, bool *redefined)
{
  basic_block bb = NULL;
  rtx_insn *next = start;
  rtx_insn *prev = NULL;

  *found = false;
  *redefined = false;

  if (start != NULL_RTX)
    {
      bb = BLOCK_FOR_INSN (insn: start);
      if (start != BB_HEAD (bb))
	/* If insn and start belong to the same bb, set prev to insn,
	   so the call to increase_distance will increase the distance
	   between insns by 1.  */
	prev = insn;
    }

  while (next
	 && next != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      /* Only real (non-debug, non-jump) insns count.  */
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
	{
	  distance = increase_distance(prev, next, distance);
	  if (insn_uses_reg_mem (regno, insn: next))
	    {
	      /* Return DISTANCE if OP0 is used in memory
		 address in NEXT.  */
	      *found = true;
	      return distance;
	    }

	  if (insn_defines_reg (regno1: regno, INVALID_REGNUM, insn: next))
	    {
	      /* Return -1 if OP0 is set in NEXT.  */
	      *redefined = true;
	      return -1;
	    }

	  prev = next;
	}

      /* Stop at the end of the basic block.  */
      if (next == BB_END (bb))
	break;

      next = NEXT_INSN (insn: next);
    }

  return distance;
}
15892 | |
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  /* First scan forward inside INSN's own basic block.  */
  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno: regno0, insn, distance,
				       start: NEXT_INSN (insn),
				       found: &found, redefined: &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      /* A self-loop edge means execution can continue at the block's
	 own head.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_agu_use_in_bb (regno: regno0, insn,
					   distance, BB_HEAD (bb),
					   found: &found, redefined: &redefined);
      else
	{
	  /* Otherwise scan every successor block and keep the
	     shortest distance among those where a use was found.  */
	  int shortest_dist = -1;
	  bool found_in_bb = false;
	  bool redefined_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      int bb_dist
		= distance_agu_use_in_bb (regno: regno0, insn,
					  distance, BB_HEAD (e->dest),
					  found: &found_in_bb, redefined: &redefined_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found || redefined)
    return -1;

  /* Distances are accumulated in half-cycles; report whole cycles.  */
  return distance >> 1;
}
15959 | |
/* Define this macro to tune LEA priority vs ADD, it takes effect when
   there is a dilemma of choosing LEA or ADD
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0

/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  Instructions sequence has
   SPLIT_COST cycles higher latency than lea latency.  */

static bool
ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
		      unsigned int regno2, int split_cost, bool has_scale)
{
  int dist_define, dist_use;

  /* For Atom processors newer than Bonnell, if using a 2-source or
     3-source LEA for non-destructive destination purposes, or due to
     wanting ability to use SCALE, the use of LEA is justified.  */
  if (!TARGET_CPU_P (BONNELL))
    {
      if (has_scale)
	return true;
      if (split_cost < 1)
	return false;
      if (regno0 == regno1 || regno0 == regno2)
	return false;
      return true;
    }

  /* Remember recog_data content.  */
  struct recog_data_d recog_data_save = recog_data;

  /* Distance to the nearest non-AGU definition of a source register
     (stall producer) and to the nearest address use of the
     destination register (stall consumer).  */
  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  /* distance_non_agu_define can call get_attr_type which can call
     recog_memoized, restore recog_data back to previous content.  */
  recog_data = recog_data_save;

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non AGU operand definition, no AGU
	 operand usage and split cost is 0 then both lea
	 and non lea variants have same priority.  Currently
	 we prefer lea for 64 bit code and non lea on 32 bit
	 code.  */
      if (dist_use < 0 && split_cost == 0)
	return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
	return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory address then we just check
     that split cost exceeds AGU stall.  */
  if (dist_use < 0)
    return dist_define > LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
16028 | |
16029 | /* Return true if we need to split op0 = op1 + op2 into a sequence of |
16030 | move and add to avoid AGU stalls. */ |
16031 | |
16032 | bool |
16033 | ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[]) |
16034 | { |
16035 | unsigned int regno0, regno1, regno2; |
16036 | |
16037 | /* Check if we need to optimize. */ |
16038 | if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) |
16039 | return false; |
16040 | |
16041 | regno0 = true_regnum (operands[0]); |
16042 | regno1 = true_regnum (operands[1]); |
16043 | regno2 = true_regnum (operands[2]); |
16044 | |
16045 | /* We need to split only adds with non destructive |
16046 | destination operand. */ |
16047 | if (regno0 == regno1 || regno0 == regno2) |
16048 | return false; |
16049 | else |
16050 | return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost: 1, has_scale: false); |
16051 | } |
16052 | |
16053 | /* Return true if we should emit lea instruction instead of mov |
16054 | instruction. */ |
16055 | |
16056 | bool |
16057 | ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[]) |
16058 | { |
16059 | unsigned int regno0, regno1; |
16060 | |
16061 | /* Check if we need to optimize. */ |
16062 | if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) |
16063 | return false; |
16064 | |
16065 | /* Use lea for reg to reg moves only. */ |
16066 | if (!REG_P (operands[0]) || !REG_P (operands[1])) |
16067 | return false; |
16068 | |
16069 | regno0 = true_regnum (operands[0]); |
16070 | regno1 = true_regnum (operands[1]); |
16071 | |
16072 | return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, split_cost: 0, has_scale: false); |
16073 | } |
16074 | |
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls during peephole2.  */

bool
ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;
  int split_cost;
  struct ix86_address parts;
  int ok;

  /* The "at least two components" test below might not catch simple
     move or zero extension insns if parts.base is non-NULL and parts.disp
     is const0_rtx as the only components in the address, e.g. if the
     register is %rbp or %r13.  As this test is much cheaper and moves or
     zero extensions are the common case, do this check first.  */
  if (REG_P (operands[1])
      || (SImode_address_operand (operands[1], VOIDmode)
	  && REG_P (XEXP (operands[1], 0))))
    return false;

  ok = ix86_decompose_address (addr: operands[1], out: &parts);
  gcc_assert (ok);

  /* There should be at least two components in the address.  */
  if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
      + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
    return false;

  /* We should not split into add if non legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  regno0 = true_regnum (operands[0]) ;
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  /* Use add for a = a + b and a = b + a since it is faster and shorter
     than lea for most processors.  For the processors like BONNELL, if
     the destination register of LEA holds an actual address which will
     be used soon, LEA is better and otherwise ADD is better.  */
  if (!TARGET_CPU_P (BONNELL)
      && parts.scale == 1
      && (!parts.disp || parts.disp == const0_rtx)
      && (regno0 == regno1 || regno0 == regno2))
    return true;

  /* Split with -Oz if the encoding requires fewer bytes.  */
  if (optimize_size > 1
      && parts.scale > 1
      && !parts.base
      && (!parts.disp || parts.disp == const0_rtx))
    return true;

  /* Check we need to optimize.  */
  if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
    return false;

  split_cost = 0;

  /* Compute how many cycles we will add to execution time
     if split lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non destructive
	 destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
	split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
	split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
	{
	  if (regno0 != regno1)
	    split_cost += 1;
	  else if (regno2 == regno0)
	    split_cost += 4;
	  else
	    split_cost += parts.scale;
	}

      /* Have to use add instruction with immediate if
	 disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
	split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
				has_scale: parts.scale > 1);
}
16177 | |
16178 | /* Return true if it is ok to optimize an ADD operation to LEA |
16179 | operation to avoid flag register consumation. For most processors, |
16180 | ADD is faster than LEA. For the processors like BONNELL, if the |
16181 | destination register of LEA holds an actual address which will be |
16182 | used soon, LEA is better and otherwise ADD is better. */ |
16183 | |
16184 | bool |
16185 | ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[]) |
16186 | { |
16187 | unsigned int regno0 = true_regnum (operands[0]); |
16188 | unsigned int regno1 = true_regnum (operands[1]); |
16189 | unsigned int regno2 = true_regnum (operands[2]); |
16190 | |
16191 | /* If a = b + c, (a!=b && a!=c), must use lea form. */ |
16192 | if (regno0 != regno1 && regno0 != regno2) |
16193 | return true; |
16194 | |
16195 | if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) |
16196 | return false; |
16197 | |
16198 | return ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost: 0, has_scale: false); |
16199 | } |
16200 | |
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      /* Recurse into each element of the PARALLEL; any match wins.  */
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      /* Recurse into each element of the PARALLEL; any match wins.  */
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  /* Only shift and rotate codes carry a shift count operand.  */
  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count))
	{
	  /* Add check since it can be invoked before register
	     allocation in pre-reload schedule.  */
	  if (reload_completed
	      && true_regnum (set_dest) == true_regnum (shift_count))
	    return true;
	  else if (REGNO(set_dest) == REGNO(shift_count))
	    return true;
	}
    }

  return false;
}
16269 | |
16270 | /* Return true if destination reg of SET_INSN is shift count of |
16271 | USE_INSN. */ |
16272 | |
16273 | bool |
16274 | ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn) |
16275 | { |
16276 | return ix86_dep_by_shift_count_body (set_body: PATTERN (insn: set_insn), |
16277 | use_body: PATTERN (insn: use_insn)); |
16278 | } |
16279 | |
16280 | /* Return TRUE if the operands to a vec_interleave_{high,low}v2df |
16281 | are ok, keeping in mind the possible movddup alternative. */ |
16282 | |
16283 | bool |
16284 | ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high) |
16285 | { |
16286 | if (MEM_P (operands[0])) |
16287 | return rtx_equal_p (operands[0], operands[1 + high]); |
16288 | if (MEM_P (operands[1]) && MEM_P (operands[2])) |
16289 | return false; |
16290 | return true; |
16291 | } |
16292 | |
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  machine_mode scalar_mode;

  switch (mode)
    {
    case E_V64QImode:
    case E_V32QImode:
    case E_V16QImode:
    case E_V32HImode:
    case E_V16HImode:
    case E_V8HImode:
    case E_V16SImode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V2SImode:
    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
      /* Integer element modes are only supported with replication.  */
      gcc_assert (vect);
      /* FALLTHRU */
    case E_V2HFmode:
    case E_V4HFmode:
    case E_V8HFmode:
    case E_V16HFmode:
    case E_V32HFmode:
    case E_V16SFmode:
    case E_V8SFmode:
    case E_V4SFmode:
    case E_V2SFmode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      /* Element 0 is always VALUE itself ...  */
      RTVEC_ELT (v, 0) = value;

      /* ... the remaining elements are copies of VALUE when
	 replicating, otherwise zero.  */
      for (i = 1; i < n_elt; ++i)
	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}
16348 | |
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
{
  machine_mode vec_mode, imode;
  wide_int w;
  rtx mask, v;

  /* Pick the integer mode matching the element width of MODE.  */
  switch (mode)
    {
    case E_V2HFmode:
    case E_V4HFmode:
    case E_V8HFmode:
    case E_V16HFmode:
    case E_V32HFmode:
      vec_mode = mode;
      imode = HImode;
      break;

    case E_V16SImode:
    case E_V16SFmode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V8SFmode:
    case E_V4SFmode:
    case E_V2SFmode:
    case E_V2SImode:
      vec_mode = mode;
      imode = SImode;
      break;

    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
      vec_mode = mode;
      imode = DImode;
      break;

    /* Scalar 128-bit modes get no vector wrapper.  */
    case E_TImode:
    case E_TFmode:
      vec_mode = VOIDmode;
      imode = TImode;
      break;

    default:
      gcc_unreachable ();
    }

  /* Build an integer with only the element's sign bit set, or with
     every bit except the sign bit set when inverting.  */
  machine_mode inner_mode = GET_MODE_INNER (mode);
  w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
			   GET_MODE_BITSIZE (inner_mode));
  if (invert)
    w = wi::bit_not (x: w);

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_wide_int_const (w, imode);
  mask = gen_lowpart (inner_mode, mask);

  /* TImode/TFmode: return the scalar mask directly.  */
  if (vec_mode == VOIDmode)
    return force_reg (inner_mode, mask);

  v = ix86_build_const_vector (mode: vec_mode, vect, value: mask);
  return force_reg (vec_mode, v);
}
16421 | |
/* Return HOST_WIDE_INT for const vector OP in MODE.  */

HOST_WIDE_INT
ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
{
  /* Only vectors that fit in one machine word can be packed into a
     single HOST_WIDE_INT.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    gcc_unreachable ();

  int nunits = GET_MODE_NUNITS (mode);
  wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
  machine_mode innermode = GET_MODE_INNER (mode);
  unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);

  switch (mode)
    {
    case E_V2QImode:
    case E_V4QImode:
    case E_V2HImode:
    case E_V8QImode:
    case E_V4HImode:
    case E_V2SImode:
      /* Integer elements: insert each element's bits at its lane
	 position within VAL.  */
      for (int i = 0; i < nunits; ++i)
	{
	  int v = INTVAL (XVECEXP (op, 0, i));
	  wide_int wv = wi::shwi (val: v, precision: innermode_bits);
	  val = wi::insert (x: val, y: wv, innermode_bits * i, innermode_bits);
	}
      break;
    case E_V2HFmode:
    case E_V2BFmode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SFmode:
      /* Float elements: convert each to its target bit pattern
	 first, then insert at its lane position.  */
      for (int i = 0; i < nunits; ++i)
	{
	  rtx x = XVECEXP (op, 0, i);
	  int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
				  REAL_MODE_FORMAT (innermode));
	  wide_int wv = wi::shwi (val: v, precision: innermode_bits);
	  val = wi::insert (x: val, y: wv, innermode_bits * i, innermode_bits);
	}
      break;
    default:
      gcc_unreachable ();
    }

  return val.to_shwi ();
}
16470 | |
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, machine_mode req_mode)
{
  rtx set;
  machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  /* The case chain below falls through from less constrained modes to
     the modes they subsume, rejecting any REQ_MODE that demands more
     than SET_MODE provides.  */
  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case E_CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return false;
      break;
    case E_CCmode:
      if (req_mode == CCGCmode)
	return false;
      /* FALLTHRU */
    case E_CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return false;
      /* FALLTHRU */
    case E_CCGOCmode:
      if (req_mode == CCZmode)
	return false;
      /* FALLTHRU */
    case E_CCZmode:
      break;

    /* These single-purpose modes only match themselves.  */
    case E_CCGZmode:

    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
      if (set_mode != req_mode)
	return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}
16528 | |
/* Return the machine mode to use for a comparison of code CODE with
   operands OP0 and OP1, i.e. the mode of the flags-register result.  */

machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);

  /* All scalar FP comparisons produce CCFPmode.  */
  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return CCFPmode;
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      rtx geu;
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
	  && (rtx_equal_p (op1, XEXP (op0, 0))
	      || rtx_equal_p (op1, XEXP (op0, 1))))
	return CCCmode;
      /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
	 Match LTU of op0
	 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
	 and op1
	 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
	 where CC_CCC is either CC or CCC.  */
      else if (code == LTU
	       && GET_CODE (op0) == NEG
	       && GET_CODE (geu = XEXP (op0, 0)) == GEU
	       && REG_P (XEXP (geu, 0))
	       && (GET_MODE (XEXP (geu, 0)) == CCCmode
		   || GET_MODE (XEXP (geu, 0)) == CCmode)
	       && REGNO (XEXP (geu, 0)) == FLAGS_REG
	       && XEXP (geu, 1) == const0_rtx
	       && GET_CODE (op1) == LTU
	       && REG_P (XEXP (op1, 0))
	       && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
	       && REGNO (XEXP (op1, 0)) == FLAGS_REG
	       && XEXP (op1, 1) == const0_rtx)
	return CCCmode;
      /* Similarly for *x86_cmc pattern.
	 Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
	 and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
	 It is sufficient to test that the operand modes are CCCmode.  */
      else if (code == LTU
	       && GET_CODE (op0) == NEG
	       && GET_CODE (XEXP (op0, 0)) == LTU
	       && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
	       && GET_CODE (op1) == GEU
	       && GET_MODE (XEXP (op1, 0)) == CCCmode)
	return CCCmode;
      else
	return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
    default:
      /* CCmode should be used in all other cases.  */
      return CCmode;
    }
}
16615 | |
16616 | /* Return TRUE or FALSE depending on whether the ptest instruction |
16617 | INSN has source and destination with suitable matching CC modes. */ |
16618 | |
16619 | bool |
16620 | ix86_match_ptest_ccmode (rtx insn) |
16621 | { |
16622 | rtx set, src; |
16623 | machine_mode set_mode; |
16624 | |
16625 | set = PATTERN (insn); |
16626 | gcc_assert (GET_CODE (set) == SET); |
16627 | src = SET_SRC (set); |
16628 | gcc_assert (GET_CODE (src) == UNSPEC |
16629 | && XINT (src, 1) == UNSPEC_PTEST); |
16630 | |
16631 | set_mode = GET_MODE (src); |
16632 | if (set_mode != CCZmode |
16633 | && set_mode != CCCmode |
16634 | && set_mode != CCmode) |
16635 | return false; |
16636 | return GET_MODE (SET_DEST (set)) == set_mode; |
16637 | } |
16638 | |
16639 | /* Return the fixed registers used for condition codes. */ |
16640 | |
static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  /* x86 has a single condition-code register, the flags register;
     there is no second CC register.  */
  *p1 = FLAGS_REG;
  *p2 = INVALID_REGNUM;
  return true;
}
16648 | |
16649 | /* If two condition code modes are compatible, return a condition code |
16650 | mode which is compatible with both. Otherwise, return |
16651 | VOIDmode. */ |
16652 | |
static machine_mode
ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  /* Identical modes are trivially compatible.  */
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  /* CCGC subsumes CCGOC.  */
  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  /* CCNO subsumes CCGOC.  */
  if ((m1 == CCNOmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCNOmode))
    return CCNOmode;

  /* CCZ is the weakest of the "full compare" family, so the other
     mode wins.  */
  if (m1 == CCZmode
      && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
    return m2;
  else if (m2 == CCZmode
	   && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case E_CCmode:
    case E_CCGCmode:
    case E_CCGOCmode:
    case E_CCNOmode:
    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
    case E_CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	  /* Any two distinct integer CC modes fall back to full
	     CCmode.  */
	case E_CCmode:
	case E_CCGCmode:
	case E_CCGOCmode:
	case E_CCNOmode:
	case E_CCAmode:
	case E_CCCmode:
	case E_CCOmode:
	case E_CCPmode:
	case E_CCSmode:
	case E_CCZmode:
	  return CCmode;
	}

    case E_CCFPmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
16716 | |
16717 | /* Return strategy to use for floating-point. We assume that fcomi is always |
16718 | preferrable where available, since that is also true when looking at size |
16719 | (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */ |
16720 | |
enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code)
{
  /* Do fcomi/sahf based test when profitable.  */

  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    return IX86_FPCMP_SAHF;

  /* Last resort: fnstsw followed by arithmetic on the status word.  */
  return IX86_FPCMP_ARITH;
}
16734 | |
16735 | /* Convert comparison codes we use to represent FP comparison to integer |
16736 | code that will result in proper branch. Return UNKNOWN if no such code |
16737 | is available. */ |
16738 | |
16739 | enum rtx_code |
16740 | ix86_fp_compare_code_to_integer (enum rtx_code code) |
16741 | { |
16742 | switch (code) |
16743 | { |
16744 | case GT: |
16745 | return GTU; |
16746 | case GE: |
16747 | return GEU; |
16748 | case ORDERED: |
16749 | case UNORDERED: |
16750 | return code; |
16751 | case UNEQ: |
16752 | return EQ; |
16753 | case UNLT: |
16754 | return LTU; |
16755 | case UNLE: |
16756 | return LEU; |
16757 | case LTGT: |
16758 | return NE; |
16759 | default: |
16760 | return UNKNOWN; |
16761 | } |
16762 | } |
16763 | |
16764 | /* Zero extend possibly SImode EXP to Pmode register. */ |
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  /* convert_to_mode with unsignedp == 1 performs the zero extension;
     force_reg guarantees the result is in a register.  */
  return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
}
16770 | |
16771 | /* Return true if the function is called via PLT. */ |
16772 | |
16773 | bool |
16774 | ix86_call_use_plt_p (rtx call_op) |
16775 | { |
16776 | if (SYMBOL_REF_LOCAL_P (call_op)) |
16777 | { |
16778 | if (SYMBOL_REF_DECL (call_op) |
16779 | && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL) |
16780 | { |
16781 | /* NB: All ifunc functions must be called via PLT. */ |
16782 | cgraph_node *node |
16783 | = cgraph_node::get (SYMBOL_REF_DECL (call_op)); |
16784 | if (node && node->ifunc_resolver) |
16785 | return true; |
16786 | } |
16787 | return false; |
16788 | } |
16789 | return true; |
16790 | } |
16791 | |
16792 | /* Implement TARGET_IFUNC_REF_LOCAL_OK. If this hook returns true, |
16793 | the PLT entry will be used as the function address for local IFUNC |
16794 | functions. When the PIC register is needed for PLT call, indirect |
16795 | call via the PLT entry will fail since the PIC register may not be |
16796 | set up properly for indirect call. In this case, we should return |
16797 | false. */ |
16798 | |
16799 | static bool |
16800 | ix86_ifunc_ref_local_ok (void) |
16801 | { |
16802 | return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC); |
16803 | } |
16804 | |
16805 | /* Return true if the function being called was marked with attribute |
16806 | "noplt" or using -fno-plt and we are compiling for non-PIC. We need |
16807 | to handle the non-PIC case in the backend because there is no easy |
16808 | interface for the front-end to force non-PLT calls to use the GOT. |
16809 | This is currently used only with 64-bit or 32-bit GOT32X ELF targets |
16810 | to call the function marked "noplt" indirectly. */ |
16811 | |
16812 | static bool |
16813 | ix86_nopic_noplt_attribute_p (rtx call_op) |
16814 | { |
16815 | if (flag_pic || ix86_cmodel == CM_LARGE |
16816 | || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X) |
16817 | || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF |
16818 | || SYMBOL_REF_LOCAL_P (call_op)) |
16819 | return false; |
16820 | |
16821 | tree symbol_decl = SYMBOL_REF_DECL (call_op); |
16822 | |
16823 | if (!flag_plt |
16824 | || (symbol_decl != NULL_TREE |
16825 | && lookup_attribute (attr_name: "noplt" , DECL_ATTRIBUTES (symbol_decl)))) |
16826 | return true; |
16827 | |
16828 | return false; |
16829 | } |
16830 | |
16831 | /* Helper to output the jmp/call. */ |
/* Helper to output the jmp/call.  If THUNK_NAME is non-null, emit a
   direct jump to the named thunk (with an optional cs prefix for
   extended registers); otherwise emit the thunk body inline for
   REGNO.  */
static void
ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
{
  if (thunk_name != NULL)
    {
      /* REX (r8-r15) and REX2 (r16-r31) registers may need a cs
	 segment-override prefix when requested.  */
      if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
	  && ix86_indirect_branch_cs_prefix)
	fprintf (stream: asm_out_file, format: "\tcs\n" );
      fprintf (stream: asm_out_file, format: "\tjmp\t" );
      assemble_name (asm_out_file, thunk_name);
      putc (c: '\n', stream: asm_out_file);
      /* Harden against straight-line speculation past the jmp.  */
      if ((ix86_harden_sls & harden_sls_indirect_jmp))
	fputs (s: "\tint3\n" , stream: asm_out_file);
    }
  else
    output_indirect_thunk (regno);
}
16849 | |
16850 | /* Output indirect branch via a call and return thunk. CALL_OP is a |
16851 | register which contains the branch target. XASM is the assembly |
16852 | template for CALL_OP. Branch is a tail call if SIBCALL_P is true. |
16853 | A normal call is converted to: |
16854 | |
16855 | call __x86_indirect_thunk_reg |
16856 | |
16857 | and a tail call is converted to: |
16858 | |
16859 | jmp __x86_indirect_thunk_reg |
16860 | */ |
16861 | |
16862 | static void |
16863 | ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p) |
16864 | { |
16865 | char thunk_name_buf[32]; |
16866 | char *thunk_name; |
16867 | enum indirect_thunk_prefix need_prefix |
16868 | = indirect_thunk_need_prefix (insn: current_output_insn); |
16869 | int regno = REGNO (call_op); |
16870 | |
16871 | if (cfun->machine->indirect_branch_type |
16872 | != indirect_branch_thunk_inline) |
16873 | { |
16874 | if (cfun->machine->indirect_branch_type == indirect_branch_thunk) |
16875 | SET_HARD_REG_BIT (set&: indirect_thunks_used, bit: regno); |
16876 | |
16877 | indirect_thunk_name (name: thunk_name_buf, regno, need_prefix, ret_p: false); |
16878 | thunk_name = thunk_name_buf; |
16879 | } |
16880 | else |
16881 | thunk_name = NULL; |
16882 | |
16883 | if (sibcall_p) |
16884 | ix86_output_jmp_thunk_or_indirect (thunk_name, regno); |
16885 | else |
16886 | { |
16887 | if (thunk_name != NULL) |
16888 | { |
16889 | if ((REX_INT_REGNO_P (regno) || REX_INT_REGNO_P (regno)) |
16890 | && ix86_indirect_branch_cs_prefix) |
16891 | fprintf (stream: asm_out_file, format: "\tcs\n" ); |
16892 | fprintf (stream: asm_out_file, format: "\tcall\t" ); |
16893 | assemble_name (asm_out_file, thunk_name); |
16894 | putc (c: '\n', stream: asm_out_file); |
16895 | return; |
16896 | } |
16897 | |
16898 | char indirectlabel1[32]; |
16899 | char indirectlabel2[32]; |
16900 | |
16901 | ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, |
16902 | INDIRECT_LABEL, |
16903 | indirectlabelno++); |
16904 | ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, |
16905 | INDIRECT_LABEL, |
16906 | indirectlabelno++); |
16907 | |
16908 | /* Jump. */ |
16909 | fputs (s: "\tjmp\t" , stream: asm_out_file); |
16910 | assemble_name_raw (asm_out_file, indirectlabel2); |
16911 | fputc (c: '\n', stream: asm_out_file); |
16912 | |
16913 | ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); |
16914 | |
16915 | ix86_output_jmp_thunk_or_indirect (thunk_name, regno); |
16916 | |
16917 | ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); |
16918 | |
16919 | /* Call. */ |
16920 | fputs (s: "\tcall\t" , stream: asm_out_file); |
16921 | assemble_name_raw (asm_out_file, indirectlabel1); |
16922 | fputc (c: '\n', stream: asm_out_file); |
16923 | } |
16924 | } |
16925 | |
16926 | /* Output indirect branch via a call and return thunk. CALL_OP is |
16927 | the branch target. XASM is the assembly template for CALL_OP. |
16928 | Branch is a tail call if SIBCALL_P is true. A normal call is |
16929 | converted to: |
16930 | |
16931 | jmp L2 |
16932 | L1: |
16933 | push CALL_OP |
16934 | jmp __x86_indirect_thunk |
16935 | L2: |
16936 | call L1 |
16937 | |
16938 | and a tail call is converted to: |
16939 | |
16940 | push CALL_OP |
16941 | jmp __x86_indirect_thunk |
16942 | */ |
16943 | |
static void
ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
				      bool sibcall_p)
{
  char thunk_name_buf[32];
  char *thunk_name;
  char push_buf[64];
  enum indirect_thunk_prefix need_prefix
    = indirect_thunk_need_prefix (insn: current_output_insn);
  /* -1 selects the generic (no-register) thunk name.  */
  int regno = -1;

  if (cfun->machine->indirect_branch_type
      != indirect_branch_thunk_inline)
    {
      if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
	indirect_thunk_needed = true;
      indirect_thunk_name (name: thunk_name_buf, regno, need_prefix, ret_p: false);
      thunk_name = thunk_name_buf;
    }
  else
    thunk_name = NULL;

  /* Build the "push CALL_OP" template with the proper operand size.  */
  snprintf (s: push_buf, maxlen: sizeof (push_buf), format: "push{%c}\t%s" ,
	    TARGET_64BIT ? 'q' : 'l', xasm);

  if (sibcall_p)
    {
      output_asm_insn (push_buf, &call_op);
      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
    }
  else
    {
      char indirectlabel1[32];
      char indirectlabel2[32];

      ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
				   INDIRECT_LABEL,
				   indirectlabelno++);
      ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
				   INDIRECT_LABEL,
				   indirectlabelno++);

      /* Jump.  */
      fputs (s: "\tjmp\t" , stream: asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel2);
      fputc (c: '\n', stream: asm_out_file);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

      /* An external function may be called via GOT, instead of PLT.  */
      if (MEM_P (call_op))
	{
	  struct ix86_address parts;
	  rtx addr = XEXP (call_op, 0);
	  if (ix86_decompose_address (addr, out: &parts)
	      && parts.base == stack_pointer_rtx)
	    {
	      /* Since call will adjust stack by -UNITS_PER_WORD,
		 we must convert "disp(stack, index, scale)" to
		 "disp+UNITS_PER_WORD(stack, index, scale)".  */
	      if (parts.index)
		{
		  addr = gen_rtx_MULT (Pmode, parts.index,
				       GEN_INT (parts.scale));
		  addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				       addr);
		}
	      else
		addr = stack_pointer_rtx;

	      rtx disp;
	      if (parts.disp != NULL_RTX)
		disp = plus_constant (Pmode, parts.disp,
				      UNITS_PER_WORD);
	      else
		disp = GEN_INT (UNITS_PER_WORD);

	      addr = gen_rtx_PLUS (Pmode, addr, disp);
	      call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
	    }
	}

      /* Push the branch target, then dispatch through the thunk.  */
      output_asm_insn (push_buf, &call_op);

      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

      /* Call.  */
      fputs (s: "\tcall\t" , stream: asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel1);
      fputc (c: '\n', stream: asm_out_file);
    }
}
17038 | |
17039 | /* Output indirect branch via a call and return thunk. CALL_OP is |
17040 | the branch target. XASM is the assembly template for CALL_OP. |
17041 | Branch is a tail call if SIBCALL_P is true. */ |
17042 | |
17043 | static void |
17044 | ix86_output_indirect_branch (rtx call_op, const char *xasm, |
17045 | bool sibcall_p) |
17046 | { |
17047 | if (REG_P (call_op)) |
17048 | ix86_output_indirect_branch_via_reg (call_op, sibcall_p); |
17049 | else |
17050 | ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p); |
17051 | } |
17052 | |
17053 | /* Output indirect jump. CALL_OP is the jump target. */ |
17054 | |
17055 | const char * |
17056 | ix86_output_indirect_jmp (rtx call_op) |
17057 | { |
17058 | if (cfun->machine->indirect_branch_type != indirect_branch_keep) |
17059 | { |
17060 | /* We can't have red-zone since "call" in the indirect thunk |
17061 | pushes the return address onto stack, destroying red-zone. */ |
17062 | if (ix86_red_zone_used) |
17063 | gcc_unreachable (); |
17064 | |
17065 | ix86_output_indirect_branch (call_op, xasm: "%0" , sibcall_p: true); |
17066 | } |
17067 | else |
17068 | output_asm_insn ("%!jmp\t%A0" , &call_op); |
17069 | return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "" ; |
17070 | } |
17071 | |
17072 | /* Output return instrumentation for current function if needed. */ |
17073 | |
static void
output_return_instrumentation (void)
{
  /* Only instrument when requested, when -mfentry is in effect, and
     when the function is not opted out of instrumentation.  */
  if (ix86_instrument_return != instrument_return_none
      && flag_fentry
      && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
    {
      /* Label 1: records the location for __return_loc below.  */
      if (ix86_flag_record_return)
	fprintf (stream: asm_out_file, format: "1:\n" );
      switch (ix86_instrument_return)
	{
	case instrument_return_call:
	  fprintf (stream: asm_out_file, format: "\tcall\t__return__\n" );
	  break;
	case instrument_return_nop5:
	  /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
	  fprintf (stream: asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n" );
	  break;
	case instrument_return_none:
	  break;
	}

      /* Emit the address of label 1 into the __return_loc section.  */
      if (ix86_flag_record_return)
	{
	  fprintf (stream: asm_out_file, format: "\t.section __return_loc, \"a\",@progbits\n" );
	  fprintf (stream: asm_out_file, format: "\t.%s 1b\n" , TARGET_64BIT ? "quad" : "long" );
	  fprintf (stream: asm_out_file, format: "\t.previous\n" );
	}
    }
}
17104 | |
17105 | /* Output function return. CALL_OP is the jump target. Add a REP |
17106 | prefix to RET if LONG_P is true and function return is kept. */ |
17107 | |
const char *
ix86_output_function_return (bool long_p)
{
  output_return_instrumentation ();

  /* Returns converted to thunks (retpoline-style mitigation).  */
  if (cfun->machine->function_return_type != indirect_branch_keep)
    {
      char thunk_name[32];
      enum indirect_thunk_prefix need_prefix
	= indirect_thunk_need_prefix (insn: current_output_insn);

      if (cfun->machine->function_return_type
	  != indirect_branch_thunk_inline)
	{
	  bool need_thunk = (cfun->machine->function_return_type
			     == indirect_branch_thunk);
	  indirect_thunk_name (name: thunk_name, INVALID_REGNUM, need_prefix,
			       ret_p: true);
	  indirect_return_needed |= need_thunk;
	  fprintf (stream: asm_out_file, format: "\tjmp\t" );
	  assemble_name (asm_out_file, thunk_name);
	  putc (c: '\n', stream: asm_out_file);
	}
      else
	output_indirect_thunk (INVALID_REGNUM);

      return "" ;
    }

  /* Plain return, optionally with a rep prefix (e.g. for AMD branch
     predictors) and optionally hardened with int3 against
     straight-line speculation.  */
  output_asm_insn (long_p ? "rep%; ret" : "ret" , nullptr);
  return (ix86_harden_sls & harden_sls_return) ? "int3" : "" ;
}
17140 | |
17141 | /* Output indirect function return. RET_OP is the function return |
17142 | target. */ |
17143 | |
const char *
ix86_output_indirect_function_return (rtx ret_op)
{
  if (cfun->machine->function_return_type != indirect_branch_keep)
    {
      char thunk_name[32];
      enum indirect_thunk_prefix need_prefix
	= indirect_thunk_need_prefix (insn: current_output_insn);
      unsigned int regno = REGNO (ret_op);
      /* Only %ecx is supported as the return-target register.  */
      gcc_assert (regno == CX_REG);

      if (cfun->machine->function_return_type
	  != indirect_branch_thunk_inline)
	{
	  bool need_thunk = (cfun->machine->function_return_type
			     == indirect_branch_thunk);
	  indirect_thunk_name (name: thunk_name, regno, need_prefix, ret_p: true);

	  if (need_thunk)
	    {
	      indirect_return_via_cx = true;
	      SET_HARD_REG_BIT (set&: indirect_thunks_used, CX_REG);
	    }
	  fprintf (stream: asm_out_file, format: "\tjmp\t" );
	  assemble_name (asm_out_file, thunk_name);
	  putc (c: '\n', stream: asm_out_file);
	}
      else
	output_indirect_thunk (regno);
    }
  else
    {
      /* Plain indirect jump, hardened with int3 if requested.  */
      output_asm_insn ("%!jmp\t%A0" , &ret_op);
      if (ix86_harden_sls & harden_sls_indirect_jmp)
	fputs (s: "\tint3\n" , stream: asm_out_file);
    }
  return "" ;
}
17182 | |
17183 | /* Output the assembly for a call instruction. */ |
17184 | |
/* Output the assembly for a call instruction.  INSN is the call insn,
   CALL_OP the call target (symbol, register, or memory operand).
   Returns the trailing template string, if any ("int3"/"nop"/"").  */

const char *
ix86_output_call_insn (rtx_insn *insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  /* True when indirect branches must be routed through thunks.  */
  bool output_indirect_p
    = (!TARGET_SEH
       && cfun->machine->indirect_branch_type != indirect_branch_keep);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      output_return_instrumentation ();
      if (direct_p)
	{
	  /* A "noplt" direct call becomes an indirect jump through the
	     GOT entry.  */
	  if (ix86_nopic_noplt_attribute_p (call_op))
	    {
	      direct_p = false;
	      if (TARGET_64BIT)
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}" ;
		  else
		    xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}" ;
		}
	      else
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}" ;
		  else
		    xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}" ;
		}
	    }
	  else
	    xasm = "%!jmp\t%P0" ;
	}
      /* SEH epilogue detection requires the indirect branch case
	 to include REX.W.  */
      else if (TARGET_SEH)
	xasm = "%!rex.W jmp\t%A0" ;
      else
	{
	  if (output_indirect_p)
	    xasm = "%0" ;
	  else
	    xasm = "%!jmp\t%A0" ;
	}

      if (output_indirect_p && !direct_p)
	ix86_output_indirect_branch (call_op, xasm, sibcall_p: true);
      else
	{
	  output_asm_insn (xasm, &call_op);
	  if (!direct_p
	      && (ix86_harden_sls & harden_sls_indirect_jmp))
	    return "int3" ;
	}
      return "" ;
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx_insn *i;

      for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (insn: i))
	{
	  /* Prevent a catch region from being adjacent to a jump that would
	     be interpreted as an epilogue sequence by the unwinder.  */
	  if (JUMP_P(i) && CROSSING_JUMP_P (i))
	    {
	      seh_nop_p = true;
	      break;
	    }

	  /* If we get to another real insn, we don't need the nop.  */
	  if (INSN_P (i))
	    break;

	  /* If we get to the epilogue note, prevent a catch region from
	     being adjacent to the standard epilogue sequence.  Note that,
	     if non-call exceptions are enabled, we already did it during
	     epilogue expansion, or else, if the insn can throw internally,
	     we already did it during the reorg pass.  */
	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
	      && !flag_non_call_exceptions
	      && !can_throw_internal (insn))
	    {
	      seh_nop_p = true;
	      break;
	    }
	}

      /* If we didn't find a real insn following the call, prevent the
	 unwinder from looking into the next function.  */
      if (i == NULL)
	seh_nop_p = true;
    }

  if (direct_p)
    {
      /* Same "noplt" handling as the sibcall case, but with call
	 rather than jmp.  */
      if (ix86_nopic_noplt_attribute_p (call_op))
	{
	  direct_p = false;
	  if (TARGET_64BIT)
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}" ;
	      else
		xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}" ;
	    }
	  else
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}" ;
	      else
		xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}" ;
	    }
	}
      else
	xasm = "%!call\t%P0" ;
    }
  else
    {
      if (output_indirect_p)
	xasm = "%0" ;
      else
	xasm = "%!call\t%A0" ;
    }

  if (output_indirect_p && !direct_p)
    ix86_output_indirect_branch (call_op, xasm, sibcall_p: false);
  else
    output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop" ;

  return "" ;
}
17326 | |
17327 | /* Return a MEM corresponding to a stack slot with mode MODE. |
17328 | Allocate a new slot if necessary. |
17329 | |
17330 | The RTL for a function can have several slots available: N is |
17331 | which slot to use. */ |
17332 | |
rtx
assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  /* Reuse an already-allocated slot with matching mode and index.  */
  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  int align = 0;
  /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
     alignment with -m32 -mpreferred-stack-boundary=2.  */
  if (mode == DImode
      && !TARGET_64BIT
      && n == SLOT_FLOATxFDI_387
      && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode))
    align = 32;
  /* Allocate a new slot and push it onto the cache list.  */
  s = ggc_alloc<stack_local_entry> ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (copy_rtx (s->rtl));
}
17361 | |
17362 | static void |
17363 | ix86_instantiate_decls (void) |
17364 | { |
17365 | struct stack_local_entry *s; |
17366 | |
17367 | for (s = ix86_stack_locals; s; s = s->next) |
17368 | if (s->rtl != NULL_RTX) |
17369 | instantiate_decl_rtl (x: s->rtl); |
17370 | } |
17371 | |
17372 | /* Check whether x86 address PARTS is a pc-relative address. */ |
17373 | |
17374 | bool |
17375 | ix86_rip_relative_addr_p (struct ix86_address *parts) |
17376 | { |
17377 | rtx base, index, disp; |
17378 | |
17379 | base = parts->base; |
17380 | index = parts->index; |
17381 | disp = parts->disp; |
17382 | |
17383 | if (disp && !base && !index) |
17384 | { |
17385 | if (TARGET_64BIT) |
17386 | { |
17387 | rtx symbol = disp; |
17388 | |
17389 | if (GET_CODE (disp) == CONST) |
17390 | symbol = XEXP (disp, 0); |
17391 | if (GET_CODE (symbol) == PLUS |
17392 | && CONST_INT_P (XEXP (symbol, 1))) |
17393 | symbol = XEXP (symbol, 0); |
17394 | |
17395 | if (GET_CODE (symbol) == LABEL_REF |
17396 | || (GET_CODE (symbol) == SYMBOL_REF |
17397 | && SYMBOL_REF_TLS_MODEL (symbol) == 0) |
17398 | || (GET_CODE (symbol) == UNSPEC |
17399 | && (XINT (symbol, 1) == UNSPEC_GOTPCREL |
17400 | || XINT (symbol, 1) == UNSPEC_PCREL |
17401 | || XINT (symbol, 1) == UNSPEC_GOTNTPOFF))) |
17402 | return true; |
17403 | } |
17404 | } |
17405 | return false; |
17406 | } |
17407 | |
17408 | /* Calculate the length of the memory address in the instruction encoding. |
17409 | Includes addr32 prefix, does not include the one-byte modrm, opcode, |
17410 | or other prefixes. We never generate addr32 prefix for LEA insn. */ |
17411 | |
int
memory_address_length (rtx addr, bool lea)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  /* Auto-modify addresses contribute no extra encoding bytes here.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, out: &parts);
  gcc_assert (ok);

  /* A non-default segment costs one prefix byte.  */
  len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;

  /* If this is not LEA instruction, add the length of addr32 prefix.  */
  if (TARGET_64BIT && !lea
      && (SImode_address_operand (addr, VOIDmode)
	  || (parts.base && GET_MODE (parts.base) == SImode)
	  || (parts.index && GET_MODE (parts.index) == SImode)))
    len++;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  if (base && SUBREG_P (base))
    base = SUBREG_REG (base);
  if (index && SUBREG_P (index))
    index = SUBREG_REG (index);

  gcc_assert (base == NULL_RTX || REG_P (base));
  gcc_assert (index == NULL_RTX || REG_P (index));

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 code.  */
      if (base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || REGNO (base) == SP_REG
	  || REGNO (base) == BP_REG
	  || REGNO (base) == R12_REG
	  || REGNO (base) == R13_REG)
	len++;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (!ix86_rip_relative_addr_p (parts: &parts))
	len++;
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* Constraint K accepts signed 8-bit displacements.  */
	  if (base && satisfies_constraint_K (op: disp))
	    len += 1;
	  else
	    len += 4;
	}
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
	len++;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
	len++;
    }

  return len;
}
17507 | |
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that insn have 8bit immediate alternative.  Returns the
   number of bytes the immediate operand occupies in the encoding (0 when
   the insn has no constant operand).  */
int
ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	enum attr_mode mode = get_attr_mode (insn);

	/* Only a single immediate operand per insn is expected.  */
	gcc_assert (!len);
	if (shortform && CONST_INT_P (recog_data.operand[i]))
	  {
	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
	    /* Truncate the value to the operand's mode before testing
	       whether it fits the sign-extended 8-bit short form.  */
	    switch (mode)
	      {
	      case MODE_QI:
		len = 1;
		continue;
	      case MODE_HI:
		ival = trunc_int_for_mode (ival, HImode);
		break;
	      case MODE_SI:
		ival = trunc_int_for_mode (ival, SImode);
		break;
	      default:
		break;
	      }
	    /* Values in [-128, 127] use the 1-byte immediate form.  */
	    if (IN_RANGE (ival, -128, 127))
	      {
		len = 1;
		continue;
	      }
	  }
	/* Full-size immediate: length follows the insn's mode.  */
	switch (mode)
	  {
	  case MODE_QI:
	    len = 1;
	    break;
	  case MODE_HI:
	    len = 2;
	    break;
	  case MODE_SI:
	    len = 4;
	    break;
	  /* Immediates for DImode instructions are encoded
	     as 32bit sign extended values.  */
	  case MODE_DI:
	    len = 4;
	    break;
	  default:
	    fatal_insn ("unknown insn mode", insn);
	  }
      }
  return len;
}
17567 | |
/* Compute default value for "length_address" attribute.  Returns the
   number of bytes the address encoding of INSN's (first) memory operand
   occupies, including a segment-override prefix for non-default address
   spaces; 0 when the insn has no memory operand.  */
int
ix86_attr_length_address_default (rtx_insn *insn)
{
  int i;

  /* For LEA the address is the SET_SRC of the pattern, not a MEM.  */
  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);

      return memory_address_length (addr, lea: true);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    {
      rtx op = recog_data.operand[i];
      if (MEM_P (op))
	{
	  constrain_operands_cached (insn, reload_completed);
	  if (which_alternative != -1)
	    {
	      const char *constraints = recog_data.constraints[i];
	      int alt = which_alternative;

	      /* Advance past modifiers, then to the constraint string
		 of the matched alternative (alternatives are
		 comma-separated).  */
	      while (*constraints == '=' || *constraints == '+')
		constraints++;
	      while (alt-- > 0)
		while (*constraints++ != ',')
		  ;
	      /* Skip ignored operands.  */
	      if (*constraints == 'X')
		continue;
	    }

	  int len = memory_address_length (XEXP (op, 0), lea: false);

	  /* Account for segment prefix for non-default addr spaces.  */
	  if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
	    len++;

	  return len;
	}
    }
  return 0;
}
17621 | |
/* Compute default value for "length_vex" attribute.  It includes
   2 or 3 byte VEX prefix and 1 opcode byte.  HAS_0F_OPCODE is true when
   the insn uses the 0f opcode map (the only map encodable with the
   2-byte VEX prefix); HAS_VEX_W is true when the insn sets VEX.W.  */

int
ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
			      bool has_vex_w)
{
  /* Default prefix-plus-opcode length when only registers appear and
     none of them force a longer prefix.  */
  int i, reg_only = 2 + 1;
  bool has_mem = false;

  /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
     byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use 2 byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
	/* REX.W bit uses 3 byte VEX prefix.
	   REX2 with vex use extended EVEX prefix length is 4-byte.  */
	if (GET_MODE (recog_data.operand[i]) == DImode
	    && GENERAL_REG_P (recog_data.operand[i]))
	  return 3 + 1;

	/* REX.B bit requires 3-byte VEX.  Right here we don't know which
	   operand will be encoded using VEX.B, so be conservative.
	   REX2 with vex use extended EVEX prefix length is 4-byte.  */
	if (REX_INT_REGNO_P (recog_data.operand[i])
	    || REX2_INT_REGNO_P (recog_data.operand[i])
	    || REX_SSE_REGNO_P (recog_data.operand[i]))
	  reg_only = 3 + 1;
      }
    else if (MEM_P (recog_data.operand[i]))
      {
	/* REX2.X or REX2.B bits use 3 byte VEX prefix.  */
	if (x86_extended_rex2reg_mentioned_p (recog_data.operand[i]))
	  return 4;

	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
	if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
	  return 3 + 1;

	has_mem = true;
      }

  /* A memory operand with only legacy registers permits the short
     2-byte prefix; otherwise fall back to the register-only result.  */
  return has_mem ? 2 + 1 : reg_only;
}
17675 | |
17676 | |
17677 | static bool |
17678 | ix86_class_likely_spilled_p (reg_class_t); |
17679 | |
17680 | /* Returns true if lhs of insn is HW function argument register and set up |
17681 | is_spilled to true if it is likely spilled HW register. */ |
17682 | static bool |
17683 | insn_is_function_arg (rtx insn, bool* is_spilled) |
17684 | { |
17685 | rtx dst; |
17686 | |
17687 | if (!NONDEBUG_INSN_P (insn)) |
17688 | return false; |
17689 | /* Call instructions are not movable, ignore it. */ |
17690 | if (CALL_P (insn)) |
17691 | return false; |
17692 | insn = PATTERN (insn); |
17693 | if (GET_CODE (insn) == PARALLEL) |
17694 | insn = XVECEXP (insn, 0, 0); |
17695 | if (GET_CODE (insn) != SET) |
17696 | return false; |
17697 | dst = SET_DEST (insn); |
17698 | if (REG_P (dst) && HARD_REGISTER_P (dst) |
17699 | && ix86_function_arg_regno_p (REGNO (dst))) |
17700 | { |
17701 | /* Is it likely spilled HW register? */ |
17702 | if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst)) |
17703 | && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))) |
17704 | *is_spilled = true; |
17705 | return true; |
17706 | } |
17707 | return false; |
17708 | } |
17709 | |
/* Add output dependencies for chain of function adjacent arguments if only
   there is a move to likely spilled HW register.  Return first argument
   if at least one dependence was added or NULL otherwise.  CALL is the
   call insn; HEAD bounds the backward scan.  */
static rtx_insn *
add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
{
  rtx_insn *insn;
  rtx_insn *last = call;
  rtx_insn *first_arg = NULL;
  bool is_spilled = false;

  head = PREV_INSN (insn: head);

  /* Find nearest to call argument passing instruction.  */
  while (true)
    {
      last = PREV_INSN (insn: last);
      if (last == head)
	return NULL;
      if (!NONDEBUG_INSN_P (last))
	continue;
      if (insn_is_function_arg (insn: last, is_spilled: &is_spilled))
	break;
      /* The first real insn before the call is not an argument move;
	 there is no argument chain to protect.  */
      return NULL;
    }

  /* Walk further back, linking consecutive argument moves.  */
  first_arg = last;
  while (true)
    {
      insn = PREV_INSN (insn: last);
      if (!INSN_P (insn))
	break;
      if (insn == head)
	break;
      if (!NONDEBUG_INSN_P (insn))
	{
	  last = insn;
	  continue;
	}
      if (insn_is_function_arg (insn, is_spilled: &is_spilled))
	{
	  /* Add output dependence between two function arguments if chain
	     of output arguments contains likely spilled HW registers.  */
	  if (is_spilled)
	    add_dependence (first_arg, insn, REG_DEP_OUTPUT);
	  first_arg = last = insn;
	}
      else
	break;
    }
  /* Without a likely-spilled register in the chain no dependence was
     added, so report nothing to the caller.  */
  if (!is_spilled)
    return NULL;
  return first_arg;
}
17764 | |
17765 | /* Add output or anti dependency from insn to first_arg to restrict its code |
17766 | motion. */ |
17767 | static void |
17768 | avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn) |
17769 | { |
17770 | rtx set; |
17771 | rtx tmp; |
17772 | |
17773 | set = single_set (insn); |
17774 | if (!set) |
17775 | return; |
17776 | tmp = SET_DEST (set); |
17777 | if (REG_P (tmp)) |
17778 | { |
17779 | /* Add output dependency to the first function argument. */ |
17780 | add_dependence (first_arg, insn, REG_DEP_OUTPUT); |
17781 | return; |
17782 | } |
17783 | /* Add anti dependency. */ |
17784 | add_dependence (first_arg, insn, REG_DEP_ANTI); |
17785 | } |
17786 | |
17787 | /* Avoid cross block motion of function argument through adding dependency |
17788 | from the first non-jump instruction in bb. */ |
17789 | static void |
17790 | add_dependee_for_func_arg (rtx_insn *arg, basic_block bb) |
17791 | { |
17792 | rtx_insn *insn = BB_END (bb); |
17793 | |
17794 | while (insn) |
17795 | { |
17796 | if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn)) |
17797 | { |
17798 | rtx set = single_set (insn); |
17799 | if (set) |
17800 | { |
17801 | avoid_func_arg_motion (first_arg: arg, insn); |
17802 | return; |
17803 | } |
17804 | } |
17805 | if (insn == BB_HEAD (bb)) |
17806 | return; |
17807 | insn = PREV_INSN (insn); |
17808 | } |
17809 | } |
17810 | |
/* Hook for pre-reload schedule - avoid motion of function arguments
   passed in likely spilled HW registers.  Scans [HEAD, TAIL] backwards,
   and for each call adds dependencies that pin its argument-setup moves
   in place (see add_parameter_dependencies).  */
static void
ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
{
  rtx_insn *insn;
  rtx_insn *first_arg = NULL;
  /* Only relevant for the pre-reload scheduler.  */
  if (reload_completed)
    return;
  /* Skip leading debug insns so HEAD is a real boundary.  */
  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (insn: head);
  for (insn = tail; insn != head; insn = PREV_INSN (insn))
    if (INSN_P (insn) && CALL_P (insn))
      {
	first_arg = add_parameter_dependencies (call: insn, head);
	if (first_arg)
	  {
	    /* Add dependee for first argument to predecessors if only
	       region contains more than one block.  */
	    basic_block bb =  BLOCK_FOR_INSN (insn);
	    int rgn = CONTAINING_RGN (bb->index);
	    int nr_blks = RGN_NR_BLOCKS (rgn);
	    /* Skip trivial regions and region head blocks that can have
	       predecessors outside of region.  */
	    if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
	      {
		edge e;
		edge_iterator ei;

		/* Regions are SCCs with the exception of selective
		   scheduling with pipelining of outer blocks enabled.
		   So also check that immediate predecessors of a non-head
		   block are in the same region.  */
		FOR_EACH_EDGE (e, ei, bb->preds)
		  {
		    /* Avoid creating of loop-carried dependencies through
		       using topological ordering in the region.  */
		    if (rgn == CONTAINING_RGN (e->src->index)
			&& BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
		      add_dependee_for_func_arg (arg: first_arg, bb: e->src);
		  }
	      }
	    /* Resume the scan just before the protected argument chain.  */
	    insn = first_arg;
	    if (insn == head)
	      break;
	  }
      }
    else if (first_arg)
      /* Restrict motion of any other insn across the argument chain.  */
      avoid_func_arg_motion (first_arg, insn);
}
17861 | |
17862 | /* Hook for pre-reload schedule - set priority of moves from likely spilled |
17863 | HW registers to maximum, to schedule them at soon as possible. These are |
17864 | moves from function argument registers at the top of the function entry |
17865 | and moves from function return value registers after call. */ |
17866 | static int |
17867 | ix86_adjust_priority (rtx_insn *insn, int priority) |
17868 | { |
17869 | rtx set; |
17870 | |
17871 | if (reload_completed) |
17872 | return priority; |
17873 | |
17874 | if (!NONDEBUG_INSN_P (insn)) |
17875 | return priority; |
17876 | |
17877 | set = single_set (insn); |
17878 | if (set) |
17879 | { |
17880 | rtx tmp = SET_SRC (set); |
17881 | if (REG_P (tmp) |
17882 | && HARD_REGISTER_P (tmp) |
17883 | && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp)) |
17884 | && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp)))) |
17885 | return current_sched_info->sched_max_insns_priority; |
17886 | } |
17887 | |
17888 | return priority; |
17889 | } |
17890 | |
/* Prepare for scheduling pass.  Called once per scheduling run with the
   dump file, verbosity and max-uid arguments (all unused here).  */
static void
ix86_sched_init_global (FILE *, int, int)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2:
    case PROCESSOR_NEHALEM:
    case PROCESSOR_SANDYBRIDGE:
    case PROCESSOR_HASWELL:
    case PROCESSOR_TREMONT:
    case PROCESSOR_ALDERLAKE:
    case PROCESSOR_GENERIC:
      /* Do not perform multipass scheduling for pre-reload schedule
         to save compile time.  */
      if (reload_completed)
	{
	  ix86_core2i7_init_hooks ();
	  break;
	}
      /* Fall through.  */
    default:
      /* Clear any hooks a previous run may have installed, since the
	 same target structure is reused across scheduling passes.  */
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
17926 | |
17927 | |
17928 | /* Implement TARGET_STATIC_RTX_ALIGNMENT. */ |
17929 | |
17930 | static HOST_WIDE_INT |
17931 | ix86_static_rtx_alignment (machine_mode mode) |
17932 | { |
17933 | if (mode == DFmode) |
17934 | return 64; |
17935 | if (ALIGN_MODE_128 (mode)) |
17936 | return MAX (128, GET_MODE_ALIGNMENT (mode)); |
17937 | return GET_MODE_ALIGNMENT (mode); |
17938 | } |
17939 | |
17940 | /* Implement TARGET_CONSTANT_ALIGNMENT. */ |
17941 | |
17942 | static HOST_WIDE_INT |
17943 | ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align) |
17944 | { |
17945 | if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST |
17946 | || TREE_CODE (exp) == INTEGER_CST) |
17947 | { |
17948 | machine_mode mode = TYPE_MODE (TREE_TYPE (exp)); |
17949 | HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode); |
17950 | return MAX (mode_align, align); |
17951 | } |
17952 | else if (!optimize_size && TREE_CODE (exp) == STRING_CST |
17953 | && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD) |
17954 | return BITS_PER_WORD; |
17955 | |
17956 | return align; |
17957 | } |
17958 | |
17959 | /* Implement TARGET_EMPTY_RECORD_P. */ |
17960 | |
17961 | static bool |
17962 | ix86_is_empty_record (const_tree type) |
17963 | { |
17964 | if (!TARGET_64BIT) |
17965 | return false; |
17966 | return default_is_empty_record (type); |
17967 | } |
17968 | |
17969 | /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */ |
17970 | |
17971 | static void |
17972 | ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type) |
17973 | { |
17974 | CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v); |
17975 | |
17976 | if (!cum->warn_empty) |
17977 | return; |
17978 | |
17979 | if (!TYPE_EMPTY_P (type)) |
17980 | return; |
17981 | |
17982 | /* Don't warn if the function isn't visible outside of the TU. */ |
17983 | if (cum->decl && !TREE_PUBLIC (cum->decl)) |
17984 | return; |
17985 | |
17986 | const_tree ctx = get_ultimate_context (cum->decl); |
17987 | if (ctx != NULL_TREE |
17988 | && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx)) |
17989 | return; |
17990 | |
17991 | /* If the actual size of the type is zero, then there is no change |
17992 | in how objects of this size are passed. */ |
17993 | if (int_size_in_bytes (type) == 0) |
17994 | return; |
17995 | |
17996 | warning (OPT_Wabi, "empty class %qT parameter passing ABI " |
17997 | "changes in %<-fabi-version=12%> (GCC 8)" , type); |
17998 | |
17999 | /* Only warn once. */ |
18000 | cum->warn_empty = false; |
18001 | } |
18002 | |
18003 | /* This hook returns name of multilib ABI. */ |
18004 | |
18005 | static const char * |
18006 | ix86_get_multilib_abi_name (void) |
18007 | { |
18008 | if (!(TARGET_64BIT_P (ix86_isa_flags))) |
18009 | return "i386" ; |
18010 | else if (TARGET_X32_P (ix86_isa_flags)) |
18011 | return "x32" ; |
18012 | else |
18013 | return "x86_64" ; |
18014 | } |
18015 | |
18016 | /* Compute the alignment for a variable for Intel MCU psABI. TYPE is |
18017 | the data type, and ALIGN is the alignment that the object would |
18018 | ordinarily have. */ |
18019 | |
18020 | static int |
18021 | iamcu_alignment (tree type, int align) |
18022 | { |
18023 | machine_mode mode; |
18024 | |
18025 | if (align < 32 || TYPE_USER_ALIGN (type)) |
18026 | return align; |
18027 | |
18028 | /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4 |
18029 | bytes. */ |
18030 | type = strip_array_types (type); |
18031 | if (TYPE_ATOMIC (type)) |
18032 | return align; |
18033 | |
18034 | mode = TYPE_MODE (type); |
18035 | switch (GET_MODE_CLASS (mode)) |
18036 | { |
18037 | case MODE_INT: |
18038 | case MODE_COMPLEX_INT: |
18039 | case MODE_COMPLEX_FLOAT: |
18040 | case MODE_FLOAT: |
18041 | case MODE_DECIMAL_FLOAT: |
18042 | return 32; |
18043 | default: |
18044 | return align; |
18045 | } |
18046 | } |
18047 | |
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  OPT selects whether
   optimization-only alignment increases are applied (it is forced off
   for -malign-data=abi).  */

int
ix86_data_alignment (tree type, unsigned int align, bool opt)
{
  /* GCC 4.8 and earlier used to incorrectly assume this alignment even
     for symbols from other compilation units or symbols that don't need
     to bind locally.  In order to preserve some ABI compatibility with
     those compilers, ensure we don't decrease alignment from what we
     used to assume.  */

  unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);

  /* A data structure, equal or greater than the size of a cache line
     (64 bytes in the Pentium 4 and other recent Intel processors, including
     processors based on Intel Core microarchitecture) should be aligned
     so that its base address is a multiple of a cache line size.  */

  unsigned int max_align
    = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);

  if (max_align < BITS_PER_WORD)
    max_align = BITS_PER_WORD;

  /* Honor -malign-data=: abi disables optimization increases, compat
     limits the cache-line raise to a word.  */
  switch (ix86_align_data_type)
    {
    case ix86_align_data_type_abi: opt = false; break;
    case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
    case ix86_align_data_type_cacheline: break;
    }

  if (TARGET_IAMCU)
    align = iamcu_alignment (type, align);

  /* Raise alignment of large aggregates with known constant size, both
     for GCC 4.8 compatibility and for cache-line friendliness.  */
  if (opt
      && AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
    {
      if (wi::geu_p (x: wi::to_wide (TYPE_SIZE (type)), y: max_align_compat)
	  && align < max_align_compat)
	align = max_align_compat;
      if (wi::geu_p (x: wi::to_wide (TYPE_SIZE (type)), y: max_align)
	  && align < max_align)
	align = max_align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (x: wi::to_wide (TYPE_SIZE (type)), y: 128)
	  && align < 128)
	return 128;
    }

  /* The remaining increases are pure optimization; skip them when the
     caller (or -malign-data=abi) asked for ABI alignment only.  */
  if (!opt)
    return align;

  /* Give DFmode and 128-bit-mode data wider alignment based on the
     element, field or scalar mode of the type.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if (RECORD_OR_UNION_TYPE_P (type)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
18148 | |
18149 | /* Implememnt TARGET_LOWER_LOCAL_DECL_ALIGNMENT. */ |
18150 | static void |
18151 | ix86_lower_local_decl_alignment (tree decl) |
18152 | { |
18153 | unsigned int new_align = ix86_local_alignment (decl, VOIDmode, |
18154 | DECL_ALIGN (decl), true); |
18155 | if (new_align < DECL_ALIGN (decl)) |
18156 | SET_DECL_ALIGN (decl, new_align); |
18157 | } |
18158 | |
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  MAY_LOWER permits returning an alignment smaller than ALIGN
   (used to avoid dynamic stack realignment for DImode objects).  */

unsigned int
ix86_local_alignment (tree exp, machine_mode mode,
		      unsigned int align, bool may_lower)
{
  tree type, decl;

  /* EXP may be either a decl or a bare type.  */
  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (may_lower
      && !TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || (!TYPE_USER_ALIGN (type)
		    && !TYPE_ATOMIC (strip_array_types (type))))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* Don't increase alignment for Intel MCU psABI.  */
  if (TARGET_IAMCU)
    return align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions at arrays.  This
     rule is meant for static storage (where compiler cannot do the analysis
     by itself).  We follow it for automatic variables only when convenient.
     We fully control everything in the function compiled and functions from
     other unit cannot rely on the alignment.

     Exclude va_list type.  It is the common case of local array where
     we cannot benefit from the alignment.

     TODO: Probably one should optimize for size only when var is not escaping.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (x: wi::to_wide (TYPE_SIZE (type)), y: 128)
	  && align < 128)
	return 128;
    }
  /* Widen alignment for DFmode and 128-bit-mode data, keyed off the
     element, field or scalar mode of the type.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if (RECORD_OR_UNION_TYPE_P (type)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
18272 | |
18273 | /* Compute the minimum required alignment for dynamic stack realignment |
18274 | purposes for a local variable, parameter or a stack slot. EXP is |
18275 | the data type or decl itself, MODE is its mode and ALIGN is the |
18276 | alignment that the object would ordinarily have. */ |
18277 | |
18278 | unsigned int |
18279 | ix86_minimum_alignment (tree exp, machine_mode mode, |
18280 | unsigned int align) |
18281 | { |
18282 | tree type, decl; |
18283 | |
18284 | if (exp && DECL_P (exp)) |
18285 | { |
18286 | type = TREE_TYPE (exp); |
18287 | decl = exp; |
18288 | } |
18289 | else |
18290 | { |
18291 | type = exp; |
18292 | decl = NULL; |
18293 | } |
18294 | |
18295 | if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64) |
18296 | return align; |
18297 | |
18298 | /* Don't do dynamic stack realignment for long long objects with |
18299 | -mpreferred-stack-boundary=2. */ |
18300 | if ((mode == DImode || (type && TYPE_MODE (type) == DImode)) |
18301 | && (!type || (!TYPE_USER_ALIGN (type) |
18302 | && !TYPE_ATOMIC (strip_array_types (type)))) |
18303 | && (!decl || !DECL_USER_ALIGN (decl))) |
18304 | { |
18305 | gcc_checking_assert (!TARGET_STV); |
18306 | return 32; |
18307 | } |
18308 | |
18309 | return align; |
18310 | } |
18311 | |
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.
   FNDECL_OR_TYPE identifies the (type of the) nested function;
   INCOMING_P is true when computing the location as seen by the callee.  */

static rtx
ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
{
  unsigned regno;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      const_tree fntype, fndecl;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      /* Normalize FNDECL_OR_TYPE into a (type, optional decl) pair.  */
      if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
	{
	  fntype = TREE_TYPE (fndecl_or_type);
	  fndecl = fndecl_or_type;
	}
      else
	{
	  fntype = fndecl_or_type;
	  fndecl = NULL;
	}

      ccvt = ix86_get_callcvt (type: fntype);
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  /* Thiscall functions use ecx for arguments, which leaves
	     us with EAX and EDX for the static chain.
	     We are using for abi-compatibility EAX.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (type: fntype, decl: fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      if (fndecl == current_function_decl
		  && !ix86_static_chain_on_stack)
		{
		  gcc_assert (!reload_completed);
		  ix86_static_chain_on_stack = true;
		}
	      /* The chain was pushed just below the return address:
		 8 bytes below the incoming argument pointer.  */
	      return gen_frame_mem (SImode,
				    plus_constant (Pmode,
						   arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
18388 | |
18389 | /* Emit RTL insns to initialize the variable parts of a trampoline. |
18390 | FNDECL is the decl of the target address; M_TRAMP is a MEM for |
18391 | the trampoline, and CHAIN_VALUE is an RTX for the static chain |
18392 | to be passed to the target function. */ |
18393 | |
static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  /* Running byte offset of the next instruction written into the
     trampoline; checked against TRAMPOLINE_SIZE at the end.  */
  int offset = 0;
  /* CET indirect-branch tracking requires the trampoline to begin
     with an ENDBR instruction.  */
  bool need_endbr = (flag_cf_protection & CF_BRANCH);

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      if (need_endbr)
	{
	  /* Insert ENDBR64.  */
	  mem = adjust_address (m_tramp, SImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
	  offset += 4;
	}

      /* Load the function address to r11.  Try to load address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  FNADDR is a 32bit address and may not be in
	 DImode when ptr_mode == SImode.  Always use movl in this
	 case.  */
      if (ptr_mode == SImode
	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_addr_to_reg (fnaddr);

	  /* movl $fnaddr, %r11d: 2-byte opcode + 4-byte immediate.  */
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* movabs $fnaddr, %r11: 2-byte opcode + 8-byte immediate.  */
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);
	  offset += 10;
	}

      /* Load static chain using movabs to r10.  Use the shorter movl
	 instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
	{
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl_or_type: fndecl, incoming_p: true);
      if (REG_P (chain))
	{
	  /* mov $chain_value, %eax or %ecx (1-byte opcode).  */
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      opcode = 0xb8; break;
	    case CX_REG:
	      opcode = 0xb9; break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	/* push $chain_value.  */
	opcode = 0x68;

      if (need_endbr)
	{
	  /* Insert ENDBR32.  */
	  mem = adjust_address (m_tramp, SImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
	  offset += 4;
	}

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      /* jmp rel32 (opcode 0xe9); displacement filled in below.  */
      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      int skip = MEM_P (chain) ? 1 : 0;
      /* Skip ENDBR32 at the entry of the target function.  */
      if (need_endbr
	  && !cgraph_node::get (decl: fndecl)->only_called_directly_p ())
	skip += 4;
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (Pmode, XEXP (m_tramp, 0),
					  offset - skip),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
    emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		       LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif
}
18540 | |
18541 | static bool |
18542 | ix86_allocate_stack_slots_for_args (void) |
18543 | { |
18544 | /* Naked functions should not allocate stack slots for arguments. */ |
18545 | return !ix86_function_naked (fn: current_function_decl); |
18546 | } |
18547 | |
18548 | static bool |
18549 | ix86_warn_func_return (tree decl) |
18550 | { |
18551 | /* Naked functions are implemented entirely in assembly, including the |
18552 | return sequence, so suppress warnings about this. */ |
18553 | return !ix86_function_naked (fn: decl); |
18554 | } |
18555 | |
18556 | /* Return the shift count of a vector by scalar shift builtin second argument |
18557 | ARG1. */ |
18558 | static tree |
18559 | ix86_vector_shift_count (tree arg1) |
18560 | { |
18561 | if (tree_fits_uhwi_p (arg1)) |
18562 | return arg1; |
18563 | else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8) |
18564 | { |
18565 | /* The count argument is weird, passed in as various 128-bit |
18566 | (or 64-bit) vectors, the low 64 bits from it are the count. */ |
18567 | unsigned char buf[16]; |
18568 | int len = native_encode_expr (arg1, buf, 16); |
18569 | if (len == 0) |
18570 | return NULL_TREE; |
18571 | tree t = native_interpret_expr (uint64_type_node, buf, len); |
18572 | if (t && tree_fits_uhwi_p (t)) |
18573 | return t; |
18574 | } |
18575 | return NULL_TREE; |
18576 | } |
18577 | |
18578 | /* Return true if arg_mask is all ones, ELEMS is elements number of |
18579 | corresponding vector. */ |
18580 | static bool |
18581 | ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask) |
18582 | { |
18583 | if (TREE_CODE (arg_mask) != INTEGER_CST) |
18584 | return false; |
18585 | |
18586 | unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask); |
18587 | if (elems == HOST_BITS_PER_WIDE_INT) |
18588 | return mask == HOST_WIDE_INT_M1U; |
18589 | if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U) |
18590 | return false; |
18591 | |
18592 | return true; |
18593 | } |
18594 | |
/* Implement TARGET_FOLD_BUILTIN.  Fold an ix86 machine-dependent
   builtin call FNDECL with N_ARGS arguments ARGS into a constant or
   simpler tree where possible; return NULL_TREE when no folding
   applies.  */
static tree
ix86_fold_builtin (tree fndecl, int n_args,
		   tree *args, bool ignore ATTRIBUTE_UNUSED)
{
  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum ix86_builtins fn_code
	= (enum ix86_builtins) DECL_MD_FUNCTION_CODE (decl: fndecl);
      enum rtx_code rcode;
      bool is_vshift;
      unsigned HOST_WIDE_INT mask;

      switch (fn_code)
	{
	case IX86_BUILTIN_CPU_IS:
	case IX86_BUILTIN_CPU_SUPPORTS:
	  gcc_assert (n_args == 1);
	  return fold_builtin_cpu (fndecl, args);

	case IX86_BUILTIN_NANQ:
	case IX86_BUILTIN_NANSQ:
	  {
	    /* Build a (quiet or signaling) __float128 NaN from the
	       string payload argument, if it parses.  */
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    const char *str = c_getstr (*args);
	    int quiet = fn_code == IX86_BUILTIN_NANQ;
	    REAL_VALUE_TYPE real;

	    if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
	      return build_real (type, real);
	    return NULL_TREE;
	  }

	case IX86_BUILTIN_INFQ:
	case IX86_BUILTIN_HUGE_VALQ:
	  {
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    REAL_VALUE_TYPE inf;
	    real_inf (&inf);
	    return build_real (type, inf);
	  }

	case IX86_BUILTIN_TZCNT16:
	case IX86_BUILTIN_CTZS:
	case IX86_BUILTIN_TZCNT32:
	case IX86_BUILTIN_TZCNT64:
	  gcc_assert (n_args == 1);
	  if (TREE_CODE (args[0]) == INTEGER_CST)
	    {
	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
	      tree arg = args[0];
	      if (fn_code == IX86_BUILTIN_TZCNT16
		  || fn_code == IX86_BUILTIN_CTZS)
		arg = fold_convert (short_unsigned_type_node, arg);
	      /* tzcnt of 0 is defined as the operand width, unlike
		 plain CTZ.  */
	      if (integer_zerop (arg))
		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
	      else
		return fold_const_call (CFN_CTZ, type, arg);
	    }
	  break;

	case IX86_BUILTIN_LZCNT16:
	case IX86_BUILTIN_CLZS:
	case IX86_BUILTIN_LZCNT32:
	case IX86_BUILTIN_LZCNT64:
	  gcc_assert (n_args == 1);
	  if (TREE_CODE (args[0]) == INTEGER_CST)
	    {
	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
	      tree arg = args[0];
	      if (fn_code == IX86_BUILTIN_LZCNT16
		  || fn_code == IX86_BUILTIN_CLZS)
		arg = fold_convert (short_unsigned_type_node, arg);
	      /* lzcnt of 0 is defined as the operand width, unlike
		 plain CLZ.  */
	      if (integer_zerop (arg))
		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
	      else
		return fold_const_call (CFN_CLZ, type, arg);
	    }
	  break;

	case IX86_BUILTIN_BEXTR32:
	case IX86_BUILTIN_BEXTR64:
	case IX86_BUILTIN_BEXTRI32:
	case IX86_BUILTIN_BEXTRI64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[1]))
	    {
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
	      /* args[1] encodes start in bits 0-7 and length in
		 bits 8-15.  */
	      unsigned int start = tree_to_uhwi (args[1]);
	      unsigned int len = (start & 0xff00) >> 8;
	      start &= 0xff;
	      if (start >= prec || len == 0)
		res = 0;
	      else if (!tree_fits_uhwi_p (args[0]))
		break;
	      else
		res = tree_to_uhwi (args[0]) >> start;
	      if (len > prec)
		len = prec;
	      if (len < HOST_BITS_PER_WIDE_INT)
		res &= (HOST_WIDE_INT_1U << len) - 1;
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	case IX86_BUILTIN_BZHI32:
	case IX86_BUILTIN_BZHI64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[1]))
	    {
	      /* Zero all bits of args[0] from position idx upward.  */
	      unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
	      if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
		return args[0];
	      if (idx == 0)
		return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
	      if (!tree_fits_uhwi_p (args[0]))
		break;
	      unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
	      res &= ~(HOST_WIDE_INT_M1U << idx);
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	case IX86_BUILTIN_PDEP32:
	case IX86_BUILTIN_PDEP64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
	    {
	      /* Deposit successive low bits of src into the positions
		 of set bits in mask.  */
	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned HOST_WIDE_INT m, k = 1;
	      for (m = 1; m; m <<= 1)
		if ((mask & m) != 0)
		  {
		    if ((src & k) != 0)
		      res |= m;
		    k <<= 1;
		  }
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	case IX86_BUILTIN_PEXT32:
	case IX86_BUILTIN_PEXT64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
	    {
	      /* Extract the src bits selected by mask into successive
		 low bits of the result.  */
	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned HOST_WIDE_INT m, k = 1;
	      for (m = 1; m; m <<= 1)
		if ((mask & m) != 0)
		  {
		    if ((src & m) != 0)
		      res |= k;
		    k <<= 1;
		  }
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	case IX86_BUILTIN_MOVMSKPS:
	case IX86_BUILTIN_PMOVMSKB:
	case IX86_BUILTIN_MOVMSKPD:
	case IX86_BUILTIN_PMOVMSKB128:
	case IX86_BUILTIN_MOVMSKPD256:
	case IX86_BUILTIN_MOVMSKPS256:
	case IX86_BUILTIN_PMOVMSKB256:
	  gcc_assert (n_args == 1);
	  if (TREE_CODE (args[0]) == VECTOR_CST)
	    {
	      /* Collect the sign bit of each element into bit I of the
		 result.  */
	      HOST_WIDE_INT res = 0;
	      for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
		{
		  tree e = VECTOR_CST_ELT (args[0], i);
		  if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
		    {
		      if (wi::neg_p (x: wi::to_wide (t: e)))
			res |= HOST_WIDE_INT_1 << i;
		    }
		  else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
		    {
		      if (TREE_REAL_CST (e).sign)
			res |= HOST_WIDE_INT_1 << i;
		    }
		  else
		    return NULL_TREE;
		}
	      return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	case IX86_BUILTIN_PSLLD:
	case IX86_BUILTIN_PSLLD128:
	case IX86_BUILTIN_PSLLD128_MASK:
	case IX86_BUILTIN_PSLLD256:
	case IX86_BUILTIN_PSLLD256_MASK:
	case IX86_BUILTIN_PSLLD512:
	case IX86_BUILTIN_PSLLDI:
	case IX86_BUILTIN_PSLLDI128:
	case IX86_BUILTIN_PSLLDI128_MASK:
	case IX86_BUILTIN_PSLLDI256:
	case IX86_BUILTIN_PSLLDI256_MASK:
	case IX86_BUILTIN_PSLLDI512:
	case IX86_BUILTIN_PSLLQ:
	case IX86_BUILTIN_PSLLQ128:
	case IX86_BUILTIN_PSLLQ128_MASK:
	case IX86_BUILTIN_PSLLQ256:
	case IX86_BUILTIN_PSLLQ256_MASK:
	case IX86_BUILTIN_PSLLQ512:
	case IX86_BUILTIN_PSLLQI:
	case IX86_BUILTIN_PSLLQI128:
	case IX86_BUILTIN_PSLLQI128_MASK:
	case IX86_BUILTIN_PSLLQI256:
	case IX86_BUILTIN_PSLLQI256_MASK:
	case IX86_BUILTIN_PSLLQI512:
	case IX86_BUILTIN_PSLLW:
	case IX86_BUILTIN_PSLLW128:
	case IX86_BUILTIN_PSLLW128_MASK:
	case IX86_BUILTIN_PSLLW256:
	case IX86_BUILTIN_PSLLW256_MASK:
	case IX86_BUILTIN_PSLLW512_MASK:
	case IX86_BUILTIN_PSLLWI:
	case IX86_BUILTIN_PSLLWI128:
	case IX86_BUILTIN_PSLLWI128_MASK:
	case IX86_BUILTIN_PSLLWI256:
	case IX86_BUILTIN_PSLLWI256_MASK:
	case IX86_BUILTIN_PSLLWI512_MASK:
	  rcode = ASHIFT;
	  is_vshift = false;
	  goto do_shift;
	case IX86_BUILTIN_PSRAD:
	case IX86_BUILTIN_PSRAD128:
	case IX86_BUILTIN_PSRAD128_MASK:
	case IX86_BUILTIN_PSRAD256:
	case IX86_BUILTIN_PSRAD256_MASK:
	case IX86_BUILTIN_PSRAD512:
	case IX86_BUILTIN_PSRADI:
	case IX86_BUILTIN_PSRADI128:
	case IX86_BUILTIN_PSRADI128_MASK:
	case IX86_BUILTIN_PSRADI256:
	case IX86_BUILTIN_PSRADI256_MASK:
	case IX86_BUILTIN_PSRADI512:
	case IX86_BUILTIN_PSRAQ128_MASK:
	case IX86_BUILTIN_PSRAQ256_MASK:
	case IX86_BUILTIN_PSRAQ512:
	case IX86_BUILTIN_PSRAQI128_MASK:
	case IX86_BUILTIN_PSRAQI256_MASK:
	case IX86_BUILTIN_PSRAQI512:
	case IX86_BUILTIN_PSRAW:
	case IX86_BUILTIN_PSRAW128:
	case IX86_BUILTIN_PSRAW128_MASK:
	case IX86_BUILTIN_PSRAW256:
	case IX86_BUILTIN_PSRAW256_MASK:
	case IX86_BUILTIN_PSRAW512:
	case IX86_BUILTIN_PSRAWI:
	case IX86_BUILTIN_PSRAWI128:
	case IX86_BUILTIN_PSRAWI128_MASK:
	case IX86_BUILTIN_PSRAWI256:
	case IX86_BUILTIN_PSRAWI256_MASK:
	case IX86_BUILTIN_PSRAWI512:
	  rcode = ASHIFTRT;
	  is_vshift = false;
	  goto do_shift;
	case IX86_BUILTIN_PSRLD:
	case IX86_BUILTIN_PSRLD128:
	case IX86_BUILTIN_PSRLD128_MASK:
	case IX86_BUILTIN_PSRLD256:
	case IX86_BUILTIN_PSRLD256_MASK:
	case IX86_BUILTIN_PSRLD512:
	case IX86_BUILTIN_PSRLDI:
	case IX86_BUILTIN_PSRLDI128:
	case IX86_BUILTIN_PSRLDI128_MASK:
	case IX86_BUILTIN_PSRLDI256:
	case IX86_BUILTIN_PSRLDI256_MASK:
	case IX86_BUILTIN_PSRLDI512:
	case IX86_BUILTIN_PSRLQ:
	case IX86_BUILTIN_PSRLQ128:
	case IX86_BUILTIN_PSRLQ128_MASK:
	case IX86_BUILTIN_PSRLQ256:
	case IX86_BUILTIN_PSRLQ256_MASK:
	case IX86_BUILTIN_PSRLQ512:
	case IX86_BUILTIN_PSRLQI:
	case IX86_BUILTIN_PSRLQI128:
	case IX86_BUILTIN_PSRLQI128_MASK:
	case IX86_BUILTIN_PSRLQI256:
	case IX86_BUILTIN_PSRLQI256_MASK:
	case IX86_BUILTIN_PSRLQI512:
	case IX86_BUILTIN_PSRLW:
	case IX86_BUILTIN_PSRLW128:
	case IX86_BUILTIN_PSRLW128_MASK:
	case IX86_BUILTIN_PSRLW256:
	case IX86_BUILTIN_PSRLW256_MASK:
	case IX86_BUILTIN_PSRLW512:
	case IX86_BUILTIN_PSRLWI:
	case IX86_BUILTIN_PSRLWI128:
	case IX86_BUILTIN_PSRLWI128_MASK:
	case IX86_BUILTIN_PSRLWI256:
	case IX86_BUILTIN_PSRLWI256_MASK:
	case IX86_BUILTIN_PSRLWI512:
	  rcode = LSHIFTRT;
	  is_vshift = false;
	  goto do_shift;
	case IX86_BUILTIN_PSLLVV16HI:
	case IX86_BUILTIN_PSLLVV16SI:
	case IX86_BUILTIN_PSLLVV2DI:
	case IX86_BUILTIN_PSLLVV2DI_MASK:
	case IX86_BUILTIN_PSLLVV32HI:
	case IX86_BUILTIN_PSLLVV4DI:
	case IX86_BUILTIN_PSLLVV4DI_MASK:
	case IX86_BUILTIN_PSLLVV4SI:
	case IX86_BUILTIN_PSLLVV4SI_MASK:
	case IX86_BUILTIN_PSLLVV8DI:
	case IX86_BUILTIN_PSLLVV8HI:
	case IX86_BUILTIN_PSLLVV8SI:
	case IX86_BUILTIN_PSLLVV8SI_MASK:
	  rcode = ASHIFT;
	  is_vshift = true;
	  goto do_shift;
	case IX86_BUILTIN_PSRAVQ128:
	case IX86_BUILTIN_PSRAVQ256:
	case IX86_BUILTIN_PSRAVV16HI:
	case IX86_BUILTIN_PSRAVV16SI:
	case IX86_BUILTIN_PSRAVV32HI:
	case IX86_BUILTIN_PSRAVV4SI:
	case IX86_BUILTIN_PSRAVV4SI_MASK:
	case IX86_BUILTIN_PSRAVV8DI:
	case IX86_BUILTIN_PSRAVV8HI:
	case IX86_BUILTIN_PSRAVV8SI:
	case IX86_BUILTIN_PSRAVV8SI_MASK:
	  rcode = ASHIFTRT;
	  is_vshift = true;
	  goto do_shift;
	case IX86_BUILTIN_PSRLVV16HI:
	case IX86_BUILTIN_PSRLVV16SI:
	case IX86_BUILTIN_PSRLVV2DI:
	case IX86_BUILTIN_PSRLVV2DI_MASK:
	case IX86_BUILTIN_PSRLVV32HI:
	case IX86_BUILTIN_PSRLVV4DI:
	case IX86_BUILTIN_PSRLVV4DI_MASK:
	case IX86_BUILTIN_PSRLVV4SI:
	case IX86_BUILTIN_PSRLVV4SI_MASK:
	case IX86_BUILTIN_PSRLVV8DI:
	case IX86_BUILTIN_PSRLVV8HI:
	case IX86_BUILTIN_PSRLVV8SI:
	case IX86_BUILTIN_PSRLVV8SI_MASK:
	  rcode = LSHIFTRT;
	  is_vshift = true;
	  goto do_shift;

	do_shift:
	  /* Common folding for all vector shifts; RCODE is the shift
	     direction, IS_VSHIFT true for per-element (variable)
	     counts.  For masked variants args[n_args - 1] is the
	     write-mask and args[n_args - 2] the pass-through source.  */
	  gcc_assert (n_args >= 2);
	  if (TREE_CODE (args[0]) != VECTOR_CST)
	    break;
	  mask = HOST_WIDE_INT_M1U;
	  if (n_args > 2)
	    {
	      /* This is masked shift.  */
	      if (!tree_fits_uhwi_p (args[n_args - 1])
		  || TREE_SIDE_EFFECTS (args[n_args - 2]))
		break;
	      mask = tree_to_uhwi (args[n_args - 1]);
	      unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
	      mask |= HOST_WIDE_INT_M1U << elems;
	      if (mask != HOST_WIDE_INT_M1U
		  && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
		break;
	      /* All mask bits clear: the result is just the
		 pass-through operand.  */
	      if (mask == (HOST_WIDE_INT_M1U << elems))
		return args[n_args - 2];
	    }
	  if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
	    break;
	  if (tree tem = (is_vshift ? integer_one_node
			  : ix86_vector_shift_count (arg1: args[1])))
	    {
	      unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
	      unsigned HOST_WIDE_INT prec
		= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
	      if (count == 0 && mask == HOST_WIDE_INT_M1U)
		return args[0];
	      if (count >= prec)
		{
		  /* Out-of-range counts: arithmetic right shifts
		     saturate to prec - 1; other shifts yield zero.  */
		  if (rcode == ASHIFTRT)
		    count = prec - 1;
		  else if (mask == HOST_WIDE_INT_M1U)
		    return build_zero_cst (TREE_TYPE (args[0]));
		}
	      tree countt = NULL_TREE;
	      if (!is_vshift)
		{
		  if (count >= prec)
		    countt = integer_zero_node;
		  else
		    countt = build_int_cst (integer_type_node, count);
		}
	      tree_vector_builder builder;
	      if (mask != HOST_WIDE_INT_M1U || is_vshift)
		builder.new_vector (TREE_TYPE (args[0]),
				    npatterns: TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
				    nelts_per_pattern: 1);
	      else
		builder.new_unary_operation (TREE_TYPE (args[0]), vec: args[0],
					     allow_stepped_p: false);
	      unsigned int cnt = builder.encoded_nelts ();
	      for (unsigned int i = 0; i < cnt; ++i)
		{
		  tree elt = VECTOR_CST_ELT (args[0], i);
		  if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
		    return NULL_TREE;
		  tree type = TREE_TYPE (elt);
		  /* Logical right shifts are done in the unsigned
		     type and converted back afterwards.  */
		  if (rcode == LSHIFTRT)
		    elt = fold_convert (unsigned_type_for (type), elt);
		  if (is_vshift)
		    {
		      countt = VECTOR_CST_ELT (args[1], i);
		      if (TREE_CODE (countt) != INTEGER_CST
			  || TREE_OVERFLOW (countt))
			return NULL_TREE;
		      if (wi::neg_p (x: wi::to_wide (t: countt))
			  || wi::to_widest (t: countt) >= prec)
			{
			  if (rcode == ASHIFTRT)
			    countt = build_int_cst (TREE_TYPE (countt),
						    prec - 1);
			  else
			    {
			      elt = build_zero_cst (TREE_TYPE (elt));
			      countt = build_zero_cst (TREE_TYPE (countt));
			    }
			}
		    }
		  else if (count >= prec)
		    elt = build_zero_cst (TREE_TYPE (elt));
		  elt = const_binop (rcode == ASHIFT
				     ? LSHIFT_EXPR : RSHIFT_EXPR,
				     TREE_TYPE (elt), elt, countt);
		  if (!elt || TREE_CODE (elt) != INTEGER_CST)
		    return NULL_TREE;
		  if (rcode == LSHIFTRT)
		    elt = fold_convert (type, elt);
		  /* Mask bit clear: take the element from the
		     pass-through operand instead.  */
		  if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
		    {
		      elt = VECTOR_CST_ELT (args[n_args - 2], i);
		      if (TREE_CODE (elt) != INTEGER_CST
			  || TREE_OVERFLOW (elt))
			return NULL_TREE;
		    }
		  builder.quick_push (obj: elt);
		}
	      return builder.build ();
	    }
	  break;

	default:
	  break;
	}
    }

#ifdef SUBTARGET_FOLD_BUILTIN
  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
#endif

  return NULL_TREE;
}
19061 | |
19062 | /* Fold a MD builtin (use ix86_fold_builtin for folding into |
19063 | constant) in GIMPLE. */ |
19064 | |
19065 | bool |
19066 | ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) |
19067 | { |
19068 | gimple *stmt = gsi_stmt (i: *gsi), *g; |
19069 | gimple_seq stmts = NULL; |
19070 | tree fndecl = gimple_call_fndecl (gs: stmt); |
19071 | gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD)); |
19072 | int n_args = gimple_call_num_args (gs: stmt); |
19073 | enum ix86_builtins fn_code |
19074 | = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (decl: fndecl); |
19075 | tree decl = NULL_TREE; |
19076 | tree arg0, arg1, arg2; |
19077 | enum rtx_code rcode; |
19078 | enum tree_code tcode; |
19079 | unsigned HOST_WIDE_INT count; |
19080 | bool is_vshift; |
19081 | unsigned HOST_WIDE_INT elems; |
19082 | location_t loc; |
19083 | |
19084 | /* Don't fold when there's isa mismatch. */ |
19085 | if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL)) |
19086 | return false; |
19087 | |
19088 | switch (fn_code) |
19089 | { |
19090 | case IX86_BUILTIN_TZCNT32: |
19091 | decl = builtin_decl_implicit (fncode: BUILT_IN_CTZ); |
19092 | goto fold_tzcnt_lzcnt; |
19093 | |
19094 | case IX86_BUILTIN_TZCNT64: |
19095 | decl = builtin_decl_implicit (fncode: BUILT_IN_CTZLL); |
19096 | goto fold_tzcnt_lzcnt; |
19097 | |
19098 | case IX86_BUILTIN_LZCNT32: |
19099 | decl = builtin_decl_implicit (fncode: BUILT_IN_CLZ); |
19100 | goto fold_tzcnt_lzcnt; |
19101 | |
19102 | case IX86_BUILTIN_LZCNT64: |
19103 | decl = builtin_decl_implicit (fncode: BUILT_IN_CLZLL); |
19104 | goto fold_tzcnt_lzcnt; |
19105 | |
19106 | fold_tzcnt_lzcnt: |
19107 | gcc_assert (n_args == 1); |
19108 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
19109 | if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (gs: stmt)) |
19110 | { |
19111 | int prec = TYPE_PRECISION (TREE_TYPE (arg0)); |
19112 | /* If arg0 is provably non-zero, optimize into generic |
19113 | __builtin_c[tl]z{,ll} function the middle-end handles |
19114 | better. */ |
19115 | if (!expr_not_equal_to (t: arg0, wi::zero (precision: prec))) |
19116 | return false; |
19117 | |
19118 | loc = gimple_location (g: stmt); |
19119 | g = gimple_build_call (decl, 1, arg0); |
19120 | gimple_set_location (g, location: loc); |
19121 | tree lhs = make_ssa_name (integer_type_node); |
19122 | gimple_call_set_lhs (gs: g, lhs); |
19123 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
19124 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), NOP_EXPR, lhs); |
19125 | gimple_set_location (g, location: loc); |
19126 | gsi_replace (gsi, g, false); |
19127 | return true; |
19128 | } |
19129 | break; |
19130 | |
19131 | case IX86_BUILTIN_BZHI32: |
19132 | case IX86_BUILTIN_BZHI64: |
19133 | gcc_assert (n_args == 2); |
19134 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
19135 | if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (gs: stmt)) |
19136 | { |
19137 | unsigned int idx = tree_to_uhwi (arg1) & 0xff; |
19138 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
19139 | if (idx < TYPE_PRECISION (TREE_TYPE (arg0))) |
19140 | break; |
19141 | loc = gimple_location (g: stmt); |
19142 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), arg0); |
19143 | gimple_set_location (g, location: loc); |
19144 | gsi_replace (gsi, g, false); |
19145 | return true; |
19146 | } |
19147 | break; |
19148 | |
19149 | case IX86_BUILTIN_PDEP32: |
19150 | case IX86_BUILTIN_PDEP64: |
19151 | case IX86_BUILTIN_PEXT32: |
19152 | case IX86_BUILTIN_PEXT64: |
19153 | gcc_assert (n_args == 2); |
19154 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
19155 | if (integer_all_onesp (arg1) && gimple_call_lhs (gs: stmt)) |
19156 | { |
19157 | loc = gimple_location (g: stmt); |
19158 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
19159 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), arg0); |
19160 | gimple_set_location (g, location: loc); |
19161 | gsi_replace (gsi, g, false); |
19162 | return true; |
19163 | } |
19164 | break; |
19165 | |
19166 | case IX86_BUILTIN_PBLENDVB256: |
19167 | case IX86_BUILTIN_BLENDVPS256: |
19168 | case IX86_BUILTIN_BLENDVPD256: |
19169 | /* pcmpeqb/d/q is under avx2, w/o avx2, it's veclower |
19170 | to scalar operations and not combined back. */ |
19171 | if (!TARGET_AVX2) |
19172 | break; |
19173 | |
19174 | /* FALLTHRU. */ |
19175 | case IX86_BUILTIN_BLENDVPD: |
19176 | /* blendvpd is under sse4.1 but pcmpgtq is under sse4.2, |
19177 | w/o sse4.2, it's veclowered to scalar operations and |
19178 | not combined back. */ |
19179 | if (!TARGET_SSE4_2) |
19180 | break; |
19181 | /* FALLTHRU. */ |
19182 | case IX86_BUILTIN_PBLENDVB128: |
19183 | case IX86_BUILTIN_BLENDVPS: |
19184 | gcc_assert (n_args == 3); |
19185 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
19186 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
19187 | arg2 = gimple_call_arg (gs: stmt, index: 2); |
19188 | if (gimple_call_lhs (gs: stmt)) |
19189 | { |
19190 | loc = gimple_location (g: stmt); |
19191 | tree type = TREE_TYPE (arg2); |
19192 | if (VECTOR_FLOAT_TYPE_P (type)) |
19193 | { |
19194 | tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode |
19195 | ? intSI_type_node : intDI_type_node; |
19196 | type = get_same_sized_vectype (itype, type); |
19197 | } |
19198 | else |
19199 | type = signed_type_for (type); |
19200 | arg2 = gimple_build (seq: &stmts, code: VIEW_CONVERT_EXPR, type, ops: arg2); |
19201 | tree zero_vec = build_zero_cst (type); |
19202 | tree cmp_type = truth_type_for (type); |
19203 | tree cmp = gimple_build (seq: &stmts, code: LT_EXPR, type: cmp_type, ops: arg2, ops: zero_vec); |
19204 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); |
19205 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
19206 | VEC_COND_EXPR, cmp, |
19207 | arg1, arg0); |
19208 | gimple_set_location (g, location: loc); |
19209 | gsi_replace (gsi, g, false); |
19210 | } |
19211 | else |
19212 | gsi_replace (gsi, gimple_build_nop (), false); |
19213 | return true; |
19214 | |
19215 | |
19216 | case IX86_BUILTIN_PCMPEQB128: |
19217 | case IX86_BUILTIN_PCMPEQW128: |
19218 | case IX86_BUILTIN_PCMPEQD128: |
19219 | case IX86_BUILTIN_PCMPEQQ: |
19220 | case IX86_BUILTIN_PCMPEQB256: |
19221 | case IX86_BUILTIN_PCMPEQW256: |
19222 | case IX86_BUILTIN_PCMPEQD256: |
19223 | case IX86_BUILTIN_PCMPEQQ256: |
19224 | tcode = EQ_EXPR; |
19225 | goto do_cmp; |
19226 | |
19227 | case IX86_BUILTIN_PCMPGTB128: |
19228 | case IX86_BUILTIN_PCMPGTW128: |
19229 | case IX86_BUILTIN_PCMPGTD128: |
19230 | case IX86_BUILTIN_PCMPGTQ: |
19231 | case IX86_BUILTIN_PCMPGTB256: |
19232 | case IX86_BUILTIN_PCMPGTW256: |
19233 | case IX86_BUILTIN_PCMPGTD256: |
19234 | case IX86_BUILTIN_PCMPGTQ256: |
19235 | tcode = GT_EXPR; |
19236 | |
19237 | do_cmp: |
19238 | gcc_assert (n_args == 2); |
19239 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
19240 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
19241 | if (gimple_call_lhs (gs: stmt)) |
19242 | { |
19243 | loc = gimple_location (g: stmt); |
19244 | tree type = TREE_TYPE (arg0); |
19245 | tree zero_vec = build_zero_cst (type); |
19246 | tree minus_one_vec = build_minus_one_cst (type); |
19247 | tree cmp_type = truth_type_for (type); |
19248 | tree cmp = gimple_build (seq: &stmts, code: tcode, type: cmp_type, ops: arg0, ops: arg1); |
19249 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); |
19250 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
19251 | VEC_COND_EXPR, cmp, |
19252 | minus_one_vec, zero_vec); |
19253 | gimple_set_location (g, location: loc); |
19254 | gsi_replace (gsi, g, false); |
19255 | } |
19256 | else |
19257 | gsi_replace (gsi, gimple_build_nop (), false); |
19258 | return true; |
19259 | |
19260 | case IX86_BUILTIN_PSLLD: |
19261 | case IX86_BUILTIN_PSLLD128: |
19262 | case IX86_BUILTIN_PSLLD128_MASK: |
19263 | case IX86_BUILTIN_PSLLD256: |
19264 | case IX86_BUILTIN_PSLLD256_MASK: |
19265 | case IX86_BUILTIN_PSLLD512: |
19266 | case IX86_BUILTIN_PSLLDI: |
19267 | case IX86_BUILTIN_PSLLDI128: |
19268 | case IX86_BUILTIN_PSLLDI128_MASK: |
19269 | case IX86_BUILTIN_PSLLDI256: |
19270 | case IX86_BUILTIN_PSLLDI256_MASK: |
19271 | case IX86_BUILTIN_PSLLDI512: |
19272 | case IX86_BUILTIN_PSLLQ: |
19273 | case IX86_BUILTIN_PSLLQ128: |
19274 | case IX86_BUILTIN_PSLLQ128_MASK: |
19275 | case IX86_BUILTIN_PSLLQ256: |
19276 | case IX86_BUILTIN_PSLLQ256_MASK: |
19277 | case IX86_BUILTIN_PSLLQ512: |
19278 | case IX86_BUILTIN_PSLLQI: |
19279 | case IX86_BUILTIN_PSLLQI128: |
19280 | case IX86_BUILTIN_PSLLQI128_MASK: |
19281 | case IX86_BUILTIN_PSLLQI256: |
19282 | case IX86_BUILTIN_PSLLQI256_MASK: |
19283 | case IX86_BUILTIN_PSLLQI512: |
19284 | case IX86_BUILTIN_PSLLW: |
19285 | case IX86_BUILTIN_PSLLW128: |
19286 | case IX86_BUILTIN_PSLLW128_MASK: |
19287 | case IX86_BUILTIN_PSLLW256: |
19288 | case IX86_BUILTIN_PSLLW256_MASK: |
19289 | case IX86_BUILTIN_PSLLW512_MASK: |
19290 | case IX86_BUILTIN_PSLLWI: |
19291 | case IX86_BUILTIN_PSLLWI128: |
19292 | case IX86_BUILTIN_PSLLWI128_MASK: |
19293 | case IX86_BUILTIN_PSLLWI256: |
19294 | case IX86_BUILTIN_PSLLWI256_MASK: |
19295 | case IX86_BUILTIN_PSLLWI512_MASK: |
19296 | rcode = ASHIFT; |
19297 | is_vshift = false; |
19298 | goto do_shift; |
19299 | case IX86_BUILTIN_PSRAD: |
19300 | case IX86_BUILTIN_PSRAD128: |
19301 | case IX86_BUILTIN_PSRAD128_MASK: |
19302 | case IX86_BUILTIN_PSRAD256: |
19303 | case IX86_BUILTIN_PSRAD256_MASK: |
19304 | case IX86_BUILTIN_PSRAD512: |
19305 | case IX86_BUILTIN_PSRADI: |
19306 | case IX86_BUILTIN_PSRADI128: |
19307 | case IX86_BUILTIN_PSRADI128_MASK: |
19308 | case IX86_BUILTIN_PSRADI256: |
19309 | case IX86_BUILTIN_PSRADI256_MASK: |
19310 | case IX86_BUILTIN_PSRADI512: |
19311 | case IX86_BUILTIN_PSRAQ128_MASK: |
19312 | case IX86_BUILTIN_PSRAQ256_MASK: |
19313 | case IX86_BUILTIN_PSRAQ512: |
19314 | case IX86_BUILTIN_PSRAQI128_MASK: |
19315 | case IX86_BUILTIN_PSRAQI256_MASK: |
19316 | case IX86_BUILTIN_PSRAQI512: |
19317 | case IX86_BUILTIN_PSRAW: |
19318 | case IX86_BUILTIN_PSRAW128: |
19319 | case IX86_BUILTIN_PSRAW128_MASK: |
19320 | case IX86_BUILTIN_PSRAW256: |
19321 | case IX86_BUILTIN_PSRAW256_MASK: |
19322 | case IX86_BUILTIN_PSRAW512: |
19323 | case IX86_BUILTIN_PSRAWI: |
19324 | case IX86_BUILTIN_PSRAWI128: |
19325 | case IX86_BUILTIN_PSRAWI128_MASK: |
19326 | case IX86_BUILTIN_PSRAWI256: |
19327 | case IX86_BUILTIN_PSRAWI256_MASK: |
19328 | case IX86_BUILTIN_PSRAWI512: |
19329 | rcode = ASHIFTRT; |
19330 | is_vshift = false; |
19331 | goto do_shift; |
19332 | case IX86_BUILTIN_PSRLD: |
19333 | case IX86_BUILTIN_PSRLD128: |
19334 | case IX86_BUILTIN_PSRLD128_MASK: |
19335 | case IX86_BUILTIN_PSRLD256: |
19336 | case IX86_BUILTIN_PSRLD256_MASK: |
19337 | case IX86_BUILTIN_PSRLD512: |
19338 | case IX86_BUILTIN_PSRLDI: |
19339 | case IX86_BUILTIN_PSRLDI128: |
19340 | case IX86_BUILTIN_PSRLDI128_MASK: |
19341 | case IX86_BUILTIN_PSRLDI256: |
19342 | case IX86_BUILTIN_PSRLDI256_MASK: |
19343 | case IX86_BUILTIN_PSRLDI512: |
19344 | case IX86_BUILTIN_PSRLQ: |
19345 | case IX86_BUILTIN_PSRLQ128: |
19346 | case IX86_BUILTIN_PSRLQ128_MASK: |
19347 | case IX86_BUILTIN_PSRLQ256: |
19348 | case IX86_BUILTIN_PSRLQ256_MASK: |
19349 | case IX86_BUILTIN_PSRLQ512: |
19350 | case IX86_BUILTIN_PSRLQI: |
19351 | case IX86_BUILTIN_PSRLQI128: |
19352 | case IX86_BUILTIN_PSRLQI128_MASK: |
19353 | case IX86_BUILTIN_PSRLQI256: |
19354 | case IX86_BUILTIN_PSRLQI256_MASK: |
19355 | case IX86_BUILTIN_PSRLQI512: |
19356 | case IX86_BUILTIN_PSRLW: |
19357 | case IX86_BUILTIN_PSRLW128: |
19358 | case IX86_BUILTIN_PSRLW128_MASK: |
19359 | case IX86_BUILTIN_PSRLW256: |
19360 | case IX86_BUILTIN_PSRLW256_MASK: |
19361 | case IX86_BUILTIN_PSRLW512: |
19362 | case IX86_BUILTIN_PSRLWI: |
19363 | case IX86_BUILTIN_PSRLWI128: |
19364 | case IX86_BUILTIN_PSRLWI128_MASK: |
19365 | case IX86_BUILTIN_PSRLWI256: |
19366 | case IX86_BUILTIN_PSRLWI256_MASK: |
19367 | case IX86_BUILTIN_PSRLWI512: |
19368 | rcode = LSHIFTRT; |
19369 | is_vshift = false; |
19370 | goto do_shift; |
19371 | case IX86_BUILTIN_PSLLVV16HI: |
19372 | case IX86_BUILTIN_PSLLVV16SI: |
19373 | case IX86_BUILTIN_PSLLVV2DI: |
19374 | case IX86_BUILTIN_PSLLVV2DI_MASK: |
19375 | case IX86_BUILTIN_PSLLVV32HI: |
19376 | case IX86_BUILTIN_PSLLVV4DI: |
19377 | case IX86_BUILTIN_PSLLVV4DI_MASK: |
19378 | case IX86_BUILTIN_PSLLVV4SI: |
19379 | case IX86_BUILTIN_PSLLVV4SI_MASK: |
19380 | case IX86_BUILTIN_PSLLVV8DI: |
19381 | case IX86_BUILTIN_PSLLVV8HI: |
19382 | case IX86_BUILTIN_PSLLVV8SI: |
19383 | case IX86_BUILTIN_PSLLVV8SI_MASK: |
19384 | rcode = ASHIFT; |
19385 | is_vshift = true; |
19386 | goto do_shift; |
19387 | case IX86_BUILTIN_PSRAVQ128: |
19388 | case IX86_BUILTIN_PSRAVQ256: |
19389 | case IX86_BUILTIN_PSRAVV16HI: |
19390 | case IX86_BUILTIN_PSRAVV16SI: |
19391 | case IX86_BUILTIN_PSRAVV32HI: |
19392 | case IX86_BUILTIN_PSRAVV4SI: |
19393 | case IX86_BUILTIN_PSRAVV4SI_MASK: |
19394 | case IX86_BUILTIN_PSRAVV8DI: |
19395 | case IX86_BUILTIN_PSRAVV8HI: |
19396 | case IX86_BUILTIN_PSRAVV8SI: |
19397 | case IX86_BUILTIN_PSRAVV8SI_MASK: |
19398 | rcode = ASHIFTRT; |
19399 | is_vshift = true; |
19400 | goto do_shift; |
19401 | case IX86_BUILTIN_PSRLVV16HI: |
19402 | case IX86_BUILTIN_PSRLVV16SI: |
19403 | case IX86_BUILTIN_PSRLVV2DI: |
19404 | case IX86_BUILTIN_PSRLVV2DI_MASK: |
19405 | case IX86_BUILTIN_PSRLVV32HI: |
19406 | case IX86_BUILTIN_PSRLVV4DI: |
19407 | case IX86_BUILTIN_PSRLVV4DI_MASK: |
19408 | case IX86_BUILTIN_PSRLVV4SI: |
19409 | case IX86_BUILTIN_PSRLVV4SI_MASK: |
19410 | case IX86_BUILTIN_PSRLVV8DI: |
19411 | case IX86_BUILTIN_PSRLVV8HI: |
19412 | case IX86_BUILTIN_PSRLVV8SI: |
19413 | case IX86_BUILTIN_PSRLVV8SI_MASK: |
19414 | rcode = LSHIFTRT; |
19415 | is_vshift = true; |
19416 | goto do_shift; |
19417 | |
19418 | do_shift: |
19419 | gcc_assert (n_args >= 2); |
19420 | if (!gimple_call_lhs (gs: stmt)) |
19421 | { |
19422 | gsi_replace (gsi, gimple_build_nop (), false); |
19423 | return true; |
19424 | } |
19425 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
19426 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
19427 | elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); |
19428 | /* For masked shift, only optimize if the mask is all ones. */ |
19429 | if (n_args > 2 |
19430 | && !ix86_masked_all_ones (elems, arg_mask: gimple_call_arg (gs: stmt, index: n_args - 1))) |
19431 | break; |
19432 | if (is_vshift) |
19433 | { |
19434 | if (TREE_CODE (arg1) != VECTOR_CST) |
19435 | break; |
19436 | count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))); |
19437 | if (integer_zerop (arg1)) |
19438 | count = 0; |
19439 | else if (rcode == ASHIFTRT) |
19440 | break; |
19441 | else |
19442 | for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i) |
19443 | { |
19444 | tree elt = VECTOR_CST_ELT (arg1, i); |
19445 | if (!wi::neg_p (x: wi::to_wide (t: elt)) |
19446 | && wi::to_widest (t: elt) < count) |
19447 | return false; |
19448 | } |
19449 | } |
19450 | else |
19451 | { |
19452 | arg1 = ix86_vector_shift_count (arg1); |
19453 | if (!arg1) |
19454 | break; |
19455 | count = tree_to_uhwi (arg1); |
19456 | } |
19457 | if (count == 0) |
19458 | { |
19459 | /* Just return the first argument for shift by 0. */ |
19460 | loc = gimple_location (g: stmt); |
19461 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), arg0); |
19462 | gimple_set_location (g, location: loc); |
19463 | gsi_replace (gsi, g, false); |
19464 | return true; |
19465 | } |
19466 | if (rcode != ASHIFTRT |
19467 | && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)))) |
19468 | { |
19469 | /* For shift counts equal or greater than precision, except for |
19470 | arithmetic right shift the result is zero. */ |
19471 | loc = gimple_location (g: stmt); |
19472 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
19473 | build_zero_cst (TREE_TYPE (arg0))); |
19474 | gimple_set_location (g, location: loc); |
19475 | gsi_replace (gsi, g, false); |
19476 | return true; |
19477 | } |
19478 | break; |
19479 | |
19480 | case IX86_BUILTIN_SHUFPD512: |
19481 | case IX86_BUILTIN_SHUFPS512: |
19482 | case IX86_BUILTIN_SHUFPD: |
19483 | case IX86_BUILTIN_SHUFPD256: |
19484 | case IX86_BUILTIN_SHUFPS: |
19485 | case IX86_BUILTIN_SHUFPS256: |
19486 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
19487 | elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); |
19488 | /* This is masked shuffle. Only optimize if the mask is all ones. */ |
19489 | if (n_args > 3 |
19490 | && !ix86_masked_all_ones (elems, |
19491 | arg_mask: gimple_call_arg (gs: stmt, index: n_args - 1))) |
19492 | break; |
19493 | arg2 = gimple_call_arg (gs: stmt, index: 2); |
19494 | if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (gs: stmt)) |
19495 | { |
19496 | unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2); |
19497 | /* Check valid imm, refer to gcc.target/i386/testimm-10.c. */ |
19498 | if (shuffle_mask > 255) |
19499 | return false; |
19500 | |
19501 | machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0))); |
19502 | loc = gimple_location (g: stmt); |
19503 | tree itype = (imode == E_DFmode |
19504 | ? long_long_integer_type_node : integer_type_node); |
19505 | tree vtype = build_vector_type (itype, elems); |
19506 | tree_vector_builder elts (vtype, elems, 1); |
19507 | |
19508 | |
19509 | /* Transform integer shuffle_mask to vector perm_mask which |
19510 | is used by vec_perm_expr, refer to shuflp[sd]256/512 in sse.md. */ |
19511 | for (unsigned i = 0; i != elems; i++) |
19512 | { |
19513 | unsigned sel_idx; |
		/* Imm[1:0] (if VL > 128, then use Imm[3:2], Imm[5:4], Imm[7:6])
		   provide 2 select controls for each element of the
		   destination.  */
19517 | if (imode == E_DFmode) |
19518 | sel_idx = (i & 1) * elems + (i & ~1) |
19519 | + ((shuffle_mask >> i) & 1); |
19520 | else |
19521 | { |
19522 | /* Imm[7:0](if VL > 128, also use Imm[7:0]) provide 4 select |
19523 | controls for each element of the destination. */ |
19524 | unsigned j = i % 4; |
19525 | sel_idx = ((i >> 1) & 1) * elems + (i & ~3) |
19526 | + ((shuffle_mask >> 2 * j) & 3); |
19527 | } |
19528 | elts.quick_push (obj: build_int_cst (itype, sel_idx)); |
19529 | } |
19530 | |
19531 | tree perm_mask = elts.build (); |
19532 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
19533 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
19534 | VEC_PERM_EXPR, |
19535 | arg0, arg1, perm_mask); |
19536 | gimple_set_location (g, location: loc); |
19537 | gsi_replace (gsi, g, false); |
19538 | return true; |
19539 | } |
19540 | // Do not error yet, the constant could be propagated later? |
19541 | break; |
19542 | |
19543 | case IX86_BUILTIN_PABSB: |
19544 | case IX86_BUILTIN_PABSW: |
19545 | case IX86_BUILTIN_PABSD: |
19546 | /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */ |
19547 | if (!TARGET_MMX_WITH_SSE) |
19548 | break; |
19549 | /* FALLTHRU. */ |
19550 | case IX86_BUILTIN_PABSB128: |
19551 | case IX86_BUILTIN_PABSB256: |
19552 | case IX86_BUILTIN_PABSB512: |
19553 | case IX86_BUILTIN_PABSW128: |
19554 | case IX86_BUILTIN_PABSW256: |
19555 | case IX86_BUILTIN_PABSW512: |
19556 | case IX86_BUILTIN_PABSD128: |
19557 | case IX86_BUILTIN_PABSD256: |
19558 | case IX86_BUILTIN_PABSD512: |
19559 | case IX86_BUILTIN_PABSQ128: |
19560 | case IX86_BUILTIN_PABSQ256: |
19561 | case IX86_BUILTIN_PABSQ512: |
19562 | case IX86_BUILTIN_PABSB128_MASK: |
19563 | case IX86_BUILTIN_PABSB256_MASK: |
19564 | case IX86_BUILTIN_PABSW128_MASK: |
19565 | case IX86_BUILTIN_PABSW256_MASK: |
19566 | case IX86_BUILTIN_PABSD128_MASK: |
19567 | case IX86_BUILTIN_PABSD256_MASK: |
19568 | gcc_assert (n_args >= 1); |
19569 | if (!gimple_call_lhs (gs: stmt)) |
19570 | { |
19571 | gsi_replace (gsi, gimple_build_nop (), false); |
19572 | return true; |
19573 | } |
19574 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
19575 | elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); |
19576 | /* For masked ABS, only optimize if the mask is all ones. */ |
19577 | if (n_args > 1 |
19578 | && !ix86_masked_all_ones (elems, arg_mask: gimple_call_arg (gs: stmt, index: n_args - 1))) |
19579 | break; |
19580 | { |
19581 | tree utype, ures, vce; |
19582 | utype = unsigned_type_for (TREE_TYPE (arg0)); |
19583 | /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR |
19584 | instead of ABS_EXPR to hanlde overflow case(TYPE_MIN). */ |
19585 | ures = gimple_build (seq: &stmts, code: ABSU_EXPR, type: utype, ops: arg0); |
19586 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); |
19587 | loc = gimple_location (g: stmt); |
19588 | vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures); |
19589 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
19590 | VIEW_CONVERT_EXPR, vce); |
19591 | gsi_replace (gsi, g, false); |
19592 | } |
19593 | return true; |
19594 | |
19595 | default: |
19596 | break; |
19597 | } |
19598 | |
19599 | return false; |
19600 | } |
19601 | |
19602 | /* Handler for an SVML-style interface to |
19603 | a library with vectorized intrinsics. */ |
19604 | |
19605 | tree |
19606 | ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in) |
19607 | { |
19608 | char name[20]; |
19609 | tree fntype, new_fndecl, args; |
19610 | unsigned arity; |
19611 | const char *bname; |
19612 | machine_mode el_mode, in_mode; |
19613 | int n, in_n; |
19614 | |
19615 | /* The SVML is suitable for unsafe math only. */ |
19616 | if (!flag_unsafe_math_optimizations) |
19617 | return NULL_TREE; |
19618 | |
19619 | el_mode = TYPE_MODE (TREE_TYPE (type_out)); |
19620 | n = TYPE_VECTOR_SUBPARTS (node: type_out); |
19621 | in_mode = TYPE_MODE (TREE_TYPE (type_in)); |
19622 | in_n = TYPE_VECTOR_SUBPARTS (node: type_in); |
19623 | if (el_mode != in_mode |
19624 | || n != in_n) |
19625 | return NULL_TREE; |
19626 | |
19627 | switch (fn) |
19628 | { |
19629 | CASE_CFN_EXP: |
19630 | CASE_CFN_LOG: |
19631 | CASE_CFN_LOG10: |
19632 | CASE_CFN_POW: |
19633 | CASE_CFN_TANH: |
19634 | CASE_CFN_TAN: |
19635 | CASE_CFN_ATAN: |
19636 | CASE_CFN_ATAN2: |
19637 | CASE_CFN_ATANH: |
19638 | CASE_CFN_CBRT: |
19639 | CASE_CFN_SINH: |
19640 | CASE_CFN_SIN: |
19641 | CASE_CFN_ASINH: |
19642 | CASE_CFN_ASIN: |
19643 | CASE_CFN_COSH: |
19644 | CASE_CFN_COS: |
19645 | CASE_CFN_ACOSH: |
19646 | CASE_CFN_ACOS: |
19647 | if ((el_mode != DFmode || n != 2) |
19648 | && (el_mode != SFmode || n != 4)) |
19649 | return NULL_TREE; |
19650 | break; |
19651 | |
19652 | default: |
19653 | return NULL_TREE; |
19654 | } |
19655 | |
19656 | tree fndecl = mathfn_built_in (el_mode == DFmode |
19657 | ? double_type_node : float_type_node, fn); |
19658 | bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); |
19659 | |
19660 | if (DECL_FUNCTION_CODE (decl: fndecl) == BUILT_IN_LOGF) |
19661 | strcpy (dest: name, src: "vmlsLn4" ); |
19662 | else if (DECL_FUNCTION_CODE (decl: fndecl) == BUILT_IN_LOG) |
19663 | strcpy (dest: name, src: "vmldLn2" ); |
19664 | else if (n == 4) |
19665 | { |
19666 | sprintf (s: name, format: "vmls%s" , bname+10); |
19667 | name[strlen (s: name)-1] = '4'; |
19668 | } |
19669 | else |
19670 | sprintf (s: name, format: "vmld%s2" , bname+10); |
19671 | |
19672 | /* Convert to uppercase. */ |
19673 | name[4] &= ~0x20; |
19674 | |
19675 | arity = 0; |
19676 | for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args)) |
19677 | arity++; |
19678 | |
19679 | if (arity == 1) |
19680 | fntype = build_function_type_list (type_out, type_in, NULL); |
19681 | else |
19682 | fntype = build_function_type_list (type_out, type_in, type_in, NULL); |
19683 | |
19684 | /* Build a function declaration for the vectorized function. */ |
19685 | new_fndecl = build_decl (BUILTINS_LOCATION, |
19686 | FUNCTION_DECL, get_identifier (name), fntype); |
19687 | TREE_PUBLIC (new_fndecl) = 1; |
19688 | DECL_EXTERNAL (new_fndecl) = 1; |
19689 | DECL_IS_NOVOPS (new_fndecl) = 1; |
19690 | TREE_READONLY (new_fndecl) = 1; |
19691 | |
19692 | return new_fndecl; |
19693 | } |
19694 | |
19695 | /* Handler for an ACML-style interface to |
19696 | a library with vectorized intrinsics. */ |
19697 | |
19698 | tree |
19699 | ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in) |
19700 | { |
19701 | char name[20] = "__vr.._" ; |
19702 | tree fntype, new_fndecl, args; |
19703 | unsigned arity; |
19704 | const char *bname; |
19705 | machine_mode el_mode, in_mode; |
19706 | int n, in_n; |
19707 | |
19708 | /* The ACML is 64bits only and suitable for unsafe math only as |
19709 | it does not correctly support parts of IEEE with the required |
19710 | precision such as denormals. */ |
19711 | if (!TARGET_64BIT |
19712 | || !flag_unsafe_math_optimizations) |
19713 | return NULL_TREE; |
19714 | |
19715 | el_mode = TYPE_MODE (TREE_TYPE (type_out)); |
19716 | n = TYPE_VECTOR_SUBPARTS (node: type_out); |
19717 | in_mode = TYPE_MODE (TREE_TYPE (type_in)); |
19718 | in_n = TYPE_VECTOR_SUBPARTS (node: type_in); |
19719 | if (el_mode != in_mode |
19720 | || n != in_n) |
19721 | return NULL_TREE; |
19722 | |
19723 | switch (fn) |
19724 | { |
19725 | CASE_CFN_SIN: |
19726 | CASE_CFN_COS: |
19727 | CASE_CFN_EXP: |
19728 | CASE_CFN_LOG: |
19729 | CASE_CFN_LOG2: |
19730 | CASE_CFN_LOG10: |
19731 | if (el_mode == DFmode && n == 2) |
19732 | { |
19733 | name[4] = 'd'; |
19734 | name[5] = '2'; |
19735 | } |
19736 | else if (el_mode == SFmode && n == 4) |
19737 | { |
19738 | name[4] = 's'; |
19739 | name[5] = '4'; |
19740 | } |
19741 | else |
19742 | return NULL_TREE; |
19743 | break; |
19744 | |
19745 | default: |
19746 | return NULL_TREE; |
19747 | } |
19748 | |
19749 | tree fndecl = mathfn_built_in (el_mode == DFmode |
19750 | ? double_type_node : float_type_node, fn); |
19751 | bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); |
19752 | sprintf (s: name + 7, format: "%s" , bname+10); |
19753 | |
19754 | arity = 0; |
19755 | for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args)) |
19756 | arity++; |
19757 | |
19758 | if (arity == 1) |
19759 | fntype = build_function_type_list (type_out, type_in, NULL); |
19760 | else |
19761 | fntype = build_function_type_list (type_out, type_in, type_in, NULL); |
19762 | |
19763 | /* Build a function declaration for the vectorized function. */ |
19764 | new_fndecl = build_decl (BUILTINS_LOCATION, |
19765 | FUNCTION_DECL, get_identifier (name), fntype); |
19766 | TREE_PUBLIC (new_fndecl) = 1; |
19767 | DECL_EXTERNAL (new_fndecl) = 1; |
19768 | DECL_IS_NOVOPS (new_fndecl) = 1; |
19769 | TREE_READONLY (new_fndecl) = 1; |
19770 | |
19771 | return new_fndecl; |
19772 | } |
19773 | |
19774 | /* Returns a decl of a function that implements scatter store with |
19775 | register type VECTYPE and index type INDEX_TYPE and SCALE. |
19776 | Return NULL_TREE if it is not available. */ |
19777 | |
19778 | static tree |
19779 | ix86_vectorize_builtin_scatter (const_tree vectype, |
19780 | const_tree index_type, int scale) |
19781 | { |
19782 | bool si; |
19783 | enum ix86_builtins code; |
19784 | const machine_mode mode = TYPE_MODE (TREE_TYPE (vectype)); |
19785 | |
19786 | if (!TARGET_AVX512F) |
19787 | return NULL_TREE; |
19788 | |
19789 | if (!TARGET_EVEX512 && GET_MODE_SIZE (mode) == 64) |
19790 | return NULL_TREE; |
19791 | |
19792 | if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u) |
19793 | ? !TARGET_USE_SCATTER_2PARTS |
19794 | : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u) |
19795 | ? !TARGET_USE_SCATTER_4PARTS |
19796 | : !TARGET_USE_SCATTER_8PARTS)) |
19797 | return NULL_TREE; |
19798 | |
19799 | if ((TREE_CODE (index_type) != INTEGER_TYPE |
19800 | && !POINTER_TYPE_P (index_type)) |
19801 | || (TYPE_MODE (index_type) != SImode |
19802 | && TYPE_MODE (index_type) != DImode)) |
19803 | return NULL_TREE; |
19804 | |
19805 | if (TYPE_PRECISION (index_type) > POINTER_SIZE) |
19806 | return NULL_TREE; |
19807 | |
19808 | /* v*scatter* insn sign extends index to pointer mode. */ |
19809 | if (TYPE_PRECISION (index_type) < POINTER_SIZE |
19810 | && TYPE_UNSIGNED (index_type)) |
19811 | return NULL_TREE; |
19812 | |
19813 | /* Scale can be 1, 2, 4 or 8. */ |
19814 | if (scale <= 0 |
19815 | || scale > 8 |
19816 | || (scale & (scale - 1)) != 0) |
19817 | return NULL_TREE; |
19818 | |
19819 | si = TYPE_MODE (index_type) == SImode; |
19820 | switch (TYPE_MODE (vectype)) |
19821 | { |
19822 | case E_V8DFmode: |
19823 | code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF; |
19824 | break; |
19825 | case E_V8DImode: |
19826 | code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI; |
19827 | break; |
19828 | case E_V16SFmode: |
19829 | code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF; |
19830 | break; |
19831 | case E_V16SImode: |
19832 | code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI; |
19833 | break; |
19834 | case E_V4DFmode: |
19835 | if (TARGET_AVX512VL) |
19836 | code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF; |
19837 | else |
19838 | return NULL_TREE; |
19839 | break; |
19840 | case E_V4DImode: |
19841 | if (TARGET_AVX512VL) |
19842 | code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI; |
19843 | else |
19844 | return NULL_TREE; |
19845 | break; |
19846 | case E_V8SFmode: |
19847 | if (TARGET_AVX512VL) |
19848 | code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF; |
19849 | else |
19850 | return NULL_TREE; |
19851 | break; |
19852 | case E_V8SImode: |
19853 | if (TARGET_AVX512VL) |
19854 | code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI; |
19855 | else |
19856 | return NULL_TREE; |
19857 | break; |
19858 | case E_V2DFmode: |
19859 | if (TARGET_AVX512VL) |
19860 | code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF; |
19861 | else |
19862 | return NULL_TREE; |
19863 | break; |
19864 | case E_V2DImode: |
19865 | if (TARGET_AVX512VL) |
19866 | code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI; |
19867 | else |
19868 | return NULL_TREE; |
19869 | break; |
19870 | case E_V4SFmode: |
19871 | if (TARGET_AVX512VL) |
19872 | code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF; |
19873 | else |
19874 | return NULL_TREE; |
19875 | break; |
19876 | case E_V4SImode: |
19877 | if (TARGET_AVX512VL) |
19878 | code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI; |
19879 | else |
19880 | return NULL_TREE; |
19881 | break; |
19882 | default: |
19883 | return NULL_TREE; |
19884 | } |
19885 | |
19886 | return get_ix86_builtin (c: code); |
19887 | } |
19888 | |
19889 | /* Return true if it is safe to use the rsqrt optabs to optimize |
19890 | 1.0/sqrt. */ |
19891 | |
19892 | static bool |
19893 | use_rsqrt_p (machine_mode mode) |
19894 | { |
19895 | return ((mode == HFmode |
19896 | || (TARGET_SSE && TARGET_SSE_MATH)) |
19897 | && flag_finite_math_only |
19898 | && !flag_trapping_math |
19899 | && flag_unsafe_math_optimizations); |
19900 | } |
19901 | |
19902 | /* Helper for avx_vpermilps256_operand et al. This is also used by |
19903 | the expansion functions to turn the parallel back into a mask. |
19904 | The return value is 0 for no match and the imm8+1 for a match. */ |
19905 | |
int
avx_vpermilp_parallel (rtx par, machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[16] = {};  /* Silence -Wuninitialized warning.  */

  /* The parallel must supply exactly one selector per element.  */
  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case E_V8DFmode:
      /* In the 512-bit DFmode case, we can only move elements within
	 a 128-bit lane.  First fill the second part of the mask
	 (elements 4-7, one mask bit each at bit position i),
	 then fallthru to handle elements 0-3.  */
      for (i = 4; i < 6; ++i)
	{
	  /* Elements 4-5 must select from lane {4,5}.  */
	  if (ipar[i] < 4 || ipar[i] >= 6)
	    return 0;
	  mask |= (ipar[i] - 4) << i;
	}
      for (i = 6; i < 8; ++i)
	{
	  /* Elements 6-7 must select from lane {6,7}.  */
	  if (ipar[i] < 6)
	    return 0;
	  mask |= (ipar[i] - 6) << i;
	}
      /* FALLTHRU */

    case E_V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
	{
	  /* Elements 0-1 select from lane {0,1}.  */
	  if (ipar[i] >= 2)
	    return 0;
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  /* Elements 2-3 select from lane {2,3}.  */
	  if (ipar[i] < 2)
	    return 0;
	  mask |= (ipar[i] - 2) << i;
	}
      break;

    case E_V16SFmode:
      /* In 512 bit SFmode case, permutation in the upper 256 bits
	 must mirror the permutation in the lower 256-bits.  */
      for (i = 0; i < 8; ++i)
	if (ipar[i] + 8 != ipar[i + 8])
	  return 0;
      /* FALLTHRU */

    case E_V8SFmode:
      /* In 256 bit SFmode case, we have full freedom of
	 movement within the low 128-bit lane, but the high 128-bit
	 lane must mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      /* Only the low 4 selectors are encoded in the immediate.  */
      nelt = 4;
      /* FALLTHRU */

    case E_V2DFmode:
    case E_V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  Each selector takes
	 nelt/2 bits of the immediate: 2 bits per SF element, 1 bit
	 per DF element.  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
20002 | |
20003 | /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by |
20004 | the expansion functions to turn the parallel back into a mask. |
20005 | The return value is 0 for no match and the imm8+1 for a match. */ |
20006 | |
20007 | int |
20008 | avx_vperm2f128_parallel (rtx par, machine_mode mode) |
20009 | { |
20010 | unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2; |
20011 | unsigned mask = 0; |
20012 | unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */ |
20013 | |
20014 | if (XVECLEN (par, 0) != (int) nelt) |
20015 | return 0; |
20016 | |
20017 | /* Validate that all of the elements are constants, and not totally |
20018 | out of range. Copy the data into an integral array to make the |
20019 | subsequent checks easier. */ |
20020 | for (i = 0; i < nelt; ++i) |
20021 | { |
20022 | rtx er = XVECEXP (par, 0, i); |
20023 | unsigned HOST_WIDE_INT ei; |
20024 | |
20025 | if (!CONST_INT_P (er)) |
20026 | return 0; |
20027 | ei = INTVAL (er); |
20028 | if (ei >= 2 * nelt) |
20029 | return 0; |
20030 | ipar[i] = ei; |
20031 | } |
20032 | |
20033 | /* Validate that the halves of the permute are halves. */ |
20034 | for (i = 0; i < nelt2 - 1; ++i) |
20035 | if (ipar[i] + 1 != ipar[i + 1]) |
20036 | return 0; |
20037 | for (i = nelt2; i < nelt - 1; ++i) |
20038 | if (ipar[i] + 1 != ipar[i + 1]) |
20039 | return 0; |
20040 | |
20041 | /* Reconstruct the mask. */ |
20042 | for (i = 0; i < 2; ++i) |
20043 | { |
20044 | unsigned e = ipar[i * nelt2]; |
20045 | if (e % nelt2) |
20046 | return 0; |
20047 | e /= nelt2; |
20048 | mask |= e << (i * 4); |
20049 | } |
20050 | |
20051 | /* Make sure success has a non-zero value by adding one. */ |
20052 | return mask + 1; |
20053 | } |
20054 | |
20055 | /* Return a mask of VPTERNLOG operands that do not affect output. */ |
20056 | |
20057 | int |
20058 | vpternlog_redundant_operand_mask (rtx pternlog_imm) |
20059 | { |
20060 | int mask = 0; |
20061 | int imm8 = INTVAL (pternlog_imm); |
20062 | |
20063 | if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F)) |
20064 | mask |= 1; |
20065 | if (((imm8 >> 2) & 0x33) == (imm8 & 0x33)) |
20066 | mask |= 2; |
20067 | if (((imm8 >> 1) & 0x55) == (imm8 & 0x55)) |
20068 | mask |= 4; |
20069 | |
20070 | return mask; |
20071 | } |
20072 | |
20073 | /* Eliminate false dependencies on operands that do not affect output |
20074 | by substituting other operands of a VPTERNLOG. */ |
20075 | |
20076 | void |
20077 | substitute_vpternlog_operands (rtx *operands) |
20078 | { |
20079 | int mask = vpternlog_redundant_operand_mask (pternlog_imm: operands[4]); |
20080 | |
20081 | if (mask & 1) /* The first operand is redundant. */ |
20082 | operands[1] = operands[2]; |
20083 | |
20084 | if (mask & 2) /* The second operand is redundant. */ |
20085 | operands[2] = operands[1]; |
20086 | |
20087 | if (mask & 4) /* The third operand is redundant. */ |
20088 | operands[3] = operands[1]; |
20089 | else if (REG_P (operands[3])) |
20090 | { |
20091 | if (mask & 1) |
20092 | operands[1] = operands[3]; |
20093 | if (mask & 2) |
20094 | operands[2] = operands[3]; |
20095 | } |
20096 | } |
20097 | |
20098 | /* Return a register priority for hard reg REGNO. */ |
20099 | static int |
20100 | ix86_register_priority (int hard_regno) |
20101 | { |
20102 | /* ebp and r13 as the base always wants a displacement, r12 as the |
20103 | base always wants an index. So discourage their usage in an |
20104 | address. */ |
20105 | if (hard_regno == R12_REG || hard_regno == R13_REG) |
20106 | return 0; |
20107 | if (hard_regno == BP_REG) |
20108 | return 1; |
20109 | /* New x86-64 int registers result in bigger code size. Discourage them. */ |
20110 | if (REX_INT_REGNO_P (hard_regno)) |
20111 | return 2; |
20112 | if (REX2_INT_REGNO_P (hard_regno)) |
20113 | return 2; |
20114 | /* New x86-64 SSE registers result in bigger code size. Discourage them. */ |
20115 | if (REX_SSE_REGNO_P (hard_regno)) |
20116 | return 2; |
20117 | if (EXT_REX_SSE_REGNO_P (hard_regno)) |
20118 | return 1; |
20119 | /* Usage of AX register results in smaller code. Prefer it. */ |
20120 | if (hard_regno == AX_REG) |
20121 | return 4; |
20122 | return 3; |
20123 | } |
20124 | |
20125 | /* Implement TARGET_PREFERRED_RELOAD_CLASS. |
20126 | |
20127 | Put float CONST_DOUBLE in the constant pool instead of fp regs. |
20128 | QImode must go into class Q_REGS. |
20129 | Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and |
20130 | movdf to do mem-to-mem moves through integer regs. */ |
20131 | |
static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX, SSE or MASK register.  This is because there are no MMX/SSE/MASK
     instructions to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass)
	  || MAYBE_SSE_CLASS_P (regclass)
	  || MAYBE_MASK_CLASS_P (regclass)))
    return NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (CONST_DOUBLE_P (x))
    {
      /* General regs can load everything.  */
      if (INTEGER_CLASS_P (regclass))
	return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (IS_STACK_MODE (mode)
	  && standard_80387_constant_p (x) > 0)
	{
	  /* Limit class to FP regs.  */
	  if (FLOAT_CLASS_P (regclass))
	    return FLOAT_REGS;
	}

      /* Any other FP constant goes to memory.  */
      return NO_REGS;
    }

  /* Prefer SSE if we can use them for math.  Also allow integer regs
     when moves between register units are cheap.  */
  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
    {
      /* When inter-unit moves are cheap in both directions and the value
	 fits in a word, integer classes are acceptable as well.  */
      if (TARGET_INTER_UNIT_MOVES_FROM_VEC
	  && TARGET_INTER_UNIT_MOVES_TO_VEC
	  && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
	return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
      else
	return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS or ALL_MASK_REGS.  Narrow the class to the
     smallest acceptable one.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (Q_CLASS_P (regclass))
	return regclass;
      else if (reg_class_subset_p (Q_REGS, regclass))
	return Q_REGS;
      else if (MASK_CLASS_P (regclass))
	return regclass;
      else
	return NO_REGS;
    }

  return regclass;
}
20210 | |
20211 | /* Discourage putting floating-point values in SSE registers unless |
20212 | SSE math is being used, and likewise for the 387 registers. */ |
20213 | static reg_class_t |
20214 | ix86_preferred_output_reload_class (rtx x, reg_class_t regclass) |
20215 | { |
20216 | /* Restrict the output reload class to the register bank that we are doing |
20217 | math on. If we would like not to return a subset of CLASS, reject this |
20218 | alternative: if reload cannot do this, it will still use its choice. */ |
20219 | machine_mode mode = GET_MODE (x); |
20220 | if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) |
20221 | return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS; |
20222 | |
20223 | if (IS_STACK_MODE (mode)) |
20224 | return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS; |
20225 | |
20226 | return regclass; |
20227 | } |
20228 | |
/* Implement TARGET_SECONDARY_RELOAD.

   Return the register class needed as an intermediate step when moving X
   (of mode MODE) into or out of a register of class RCLASS; IN_P is true
   for the input (load) direction.  SRI lets us record a special reload
   pattern and an extra cost instead of (or in addition to) a scratch
   class.  Returns NO_REGS when no intermediate register is required.  */

static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
		       machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && INTEGER_CLASS_P (rclass)
      && !offsettable_memref_p (x))
    {
      /* Handled by a dedicated reload pattern rather than a scratch
	 class; the pattern materializes the address in a temporary.  */
      sri->icode = (in_p
		    ? CODE_FOR_reload_noff_load
		    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (mode == QImode
      && ((!TARGET_64BIT && !in_p
	   && INTEGER_CLASS_P (rclass)
	   && MAYBE_NON_Q_CLASS_P (rclass))
	  || (!TARGET_AVX512DQ
	      && MAYBE_MASK_CLASS_P (rclass))))
    {
      int regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
	return Q_REGS;

      return NO_REGS;
    }

  /* Require movement to gpr, and then store to memory.  */
  if ((mode == HFmode || mode == HImode || mode == V2QImode
       || mode == BFmode)
      && !TARGET_SSE4_1
      && SSE_CLASS_P (rclass)
      && !in_p && MEM_P (x))
    {
      sri->extra_cost = 1;
      return GENERAL_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
	  (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	  (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
				       (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	  (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
	   (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
     reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
     reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
     reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
20320 | |
20321 | /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */ |
20322 | |
20323 | static bool |
20324 | ix86_class_likely_spilled_p (reg_class_t rclass) |
20325 | { |
20326 | switch (rclass) |
20327 | { |
20328 | case AREG: |
20329 | case DREG: |
20330 | case CREG: |
20331 | case BREG: |
20332 | case AD_REGS: |
20333 | case SIREG: |
20334 | case DIREG: |
20335 | case SSE_FIRST_REG: |
20336 | case FP_TOP_REG: |
20337 | case FP_SECOND_REG: |
20338 | return true; |
20339 | |
20340 | default: |
20341 | break; |
20342 | } |
20343 | |
20344 | return false; |
20345 | } |
20346 | |
20347 | /* Return true if a set of DST by the expression SRC should be allowed. |
20348 | This prevents complex sets of likely_spilled hard regs before reload. */ |
20349 | |
20350 | bool |
20351 | ix86_hardreg_mov_ok (rtx dst, rtx src) |
20352 | { |
20353 | /* Avoid complex sets of likely_spilled hard registers before reload. */ |
20354 | if (REG_P (dst) && HARD_REGISTER_P (dst) |
20355 | && !REG_P (src) && !MEM_P (src) |
20356 | && !(VECTOR_MODE_P (GET_MODE (dst)) |
20357 | ? standard_sse_constant_p (x: src, GET_MODE (dst)) |
20358 | : x86_64_immediate_operand (src, GET_MODE (dst))) |
20359 | && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))) |
20360 | && !reload_completed) |
20361 | return false; |
20362 | return true; |
20363 | } |
20364 | |
/* If we are copying between registers from different register sets
   (e.g. FP and integer), we may need a memory location.

   The function can't work reliably when one of the CLASSES is a class
   containing registers from multiple sets.  We avoid this by never combining
   different sets in a single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST,
   so do not enforce these sanity checks.

   To optimize register_move_cost performance, define inline variant.

   Returns true when a CLASS1 <-> CLASS2 copy of MODE must go through a
   stack slot rather than a direct register-to-register move.  */

static inline bool
inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
				reg_class_t class2, int strict)
{
  /* LRA can query with NO_REGS; no actual copy happens then.  */
  if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
    return false;

  /* A class that merely overlaps a register set without being contained
     in it mixes registers from several sets -- see the head comment.
     Outside of LRA this is a machine-description bug when STRICT.  */
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
      || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
      || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
    {
      gcc_assert (!strict || lra_in_progress);
      return true;
    }

  /* x87 <-> anything else always goes through memory.  */
  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  /* Between mask and general, we have moves no larger than word size.  */
  if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
    {
      if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
	  || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;
    }

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* Direct SSE moves exist only to/from general registers.  */
      if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
	return true;

      int msize = GET_MODE_SIZE (mode);

      /* Between SSE and general, we have moves no larger than word size.  */
      if (msize > UNITS_PER_WORD)
	return true;

      /* In addition to SImode moves, HImode moves are supported for SSE2 and above,
	 Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16.  */
      int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);

      if (msize < minsize)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
	  || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
	return true;
    }

  return false;
}
20446 | |
20447 | /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */ |
20448 | |
20449 | static bool |
20450 | ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1, |
20451 | reg_class_t class2) |
20452 | { |
20453 | return inline_secondary_memory_needed (mode, class1, class2, strict: true); |
20454 | } |
20455 | |
20456 | /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. |
20457 | |
20458 | get_secondary_mem widens integral modes to BITS_PER_WORD. |
20459 | There is no need to emit full 64 bit move on 64 bit targets |
20460 | for integral modes that can be moved using 32 bit move. */ |
20461 | |
20462 | static machine_mode |
20463 | ix86_secondary_memory_needed_mode (machine_mode mode) |
20464 | { |
20465 | if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode)) |
20466 | return mode_for_size (32, GET_MODE_CLASS (mode), 0).require (); |
20467 | return mode; |
20468 | } |
20469 | |
20470 | /* Implement the TARGET_CLASS_MAX_NREGS hook. |
20471 | |
20472 | On the 80386, this is the size of MODE in words, |
20473 | except in the FP regs, where a single reg is always enough. */ |
20474 | |
20475 | static unsigned char |
20476 | ix86_class_max_nregs (reg_class_t rclass, machine_mode mode) |
20477 | { |
20478 | if (MAYBE_INTEGER_CLASS_P (rclass)) |
20479 | { |
20480 | if (mode == XFmode) |
20481 | return (TARGET_64BIT ? 2 : 3); |
20482 | else if (mode == XCmode) |
20483 | return (TARGET_64BIT ? 4 : 6); |
20484 | else |
20485 | return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); |
20486 | } |
20487 | else |
20488 | { |
20489 | if (COMPLEX_MODE_P (mode)) |
20490 | return 2; |
20491 | else |
20492 | return 1; |
20493 | } |
20494 | } |
20495 | |
20496 | /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ |
20497 | |
20498 | static bool |
20499 | ix86_can_change_mode_class (machine_mode from, machine_mode to, |
20500 | reg_class_t regclass) |
20501 | { |
20502 | if (from == to) |
20503 | return true; |
20504 | |
20505 | /* x87 registers can't do subreg at all, as all values are reformatted |
20506 | to extended precision. */ |
20507 | if (MAYBE_FLOAT_CLASS_P (regclass)) |
20508 | return false; |
20509 | |
20510 | if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass)) |
20511 | { |
20512 | /* Vector registers do not support QI or HImode loads. If we don't |
20513 | disallow a change to these modes, reload will assume it's ok to |
20514 | drop the subreg from (subreg:SI (reg:HI 100) 0). This affects |
20515 | the vec_dupv4hi pattern. |
20516 | NB: SSE2 can load 16bit data to sse register via pinsrw. */ |
20517 | int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4; |
20518 | if (GET_MODE_SIZE (from) < mov_size |
20519 | || GET_MODE_SIZE (to) < mov_size) |
20520 | return false; |
20521 | } |
20522 | |
20523 | return true; |
20524 | } |
20525 | |
20526 | /* Return index of MODE in the sse load/store tables. */ |
20527 | |
20528 | static inline int |
20529 | sse_store_index (machine_mode mode) |
20530 | { |
20531 | /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store |
20532 | costs to processor_costs, which requires changes to all entries in |
20533 | processor cost table. */ |
20534 | if (mode == E_HFmode) |
20535 | mode = E_SFmode; |
20536 | |
20537 | switch (GET_MODE_SIZE (mode)) |
20538 | { |
20539 | case 4: |
20540 | return 0; |
20541 | case 8: |
20542 | return 1; |
20543 | case 16: |
20544 | return 2; |
20545 | case 32: |
20546 | return 3; |
20547 | case 64: |
20548 | return 4; |
20549 | default: |
20550 | return -1; |
20551 | } |
20552 | } |
20553 | |
20554 | /* Return the cost of moving data of mode M between a |
20555 | register and memory. A value of 2 is the default; this cost is |
20556 | relative to those in `REGISTER_MOVE_COST'. |
20557 | |
20558 | This function is used extensively by register_move_cost that is used to |
20559 | build tables at startup. Make it inline in this case. |
20560 | When IN is 2, return maximum of in and out move cost. |
20561 | |
20562 | If moving between registers and memory is more expensive than |
20563 | between two registers, you should define this macro to express the |
20564 | relative cost. |
20565 | |
20566 | Model also increased moving costs of QImode registers in non |
20567 | Q_REGS classes. |
20568 | */ |
20569 | static inline int |
20570 | inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in) |
20571 | { |
20572 | int cost; |
20573 | |
20574 | if (FLOAT_CLASS_P (regclass)) |
20575 | { |
20576 | int index; |
20577 | switch (mode) |
20578 | { |
20579 | case E_SFmode: |
20580 | index = 0; |
20581 | break; |
20582 | case E_DFmode: |
20583 | index = 1; |
20584 | break; |
20585 | case E_XFmode: |
20586 | index = 2; |
20587 | break; |
20588 | default: |
20589 | return 100; |
20590 | } |
20591 | if (in == 2) |
20592 | return MAX (ix86_cost->hard_register.fp_load [index], |
20593 | ix86_cost->hard_register.fp_store [index]); |
20594 | return in ? ix86_cost->hard_register.fp_load [index] |
20595 | : ix86_cost->hard_register.fp_store [index]; |
20596 | } |
20597 | if (SSE_CLASS_P (regclass)) |
20598 | { |
20599 | int index = sse_store_index (mode); |
20600 | if (index == -1) |
20601 | return 100; |
20602 | if (in == 2) |
20603 | return MAX (ix86_cost->hard_register.sse_load [index], |
20604 | ix86_cost->hard_register.sse_store [index]); |
20605 | return in ? ix86_cost->hard_register.sse_load [index] |
20606 | : ix86_cost->hard_register.sse_store [index]; |
20607 | } |
20608 | if (MASK_CLASS_P (regclass)) |
20609 | { |
20610 | int index; |
20611 | switch (GET_MODE_SIZE (mode)) |
20612 | { |
20613 | case 1: |
20614 | index = 0; |
20615 | break; |
20616 | case 2: |
20617 | index = 1; |
20618 | break; |
20619 | /* DImode loads and stores assumed to cost the same as SImode. */ |
20620 | case 4: |
20621 | case 8: |
20622 | index = 2; |
20623 | break; |
20624 | default: |
20625 | return 100; |
20626 | } |
20627 | |
20628 | if (in == 2) |
20629 | return MAX (ix86_cost->hard_register.mask_load[index], |
20630 | ix86_cost->hard_register.mask_store[index]); |
20631 | return in ? ix86_cost->hard_register.mask_load[2] |
20632 | : ix86_cost->hard_register.mask_store[2]; |
20633 | } |
20634 | if (MMX_CLASS_P (regclass)) |
20635 | { |
20636 | int index; |
20637 | switch (GET_MODE_SIZE (mode)) |
20638 | { |
20639 | case 4: |
20640 | index = 0; |
20641 | break; |
20642 | case 8: |
20643 | index = 1; |
20644 | break; |
20645 | default: |
20646 | return 100; |
20647 | } |
20648 | if (in == 2) |
20649 | return MAX (ix86_cost->hard_register.mmx_load [index], |
20650 | ix86_cost->hard_register.mmx_store [index]); |
20651 | return in ? ix86_cost->hard_register.mmx_load [index] |
20652 | : ix86_cost->hard_register.mmx_store [index]; |
20653 | } |
20654 | switch (GET_MODE_SIZE (mode)) |
20655 | { |
20656 | case 1: |
20657 | if (Q_CLASS_P (regclass) || TARGET_64BIT) |
20658 | { |
20659 | if (!in) |
20660 | return ix86_cost->hard_register.int_store[0]; |
20661 | if (TARGET_PARTIAL_REG_DEPENDENCY |
20662 | && optimize_function_for_speed_p (cfun)) |
20663 | cost = ix86_cost->hard_register.movzbl_load; |
20664 | else |
20665 | cost = ix86_cost->hard_register.int_load[0]; |
20666 | if (in == 2) |
20667 | return MAX (cost, ix86_cost->hard_register.int_store[0]); |
20668 | return cost; |
20669 | } |
20670 | else |
20671 | { |
20672 | if (in == 2) |
20673 | return MAX (ix86_cost->hard_register.movzbl_load, |
20674 | ix86_cost->hard_register.int_store[0] + 4); |
20675 | if (in) |
20676 | return ix86_cost->hard_register.movzbl_load; |
20677 | else |
20678 | return ix86_cost->hard_register.int_store[0] + 4; |
20679 | } |
20680 | break; |
20681 | case 2: |
20682 | { |
20683 | int cost; |
20684 | if (in == 2) |
20685 | cost = MAX (ix86_cost->hard_register.int_load[1], |
20686 | ix86_cost->hard_register.int_store[1]); |
20687 | else |
20688 | cost = in ? ix86_cost->hard_register.int_load[1] |
20689 | : ix86_cost->hard_register.int_store[1]; |
20690 | |
20691 | if (mode == E_HFmode) |
20692 | { |
20693 | /* Prefer SSE over GPR for HFmode. */ |
20694 | int sse_cost; |
20695 | int index = sse_store_index (mode); |
20696 | if (in == 2) |
20697 | sse_cost = MAX (ix86_cost->hard_register.sse_load[index], |
20698 | ix86_cost->hard_register.sse_store[index]); |
20699 | else |
20700 | sse_cost = (in |
20701 | ? ix86_cost->hard_register.sse_load [index] |
20702 | : ix86_cost->hard_register.sse_store [index]); |
20703 | if (sse_cost >= cost) |
20704 | cost = sse_cost + 1; |
20705 | } |
20706 | return cost; |
20707 | } |
20708 | default: |
20709 | if (in == 2) |
20710 | cost = MAX (ix86_cost->hard_register.int_load[2], |
20711 | ix86_cost->hard_register.int_store[2]); |
20712 | else if (in) |
20713 | cost = ix86_cost->hard_register.int_load[2]; |
20714 | else |
20715 | cost = ix86_cost->hard_register.int_store[2]; |
20716 | /* Multiply with the number of GPR moves needed. */ |
20717 | return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD); |
20718 | } |
20719 | } |
20720 | |
20721 | static int |
20722 | ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in) |
20723 | { |
20724 | return inline_memory_move_cost (mode, regclass: (enum reg_class) regclass, in: in ? 1 : 0); |
20725 | } |
20726 | |
20727 | |
20728 | /* Return the cost of moving data from a register in class CLASS1 to |
20729 | one in class CLASS2. |
20730 | |
20731 | It is not required that the cost always equal 2 when FROM is the same as TO; |
20732 | on some machines it is expensive to move between registers if they are not |
20733 | general registers. */ |
20734 | |
20735 | static int |
20736 | ix86_register_move_cost (machine_mode mode, reg_class_t class1_i, |
20737 | reg_class_t class2_i) |
20738 | { |
20739 | enum reg_class class1 = (enum reg_class) class1_i; |
20740 | enum reg_class class2 = (enum reg_class) class2_i; |
20741 | |
20742 | /* In case we require secondary memory, compute cost of the store followed |
20743 | by load. In order to avoid bad register allocation choices, we need |
20744 | for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */ |
20745 | |
20746 | if (inline_secondary_memory_needed (mode, class1, class2, strict: false)) |
20747 | { |
20748 | int cost = 1; |
20749 | |
20750 | cost += inline_memory_move_cost (mode, regclass: class1, in: 2); |
20751 | cost += inline_memory_move_cost (mode, regclass: class2, in: 2); |
20752 | |
20753 | /* In case of copying from general_purpose_register we may emit multiple |
20754 | stores followed by single load causing memory size mismatch stall. |
20755 | Count this as arbitrarily high cost of 20. */ |
20756 | if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD |
20757 | && TARGET_MEMORY_MISMATCH_STALL |
20758 | && targetm.class_max_nregs (class1, mode) |
20759 | > targetm.class_max_nregs (class2, mode)) |
20760 | cost += 20; |
20761 | |
20762 | /* In the case of FP/MMX moves, the registers actually overlap, and we |
20763 | have to switch modes in order to treat them differently. */ |
20764 | if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2)) |
20765 | || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1))) |
20766 | cost += 20; |
20767 | |
20768 | return cost; |
20769 | } |
20770 | |
20771 | /* Moves between MMX and non-MMX units require secondary memory. */ |
20772 | if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) |
20773 | gcc_unreachable (); |
20774 | |
20775 | if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) |
20776 | return (SSE_CLASS_P (class1) |
20777 | ? ix86_cost->hard_register.sse_to_integer |
20778 | : ix86_cost->hard_register.integer_to_sse); |
20779 | |
20780 | /* Moves between mask register and GPR. */ |
20781 | if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2)) |
20782 | { |
20783 | return (MASK_CLASS_P (class1) |
20784 | ? ix86_cost->hard_register.mask_to_integer |
20785 | : ix86_cost->hard_register.integer_to_mask); |
20786 | } |
20787 | /* Moving between mask registers. */ |
20788 | if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2)) |
20789 | return ix86_cost->hard_register.mask_move; |
20790 | |
20791 | if (MAYBE_FLOAT_CLASS_P (class1)) |
20792 | return ix86_cost->hard_register.fp_move; |
20793 | if (MAYBE_SSE_CLASS_P (class1)) |
20794 | { |
20795 | if (GET_MODE_BITSIZE (mode) <= 128) |
20796 | return ix86_cost->hard_register.xmm_move; |
20797 | if (GET_MODE_BITSIZE (mode) <= 256) |
20798 | return ix86_cost->hard_register.ymm_move; |
20799 | return ix86_cost->hard_register.zmm_move; |
20800 | } |
20801 | if (MAYBE_MMX_CLASS_P (class1)) |
20802 | return ix86_cost->hard_register.mmx_move; |
20803 | return 2; |
20804 | } |
20805 | |
20806 | /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in |
20807 | words of a value of mode MODE but can be less for certain modes in |
20808 | special long registers. |
20809 | |
20810 | Actually there are no two word move instructions for consecutive |
20811 | registers. And only registers 0-3 may have mov byte instructions |
20812 | applied to them. */ |
20813 | |
20814 | static unsigned int |
20815 | ix86_hard_regno_nregs (unsigned int regno, machine_mode mode) |
20816 | { |
20817 | if (GENERAL_REGNO_P (regno)) |
20818 | { |
20819 | if (mode == XFmode) |
20820 | return TARGET_64BIT ? 2 : 3; |
20821 | if (mode == XCmode) |
20822 | return TARGET_64BIT ? 4 : 6; |
20823 | return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); |
20824 | } |
20825 | if (COMPLEX_MODE_P (mode)) |
20826 | return 2; |
20827 | /* Register pair for mask registers. */ |
20828 | if (mode == P2QImode || mode == P2HImode) |
20829 | return 2; |
20830 | if (mode == V64SFmode || mode == V64SImode) |
20831 | return 4; |
20832 | return 1; |
20833 | } |
20834 | |
20835 | /* Implement REGMODE_NATURAL_SIZE(MODE). */ |
20836 | unsigned int |
20837 | ix86_regmode_natural_size (machine_mode mode) |
20838 | { |
20839 | if (mode == P2HImode || mode == P2QImode) |
20840 | return GET_MODE_SIZE (mode) / 2; |
20841 | return UNITS_PER_WORD; |
20842 | } |
20843 | |
/* Implement TARGET_HARD_REGNO_MODE_OK.

   Return true if hard register REGNO may hold a value of mode MODE.
   The checks are ordered from most to least restrictive register
   banks; each early return assumes the previous ones did not fire.  */

static bool
ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM)
    return false;
  if (STACK_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (MASK_REGNO_P (regno))
    {
      /* Register pair only starts at even register number.  */
      if ((mode == P2QImode || mode == P2HImode))
	return MASK_PAIR_REGNO_P(regno);

      return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
	      || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
    }

  /* Partial-integer modes (P2QI/P2HI handled above) only live in
     mask-register pairs.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;

  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  */

      /* For AVX-512 we allow, regardless of regno:
	  - XI mode
	  - any of 512-bit wide vector mode
	  - any scalar mode.  */
      if (TARGET_AVX512F
	  && ((VALID_AVX512F_REG_OR_XI_MODE (mode) && TARGET_EVEX512)
	      || VALID_AVX512F_SCALAR_MODE (mode)))
	return true;

      /* For AVX-5124FMAPS or AVX-5124VNNIW
	 allow V64SF and V64SI modes for special regnos.  */
      if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
	  && (mode == V64SFmode || mode == V64SImode)
	  && MOD4_SSE_REGNO_P (regno))
	return true;

      /* TODO check for QI/HI scalars.  */
      /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
      if (TARGET_AVX512VL
	  && (VALID_AVX256_REG_OR_OI_MODE (mode)
	      || VALID_AVX512VL_128_REG_MODE (mode)))
	return true;

      /* xmm16-xmm31 are only available for AVX-512.  */
      if (EXT_REX_SSE_REGNO_P (regno))
	return false;

      /* Use pinsrw/pextrw to mov 16-bit data from/to sse to/from integer.  */
      if (TARGET_SSE2 && mode == HImode)
	return true;

      /* OImode and AVX modes are available only when AVX is enabled.  */
      return ((TARGET_AVX
	       && VALID_AVX256_REG_OR_OI_MODE (mode))
	      || VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  /* Only general-purpose registers remain from here on.  */
  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (ANY_QI_REGNO_P (regno))
	return true;
      if (!TARGET_PARTIAL_REG_STALL)
	return true;
      /* LRA checks if the hard register is OK for the given mode.
	 QImode values can live in non-QI regs, so we allow all
	 registers here.  */
      if (lra_in_progress)
	return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode)
	   || VALID_FP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
20952 | |
20953 | /* Implement TARGET_INSN_CALLEE_ABI. */ |
20954 | |
20955 | const predefined_function_abi & |
20956 | ix86_insn_callee_abi (const rtx_insn *insn) |
20957 | { |
20958 | unsigned int abi_id = 0; |
20959 | rtx pat = PATTERN (insn); |
20960 | if (vzeroupper_pattern (pat, VOIDmode)) |
20961 | abi_id = ABI_VZEROUPPER; |
20962 | |
20963 | return function_abis[abi_id]; |
20964 | } |
20965 | |
20966 | /* Initialize function_abis with corresponding abi_id, |
20967 | currently only handle vzeroupper. */ |
20968 | void |
20969 | ix86_initialize_callee_abi (unsigned int abi_id) |
20970 | { |
20971 | gcc_assert (abi_id == ABI_VZEROUPPER); |
20972 | predefined_function_abi &vzeroupper_abi = function_abis[abi_id]; |
20973 | if (!vzeroupper_abi.initialized_p ()) |
20974 | { |
20975 | HARD_REG_SET full_reg_clobbers; |
20976 | CLEAR_HARD_REG_SET (set&: full_reg_clobbers); |
20977 | vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers); |
20978 | } |
20979 | } |
20980 | |
20981 | void |
20982 | ix86_expand_avx_vzeroupper (void) |
20983 | { |
20984 | /* Initialize vzeroupper_abi here. */ |
20985 | ix86_initialize_callee_abi (ABI_VZEROUPPER); |
20986 | rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ()); |
20987 | /* Return false for non-local goto in can_nonlocal_goto. */ |
20988 | make_reg_eh_region_note (insn, ecf_flags: 0, INT_MIN); |
20989 | /* Flag used for call_insn indicates it's a fake call. */ |
20990 | RTX_FLAG (insn, used) = 1; |
20991 | } |
20992 | |
20993 | |
20994 | /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that |
20995 | saves SSE registers across calls is Win64 (thus no need to check the |
20996 | current ABI here), and with AVX enabled Win64 only guarantees that |
20997 | the low 16 bytes are saved. */ |
20998 | |
20999 | static bool |
21000 | ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno, |
21001 | machine_mode mode) |
21002 | { |
21003 | /* Special ABI for vzeroupper which only clobber higher part of sse regs. */ |
21004 | if (abi_id == ABI_VZEROUPPER) |
21005 | return (GET_MODE_SIZE (mode) > 16 |
21006 | && ((TARGET_64BIT && REX_SSE_REGNO_P (regno)) |
21007 | || LEGACY_SSE_REGNO_P (regno))); |
21008 | |
21009 | return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16; |
21010 | } |
21011 | |
21012 | /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a |
21013 | tieable integer mode. */ |
21014 | |
21015 | static bool |
21016 | ix86_tieable_integer_mode_p (machine_mode mode) |
21017 | { |
21018 | switch (mode) |
21019 | { |
21020 | case E_HImode: |
21021 | case E_SImode: |
21022 | return true; |
21023 | |
21024 | case E_QImode: |
21025 | return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL; |
21026 | |
21027 | case E_DImode: |
21028 | return TARGET_64BIT; |
21029 | |
21030 | default: |
21031 | return false; |
21032 | } |
21033 | } |
21034 | |
21035 | /* Implement TARGET_MODES_TIEABLE_P. |
21036 | |
21037 | Return true if MODE1 is accessible in a register that can hold MODE2 |
21038 | without copying. That is, all register classes that can hold MODE2 |
21039 | can also hold MODE1. */ |
21040 | |
21041 | static bool |
21042 | ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2) |
21043 | { |
21044 | if (mode1 == mode2) |
21045 | return true; |
21046 | |
21047 | if (ix86_tieable_integer_mode_p (mode: mode1) |
21048 | && ix86_tieable_integer_mode_p (mode: mode2)) |
21049 | return true; |
21050 | |
21051 | /* MODE2 being XFmode implies fp stack or general regs, which means we |
21052 | can tie any smaller floating point modes to it. Note that we do not |
21053 | tie this with TFmode. */ |
21054 | if (mode2 == XFmode) |
21055 | return mode1 == SFmode || mode1 == DFmode; |
21056 | |
21057 | /* MODE2 being DFmode implies fp stack, general or sse regs, which means |
21058 | that we can tie it with SFmode. */ |
21059 | if (mode2 == DFmode) |
21060 | return mode1 == SFmode; |
21061 | |
21062 | /* If MODE2 is only appropriate for an SSE register, then tie with |
21063 | any other mode acceptable to SSE registers. */ |
21064 | if (GET_MODE_SIZE (mode2) == 64 |
21065 | && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode: mode2)) |
21066 | return (GET_MODE_SIZE (mode1) == 64 |
21067 | && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode: mode1)); |
21068 | if (GET_MODE_SIZE (mode2) == 32 |
21069 | && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode: mode2)) |
21070 | return (GET_MODE_SIZE (mode1) == 32 |
21071 | && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode: mode1)); |
21072 | if (GET_MODE_SIZE (mode2) == 16 |
21073 | && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode: mode2)) |
21074 | return (GET_MODE_SIZE (mode1) == 16 |
21075 | && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode: mode1)); |
21076 | |
21077 | /* If MODE2 is appropriate for an MMX register, then tie |
21078 | with any other mode acceptable to MMX registers. */ |
21079 | if (GET_MODE_SIZE (mode2) == 8 |
21080 | && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode: mode2)) |
21081 | return (GET_MODE_SIZE (mode1) == 8 |
21082 | && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode: mode1)); |
21083 | |
21084 | /* SCmode and DImode can be tied. */ |
21085 | if ((mode1 == E_SCmode && mode2 == E_DImode) |
21086 | || (mode1 == E_DImode && mode2 == E_SCmode)) |
21087 | return TARGET_64BIT; |
21088 | |
21089 | /* [SD]Cmode and V2[SD]Fmode modes can be tied. */ |
21090 | if ((mode1 == E_SCmode && mode2 == E_V2SFmode) |
21091 | || (mode1 == E_V2SFmode && mode2 == E_SCmode) |
21092 | || (mode1 == E_DCmode && mode2 == E_V2DFmode) |
21093 | || (mode1 == E_V2DFmode && mode2 == E_DCmode)) |
21094 | return true; |
21095 | |
21096 | return false; |
21097 | } |
21098 | |
/* Return the cost of moving between two registers of mode MODE.  */

static int
ix86_set_reg_reg_cost (machine_mode mode)
{
  /* Width in bytes of the widest piece a single move instruction can
     copy for MODE; the total cost below is one insn per UNITS bytes.  */
  unsigned int units = UNITS_PER_WORD;

  switch (GET_MODE_CLASS (mode))
    {
    default:
      break;

    case MODE_CC:
      /* A flags-register copy always transfers a full CCmode value.  */
      units = GET_MODE_SIZE (CCmode);
      break;

    case MODE_FLOAT:
      /* A scalar FP move is a single instruction when some enabled FP
	 unit (x87 or SSE) holds the mode natively.  */
      if ((TARGET_SSE && mode == TFmode)
	  || (TARGET_80387 && mode == XFmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_COMPLEX_FLOAT:
      /* Same criteria as MODE_FLOAT, for the complex counterparts.  */
      if ((TARGET_SSE && mode == TCmode)
	  || (TARGET_80387 && mode == XCmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_VECTOR_INT:
    case MODE_VECTOR_FLOAT:
      /* A vector move is a single instruction when an enabled vector
	 ISA level supports the mode natively.  */
      if ((TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
	  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
	  || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
	      && VALID_MMX_REG_MODE (mode)))
	units = GET_MODE_SIZE (mode);
    }

  /* Return the cost of moving between two registers of mode MODE,
     assuming that the move will be in pieces of at most UNITS bytes.  */
  return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
}
21146 | |
21147 | /* Return cost of vector operation in MODE given that scalar version has |
21148 | COST. */ |
21149 | |
21150 | static int |
21151 | ix86_vec_cost (machine_mode mode, int cost) |
21152 | { |
21153 | if (!VECTOR_MODE_P (mode)) |
21154 | return cost; |
21155 | |
21156 | if (GET_MODE_BITSIZE (mode) == 128 |
21157 | && TARGET_SSE_SPLIT_REGS) |
21158 | return cost * GET_MODE_BITSIZE (mode) / 64; |
21159 | else if (GET_MODE_BITSIZE (mode) > 128 |
21160 | && TARGET_AVX256_SPLIT_REGS) |
21161 | return cost * GET_MODE_BITSIZE (mode) / 128; |
21162 | else if (GET_MODE_BITSIZE (mode) > 256 |
21163 | && TARGET_AVX512_SPLIT_REGS) |
21164 | return cost * GET_MODE_BITSIZE (mode) / 256; |
21165 | return cost; |
21166 | } |
21167 | |
21168 | /* Return cost of vec_widen_<s>mult_hi/lo_<mode>, |
21169 | vec_widen_<s>mul_hi/lo_<mode> is only available for VI124_AVX2. */ |
21170 | static int |
21171 | ix86_widen_mult_cost (const struct processor_costs *cost, |
21172 | enum machine_mode mode, bool uns_p) |
21173 | { |
21174 | gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT); |
21175 | int = 0; |
21176 | int basic_cost = 0; |
21177 | switch (mode) |
21178 | { |
21179 | case V8HImode: |
21180 | case V16HImode: |
21181 | if (!uns_p || mode == V16HImode) |
21182 | extra_cost = cost->sse_op * 2; |
21183 | basic_cost = cost->mulss * 2 + cost->sse_op * 4; |
21184 | break; |
21185 | case V4SImode: |
21186 | case V8SImode: |
21187 | /* pmulhw/pmullw can be used. */ |
21188 | basic_cost = cost->mulss * 2 + cost->sse_op * 2; |
21189 | break; |
21190 | case V2DImode: |
21191 | /* pmuludq under sse2, pmuldq under sse4.1, for sign_extend, |
21192 | require extra 4 mul, 4 add, 4 cmp and 2 shift. */ |
21193 | if (!TARGET_SSE4_1 && !uns_p) |
21194 | extra_cost = (cost->mulss + cost->addss + cost->sse_op) * 4 |
21195 | + cost->sse_op * 2; |
21196 | /* Fallthru. */ |
21197 | case V4DImode: |
21198 | basic_cost = cost->mulss * 2 + cost->sse_op * 4; |
21199 | break; |
21200 | default: |
21201 | /* Not implemented. */ |
21202 | return 100; |
21203 | } |
21204 | return ix86_vec_cost (mode, cost: basic_cost + extra_cost); |
21205 | } |
21206 | |
21207 | /* Return cost of multiplication in MODE. */ |
21208 | |
21209 | static int |
21210 | ix86_multiplication_cost (const struct processor_costs *cost, |
21211 | enum machine_mode mode) |
21212 | { |
21213 | machine_mode inner_mode = mode; |
21214 | if (VECTOR_MODE_P (mode)) |
21215 | inner_mode = GET_MODE_INNER (mode); |
21216 | |
21217 | if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) |
21218 | return inner_mode == DFmode ? cost->mulsd : cost->mulss; |
21219 | else if (X87_FLOAT_MODE_P (mode)) |
21220 | return cost->fmul; |
21221 | else if (FLOAT_MODE_P (mode)) |
21222 | return ix86_vec_cost (mode, |
21223 | cost: inner_mode == DFmode ? cost->mulsd : cost->mulss); |
21224 | else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
21225 | { |
21226 | int nmults, nops; |
21227 | /* Cost of reading the memory. */ |
21228 | int ; |
21229 | |
21230 | switch (mode) |
21231 | { |
21232 | case V4QImode: |
21233 | case V8QImode: |
21234 | /* Partial V*QImode is emulated with 4-6 insns. */ |
21235 | nmults = 1; |
21236 | nops = 3; |
21237 | extra = 0; |
21238 | |
21239 | if (TARGET_AVX512BW && TARGET_AVX512VL) |
21240 | ; |
21241 | else if (TARGET_AVX2) |
21242 | nops += 2; |
21243 | else if (TARGET_XOP) |
21244 | extra += cost->sse_load[2]; |
21245 | else |
21246 | { |
21247 | nops += 1; |
21248 | extra += cost->sse_load[2]; |
21249 | } |
21250 | goto do_qimode; |
21251 | |
21252 | case V16QImode: |
21253 | /* V*QImode is emulated with 4-11 insns. */ |
21254 | nmults = 1; |
21255 | nops = 3; |
21256 | extra = 0; |
21257 | |
21258 | if (TARGET_AVX2 && !TARGET_PREFER_AVX128) |
21259 | { |
21260 | if (!(TARGET_AVX512BW && TARGET_AVX512VL)) |
21261 | nops += 3; |
21262 | } |
21263 | else if (TARGET_XOP) |
21264 | { |
21265 | nmults += 1; |
21266 | nops += 2; |
21267 | extra += cost->sse_load[2]; |
21268 | } |
21269 | else |
21270 | { |
21271 | nmults += 1; |
21272 | nops += 4; |
21273 | extra += cost->sse_load[2]; |
21274 | } |
21275 | goto do_qimode; |
21276 | |
21277 | case V32QImode: |
21278 | nmults = 1; |
21279 | nops = 3; |
21280 | extra = 0; |
21281 | |
21282 | if (!TARGET_AVX512BW || TARGET_PREFER_AVX256) |
21283 | { |
21284 | nmults += 1; |
21285 | nops += 4; |
21286 | extra += cost->sse_load[3] * 2; |
21287 | } |
21288 | goto do_qimode; |
21289 | |
21290 | case V64QImode: |
21291 | nmults = 2; |
21292 | nops = 9; |
21293 | extra = cost->sse_load[3] * 2 + cost->sse_load[4] * 2; |
21294 | |
21295 | do_qimode: |
21296 | return ix86_vec_cost (mode, cost: cost->mulss * nmults |
21297 | + cost->sse_op * nops) + extra; |
21298 | |
21299 | case V4SImode: |
21300 | /* pmulld is used in this case. No emulation is needed. */ |
21301 | if (TARGET_SSE4_1) |
21302 | goto do_native; |
21303 | /* V4SImode is emulated with 7 insns. */ |
21304 | else |
21305 | return ix86_vec_cost (mode, cost: cost->mulss * 2 + cost->sse_op * 5); |
21306 | |
21307 | case V2DImode: |
21308 | case V4DImode: |
21309 | /* vpmullq is used in this case. No emulation is needed. */ |
21310 | if (TARGET_AVX512DQ && TARGET_AVX512VL) |
21311 | goto do_native; |
21312 | /* V*DImode is emulated with 6-8 insns. */ |
21313 | else if (TARGET_XOP && mode == V2DImode) |
21314 | return ix86_vec_cost (mode, cost: cost->mulss * 2 + cost->sse_op * 4); |
21315 | /* FALLTHRU */ |
21316 | case V8DImode: |
21317 | /* vpmullq is used in this case. No emulation is needed. */ |
21318 | if (TARGET_AVX512DQ && mode == V8DImode) |
21319 | goto do_native; |
21320 | else |
21321 | return ix86_vec_cost (mode, cost: cost->mulss * 3 + cost->sse_op * 5); |
21322 | |
21323 | default: |
21324 | do_native: |
21325 | return ix86_vec_cost (mode, cost: cost->mulss); |
21326 | } |
21327 | } |
21328 | else |
21329 | return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7); |
21330 | } |
21331 | |
21332 | /* Return cost of multiplication in MODE. */ |
21333 | |
21334 | static int |
21335 | ix86_division_cost (const struct processor_costs *cost, |
21336 | enum machine_mode mode) |
21337 | { |
21338 | machine_mode inner_mode = mode; |
21339 | if (VECTOR_MODE_P (mode)) |
21340 | inner_mode = GET_MODE_INNER (mode); |
21341 | |
21342 | if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) |
21343 | return inner_mode == DFmode ? cost->divsd : cost->divss; |
21344 | else if (X87_FLOAT_MODE_P (mode)) |
21345 | return cost->fdiv; |
21346 | else if (FLOAT_MODE_P (mode)) |
21347 | return ix86_vec_cost (mode, |
21348 | cost: inner_mode == DFmode ? cost->divsd : cost->divss); |
21349 | else |
21350 | return cost->divide[MODE_INDEX (mode)]; |
21351 | } |
21352 | |
21353 | /* Return cost of shift in MODE. |
21354 | If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL. |
21355 | AND_IN_OP1 specify in op1 is result of AND and SHIFT_AND_TRUNCATE |
21356 | if op1 is a result of subreg. |
21357 | |
21358 | SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */ |
21359 | |
21360 | static int |
21361 | ix86_shift_rotate_cost (const struct processor_costs *cost, |
21362 | enum rtx_code code, |
21363 | enum machine_mode mode, bool constant_op1, |
21364 | HOST_WIDE_INT op1_val, |
21365 | bool and_in_op1, |
21366 | bool shift_and_truncate, |
21367 | bool *skip_op0, bool *skip_op1) |
21368 | { |
21369 | if (skip_op0) |
21370 | *skip_op0 = *skip_op1 = false; |
21371 | |
21372 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
21373 | { |
21374 | int count; |
21375 | /* Cost of reading the memory. */ |
21376 | int ; |
21377 | |
21378 | switch (mode) |
21379 | { |
21380 | case V4QImode: |
21381 | case V8QImode: |
21382 | if (TARGET_AVX2) |
21383 | /* Use vpbroadcast. */ |
21384 | extra = cost->sse_op; |
21385 | else |
21386 | extra = cost->sse_load[2]; |
21387 | |
21388 | if (constant_op1) |
21389 | { |
21390 | if (code == ASHIFTRT) |
21391 | { |
21392 | count = 4; |
21393 | extra *= 2; |
21394 | } |
21395 | else |
21396 | count = 2; |
21397 | } |
21398 | else if (TARGET_AVX512BW && TARGET_AVX512VL) |
21399 | return ix86_vec_cost (mode, cost: cost->sse_op * 4); |
21400 | else if (TARGET_SSE4_1) |
21401 | count = 5; |
21402 | else if (code == ASHIFTRT) |
21403 | count = 6; |
21404 | else |
21405 | count = 5; |
21406 | return ix86_vec_cost (mode, cost: cost->sse_op * count) + extra; |
21407 | |
21408 | case V16QImode: |
21409 | if (TARGET_XOP) |
21410 | { |
21411 | /* For XOP we use vpshab, which requires a broadcast of the |
21412 | value to the variable shift insn. For constants this |
21413 | means a V16Q const in mem; even when we can perform the |
21414 | shift with one insn set the cost to prefer paddb. */ |
21415 | if (constant_op1) |
21416 | { |
21417 | extra = cost->sse_load[2]; |
21418 | return ix86_vec_cost (mode, cost: cost->sse_op) + extra; |
21419 | } |
21420 | else |
21421 | { |
21422 | count = (code == ASHIFT) ? 3 : 4; |
21423 | return ix86_vec_cost (mode, cost: cost->sse_op * count); |
21424 | } |
21425 | } |
21426 | /* FALLTHRU */ |
21427 | case V32QImode: |
21428 | if (TARGET_AVX2) |
21429 | /* Use vpbroadcast. */ |
21430 | extra = cost->sse_op; |
21431 | else |
21432 | extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3]; |
21433 | |
21434 | if (constant_op1) |
21435 | { |
21436 | if (code == ASHIFTRT) |
21437 | { |
21438 | count = 4; |
21439 | extra *= 2; |
21440 | } |
21441 | else |
21442 | count = 2; |
21443 | } |
21444 | else if (TARGET_AVX512BW |
21445 | && ((mode == V32QImode && !TARGET_PREFER_AVX256) |
21446 | || (mode == V16QImode && TARGET_AVX512VL |
21447 | && !TARGET_PREFER_AVX128))) |
21448 | return ix86_vec_cost (mode, cost: cost->sse_op * 4); |
21449 | else if (TARGET_AVX2 |
21450 | && mode == V16QImode && !TARGET_PREFER_AVX128) |
21451 | count = 6; |
21452 | else if (TARGET_SSE4_1) |
21453 | count = 9; |
21454 | else if (code == ASHIFTRT) |
21455 | count = 10; |
21456 | else |
21457 | count = 9; |
21458 | return ix86_vec_cost (mode, cost: cost->sse_op * count) + extra; |
21459 | |
21460 | case V2DImode: |
21461 | case V4DImode: |
21462 | /* V*DImode arithmetic right shift is emulated. */ |
21463 | if (code == ASHIFTRT && !TARGET_AVX512VL) |
21464 | { |
21465 | if (constant_op1) |
21466 | { |
21467 | if (op1_val == 63) |
21468 | count = TARGET_SSE4_2 ? 1 : 2; |
21469 | else if (TARGET_XOP) |
21470 | count = 2; |
21471 | else if (TARGET_SSE4_1) |
21472 | count = 3; |
21473 | else |
21474 | count = 4; |
21475 | } |
21476 | else if (TARGET_XOP) |
21477 | count = 3; |
21478 | else if (TARGET_SSE4_2) |
21479 | count = 4; |
21480 | else |
21481 | count = 5; |
21482 | |
21483 | return ix86_vec_cost (mode, cost: cost->sse_op * count); |
21484 | } |
21485 | /* FALLTHRU */ |
21486 | default: |
21487 | return ix86_vec_cost (mode, cost: cost->sse_op); |
21488 | } |
21489 | } |
21490 | |
21491 | if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
21492 | { |
21493 | if (constant_op1) |
21494 | { |
21495 | if (op1_val > 32) |
21496 | return cost->shift_const + COSTS_N_INSNS (2); |
21497 | else |
21498 | return cost->shift_const * 2; |
21499 | } |
21500 | else |
21501 | { |
21502 | if (and_in_op1) |
21503 | return cost->shift_var * 2; |
21504 | else |
21505 | return cost->shift_var * 6 + COSTS_N_INSNS (2); |
21506 | } |
21507 | } |
21508 | else |
21509 | { |
21510 | if (constant_op1) |
21511 | return cost->shift_const; |
21512 | else if (shift_and_truncate) |
21513 | { |
21514 | if (skip_op0) |
21515 | *skip_op0 = *skip_op1 = true; |
21516 | /* Return the cost after shift-and truncation. */ |
21517 | return cost->shift_var; |
21518 | } |
21519 | else |
21520 | return cost->shift_var; |
21521 | } |
21522 | } |
21523 | |
21524 | /* Compute a (partial) cost for rtx X. Return true if the complete |
21525 | cost has been computed, and false if subexpressions should be |
21526 | scanned. In either case, *TOTAL contains the cost result. */ |
21527 | |
21528 | static bool |
21529 | ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, |
21530 | int *total, bool speed) |
21531 | { |
21532 | rtx mask; |
21533 | enum rtx_code code = GET_CODE (x); |
21534 | enum rtx_code outer_code = (enum rtx_code) outer_code_i; |
21535 | const struct processor_costs *cost |
21536 | = speed ? ix86_tune_cost : &ix86_size_cost; |
21537 | int src_cost; |
21538 | |
21539 | switch (code) |
21540 | { |
21541 | case SET: |
21542 | if (register_operand (SET_DEST (x), VOIDmode) |
21543 | && register_operand (SET_SRC (x), VOIDmode)) |
21544 | { |
21545 | *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x))); |
21546 | return true; |
21547 | } |
21548 | |
21549 | if (register_operand (SET_SRC (x), VOIDmode)) |
21550 | /* Avoid potentially incorrect high cost from rtx_costs |
21551 | for non-tieable SUBREGs. */ |
21552 | src_cost = 0; |
21553 | else |
21554 | { |
21555 | src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed); |
21556 | |
21557 | if (CONSTANT_P (SET_SRC (x))) |
21558 | /* Constant costs assume a base value of COSTS_N_INSNS (1) and add |
21559 | a small value, possibly zero for cheap constants. */ |
21560 | src_cost += COSTS_N_INSNS (1); |
21561 | } |
21562 | |
21563 | *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed); |
21564 | return true; |
21565 | |
21566 | case CONST_INT: |
21567 | case CONST: |
21568 | case LABEL_REF: |
21569 | case SYMBOL_REF: |
21570 | if (x86_64_immediate_operand (x, VOIDmode)) |
21571 | *total = 0; |
21572 | else |
21573 | *total = 1; |
21574 | return true; |
21575 | |
21576 | case CONST_DOUBLE: |
21577 | if (IS_STACK_MODE (mode)) |
21578 | switch (standard_80387_constant_p (x)) |
21579 | { |
21580 | case -1: |
21581 | case 0: |
21582 | break; |
21583 | case 1: /* 0.0 */ |
21584 | *total = 1; |
21585 | return true; |
21586 | default: /* Other constants */ |
21587 | *total = 2; |
21588 | return true; |
21589 | } |
21590 | /* FALLTHRU */ |
21591 | |
21592 | case CONST_VECTOR: |
21593 | switch (standard_sse_constant_p (x, pred_mode: mode)) |
21594 | { |
21595 | case 0: |
21596 | break; |
21597 | case 1: /* 0: xor eliminates false dependency */ |
21598 | *total = 0; |
21599 | return true; |
21600 | default: /* -1: cmp contains false dependency */ |
21601 | *total = 1; |
21602 | return true; |
21603 | } |
21604 | /* FALLTHRU */ |
21605 | |
21606 | case CONST_WIDE_INT: |
21607 | /* Fall back to (MEM (SYMBOL_REF)), since that's where |
21608 | it'll probably end up. Add a penalty for size. */ |
21609 | *total = (COSTS_N_INSNS (1) |
21610 | + (!TARGET_64BIT && flag_pic) |
21611 | + (GET_MODE_SIZE (mode) <= 4 |
21612 | ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2)); |
21613 | return true; |
21614 | |
21615 | case ZERO_EXTEND: |
21616 | /* The zero extensions is often completely free on x86_64, so make |
21617 | it as cheap as possible. */ |
21618 | if (TARGET_64BIT && mode == DImode |
21619 | && GET_MODE (XEXP (x, 0)) == SImode) |
21620 | *total = 1; |
21621 | else if (TARGET_ZERO_EXTEND_WITH_AND) |
21622 | *total = cost->add; |
21623 | else |
21624 | *total = cost->movzx; |
21625 | return false; |
21626 | |
21627 | case SIGN_EXTEND: |
21628 | *total = cost->movsx; |
21629 | return false; |
21630 | |
21631 | case ASHIFT: |
21632 | if (SCALAR_INT_MODE_P (mode) |
21633 | && GET_MODE_SIZE (mode) < UNITS_PER_WORD |
21634 | && CONST_INT_P (XEXP (x, 1))) |
21635 | { |
21636 | HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); |
21637 | if (value == 1) |
21638 | { |
21639 | *total = cost->add; |
21640 | return false; |
21641 | } |
21642 | if ((value == 2 || value == 3) |
21643 | && cost->lea <= cost->shift_const) |
21644 | { |
21645 | *total = cost->lea; |
21646 | return false; |
21647 | } |
21648 | } |
21649 | /* FALLTHRU */ |
21650 | |
21651 | case ROTATE: |
21652 | case ASHIFTRT: |
21653 | case LSHIFTRT: |
21654 | case ROTATERT: |
21655 | bool skip_op0, skip_op1; |
21656 | *total = ix86_shift_rotate_cost (cost, code, mode, |
21657 | CONSTANT_P (XEXP (x, 1)), |
21658 | CONST_INT_P (XEXP (x, 1)) |
21659 | ? INTVAL (XEXP (x, 1)) : -1, |
21660 | GET_CODE (XEXP (x, 1)) == AND, |
21661 | SUBREG_P (XEXP (x, 1)) |
21662 | && GET_CODE (XEXP (XEXP (x, 1), |
21663 | 0)) == AND, |
21664 | skip_op0: &skip_op0, skip_op1: &skip_op1); |
21665 | if (skip_op0 || skip_op1) |
21666 | { |
21667 | if (!skip_op0) |
21668 | *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); |
21669 | if (!skip_op1) |
21670 | *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed); |
21671 | return true; |
21672 | } |
21673 | return false; |
21674 | |
21675 | case FMA: |
21676 | { |
21677 | rtx sub; |
21678 | |
21679 | gcc_assert (FLOAT_MODE_P (mode)); |
21680 | gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F); |
21681 | |
21682 | *total = ix86_vec_cost (mode, |
21683 | GET_MODE_INNER (mode) == SFmode |
21684 | ? cost->fmass : cost->fmasd); |
21685 | *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed); |
21686 | |
21687 | /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */ |
21688 | sub = XEXP (x, 0); |
21689 | if (GET_CODE (sub) == NEG) |
21690 | sub = XEXP (sub, 0); |
21691 | *total += rtx_cost (sub, mode, FMA, 0, speed); |
21692 | |
21693 | sub = XEXP (x, 2); |
21694 | if (GET_CODE (sub) == NEG) |
21695 | sub = XEXP (sub, 0); |
21696 | *total += rtx_cost (sub, mode, FMA, 2, speed); |
21697 | return true; |
21698 | } |
21699 | |
21700 | case MULT: |
21701 | if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode)) |
21702 | { |
21703 | rtx op0 = XEXP (x, 0); |
21704 | rtx op1 = XEXP (x, 1); |
21705 | int nbits; |
21706 | if (CONST_INT_P (XEXP (x, 1))) |
21707 | { |
21708 | unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); |
21709 | for (nbits = 0; value != 0; value &= value - 1) |
21710 | nbits++; |
21711 | } |
21712 | else |
21713 | /* This is arbitrary. */ |
21714 | nbits = 7; |
21715 | |
21716 | /* Compute costs correctly for widening multiplication. */ |
21717 | if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND) |
21718 | && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2 |
21719 | == GET_MODE_SIZE (mode)) |
21720 | { |
21721 | int is_mulwiden = 0; |
21722 | machine_mode inner_mode = GET_MODE (op0); |
21723 | |
21724 | if (GET_CODE (op0) == GET_CODE (op1)) |
21725 | is_mulwiden = 1, op1 = XEXP (op1, 0); |
21726 | else if (CONST_INT_P (op1)) |
21727 | { |
21728 | if (GET_CODE (op0) == SIGN_EXTEND) |
21729 | is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode) |
21730 | == INTVAL (op1); |
21731 | else |
21732 | is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode)); |
21733 | } |
21734 | |
21735 | if (is_mulwiden) |
21736 | op0 = XEXP (op0, 0), mode = GET_MODE (op0); |
21737 | } |
21738 | |
21739 | int mult_init; |
21740 | // Double word multiplication requires 3 mults and 2 adds. |
21741 | if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
21742 | { |
21743 | mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)] |
21744 | + 2 * cost->add; |
21745 | nbits *= 3; |
21746 | } |
21747 | else mult_init = cost->mult_init[MODE_INDEX (mode)]; |
21748 | |
21749 | *total = (mult_init |
21750 | + nbits * cost->mult_bit |
21751 | + rtx_cost (op0, mode, outer_code, opno, speed) |
21752 | + rtx_cost (op1, mode, outer_code, opno, speed)); |
21753 | |
21754 | return true; |
21755 | } |
21756 | *total = ix86_multiplication_cost (cost, mode); |
21757 | return false; |
21758 | |
21759 | case DIV: |
21760 | case UDIV: |
21761 | case MOD: |
21762 | case UMOD: |
21763 | *total = ix86_division_cost (cost, mode); |
21764 | return false; |
21765 | |
21766 | case PLUS: |
21767 | if (GET_MODE_CLASS (mode) == MODE_INT |
21768 | && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) |
21769 | { |
21770 | if (GET_CODE (XEXP (x, 0)) == PLUS |
21771 | && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT |
21772 | && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)) |
21773 | && CONSTANT_P (XEXP (x, 1))) |
21774 | { |
21775 | HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); |
21776 | if (val == 2 || val == 4 || val == 8) |
21777 | { |
21778 | *total = cost->lea; |
21779 | *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode, |
21780 | outer_code, opno, speed); |
21781 | *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode, |
21782 | outer_code, opno, speed); |
21783 | *total += rtx_cost (XEXP (x, 1), mode, |
21784 | outer_code, opno, speed); |
21785 | return true; |
21786 | } |
21787 | } |
21788 | else if (GET_CODE (XEXP (x, 0)) == MULT |
21789 | && CONST_INT_P (XEXP (XEXP (x, 0), 1))) |
21790 | { |
21791 | HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); |
21792 | if (val == 2 || val == 4 || val == 8) |
21793 | { |
21794 | *total = cost->lea; |
21795 | *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
21796 | outer_code, opno, speed); |
21797 | *total += rtx_cost (XEXP (x, 1), mode, |
21798 | outer_code, opno, speed); |
21799 | return true; |
21800 | } |
21801 | } |
21802 | else if (GET_CODE (XEXP (x, 0)) == PLUS) |
21803 | { |
21804 | rtx op = XEXP (XEXP (x, 0), 0); |
21805 | |
21806 | /* Add with carry, ignore the cost of adding a carry flag. */ |
21807 | if (ix86_carry_flag_operator (op, mode) |
21808 | || ix86_carry_flag_unset_operator (op, mode)) |
21809 | *total = cost->add; |
21810 | else |
21811 | { |
21812 | *total = cost->lea; |
21813 | *total += rtx_cost (op, mode, |
21814 | outer_code, opno, speed); |
21815 | } |
21816 | |
21817 | *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode, |
21818 | outer_code, opno, speed); |
21819 | *total += rtx_cost (XEXP (x, 1), mode, |
21820 | outer_code, opno, speed); |
21821 | return true; |
21822 | } |
21823 | } |
21824 | /* FALLTHRU */ |
21825 | |
21826 | case MINUS: |
21827 | /* Subtract with borrow, ignore the cost of subtracting a carry flag. */ |
21828 | if (GET_MODE_CLASS (mode) == MODE_INT |
21829 | && GET_MODE_SIZE (mode) <= UNITS_PER_WORD |
21830 | && GET_CODE (XEXP (x, 0)) == MINUS |
21831 | && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode) |
21832 | || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode))) |
21833 | { |
21834 | *total = cost->add; |
21835 | *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
21836 | outer_code, opno, speed); |
21837 | *total += rtx_cost (XEXP (x, 1), mode, |
21838 | outer_code, opno, speed); |
21839 | return true; |
21840 | } |
21841 | |
21842 | if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) |
21843 | *total = cost->addss; |
21844 | else if (X87_FLOAT_MODE_P (mode)) |
21845 | *total = cost->fadd; |
21846 | else if (FLOAT_MODE_P (mode)) |
21847 | *total = ix86_vec_cost (mode, cost: cost->addss); |
21848 | else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
21849 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
21850 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
21851 | *total = cost->add * 2; |
21852 | else |
21853 | *total = cost->add; |
21854 | return false; |
21855 | |
21856 | case IOR: |
21857 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT |
21858 | || SSE_FLOAT_MODE_P (mode)) |
21859 | { |
21860 | /* (ior (not ...) ...) can be a single insn in AVX512. */ |
21861 | if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F |
21862 | && ((TARGET_EVEX512 |
21863 | && GET_MODE_SIZE (mode) == 64) |
21864 | || (TARGET_AVX512VL |
21865 | && (GET_MODE_SIZE (mode) == 32 |
21866 | || GET_MODE_SIZE (mode) == 16)))) |
21867 | { |
21868 | rtx right = GET_CODE (XEXP (x, 1)) != NOT |
21869 | ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0); |
21870 | |
21871 | *total = ix86_vec_cost (mode, cost: cost->sse_op) |
21872 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
21873 | outer_code, opno, speed) |
21874 | + rtx_cost (right, mode, outer_code, opno, speed); |
21875 | return true; |
21876 | } |
21877 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
21878 | } |
21879 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
21880 | *total = cost->add * 2; |
21881 | else |
21882 | *total = cost->add; |
21883 | return false; |
21884 | |
21885 | case XOR: |
21886 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT |
21887 | || SSE_FLOAT_MODE_P (mode)) |
21888 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
21889 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
21890 | *total = cost->add * 2; |
21891 | else |
21892 | *total = cost->add; |
21893 | return false; |
21894 | |
21895 | case AND: |
21896 | if (address_no_seg_operand (x, mode)) |
21897 | { |
21898 | *total = cost->lea; |
21899 | return true; |
21900 | } |
21901 | else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT |
21902 | || SSE_FLOAT_MODE_P (mode)) |
21903 | { |
21904 | /* pandn is a single instruction. */ |
21905 | if (GET_CODE (XEXP (x, 0)) == NOT) |
21906 | { |
21907 | rtx right = XEXP (x, 1); |
21908 | |
21909 | /* (and (not ...) (not ...)) can be a single insn in AVX512. */ |
21910 | if (GET_CODE (right) == NOT && TARGET_AVX512F |
21911 | && ((TARGET_EVEX512 |
21912 | && GET_MODE_SIZE (mode) == 64) |
21913 | || (TARGET_AVX512VL |
21914 | && (GET_MODE_SIZE (mode) == 32 |
21915 | || GET_MODE_SIZE (mode) == 16)))) |
21916 | right = XEXP (right, 0); |
21917 | |
21918 | *total = ix86_vec_cost (mode, cost: cost->sse_op) |
21919 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
21920 | outer_code, opno, speed) |
21921 | + rtx_cost (right, mode, outer_code, opno, speed); |
21922 | return true; |
21923 | } |
21924 | else if (GET_CODE (XEXP (x, 1)) == NOT) |
21925 | { |
21926 | *total = ix86_vec_cost (mode, cost: cost->sse_op) |
21927 | + rtx_cost (XEXP (x, 0), mode, |
21928 | outer_code, opno, speed) |
21929 | + rtx_cost (XEXP (XEXP (x, 1), 0), mode, |
21930 | outer_code, opno, speed); |
21931 | return true; |
21932 | } |
21933 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
21934 | } |
21935 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
21936 | { |
21937 | if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT) |
21938 | { |
21939 | *total = cost->add * 2 |
21940 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
21941 | outer_code, opno, speed) |
21942 | + rtx_cost (XEXP (x, 1), mode, |
21943 | outer_code, opno, speed); |
21944 | return true; |
21945 | } |
21946 | else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT) |
21947 | { |
21948 | *total = cost->add * 2 |
21949 | + rtx_cost (XEXP (x, 0), mode, |
21950 | outer_code, opno, speed) |
21951 | + rtx_cost (XEXP (XEXP (x, 1), 0), mode, |
21952 | outer_code, opno, speed); |
21953 | return true; |
21954 | } |
21955 | *total = cost->add * 2; |
21956 | } |
21957 | else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT) |
21958 | { |
21959 | *total = cost->add |
21960 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
21961 | outer_code, opno, speed) |
21962 | + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); |
21963 | return true; |
21964 | } |
21965 | else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT) |
21966 | { |
21967 | *total = cost->add |
21968 | + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) |
21969 | + rtx_cost (XEXP (XEXP (x, 1), 0), mode, |
21970 | outer_code, opno, speed); |
21971 | return true; |
21972 | } |
21973 | else |
21974 | *total = cost->add; |
21975 | return false; |
21976 | |
21977 | case NOT: |
21978 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
21979 | { |
21980 | /* (not (xor ...)) can be a single insn in AVX512. */ |
21981 | if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F |
21982 | && ((TARGET_EVEX512 |
21983 | && GET_MODE_SIZE (mode) == 64) |
21984 | || (TARGET_AVX512VL |
21985 | && (GET_MODE_SIZE (mode) == 32 |
21986 | || GET_MODE_SIZE (mode) == 16)))) |
21987 | { |
21988 | *total = ix86_vec_cost (mode, cost: cost->sse_op) |
21989 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
21990 | outer_code, opno, speed) |
21991 | + rtx_cost (XEXP (XEXP (x, 0), 1), mode, |
21992 | outer_code, opno, speed); |
21993 | return true; |
21994 | } |
21995 | |
21996 | // vnot is pxor -1. |
21997 | *total = ix86_vec_cost (mode, cost: cost->sse_op) + 1; |
21998 | } |
21999 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
22000 | *total = cost->add * 2; |
22001 | else |
22002 | *total = cost->add; |
22003 | return false; |
22004 | |
22005 | case NEG: |
22006 | if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) |
22007 | *total = cost->sse_op; |
22008 | else if (X87_FLOAT_MODE_P (mode)) |
22009 | *total = cost->fchs; |
22010 | else if (FLOAT_MODE_P (mode)) |
22011 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
22012 | else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
22013 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
22014 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
22015 | *total = cost->add * 3; |
22016 | else |
22017 | *total = cost->add; |
22018 | return false; |
22019 | |
22020 | case COMPARE: |
22021 | rtx op0, op1; |
22022 | op0 = XEXP (x, 0); |
22023 | op1 = XEXP (x, 1); |
22024 | if (GET_CODE (op0) == ZERO_EXTRACT |
22025 | && XEXP (op0, 1) == const1_rtx |
22026 | && CONST_INT_P (XEXP (op0, 2)) |
22027 | && op1 == const0_rtx) |
22028 | { |
22029 | /* This kind of construct is implemented using test[bwl]. |
22030 | Treat it as if we had an AND. */ |
22031 | mode = GET_MODE (XEXP (op0, 0)); |
22032 | *total = (cost->add |
22033 | + rtx_cost (XEXP (op0, 0), mode, outer_code, |
22034 | opno, speed) |
22035 | + rtx_cost (const1_rtx, mode, outer_code, opno, speed)); |
22036 | return true; |
22037 | } |
22038 | |
22039 | if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1)) |
22040 | { |
22041 | /* This is an overflow detection, count it as a normal compare. */ |
22042 | *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed); |
22043 | return true; |
22044 | } |
22045 | |
22046 | rtx geu; |
22047 | /* Match x |
22048 | (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) |
22049 | (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */ |
22050 | if (mode == CCCmode |
22051 | && GET_CODE (op0) == NEG |
22052 | && GET_CODE (geu = XEXP (op0, 0)) == GEU |
22053 | && REG_P (XEXP (geu, 0)) |
22054 | && (GET_MODE (XEXP (geu, 0)) == CCCmode |
22055 | || GET_MODE (XEXP (geu, 0)) == CCmode) |
22056 | && REGNO (XEXP (geu, 0)) == FLAGS_REG |
22057 | && XEXP (geu, 1) == const0_rtx |
22058 | && GET_CODE (op1) == LTU |
22059 | && REG_P (XEXP (op1, 0)) |
22060 | && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0)) |
22061 | && REGNO (XEXP (op1, 0)) == FLAGS_REG |
22062 | && XEXP (op1, 1) == const0_rtx) |
22063 | { |
22064 | /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */ |
22065 | *total = 0; |
22066 | return true; |
22067 | } |
22068 | /* Match x |
22069 | (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0))) |
22070 | (geu:QI (reg:CCC FLAGS_REG) (const_int 0))) */ |
22071 | if (mode == CCCmode |
22072 | && GET_CODE (op0) == NEG |
22073 | && GET_CODE (XEXP (op0, 0)) == LTU |
22074 | && REG_P (XEXP (XEXP (op0, 0), 0)) |
22075 | && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode |
22076 | && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG |
22077 | && XEXP (XEXP (op0, 0), 1) == const0_rtx |
22078 | && GET_CODE (op1) == GEU |
22079 | && REG_P (XEXP (op1, 0)) |
22080 | && GET_MODE (XEXP (op1, 0)) == CCCmode |
22081 | && REGNO (XEXP (op1, 0)) == FLAGS_REG |
22082 | && XEXP (op1, 1) == const0_rtx) |
22083 | { |
22084 | /* This is *x86_cmc. */ |
22085 | if (!speed) |
22086 | *total = COSTS_N_BYTES (1); |
22087 | else if (TARGET_SLOW_STC) |
22088 | *total = COSTS_N_INSNS (2); |
22089 | else |
22090 | *total = COSTS_N_INSNS (1); |
22091 | return true; |
22092 | } |
22093 | |
22094 | if (SCALAR_INT_MODE_P (GET_MODE (op0)) |
22095 | && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) |
22096 | { |
22097 | if (op1 == const0_rtx) |
22098 | *total = cost->add |
22099 | + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed); |
22100 | else |
22101 | *total = 3*cost->add |
22102 | + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed) |
22103 | + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed); |
22104 | return true; |
22105 | } |
22106 | |
22107 | /* The embedded comparison operand is completely free. */ |
22108 | if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx) |
22109 | *total = 0; |
22110 | |
22111 | return false; |
22112 | |
22113 | case FLOAT_EXTEND: |
22114 | if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) |
22115 | *total = 0; |
22116 | else |
22117 | *total = ix86_vec_cost (mode, cost: cost->addss); |
22118 | return false; |
22119 | |
22120 | case FLOAT_TRUNCATE: |
22121 | if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) |
22122 | *total = cost->fadd; |
22123 | else |
22124 | *total = ix86_vec_cost (mode, cost: cost->addss); |
22125 | return false; |
22126 | |
22127 | case ABS: |
22128 | /* SSE requires memory load for the constant operand. It may make |
22129 | sense to account for this. Of course the constant operand may or |
22130 | may not be reused. */ |
22131 | if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) |
22132 | *total = cost->sse_op; |
22133 | else if (X87_FLOAT_MODE_P (mode)) |
22134 | *total = cost->fabs; |
22135 | else if (FLOAT_MODE_P (mode)) |
22136 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
22137 | return false; |
22138 | |
22139 | case SQRT: |
22140 | if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) |
22141 | *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd; |
22142 | else if (X87_FLOAT_MODE_P (mode)) |
22143 | *total = cost->fsqrt; |
22144 | else if (FLOAT_MODE_P (mode)) |
22145 | *total = ix86_vec_cost (mode, |
22146 | cost: mode == SFmode ? cost->sqrtss : cost->sqrtsd); |
22147 | return false; |
22148 | |
22149 | case UNSPEC: |
22150 | if (XINT (x, 1) == UNSPEC_TP) |
22151 | *total = 0; |
22152 | else if (XINT (x, 1) == UNSPEC_VTERNLOG) |
22153 | { |
22154 | *total = cost->sse_op; |
22155 | return true; |
22156 | } |
22157 | else if (XINT (x, 1) == UNSPEC_PTEST) |
22158 | { |
22159 | *total = cost->sse_op; |
22160 | rtx test_op0 = XVECEXP (x, 0, 0); |
22161 | if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1))) |
22162 | return false; |
22163 | if (GET_CODE (test_op0) == AND) |
22164 | { |
22165 | rtx and_op0 = XEXP (test_op0, 0); |
22166 | if (GET_CODE (and_op0) == NOT) |
22167 | and_op0 = XEXP (and_op0, 0); |
22168 | *total += rtx_cost (and_op0, GET_MODE (and_op0), |
22169 | AND, 0, speed) |
22170 | + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0), |
22171 | AND, 1, speed); |
22172 | } |
22173 | else |
22174 | *total = rtx_cost (test_op0, GET_MODE (test_op0), |
22175 | UNSPEC, 0, speed); |
22176 | return true; |
22177 | } |
22178 | return false; |
22179 | |
22180 | case VEC_SELECT: |
22181 | case VEC_CONCAT: |
22182 | case VEC_DUPLICATE: |
22183 | /* ??? Assume all of these vector manipulation patterns are |
22184 | recognizable. In which case they all pretty much have the |
22185 | same cost. */ |
22186 | *total = cost->sse_op; |
22187 | return true; |
22188 | case VEC_MERGE: |
22189 | mask = XEXP (x, 2); |
22190 | /* This is masked instruction, assume the same cost, |
22191 | as nonmasked variant. */ |
22192 | if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask))) |
22193 | *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed); |
22194 | else |
22195 | *total = cost->sse_op; |
22196 | return true; |
22197 | |
22198 | case MEM: |
22199 | /* An insn that accesses memory is slightly more expensive |
22200 | than one that does not. */ |
22201 | if (speed) |
22202 | *total += 1; |
22203 | return false; |
22204 | |
22205 | case ZERO_EXTRACT: |
22206 | if (XEXP (x, 1) == const1_rtx |
22207 | && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND |
22208 | && GET_MODE (XEXP (x, 2)) == SImode |
22209 | && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode) |
22210 | { |
22211 | /* Ignore cost of zero extension and masking of last argument. */ |
22212 | *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); |
22213 | *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); |
22214 | *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed); |
22215 | return true; |
22216 | } |
22217 | return false; |
22218 | |
22219 | case IF_THEN_ELSE: |
22220 | if (TARGET_XOP |
22221 | && VECTOR_MODE_P (mode) |
22222 | && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32)) |
22223 | { |
22224 | /* vpcmov. */ |
22225 | *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6); |
22226 | if (!REG_P (XEXP (x, 0))) |
22227 | *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); |
22228 | if (!REG_P (XEXP (x, 1))) |
22229 | *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); |
22230 | if (!REG_P (XEXP (x, 2))) |
22231 | *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed); |
22232 | return true; |
22233 | } |
22234 | else if (TARGET_CMOVE |
22235 | && SCALAR_INT_MODE_P (mode) |
22236 | && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) |
22237 | { |
22238 | /* cmov. */ |
22239 | *total = COSTS_N_INSNS (1); |
22240 | if (!REG_P (XEXP (x, 0))) |
22241 | *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); |
22242 | if (!REG_P (XEXP (x, 1))) |
22243 | *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); |
22244 | if (!REG_P (XEXP (x, 2))) |
22245 | *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed); |
22246 | return true; |
22247 | } |
22248 | return false; |
22249 | |
22250 | default: |
22251 | return false; |
22252 | } |
22253 | } |
22254 | |
22255 | #if TARGET_MACHO |
22256 | |
22257 | static int current_machopic_label_num; |
22258 | |
/* Given a symbol name and its associated stub, write out the
   definition of the stub.  Emits the stub body, the lazy binder entry
   and the lazy symbol pointer into their respective Mach-O sections.
   Only used for 32-bit Darwin targets.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  /* Each stub gets a unique number for its local labels.  */
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  /* Derive the binder and symbol names used below from STUB and SYMB.  */
  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz" , label);

  /* Select the stub section matching the PIC flavour in use.  */
  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n" , stub);
  fprintf (file, "\t.indirect_symbol %s\n" , symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      /* AT&T-style stub body: five hlt placeholder bytes.  */
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n" );
    }
  else if (MACHOPIC_PURE)
    {
      /* PIC stub.  */
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n" ,
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n" );
    }
  else
    /* Non-PIC: jump indirectly through the lazy pointer.  */
    fprintf (file, "\tjmp\t*%s\n" , lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  /* The binder entry: push an identifier for the lazy pointer and
     tail into dyld's binding helper.  */
  fprintf (file, "%s:\n" , binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n" , lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n" );
    }
  else
    fprintf (file, "\tpushl\t$%s\n" , lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n" , file);

  /* N.B.  Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);

  /* The lazy pointer itself, initialized to the binder so the first
     call resolves the symbol.  */
  fprintf (file, "%s:\n" , lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n" , symbol_name);
  fprintf (file, ASM_LONG "%s\n" , binder_name);
}
22346 | #endif /* TARGET_MACHO */ |
22347 | |
22348 | /* Order the registers for register allocator. */ |
22349 | |
22350 | void |
22351 | x86_order_regs_for_local_alloc (void) |
22352 | { |
22353 | int pos = 0; |
22354 | int i; |
22355 | |
22356 | /* First allocate the local general purpose registers. */ |
22357 | for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
22358 | if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (regno: i)) |
22359 | reg_alloc_order [pos++] = i; |
22360 | |
22361 | /* Global general purpose registers. */ |
22362 | for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
22363 | if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (regno: i)) |
22364 | reg_alloc_order [pos++] = i; |
22365 | |
22366 | /* x87 registers come first in case we are doing FP math |
22367 | using them. */ |
22368 | if (!TARGET_SSE_MATH) |
22369 | for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) |
22370 | reg_alloc_order [pos++] = i; |
22371 | |
22372 | /* SSE registers. */ |
22373 | for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) |
22374 | reg_alloc_order [pos++] = i; |
22375 | for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) |
22376 | reg_alloc_order [pos++] = i; |
22377 | |
22378 | /* Extended REX SSE registers. */ |
22379 | for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) |
22380 | reg_alloc_order [pos++] = i; |
22381 | |
22382 | /* Mask register. */ |
22383 | for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++) |
22384 | reg_alloc_order [pos++] = i; |
22385 | |
22386 | /* x87 registers. */ |
22387 | if (TARGET_SSE_MATH) |
22388 | for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) |
22389 | reg_alloc_order [pos++] = i; |
22390 | |
22391 | for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) |
22392 | reg_alloc_order [pos++] = i; |
22393 | |
22394 | /* Initialize the rest of array as we do not allocate some registers |
22395 | at all. */ |
22396 | while (pos < FIRST_PSEUDO_REGISTER) |
22397 | reg_alloc_order [pos++] = 0; |
22398 | } |
22399 | |
22400 | static bool |
22401 | ix86_ms_bitfield_layout_p (const_tree record_type) |
22402 | { |
22403 | return ((TARGET_MS_BITFIELD_LAYOUT |
22404 | && !lookup_attribute (attr_name: "gcc_struct" , TYPE_ATTRIBUTES (record_type))) |
22405 | || lookup_attribute (attr_name: "ms_struct" , TYPE_ATTRIBUTES (record_type))); |
22406 | } |
22407 | |
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  The result is either a hard
   register or a stack slot, depending on ABI and calling convention.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  /* Nonzero when the return value is passed in memory; the hidden
     return-pointer argument then precedes 'this'.  */
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      /* 64-bit: 'this' is in an integer argument register; AGGR
	 selects the second register when a hidden return pointer
	 occupies the first.  */
      if (ix86_function_type_abi (fntype: type) == MS_ABI)
	parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
	parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (Pmode, parm_regs[aggr]);
    }

  /* 32-bit: how many arguments this function passes in registers.  */
  nregs = ix86_function_regparm (type, decl: function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  regno = CX_REG;
	  /* thiscall with an aggregate return: 'this' sits on the
	     stack just past the return address.  */
	  if (aggr)
	    return gen_rtx_MEM (SImode,
				plus_constant (Pmode, stack_pointer_rtx, 4));
	}
      else
	{
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      /* With only one regparm register available, it is taken
		 by the hidden return pointer, so 'this' is at sp+4.  */
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (Pmode,
						   stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  /* Default stack-based passing: skip the return address, and also
     the hidden return pointer when AGGR.  */
  return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
					     aggr ? 8 : 4));
}
22463 | |
22464 | /* Determine whether x86_output_mi_thunk can succeed. */ |
22465 | |
22466 | static bool |
22467 | x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset, |
22468 | const_tree function) |
22469 | { |
22470 | /* 64-bit can handle anything. */ |
22471 | if (TARGET_64BIT) |
22472 | return true; |
22473 | |
22474 | /* For 32-bit, everything's fine if we have one free register. */ |
22475 | if (ix86_function_regparm (TREE_TYPE (function), decl: function) < 3) |
22476 | return true; |
22477 | |
22478 | /* Need a free register for vcall_offset. */ |
22479 | if (vcall_offset) |
22480 | return false; |
22481 | |
22482 | /* Need a free register for GOT references. */ |
22483 | if (flag_pic && !targetm.binds_local_p (function)) |
22484 | return false; |
22485 | |
22486 | /* Otherwise ok. */ |
22487 | return true; |
22488 | } |
22489 | |
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  The thunk ends
   with a tail call (or indirect jump) to FUNCTION.  */

static void
x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;
  unsigned int tmp_regno;
  rtx_insn *insn;
  /* flag_force_indirect_call may be cleared below for 32-bit PIC;
     remember the original value so it can be restored on exit.  */
  int saved_flag_force_indirect_call = flag_force_indirect_call;

  /* Choose a scratch register that the callee's calling convention
     does not use for argument passing.  */
  if (TARGET_64BIT)
    tmp_regno = R10_REG;
  else
    {
      unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	tmp_regno = AX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	tmp_regno = DX_REG;
      else
	tmp_regno = CX_REG;

      if (flag_pic)
	flag_force_indirect_call = 0;
    }

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* CET is enabled, insert ENDBR instruction.  */
  if ((flag_cf_protection & CF_BRANCH))
    emit_insn (gen_nop_endbr ());

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      /* DELTA does not fit as an operand; stage it through the
		 scratch register.  */
	      tmp = gen_rtx_REG (Pmode, tmp_regno);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (code: PLUS, Pmode, dst: delta_dst, src: delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;

      tmp = gen_rtx_REG (Pmode, tmp_regno);

      /* Load the vtable pointer (*this) into TMP.  */
      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, addr: vcall_addr, strict: true))
	{
	  /* VCALL_OFFSET does not form a valid displacement; build the
	     address with %r11 instead.  */
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (code: PLUS, Pmode, dst: this_reg, src: vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  /* Compute the address to tail-call FUNCTION through.  */
  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || TARGET_PECOFF)
	;
      else
	{
	  /* Non-local function under PIC: reference it through the
	     GOT.  */
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_const_mem (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  /* 32-bit PIC: set up %ecx as PIC register and go through the
	     GOT.  */
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (dest: tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
	  fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
	  fnaddr = gen_const_mem (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    {
      if (sibcall_insn_operand (fnaddr, word_mode))
	{
	  fnaddr = XEXP (DECL_RTL (function), 0);
	  tmp = gen_rtx_MEM (QImode, fnaddr);
	  tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
	  tmp = emit_call_insn (tmp);
	  SIBLING_CALL_P (tmp) = 1;
	}
      else
	emit_jump_insn (gen_indirect_jump (fnaddr));
    }
  else
    {
      if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
	{
	  // CM_LARGE_PIC always uses pseudo PIC register which is
	  // uninitialized.  Since FUNCTION is local and calling it
	  // doesn't go through PLT, we use scratch register %r11 as
	  // PIC register and initialize it here.
	  pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
	  ix86_init_large_pic_reg (tmp_regno);
	  fnaddr = legitimize_pic_address (orig: fnaddr,
					   reg: gen_rtx_REG (Pmode, tmp_regno));
	}

      if (!sibcall_insn_operand (fnaddr, word_mode))
	{
	  /* The address is not directly callable; move it into the
	     scratch register first.  */
	  tmp = gen_rtx_REG (word_mode, tmp_regno);
	  if (GET_MODE (fnaddr) != word_mode)
	    fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
	  emit_move_insn (tmp, fnaddr);
	  fnaddr = tmp;
	}

      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.  */
  insn = get_insns ();
  shorten_branches (insn);
  assemble_start_function (thunk_fndecl, fnname);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();
  assemble_end_function (thunk_fndecl, fnname);

  flag_force_indirect_call = saved_flag_force_indirect_call;
}
22691 | |
22692 | static void |
22693 | x86_file_start (void) |
22694 | { |
22695 | default_file_start (); |
22696 | if (TARGET_16BIT) |
22697 | fputs (s: "\t.code16gcc\n" , stream: asm_out_file); |
22698 | #if TARGET_MACHO |
22699 | darwin_file_start (); |
22700 | #endif |
22701 | if (X86_FILE_START_VERSION_DIRECTIVE) |
22702 | fputs (s: "\t.version\t\"01.01\"\n" , stream: asm_out_file); |
22703 | if (X86_FILE_START_FLTUSED) |
22704 | fputs (s: "\t.global\t__fltused\n" , stream: asm_out_file); |
22705 | if (ix86_asm_dialect == ASM_INTEL) |
22706 | fputs (s: "\t.intel_syntax noprefix\n" , stream: asm_out_file); |
22707 | } |
22708 | |
22709 | int |
22710 | x86_field_alignment (tree type, int computed) |
22711 | { |
22712 | machine_mode mode; |
22713 | |
22714 | if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) |
22715 | return computed; |
22716 | if (TARGET_IAMCU) |
22717 | return iamcu_alignment (type, align: computed); |
22718 | type = strip_array_types (type); |
22719 | mode = TYPE_MODE (type); |
22720 | if (mode == DFmode || mode == DCmode |
22721 | || GET_MODE_CLASS (mode) == MODE_INT |
22722 | || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) |
22723 | { |
22724 | if (TYPE_ATOMIC (type) && computed > 32) |
22725 | { |
22726 | static bool warned; |
22727 | |
22728 | if (!warned && warn_psabi) |
22729 | { |
22730 | const char *url |
22731 | = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic" ; |
22732 | |
22733 | warned = true; |
22734 | inform (input_location, "the alignment of %<_Atomic %T%> " |
22735 | "fields changed in %{GCC 11.1%}" , |
22736 | TYPE_MAIN_VARIANT (type), url); |
22737 | } |
22738 | } |
22739 | else |
22740 | return MIN (32, computed); |
22741 | } |
22742 | return computed; |
22743 | } |
22744 | |
22745 | /* Print call to TARGET to FILE. */ |
22746 | |
22747 | static void |
22748 | x86_print_call_or_nop (FILE *file, const char *target) |
22749 | { |
22750 | if (flag_nop_mcount || !strcmp (s1: target, s2: "nop" )) |
22751 | /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ |
22752 | fprintf (stream: file, format: "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n" ); |
22753 | else |
22754 | fprintf (stream: file, format: "1:\tcall\t%s\n" , target); |
22755 | } |
22756 | |
22757 | static bool |
22758 | current_fentry_name (const char **name) |
22759 | { |
22760 | tree attr = lookup_attribute (attr_name: "fentry_name" , |
22761 | DECL_ATTRIBUTES (current_function_decl)); |
22762 | if (!attr) |
22763 | return false; |
22764 | *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))); |
22765 | return true; |
22766 | } |
22767 | |
22768 | static bool |
22769 | current_fentry_section (const char **name) |
22770 | { |
22771 | tree attr = lookup_attribute (attr_name: "fentry_section" , |
22772 | DECL_ATTRIBUTES (current_function_decl)); |
22773 | if (!attr) |
22774 | return false; |
22775 | *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))); |
22776 | return true; |
22777 | } |
22778 | |
/* Return a caller-saved register which isn't live or a callee-saved
   register which has been saved on stack in the prologue at entry for
   profile.  Used by x86_function_profiler to pick a scratch register
   for the -mcmodel=large profiling call sequence.  */

static int
x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
{
  /* Use %r10 if the profiler is emitted before the prologue or it isn't
     used by DRAP.  */
  if (ix86_profile_before_prologue ()
      || !crtl->drap_reg
      || REGNO (crtl->drap_reg) != R10_REG)
    return R10_REG;

  /* The profiler is emitted after the prologue.  If there is a
     caller-saved register which isn't live or a callee-saved
     register saved on stack in the prologue, use it.  */

  /* Registers live at function entry must not be clobbered.  */
  bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  /* Skip %r10 (the DRAP register here) and %r11 -- with profile
     counters, %r11 holds the counter label address; without them it is
     available only when R11_OK.  */
  int i;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i)
	&& i != R10_REG
#ifdef NO_PROFILE_COUNTERS
	&& (r11_ok || i != R11_REG)
#else
	&& i != R11_REG
#endif
	&& TEST_HARD_REG_BIT (accessible_reg_set, bit: i)
	&& (ix86_save_reg (regno: i, maybe_eh_return: true, ignore_outlined: true)
	    || (call_used_regs[i]
		&& !fixed_regs[i]
		&& !REGNO_REG_SET_P (reg_live, i))))
      return i;

  /* No suitable register found: diagnose, then fall back to %r10.  */
  sorry ("no register available for profiling %<-mcmodel=large%s%>" ,
	 ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "" );

  return R10_REG;
}
22820 | |
22821 | /* Output assembler code to FILE to increment profiler label # LABELNO |
22822 | for profiling a function entry. */ |
22823 | void |
22824 | x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) |
22825 | { |
22826 | if (cfun->machine->insn_queued_at_entrance) |
22827 | { |
22828 | if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR) |
22829 | fprintf (stream: file, format: "\t%s\n" , TARGET_64BIT ? "endbr64" : "endbr32" ); |
22830 | unsigned int patch_area_size |
22831 | = crtl->patch_area_size - crtl->patch_area_entry; |
22832 | if (patch_area_size) |
22833 | ix86_output_patchable_area (patch_area_size, |
22834 | crtl->patch_area_entry == 0); |
22835 | } |
22836 | |
22837 | const char *mcount_name = MCOUNT_NAME; |
22838 | |
22839 | if (current_fentry_name (name: &mcount_name)) |
22840 | ; |
22841 | else if (fentry_name) |
22842 | mcount_name = fentry_name; |
22843 | else if (flag_fentry) |
22844 | mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE; |
22845 | |
22846 | if (TARGET_64BIT) |
22847 | { |
22848 | #ifndef NO_PROFILE_COUNTERS |
22849 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
22850 | fprintf (file, "\tlea\tr11, %sP%d[rip]\n" , LPREFIX, labelno); |
22851 | else |
22852 | fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n" , LPREFIX, labelno); |
22853 | #endif |
22854 | |
22855 | int scratch; |
22856 | const char *reg; |
22857 | char legacy_reg[4] = { 0 }; |
22858 | |
22859 | if (!TARGET_PECOFF) |
22860 | { |
22861 | switch (ix86_cmodel) |
22862 | { |
22863 | case CM_LARGE: |
22864 | scratch = x86_64_select_profile_regnum (r11_ok: true); |
22865 | reg = hi_reg_name[scratch]; |
22866 | if (LEGACY_INT_REGNO_P (scratch)) |
22867 | { |
22868 | legacy_reg[0] = 'r'; |
22869 | legacy_reg[1] = reg[0]; |
22870 | legacy_reg[2] = reg[1]; |
22871 | reg = legacy_reg; |
22872 | } |
22873 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
22874 | fprintf (stream: file, format: "1:\tmovabs\t%s, OFFSET FLAT:%s\n" |
22875 | "\tcall\t%s\n" , reg, mcount_name, reg); |
22876 | else |
22877 | fprintf (stream: file, format: "1:\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n" , |
22878 | mcount_name, reg, reg); |
22879 | break; |
22880 | case CM_LARGE_PIC: |
22881 | #ifdef NO_PROFILE_COUNTERS |
22882 | scratch = x86_64_select_profile_regnum (r11_ok: false); |
22883 | reg = hi_reg_name[scratch]; |
22884 | if (LEGACY_INT_REGNO_P (scratch)) |
22885 | { |
22886 | legacy_reg[0] = 'r'; |
22887 | legacy_reg[1] = reg[0]; |
22888 | legacy_reg[2] = reg[1]; |
22889 | reg = legacy_reg; |
22890 | } |
22891 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
22892 | { |
22893 | fprintf (stream: file, format: "1:movabs\tr11, " |
22894 | "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n" ); |
22895 | fprintf (stream: file, format: "\tlea\t%s, 1b[rip]\n" , reg); |
22896 | fprintf (stream: file, format: "\tadd\t%s, r11\n" , reg); |
22897 | fprintf (stream: file, format: "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n" , |
22898 | mcount_name); |
22899 | fprintf (stream: file, format: "\tadd\t%s, r11\n" , reg); |
22900 | fprintf (stream: file, format: "\tcall\t%s\n" , reg); |
22901 | break; |
22902 | } |
22903 | fprintf (stream: file, |
22904 | format: "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n" ); |
22905 | fprintf (stream: file, format: "\tleaq\t1b(%%rip), %%%s\n" , reg); |
22906 | fprintf (stream: file, format: "\taddq\t%%r11, %%%s\n" , reg); |
22907 | fprintf (stream: file, format: "\tmovabsq\t$%s@PLTOFF, %%r11\n" , mcount_name); |
22908 | fprintf (stream: file, format: "\taddq\t%%r11, %%%s\n" , reg); |
22909 | fprintf (stream: file, format: "\tcall\t*%%%s\n" , reg); |
22910 | #else |
22911 | sorry ("profiling %<-mcmodel=large%> with PIC is not supported" ); |
22912 | #endif |
22913 | break; |
22914 | case CM_SMALL_PIC: |
22915 | case CM_MEDIUM_PIC: |
22916 | if (!ix86_direct_extern_access) |
22917 | { |
22918 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
22919 | fprintf (stream: file, format: "1:\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n" , |
22920 | mcount_name); |
22921 | else |
22922 | fprintf (stream: file, format: "1:\tcall\t*%s@GOTPCREL(%%rip)\n" , |
22923 | mcount_name); |
22924 | break; |
22925 | } |
22926 | /* fall through */ |
22927 | default: |
22928 | x86_print_call_or_nop (file, target: mcount_name); |
22929 | break; |
22930 | } |
22931 | } |
22932 | else |
22933 | x86_print_call_or_nop (file, target: mcount_name); |
22934 | } |
22935 | else if (flag_pic) |
22936 | { |
22937 | #ifndef NO_PROFILE_COUNTERS |
22938 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
22939 | fprintf (file, |
22940 | "\tlea\t" PROFILE_COUNT_REGISTER ", %sP%d@GOTOFF[ebx]\n" , |
22941 | LPREFIX, labelno); |
22942 | else |
22943 | fprintf (file, |
22944 | "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n" , |
22945 | LPREFIX, labelno); |
22946 | #endif |
22947 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
22948 | fprintf (stream: file, format: "1:\tcall\t[DWORD PTR %s@GOT[ebx]]\n" , mcount_name); |
22949 | else |
22950 | fprintf (stream: file, format: "1:\tcall\t*%s@GOT(%%ebx)\n" , mcount_name); |
22951 | } |
22952 | else |
22953 | { |
22954 | #ifndef NO_PROFILE_COUNTERS |
22955 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
22956 | fprintf (file, |
22957 | "\tmov\t" PROFILE_COUNT_REGISTER ", OFFSET FLAT:%sP%d\n" , |
22958 | LPREFIX, labelno); |
22959 | else |
22960 | fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n" , |
22961 | LPREFIX, labelno); |
22962 | #endif |
22963 | x86_print_call_or_nop (file, target: mcount_name); |
22964 | } |
22965 | |
22966 | if (flag_record_mcount |
22967 | || lookup_attribute (attr_name: "fentry_section" , |
22968 | DECL_ATTRIBUTES (current_function_decl))) |
22969 | { |
22970 | const char *sname = "__mcount_loc" ; |
22971 | |
22972 | if (current_fentry_section (name: &sname)) |
22973 | ; |
22974 | else if (fentry_section) |
22975 | sname = fentry_section; |
22976 | |
22977 | fprintf (stream: file, format: "\t.section %s, \"a\",@progbits\n" , sname); |
22978 | fprintf (stream: file, format: "\t.%s 1b\n" , TARGET_64BIT ? "quad" : "long" ); |
22979 | fprintf (stream: file, format: "\t.previous\n" ); |
22980 | } |
22981 | } |
22982 | |
22983 | /* We don't have exact information about the insn sizes, but we may assume |
22984 | quite safely that we are informed about all 1 byte insns and memory |
22985 | address sizes. This is enough to eliminate unnecessary padding in |
22986 | 99% of cases. */ |
22987 | |
22988 | int |
22989 | ix86_min_insn_size (rtx_insn *insn) |
22990 | { |
22991 | int l = 0, len; |
22992 | |
22993 | if (!INSN_P (insn) || !active_insn_p (insn)) |
22994 | return 0; |
22995 | |
22996 | /* Discard alignments we've emit and jump instructions. */ |
22997 | if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE |
22998 | && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) |
22999 | return 0; |
23000 | |
23001 | /* Important case - calls are always 5 bytes. |
23002 | It is common to have many calls in the row. */ |
23003 | if (CALL_P (insn) |
23004 | && symbolic_reference_mentioned_p (op: PATTERN (insn)) |
23005 | && !SIBLING_CALL_P (insn)) |
23006 | return 5; |
23007 | len = get_attr_length (insn); |
23008 | if (len <= 1) |
23009 | return 1; |
23010 | |
23011 | /* For normal instructions we rely on get_attr_length being exact, |
23012 | with a few exceptions. */ |
23013 | if (!JUMP_P (insn)) |
23014 | { |
23015 | enum attr_type type = get_attr_type (insn); |
23016 | |
23017 | switch (type) |
23018 | { |
23019 | case TYPE_MULTI: |
23020 | if (GET_CODE (PATTERN (insn)) == ASM_INPUT |
23021 | || asm_noperands (PATTERN (insn)) >= 0) |
23022 | return 0; |
23023 | break; |
23024 | case TYPE_OTHER: |
23025 | case TYPE_FCMP: |
23026 | break; |
23027 | default: |
23028 | /* Otherwise trust get_attr_length. */ |
23029 | return len; |
23030 | } |
23031 | |
23032 | l = get_attr_length_address (insn); |
23033 | if (l < 4 && symbolic_reference_mentioned_p (op: PATTERN (insn))) |
23034 | l = 4; |
23035 | } |
23036 | if (l) |
23037 | return 1+l; |
23038 | else |
23039 | return 2; |
23040 | } |
23041 | |
23042 | #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN |
23043 | |
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  Insert padding so that no 16 byte window holds 4 branches.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx_insn *insn, *start = get_insns ();
  int nbytes = 0, njumps = 0;
  bool isjump = false;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).

     Don't consider asm goto as jump, while it can contain a jump, it doesn't
     have to, control transfer to label(s) can be performed through other
     means, and also we estimate minimum length of all asm stmts as 0.  */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  align_flags alignment = label_to_alignment (insn);
	  int align = alignment.levels[0].log;
	  int max_skip = alignment.levels[0].maxskip;

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (stream: dump_file, format: "Label %i with max_skip %i\n" ,
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      /* The label may force a new 16 byte window: shrink the
		 interval from the left until START and INSN are
		 guaranteed to share a window.  */
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (insn: start);
		  if ((JUMP_P (start) && asm_noperands (PATTERN (insn: start)) < 0)
		      || CALL_P (start))
		    njumps--, isjump = true;
		  else
		    isjump = false;
		  nbytes -= ix86_min_insn_size (insn: start);
		}
	    }
	  continue;
	}

      min_size = ix86_min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (stream: dump_file, format: "Insn %i estimated to %i bytes\n" ,
		 INSN_UID (insn), min_size);
      /* Calls and jumps (but not asm goto, see above) count as branches.  */
      if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      /* Keep no more than 4 branches in the interval: advance START
	 (dropping its byte count and branch count) until only 4 remain.  */
      while (njumps > 3)
	{
	  start = NEXT_INSN (insn: start);
	  if ((JUMP_P (start) && asm_noperands (PATTERN (insn: start)) < 0)
	      || CALL_P (start))
	    njumps--, isjump = true;
	  else
	    isjump = false;
	  nbytes -= ix86_min_insn_size (insn: start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (stream: dump_file, format: "Interval %i to %i has %i bytes\n" ,
		 INSN_UID (insn: start), INSN_UID (insn), nbytes);

      /* INSN is the 4th branch and can share a 16 byte window with the
	 preceding three: pad so it starts in the next window.  */
      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + ix86_min_insn_size (insn);

	  if (dump_file)
	    fprintf (stream: dump_file, format: "Padding insn %i by %i bytes!\n" ,
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
23142 | #endif |
23143 | |
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  /* Every predecessor of the exit block ends in some form of return.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      basic_block bb = e->src;
      rtx_insn *ret = BB_END (bb);
      rtx_insn *prev;
      bool replace = false;

      /* Skip blocks that do not end in a return, and cold blocks.  */
      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      /* Walk back to the nearest active insn or label before the return.  */
      for (prev = PREV_INSN (insn: ret); prev; prev = PREV_INSN (insn: prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  /* The return is a branch target: replace it if any executed,
	     non-fallthru edge from a real block can land on it.  */
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      {
		replace = true;
		break;
	      }
	}
      if (!replace)
	{
	  /* The return directly follows a conditional jump or a call.  */
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  /* Emit the long form of the return and drop the original.  */
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
23199 | |
23200 | /* Count the minimum number of instructions in BB. Return 4 if the |
23201 | number of instructions >= 4. */ |
23202 | |
23203 | static int |
23204 | ix86_count_insn_bb (basic_block bb) |
23205 | { |
23206 | rtx_insn *insn; |
23207 | int insn_count = 0; |
23208 | |
23209 | /* Count number of instructions in this block. Return 4 if the number |
23210 | of instructions >= 4. */ |
23211 | FOR_BB_INSNS (bb, insn) |
23212 | { |
23213 | /* Only happen in exit blocks. */ |
23214 | if (JUMP_P (insn) |
23215 | && ANY_RETURN_P (PATTERN (insn))) |
23216 | break; |
23217 | |
23218 | if (NONDEBUG_INSN_P (insn) |
23219 | && GET_CODE (PATTERN (insn)) != USE |
23220 | && GET_CODE (PATTERN (insn)) != CLOBBER) |
23221 | { |
23222 | insn_count++; |
23223 | if (insn_count >= 4) |
23224 | return insn_count; |
23225 | } |
23226 | } |
23227 | |
23228 | return insn_count; |
23229 | } |
23230 | |
23231 | |
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  BB is assumed to be
   a predecessor of the exit block.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      /* BB itself is reached straight from entry: nothing precedes it.  */
      if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  /* Predecessor e->src is reachable from entry; count it.  */
	  if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	    {
	      int count = ix86_count_insn_bb (bb: e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  /* Add BB's own instructions unless the path prefix already has >= 4.  */
  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
23276 | |
/* Pad short function to 4 instructions.  NOPs are inserted before the
   epilogue (or before the return if no epilogue note is found).  */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (bb: e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx_insn *insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      /* No epilogue note found; pad right before the return.  */
	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
23313 | |
/* Fix up a Windows system unwinder issue.  If an EH region falls through into
   the epilogue, the Windows system unwinder will apply epilogue logic and
   produce incorrect offsets.  This can be avoided by adding a nop between
   the last insn that can throw and the first insn of the epilogue.  */

static void
ix86_seh_fixup_eh_fallthru (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *insn, *next;

      /* Find the beginning of the epilogue.  */
      for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
	  break;
      if (insn == NULL)
	continue;

      /* We only care about preceding insns that can throw.  */
      insn = prev_active_insn (insn);
      if (insn == NULL || !can_throw_internal (insn))
	continue;

      /* Do not separate calls from their debug information.  Skip past
	 any trailing var-location notes so the nop goes after them.  */
      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (insn: next))
	if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
	  insn = next;
	else
	  break;

      /* A single nop separates the throwing insn from the epilogue.  */
      emit_insn_after (gen_nops (const1_rtx), insn);
    }
}
/* Split vector load from parm_decl to elemental loads to avoid STLF
   stalls.  Only V2DFmode loads from PARM_DECLs within the first
   x86_stlf_window_ninsns insns of the function are handled.  */
static void
ix86_split_stlf_stall_load ()
{
  rtx_insn* insn, *start = get_insns ();
  unsigned window = 0;

  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;
      window++;
      /* Insert 64 vaddps %xmm18, %xmm19, %xmm20(no dependence between each
	 other, just emulate for pipeline) before stalled load, stlf stall
	 case is as fast as no stall cases on CLX.
	 Since CFG is freed before machine_reorg, just do a rough
	 calculation of the window according to the layout.  */
      if (window > (unsigned) x86_stlf_window_ninsns)
	return;

      /* Stop at the first control transfer - only straight-line code
	 from the function entry is considered.  */
      if (any_uncondjump_p (insn)
	  || ANY_RETURN_P (PATTERN (insn))
	  || CALL_P (insn))
	return;

      rtx set = single_set (insn);
      if (!set)
	continue;
      rtx src = SET_SRC (set);
      if (!MEM_P (src)
	  /* Only handle V2DFmode load since it doesn't need any scratch
	     register.  */
	  || GET_MODE (src) != E_V2DFmode
	  || !MEM_EXPR (src)
	  || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
	continue;

      /* Replace the 16-byte load by two 8-byte loads: loadlpd zeroes the
	 destination and fills the low half, loadhpd fills the high half.  */
      rtx zero = CONST0_RTX (V2DFmode);
      rtx dest = SET_DEST (set);
      rtx m = adjust_address (src, DFmode, 0);
      rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
      emit_insn_before (loadlpd, insn);
      m = adjust_address (src, DFmode, 8);
      rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fputs (s: "Due to potential STLF stall, split instruction:\n" ,
		 stream: dump_file);
	  print_rtl_single (dump_file, insn);
	  fputs (s: "To:\n" , stream: dump_file);
	  print_rtl_single (dump_file, loadlpd);
	  print_rtl_single (dump_file, loadhpd);
	}
      /* Rewrite INSN in place as the high-half load and re-recognize.  */
      PATTERN (insn) = loadhpd;
      INSN_CODE (insn) = -1;
      gcc_assert (recog_memoized (insn) != -1);
    }
}
23410 | |
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.
   This is the TARGET_MACHINE_DEPENDENT_REORG hook.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* SEH fixup must run even at -O0 for unwind correctness.  */
  if (TARGET_SEH && current_function_has_exception_handlers ())
    ix86_seh_fixup_eh_fallthru ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_SSE2)
	ix86_split_stlf_stall_load ();
      /* Short-function padding and return padding are mutually
	 exclusive tunings.  */
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
23437 | |
23438 | /* Return nonzero when QImode register that must be represented via REX prefix |
23439 | is used. */ |
23440 | bool |
23441 | x86_extended_QIreg_mentioned_p (rtx_insn *insn) |
23442 | { |
23443 | int i; |
23444 | extract_insn_cached (insn); |
23445 | for (i = 0; i < recog_data.n_operands; i++) |
23446 | if (GENERAL_REG_P (recog_data.operand[i]) |
23447 | && !QI_REGNO_P (REGNO (recog_data.operand[i]))) |
23448 | return true; |
23449 | return false; |
23450 | } |
23451 | |
23452 | /* Return true when INSN mentions register that must be encoded using REX |
23453 | prefix. */ |
23454 | bool |
23455 | x86_extended_reg_mentioned_p (rtx insn) |
23456 | { |
23457 | subrtx_iterator::array_type array; |
23458 | FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST) |
23459 | { |
23460 | const_rtx x = *iter; |
23461 | if (REG_P (x) |
23462 | && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x)) |
23463 | || REX2_INT_REGNO_P (REGNO (x)))) |
23464 | return true; |
23465 | } |
23466 | return false; |
23467 | } |
23468 | |
23469 | /* Return true when INSN mentions register that must be encoded using REX2 |
23470 | prefix. */ |
23471 | bool |
23472 | x86_extended_rex2reg_mentioned_p (rtx insn) |
23473 | { |
23474 | subrtx_iterator::array_type array; |
23475 | FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST) |
23476 | { |
23477 | const_rtx x = *iter; |
23478 | if (REG_P (x) && REX2_INT_REGNO_P (REGNO (x))) |
23479 | return true; |
23480 | } |
23481 | return false; |
23482 | } |
23483 | |
23484 | /* Return true when rtx operands mentions register that must be encoded using |
23485 | evex prefix. */ |
23486 | bool |
23487 | x86_evex_reg_mentioned_p (rtx operands[], int nops) |
23488 | { |
23489 | int i; |
23490 | for (i = 0; i < nops; i++) |
23491 | if (EXT_REX_SSE_REG_P (operands[i]) |
23492 | || x86_extended_rex2reg_mentioned_p (insn: operands[i])) |
23493 | return true; |
23494 | return false; |
23495 | } |
23496 | |
23497 | /* If profitable, negate (without causing overflow) integer constant |
23498 | of mode MODE at location LOC. Return true in this case. */ |
23499 | bool |
23500 | x86_maybe_negate_const_int (rtx *loc, machine_mode mode) |
23501 | { |
23502 | HOST_WIDE_INT val; |
23503 | |
23504 | if (!CONST_INT_P (*loc)) |
23505 | return false; |
23506 | |
23507 | switch (mode) |
23508 | { |
23509 | case E_DImode: |
23510 | /* DImode x86_64 constants must fit in 32 bits. */ |
23511 | gcc_assert (x86_64_immediate_operand (*loc, mode)); |
23512 | |
23513 | mode = SImode; |
23514 | break; |
23515 | |
23516 | case E_SImode: |
23517 | case E_HImode: |
23518 | case E_QImode: |
23519 | break; |
23520 | |
23521 | default: |
23522 | gcc_unreachable (); |
23523 | } |
23524 | |
23525 | /* Avoid overflows. */ |
23526 | if (mode_signbit_p (mode, *loc)) |
23527 | return false; |
23528 | |
23529 | val = INTVAL (*loc); |
23530 | |
23531 | /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'. |
23532 | Exceptions: -128 encodes smaller than 128, so swap sign and op. */ |
23533 | if ((val < 0 && val != -128) |
23534 | || val == 128) |
23535 | { |
23536 | *loc = GEN_INT (-val); |
23537 | return true; |
23538 | } |
23539 | |
23540 | return false; |
23541 | } |
23542 | |
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.
   operands[0] is the FP destination, operands[1] the unsigned integer
   source (SImode or DImode).  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx_code_label *neglab, *donelab;
  rtx i0, i1, f0, in, out;
  machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  /* If the sign bit is clear, a plain signed conversion is correct.  */
  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* Sign bit set: halve the value, keeping the lost low bit ORed back in
     so it is not silently dropped, convert, then double the result.  */
  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  /* out = f0 * 2, undoing the halving above.  */
  emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
23584 | |
/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  Implements the
   TARGET_INVALID_CONVERSION hook.  May also emit a warning (not an
   error) for __bf16 <-> short conversions, see below.  */

static const char *
ix86_invalid_conversion (const_tree fromtype, const_tree totype)
{
  machine_mode from_mode = element_mode (fromtype);
  machine_mode to_mode = element_mode (totype);

  if (!TARGET_SSE2 && from_mode != to_mode)
    {
      /* Do no allow conversions to/from BFmode/HFmode scalar types
	 when TARGET_SSE2 is not available.  */
      if (from_mode == BFmode)
	return N_("invalid conversion from type %<__bf16%> "
		  "without option %<-msse2%>" );
      if (from_mode == HFmode)
	return N_("invalid conversion from type %<_Float16%> "
		  "without option %<-msse2%>" );
      if (to_mode == BFmode)
	return N_("invalid conversion to type %<__bf16%> "
		  "without option %<-msse2%>" );
      if (to_mode == HFmode)
	return N_("invalid conversion to type %<_Float16%> "
		  "without option %<-msse2%>" );
    }

  /* Warn for silent implicit conversion between __bf16 and short,
     since __bfloat16 is refined as real __bf16 instead of short
     since GCC13.  */
  if (element_mode (fromtype) != element_mode (totype)
      && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT))
    {
      /* Warn for silent implicit conversion where user may expect
	 a bitcast.  */
      if ((TYPE_MODE (fromtype) == BFmode
	   && TYPE_MODE (totype) == HImode)
	  || (TYPE_MODE (totype) == BFmode
	      && TYPE_MODE (fromtype) == HImode))
	warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
		"to real %<__bf16%> since GCC 13.1, be careful of "
		"implicit conversion between %<__bf16%> and %<short%>; "
		"an explicit bitcast may be needed here" );
    }

  /* Conversion allowed.  */
  return NULL;
}
23633 | |
23634 | /* Return the diagnostic message string if the unary operation OP is |
23635 | not permitted on TYPE, NULL otherwise. */ |
23636 | |
23637 | static const char * |
23638 | ix86_invalid_unary_op (int op, const_tree type) |
23639 | { |
23640 | machine_mode mmode = element_mode (type); |
23641 | /* Reject all single-operand operations on BFmode/HFmode except for & |
23642 | when TARGET_SSE2 is not available. */ |
23643 | if (!TARGET_SSE2 && op != ADDR_EXPR) |
23644 | { |
23645 | if (mmode == BFmode) |
23646 | return N_("operation not permitted on type %<__bf16%> " |
23647 | "without option %<-msse2%>" ); |
23648 | if (mmode == HFmode) |
23649 | return N_("operation not permitted on type %<_Float16%> " |
23650 | "without option %<-msse2%>" ); |
23651 | } |
23652 | |
23653 | /* Operation allowed. */ |
23654 | return NULL; |
23655 | } |
23656 | |
23657 | /* Return the diagnostic message string if the binary operation OP is |
23658 | not permitted on TYPE1 and TYPE2, NULL otherwise. */ |
23659 | |
23660 | static const char * |
23661 | ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, |
23662 | const_tree type2) |
23663 | { |
23664 | machine_mode type1_mode = element_mode (type1); |
23665 | machine_mode type2_mode = element_mode (type2); |
23666 | /* Reject all 2-operand operations on BFmode or HFmode |
23667 | when TARGET_SSE2 is not available. */ |
23668 | if (!TARGET_SSE2) |
23669 | { |
23670 | if (type1_mode == BFmode || type2_mode == BFmode) |
23671 | return N_("operation not permitted on type %<__bf16%> " |
23672 | "without option %<-msse2%>" ); |
23673 | |
23674 | if (type1_mode == HFmode || type2_mode == HFmode) |
23675 | return N_("operation not permitted on type %<_Float16%> " |
23676 | "without option %<-msse2%>" ); |
23677 | } |
23678 | |
23679 | /* Operation allowed. */ |
23680 | return NULL; |
23681 | } |
23682 | |
23683 | |
23684 | /* Target hook for scalar_mode_supported_p. */ |
23685 | static bool |
23686 | ix86_scalar_mode_supported_p (scalar_mode mode) |
23687 | { |
23688 | if (DECIMAL_FLOAT_MODE_P (mode)) |
23689 | return default_decimal_float_supported_p (); |
23690 | else if (mode == TFmode) |
23691 | return true; |
23692 | else if (mode == HFmode || mode == BFmode) |
23693 | return true; |
23694 | else |
23695 | return default_scalar_mode_supported_p (mode); |
23696 | } |
23697 | |
23698 | /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE |
23699 | if MODE is HFmode, and punt to the generic implementation otherwise. */ |
23700 | |
23701 | static bool |
23702 | ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode) |
23703 | { |
23704 | /* NB: Always return TRUE for HFmode so that the _Float16 type will |
23705 | be defined by the C front-end for AVX512FP16 intrinsics. We will |
23706 | issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't |
23707 | enabled. */ |
23708 | return ((mode == HFmode || mode == BFmode) |
23709 | ? true |
23710 | : default_libgcc_floating_mode_supported_p (mode)); |
23711 | } |
23712 | |
23713 | /* Implements target hook vector_mode_supported_p. */ |
23714 | static bool |
23715 | ix86_vector_mode_supported_p (machine_mode mode) |
23716 | { |
23717 | /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be |
23718 | either. */ |
23719 | if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode) |
23720 | return false; |
23721 | if (TARGET_SSE && VALID_SSE_REG_MODE (mode)) |
23722 | return true; |
23723 | if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) |
23724 | return true; |
23725 | if (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) |
23726 | return true; |
23727 | if (TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode)) |
23728 | return true; |
23729 | if ((TARGET_MMX || TARGET_MMX_WITH_SSE) |
23730 | && VALID_MMX_REG_MODE (mode)) |
23731 | return true; |
23732 | if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE) |
23733 | && VALID_MMX_REG_MODE_3DNOW (mode)) |
23734 | return true; |
23735 | if (mode == V2QImode) |
23736 | return true; |
23737 | return false; |
23738 | } |
23739 | |
23740 | /* Target hook for c_mode_for_suffix. */ |
23741 | static machine_mode |
23742 | ix86_c_mode_for_suffix (char suffix) |
23743 | { |
23744 | if (suffix == 'q') |
23745 | return TFmode; |
23746 | if (suffix == 'w') |
23747 | return XFmode; |
23748 | |
23749 | return VOIDmode; |
23750 | } |
23751 | |
/* Helper function to map common constraints to non-EGPR ones.
   All related constraints have h prefix, and h plus Upper letter
   means the constraint is strictly EGPR enabled, while h plus
   lower letter indicates the constraint is strictly gpr16 only.

   Specially for "g" constraint, split it to rmi as there is
   no corresponding general constraint define for backend.

   Here is the full list to map constraints that may involve
   gpr to h prefixed.

   "g" -> "jrjmi"
   "r" -> "jr"
   "m" -> "jm"
   "<" -> "j<"
   ">" -> "j>"
   "o" -> "jo"
   "V" -> "jV"
   "p" -> "jp"
   "Bm" -> "ja"

   CONSTRAINTS is rewritten in place; replacement strings are
   xstrdup'ed copies.  */

static void map_egpr_constraints (vec<const char *> &constraints)
{
  for (size_t i = 0; i < constraints.length(); i++)
    {
      const char *cur = constraints[i];

      /* Asm flag output constraints never involve GPRs; skip them.  */
      if (startswith (str: cur, prefix: "=@cc" ))
	continue;

      int len = strlen (s: cur);
      auto_vec<char> buf;

      for (int j = 0; j < len; j++)
	{
	  switch (cur[j])
	    {
	    case 'g':
	      /* "g" has no single mapped form; expand to "jrjmi".  */
	      buf.safe_push (obj: 'j');
	      buf.safe_push (obj: 'r');
	      buf.safe_push (obj: 'j');
	      buf.safe_push (obj: 'm');
	      buf.safe_push (obj: 'i');
	      break;
	    case 'r':
	    case 'm':
	    case '<':
	    case '>':
	    case 'o':
	    case 'V':
	    case 'p':
	      /* Single-letter constraints that may use a GPR: prefix
		 with 'j' to restrict them to the low 16 registers.  */
	      buf.safe_push (obj: 'j');
	      buf.safe_push (obj: cur[j]);
	      break;
	    case 'B':
	      /* "Bm" maps to "ja"; other "B?" pairs pass through.  */
	      if (cur[j + 1] == 'm')
		{
		  buf.safe_push (obj: 'j');
		  buf.safe_push (obj: 'a');
		  j++;
		}
	      else
		{
		  buf.safe_push (obj: cur[j]);
		  buf.safe_push (obj: cur[j + 1]);
		  j++;
		}
	      break;
	    case 'T':
	    case 'Y':
	    case 'W':
	    case 'j':
	      /* Two-character constraint prefixes: copy the pair
		 through unchanged.  */
	      buf.safe_push (obj: cur[j]);
	      buf.safe_push (obj: cur[j + 1]);
	      j++;
	      break;
	    default:
	      buf.safe_push (obj: cur[j]);
	      break;
	    }
	}
      buf.safe_push (obj: '\0');
      constraints[i] = xstrdup (buf.address ());
    }
}
23838 | |
/* Worker function for TARGET_MD_ASM_ADJUST.

   We implement asm flag outputs, and maintain source compatibility
   with the old cc0-based compiler.  */

static rtx_insn *
ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
		    vec<machine_mode> & /*input_modes*/,
		    vec<const char *> &constraints, vec<rtx> &/*uses*/,
		    vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
		    location_t loc)
{
  bool saw_asm_flag = false;

  start_sequence ();

  /* With APX extended GPRs available but inline asm restricted to the
     legacy 16 GPRs, rewrite generic constraints to gpr16 variants.  */
  if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
    map_egpr_constraints (constraints);

  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
    {
      const char *con = constraints[i];
      /* Only "=@cc<cond>" flag outputs are handled here.  */
      if (!startswith (con, "=@cc"))
	continue;
      con += 4;
      if (strchr (con, ',') != NULL)
	{
	  error_at (loc, "alternatives not allowed in %<asm%> flag output");
	  continue;
	}

      /* A leading 'n' (e.g. "=@ccnz") negates the condition.  */
      bool invert = false;
      if (con[0] == 'n')
	invert = true, con++;

      /* Map the condition suffix to a flags-register mode and rtx
	 comparison code; code stays UNKNOWN if unrecognized.  */
      machine_mode mode = CCmode;
      rtx_code code = UNKNOWN;

      switch (con[0])
	{
	case 'a':
	  if (con[1] == 0)
	    mode = CCAmode, code = EQ;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCCmode, code = NE;
	  break;
	case 'b':
	  if (con[1] == 0)
	    mode = CCCmode, code = EQ;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCAmode, code = NE;
	  break;
	case 'c':
	  if (con[1] == 0)
	    mode = CCCmode, code = EQ;
	  break;
	case 'e':
	  if (con[1] == 0)
	    mode = CCZmode, code = EQ;
	  break;
	case 'g':
	  if (con[1] == 0)
	    mode = CCGCmode, code = GT;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCGCmode, code = GE;
	  break;
	case 'l':
	  if (con[1] == 0)
	    mode = CCGCmode, code = LT;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCGCmode, code = LE;
	  break;
	case 'o':
	  if (con[1] == 0)
	    mode = CCOmode, code = EQ;
	  break;
	case 'p':
	  if (con[1] == 0)
	    mode = CCPmode, code = EQ;
	  break;
	case 's':
	  if (con[1] == 0)
	    mode = CCSmode, code = EQ;
	  break;
	case 'z':
	  if (con[1] == 0)
	    mode = CCZmode, code = EQ;
	  break;
	}
      if (code == UNKNOWN)
	{
	  error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
	  continue;
	}
      if (invert)
	code = reverse_condition (code);

      rtx dest = outputs[i];
      if (!saw_asm_flag)
	{
	  /* This is the first asm flag output.  Here we put the flags
	     register in as the real output and adjust the condition to
	     allow it.  */
	  constraints[i] = "=Bf";
	  outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
	  saw_asm_flag = true;
	}
      else
	{
	  /* We don't need the flags register as output twice.  */
	  constraints[i] = "=X";
	  outputs[i] = gen_rtx_SCRATCH (SImode);
	}

      /* Build the QImode 0/1 value: flags compared against zero under
	 the requested condition.  */
      rtx x = gen_rtx_REG (mode, FLAGS_REG);
      x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);

      machine_mode dest_mode = GET_MODE (dest);
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  error_at (loc, "invalid type for %<asm%> flag output");
	  continue;
	}

      if (dest_mode == QImode)
	emit_insn (gen_rtx_SET (dest, x));
      else
	{
	  /* Wider destination: materialize the setcc in QImode, then
	     zero-extend to the destination mode.  */
	  rtx reg = gen_reg_rtx (QImode);
	  emit_insn (gen_rtx_SET (reg, x));

	  reg = convert_to_mode (dest_mode, reg, 1);
	  emit_move_insn (dest, reg);
	}
    }

  rtx_insn *seq = get_insns ();
  end_sequence ();

  if (saw_asm_flag)
    return seq;
  else
    {
      /* If we had no asm flag outputs, clobber the flags.  */
      clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
      SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
      return NULL;
    }
}
23988 | |
23989 | /* Implements target vector targetm.asm.encode_section_info. */ |
23990 | |
23991 | static void ATTRIBUTE_UNUSED |
23992 | ix86_encode_section_info (tree decl, rtx rtl, int first) |
23993 | { |
23994 | default_encode_section_info (decl, rtl, first); |
23995 | |
23996 | if (ix86_in_large_data_p (exp: decl)) |
23997 | SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR; |
23998 | } |
23999 | |
24000 | /* Worker function for REVERSE_CONDITION. */ |
24001 | |
24002 | enum rtx_code |
24003 | ix86_reverse_condition (enum rtx_code code, machine_mode mode) |
24004 | { |
24005 | return (mode == CCFPmode |
24006 | ? reverse_condition_maybe_unordered (code) |
24007 | : reverse_condition (code)); |
24008 | } |
24009 | |
24010 | /* Output code to perform an x87 FP register move, from OPERANDS[1] |
24011 | to OPERANDS[0]. */ |
24012 | |
24013 | const char * |
24014 | output_387_reg_move (rtx_insn *insn, rtx *operands) |
24015 | { |
24016 | if (REG_P (operands[0])) |
24017 | { |
24018 | if (REG_P (operands[1]) |
24019 | && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) |
24020 | { |
24021 | if (REGNO (operands[0]) == FIRST_STACK_REG) |
24022 | return output_387_ffreep (operands, opno: 0); |
24023 | return "fstp\t%y0" ; |
24024 | } |
24025 | if (STACK_TOP_P (operands[0])) |
24026 | return "fld%Z1\t%y1" ; |
24027 | return "fst\t%y0" ; |
24028 | } |
24029 | else if (MEM_P (operands[0])) |
24030 | { |
24031 | gcc_assert (REG_P (operands[1])); |
24032 | if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) |
24033 | return "fstp%Z0\t%y0" ; |
24034 | else |
24035 | { |
24036 | /* There is no non-popping store to memory for XFmode. |
24037 | So if we need one, follow the store with a load. */ |
24038 | if (GET_MODE (operands[0]) == XFmode) |
24039 | return "fstp%Z0\t%y0\n\tfld%Z0\t%y0" ; |
24040 | else |
24041 | return "fst%Z0\t%y0" ; |
24042 | } |
24043 | } |
24044 | else |
24045 | gcc_unreachable(); |
24046 | } |
24047 | #ifdef TARGET_SOLARIS |
24048 | /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */ |
24049 | |
static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifndef USE_GAS
  /* The native Solaris assembler needs dedicated COMDAT directives
     instead of GNU-style .section flags.  */
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
     SPARC assembler.  One cannot mix single-letter flags and #exclude, so
     only emit the latter here.  */
  if (flags & SECTION_EXCLUDE)
    {
      fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
      return;
    }
#endif

  /* Everything else follows the generic ELF path.  */
  default_elf_asm_named_section (name, flags, decl);
}
24084 | #endif /* TARGET_SOLARIS */ |
24085 | |
24086 | /* Return the mangling of TYPE if it is an extended fundamental type. */ |
24087 | |
24088 | static const char * |
24089 | ix86_mangle_type (const_tree type) |
24090 | { |
24091 | type = TYPE_MAIN_VARIANT (type); |
24092 | |
24093 | if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE |
24094 | && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) |
24095 | return NULL; |
24096 | |
24097 | if (type == float128_type_node || type == float64x_type_node) |
24098 | return NULL; |
24099 | |
24100 | switch (TYPE_MODE (type)) |
24101 | { |
24102 | case E_BFmode: |
24103 | return "DF16b" ; |
24104 | case E_HFmode: |
24105 | /* _Float16 is "DF16_". |
24106 | Align with clang's decision in https://reviews.llvm.org/D33719. */ |
24107 | return "DF16_" ; |
24108 | case E_TFmode: |
24109 | /* __float128 is "g". */ |
24110 | return "g" ; |
24111 | case E_XFmode: |
24112 | /* "long double" or __float80 is "e". */ |
24113 | return "e" ; |
24114 | default: |
24115 | return NULL; |
24116 | } |
24117 | } |
24118 | |
24119 | /* Create C++ tinfo symbols for only conditionally available fundamental |
24120 | types. */ |
24121 | |
24122 | static void |
24123 | ix86_emit_support_tinfos (emit_support_tinfos_callback callback) |
24124 | { |
24125 | extern tree ix86_float16_type_node; |
24126 | extern tree ix86_bf16_type_node; |
24127 | |
24128 | if (!TARGET_SSE2) |
24129 | { |
24130 | if (!float16_type_node) |
24131 | float16_type_node = ix86_float16_type_node; |
24132 | if (!bfloat16_type_node) |
24133 | bfloat16_type_node = ix86_bf16_type_node; |
24134 | callback (float16_type_node); |
24135 | callback (bfloat16_type_node); |
24136 | float16_type_node = NULL_TREE; |
24137 | bfloat16_type_node = NULL_TREE; |
24138 | } |
24139 | } |
24140 | |
24141 | static GTY(()) tree ix86_tls_stack_chk_guard_decl; |
24142 | |
/* Implement TARGET_STACK_PROTECT_GUARD.  Return a tree expression for
   the location of the stack canary, honoring the TLS-based guard and
   the -mstack-protector-guard-* options.  */

static tree
ix86_stack_protect_guard (void)
{
  if (TARGET_SSP_TLS_GUARD)
    {
      tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
      /* Qualify the access with the segment register (%fs/%gs) chosen
	 by -mstack-protector-guard-reg, encoded as an address space.  */
      int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
      tree type = build_qualified_type (type_node, qual);
      tree t;

      if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
	{
	  /* The user named an explicit guard symbol; build (and cache
	     in a GTY root) an external volatile VAR_DECL for it.  */
	  t = ix86_tls_stack_chk_guard_decl;

	  if (t == NULL)
	    {
	      rtx x;

	      t = build_decl
		(UNKNOWN_LOCATION, VAR_DECL,
		 get_identifier (ix86_stack_protector_guard_symbol_str),
		 type);
	      TREE_STATIC (t) = 1;
	      TREE_PUBLIC (t) = 1;
	      DECL_EXTERNAL (t) = 1;
	      TREE_USED (t) = 1;
	      TREE_THIS_VOLATILE (t) = 1;
	      DECL_ARTIFICIAL (t) = 1;
	      DECL_IGNORED_P (t) = 1;

	      /* Do not share RTL as the declaration is visible outside of
		 current function.  */
	      x = DECL_RTL (t);
	      RTX_FLAG (x, used) = 1;

	      ix86_tls_stack_chk_guard_decl = t;
	    }
	}
      else
	{
	  /* Default: dereference <seg>:<offset>, expressed as a
	     volatile MEM_REF at the constant guard offset.  */
	  tree asptrtype = build_pointer_type (type);

	  t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
	  t = build2 (MEM_REF, asptrtype, t,
		      build_int_cst (asptrtype, 0));
	  TREE_THIS_VOLATILE (t) = 1;
	}

      return t;
    }

  /* No TLS guard: fall back to the global __stack_chk_guard.  */
  return default_stack_protect_guard ();
}
24196 | |
24197 | /* For 32-bit code we can save PIC register setup by using |
24198 | __stack_chk_fail_local hidden function instead of calling |
24199 | __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC |
24200 | register, so it is better to call __stack_chk_fail directly. */ |
24201 | |
24202 | static tree ATTRIBUTE_UNUSED |
24203 | ix86_stack_protect_fail (void) |
24204 | { |
24205 | return TARGET_64BIT |
24206 | ? default_external_stack_protect_fail () |
24207 | : default_hidden_stack_protect_fail (); |
24208 | } |
24209 | |
24210 | /* Select a format to encode pointers in exception handling data. CODE |
24211 | is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is |
24212 | true if the symbol may be affected by dynamic relocations. |
24213 | |
24214 | ??? All x86 object file formats are capable of representing this. |
24215 | After all, the relocation needed is the same as for the call insn. |
24216 | Whether or not a particular assembler allows us to enter such, I |
24217 | guess we'll have to see. */ |
24218 | |
24219 | int |
24220 | asm_preferred_eh_data_format (int code, int global) |
24221 | { |
24222 | /* PE-COFF is effectively always -fPIC because of the .reloc section. */ |
24223 | if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access) |
24224 | { |
24225 | int type = DW_EH_PE_sdata8; |
24226 | if (ptr_mode == SImode |
24227 | || ix86_cmodel == CM_SMALL_PIC |
24228 | || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))) |
24229 | type = DW_EH_PE_sdata4; |
24230 | return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; |
24231 | } |
24232 | |
24233 | if (ix86_cmodel == CM_SMALL |
24234 | || (ix86_cmodel == CM_MEDIUM && code)) |
24235 | return DW_EH_PE_udata4; |
24236 | |
24237 | return DW_EH_PE_absptr; |
24238 | } |
24239 | |
/* Implement targetm.vectorize.builtin_vectorization_cost.  Return the
   cost of a vectorizer statement of kind TYPE_OF_COST operating on
   VECTYPE (may be NULL, e.g. for branch costs).  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				 tree vectype, int)
{
  bool fp = false;
  machine_mode mode = TImode;
  int index;
  if (vectype != NULL)
    {
      fp = FLOAT_TYPE_P (vectype);
      mode = TYPE_MODE (vectype);
    }

  switch (type_of_cost)
    {
      case scalar_stmt:
	return fp ? ix86_cost->addss : COSTS_N_INSNS (1);

      case scalar_load:
	/* load/store costs are relative to register move which is 2. Recompute
	   it to COSTS_N_INSNS so everything have same base.  */
	return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
			      : ix86_cost->int_load [2]) / 2;

      case scalar_store:
	return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
			      : ix86_cost->int_store [2]) / 2;

      case vector_stmt:
	return ix86_vec_cost (mode,
			      fp ? ix86_cost->addss : ix86_cost->sse_op);

      case vector_load:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
	return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;

      case vector_store:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
	return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;

      case vec_to_scalar:
      case scalar_to_vec:
	return ix86_vec_cost (mode, ix86_cost->sse_op);

      /* We should have separate costs for unaligned loads and gather/scatter.
	 Do that incrementally.  */
      case unaligned_load:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
	return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;

      case unaligned_store:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
	return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;

      case vector_gather_load:
	/* Gather cost scales with the number of elements fetched.  */
	return ix86_vec_cost (mode,
			      COSTS_N_INSNS
				 (ix86_cost->gather_static
				  + ix86_cost->gather_per_elt
				    * TYPE_VECTOR_SUBPARTS (vectype)) / 2);

      case vector_scatter_store:
	/* Likewise for scatter stores.  */
	return ix86_vec_cost (mode,
			      COSTS_N_INSNS
				 (ix86_cost->scatter_static
				  + ix86_cost->scatter_per_elt
				    * TYPE_VECTOR_SUBPARTS (vectype)) / 2);

      case cond_branch_taken:
	return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
	return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
	return ix86_vec_cost (mode, ix86_cost->sse_op);

      case vec_construct:
	{
	  int n = TYPE_VECTOR_SUBPARTS (vectype);
	  /* N - 1 element inserts into an SSE vector, the possible
	     GPR -> XMM move is accounted for in add_stmt_cost.  */
	  if (GET_MODE_BITSIZE (mode) <= 128)
	    return (n - 1) * ix86_cost->sse_op;
	  /* One vinserti128 for combining two SSE vectors for AVX256.  */
	  else if (GET_MODE_BITSIZE (mode) == 256)
	    return ((n - 2) * ix86_cost->sse_op
		    + ix86_vec_cost (mode, ix86_cost->addss));
	  /* One vinserti64x4 and two vinserti128 for combining SSE
	     and AVX256 vectors to AVX512.  */
	  else if (GET_MODE_BITSIZE (mode) == 512)
	    return ((n - 4) * ix86_cost->sse_op
		    + 3 * ix86_vec_cost (mode, ix86_cost->addss));
	  gcc_unreachable ();
	}

      default:
	gcc_unreachable ();
    }
}
24354 | |
24355 | |
24356 | /* This function returns the calling abi specific va_list type node. |
24357 | It returns the FNDECL specific va_list type. */ |
24358 | |
24359 | static tree |
24360 | ix86_fn_abi_va_list (tree fndecl) |
24361 | { |
24362 | if (!TARGET_64BIT) |
24363 | return va_list_type_node; |
24364 | gcc_assert (fndecl != NULL_TREE); |
24365 | |
24366 | if (ix86_function_abi (fndecl: (const_tree) fndecl) == MS_ABI) |
24367 | return ms_va_list_type_node; |
24368 | else |
24369 | return sysv_va_list_type_node; |
24370 | } |
24371 | |
24372 | /* Returns the canonical va_list type specified by TYPE. If there |
24373 | is no valid TYPE provided, it return NULL_TREE. */ |
24374 | |
24375 | static tree |
24376 | ix86_canonical_va_list_type (tree type) |
24377 | { |
24378 | if (TARGET_64BIT) |
24379 | { |
24380 | if (lookup_attribute (attr_name: "ms_abi va_list" , TYPE_ATTRIBUTES (type))) |
24381 | return ms_va_list_type_node; |
24382 | |
24383 | if ((TREE_CODE (type) == ARRAY_TYPE |
24384 | && integer_zerop (array_type_nelts (type))) |
24385 | || POINTER_TYPE_P (type)) |
24386 | { |
24387 | tree elem_type = TREE_TYPE (type); |
24388 | if (TREE_CODE (elem_type) == RECORD_TYPE |
24389 | && lookup_attribute (attr_name: "sysv_abi va_list" , |
24390 | TYPE_ATTRIBUTES (elem_type))) |
24391 | return sysv_va_list_type_node; |
24392 | } |
24393 | |
24394 | return NULL_TREE; |
24395 | } |
24396 | |
24397 | return std_canonical_va_list_type (type); |
24398 | } |
24399 | |
24400 | /* Iterate through the target-specific builtin types for va_list. |
24401 | IDX denotes the iterator, *PTREE is set to the result type of |
24402 | the va_list builtin, and *PNAME to its internal type. |
24403 | Returns zero if there is no element for this index, otherwise |
24404 | IDX should be increased upon the next call. |
24405 | Note, do not iterate a base builtin's name like __builtin_va_list. |
24406 | Used from c_common_nodes_and_builtins. */ |
24407 | |
24408 | static int |
24409 | ix86_enum_va_list (int idx, const char **pname, tree *ptree) |
24410 | { |
24411 | if (TARGET_64BIT) |
24412 | { |
24413 | switch (idx) |
24414 | { |
24415 | default: |
24416 | break; |
24417 | |
24418 | case 0: |
24419 | *ptree = ms_va_list_type_node; |
24420 | *pname = "__builtin_ms_va_list" ; |
24421 | return 1; |
24422 | |
24423 | case 1: |
24424 | *ptree = sysv_va_list_type_node; |
24425 | *pname = "__builtin_sysv_va_list" ; |
24426 | return 1; |
24427 | } |
24428 | } |
24429 | |
24430 | return 0; |
24431 | } |
24432 | |
24433 | #undef TARGET_SCHED_DISPATCH |
24434 | #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch |
24435 | #undef TARGET_SCHED_DISPATCH_DO |
24436 | #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch |
24437 | #undef TARGET_SCHED_REASSOCIATION_WIDTH |
24438 | #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width |
24439 | #undef TARGET_SCHED_REORDER |
24440 | #define TARGET_SCHED_REORDER ix86_atom_sched_reorder |
24441 | #undef TARGET_SCHED_ADJUST_PRIORITY |
24442 | #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority |
24443 | #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK |
24444 | #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \ |
24445 | ix86_dependencies_evaluation_hook |
24446 | |
24447 | |
24448 | /* Implementation of reassociation_width target hook used by |
24449 | reassoc phase to identify parallelism level in reassociated |
24450 | tree. Statements tree_code is passed in OPC. Arguments type |
24451 | is passed in MODE. */ |
24452 | |
24453 | static int |
24454 | ix86_reassociation_width (unsigned int op, machine_mode mode) |
24455 | { |
24456 | int width = 1; |
24457 | /* Vector part. */ |
24458 | if (VECTOR_MODE_P (mode)) |
24459 | { |
24460 | int div = 1; |
24461 | if (INTEGRAL_MODE_P (mode)) |
24462 | width = ix86_cost->reassoc_vec_int; |
24463 | else if (FLOAT_MODE_P (mode)) |
24464 | width = ix86_cost->reassoc_vec_fp; |
24465 | |
24466 | if (width == 1) |
24467 | return 1; |
24468 | |
24469 | /* Integer vector instructions execute in FP unit |
24470 | and can execute 3 additions and one multiplication per cycle. */ |
24471 | if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2 |
24472 | || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4 |
24473 | || ix86_tune == PROCESSOR_ZNVER5) |
24474 | && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS) |
24475 | return 1; |
24476 | |
24477 | /* Account for targets that splits wide vectors into multiple parts. */ |
24478 | if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256) |
24479 | div = GET_MODE_BITSIZE (mode) / 256; |
24480 | else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128) |
24481 | div = GET_MODE_BITSIZE (mode) / 128; |
24482 | else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64) |
24483 | div = GET_MODE_BITSIZE (mode) / 64; |
24484 | width = (width + div - 1) / div; |
24485 | } |
24486 | /* Scalar part. */ |
24487 | else if (INTEGRAL_MODE_P (mode)) |
24488 | width = ix86_cost->reassoc_int; |
24489 | else if (FLOAT_MODE_P (mode)) |
24490 | width = ix86_cost->reassoc_fp; |
24491 | |
24492 | /* Avoid using too many registers in 32bit mode. */ |
24493 | if (!TARGET_64BIT && width > 2) |
24494 | width = 2; |
24495 | return width; |
24496 | } |
24497 | |
24498 | /* ??? No autovectorization into MMX or 3DNOW until we can reliably |
24499 | place emms and femms instructions. */ |
24500 | |
24501 | static machine_mode |
24502 | ix86_preferred_simd_mode (scalar_mode mode) |
24503 | { |
24504 | if (!TARGET_SSE) |
24505 | return word_mode; |
24506 | |
24507 | switch (mode) |
24508 | { |
24509 | case E_QImode: |
24510 | if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256) |
24511 | return V64QImode; |
24512 | else if (TARGET_AVX && !TARGET_PREFER_AVX128) |
24513 | return V32QImode; |
24514 | else |
24515 | return V16QImode; |
24516 | |
24517 | case E_HImode: |
24518 | if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256) |
24519 | return V32HImode; |
24520 | else if (TARGET_AVX && !TARGET_PREFER_AVX128) |
24521 | return V16HImode; |
24522 | else |
24523 | return V8HImode; |
24524 | |
24525 | case E_SImode: |
24526 | if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) |
24527 | return V16SImode; |
24528 | else if (TARGET_AVX && !TARGET_PREFER_AVX128) |
24529 | return V8SImode; |
24530 | else |
24531 | return V4SImode; |
24532 | |
24533 | case E_DImode: |
24534 | if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) |
24535 | return V8DImode; |
24536 | else if (TARGET_AVX && !TARGET_PREFER_AVX128) |
24537 | return V4DImode; |
24538 | else |
24539 | return V2DImode; |
24540 | |
24541 | case E_HFmode: |
24542 | if (TARGET_AVX512FP16) |
24543 | { |
24544 | if (TARGET_AVX512VL) |
24545 | { |
24546 | if (TARGET_PREFER_AVX128) |
24547 | return V8HFmode; |
24548 | else if (TARGET_PREFER_AVX256 || !TARGET_EVEX512) |
24549 | return V16HFmode; |
24550 | } |
24551 | if (TARGET_EVEX512) |
24552 | return V32HFmode; |
24553 | } |
24554 | return word_mode; |
24555 | |
24556 | case E_SFmode: |
24557 | if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) |
24558 | return V16SFmode; |
24559 | else if (TARGET_AVX && !TARGET_PREFER_AVX128) |
24560 | return V8SFmode; |
24561 | else |
24562 | return V4SFmode; |
24563 | |
24564 | case E_DFmode: |
24565 | if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) |
24566 | return V8DFmode; |
24567 | else if (TARGET_AVX && !TARGET_PREFER_AVX128) |
24568 | return V4DFmode; |
24569 | else if (TARGET_SSE2) |
24570 | return V2DFmode; |
24571 | /* FALLTHRU */ |
24572 | |
24573 | default: |
24574 | return word_mode; |
24575 | } |
24576 | } |
24577 | |
24578 | /* If AVX is enabled then try vectorizing with both 256bit and 128bit |
24579 | vectors. If AVX512F is enabled then try vectorizing with 512bit, |
24580 | 256bit and 128bit vectors. */ |
24581 | |
24582 | static unsigned int |
24583 | ix86_autovectorize_vector_modes (vector_modes *modes, bool all) |
24584 | { |
24585 | if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) |
24586 | { |
24587 | modes->safe_push (V64QImode); |
24588 | modes->safe_push (V32QImode); |
24589 | modes->safe_push (V16QImode); |
24590 | } |
24591 | else if (TARGET_AVX512F && TARGET_EVEX512 && all) |
24592 | { |
24593 | modes->safe_push (V32QImode); |
24594 | modes->safe_push (V16QImode); |
24595 | modes->safe_push (V64QImode); |
24596 | } |
24597 | else if (TARGET_AVX && !TARGET_PREFER_AVX128) |
24598 | { |
24599 | modes->safe_push (V32QImode); |
24600 | modes->safe_push (V16QImode); |
24601 | } |
24602 | else if (TARGET_AVX && all) |
24603 | { |
24604 | modes->safe_push (V16QImode); |
24605 | modes->safe_push (V32QImode); |
24606 | } |
24607 | else if (TARGET_SSE2) |
24608 | modes->safe_push (V16QImode); |
24609 | |
24610 | if (TARGET_MMX_WITH_SSE) |
24611 | modes->safe_push (V8QImode); |
24612 | |
24613 | if (TARGET_SSE2) |
24614 | modes->safe_push (V4QImode); |
24615 | |
24616 | return 0; |
24617 | } |
24618 | |
24619 | /* Implemenation of targetm.vectorize.get_mask_mode. */ |
24620 | |
24621 | static opt_machine_mode |
24622 | ix86_get_mask_mode (machine_mode data_mode) |
24623 | { |
24624 | unsigned vector_size = GET_MODE_SIZE (data_mode); |
24625 | unsigned nunits = GET_MODE_NUNITS (data_mode); |
24626 | unsigned elem_size = vector_size / nunits; |
24627 | |
24628 | /* Scalar mask case. */ |
24629 | if ((TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64) |
24630 | || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)) |
24631 | /* AVX512FP16 only supports vector comparison |
24632 | to kmask for _Float16. */ |
24633 | || (TARGET_AVX512VL && TARGET_AVX512FP16 |
24634 | && GET_MODE_INNER (data_mode) == E_HFmode)) |
24635 | { |
24636 | if (elem_size == 4 |
24637 | || elem_size == 8 |
24638 | || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2))) |
24639 | return smallest_int_mode_for_size (size: nunits); |
24640 | } |
24641 | |
24642 | scalar_int_mode elem_mode |
24643 | = smallest_int_mode_for_size (size: elem_size * BITS_PER_UNIT); |
24644 | |
24645 | gcc_assert (elem_size * nunits == vector_size); |
24646 | |
24647 | return mode_for_vector (elem_mode, nunits); |
24648 | } |
24649 | |
24650 | |
24651 | |
24652 | /* Return class of registers which could be used for pseudo of MODE |
24653 | and of class RCLASS for spilling instead of memory. Return NO_REGS |
24654 | if it is not possible or non-profitable. */ |
24655 | |
24656 | /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */ |
24657 | |
24658 | static reg_class_t |
24659 | ix86_spill_class (reg_class_t rclass, machine_mode mode) |
24660 | { |
24661 | if (0 && TARGET_GENERAL_REGS_SSE_SPILL |
24662 | && TARGET_SSE2 |
24663 | && TARGET_INTER_UNIT_MOVES_TO_VEC |
24664 | && TARGET_INTER_UNIT_MOVES_FROM_VEC |
24665 | && (mode == SImode || (TARGET_64BIT && mode == DImode)) |
24666 | && INTEGER_CLASS_P (rclass)) |
24667 | return ALL_SSE_REGS; |
24668 | return NO_REGS; |
24669 | } |
24670 | |
24671 | /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation, |
24672 | but returns a lower bound. */ |
24673 | |
24674 | static unsigned int |
24675 | ix86_max_noce_ifcvt_seq_cost (edge e) |
24676 | { |
24677 | bool predictable_p = predictable_edge_p (e); |
24678 | if (predictable_p) |
24679 | { |
24680 | if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost)) |
24681 | return param_max_rtl_if_conversion_predictable_cost; |
24682 | } |
24683 | else |
24684 | { |
24685 | if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost)) |
24686 | return param_max_rtl_if_conversion_unpredictable_cost; |
24687 | } |
24688 | |
24689 | return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2); |
24690 | } |
24691 | |
24692 | /* Return true if SEQ is a good candidate as a replacement for the |
24693 | if-convertible sequence described in IF_INFO. */ |
24694 | |
24695 | static bool |
24696 | ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info) |
24697 | { |
24698 | if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p) |
24699 | { |
24700 | int cmov_cnt = 0; |
24701 | /* Punt if SEQ contains more than one CMOV or FCMOV instruction. |
24702 | Maybe we should allow even more conditional moves as long as they |
24703 | are used far enough not to stall the CPU, or also consider |
24704 | IF_INFO->TEST_BB succ edge probabilities. */ |
24705 | for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn)) |
24706 | { |
24707 | rtx set = single_set (insn); |
24708 | if (!set) |
24709 | continue; |
24710 | if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE) |
24711 | continue; |
24712 | rtx src = SET_SRC (set); |
24713 | machine_mode mode = GET_MODE (src); |
24714 | if (GET_MODE_CLASS (mode) != MODE_INT |
24715 | && GET_MODE_CLASS (mode) != MODE_FLOAT) |
24716 | continue; |
24717 | if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1))) |
24718 | || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2)))) |
24719 | continue; |
24720 | /* insn is CMOV or FCMOV. */ |
24721 | if (++cmov_cnt > 1) |
24722 | return false; |
24723 | } |
24724 | } |
24725 | return default_noce_conversion_profitable_p (seq, if_info); |
24726 | } |
24727 | |
/* x86-specific vector costs.  Extends the generic cost model with an
   estimate of register pressure in the vectorized code.  */
class ix86_vector_costs : public vector_costs
{
public:
  ix86_vector_costs (vec_info *, bool);

  /* vector_costs interface.  */
  unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
			      stmt_vec_info stmt_info, slp_tree node,
			      tree vectype, int misalign,
			      vect_cost_model_location where) override;
  void finish_cost (const vector_costs *) override;

private:

  /* Estimate register pressure of the vectorized code.  */
  void ix86_vect_estimate_reg_pressure ();
  /* Number of GENERAL_REGS/SSE_REGS used in the vectorizer, it's used for
     estimation of register pressure.
     ??? Currently it's only used by vec_construct/scalar_to_vec
     where we know it's not loaded from memory.  */
  unsigned m_num_gpr_needed[3];
  unsigned m_num_sse_needed[3];
};
24751 | |
/* Construct the cost tracker; the per-location GPR/SSE register-use
   counters are value-initialized to zero.  */

ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
  : vector_costs (vinfo, costing_for_scalar),
    m_num_gpr_needed (),
    m_num_sse_needed ()
{
}
24758 | |
/* Implement targetm.vectorize.create_costs.  */

static vector_costs *
ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
{
  /* NOTE(review): the caller (the vectorizer) appears to take ownership
     of the returned heap allocation — confirm against the hook contract.  */
  return new ix86_vector_costs (vinfo, costing_for_scalar);
}
24766 | |
24767 | unsigned |
24768 | ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, |
24769 | stmt_vec_info stmt_info, slp_tree node, |
24770 | tree vectype, int misalign, |
24771 | vect_cost_model_location where) |
24772 | { |
24773 | unsigned retval = 0; |
24774 | bool scalar_p |
24775 | = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store); |
24776 | int stmt_cost = - 1; |
24777 | |
24778 | bool fp = false; |
24779 | machine_mode mode = scalar_p ? SImode : TImode; |
24780 | |
24781 | if (vectype != NULL) |
24782 | { |
24783 | fp = FLOAT_TYPE_P (vectype); |
24784 | mode = TYPE_MODE (vectype); |
24785 | if (scalar_p) |
24786 | mode = TYPE_MODE (TREE_TYPE (vectype)); |
24787 | } |
24788 | |
24789 | if ((kind == vector_stmt || kind == scalar_stmt) |
24790 | && stmt_info |
24791 | && stmt_info->stmt && gimple_code (g: stmt_info->stmt) == GIMPLE_ASSIGN) |
24792 | { |
24793 | tree_code subcode = gimple_assign_rhs_code (gs: stmt_info->stmt); |
24794 | /*machine_mode inner_mode = mode; |
24795 | if (VECTOR_MODE_P (mode)) |
24796 | inner_mode = GET_MODE_INNER (mode);*/ |
24797 | |
24798 | switch (subcode) |
24799 | { |
24800 | case PLUS_EXPR: |
24801 | case POINTER_PLUS_EXPR: |
24802 | case MINUS_EXPR: |
24803 | if (kind == scalar_stmt) |
24804 | { |
24805 | if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) |
24806 | stmt_cost = ix86_cost->addss; |
24807 | else if (X87_FLOAT_MODE_P (mode)) |
24808 | stmt_cost = ix86_cost->fadd; |
24809 | else |
24810 | stmt_cost = ix86_cost->add; |
24811 | } |
24812 | else |
24813 | stmt_cost = ix86_vec_cost (mode, cost: fp ? ix86_cost->addss |
24814 | : ix86_cost->sse_op); |
24815 | break; |
24816 | |
24817 | case MULT_EXPR: |
24818 | /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw, |
24819 | take it as MULT_EXPR. */ |
24820 | case MULT_HIGHPART_EXPR: |
24821 | stmt_cost = ix86_multiplication_cost (cost: ix86_cost, mode); |
24822 | break; |
24823 | /* There's no direct instruction for WIDEN_MULT_EXPR, |
24824 | take emulation into account. */ |
24825 | case WIDEN_MULT_EXPR: |
24826 | stmt_cost = ix86_widen_mult_cost (cost: ix86_cost, mode, |
24827 | TYPE_UNSIGNED (vectype)); |
24828 | break; |
24829 | |
24830 | case NEGATE_EXPR: |
24831 | if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) |
24832 | stmt_cost = ix86_cost->sse_op; |
24833 | else if (X87_FLOAT_MODE_P (mode)) |
24834 | stmt_cost = ix86_cost->fchs; |
24835 | else if (VECTOR_MODE_P (mode)) |
24836 | stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op); |
24837 | else |
24838 | stmt_cost = ix86_cost->add; |
24839 | break; |
24840 | case TRUNC_DIV_EXPR: |
24841 | case CEIL_DIV_EXPR: |
24842 | case FLOOR_DIV_EXPR: |
24843 | case ROUND_DIV_EXPR: |
24844 | case TRUNC_MOD_EXPR: |
24845 | case CEIL_MOD_EXPR: |
24846 | case FLOOR_MOD_EXPR: |
24847 | case RDIV_EXPR: |
24848 | case ROUND_MOD_EXPR: |
24849 | case EXACT_DIV_EXPR: |
24850 | stmt_cost = ix86_division_cost (cost: ix86_cost, mode); |
24851 | break; |
24852 | |
24853 | case RSHIFT_EXPR: |
24854 | case LSHIFT_EXPR: |
24855 | case LROTATE_EXPR: |
24856 | case RROTATE_EXPR: |
24857 | { |
24858 | tree op1 = gimple_assign_rhs1 (gs: stmt_info->stmt); |
24859 | tree op2 = gimple_assign_rhs2 (gs: stmt_info->stmt); |
24860 | stmt_cost = ix86_shift_rotate_cost |
24861 | (cost: ix86_cost, |
24862 | code: (subcode == RSHIFT_EXPR |
24863 | && !TYPE_UNSIGNED (TREE_TYPE (op1))) |
24864 | ? ASHIFTRT : LSHIFTRT, mode, |
24865 | TREE_CODE (op2) == INTEGER_CST, |
24866 | op1_val: cst_and_fits_in_hwi (op2) |
24867 | ? int_cst_value (op2) : -1, |
24868 | and_in_op1: false, shift_and_truncate: false, NULL, NULL); |
24869 | } |
24870 | break; |
24871 | case NOP_EXPR: |
24872 | /* Only sign-conversions are free. */ |
24873 | if (tree_nop_conversion_p |
24874 | (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)), |
24875 | TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))) |
24876 | stmt_cost = 0; |
24877 | break; |
24878 | |
24879 | case BIT_IOR_EXPR: |
24880 | case ABS_EXPR: |
24881 | case ABSU_EXPR: |
24882 | case MIN_EXPR: |
24883 | case MAX_EXPR: |
24884 | case BIT_XOR_EXPR: |
24885 | case BIT_AND_EXPR: |
24886 | case BIT_NOT_EXPR: |
24887 | if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) |
24888 | stmt_cost = ix86_cost->sse_op; |
24889 | else if (VECTOR_MODE_P (mode)) |
24890 | stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op); |
24891 | else |
24892 | stmt_cost = ix86_cost->add; |
24893 | break; |
24894 | default: |
24895 | break; |
24896 | } |
24897 | } |
24898 | |
24899 | combined_fn cfn; |
24900 | if ((kind == vector_stmt || kind == scalar_stmt) |
24901 | && stmt_info |
24902 | && stmt_info->stmt |
24903 | && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST) |
24904 | switch (cfn) |
24905 | { |
24906 | case CFN_FMA: |
24907 | stmt_cost = ix86_vec_cost (mode, |
24908 | cost: mode == SFmode ? ix86_cost->fmass |
24909 | : ix86_cost->fmasd); |
24910 | break; |
24911 | case CFN_MULH: |
24912 | stmt_cost = ix86_multiplication_cost (cost: ix86_cost, mode); |
24913 | break; |
24914 | default: |
24915 | break; |
24916 | } |
24917 | |
24918 | /* If we do elementwise loads into a vector then we are bound by |
24919 | latency and execution resources for the many scalar loads |
24920 | (AGU and load ports). Try to account for this by scaling the |
24921 | construction cost by the number of elements involved. */ |
24922 | if ((kind == vec_construct || kind == vec_to_scalar) |
24923 | && stmt_info |
24924 | && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type |
24925 | || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type) |
24926 | && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE |
24927 | && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info))) |
24928 | != INTEGER_CST)) |
24929 | || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)) |
24930 | { |
24931 | stmt_cost = ix86_builtin_vectorization_cost (type_of_cost: kind, vectype, misalign); |
24932 | stmt_cost *= (TYPE_VECTOR_SUBPARTS (node: vectype) + 1); |
24933 | } |
24934 | else if ((kind == vec_construct || kind == scalar_to_vec) |
24935 | && node |
24936 | && SLP_TREE_DEF_TYPE (node) == vect_external_def) |
24937 | { |
24938 | stmt_cost = ix86_builtin_vectorization_cost (type_of_cost: kind, vectype, misalign); |
24939 | unsigned i; |
24940 | tree op; |
24941 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op) |
24942 | if (TREE_CODE (op) == SSA_NAME) |
24943 | TREE_VISITED (op) = 0; |
24944 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op) |
24945 | { |
24946 | if (TREE_CODE (op) != SSA_NAME |
24947 | || TREE_VISITED (op)) |
24948 | continue; |
24949 | TREE_VISITED (op) = 1; |
24950 | gimple *def = SSA_NAME_DEF_STMT (op); |
24951 | tree tem; |
24952 | if (is_gimple_assign (gs: def) |
24953 | && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)) |
24954 | && ((tem = gimple_assign_rhs1 (gs: def)), true) |
24955 | && TREE_CODE (tem) == SSA_NAME |
24956 | /* A sign-change expands to nothing. */ |
24957 | && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (def)), |
24958 | TREE_TYPE (tem))) |
24959 | def = SSA_NAME_DEF_STMT (tem); |
24960 | /* When the component is loaded from memory we can directly |
24961 | move it to a vector register, otherwise we have to go |
24962 | via a GPR or via vpinsr which involves similar cost. |
24963 | Likewise with a BIT_FIELD_REF extracting from a vector |
24964 | register we can hope to avoid using a GPR. */ |
24965 | if (!is_gimple_assign (gs: def) |
24966 | || ((!gimple_assign_load_p (def) |
24967 | || (!TARGET_SSE4_1 |
24968 | && GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) == 1)) |
24969 | && (gimple_assign_rhs_code (gs: def) != BIT_FIELD_REF |
24970 | || !VECTOR_TYPE_P (TREE_TYPE |
24971 | (TREE_OPERAND (gimple_assign_rhs1 (def), 0)))))) |
24972 | { |
24973 | if (fp) |
24974 | m_num_sse_needed[where]++; |
24975 | else |
24976 | { |
24977 | m_num_gpr_needed[where]++; |
24978 | stmt_cost += ix86_cost->sse_to_integer; |
24979 | } |
24980 | } |
24981 | } |
24982 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op) |
24983 | if (TREE_CODE (op) == SSA_NAME) |
24984 | TREE_VISITED (op) = 0; |
24985 | } |
24986 | if (stmt_cost == -1) |
24987 | stmt_cost = ix86_builtin_vectorization_cost (type_of_cost: kind, vectype, misalign); |
24988 | |
24989 | /* Penalize DFmode vector operations for Bonnell. */ |
24990 | if (TARGET_CPU_P (BONNELL) && kind == vector_stmt |
24991 | && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode) |
24992 | stmt_cost *= 5; /* FIXME: The value here is arbitrary. */ |
24993 | |
24994 | /* Statements in an inner loop relative to the loop being |
24995 | vectorized are weighted more heavily. The value here is |
24996 | arbitrary and could potentially be improved with analysis. */ |
24997 | retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost); |
24998 | |
24999 | /* We need to multiply all vector stmt cost by 1.7 (estimated cost) |
25000 | for Silvermont as it has out of order integer pipeline and can execute |
25001 | 2 scalar instruction per tick, but has in order SIMD pipeline. */ |
25002 | if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT) |
25003 | || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL)) |
25004 | && stmt_info && stmt_info->stmt) |
25005 | { |
25006 | tree lhs_op = gimple_get_lhs (stmt_info->stmt); |
25007 | if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE) |
25008 | retval = (retval * 17) / 10; |
25009 | } |
25010 | |
25011 | m_costs[where] += retval; |
25012 | |
25013 | return retval; |
25014 | } |
25015 | |
/* Add an estimated spill cost to each cost bucket whenever the counters
   recorded by add_stmt_cost exceed the number of available registers.  */

void
ix86_vector_costs::ix86_vect_estimate_reg_pressure ()
{
  /* Cost of one spill, heuristically taken as half a store.  */
  unsigned gpr_spill_cost = COSTS_N_INSNS (ix86_cost->int_store [2]) / 2;
  unsigned sse_spill_cost = COSTS_N_INSNS (ix86_cost->sse_store[0]) / 2;

  /* Any better way to have target available fp registers, currently use SSE_REGS. */
  unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8;
  for (unsigned i = 0; i != 3; i++)
    {
      if (m_num_gpr_needed[i] > target_avail_regs)
	m_costs[i] += gpr_spill_cost * (m_num_gpr_needed[i] - target_avail_regs);
      /* Only measure sse registers pressure. */
      if (TARGET_SSE && (m_num_sse_needed[i] > target_avail_sse))
	m_costs[i] += sse_spill_cost * (m_num_sse_needed[i] - target_avail_sse);
    }
}
25033 | |
25034 | void |
25035 | ix86_vector_costs::finish_cost (const vector_costs *scalar_costs) |
25036 | { |
25037 | loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (p: m_vinfo); |
25038 | if (loop_vinfo && !m_costing_for_scalar) |
25039 | { |
25040 | /* We are currently not asking the vectorizer to compare costs |
25041 | between different vector mode sizes. When using predication |
25042 | that will end up always choosing the prefered mode size even |
25043 | if there's a smaller mode covering all lanes. Test for this |
25044 | situation and artificially reject the larger mode attempt. |
25045 | ??? We currently lack masked ops for sub-SSE sized modes, |
25046 | so we could restrict this rejection to AVX and AVX512 modes |
25047 | but error on the safe side for now. */ |
25048 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) |
25049 | && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) |
25050 | && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
25051 | && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ()) |
25052 | > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo)))) |
25053 | m_costs[vect_body] = INT_MAX; |
25054 | } |
25055 | |
25056 | ix86_vect_estimate_reg_pressure (); |
25057 | |
25058 | vector_costs::finish_cost (scalar_costs); |
25059 | } |
25060 | |
25061 | /* Validate target specific memory model bits in VAL. */ |
25062 | |
25063 | static unsigned HOST_WIDE_INT |
25064 | ix86_memmodel_check (unsigned HOST_WIDE_INT val) |
25065 | { |
25066 | enum memmodel model = memmodel_from_int (val); |
25067 | bool strong; |
25068 | |
25069 | if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE |
25070 | |MEMMODEL_MASK) |
25071 | || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE))) |
25072 | { |
25073 | warning (OPT_Winvalid_memory_model, |
25074 | "unknown architecture specific memory model" ); |
25075 | return MEMMODEL_SEQ_CST; |
25076 | } |
25077 | strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model)); |
25078 | if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong)) |
25079 | { |
25080 | warning (OPT_Winvalid_memory_model, |
25081 | "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger " |
25082 | "memory model" ); |
25083 | return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE; |
25084 | } |
25085 | if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong)) |
25086 | { |
25087 | warning (OPT_Winvalid_memory_model, |
25088 | "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger " |
25089 | "memory model" ); |
25090 | return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE; |
25091 | } |
25092 | return val; |
25093 | } |
25094 | |
/* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
   CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
   CLONEI->simdlen.  Return 0 if SIMD clones shouldn't be emitted,
   or number of vecsize_mangle variants that should be emitted.  */

static int
ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
					     struct cgraph_simd_clone *clonei,
					     tree base_type, int num,
					     bool explicit_p)
{
  int ret = 1;

  /* An explicitly requested simdlen must be a power of two in [2, 1024].  */
  if (clonei->simdlen
      && (clonei->simdlen < 2
	  || clonei->simdlen > 1024
	  || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
    {
      if (explicit_p)
	warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
		    "unsupported simdlen %wd" , clonei->simdlen.to_constant ());
      return 0;
    }

  /* Only scalar return types of the listed modes are supported.  */
  tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
  if (TREE_CODE (ret_type) != VOID_TYPE)
    switch (TYPE_MODE (ret_type))
      {
      case E_QImode:
      case E_HImode:
      case E_SImode:
      case E_DImode:
      case E_SFmode:
      case E_DFmode:
      /* case E_SCmode: */
      /* case E_DCmode: */
	if (!AGGREGATE_TYPE_P (ret_type))
	  break;
	/* FALLTHRU */
      default:
	if (explicit_p)
	  warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
		      "unsupported return type %qT for simd" , ret_type);
	return 0;
      }

  tree t;
  int i;
  tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
  bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);

  /* Likewise each argument must be a supported scalar, except that
     uniform arguments are passed unchanged and so are always OK.  */
  for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
       t && t != void_list_node; t = TREE_CHAIN (t), i++)
    {
      tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
      switch (TYPE_MODE (arg_type))
	{
	case E_QImode:
	case E_HImode:
	case E_SImode:
	case E_DImode:
	case E_SFmode:
	case E_DFmode:
	/* case E_SCmode: */
	/* case E_DCmode: */
	  if (!AGGREGATE_TYPE_P (arg_type))
	    break;
	  /* FALLTHRU */
	default:
	  if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
	    break;
	  if (explicit_p)
	    warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
			"unsupported argument type %qT for simd" , arg_type);
	  return 0;
	}
    }

  if (!TREE_PUBLIC (node->decl) || !explicit_p)
    {
      /* If the function isn't exported, we can pick up just one ISA
	 for the clones.  */
      if (TARGET_AVX512F && TARGET_EVEX512)
	clonei->vecsize_mangle = 'e';
      else if (TARGET_AVX2)
	clonei->vecsize_mangle = 'd';
      else if (TARGET_AVX)
	clonei->vecsize_mangle = 'c';
      else
	clonei->vecsize_mangle = 'b';
      ret = 1;
    }
  else
    {
      /* Exported functions get one clone per ISA level; NUM selects
	 which of the four mangle letters this call handles.  */
      clonei->vecsize_mangle = "bcde" [num];
      ret = 4;
    }
  clonei->mask_mode = VOIDmode;
  /* Vector widths per mangle letter; 'e' (AVX512) additionally uses a
     mask mode sized by the characteristic type.  */
  switch (clonei->vecsize_mangle)
    {
    case 'b':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 128;
      break;
    case 'c':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 256;
      break;
    case 'd':
      clonei->vecsize_int = 256;
      clonei->vecsize_float = 256;
      break;
    case 'e':
      clonei->vecsize_int = 512;
      clonei->vecsize_float = 512;
      if (TYPE_MODE (base_type) == QImode)
	clonei->mask_mode = DImode;
      else
	clonei->mask_mode = SImode;
      break;
    }
  if (clonei->simdlen == 0)
    {
      /* No explicit simdlen: derive it from the vector width divided by
	 the width of the characteristic type.  */
      if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
	clonei->simdlen = clonei->vecsize_int;
      else
	clonei->simdlen = clonei->vecsize_float;
      clonei->simdlen = clonei->simdlen
			/ GET_MODE_BITSIZE (TYPE_MODE (base_type));
    }
  else if (clonei->simdlen > 16)
    {
      /* For compatibility with ICC, use the same upper bounds
	 for simdlen.  In particular, for CTYPE below, use the return type,
	 unless the function returns void, in that case use the characteristic
	 type.  If it is possible for given SIMDLEN to pass CTYPE value
	 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
	 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
	 emit corresponding clone.  */
      tree ctype = ret_type;
      if (VOID_TYPE_P (ret_type))
	ctype = base_type;
      int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
      if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
	cnt /= clonei->vecsize_int;
      else
	cnt /= clonei->vecsize_float;
      if (cnt > (TARGET_64BIT ? 16 : 8))
	{
	  if (explicit_p)
	    warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
			"unsupported simdlen %wd" ,
			clonei->simdlen.to_constant ());
	  return 0;
	}
    }
  return ret;
}
25253 | |
25254 | /* If SIMD clone NODE can't be used in a vectorized loop |
25255 | in current function, return -1, otherwise return a badness of using it |
25256 | (0 if it is most desirable from vecsize_mangle point of view, 1 |
25257 | slightly less desirable, etc.). */ |
25258 | |
25259 | static int |
25260 | ix86_simd_clone_usable (struct cgraph_node *node) |
25261 | { |
25262 | switch (node->simdclone->vecsize_mangle) |
25263 | { |
25264 | case 'b': |
25265 | if (!TARGET_SSE2) |
25266 | return -1; |
25267 | if (!TARGET_AVX) |
25268 | return 0; |
25269 | return (TARGET_AVX512F && TARGET_EVEX512) ? 3 : TARGET_AVX2 ? 2 : 1; |
25270 | case 'c': |
25271 | if (!TARGET_AVX) |
25272 | return -1; |
25273 | return (TARGET_AVX512F && TARGET_EVEX512) ? 2 : TARGET_AVX2 ? 1 : 0; |
25274 | case 'd': |
25275 | if (!TARGET_AVX2) |
25276 | return -1; |
25277 | return (TARGET_AVX512F && TARGET_EVEX512) ? 1 : 0; |
25278 | case 'e': |
25279 | if (!TARGET_AVX512F || !TARGET_EVEX512) |
25280 | return -1; |
25281 | return 0; |
25282 | default: |
25283 | gcc_unreachable (); |
25284 | } |
25285 | } |
25286 | |
25287 | /* This function adjusts the unroll factor based on |
25288 | the hardware capabilities. For ex, bdver3 has |
25289 | a loop buffer which makes unrolling of smaller |
25290 | loops less important. This function decides the |
25291 | unroll factor using number of memory references |
25292 | (value 32 is used) as a heuristic. */ |
25293 | |
25294 | static unsigned |
25295 | ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop) |
25296 | { |
25297 | basic_block *bbs; |
25298 | rtx_insn *insn; |
25299 | unsigned i; |
25300 | unsigned mem_count = 0; |
25301 | |
25302 | /* Unroll small size loop when unroll factor is not explicitly |
25303 | specified. */ |
25304 | if (ix86_unroll_only_small_loops && !loop->unroll) |
25305 | { |
25306 | if (loop->ninsns <= ix86_cost->small_unroll_ninsns) |
25307 | return MIN (nunroll, ix86_cost->small_unroll_factor); |
25308 | else |
25309 | return 1; |
25310 | } |
25311 | |
25312 | if (!TARGET_ADJUST_UNROLL) |
25313 | return nunroll; |
25314 | |
25315 | /* Count the number of memory references within the loop body. |
25316 | This value determines the unrolling factor for bdver3 and bdver4 |
25317 | architectures. */ |
25318 | subrtx_iterator::array_type array; |
25319 | bbs = get_loop_body (loop); |
25320 | for (i = 0; i < loop->num_nodes; i++) |
25321 | FOR_BB_INSNS (bbs[i], insn) |
25322 | if (NONDEBUG_INSN_P (insn)) |
25323 | FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) |
25324 | if (const_rtx x = *iter) |
25325 | if (MEM_P (x)) |
25326 | { |
25327 | machine_mode mode = GET_MODE (x); |
25328 | unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; |
25329 | if (n_words > 4) |
25330 | mem_count += 2; |
25331 | else |
25332 | mem_count += 1; |
25333 | } |
25334 | free (ptr: bbs); |
25335 | |
25336 | if (mem_count && mem_count <=32) |
25337 | return MIN (nunroll, 32 / mem_count); |
25338 | |
25339 | return nunroll; |
25340 | } |
25341 | |
25342 | |
25343 | /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */ |
25344 | |
25345 | static bool |
25346 | ix86_float_exceptions_rounding_supported_p (void) |
25347 | { |
25348 | /* For x87 floating point with standard excess precision handling, |
25349 | there is no adddf3 pattern (since x87 floating point only has |
25350 | XFmode operations) so the default hook implementation gets this |
25351 | wrong. */ |
25352 | return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH); |
25353 | } |
25354 | |
25355 | /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */ |
25356 | |
25357 | static void |
25358 | ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) |
25359 | { |
25360 | if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH)) |
25361 | return; |
25362 | tree exceptions_var = create_tmp_var_raw (integer_type_node); |
25363 | if (TARGET_80387) |
25364 | { |
25365 | tree fenv_index_type = build_index_type (size_int (6)); |
25366 | tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type); |
25367 | tree fenv_var = create_tmp_var_raw (fenv_type); |
25368 | TREE_ADDRESSABLE (fenv_var) = 1; |
25369 | tree fenv_ptr = build_pointer_type (fenv_type); |
25370 | tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var); |
25371 | fenv_addr = fold_convert (ptr_type_node, fenv_addr); |
25372 | tree fnstenv = get_ix86_builtin (c: IX86_BUILTIN_FNSTENV); |
25373 | tree fldenv = get_ix86_builtin (c: IX86_BUILTIN_FLDENV); |
25374 | tree fnstsw = get_ix86_builtin (c: IX86_BUILTIN_FNSTSW); |
25375 | tree fnclex = get_ix86_builtin (c: IX86_BUILTIN_FNCLEX); |
25376 | tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr); |
25377 | tree hold_fnclex = build_call_expr (fnclex, 0); |
25378 | fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv, |
25379 | NULL_TREE, NULL_TREE); |
25380 | *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var, |
25381 | hold_fnclex); |
25382 | *clear = build_call_expr (fnclex, 0); |
25383 | tree sw_var = create_tmp_var_raw (short_unsigned_type_node); |
25384 | tree fnstsw_call = build_call_expr (fnstsw, 0); |
25385 | tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var, |
25386 | fnstsw_call, NULL_TREE, NULL_TREE); |
25387 | tree exceptions_x87 = fold_convert (integer_type_node, sw_var); |
25388 | tree update_mod = build4 (TARGET_EXPR, integer_type_node, |
25389 | exceptions_var, exceptions_x87, |
25390 | NULL_TREE, NULL_TREE); |
25391 | *update = build2 (COMPOUND_EXPR, integer_type_node, |
25392 | sw_mod, update_mod); |
25393 | tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr); |
25394 | *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv); |
25395 | } |
25396 | if (TARGET_SSE && TARGET_SSE_MATH) |
25397 | { |
25398 | tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node); |
25399 | tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node); |
25400 | tree stmxcsr = get_ix86_builtin (c: IX86_BUILTIN_STMXCSR); |
25401 | tree ldmxcsr = get_ix86_builtin (c: IX86_BUILTIN_LDMXCSR); |
25402 | tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0); |
25403 | tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node, |
25404 | mxcsr_orig_var, stmxcsr_hold_call, |
25405 | NULL_TREE, NULL_TREE); |
25406 | tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node, |
25407 | mxcsr_orig_var, |
25408 | build_int_cst (unsigned_type_node, 0x1f80)); |
25409 | hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val, |
25410 | build_int_cst (unsigned_type_node, 0xffffffc0)); |
25411 | tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node, |
25412 | mxcsr_mod_var, hold_mod_val, |
25413 | NULL_TREE, NULL_TREE); |
25414 | tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var); |
25415 | tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node, |
25416 | hold_assign_orig, hold_assign_mod); |
25417 | hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all, |
25418 | ldmxcsr_hold_call); |
25419 | if (*hold) |
25420 | *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all); |
25421 | else |
25422 | *hold = hold_all; |
25423 | tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var); |
25424 | if (*clear) |
25425 | *clear = build2 (COMPOUND_EXPR, void_type_node, *clear, |
25426 | ldmxcsr_clear_call); |
25427 | else |
25428 | *clear = ldmxcsr_clear_call; |
25429 | tree stxmcsr_update_call = build_call_expr (stmxcsr, 0); |
25430 | tree exceptions_sse = fold_convert (integer_type_node, |
25431 | stxmcsr_update_call); |
25432 | if (*update) |
25433 | { |
25434 | tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node, |
25435 | exceptions_var, exceptions_sse); |
25436 | tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node, |
25437 | exceptions_var, exceptions_mod); |
25438 | *update = build2 (COMPOUND_EXPR, integer_type_node, *update, |
25439 | exceptions_assign); |
25440 | } |
25441 | else |
25442 | *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var, |
25443 | exceptions_sse, NULL_TREE, NULL_TREE); |
25444 | tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var); |
25445 | *update = build2 (COMPOUND_EXPR, void_type_node, *update, |
25446 | ldmxcsr_update_call); |
25447 | } |
25448 | tree atomic_feraiseexcept |
25449 | = builtin_decl_implicit (fncode: BUILT_IN_ATOMIC_FERAISEEXCEPT); |
25450 | tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept, |
25451 | 1, exceptions_var); |
25452 | *update = build2 (COMPOUND_EXPR, void_type_node, *update, |
25453 | atomic_feraiseexcept_call); |
25454 | } |
25455 | |
25456 | #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES |
25457 | /* For i386, common symbol is local only for non-PIE binaries. For |
25458 | x86-64, common symbol is local only for non-PIE binaries or linker |
25459 | supports copy reloc in PIE binaries. */ |
25460 | |
25461 | static bool |
25462 | ix86_binds_local_p (const_tree exp) |
25463 | { |
25464 | bool direct_extern_access |
25465 | = (ix86_direct_extern_access |
25466 | && !(VAR_OR_FUNCTION_DECL_P (exp) |
25467 | && lookup_attribute (attr_name: "nodirect_extern_access" , |
25468 | DECL_ATTRIBUTES (exp)))); |
25469 | if (!direct_extern_access) |
25470 | ix86_has_no_direct_extern_access = true; |
25471 | return default_binds_local_p_3 (exp, flag_shlib != 0, true, |
25472 | direct_extern_access, |
25473 | (direct_extern_access |
25474 | && (!flag_pic |
25475 | || (TARGET_64BIT |
25476 | && HAVE_LD_PIE_COPYRELOC != 0)))); |
25477 | } |
25478 | |
25479 | /* If flag_pic or ix86_direct_extern_access is false, then neither |
25480 | local nor global relocs should be placed in readonly memory. */ |
25481 | |
25482 | static int |
25483 | ix86_reloc_rw_mask (void) |
25484 | { |
25485 | return (flag_pic || !ix86_direct_extern_access) ? 3 : 0; |
25486 | } |
25487 | #endif |
25488 | |
25489 | /* Return true iff ADDR can be used as a symbolic base address. */ |
25490 | |
25491 | static bool |
25492 | symbolic_base_address_p (rtx addr) |
25493 | { |
25494 | if (GET_CODE (addr) == SYMBOL_REF) |
25495 | return true; |
25496 | |
25497 | if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF) |
25498 | return true; |
25499 | |
25500 | return false; |
25501 | } |
25502 | |
25503 | /* Return true iff ADDR can be used as a base address. */ |
25504 | |
25505 | static bool |
25506 | base_address_p (rtx addr) |
25507 | { |
25508 | if (REG_P (addr)) |
25509 | return true; |
25510 | |
25511 | if (symbolic_base_address_p (addr)) |
25512 | return true; |
25513 | |
25514 | return false; |
25515 | } |
25516 | |
25517 | /* If MEM is in the form of [(base+symbase)+offset], extract the three |
25518 | parts of address and set to BASE, SYMBASE and OFFSET, otherwise |
25519 | return false. */ |
25520 | |
25521 | static bool |
25522 | (rtx mem, rtx *base, rtx *symbase, rtx *offset) |
25523 | { |
25524 | rtx addr; |
25525 | |
25526 | gcc_assert (MEM_P (mem)); |
25527 | |
25528 | addr = XEXP (mem, 0); |
25529 | |
25530 | if (GET_CODE (addr) == CONST) |
25531 | addr = XEXP (addr, 0); |
25532 | |
25533 | if (base_address_p (addr)) |
25534 | { |
25535 | *base = addr; |
25536 | *symbase = const0_rtx; |
25537 | *offset = const0_rtx; |
25538 | return true; |
25539 | } |
25540 | |
25541 | if (GET_CODE (addr) == PLUS |
25542 | && base_address_p (XEXP (addr, 0))) |
25543 | { |
25544 | rtx addend = XEXP (addr, 1); |
25545 | |
25546 | if (GET_CODE (addend) == CONST) |
25547 | addend = XEXP (addend, 0); |
25548 | |
25549 | if (CONST_INT_P (addend)) |
25550 | { |
25551 | *base = XEXP (addr, 0); |
25552 | *symbase = const0_rtx; |
25553 | *offset = addend; |
25554 | return true; |
25555 | } |
25556 | |
25557 | /* Also accept REG + symbolic ref, with or without a CONST_INT |
25558 | offset. */ |
25559 | if (REG_P (XEXP (addr, 0))) |
25560 | { |
25561 | if (symbolic_base_address_p (addr: addend)) |
25562 | { |
25563 | *base = XEXP (addr, 0); |
25564 | *symbase = addend; |
25565 | *offset = const0_rtx; |
25566 | return true; |
25567 | } |
25568 | |
25569 | if (GET_CODE (addend) == PLUS |
25570 | && symbolic_base_address_p (XEXP (addend, 0)) |
25571 | && CONST_INT_P (XEXP (addend, 1))) |
25572 | { |
25573 | *base = XEXP (addr, 0); |
25574 | *symbase = XEXP (addend, 0); |
25575 | *offset = XEXP (addend, 1); |
25576 | return true; |
25577 | } |
25578 | } |
25579 | } |
25580 | |
25581 | return false; |
25582 | } |
25583 | |
25584 | /* Given OPERANDS of consecutive load/store, check if we can merge |
25585 | them into move multiple. LOAD is true if they are load instructions. |
25586 | MODE is the mode of memory operands. */ |
25587 | |
25588 | bool |
25589 | ix86_operands_ok_for_move_multiple (rtx *operands, bool load, |
25590 | machine_mode mode) |
25591 | { |
25592 | HOST_WIDE_INT offval_1, offval_2, msize; |
25593 | rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, |
25594 | symbase_1, symbase_2, offset_1, offset_2; |
25595 | |
25596 | if (load) |
25597 | { |
25598 | mem_1 = operands[1]; |
25599 | mem_2 = operands[3]; |
25600 | reg_1 = operands[0]; |
25601 | reg_2 = operands[2]; |
25602 | } |
25603 | else |
25604 | { |
25605 | mem_1 = operands[0]; |
25606 | mem_2 = operands[2]; |
25607 | reg_1 = operands[1]; |
25608 | reg_2 = operands[3]; |
25609 | } |
25610 | |
25611 | gcc_assert (REG_P (reg_1) && REG_P (reg_2)); |
25612 | |
25613 | if (REGNO (reg_1) != REGNO (reg_2)) |
25614 | return false; |
25615 | |
25616 | /* Check if the addresses are in the form of [base+offset]. */ |
25617 | if (!extract_base_offset_in_addr (mem: mem_1, base: &base_1, symbase: &symbase_1, offset: &offset_1)) |
25618 | return false; |
25619 | if (!extract_base_offset_in_addr (mem: mem_2, base: &base_2, symbase: &symbase_2, offset: &offset_2)) |
25620 | return false; |
25621 | |
25622 | /* Check if the bases are the same. */ |
25623 | if (!rtx_equal_p (base_1, base_2) || !rtx_equal_p (symbase_1, symbase_2)) |
25624 | return false; |
25625 | |
25626 | offval_1 = INTVAL (offset_1); |
25627 | offval_2 = INTVAL (offset_2); |
25628 | msize = GET_MODE_SIZE (mode); |
25629 | /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */ |
25630 | if (offval_1 + msize != offval_2) |
25631 | return false; |
25632 | |
25633 | return true; |
25634 | } |
25635 | |
25636 | /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */ |
25637 | |
25638 | static bool |
25639 | ix86_optab_supported_p (int op, machine_mode mode1, machine_mode, |
25640 | optimization_type opt_type) |
25641 | { |
25642 | switch (op) |
25643 | { |
25644 | case asin_optab: |
25645 | case acos_optab: |
25646 | case log1p_optab: |
25647 | case exp_optab: |
25648 | case exp10_optab: |
25649 | case exp2_optab: |
25650 | case expm1_optab: |
25651 | case ldexp_optab: |
25652 | case scalb_optab: |
25653 | case round_optab: |
25654 | case lround_optab: |
25655 | return opt_type == OPTIMIZE_FOR_SPEED; |
25656 | |
25657 | case rint_optab: |
25658 | if (SSE_FLOAT_MODE_P (mode1) |
25659 | && TARGET_SSE_MATH |
25660 | && !flag_trapping_math |
25661 | && !TARGET_SSE4_1 |
25662 | && mode1 != HFmode) |
25663 | return opt_type == OPTIMIZE_FOR_SPEED; |
25664 | return true; |
25665 | |
25666 | case floor_optab: |
25667 | case ceil_optab: |
25668 | case btrunc_optab: |
25669 | if (((SSE_FLOAT_MODE_P (mode1) |
25670 | && TARGET_SSE_MATH |
25671 | && TARGET_SSE4_1) |
25672 | || mode1 == HFmode) |
25673 | && !flag_trapping_math) |
25674 | return true; |
25675 | return opt_type == OPTIMIZE_FOR_SPEED; |
25676 | |
25677 | case rsqrt_optab: |
25678 | return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode: mode1); |
25679 | |
25680 | default: |
25681 | return true; |
25682 | } |
25683 | } |
25684 | |
25685 | /* Address space support. |
25686 | |
25687 | This is not "far pointers" in the 16-bit sense, but an easy way |
25688 | to use %fs and %gs segment prefixes. Therefore: |
25689 | |
25690 | (a) All address spaces have the same modes, |
   (b) All address spaces have the same address forms,
25692 | (c) While %fs and %gs are technically subsets of the generic |
25693 | address space, they are probably not subsets of each other. |
25694 | (d) Since we have no access to the segment base register values |
25695 | without resorting to a system call, we cannot convert a |
25696 | non-default address space to a default address space. |
25697 | Therefore we do not claim %fs or %gs are subsets of generic. |
25698 | |
25699 | Therefore we can (mostly) use the default hooks. */ |
25700 | |
25701 | /* All use of segmentation is assumed to make address 0 valid. */ |
25702 | |
25703 | static bool |
25704 | ix86_addr_space_zero_address_valid (addr_space_t as) |
25705 | { |
25706 | return as != ADDR_SPACE_GENERIC; |
25707 | } |
25708 | |
25709 | static void |
25710 | ix86_init_libfuncs (void) |
25711 | { |
25712 | if (TARGET_64BIT) |
25713 | { |
25714 | set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4" ); |
25715 | set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4" ); |
25716 | } |
25717 | else |
25718 | { |
25719 | set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4" ); |
25720 | set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4" ); |
25721 | } |
25722 | |
25723 | #if TARGET_MACHO |
25724 | darwin_rename_builtins (); |
25725 | #endif |
25726 | } |
25727 | |
25728 | /* Set the value of FLT_EVAL_METHOD in float.h. When using only the |
25729 | FPU, assume that the fpcw is set to extended precision; when using |
25730 | only SSE, rounding is correct; when using both SSE and the FPU, |
25731 | the rounding precision is indeterminate, since either may be chosen |
25732 | apparently at random. */ |
25733 | |
static enum flt_eval_method
ix86_get_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_FAST:
      /* The fastest type to promote to will always be the native type,
	 whether that occurs with implicit excess precision or
	 otherwise.  */
      return TARGET_AVX512FP16
	     ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
	     : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
    case EXCESS_PRECISION_TYPE_STANDARD:
    case EXCESS_PRECISION_TYPE_IMPLICIT:
      /* Otherwise, the excess precision we want when we are
	 in a standards compliant mode, and the implicit precision we
	 provide would be identical were it not for the unpredictable
	 cases.  */
      if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      else if (!TARGET_80387)
	/* No x87 at all: everything is evaluated in SSE, so there is
	   no excess precision beyond float.  */
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
      else if (!TARGET_MIX_SSE_I387)
	{
	  /* A single FP unit is in use, so the evaluation method is
	     predictable (see the comment above this function).  */
	  if (!(TARGET_SSE && TARGET_SSE_MATH))
	    /* x87-only math: the fpcw is assumed to be set to
	       extended precision.  */
	    return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
	  else if (TARGET_SSE2)
	    return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
	}

      /* If we are in standards compliant mode, but we know we will
	 calculate in unpredictable precision, return
	 FLT_EVAL_METHOD_FLOAT.  There is no reason to introduce explicit
	 excess precision if the target can't guarantee it will honor
	 it.  */
      return (type == EXCESS_PRECISION_TYPE_STANDARD
	      ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
	      : FLT_EVAL_METHOD_UNPREDICTABLE);
    case EXCESS_PRECISION_TYPE_FLOAT16:
      /* _Float16 excess precision requires SSE math; it cannot be
	 honored with x87 arithmetic.  */
      if (TARGET_80387
	  && !(TARGET_SSE_MATH && TARGET_SSE))
	error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>" );
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
    default:
      gcc_unreachable ();
    }

  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
25783 | |
25784 | /* Return true if _BitInt(N) is supported and fill its details into *INFO. */ |
25785 | bool |
25786 | ix86_bitint_type_info (int n, struct bitint_info *info) |
25787 | { |
25788 | if (n <= 8) |
25789 | info->limb_mode = QImode; |
25790 | else if (n <= 16) |
25791 | info->limb_mode = HImode; |
25792 | else if (n <= 32 || (!TARGET_64BIT && n > 64)) |
25793 | info->limb_mode = SImode; |
25794 | else |
25795 | info->limb_mode = DImode; |
25796 | info->abi_limb_mode = info->limb_mode; |
25797 | info->big_endian = false; |
25798 | info->extended = false; |
25799 | return true; |
25800 | } |
25801 | |
25802 | /* Implement PUSH_ROUNDING. On 386, we have pushw instruction that |
25803 | decrements by exactly 2 no matter what the position was, there is no pushb. |
25804 | |
25805 | But as CIE data alignment factor on this arch is -4 for 32bit targets |
25806 | and -8 for 64bit targets, we need to make sure all stack pointer adjustments |
25807 | are in multiple of 4 for 32bit targets and 8 for 64bit targets. */ |
25808 | |
25809 | poly_int64 |
25810 | ix86_push_rounding (poly_int64 bytes) |
25811 | { |
25812 | return ROUND_UP (bytes, UNITS_PER_WORD); |
25813 | } |
25814 | |
/* Use 8 bits of metadata starting from bit 48 for LAM_U48,
   6 bits of metadata starting from bit 57 for LAM_U57.  */
25817 | #define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48 \ |
25818 | ? 48 \ |
25819 | : (ix86_lam_type == lam_u57 ? 57 : 0)) |
25820 | #define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48 \ |
25821 | ? 8 \ |
25822 | : (ix86_lam_type == lam_u57 ? 6 : 0)) |
25823 | |
25824 | /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. */ |
25825 | bool |
25826 | ix86_memtag_can_tag_addresses () |
25827 | { |
25828 | return ix86_lam_type != lam_none && TARGET_LP64; |
25829 | } |
25830 | |
25831 | /* Implement TARGET_MEMTAG_TAG_SIZE. */ |
25832 | unsigned char |
25833 | ix86_memtag_tag_size () |
25834 | { |
25835 | return IX86_HWASAN_TAG_SIZE; |
25836 | } |
25837 | |
25838 | /* Implement TARGET_MEMTAG_SET_TAG. */ |
25839 | rtx |
25840 | ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target) |
25841 | { |
25842 | /* default_memtag_insert_random_tag may |
25843 | generate tag with value more than 6 bits. */ |
25844 | if (ix86_lam_type == lam_u57) |
25845 | { |
25846 | unsigned HOST_WIDE_INT and_imm |
25847 | = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1; |
25848 | |
25849 | emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm))); |
25850 | } |
25851 | tag = expand_simple_binop (Pmode, ASHIFT, tag, |
25852 | GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX, |
25853 | /* unsignedp = */1, OPTAB_WIDEN); |
25854 | rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target, |
25855 | /* unsignedp = */1, OPTAB_DIRECT); |
25856 | return ret; |
25857 | } |
25858 | |
25859 | /* Implement TARGET_MEMTAG_EXTRACT_TAG. */ |
25860 | rtx |
25861 | (rtx tagged_pointer, rtx target) |
25862 | { |
25863 | rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer, |
25864 | GEN_INT (IX86_HWASAN_SHIFT), target, |
25865 | /* unsignedp = */0, |
25866 | OPTAB_DIRECT); |
25867 | rtx ret = gen_reg_rtx (QImode); |
25868 | /* Mask off bit63 when LAM_U57. */ |
25869 | if (ix86_lam_type == lam_u57) |
25870 | { |
25871 | unsigned HOST_WIDE_INT and_imm |
25872 | = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1; |
25873 | emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag), |
25874 | gen_int_mode (and_imm, QImode))); |
25875 | } |
25876 | else |
25877 | emit_move_insn (ret, gen_lowpart (QImode, tag)); |
25878 | return ret; |
25879 | } |
25880 | |
25881 | /* The default implementation of TARGET_MEMTAG_UNTAGGED_POINTER. */ |
25882 | rtx |
25883 | ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target) |
25884 | { |
25885 | /* Leave bit63 alone. */ |
25886 | rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) |
25887 | + (HOST_WIDE_INT_1U << 63) - 1), |
25888 | Pmode); |
25889 | rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer, |
25890 | tag_mask, target, true, |
25891 | OPTAB_DIRECT); |
25892 | gcc_assert (untagged_base); |
25893 | return untagged_base; |
25894 | } |
25895 | |
25896 | /* Implement TARGET_MEMTAG_ADD_TAG. */ |
25897 | rtx |
25898 | ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset) |
25899 | { |
25900 | rtx base_tag = gen_reg_rtx (QImode); |
25901 | rtx base_addr = gen_reg_rtx (Pmode); |
25902 | rtx tagged_addr = gen_reg_rtx (Pmode); |
25903 | rtx new_tag = gen_reg_rtx (QImode); |
25904 | unsigned HOST_WIDE_INT and_imm |
25905 | = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1; |
25906 | |
25907 | /* When there's "overflow" in tag adding, |
25908 | need to mask the most significant bit off. */ |
25909 | emit_move_insn (base_tag, ix86_memtag_extract_tag (tagged_pointer: base, NULL_RTX)); |
25910 | emit_move_insn (base_addr, |
25911 | ix86_memtag_untagged_pointer (tagged_pointer: base, NULL_RTX)); |
25912 | emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode))); |
25913 | emit_move_insn (new_tag, base_tag); |
25914 | emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode))); |
25915 | emit_move_insn (tagged_addr, |
25916 | ix86_memtag_set_tag (untagged: base_addr, tag: new_tag, NULL_RTX)); |
25917 | return plus_constant (Pmode, tagged_addr, offset); |
25918 | } |
25919 | |
25920 | /* Target-specific selftests. */ |
25921 | |
25922 | #if CHECKING_P |
25923 | |
25924 | namespace selftest { |
25925 | |
25926 | /* Verify that hard regs are dumped as expected (in compact mode). */ |
25927 | |
static void
ix86_test_dumping_hard_regs ()
{
  /* Hard regs 0 and 1 should be dumped with their ia32 names.  */
  ASSERT_RTL_DUMP_EQ ("(reg:SI ax)" , gen_raw_REG (SImode, 0));
  ASSERT_RTL_DUMP_EQ ("(reg:SI dx)" , gen_raw_REG (SImode, 1));
}
25934 | |
25935 | /* Test dumping an insn with repeated references to the same SCRATCH, |
25936 | to verify the rtx_reuse code. */ |
25937 | |
25938 | static void |
25939 | ix86_test_dumping_memory_blockage () |
25940 | { |
25941 | set_new_first_and_last_insn (NULL, NULL); |
25942 | |
25943 | rtx pat = gen_memory_blockage (); |
25944 | rtx_reuse_manager r; |
25945 | r.preprocess (x: pat); |
25946 | |
25947 | /* Verify that the repeated references to the SCRATCH show use |
25948 | reuse IDS. The first should be prefixed with a reuse ID, |
25949 | and the second should be dumped as a "reuse_rtx" of that ID. |
25950 | The expected string assumes Pmode == DImode. */ |
25951 | if (Pmode == DImode) |
25952 | ASSERT_RTL_DUMP_EQ_WITH_REUSE |
25953 | ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n" |
25954 | " (unspec:BLK [\n" |
25955 | " (mem/v:BLK (reuse_rtx 0) [0 A8])\n" |
25956 | " ] UNSPEC_MEMORY_BLOCKAGE)))\n" , pat, &r); |
25957 | } |
25958 | |
25959 | /* Verify loading an RTL dump; specifically a dump of copying |
25960 | a param on x86_64 from a hard reg into the frame. |
25961 | This test is target-specific since the dump contains target-specific |
25962 | hard reg names. */ |
25963 | |
25964 | static void |
25965 | ix86_test_loading_dump_fragment_1 () |
25966 | { |
25967 | rtl_dump_test t (SELFTEST_LOCATION, |
25968 | locate_file (path: "x86_64/copy-hard-reg-into-frame.rtl" )); |
25969 | |
25970 | rtx_insn *insn = get_insn_by_uid (uid: 1); |
25971 | |
25972 | /* The block structure and indentation here is purely for |
25973 | readability; it mirrors the structure of the rtx. */ |
25974 | tree mem_expr; |
25975 | { |
25976 | rtx pat = PATTERN (insn); |
25977 | ASSERT_EQ (SET, GET_CODE (pat)); |
25978 | { |
25979 | rtx dest = SET_DEST (pat); |
25980 | ASSERT_EQ (MEM, GET_CODE (dest)); |
25981 | /* Verify the "/c" was parsed. */ |
25982 | ASSERT_TRUE (RTX_FLAG (dest, call)); |
25983 | ASSERT_EQ (SImode, GET_MODE (dest)); |
25984 | { |
25985 | rtx addr = XEXP (dest, 0); |
25986 | ASSERT_EQ (PLUS, GET_CODE (addr)); |
25987 | ASSERT_EQ (DImode, GET_MODE (addr)); |
25988 | { |
25989 | rtx lhs = XEXP (addr, 0); |
25990 | /* Verify that the "frame" REG was consolidated. */ |
25991 | ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs); |
25992 | } |
25993 | { |
25994 | rtx rhs = XEXP (addr, 1); |
25995 | ASSERT_EQ (CONST_INT, GET_CODE (rhs)); |
25996 | ASSERT_EQ (-4, INTVAL (rhs)); |
25997 | } |
25998 | } |
25999 | /* Verify the "[1 i+0 S4 A32]" was parsed. */ |
26000 | ASSERT_EQ (1, MEM_ALIAS_SET (dest)); |
26001 | /* "i" should have been handled by synthesizing a global int |
26002 | variable named "i". */ |
26003 | mem_expr = MEM_EXPR (dest); |
26004 | ASSERT_NE (mem_expr, NULL); |
26005 | ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr)); |
26006 | ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr)); |
26007 | ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr))); |
26008 | ASSERT_STREQ ("i" , IDENTIFIER_POINTER (DECL_NAME (mem_expr))); |
26009 | /* "+0". */ |
26010 | ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest)); |
26011 | ASSERT_EQ (0, MEM_OFFSET (dest)); |
26012 | /* "S4". */ |
26013 | ASSERT_EQ (4, MEM_SIZE (dest)); |
26014 | /* "A32. */ |
26015 | ASSERT_EQ (32, MEM_ALIGN (dest)); |
26016 | } |
26017 | { |
26018 | rtx src = SET_SRC (pat); |
26019 | ASSERT_EQ (REG, GET_CODE (src)); |
26020 | ASSERT_EQ (SImode, GET_MODE (src)); |
26021 | ASSERT_EQ (5, REGNO (src)); |
26022 | tree reg_expr = REG_EXPR (src); |
26023 | /* "i" here should point to the same var as for the MEM_EXPR. */ |
26024 | ASSERT_EQ (reg_expr, mem_expr); |
26025 | } |
26026 | } |
26027 | } |
26028 | |
26029 | /* Verify that the RTL loader copes with a call_insn dump. |
26030 | This test is target-specific since the dump contains a target-specific |
26031 | hard reg name. */ |
26032 | |
26033 | static void |
26034 | ix86_test_loading_call_insn () |
26035 | { |
26036 | /* The test dump includes register "xmm0", where requires TARGET_SSE |
26037 | to exist. */ |
26038 | if (!TARGET_SSE) |
26039 | return; |
26040 | |
26041 | rtl_dump_test t (SELFTEST_LOCATION, locate_file (path: "x86_64/call-insn.rtl" )); |
26042 | |
26043 | rtx_insn *insn = get_insns (); |
26044 | ASSERT_EQ (CALL_INSN, GET_CODE (insn)); |
26045 | |
26046 | /* "/j". */ |
26047 | ASSERT_TRUE (RTX_FLAG (insn, jump)); |
26048 | |
26049 | rtx pat = PATTERN (insn); |
26050 | ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat))); |
26051 | |
26052 | /* Verify REG_NOTES. */ |
26053 | { |
26054 | /* "(expr_list:REG_CALL_DECL". */ |
26055 | ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn))); |
26056 | rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn)); |
26057 | ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0)); |
26058 | |
26059 | /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */ |
26060 | rtx_expr_list *note1 = note0->next (); |
26061 | ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1)); |
26062 | |
26063 | ASSERT_EQ (NULL, note1->next ()); |
26064 | } |
26065 | |
26066 | /* Verify CALL_INSN_FUNCTION_USAGE. */ |
26067 | { |
26068 | /* "(expr_list:DF (use (reg:DF 21 xmm0))". */ |
26069 | rtx_expr_list *usage |
26070 | = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn)); |
26071 | ASSERT_EQ (EXPR_LIST, GET_CODE (usage)); |
26072 | ASSERT_EQ (DFmode, GET_MODE (usage)); |
26073 | ASSERT_EQ (USE, GET_CODE (usage->element ())); |
26074 | ASSERT_EQ (NULL, usage->next ()); |
26075 | } |
26076 | } |
26077 | |
26078 | /* Verify that the RTL loader copes a dump from print_rtx_function. |
26079 | This test is target-specific since the dump contains target-specific |
26080 | hard reg names. */ |
26081 | |
26082 | static void |
26083 | ix86_test_loading_full_dump () |
26084 | { |
26085 | rtl_dump_test t (SELFTEST_LOCATION, locate_file (path: "x86_64/times-two.rtl" )); |
26086 | |
26087 | ASSERT_STREQ ("times_two" , IDENTIFIER_POINTER (DECL_NAME (cfun->decl))); |
26088 | |
26089 | rtx_insn *insn_1 = get_insn_by_uid (uid: 1); |
26090 | ASSERT_EQ (NOTE, GET_CODE (insn_1)); |
26091 | |
26092 | rtx_insn *insn_7 = get_insn_by_uid (uid: 7); |
26093 | ASSERT_EQ (INSN, GET_CODE (insn_7)); |
26094 | ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7))); |
26095 | |
26096 | rtx_insn *insn_15 = get_insn_by_uid (uid: 15); |
26097 | ASSERT_EQ (INSN, GET_CODE (insn_15)); |
26098 | ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15))); |
26099 | |
26100 | /* Verify crtl->return_rtx. */ |
26101 | ASSERT_EQ (REG, GET_CODE (crtl->return_rtx)); |
26102 | ASSERT_EQ (0, REGNO (crtl->return_rtx)); |
26103 | ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx)); |
26104 | } |
26105 | |
26106 | /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns. |
26107 | In particular, verify that it correctly loads the 2nd operand. |
26108 | This test is target-specific since these are machine-specific |
26109 | operands (and enums). */ |
26110 | |
26111 | static void |
26112 | ix86_test_loading_unspec () |
26113 | { |
26114 | rtl_dump_test t (SELFTEST_LOCATION, locate_file (path: "x86_64/unspec.rtl" )); |
26115 | |
26116 | ASSERT_STREQ ("test_unspec" , IDENTIFIER_POINTER (DECL_NAME (cfun->decl))); |
26117 | |
26118 | ASSERT_TRUE (cfun); |
26119 | |
26120 | /* Test of an UNSPEC. */ |
26121 | rtx_insn *insn = get_insns (); |
26122 | ASSERT_EQ (INSN, GET_CODE (insn)); |
26123 | rtx set = single_set (insn); |
26124 | ASSERT_NE (NULL, set); |
26125 | rtx dst = SET_DEST (set); |
26126 | ASSERT_EQ (MEM, GET_CODE (dst)); |
26127 | rtx src = SET_SRC (set); |
26128 | ASSERT_EQ (UNSPEC, GET_CODE (src)); |
26129 | ASSERT_EQ (BLKmode, GET_MODE (src)); |
26130 | ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1)); |
26131 | |
26132 | rtx v0 = XVECEXP (src, 0, 0); |
26133 | |
26134 | /* Verify that the two uses of the first SCRATCH have pointer |
26135 | equality. */ |
26136 | rtx scratch_a = XEXP (dst, 0); |
26137 | ASSERT_EQ (SCRATCH, GET_CODE (scratch_a)); |
26138 | |
26139 | rtx scratch_b = XEXP (v0, 0); |
26140 | ASSERT_EQ (SCRATCH, GET_CODE (scratch_b)); |
26141 | |
26142 | ASSERT_EQ (scratch_a, scratch_b); |
26143 | |
26144 | /* Verify that the two mems are thus treated as equal. */ |
26145 | ASSERT_TRUE (rtx_equal_p (dst, v0)); |
26146 | |
26147 | /* Verify that the insn is recognized. */ |
26148 | ASSERT_NE(-1, recog_memoized (insn)); |
26149 | |
26150 | /* Test of an UNSPEC_VOLATILE, which has its own enum values. */ |
26151 | insn = NEXT_INSN (insn); |
26152 | ASSERT_EQ (INSN, GET_CODE (insn)); |
26153 | |
26154 | set = single_set (insn); |
26155 | ASSERT_NE (NULL, set); |
26156 | |
26157 | src = SET_SRC (set); |
26158 | ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src)); |
26159 | ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1)); |
26160 | } |
26161 | |
26162 | /* Run all target-specific selftests. */ |
26163 | |
static void
ix86_run_selftests (void)
{
  /* Tests of RTL dumping.  */
  ix86_test_dumping_hard_regs ();
  ix86_test_dumping_memory_blockage ();

  /* Various tests of loading RTL dumps, here because they contain
     ix86-isms (e.g. names of hard regs).  */
  ix86_test_loading_dump_fragment_1 ();
  ix86_test_loading_call_insn ();
  ix86_test_loading_full_dump ();
  ix86_test_loading_unspec ();
}
26177 | |
26178 | } // namespace selftest |
26179 | |
26180 | #endif /* CHECKING_P */ |
26181 | |
/* Table of attribute spec groups recognized by this target; used by
   TARGET_ATTRIBUTE_TABLE below.  */
static const scoped_attribute_specs *const ix86_attribute_table[] =
{
  &ix86_gnu_attribute_table
};
26186 | |
26187 | /* Initialize the GCC target structure. */ |
26188 | #undef TARGET_RETURN_IN_MEMORY |
26189 | #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory |
26190 | |
26191 | #undef TARGET_LEGITIMIZE_ADDRESS |
26192 | #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address |
26193 | |
26194 | #undef TARGET_ATTRIBUTE_TABLE |
26195 | #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table |
26196 | #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P |
26197 | #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true |
26198 | #if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
26199 | # undef TARGET_MERGE_DECL_ATTRIBUTES |
26200 | # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes |
26201 | #endif |
26202 | |
26203 | #undef TARGET_INVALID_CONVERSION |
26204 | #define TARGET_INVALID_CONVERSION ix86_invalid_conversion |
26205 | |
26206 | #undef TARGET_INVALID_UNARY_OP |
26207 | #define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op |
26208 | |
26209 | #undef TARGET_INVALID_BINARY_OP |
26210 | #define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op |
26211 | |
26212 | #undef TARGET_COMP_TYPE_ATTRIBUTES |
26213 | #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes |
26214 | |
26215 | #undef TARGET_INIT_BUILTINS |
26216 | #define TARGET_INIT_BUILTINS ix86_init_builtins |
26217 | #undef TARGET_BUILTIN_DECL |
26218 | #define TARGET_BUILTIN_DECL ix86_builtin_decl |
26219 | #undef TARGET_EXPAND_BUILTIN |
26220 | #define TARGET_EXPAND_BUILTIN ix86_expand_builtin |
26221 | |
26222 | #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION |
26223 | #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ |
26224 | ix86_builtin_vectorized_function |
26225 | |
26226 | #undef TARGET_VECTORIZE_BUILTIN_GATHER |
26227 | #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather |
26228 | |
26229 | #undef TARGET_VECTORIZE_BUILTIN_SCATTER |
26230 | #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter |
26231 | |
26232 | #undef TARGET_BUILTIN_RECIPROCAL |
26233 | #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal |
26234 | |
26235 | #undef TARGET_ASM_FUNCTION_EPILOGUE |
26236 | #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue |
26237 | |
26238 | #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY |
26239 | #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \ |
26240 | ix86_print_patchable_function_entry |
26241 | |
26242 | #undef TARGET_ENCODE_SECTION_INFO |
26243 | #ifndef SUBTARGET_ENCODE_SECTION_INFO |
26244 | #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info |
26245 | #else |
26246 | #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO |
26247 | #endif |
26248 | |
26249 | #undef TARGET_ASM_OPEN_PAREN |
26250 | #define TARGET_ASM_OPEN_PAREN "" |
26251 | #undef TARGET_ASM_CLOSE_PAREN |
26252 | #define TARGET_ASM_CLOSE_PAREN "" |
26253 | |
26254 | #undef TARGET_ASM_BYTE_OP |
26255 | #define TARGET_ASM_BYTE_OP ASM_BYTE |
26256 | |
26257 | #undef TARGET_ASM_ALIGNED_HI_OP |
26258 | #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT |
26259 | #undef TARGET_ASM_ALIGNED_SI_OP |
26260 | #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG |
26261 | #ifdef ASM_QUAD |
26262 | #undef TARGET_ASM_ALIGNED_DI_OP |
26263 | #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD |
26264 | #endif |
26265 | |
26266 | #undef TARGET_PROFILE_BEFORE_PROLOGUE |
26267 | #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue |
26268 | |
26269 | #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME |
26270 | #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name |
26271 | |
26272 | #undef TARGET_ASM_UNALIGNED_HI_OP |
26273 | #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP |
26274 | #undef TARGET_ASM_UNALIGNED_SI_OP |
26275 | #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP |
26276 | #undef TARGET_ASM_UNALIGNED_DI_OP |
26277 | #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP |
26278 | |
26279 | #undef TARGET_PRINT_OPERAND |
26280 | #define TARGET_PRINT_OPERAND ix86_print_operand |
26281 | #undef TARGET_PRINT_OPERAND_ADDRESS |
26282 | #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address |
26283 | #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P |
26284 | #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p |
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
26287 | |
26288 | #undef TARGET_SCHED_INIT_GLOBAL |
26289 | #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global |
26290 | #undef TARGET_SCHED_ADJUST_COST |
26291 | #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost |
26292 | #undef TARGET_SCHED_ISSUE_RATE |
26293 | #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate |
26294 | #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD |
26295 | #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ |
26296 | ia32_multipass_dfa_lookahead |
26297 | #undef TARGET_SCHED_MACRO_FUSION_P |
26298 | #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p |
26299 | #undef TARGET_SCHED_MACRO_FUSION_PAIR_P |
26300 | #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p |
26301 | |
26302 | #undef TARGET_FUNCTION_OK_FOR_SIBCALL |
26303 | #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall |
26304 | |
26305 | #undef TARGET_MEMMODEL_CHECK |
26306 | #define TARGET_MEMMODEL_CHECK ix86_memmodel_check |
26307 | |
26308 | #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV |
26309 | #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv |
26310 | |
26311 | #ifdef HAVE_AS_TLS |
26312 | #undef TARGET_HAVE_TLS |
26313 | #define TARGET_HAVE_TLS true |
26314 | #endif |
26315 | #undef TARGET_CANNOT_FORCE_CONST_MEM |
26316 | #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem |
26317 | #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P |
26318 | #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true |
26319 | |
26320 | #undef TARGET_DELEGITIMIZE_ADDRESS |
26321 | #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address |
26322 | |
26323 | #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P |
26324 | #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p |
26325 | |
26326 | #undef TARGET_MS_BITFIELD_LAYOUT_P |
26327 | #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p |
26328 | |
26329 | #if TARGET_MACHO |
26330 | #undef TARGET_BINDS_LOCAL_P |
26331 | #define TARGET_BINDS_LOCAL_P darwin_binds_local_p |
26332 | #else |
26333 | #undef TARGET_BINDS_LOCAL_P |
26334 | #define TARGET_BINDS_LOCAL_P ix86_binds_local_p |
26335 | #endif |
26336 | #if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
26337 | #undef TARGET_BINDS_LOCAL_P |
26338 | #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p |
26339 | #endif |
26340 | |
26341 | #undef TARGET_ASM_OUTPUT_MI_THUNK |
26342 | #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk |
26343 | #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK |
26344 | #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk |
26345 | |
26346 | #undef TARGET_ASM_FILE_START |
26347 | #define TARGET_ASM_FILE_START x86_file_start |
26348 | |
26349 | #undef TARGET_OPTION_OVERRIDE |
26350 | #define TARGET_OPTION_OVERRIDE ix86_option_override |
26351 | |
26352 | #undef TARGET_REGISTER_MOVE_COST |
26353 | #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost |
26354 | #undef TARGET_MEMORY_MOVE_COST |
26355 | #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost |
26356 | #undef TARGET_RTX_COSTS |
/* Target hook table entries.  Each hook macro is #undef'd (defaults come
   from target-def.h) and then redefined to the i386-specific
   implementation; targetm is built from these via TARGET_INITIALIZER at
   the end of the file.  */
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_OVERLAP_OP_BY_PIECES_P
#define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true

/* Condition-code handling.  */
#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM FLAGS_REG
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin

/* Function multi-versioning ("target" attribute dispatching).  */
#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

/* va_list handling.  */
#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
#undef TARGET_C_BITINT_TYPE_INFO
#define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info
/* Argument passing and calling conventions.  */
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_PUSH_ARGUMENT
#define TARGET_PUSH_ARGUMENT ix86_push_argument
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN ix86_warn_func_return

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

/* Mode support queries.  */
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ix86_libgcc_floating_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_EMIT_SUPPORT_TINFOS
#define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_ZERO_CALL_USED_REGS
#define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

/* Register allocation / reload support.  */
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

/* Vectorizer hooks.  */
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_SPLIT_REDUCTION
#define TARGET_VECTORIZE_SPLIT_REDUCTION \
  ix86_split_reduction
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
  ix86_autovectorize_vector_modes
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs

/* Per-function "target"/"optimize" option handling.  */
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
/* Was "#define ix86_live_on_entry": that left TARGET_EXTRA_LIVE_ON_ENTRY
   undefined (so the default hook would silently be used) and defined
   ix86_live_on_entry as an empty object-like macro, erasing the
   identifier in the rest of the file.  Bind the hook properly.  */
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
/* NOTE(review): the comment above says this hook is disabled, yet the
   two lines below actively install it — confirm against upstream whether
   they should be commented out / #if 0'd.  */
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

/* OpenMP / SIMD-clone ("declare simd") support.  */
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable

#undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
#define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

/* Mode-switching (optimize_mode_switching) hooks.  */
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS X86_CUSTOM_FUNCTION_TEST

#undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS ix86_init_libfuncs

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc

#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost

#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p

/* Hard-register / mode queries.  */
#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  ix86_hard_regno_call_part_clobbered

#undef TARGET_INSN_CALLEE_ABI
#define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class

#undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
#define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment

#undef TARGET_STATIC_RTX_ALIGNMENT
#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment

#undef TARGET_EMPTY_RECORD_P
#define TARGET_EMPTY_RECORD_P ix86_is_empty_record

#undef TARGET_WARN_PARAMETER_PASSING_ABI
#define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi

#undef TARGET_GET_MULTILIB_ABI_NAME
#define TARGET_GET_MULTILIB_ABI_NAME \
  ix86_get_multilib_abi_name

#undef TARGET_IFUNC_REF_LOCAL_OK
#define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok

#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_ASM_RELOC_RW_MASK
# define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
#endif

/* Memory-tagging hooks.  */
#undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
#define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses

#undef TARGET_MEMTAG_ADD_TAG
#define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag

#undef TARGET_MEMTAG_SET_TAG
#define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag

#undef TARGET_MEMTAG_EXTRACT_TAG
/* Was "#define ix86_memtag_extract_tag": the hook-macro name was missing,
   so TARGET_MEMTAG_EXTRACT_TAG stayed at its default and
   ix86_memtag_extract_tag became an empty object-like macro, erasing the
   identifier in later references.  Bind the hook properly.  */
#define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag

#undef TARGET_MEMTAG_UNTAGGED_POINTER
#define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer

#undef TARGET_MEMTAG_TAG_SIZE
#define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size

26751 | static bool |
26752 | ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED) |
26753 | { |
26754 | #ifdef OPTION_GLIBC |
26755 | if (OPTION_GLIBC) |
26756 | return (built_in_function)fcode == BUILT_IN_MEMPCPY; |
26757 | else |
26758 | return false; |
26759 | #else |
26760 | return false; |
26761 | #endif |
26762 | } |

#undef TARGET_LIBC_HAS_FAST_FUNCTION
#define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function

26767 | static unsigned |
26768 | ix86_libm_function_max_error (unsigned cfn, machine_mode mode, |
26769 | bool boundary_p) |
26770 | { |
26771 | #ifdef OPTION_GLIBC |
26772 | bool glibc_p = OPTION_GLIBC; |
26773 | #else |
26774 | bool glibc_p = false; |
26775 | #endif |
26776 | if (glibc_p) |
26777 | { |
26778 | /* If __FAST_MATH__ is defined, glibc provides libmvec. */ |
26779 | unsigned int libmvec_ret = 0; |
26780 | if (!flag_trapping_math |
26781 | && flag_unsafe_math_optimizations |
26782 | && flag_finite_math_only |
26783 | && !flag_signed_zeros |
26784 | && !flag_errno_math) |
26785 | switch (cfn) |
26786 | { |
26787 | CASE_CFN_COS: |
26788 | CASE_CFN_COS_FN: |
26789 | CASE_CFN_SIN: |
26790 | CASE_CFN_SIN_FN: |
26791 | if (!boundary_p) |
26792 | { |
26793 | /* With non-default rounding modes, libmvec provides |
26794 | complete garbage in results. E.g. |
26795 | _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD |
26796 | returns 0.00333309174f rather than 1.40129846e-45f. */ |
26797 | if (flag_rounding_math) |
26798 | return ~0U; |
26799 | /* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html |
26800 | claims libmvec maximum error is 4ulps. |
26801 | My own random testing indicates 2ulps for SFmode and |
26802 | 0.5ulps for DFmode, but let's go with the 4ulps. */ |
26803 | libmvec_ret = 4; |
26804 | } |
26805 | break; |
26806 | default: |
26807 | break; |
26808 | } |
26809 | unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode, |
26810 | boundary_p); |
26811 | return MAX (ret, libmvec_ret); |
26812 | } |
26813 | return default_libm_function_max_error (cfn, mode, boundary_p); |
26814 | } |

#undef TARGET_LIBM_FUNCTION_MAX_ERROR
#define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error

/* Target self-tests are only built into checking-enabled compilers.  */
#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
#endif /* #if CHECKING_P */
26823 | |
/* The i386 target hook table, assembled from the TARGET_* macros defined
   above via TARGET_INITIALIZER.  */
struct gcc_target targetm = TARGET_INITIALIZER;
26825 | |
26826 | #include "gt-i386.h" |
26827 | |