1/* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2024 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 3, or (at your option)
9any later version.
10
11GCC is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#define INCLUDE_STRING
21#define IN_TARGET_CODE 1
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "backend.h"
27#include "rtl.h"
28#include "tree.h"
29#include "memmodel.h"
30#include "gimple.h"
31#include "cfghooks.h"
32#include "cfgloop.h"
33#include "df.h"
34#include "tm_p.h"
35#include "stringpool.h"
36#include "expmed.h"
37#include "optabs.h"
38#include "regs.h"
39#include "emit-rtl.h"
40#include "recog.h"
41#include "cgraph.h"
42#include "diagnostic.h"
43#include "cfgbuild.h"
44#include "alias.h"
45#include "fold-const.h"
46#include "attribs.h"
47#include "calls.h"
48#include "stor-layout.h"
49#include "varasm.h"
50#include "output.h"
51#include "insn-attr.h"
52#include "flags.h"
53#include "except.h"
54#include "explow.h"
55#include "expr.h"
56#include "cfgrtl.h"
57#include "common/common-target.h"
58#include "langhooks.h"
59#include "reload.h"
60#include "gimplify.h"
61#include "dwarf2.h"
62#include "tm-constrs.h"
63#include "cselib.h"
64#include "sched-int.h"
65#include "opts.h"
66#include "tree-pass.h"
67#include "context.h"
68#include "pass_manager.h"
69#include "target-globals.h"
70#include "gimple-iterator.h"
71#include "gimple-fold.h"
72#include "tree-vectorizer.h"
73#include "shrink-wrap.h"
74#include "builtins.h"
75#include "rtl-iter.h"
76#include "tree-iterator.h"
77#include "dbgcnt.h"
78#include "case-cfn-macros.h"
79#include "dojump.h"
80#include "fold-const-call.h"
81#include "tree-vrp.h"
82#include "tree-ssanames.h"
83#include "selftest.h"
84#include "selftest-rtl.h"
85#include "print-rtl.h"
86#include "intl.h"
87#include "ifcvt.h"
88#include "symbol-summary.h"
89#include "sreal.h"
90#include "ipa-cp.h"
91#include "ipa-prop.h"
92#include "ipa-fnsummary.h"
93#include "wide-int-bitmask.h"
94#include "tree-vector-builder.h"
95#include "debug.h"
96#include "dwarf2out.h"
97#include "i386-options.h"
98#include "i386-builtins.h"
99#include "i386-expand.h"
100#include "i386-features.h"
101#include "function-abi.h"
102#include "rtl-error.h"
103
104/* This file should be included last. */
105#include "target-def.h"
106
107static rtx legitimize_dllimport_symbol (rtx, bool);
108static rtx legitimize_pe_coff_extern_decl (rtx, bool);
109static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
110static void ix86_emit_restore_reg_using_pop (rtx, bool = false);
111
112
113#ifndef CHECK_STACK_LIMIT
114#define CHECK_STACK_LIMIT (-1)
115#endif
116
117/* Return index of given mode in mult and division cost tables. */
118#define MODE_INDEX(mode) \
119 ((mode) == QImode ? 0 \
120 : (mode) == HImode ? 1 \
121 : (mode) == SImode ? 2 \
122 : (mode) == DImode ? 3 \
123 : 4)
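/* For example, MODE_INDEX (SImode) is 2, so the SImode entry of the
   multiply/division cost tables is used; any mode other than QImode,
   HImode, SImode or DImode maps to the last entry (index 4). */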
124
125
126/* Set by -mtune. */
127const struct processor_costs *ix86_tune_cost = NULL;
128
129/* Set by -mtune or -Os. */
130const struct processor_costs *ix86_cost = NULL;
131
132/* In case the average insn count for single function invocation is
133 lower than this constant, emit fast (but longer) prologue and
134 epilogue code. */
135#define FAST_PROLOGUE_INSN_COUNT 20
136
/* Names for the 8-bit (low), 8-bit (high), and 16-bit registers,
   respectively. */
138static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
139static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
140static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
141
142/* Array of the smallest class containing reg number REGNO, indexed by
143 REGNO. Used by REGNO_REG_CLASS in i386.h. */
144
145enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
146{
147 /* ax, dx, cx, bx */
148 AREG, DREG, CREG, BREG,
149 /* si, di, bp, sp */
150 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
151 /* FP registers */
152 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
153 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
154 /* arg pointer, flags, fpsr, frame */
155 NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
156 /* SSE registers */
157 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
158 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
159 /* MMX registers */
160 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
161 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
162 /* REX registers */
163 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
164 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
165 /* SSE REX registers */
166 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
167 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
168 /* AVX-512 SSE registers */
169 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
170 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
171 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
172 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
173 /* Mask registers. */
174 ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
175 MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
176 /* REX2 registers */
177 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
178 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
179 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
180 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
181};
182
183/* The "default" register map used in 32bit mode. */
184
185int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
186{
187 /* general regs */
188 0, 2, 1, 3, 6, 7, 4, 5,
189 /* fp regs */
190 12, 13, 14, 15, 16, 17, 18, 19,
191 /* arg, flags, fpsr, frame */
192 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
193 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
194 /* SSE */
195 21, 22, 23, 24, 25, 26, 27, 28,
196 /* MMX */
197 29, 30, 31, 32, 33, 34, 35, 36,
198 /* extended integer registers */
199 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
200 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
201 /* extended sse registers */
202 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
203 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
204 /* AVX-512 registers 16-23 */
205 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
206 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
207 /* AVX-512 registers 24-31 */
208 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
209 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
210 /* Mask registers */
211 93, 94, 95, 96, 97, 98, 99, 100
212};
213
214/* The "default" register map used in 64bit mode. */
215
216int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
217{
218 /* general regs */
219 0, 1, 2, 3, 4, 5, 6, 7,
220 /* fp regs */
221 33, 34, 35, 36, 37, 38, 39, 40,
222 /* arg, flags, fpsr, frame */
223 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
224 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
225 /* SSE */
226 17, 18, 19, 20, 21, 22, 23, 24,
227 /* MMX */
228 41, 42, 43, 44, 45, 46, 47, 48,
229 /* extended integer registers */
230 8, 9, 10, 11, 12, 13, 14, 15,
231 /* extended SSE registers */
232 25, 26, 27, 28, 29, 30, 31, 32,
233 /* AVX-512 registers 16-23 */
234 67, 68, 69, 70, 71, 72, 73, 74,
235 /* AVX-512 registers 24-31 */
236 75, 76, 77, 78, 79, 80, 81, 82,
237 /* Mask registers */
238 118, 119, 120, 121, 122, 123, 124, 125,
  /* REX2 extended integer registers */
240 130, 131, 132, 133, 134, 135, 136, 137,
241 138, 139, 140, 141, 142, 143, 144, 145
242};
243
244/* Define the register numbers to be used in Dwarf debugging information.
245 The SVR4 reference port C compiler uses the following register numbers
246 in its Dwarf output code:
247 0 for %eax (gcc regno = 0)
248 1 for %ecx (gcc regno = 2)
249 2 for %edx (gcc regno = 1)
250 3 for %ebx (gcc regno = 3)
251 4 for %esp (gcc regno = 7)
252 5 for %ebp (gcc regno = 6)
253 6 for %esi (gcc regno = 4)
254 7 for %edi (gcc regno = 5)
255 The following three DWARF register numbers are never generated by
256 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
257 believed these numbers have these meanings.
258 8 for %eip (no gcc equivalent)
259 9 for %eflags (gcc regno = 17)
260 10 for %trapno (no gcc equivalent)
261 It is not at all clear how we should number the FP stack registers
262 for the x86 architecture. If the version of SDB on x86/svr4 were
263 a bit less brain dead with respect to floating-point then we would
264 have a precedent to follow with respect to DWARF register numbers
265 for x86 FP registers, but the SDB on x86/svr4 was so completely
266 broken with respect to FP registers that it is hardly worth thinking
267 of it as something to strive for compatibility with.
268 The version of x86/svr4 SDB I had does (partially)
269 seem to believe that DWARF register number 11 is associated with
270 the x86 register %st(0), but that's about all. Higher DWARF
271 register numbers don't seem to be associated with anything in
272 particular, and even for DWARF regno 11, SDB only seemed to under-
273 stand that it should say that a variable lives in %st(0) (when
274 asked via an `=' command) if we said it was in DWARF regno 11,
275 but SDB still printed garbage when asked for the value of the
276 variable in question (via a `/' command).
277 (Also note that the labels SDB printed for various FP stack regs
278 when doing an `x' command were all wrong.)
279 Note that these problems generally don't affect the native SVR4
280 C compiler because it doesn't allow the use of -O with -g and
281 because when it is *not* optimizing, it allocates a memory
282 location for each floating-point variable, and the memory
283 location is what gets described in the DWARF AT_location
284 attribute for the variable in question.
285 Regardless of the severe mental illness of the x86/svr4 SDB, we
286 do something sensible here and we use the following DWARF
287 register numbers. Note that these are all stack-top-relative
288 numbers.
289 11 for %st(0) (gcc regno = 8)
290 12 for %st(1) (gcc regno = 9)
291 13 for %st(2) (gcc regno = 10)
292 14 for %st(3) (gcc regno = 11)
293 15 for %st(4) (gcc regno = 12)
294 16 for %st(5) (gcc regno = 13)
295 17 for %st(6) (gcc regno = 14)
296 18 for %st(7) (gcc regno = 15)
297*/
298int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
299{
300 /* general regs */
301 0, 2, 1, 3, 6, 7, 5, 4,
302 /* fp regs */
303 11, 12, 13, 14, 15, 16, 17, 18,
304 /* arg, flags, fpsr, frame */
305 IGNORED_DWARF_REGNUM, 9,
306 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
307 /* SSE registers */
308 21, 22, 23, 24, 25, 26, 27, 28,
309 /* MMX registers */
310 29, 30, 31, 32, 33, 34, 35, 36,
311 /* extended integer registers */
312 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
313 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
314 /* extended sse registers */
315 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
316 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
317 /* AVX-512 registers 16-23 */
318 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
319 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
320 /* AVX-512 registers 24-31 */
321 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
322 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
323 /* Mask registers */
324 93, 94, 95, 96, 97, 98, 99, 100
325};
326
327/* Define parameter passing and return registers. */
328
329static int const x86_64_int_parameter_registers[6] =
330{
331 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
332};
333
334static int const x86_64_ms_abi_int_parameter_registers[4] =
335{
336 CX_REG, DX_REG, R8_REG, R9_REG
337};
338
339static int const x86_64_int_return_registers[4] =
340{
341 AX_REG, DX_REG, DI_REG, SI_REG
342};
343
344/* Define the structure for the machine field in struct function. */
345
346struct GTY(()) stack_local_entry {
347 unsigned short mode;
348 unsigned short n;
349 rtx rtl;
350 struct stack_local_entry *next;
351};
352
353/* Which cpu are we scheduling for. */
354enum attr_cpu ix86_schedule;
355
356/* Which cpu are we optimizing for. */
357enum processor_type ix86_tune;
358
359/* Which instruction set architecture to use. */
360enum processor_type ix86_arch;
361
362/* True if processor has SSE prefetch instruction. */
363unsigned char ix86_prefetch_sse;
364
365/* Preferred alignment for stack boundary in bits. */
366unsigned int ix86_preferred_stack_boundary;
367
368/* Alignment for incoming stack boundary in bits specified at
369 command line. */
370unsigned int ix86_user_incoming_stack_boundary;
371
372/* Default alignment for incoming stack boundary in bits. */
373unsigned int ix86_default_incoming_stack_boundary;
374
375/* Alignment for incoming stack boundary in bits. */
376unsigned int ix86_incoming_stack_boundary;
377
378/* True if there is no direct access to extern symbols. */
379bool ix86_has_no_direct_extern_access;
380
381/* Calling abi specific va_list type nodes. */
382tree sysv_va_list_type_node;
383tree ms_va_list_type_node;
384
385/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
386char internal_label_prefix[16];
387int internal_label_prefix_len;
388
389/* Fence to use after loop using movnt. */
390tree x86_mfence;
391
/* Register class used for passing a given 64-bit part of the argument.
   These represent the classes as documented by the psABI, with the
   exception of the SSESF and SSEDF classes, which are basically the SSE
   class, except that GCC will use an SFmode or DFmode move instead of a
   DImode move to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half then only contains padding). */
399enum x86_64_reg_class
400 {
401 X86_64_NO_CLASS,
402 X86_64_INTEGER_CLASS,
403 X86_64_INTEGERSI_CLASS,
404 X86_64_SSE_CLASS,
405 X86_64_SSEHF_CLASS,
406 X86_64_SSESF_CLASS,
407 X86_64_SSEDF_CLASS,
408 X86_64_SSEUP_CLASS,
409 X86_64_X87_CLASS,
410 X86_64_X87UP_CLASS,
411 X86_64_COMPLEX_X87_CLASS,
412 X86_64_MEMORY_CLASS
413 };
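/* As an illustration of the classification above: under the psABI a
   structure such as { double d; long l; } is typically split into two
   eightbytes, the first classified as X86_64_SSEDF_CLASS (passed in an
   SSE register using a DFmode move) and the second as
   X86_64_INTEGER_CLASS (passed in a general-purpose register). */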
414
415#define MAX_CLASSES 8
416
417/* Table of constants used by fldpi, fldln2, etc.... */
418static REAL_VALUE_TYPE ext_80387_constants_table [5];
419static bool ext_80387_constants_init;
420
421
422static rtx ix86_function_value (const_tree, const_tree, bool);
423static bool ix86_function_value_regno_p (const unsigned int);
424static unsigned int ix86_function_arg_boundary (machine_mode,
425 const_tree);
426static rtx ix86_static_chain (const_tree, bool);
427static int ix86_function_regparm (const_tree, const_tree);
428static void ix86_compute_frame_layout (void);
429static tree ix86_canonical_va_list_type (tree);
430static unsigned int split_stack_prologue_scratch_regno (void);
431static bool i386_asm_output_addr_const_extra (FILE *, rtx);
432
433static bool ix86_can_inline_p (tree, tree);
434static unsigned int ix86_minimum_incoming_stack_boundary (bool);
435
436
437/* Whether -mtune= or -march= were specified */
438int ix86_tune_defaulted;
439int ix86_arch_specified;
440
/* Return true if a red zone is in use.  We can't use the red zone when
   there are local indirect jumps, like "indirect_jump" or "tablejump",
   which jump to another place in the function, since "call" in the
   indirect thunk pushes the return address onto the stack, destroying
   the red zone.

   TODO: If we can reserve the first 2 WORDs in the red zone, one for PUSH
   and another for CALL, we can allow local indirect jumps with the
   indirect thunk. */
450
451bool
452ix86_using_red_zone (void)
453{
454 return (TARGET_RED_ZONE
455 && !TARGET_64BIT_MS_ABI
456 && (!cfun->machine->has_local_indirect_jump
457 || cfun->machine->indirect_branch_type == indirect_branch_keep));
458}
459
/* Return true if profiling code should be emitted before the prologue,
   false otherwise.
   Note: for x86 with "hotfix" this case is sorried (rejected as
   unimplemented). */
463static bool
464ix86_profile_before_prologue (void)
465{
466 return flag_fentry != 0;
467}
468
469/* Update register usage after having seen the compiler flags. */
470
471static void
472ix86_conditional_register_usage (void)
473{
474 int i, c_mask;
475
  /* If there are no caller-saved registers, preserve all registers,
     except fixed_regs and the registers used for the function return
     value, since aggregate_value_p checks call_used_regs[regno] on the
     return value. */
480 if (cfun
481 && (cfun->machine->call_saved_registers
482 == TYPE_NO_CALLER_SAVED_REGISTERS))
483 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
484 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
485 call_used_regs[i] = 0;
486
487 /* For 32-bit targets, disable the REX registers. */
488 if (! TARGET_64BIT)
489 {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
        CLEAR_HARD_REG_BIT (accessible_reg_set, i);
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
        CLEAR_HARD_REG_BIT (accessible_reg_set, i);
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
        CLEAR_HARD_REG_BIT (accessible_reg_set, i);
496 }
497
498 /* See the definition of CALL_USED_REGISTERS in i386.h. */
499 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
500
501 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
502
503 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
504 {
505 /* Set/reset conditionally defined registers from
506 CALL_USED_REGISTERS initializer. */
507 if (call_used_regs[i] > 1)
508 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
509
510 /* Calculate registers of CLOBBERED_REGS register set
511 as call used registers from GENERAL_REGS register set. */
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
          && call_used_regs[i])
        SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
515 }
516
517 /* If MMX is disabled, disable the registers. */
518 if (! TARGET_MMX)
519 accessible_reg_set &= ~reg_class_contents[MMX_REGS];
520
521 /* If SSE is disabled, disable the registers. */
522 if (! TARGET_SSE)
523 accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];
524
525 /* If the FPU is disabled, disable the registers. */
526 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
527 accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
528
529 /* If AVX512F is disabled, disable the registers. */
530 if (! TARGET_AVX512F)
531 {
532 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
        CLEAR_HARD_REG_BIT (accessible_reg_set, i);
534
535 accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
536 }
537
538 /* If APX is disabled, disable the registers. */
539 if (! (TARGET_APX_EGPR && TARGET_64BIT))
540 {
541 for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++)
        CLEAR_HARD_REG_BIT (accessible_reg_set, i);
543 }
544}
545
546/* Canonicalize a comparison from one we don't have to one we do have. */
547
548static void
549ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
550 bool op0_preserve_value)
551{
552 /* The order of operands in x87 ficom compare is forced by combine in
553 simplify_comparison () function. Float operator is treated as RTX_OBJ
554 with a precedence over other operators and is always put in the first
555 place. Swap condition and operands to match ficom instruction. */
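  /* E.g. (lt (float (mem)) (reg)) is rewritten as
     (gt (reg) (float (mem))), so that the memory operand ends up second,
     where the ficom pattern expects it. */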
556 if (!op0_preserve_value
557 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
558 {
559 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
560
561 /* We are called only for compares that are split to SAHF instruction.
562 Ensure that we have setcc/jcc insn for the swapped condition. */
563 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
564 {
      std::swap (*op0, *op1);
566 *code = (int) scode;
567 }
568 }
569}
570
571
572/* Hook to determine if one function can safely inline another. */
573
574static bool
575ix86_can_inline_p (tree caller, tree callee)
576{
577 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
578 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
579
  /* Changes of these flags can be tolerated for always_inline functions.
     Let's hope the user knows what they are doing. */
582 unsigned HOST_WIDE_INT always_inline_safe_mask
583 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
584 | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
585 | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
586 | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
587 | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
588 | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
589 | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
590
591
592 if (!callee_tree)
593 callee_tree = target_option_default_node;
594 if (!caller_tree)
595 caller_tree = target_option_default_node;
596 if (callee_tree == caller_tree)
597 return true;
598
599 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
600 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
601 bool ret = false;
602 bool always_inline
603 = (DECL_DISREGARD_INLINE_LIMITS (callee)
       && lookup_attribute ("always_inline",
                            DECL_ATTRIBUTES (callee)));
606
607 /* If callee only uses GPRs, ignore MASK_80387. */
608 if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
609 always_inline_safe_mask |= MASK_80387;
610
  cgraph_node *callee_node = cgraph_node::get (callee);
  /* The callee's ISA options should be a subset of the caller's, i.e. an
     SSE4 function can inline an SSE2 function, but an SSE2 function can't
     inline an SSE4 function. */
615 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
616 != callee_opts->x_ix86_isa_flags)
617 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
618 != callee_opts->x_ix86_isa_flags2))
619 ret = false;
620
621 /* See if we have the same non-isa options. */
622 else if ((!always_inline
623 && caller_opts->x_target_flags != callee_opts->x_target_flags)
624 || (caller_opts->x_target_flags & ~always_inline_safe_mask)
625 != (callee_opts->x_target_flags & ~always_inline_safe_mask))
626 ret = false;
627
  else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
           /* If the callee doesn't use FP expressions, differences in
              ix86_fpmath can be ignored.  We are called from FEs
              for multi-versioning call optimization, so beware of
              ipa_fn_summaries not being available. */
           && (! ipa_fn_summaries
               || ipa_fn_summaries->get (callee_node) == NULL
               || ipa_fn_summaries->get (callee_node)->fp_expressions))
636 ret = false;
637
638 /* At this point we cannot identify whether arch or tune setting
639 comes from target attribute or not. So the most conservative way
640 is to allow the callee that uses default arch and tune string to
641 be inlined. */
  else if (!strcmp (callee_opts->x_ix86_arch_string, "x86-64")
           && !strcmp (callee_opts->x_ix86_tune_string, "generic"))
644 ret = true;
645
  /* See if arch, tune, etc. are the same.  As the ISA check above already
     ensures that the callee's ISA is a subset of the caller's, do not block
     the always_inline attribute for the callee even if it has a different
     arch. */
649 else if (!always_inline && caller_opts->arch != callee_opts->arch)
650 ret = false;
651
652 else if (!always_inline && caller_opts->tune != callee_opts->tune)
653 ret = false;
654
655 else if (!always_inline
656 && caller_opts->branch_cost != callee_opts->branch_cost)
657 ret = false;
658
659 else
660 ret = true;
661
662 return ret;
663}
664
665/* Return true if this goes in large data/bss. */
666
667static bool
668ix86_in_large_data_p (tree exp)
669{
670 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC
671 && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC)
672 return false;
673
674 if (exp == NULL_TREE)
675 return false;
676
677 /* Functions are never large data. */
678 if (TREE_CODE (exp) == FUNCTION_DECL)
679 return false;
680
681 /* Automatic variables are never large data. */
  if (VAR_P (exp) && !is_global_var (exp))
683 return false;
684
685 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
686 {
687 const char *section = DECL_SECTION_NAME (exp);
      if (strcmp (section, ".ldata") == 0
          || strcmp (section, ".lbss") == 0)
690 return true;
691 return false;
692 }
693 else
694 {
695 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
696
      /* If this is an incomplete type with size 0, then we can't put it
         in data because it might be too big when completed.  Also,
         int_size_in_bytes returns -1 if the size can vary or is larger
         than an integer, in which case it is also safer to assume that it
         goes in large data. */
702 if (size <= 0 || size > ix86_section_threshold)
703 return true;
704 }
705
706 return false;
707}
708
709/* i386-specific section flag to mark large sections. */
710#define SECTION_LARGE SECTION_MACH_DEP
711
712/* Switch to the appropriate section for output of DECL.
713 DECL is either a `VAR_DECL' node or a constant of some sort.
714 RELOC indicates whether forming the initial value of DECL requires
715 link-time relocations. */
716
717ATTRIBUTE_UNUSED static section *
718x86_64_elf_select_section (tree decl, int reloc,
719 unsigned HOST_WIDE_INT align)
720{
  if (ix86_in_large_data_p (decl))
722 {
723 const char *sname = NULL;
724 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
725 switch (categorize_decl_for_section (decl, reloc))
726 {
727 case SECCAT_DATA:
728 sname = ".ldata";
729 break;
730 case SECCAT_DATA_REL:
731 sname = ".ldata.rel";
732 break;
733 case SECCAT_DATA_REL_LOCAL:
734 sname = ".ldata.rel.local";
735 break;
736 case SECCAT_DATA_REL_RO:
737 sname = ".ldata.rel.ro";
738 break;
739 case SECCAT_DATA_REL_RO_LOCAL:
740 sname = ".ldata.rel.ro.local";
741 break;
742 case SECCAT_BSS:
743 sname = ".lbss";
744 flags |= SECTION_BSS;
745 break;
746 case SECCAT_RODATA:
747 case SECCAT_RODATA_MERGE_STR:
748 case SECCAT_RODATA_MERGE_STR_INIT:
749 case SECCAT_RODATA_MERGE_CONST:
750 sname = ".lrodata";
751 flags &= ~SECTION_WRITE;
752 break;
753 case SECCAT_SRODATA:
754 case SECCAT_SDATA:
755 case SECCAT_SBSS:
756 gcc_unreachable ();
757 case SECCAT_TEXT:
758 case SECCAT_TDATA:
759 case SECCAT_TBSS:
        /* We don't split these for the medium model.  Place them into
           default sections and hope for the best. */
762 break;
763 }
764 if (sname)
765 {
766 /* We might get called with string constants, but get_named_section
767 doesn't like them as they are not DECLs. Also, we need to set
768 flags in that case. */
769 if (!DECL_P (decl))
770 return get_section (sname, flags, NULL);
771 return get_named_section (decl, sname, reloc);
772 }
773 }
774 return default_elf_select_section (decl, reloc, align);
775}
776
777/* Select a set of attributes for section NAME based on the properties
778 of DECL and whether or not RELOC indicates that DECL's initializer
779 might contain runtime relocations. */
780
781static unsigned int ATTRIBUTE_UNUSED
782x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
783{
784 unsigned int flags = default_section_type_flags (decl, name, reloc);
785
  if (ix86_in_large_data_p (decl))
787 flags |= SECTION_LARGE;
788
789 if (decl == NULL_TREE
      && (strcmp (name, ".ldata.rel.ro") == 0
          || strcmp (name, ".ldata.rel.ro.local") == 0))
792 flags |= SECTION_RELRO;
793
  if (strcmp (name, ".lbss") == 0
      || startswith (name, ".lbss.")
      || startswith (name, ".gnu.linkonce.lb."))
797 flags |= SECTION_BSS;
798
799 return flags;
800}
801
802/* Build up a unique section name, expressed as a
803 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
804 RELOC indicates whether the initial value of EXP requires
805 link-time relocations. */
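/* For instance, a large-data symbol "foo" normally ends up in a section
   named ".ldata.foo", or ".gnu.linkonce.ld.foo" when a .gnu.linkonce-style
   one-only section has to be used instead of a COMDAT group. */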
806
807static void ATTRIBUTE_UNUSED
808x86_64_elf_unique_section (tree decl, int reloc)
809{
  if (ix86_in_large_data_p (decl))
811 {
812 const char *prefix = NULL;
813 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
814 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
815
816 switch (categorize_decl_for_section (decl, reloc))
817 {
818 case SECCAT_DATA:
819 case SECCAT_DATA_REL:
820 case SECCAT_DATA_REL_LOCAL:
821 case SECCAT_DATA_REL_RO:
822 case SECCAT_DATA_REL_RO_LOCAL:
823 prefix = one_only ? ".ld" : ".ldata";
824 break;
825 case SECCAT_BSS:
826 prefix = one_only ? ".lb" : ".lbss";
827 break;
828 case SECCAT_RODATA:
829 case SECCAT_RODATA_MERGE_STR:
830 case SECCAT_RODATA_MERGE_STR_INIT:
831 case SECCAT_RODATA_MERGE_CONST:
832 prefix = one_only ? ".lr" : ".lrodata";
833 break;
834 case SECCAT_SRODATA:
835 case SECCAT_SDATA:
836 case SECCAT_SBSS:
837 gcc_unreachable ();
838 case SECCAT_TEXT:
839 case SECCAT_TDATA:
840 case SECCAT_TBSS:
        /* We don't split these for the medium model.  Place them into
           default sections and hope for the best. */
843 break;
844 }
845 if (prefix)
846 {
847 const char *name, *linkonce;
848 char *string;
849
850 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
851 name = targetm.strip_name_encoding (name);
852
853 /* If we're using one_only, then there needs to be a .gnu.linkonce
854 prefix to the section name. */
855 linkonce = one_only ? ".gnu.linkonce" : "";
856
857 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
858
859 set_decl_section_name (decl, string);
860 return;
861 }
862 }
863 default_unique_section (decl, reloc);
864}
865
866#ifdef COMMON_ASM_OP
867
868#ifndef LARGECOMM_SECTION_ASM_OP
869#define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
870#endif
871
872/* This says how to output assembler code to declare an
873 uninitialized external linkage data object.
874
   For medium-model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP
   directive for large objects. */
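/* For example, for a 1 MiB object above ix86_section_threshold under
   -mcmodel=medium this emits something like
     .largecomm foo,1048576,32
   (size and alignment in bytes) instead of the usual .comm directive. */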
877void
878x86_elf_aligned_decl_common (FILE *file, tree decl,
879 const char *name, unsigned HOST_WIDE_INT size,
880 unsigned align)
881{
882 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
883 || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
884 && size > (unsigned int)ix86_section_threshold)
885 {
886 switch_to_section (get_named_section (decl, ".lbss", 0));
      fputs (LARGECOMM_SECTION_ASM_OP, file);
    }
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
           size, align / BITS_PER_UNIT);
894}
895#endif
896
897/* Utility function for targets to use in implementing
898 ASM_OUTPUT_ALIGNED_BSS. */
899
900void
901x86_output_aligned_bss (FILE *file, tree decl, const char *name,
902 unsigned HOST_WIDE_INT size, unsigned align)
903{
904 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
905 || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
906 && size > (unsigned int)ix86_section_threshold)
907 switch_to_section (get_named_section (decl, ".lbss", 0));
908 else
909 switch_to_section (bss_section);
910 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
911#ifdef ASM_DECLARE_OBJECT_NAME
912 last_assemble_variable_decl = decl;
913 ASM_DECLARE_OBJECT_NAME (file, name, decl);
914#else
915 /* Standard thing is just output label for the object. */
916 ASM_OUTPUT_LABEL (file, name);
917#endif /* ASM_DECLARE_OBJECT_NAME */
918 ASM_OUTPUT_SKIP (file, size ? size : 1);
919}
920
921/* Decide whether we must probe the stack before any space allocation
922 on this target. It's essentially TARGET_STACK_PROBE except when
923 -fstack-check causes the stack to be already probed differently. */
924
925bool
926ix86_target_stack_probe (void)
927{
928 /* Do not probe the stack twice if static stack checking is enabled. */
929 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
930 return false;
931
932 return TARGET_STACK_PROBE;
933}
934
935/* Decide whether we can make a sibling call to a function. DECL is the
936 declaration of the function being targeted by the call and EXP is the
937 CALL_EXPR representing the call. */
938
939static bool
940ix86_function_ok_for_sibcall (tree decl, tree exp)
941{
942 tree type, decl_or_type;
943 rtx a, b;
944 bool bind_global = decl && !targetm.binds_local_p (decl);
945
  if (ix86_function_naked (current_function_decl))
947 return false;
948
949 /* Sibling call isn't OK if there are no caller-saved registers
950 since all registers must be preserved before return. */
951 if (cfun->machine->call_saved_registers
952 == TYPE_NO_CALLER_SAVED_REGISTERS)
953 return false;
954
955 /* If we are generating position-independent code, we cannot sibcall
956 optimize direct calls to global functions, as the PLT requires
957 %ebx be live. (Darwin does not have a PLT.) */
958 if (!TARGET_MACHO
959 && !TARGET_64BIT
960 && flag_pic
961 && flag_plt
962 && bind_global)
963 return false;
964
965 /* If we need to align the outgoing stack, then sibcalling would
966 unalign the stack, which may break the called function. */
967 if (ix86_minimum_incoming_stack_boundary (true)
968 < PREFERRED_STACK_BOUNDARY)
969 return false;
970
971 if (decl)
972 {
973 decl_or_type = decl;
974 type = TREE_TYPE (decl);
975 }
976 else
977 {
978 /* We're looking at the CALL_EXPR, we need the type of the function. */
979 type = CALL_EXPR_FN (exp); /* pointer expression */
980 type = TREE_TYPE (type); /* pointer type */
981 type = TREE_TYPE (type); /* function type */
982 decl_or_type = type;
983 }
984
985 /* Sibling call isn't OK if callee has no callee-saved registers
986 and the calling function has callee-saved registers. */
987 if (cfun->machine->call_saved_registers != TYPE_NO_CALLEE_SAVED_REGISTERS
988 && (cfun->machine->call_saved_registers
989 != TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP)
990 && lookup_attribute (attr_name: "no_callee_saved_registers",
991 TYPE_ATTRIBUTES (type)))
992 return false;
993
994 /* If outgoing reg parm stack space changes, we cannot do sibcall. */
995 if ((OUTGOING_REG_PARM_STACK_SPACE (type)
996 != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
997 || (REG_PARM_STACK_SPACE (decl_or_type)
998 != REG_PARM_STACK_SPACE (current_function_decl)))
999 {
1000 maybe_complain_about_tail_call (exp,
1001 "inconsistent size of stack space"
1002 " allocated for arguments which are"
1003 " passed in registers");
1004 return false;
1005 }
1006
1007 /* Check that the return value locations are the same. Like
1008 if we are returning floats on the 80387 register stack, we cannot
1009 make a sibcall from a function that doesn't return a float to a
1010 function that does or, conversely, from a function that does return
1011 a float to a function that doesn't; the necessary stack adjustment
1012 would not be executed. This is also the place we notice
1013 differences in the return value ABI. Note that it is ok for one
1014 of the functions to have void return type as long as the return
1015 value of the other is passed in a register. */
1016 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
1017 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
1018 cfun->decl, false);
1019 if (STACK_REG_P (a) || STACK_REG_P (b))
1020 {
1021 if (!rtx_equal_p (a, b))
1022 return false;
1023 }
1024 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
1025 ;
1026 else if (!rtx_equal_p (a, b))
1027 return false;
1028
1029 if (TARGET_64BIT)
1030 {
1031 /* The SYSV ABI has more call-clobbered registers;
1032 disallow sibcalls from MS to SYSV. */
1033 if (cfun->machine->call_abi == MS_ABI
1034 && ix86_function_type_abi (type) == SYSV_ABI)
1035 return false;
1036 }
1037 else
1038 {
1039 /* If this call is indirect, we'll need to be able to use a
1040 call-clobbered register for the address of the target function.
1041 Make sure that all such registers are not used for passing
1042 parameters. Note that DLLIMPORT functions and call to global
1043 function via GOT slot are indirect. */
1044 if (!decl
1045 || (bind_global && flag_pic && !flag_plt)
1046 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
1047 || flag_force_indirect_call)
1048 {
1049 /* Check if regparm >= 3 since arg_reg_available is set to
1050 false if regparm == 0. If regparm is 1 or 2, there is
1051 always a call-clobbered register available.
1052
1053 ??? The symbol indirect call doesn't need a call-clobbered
1054 register. But we don't know if this is a symbol indirect
1055 call or not here. */
1056 if (ix86_function_regparm (type, decl) >= 3
1057 && !cfun->machine->arg_reg_available)
1058 return false;
1059 }
1060 }
1061
1062 if (decl && ix86_use_pseudo_pic_reg ())
1063 {
1064 /* When PIC register is used, it must be restored after ifunc
1065 function returns. */
1066 cgraph_node *node = cgraph_node::get (decl);
1067 if (node && node->ifunc_resolver)
1068 return false;
1069 }
1070
1071 /* Disable sibcall if callee has indirect_return attribute and
1072 caller doesn't since callee will return to the caller's caller
1073 via an indirect jump. */
1074 if (((flag_cf_protection & (CF_RETURN | CF_BRANCH))
1075 == (CF_RETURN | CF_BRANCH))
1076 && lookup_attribute (attr_name: "indirect_return", TYPE_ATTRIBUTES (type))
1077 && !lookup_attribute (attr_name: "indirect_return",
1078 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
1079 return false;
1080
1081 /* Otherwise okay. That also includes certain types of indirect calls. */
1082 return true;
1083}
1084
/* This function determines the calling convention from TYPE. */
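/* For example, on 32-bit targets a type carrying __attribute__ ((fastcall))
   yields IX86_CALLCVT_FASTCALL, while with -mrtd a non-stdarg prototype
   without any such attribute defaults to IX86_CALLCVT_STDCALL. */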
1086
1087unsigned int
1088ix86_get_callcvt (const_tree type)
1089{
1090 unsigned int ret = 0;
1091 bool is_stdarg;
1092 tree attrs;
1093
1094 if (TARGET_64BIT)
1095 return IX86_CALLCVT_CDECL;
1096
1097 attrs = TYPE_ATTRIBUTES (type);
1098 if (attrs != NULL_TREE)
1099 {
1100 if (lookup_attribute (attr_name: "cdecl", list: attrs))
1101 ret |= IX86_CALLCVT_CDECL;
1102 else if (lookup_attribute (attr_name: "stdcall", list: attrs))
1103 ret |= IX86_CALLCVT_STDCALL;
1104 else if (lookup_attribute (attr_name: "fastcall", list: attrs))
1105 ret |= IX86_CALLCVT_FASTCALL;
1106 else if (lookup_attribute (attr_name: "thiscall", list: attrs))
1107 ret |= IX86_CALLCVT_THISCALL;
1108
1109 /* Regparam isn't allowed for thiscall and fastcall. */
1110 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
1111 {
1112 if (lookup_attribute (attr_name: "regparm", list: attrs))
1113 ret |= IX86_CALLCVT_REGPARM;
1114 if (lookup_attribute (attr_name: "sseregparm", list: attrs))
1115 ret |= IX86_CALLCVT_SSEREGPARM;
1116 }
1117
1118 if (IX86_BASE_CALLCVT(ret) != 0)
1119 return ret;
1120 }
1121
1122 is_stdarg = stdarg_p (type);
1123 if (TARGET_RTD && !is_stdarg)
1124 return IX86_CALLCVT_STDCALL | ret;
1125
1126 if (ret != 0
1127 || is_stdarg
1128 || TREE_CODE (type) != METHOD_TYPE
1129 || ix86_function_type_abi (type) != MS_ABI)
1130 return IX86_CALLCVT_CDECL | ret;
1131
1132 return IX86_CALLCVT_THISCALL;
1133}
1134
1135/* Return 0 if the attributes for two types are incompatible, 1 if they
1136 are compatible, and 2 if they are nearly compatible (which causes a
1137 warning to be generated). */
1138
1139static int
1140ix86_comp_type_attributes (const_tree type1, const_tree type2)
1141{
1142 unsigned int ccvt1, ccvt2;
1143
1144 if (TREE_CODE (type1) != FUNCTION_TYPE
1145 && TREE_CODE (type1) != METHOD_TYPE)
1146 return 1;
1147
  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
1150 if (ccvt1 != ccvt2)
1151 return 0;
1152 if (ix86_function_regparm (type1, NULL)
1153 != ix86_function_regparm (type2, NULL))
1154 return 0;
1155
  if (lookup_attribute ("no_callee_saved_registers",
                        TYPE_ATTRIBUTES (type1))
      != lookup_attribute ("no_callee_saved_registers",
                           TYPE_ATTRIBUTES (type2)))
1160 return 0;
1161
1162 return 1;
1163}
1164
1165/* Return the regparm value for a function with the indicated TYPE and DECL.
1166 DECL may be NULL when calling function indirectly
1167 or considering a libcall. */
1168
1169static int
1170ix86_function_regparm (const_tree type, const_tree decl)
1171{
1172 tree attr;
1173 int regparm;
1174 unsigned int ccvt;
1175
1176 if (TARGET_64BIT)
1177 return (ix86_function_type_abi (type) == SYSV_ABI
1178 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
1179 ccvt = ix86_get_callcvt (type);
1180 regparm = ix86_regparm;
1181
1182 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
1183 {
1184 attr = lookup_attribute (attr_name: "regparm", TYPE_ATTRIBUTES (type));
1185 if (attr)
1186 {
1187 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1188 return regparm;
1189 }
1190 }
1191 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1192 return 2;
1193 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1194 return 1;
1195
1196 /* Use register calling convention for local functions when possible. */
1197 if (decl
1198 && TREE_CODE (decl) == FUNCTION_DECL)
1199 {
1200 cgraph_node *target = cgraph_node::get (decl);
1201 if (target)
1202 target = target->function_symbol ();
1203
      /* Caller and callee must agree on the calling convention, so
         checking just the "optimize" setting here would mean that with
         __attribute__((optimize (...))) the caller could use the regparm
         convention and the callee not, or vice versa.  Instead look at
         whether the callee is optimized or not. */
1209 if (target && opt_for_fn (target->decl, optimize)
1210 && !(profile_flag && !flag_fentry))
1211 {
1212 if (target->local && target->can_change_signature)
1213 {
1214 int local_regparm, globals = 0, regno;
1215
1216 /* Make sure no regparm register is taken by a
1217 fixed register variable. */
1218 for (local_regparm = 0; local_regparm < REGPARM_MAX;
1219 local_regparm++)
1220 if (fixed_regs[local_regparm])
1221 break;
1222
1223 /* We don't want to use regparm(3) for nested functions as
1224 these use a static chain pointer in the third argument. */
1225 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
1226 local_regparm = 2;
1227
1228 /* Save a register for the split stack. */
1229 if (flag_split_stack)
1230 {
1231 if (local_regparm == 3)
1232 local_regparm = 2;
1233 else if (local_regparm == 2
1234 && DECL_STATIC_CHAIN (target->decl))
1235 local_regparm = 1;
1236 }
1237
          /* Each fixed register usage increases register pressure,
             so fewer registers should be used for argument passing.
             This functionality can be overridden by an explicit
             regparm value. */
1242 for (regno = AX_REG; regno <= DI_REG; regno++)
1243 if (fixed_regs[regno])
1244 globals++;
1245
1246 local_regparm
1247 = globals < local_regparm ? local_regparm - globals : 0;
1248
1249 if (local_regparm > regparm)
1250 regparm = local_regparm;
1251 }
1252 }
1253 }
1254
1255 return regparm;
1256}
1257
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling a function
   indirectly or considering a libcall.  Return -1 if any FP parameter
   should be rejected with an error; this is used in the situation where we
   imply the SSE calling convention but the function is called from another
   function with SSE disabled.  Otherwise return 0. */
1265
1266static int
1267ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
1268{
1269 gcc_assert (!TARGET_64BIT);
1270
1271 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1272 by the sseregparm attribute. */
1273 if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
1275 {
1276 if (!TARGET_SSE)
1277 {
1278 if (warn)
1279 {
1280 if (decl)
1281 error ("calling %qD with attribute sseregparm without "
1282 "SSE/SSE2 enabled", decl);
1283 else
1284 error ("calling %qT with attribute sseregparm without "
1285 "SSE/SSE2 enabled", type);
1286 }
1287 return 0;
1288 }
1289
1290 return 2;
1291 }
1292
1293 if (!decl)
1294 return 0;
1295
1296 cgraph_node *target = cgraph_node::get (decl);
1297 if (target)
1298 target = target->function_symbol ();
1299
1300 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1301 (and DFmode for SSE2) arguments in SSE registers. */
1302 if (target
1303 /* TARGET_SSE_MATH */
      && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
1305 && opt_for_fn (target->decl, optimize)
1306 && !(profile_flag && !flag_fentry))
1307 {
1308 if (target->local && target->can_change_signature)
1309 {
1310 /* Refuse to produce wrong code when local function with SSE enabled
1311 is called from SSE disabled function.
1312 FIXME: We need a way to detect these cases cross-ltrans partition
1313 and avoid using SSE calling conventions on local functions called
1314 from function with SSE disabled. For now at least delay the
1315 warning until we know we are going to produce wrong code.
1316 See PR66047 */
1317 if (!TARGET_SSE && warn)
1318 return -1;
1319 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
1320 ->x_ix86_isa_flags) ? 2 : 1;
1321 }
1322 }
1323
1324 return 0;
1325}
1326
1327/* Return true if EAX is live at the start of the function. Used by
1328 ix86_expand_prologue to determine if we need special help before
1329 calling allocate_stack_worker. */
1330
1331static bool
1332ix86_eax_live_at_start_p (void)
1333{
1334 /* Cheat. Don't bother working forward from ix86_function_regparm
1335 to the function type to whether an actual argument is located in
1336 eax. Instead just look at cfg info, which is still close enough
1337 to correct at this point. This gives false positives for broken
1338 functions that might use uninitialized data that happens to be
1339 allocated in eax, but who cares? */
1340 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
1341}
1342
1343static bool
1344ix86_keep_aggregate_return_pointer (tree fntype)
1345{
1346 tree attr;
1347
1348 if (!TARGET_64BIT)
1349 {
1350 attr = lookup_attribute (attr_name: "callee_pop_aggregate_return",
1351 TYPE_ATTRIBUTES (fntype));
1352 if (attr)
1353 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1354
1355 /* For 32-bit MS-ABI the default is to keep aggregate
1356 return pointer. */
1357 if (ix86_function_type_abi (fntype) == MS_ABI)
1358 return true;
1359 }
1360 return KEEP_AGGREGATE_RETURN_POINTER != 0;
1361}
1362
1363/* Value is the number of bytes of arguments automatically
1364 popped when returning from a subroutine call.
1365 FUNDECL is the declaration node of the function (as a tree),
1366 FUNTYPE is the data type of the function (as a tree),
1367 or for a library call it is an identifier node for the subroutine name.
1368 SIZE is the number of bytes of arguments passed on the stack.
1369
1370 On the 80386, the RTD insn may be used to pop them if the number
1371 of args is fixed, but if the number is variable then the caller
1372 must pop them all. RTD can't be used for library calls now
1373 because the library is compiled with the Unix compiler.
1374 Use of RTD is a selectable option, since it is incompatible with
1375 standard Unix calling sequences. If the option is not selected,
1376 the caller must always pop the args.
1377
1378 The attribute stdcall is equivalent to RTD on a per module basis. */
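/* For example, a stdcall function taking two int arguments returns with
   "ret $8" (this hook returns SIZE == 8), whereas for a plain cdecl
   function it returns 0 and the caller pops the arguments. */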
1379
1380static poly_int64
1381ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
1382{
1383 unsigned int ccvt;
1384
1385 /* None of the 64-bit ABIs pop arguments. */
1386 if (TARGET_64BIT)
1387 return 0;
1388
  ccvt = ix86_get_callcvt (funtype);
1390
1391 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
1392 | IX86_CALLCVT_THISCALL)) != 0
1393 && ! stdarg_p (funtype))
1394 return size;
1395
1396 /* Lose any fake structure return argument if it is passed on the stack. */
1397 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
1401 if (nregs == 0)
1402 return GET_MODE_SIZE (Pmode);
1403 }
1404
1405 return 0;
1406}
1407
1408/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1409
1410static bool
1411ix86_legitimate_combined_insn (rtx_insn *insn)
1412{
1413 int i;
1414
1415 /* Check operand constraints in case hard registers were propagated
1416 into insn pattern. This check prevents combine pass from
1417 generating insn patterns with invalid hard register operands.
1418 These invalid insns can eventually confuse reload to error out
1419 with a spill failure. See also PRs 46829 and 46843. */
1420
1421 gcc_assert (INSN_CODE (insn) >= 0);
1422
1423 extract_insn (insn);
1424 preprocess_constraints (insn);
1425
1426 int n_operands = recog_data.n_operands;
1427 int n_alternatives = recog_data.n_alternatives;
1428 for (i = 0; i < n_operands; i++)
1429 {
1430 rtx op = recog_data.operand[i];
1431 machine_mode mode = GET_MODE (op);
1432 const operand_alternative *op_alt;
1433 int offset = 0;
1434 bool win;
1435 int j;
1436
1437 /* A unary operator may be accepted by the predicate, but it
1438 is irrelevant for matching constraints. */
1439 if (UNARY_P (op))
1440 op = XEXP (op, 0);
1441
1442 if (SUBREG_P (op))
1443 {
1444 if (REG_P (SUBREG_REG (op))
1445 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
1446 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
1447 GET_MODE (SUBREG_REG (op)),
1448 SUBREG_BYTE (op),
1449 GET_MODE (op));
1450 op = SUBREG_REG (op);
1451 }
1452
1453 if (!(REG_P (op) && HARD_REGISTER_P (op)))
1454 continue;
1455
1456 op_alt = recog_op_alt;
1457
1458 /* Operand has no constraints, anything is OK. */
1459 win = !n_alternatives;
1460
1461 alternative_mask preferred = get_preferred_alternatives (insn);
1462 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
1463 {
1464 if (!TEST_BIT (preferred, j))
1465 continue;
1466 if (op_alt[i].anything_ok
1467 || (op_alt[i].matches != -1
1468 && operands_match_p
1469 (recog_data.operand[i],
1470 recog_data.operand[op_alt[i].matches]))
1471 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
1472 {
1473 win = true;
1474 break;
1475 }
1476 }
1477
1478 if (!win)
1479 return false;
1480 }
1481
1482 return true;
1483}
1484
1485/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1486
1487static unsigned HOST_WIDE_INT
1488ix86_asan_shadow_offset (void)
1489{
1490 return SUBTARGET_SHADOW_OFFSET;
1491}
1492
1493/* Argument support functions. */
1494
1495/* Return true when register may be used to pass function parameters. */
1496bool
1497ix86_function_arg_regno_p (int regno)
1498{
1499 int i;
1500 enum calling_abi call_abi;
1501 const int *parm_regs;
1502
1503 if (TARGET_SSE && SSE_REGNO_P (regno)
1504 && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
1505 return true;
1506
1507 if (!TARGET_64BIT)
1508 return (regno < REGPARM_MAX
1509 || (TARGET_MMX && MMX_REGNO_P (regno)
1510 && regno < FIRST_MMX_REG + MMX_REGPARM_MAX));
1511
1512 /* TODO: The function should depend on current function ABI but
1513 builtins.cc would need updating then. Therefore we use the
1514 default ABI. */
1515 call_abi = ix86_cfun_abi ();
1516
1517 /* RAX is used as hidden argument to va_arg functions. */
1518 if (call_abi == SYSV_ABI && regno == AX_REG)
1519 return true;
1520
1521 if (call_abi == MS_ABI)
1522 parm_regs = x86_64_ms_abi_int_parameter_registers;
1523 else
1524 parm_regs = x86_64_int_parameter_registers;
1525
1526 for (i = 0; i < (call_abi == MS_ABI
1527 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
1528 if (regno == parm_regs[i])
1529 return true;
1530 return false;
1531}
1532
1533/* Return if we do not know how to pass ARG solely in registers. */
1534
1535static bool
1536ix86_must_pass_in_stack (const function_arg_info &arg)
1537{
1538 if (must_pass_in_stack_var_size_or_pad (arg))
1539 return true;
1540
1541 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1542 The layout_type routine is crafty and tries to trick us into passing
1543 currently unsupported vector types on the stack by using TImode. */
1544 return (!TARGET_64BIT && arg.mode == TImode
1545 && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
1546}
1547
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI used. */
1551int
1552ix86_reg_parm_stack_space (const_tree fndecl)
1553{
1554 enum calling_abi call_abi = SYSV_ABI;
1555 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1556 call_abi = ix86_function_abi (fndecl);
1557 else
1558 call_abi = ix86_function_type_abi (fndecl);
1559 if (TARGET_64BIT && call_abi == MS_ABI)
1560 return 32;
1561 return 0;
1562}
1563
1564/* We add this as a workaround in order to use libc_has_function
1565 hook in i386.md. */
1566bool
1567ix86_libc_has_function (enum function_class fn_class)
1568{
1569 return targetm.libc_has_function (fn_class, NULL_TREE);
1570}
1571
/* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the calling
   ABI used. */
1574enum calling_abi
1575ix86_function_type_abi (const_tree fntype)
1576{
1577 enum calling_abi abi = ix86_abi;
1578
1579 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
1580 return abi;
1581
1582 if (abi == SYSV_ABI
1583 && lookup_attribute (attr_name: "ms_abi", TYPE_ATTRIBUTES (fntype)))
1584 {
1585 static int warned;
1586 if (TARGET_X32 && !warned)
1587 {
1588 error ("X32 does not support %<ms_abi%> attribute");
1589 warned = 1;
1590 }
1591
1592 abi = MS_ABI;
1593 }
1594 else if (abi == MS_ABI
1595 && lookup_attribute (attr_name: "sysv_abi", TYPE_ATTRIBUTES (fntype)))
1596 abi = SYSV_ABI;
1597
1598 return abi;
1599}
1600
1601enum calling_abi
1602ix86_function_abi (const_tree fndecl)
1603{
1604 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
1605}
1606
/* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the calling
   ABI used. */
1609enum calling_abi
1610ix86_cfun_abi (void)
1611{
1612 return cfun ? cfun->machine->call_abi : ix86_abi;
1613}
1614
1615bool
1616ix86_function_ms_hook_prologue (const_tree fn)
1617{
1618 if (fn && lookup_attribute (attr_name: "ms_hook_prologue", DECL_ATTRIBUTES (fn)))
1619 {
1620 if (decl_function_context (fn) != NULL_TREE)
1621 error_at (DECL_SOURCE_LOCATION (fn),
1622 "%<ms_hook_prologue%> attribute is not compatible "
1623 "with nested function");
1624 else
1625 return true;
1626 }
1627 return false;
1628}
1629
1630bool
1631ix86_function_naked (const_tree fn)
1632{
1633 if (fn && lookup_attribute (attr_name: "naked", DECL_ATTRIBUTES (fn)))
1634 return true;
1635
1636 return false;
1637}
1638
1639/* Write the extra assembler code needed to declare a function properly. */
1640
1641void
1642ix86_asm_output_function_label (FILE *out_file, const char *fname,
1643 tree decl)
1644{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
1646
1647 if (cfun)
1648 cfun->machine->function_label_emitted = true;
1649
1650 if (is_ms_hook)
1651 {
1652 int i, filler_count = (TARGET_64BIT ? 32 : 16);
1653 unsigned int filler_cc = 0xcccccccc;
1654
1655 for (i = 0; i < filler_count; i += 4)
      fprintf (out_file, ASM_LONG " %#x\n", filler_cc);
1657 }
1658
1659#ifdef SUBTARGET_ASM_UNWIND_INIT
1660 SUBTARGET_ASM_UNWIND_INIT (out_file);
1661#endif
1662
1663 assemble_function_label_raw (out_file, fname);
1664
1665 /* Output magic byte marker, if hot-patch attribute is set. */
1666 if (is_ms_hook)
1667 {
1668 if (TARGET_64BIT)
1669 {
1670 /* leaq [%rsp + 0], %rsp */
1671 fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1672 stream: out_file);
1673 }
1674 else
1675 {
1676 /* movl.s %edi, %edi
1677 push %ebp
1678 movl.s %esp, %ebp */
1679 fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", stream: out_file);
1680 }
1681 }
1682}
1683
/* Implementation of the call ABI switching target hook.  The call
   register sets specific to FNDECL are set up here.  See also
   ix86_conditional_register_usage for more details. */
1687void
1688ix86_call_abi_override (const_tree fndecl)
1689{
1690 cfun->machine->call_abi = ix86_function_abi (fndecl);
1691}
1692
/* Return true if a pseudo register should be created and used to hold
   the GOT address for PIC code. */
1695bool
1696ix86_use_pseudo_pic_reg (void)
1697{
1698 if ((TARGET_64BIT
1699 && (ix86_cmodel == CM_SMALL_PIC
1700 || TARGET_PECOFF))
1701 || !flag_pic)
1702 return false;
1703 return true;
1704}
1705
1706/* Initialize large model PIC register. */
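/* The sequence emitted below is roughly

     .LPIC:  lea     .LPIC(%rip), <pic reg>
             movabs  $_GLOBAL_OFFSET_TABLE_-.LPIC, <tmp reg>
             add     <tmp reg>, <pic reg>

   which materializes the GOT address without assuming that it is reachable
   through a 32-bit displacement. */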
1707
1708static void
1709ix86_init_large_pic_reg (unsigned int tmp_regno)
1710{
1711 rtx_code_label *label;
1712 rtx tmp_reg;
1713
1714 gcc_assert (Pmode == DImode);
1715 label = gen_label_rtx ();
1716 emit_label (label);
1717 LABEL_PRESERVE_P (label) = 1;
1718 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
1719 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
1720 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
1721 label));
1722 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
1723 emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
1724 const char *name = LABEL_NAME (label);
1725 PUT_CODE (label, NOTE);
1726 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
1727 NOTE_DELETED_LABEL_NAME (label) = name;
1728}
1729
1730/* Create and initialize PIC register if required. */
1731static void
1732ix86_init_pic_reg (void)
1733{
1734 edge entry_edge;
1735 rtx_insn *seq;
1736
1737 if (!ix86_use_pseudo_pic_reg ())
1738 return;
1739
1740 start_sequence ();
1741
1742 if (TARGET_64BIT)
1743 {
1744 if (ix86_cmodel == CM_LARGE_PIC)
1745 ix86_init_large_pic_reg (R11_REG);
1746 else
1747 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
1748 }
1749 else
1750 {
1751 /* If there is a future mcount call in the function, it is more profitable
1752 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
1753 rtx reg = crtl->profile
1754 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
1755 : pic_offset_table_rtx;
1756 rtx_insn *insn = emit_insn (gen_set_got (reg));
1757 RTX_FRAME_RELATED_P (insn) = 1;
1758 if (crtl->profile)
1759 emit_move_insn (pic_offset_table_rtx, reg);
1760 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
1761 }
1762
1763 seq = get_insns ();
1764 end_sequence ();
1765
1766 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1767 insert_insn_on_edge (seq, entry_edge);
1768 commit_one_edge_insertion (entry_edge);
1769}
1770
1771/* Initialize a variable CUM of type CUMULATIVE_ARGS
1772 for a call to a function whose data type is FNTYPE.
1773 For a library call, FNTYPE is 0. */
1774
1775void
1776init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1777 tree fntype, /* tree ptr for function decl */
1778 rtx libname, /* SYMBOL_REF of library name or 0 */
1779 tree fndecl,
1780 int caller)
1781{
1782 struct cgraph_node *local_info_node = NULL;
1783 struct cgraph_node *target = NULL;
1784
1785 /* Set silent_p to false to raise an error for invalid calls when
1786 expanding function body. */
1787 cfun->machine->silent_p = false;
1788
1789 memset (cum, 0, sizeof (*cum));
1790
1791 if (fndecl)
1792 {
1793 target = cgraph_node::get (fndecl);
1794 if (target)
1795 {
1796 target = target->function_symbol ();
1797 local_info_node = cgraph_node::local_info_node (target->decl);
1798 cum->call_abi = ix86_function_abi (target->decl);
1799 }
1800 else
1801 cum->call_abi = ix86_function_abi (fndecl);
1802 }
1803 else
1804 cum->call_abi = ix86_function_type_abi (fntype);
1805
1806 cum->caller = caller;
1807
1808 /* Set up the number of registers to use for passing arguments. */
1809 cum->nregs = ix86_regparm;
1810 if (TARGET_64BIT)
1811 {
1812 cum->nregs = (cum->call_abi == SYSV_ABI
1813 ? X86_64_REGPARM_MAX
1814 : X86_64_MS_REGPARM_MAX);
1815 }
1816 if (TARGET_SSE)
1817 {
1818 cum->sse_nregs = SSE_REGPARM_MAX;
1819 if (TARGET_64BIT)
1820 {
1821 cum->sse_nregs = (cum->call_abi == SYSV_ABI
1822 ? X86_64_SSE_REGPARM_MAX
1823 : X86_64_MS_SSE_REGPARM_MAX);
1824 }
1825 }
1826 if (TARGET_MMX)
1827 cum->mmx_nregs = MMX_REGPARM_MAX;
1828 cum->warn_avx512f = true;
1829 cum->warn_avx = true;
1830 cum->warn_sse = true;
1831 cum->warn_mmx = true;
1832
1833 /* Because the type might mismatch between caller and callee, we need to
1834 use the actual type of the function for local calls.
1835 FIXME: cgraph_analyze can be told to actually record if a function uses
1836 va_start, so for local functions maybe_vaarg can be made more aggressive,
1837 helping K&R code.
1838 FIXME: once the type system is fixed, we won't need this code anymore. */
1839 if (local_info_node && local_info_node->local
1840 && local_info_node->can_change_signature)
1841 fntype = TREE_TYPE (target->decl);
1842 cum->stdarg = stdarg_p (fntype);
1843 cum->maybe_vaarg = (fntype
1844 ? (!prototype_p (fntype) || stdarg_p (fntype))
1845 : !libname);
1846
1847 cum->decl = fndecl;
1848
1849 cum->warn_empty = !warn_abi || cum->stdarg;
1850 if (!cum->warn_empty && fntype)
1851 {
1852 function_args_iterator iter;
1853 tree argtype;
1854 bool seen_empty_type = false;
1855 FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
1856 {
1857 if (argtype == error_mark_node || VOID_TYPE_P (argtype))
1858 break;
1859 if (TYPE_EMPTY_P (argtype))
1860 seen_empty_type = true;
1861 else if (seen_empty_type)
1862 {
1863 cum->warn_empty = true;
1864 break;
1865 }
1866 }
1867 }
1868
1869 if (!TARGET_64BIT)
1870 {
1871 /* If there are variable arguments, then we won't pass anything
1872 in registers in 32-bit mode. */
1873 if (stdarg_p (fntype))
1874 {
1875 cum->nregs = 0;
1876 /* Since in 32-bit mode variable arguments are always passed on the
1877 stack, there is a scratch register available for an indirect
1878 sibcall. */
1879 cfun->machine->arg_reg_available = true;
1880 cum->sse_nregs = 0;
1881 cum->mmx_nregs = 0;
1882 cum->warn_avx512f = false;
1883 cum->warn_avx = false;
1884 cum->warn_sse = false;
1885 cum->warn_mmx = false;
1886 return;
1887 }
1888
1889 /* Use ecx and edx registers if function has fastcall attribute,
1890 else look for regparm information. */
1891 if (fntype)
1892 {
1893 unsigned int ccvt = ix86_get_callcvt (fntype);
1894 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1895 {
1896 cum->nregs = 1;
1897 cum->fastcall = 1; /* Same first register as in fastcall. */
1898 }
1899 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1900 {
1901 cum->nregs = 2;
1902 cum->fastcall = 1;
1903 }
1904 else
1905 cum->nregs = ix86_function_regparm (fntype, fndecl);
1906 }
1907
1908 /* Set up the number of SSE registers used for passing SFmode
1909 and DFmode arguments. Warn for mismatching ABI. */
1910 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
1911 }
1912
1913 cfun->machine->arg_reg_available = (cum->nregs > 0);
1914}
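
/* Editorial illustration (not part of the upstream logic), sketching what
   the 32-bit paths above produce.  For a prototype such as

     int __attribute__((fastcall)) f (int a, int b, int c);

   init_cumulative_args sets cum->nregs = 2 and cum->fastcall = 1, so A and
   B are expected in ECX/EDX and C goes on the stack, while a variadic
   prototype forces cum->nregs = 0 and pushes everything onto the stack.  */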
1915
1916/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1917 But in the case of vector types, it is some vector mode.
1918
1919 When we have only some of our vector isa extensions enabled, then there
1920 are some modes for which vector_mode_supported_p is false. For these
1921 modes, the generic vector support in gcc will choose some non-vector mode
1922 in order to implement the type. By computing the natural mode, we'll
1923 select the proper ABI location for the operand and not depend on whatever
1924 the middle-end decides to do with these vector types.
1925
1926 The middle-end can't deal with vector types > 16 bytes. In this
1927 case, we return the original mode and warn of the ABI change if CUM
1928 isn't NULL.
1929
1930 If IN_RETURN is true, warn of the ABI change if the vector mode isn't
1931 available for the function return value. */
1932
1933static machine_mode
1934type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
1935 bool in_return)
1936{
1937 machine_mode mode = TYPE_MODE (type);
1938
1939 if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode))
1940 {
1941 HOST_WIDE_INT size = int_size_in_bytes (type);
1942 if ((size == 8 || size == 16 || size == 32 || size == 64)
1943 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
1944 && TYPE_VECTOR_SUBPARTS (type) > 1)
1945 {
1946 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
1947
1948 /* There are no XFmode vector modes ... */
1949 if (innermode == XFmode)
1950 return mode;
1951
1952 /* ... and no decimal float vector modes. */
1953 if (DECIMAL_FLOAT_MODE_P (innermode))
1954 return mode;
1955
1956 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
1957 mode = MIN_MODE_VECTOR_FLOAT;
1958 else
1959 mode = MIN_MODE_VECTOR_INT;
1960
1961 /* Get the mode which has this inner mode and number of units. */
1962 FOR_EACH_MODE_FROM (mode, mode)
1963 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
1964 && GET_MODE_INNER (mode) == innermode)
1965 {
1966 if (size == 64 && (!TARGET_AVX512F || !TARGET_EVEX512)
1967 && !TARGET_IAMCU)
1968 {
1969 static bool warnedavx512f;
1970 static bool warnedavx512f_ret;
1971
1972 if (cum && cum->warn_avx512f && !warnedavx512f)
1973 {
1974 if (warning (OPT_Wpsabi, "AVX512F vector argument "
1975 "without AVX512F enabled changes the ABI"))
1976 warnedavx512f = true;
1977 }
1978 else if (in_return && !warnedavx512f_ret)
1979 {
1980 if (warning (OPT_Wpsabi, "AVX512F vector return "
1981 "without AVX512F enabled changes the ABI"))
1982 warnedavx512f_ret = true;
1983 }
1984
1985 return TYPE_MODE (type);
1986 }
1987 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
1988 {
1989 static bool warnedavx;
1990 static bool warnedavx_ret;
1991
1992 if (cum && cum->warn_avx && !warnedavx)
1993 {
1994 if (warning (OPT_Wpsabi, "AVX vector argument "
1995 "without AVX enabled changes the ABI"))
1996 warnedavx = true;
1997 }
1998 else if (in_return && !warnedavx_ret)
1999 {
2000 if (warning (OPT_Wpsabi, "AVX vector return "
2001 "without AVX enabled changes the ABI"))
2002 warnedavx_ret = true;
2003 }
2004
2005 return TYPE_MODE (type);
2006 }
2007 else if (((size == 8 && TARGET_64BIT) || size == 16)
2008 && !TARGET_SSE
2009 && !TARGET_IAMCU)
2010 {
2011 static bool warnedsse;
2012 static bool warnedsse_ret;
2013
2014 if (cum && cum->warn_sse && !warnedsse)
2015 {
2016 if (warning (OPT_Wpsabi, "SSE vector argument "
2017 "without SSE enabled changes the ABI"))
2018 warnedsse = true;
2019 }
2020 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
2021 {
2022 if (warning (OPT_Wpsabi, "SSE vector return "
2023 "without SSE enabled changes the ABI"))
2024 warnedsse_ret = true;
2025 }
2026 }
2027 else if ((size == 8 && !TARGET_64BIT)
2028 && (!cfun
2029 || cfun->machine->func_type == TYPE_NORMAL)
2030 && !TARGET_MMX
2031 && !TARGET_IAMCU)
2032 {
2033 static bool warnedmmx;
2034 static bool warnedmmx_ret;
2035
2036 if (cum && cum->warn_mmx && !warnedmmx)
2037 {
2038 if (warning (OPT_Wpsabi, "MMX vector argument "
2039 "without MMX enabled changes the ABI"))
2040 warnedmmx = true;
2041 }
2042 else if (in_return && !warnedmmx_ret)
2043 {
2044 if (warning (OPT_Wpsabi, "MMX vector return "
2045 "without MMX enabled changes the ABI"))
2046 warnedmmx_ret = true;
2047 }
2048 }
2049 return mode;
2050 }
2051
2052 gcc_unreachable ();
2053 }
2054 }
2055
2056 return mode;
2057}
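
/* Editorial example of the -Wpsabi diagnostics above (a sketch, assuming a
   32-byte vector type and no -mavx):

     typedef double v4df __attribute__((vector_size (32)));
     void g (v4df x);

   With only SSE enabled, type_natural_mode cannot select V4DFmode, falls
   back to TYPE_MODE (type) and emits the "AVX vector argument without AVX
   enabled changes the ABI" warning.  */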
2058
2059/* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2060 this may not agree with the mode that the type system has chosen for the
2061 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2062 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2063
2064static rtx
2065gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
2066 unsigned int regno)
2067{
2068 rtx tmp;
2069
2070 if (orig_mode != BLKmode)
2071 tmp = gen_rtx_REG (orig_mode, regno);
2072 else
2073 {
2074 tmp = gen_rtx_REG (mode, regno);
2075 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2076 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2077 }
2078
2079 return tmp;
2080}
2081
2082/* x86-64 register passing implementation. See the x86-64 ABI for details.
2083 The goal of this code is to classify each 8-byte chunk of an incoming
2084 argument by register class and assign registers accordingly. */
2085
2086/* Return the union class of CLASS1 and CLASS2.
2087 See the x86-64 PS ABI for details. */
2088
2089static enum x86_64_reg_class
2090merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2091{
2092 /* Rule #1: If both classes are equal, this is the resulting class. */
2093 if (class1 == class2)
2094 return class1;
2095
2096 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2097 the other class. */
2098 if (class1 == X86_64_NO_CLASS)
2099 return class2;
2100 if (class2 == X86_64_NO_CLASS)
2101 return class1;
2102
2103 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2104 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2105 return X86_64_MEMORY_CLASS;
2106
2107 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2108 if ((class1 == X86_64_INTEGERSI_CLASS
2109 && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
2110 || (class2 == X86_64_INTEGERSI_CLASS
2111 && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
2112 return X86_64_INTEGERSI_CLASS;
2113 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2114 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2115 return X86_64_INTEGER_CLASS;
2116
2117 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2118 MEMORY is used. */
2119 if (class1 == X86_64_X87_CLASS
2120 || class1 == X86_64_X87UP_CLASS
2121 || class1 == X86_64_COMPLEX_X87_CLASS
2122 || class2 == X86_64_X87_CLASS
2123 || class2 == X86_64_X87UP_CLASS
2124 || class2 == X86_64_COMPLEX_X87_CLASS)
2125 return X86_64_MEMORY_CLASS;
2126
2127 /* Rule #6: Otherwise class SSE is used. */
2128 return X86_64_SSE_CLASS;
2129}
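
/* Editorial example of the merge rules above, assuming the SysV x86-64 ABI:
   for

     struct s { float f; int i; };    // one eightbyte, 8 bytes total

   the float is classified as an SSE(SF) class and the int as an integer
   class; rule #4 merges them into X86_64_INTEGER_CLASS, so the whole
   struct travels in a single general-purpose register.  */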
2130
2131/* Classify the argument of type TYPE and mode MODE.
2132 CLASSES will be filled by the register class used to pass each word
2133 of the operand. The number of words is returned. In case the parameter
2134 should be passed in memory, 0 is returned. As a special case for zero
2135 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2136
2137 BIT_OFFSET is used internally for handling records; it specifies the
2138 offset in bits, modulo 512, to avoid overflow cases.
2139
2140 See the x86-64 PS ABI for details.
2141*/
2142
2143static int
2144classify_argument (machine_mode mode, const_tree type,
2145 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
2146 int &zero_width_bitfields)
2147{
2148 HOST_WIDE_INT bytes
2149 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2150 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
2151
2152 /* Variable sized entities are always passed/returned in memory. */
2153 if (bytes < 0)
2154 return 0;
2155
2156 if (mode != VOIDmode)
2157 {
2158 /* The value of "named" doesn't matter. */
2159 function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
2160 if (targetm.calls.must_pass_in_stack (arg))
2161 return 0;
2162 }
2163
2164 if (type && (AGGREGATE_TYPE_P (type)
2165 || (TREE_CODE (type) == BITINT_TYPE && words > 1)))
2166 {
2167 int i;
2168 tree field;
2169 enum x86_64_reg_class subclasses[MAX_CLASSES];
2170
2171 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2172 if (bytes > 64)
2173 return 0;
2174
2175 for (i = 0; i < words; i++)
2176 classes[i] = X86_64_NO_CLASS;
2177
2178 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
2179 signal the memory class, so handle them as a special case. */
2180 if (!words)
2181 {
2182 classes[0] = X86_64_NO_CLASS;
2183 return 1;
2184 }
2185
2186 /* Classify each field of record and merge classes. */
2187 switch (TREE_CODE (type))
2188 {
2189 case RECORD_TYPE:
2190 /* And now merge the fields of structure. */
2191 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2192 {
2193 if (TREE_CODE (field) == FIELD_DECL)
2194 {
2195 int num;
2196
2197 if (TREE_TYPE (field) == error_mark_node)
2198 continue;
2199
2200 /* Bitfields are always classified as integer. Handle them
2201 early, since later code would consider them to be
2202 misaligned integers. */
2203 if (DECL_BIT_FIELD (field))
2204 {
2205 if (integer_zerop (DECL_SIZE (field)))
2206 {
2207 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
2208 continue;
2209 if (zero_width_bitfields != 2)
2210 {
2211 zero_width_bitfields = 1;
2212 continue;
2213 }
2214 }
2215 for (i = (int_bit_position (field)
2216 + (bit_offset % 64)) / 8 / 8;
2217 i < ((int_bit_position (field) + (bit_offset % 64))
2218 + tree_to_shwi (DECL_SIZE (field))
2219 + 63) / 8 / 8; i++)
2220 classes[i]
2221 = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
2222 }
2223 else
2224 {
2225 int pos;
2226
2227 type = TREE_TYPE (field);
2228
2229 /* Flexible array member is ignored. */
2230 if (TYPE_MODE (type) == BLKmode
2231 && TREE_CODE (type) == ARRAY_TYPE
2232 && TYPE_SIZE (type) == NULL_TREE
2233 && TYPE_DOMAIN (type) != NULL_TREE
2234 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
2235 == NULL_TREE))
2236 {
2237 static bool warned;
2238
2239 if (!warned && warn_psabi)
2240 {
2241 warned = true;
2242 inform (input_location,
2243 "the ABI of passing struct with"
2244 " a flexible array member has"
2245 " changed in GCC 4.4");
2246 }
2247 continue;
2248 }
2249 num = classify_argument (TYPE_MODE (type), type,
2250 subclasses,
2251 (int_bit_position (field)
2252 + bit_offset) % 512,
2253 zero_width_bitfields);
2254 if (!num)
2255 return 0;
2256 pos = (int_bit_position (field)
2257 + (bit_offset % 64)) / 8 / 8;
2258 for (i = 0; i < num && (i + pos) < words; i++)
2259 classes[i + pos]
2260 = merge_classes (subclasses[i], classes[i + pos]);
2261 }
2262 }
2263 }
2264 break;
2265
2266 case ARRAY_TYPE:
2267 /* Arrays are handled as small records. */
2268 {
2269 int num;
2270 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2271 TREE_TYPE (type), subclasses, bit_offset,
2272 zero_width_bitfields);
2273 if (!num)
2274 return 0;
2275
2276 /* The partial classes are now full classes. */
2277 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2278 subclasses[0] = X86_64_SSE_CLASS;
2279 if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
2280 subclasses[0] = X86_64_SSE_CLASS;
2281 if (subclasses[0] == X86_64_INTEGERSI_CLASS
2282 && !((bit_offset % 64) == 0 && bytes == 4))
2283 subclasses[0] = X86_64_INTEGER_CLASS;
2284
2285 for (i = 0; i < words; i++)
2286 classes[i] = subclasses[i % num];
2287
2288 break;
2289 }
2290 case UNION_TYPE:
2291 case QUAL_UNION_TYPE:
2292 /* Unions are similar to RECORD_TYPE but offset is always 0.
2293 */
2294 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2295 {
2296 if (TREE_CODE (field) == FIELD_DECL)
2297 {
2298 int num;
2299
2300 if (TREE_TYPE (field) == error_mark_node)
2301 continue;
2302
2303 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2304 TREE_TYPE (field), subclasses,
2305 bit_offset, zero_width_bitfields);
2306 if (!num)
2307 return 0;
2308 for (i = 0; i < num && i < words; i++)
2309 classes[i] = merge_classes (subclasses[i], classes[i]);
2310 }
2311 }
2312 break;
2313
2314 case BITINT_TYPE:
2315 /* _BitInt(N) for N > 64 is passed as a structure containing
2316 (N + 63) / 64 64-bit elements. */
2317 if (words > 2)
2318 return 0;
2319 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2320 return 2;
2321
2322 default:
2323 gcc_unreachable ();
2324 }
2325
2326 if (words > 2)
2327 {
2328 /* When size > 16 bytes, if the first one isn't
2329 X86_64_SSE_CLASS or any other ones aren't
2330 X86_64_SSEUP_CLASS, everything should be passed in
2331 memory. */
2332 if (classes[0] != X86_64_SSE_CLASS)
2333 return 0;
2334
2335 for (i = 1; i < words; i++)
2336 if (classes[i] != X86_64_SSEUP_CLASS)
2337 return 0;
2338 }
2339
2340 /* Final merger cleanup. */
2341 for (i = 0; i < words; i++)
2342 {
2343 /* If one class is MEMORY, everything should be passed in
2344 memory. */
2345 if (classes[i] == X86_64_MEMORY_CLASS)
2346 return 0;
2347
2348 /* X86_64_SSEUP_CLASS should always be preceded by
2349 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2350 if (classes[i] == X86_64_SSEUP_CLASS
2351 && classes[i - 1] != X86_64_SSE_CLASS
2352 && classes[i - 1] != X86_64_SSEUP_CLASS)
2353 {
2354 /* The first one should never be X86_64_SSEUP_CLASS. */
2355 gcc_assert (i != 0);
2356 classes[i] = X86_64_SSE_CLASS;
2357 }
2358
2359 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2360 everything should be passed in memory. */
2361 if (classes[i] == X86_64_X87UP_CLASS
2362 && (classes[i - 1] != X86_64_X87_CLASS))
2363 {
2364 static bool warned;
2365
2366 /* The first one should never be X86_64_X87UP_CLASS. */
2367 gcc_assert (i != 0);
2368 if (!warned && warn_psabi)
2369 {
2370 warned = true;
2371 inform (input_location,
2372 "the ABI of passing union with %<long double%>"
2373 " has changed in GCC 4.4");
2374 }
2375 return 0;
2376 }
2377 }
2378 return words;
2379 }
2380
2381 /* Compute alignment needed. We align all types to natural boundaries with
2382 exception of XFmode that is aligned to 64bits. */
2383 if (mode != VOIDmode && mode != BLKmode)
2384 {
2385 int mode_alignment = GET_MODE_BITSIZE (mode);
2386
2387 if (mode == XFmode)
2388 mode_alignment = 128;
2389 else if (mode == XCmode)
2390 mode_alignment = 256;
2391 if (COMPLEX_MODE_P (mode))
2392 mode_alignment /= 2;
2393 /* Misaligned fields are always returned in memory. */
2394 if (bit_offset % mode_alignment)
2395 return 0;
2396 }
2397
2398 /* For V1xx modes, just use the base mode. */
2399 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
2400 && GET_MODE_UNIT_SIZE (mode) == bytes)
2401 mode = GET_MODE_INNER (mode);
2402
2403 /* Classification of atomic types. */
2404 switch (mode)
2405 {
2406 case E_SDmode:
2407 case E_DDmode:
2408 classes[0] = X86_64_SSE_CLASS;
2409 return 1;
2410 case E_TDmode:
2411 classes[0] = X86_64_SSE_CLASS;
2412 classes[1] = X86_64_SSEUP_CLASS;
2413 return 2;
2414 case E_DImode:
2415 case E_SImode:
2416 case E_HImode:
2417 case E_QImode:
2418 case E_CSImode:
2419 case E_CHImode:
2420 case E_CQImode:
2421 {
2422 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
2423
2424 /* Analyze last 128 bits only. */
2425 size = (size - 1) & 0x7f;
2426
2427 if (size < 32)
2428 {
2429 classes[0] = X86_64_INTEGERSI_CLASS;
2430 return 1;
2431 }
2432 else if (size < 64)
2433 {
2434 classes[0] = X86_64_INTEGER_CLASS;
2435 return 1;
2436 }
2437 else if (size < 64+32)
2438 {
2439 classes[0] = X86_64_INTEGER_CLASS;
2440 classes[1] = X86_64_INTEGERSI_CLASS;
2441 return 2;
2442 }
2443 else if (size < 64+64)
2444 {
2445 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2446 return 2;
2447 }
2448 else
2449 gcc_unreachable ();
2450 }
2451 case E_CDImode:
2452 case E_TImode:
2453 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2454 return 2;
2455 case E_COImode:
2456 case E_OImode:
2457 /* OImode shouldn't be used directly. */
2458 gcc_unreachable ();
2459 case E_CTImode:
2460 return 0;
2461 case E_HFmode:
2462 case E_BFmode:
2463 if (!(bit_offset % 64))
2464 classes[0] = X86_64_SSEHF_CLASS;
2465 else
2466 classes[0] = X86_64_SSE_CLASS;
2467 return 1;
2468 case E_SFmode:
2469 if (!(bit_offset % 64))
2470 classes[0] = X86_64_SSESF_CLASS;
2471 else
2472 classes[0] = X86_64_SSE_CLASS;
2473 return 1;
2474 case E_DFmode:
2475 classes[0] = X86_64_SSEDF_CLASS;
2476 return 1;
2477 case E_XFmode:
2478 classes[0] = X86_64_X87_CLASS;
2479 classes[1] = X86_64_X87UP_CLASS;
2480 return 2;
2481 case E_TFmode:
2482 classes[0] = X86_64_SSE_CLASS;
2483 classes[1] = X86_64_SSEUP_CLASS;
2484 return 2;
2485 case E_HCmode:
2486 case E_BCmode:
2487 classes[0] = X86_64_SSE_CLASS;
2488 if (!(bit_offset % 64))
2489 return 1;
2490 else
2491 {
2492 classes[1] = X86_64_SSEHF_CLASS;
2493 return 2;
2494 }
2495 case E_SCmode:
2496 classes[0] = X86_64_SSE_CLASS;
2497 if (!(bit_offset % 64))
2498 return 1;
2499 else
2500 {
2501 static bool warned;
2502
2503 if (!warned && warn_psabi)
2504 {
2505 warned = true;
2506 inform (input_location,
2507 "the ABI of passing structure with %<complex float%>"
2508 " member has changed in GCC 4.4");
2509 }
2510 classes[1] = X86_64_SSESF_CLASS;
2511 return 2;
2512 }
2513 case E_DCmode:
2514 classes[0] = X86_64_SSEDF_CLASS;
2515 classes[1] = X86_64_SSEDF_CLASS;
2516 return 2;
2517 case E_XCmode:
2518 classes[0] = X86_64_COMPLEX_X87_CLASS;
2519 return 1;
2520 case E_TCmode:
2521 /* This mode is larger than 16 bytes. */
2522 return 0;
2523 case E_V8SFmode:
2524 case E_V8SImode:
2525 case E_V32QImode:
2526 case E_V16HFmode:
2527 case E_V16BFmode:
2528 case E_V16HImode:
2529 case E_V4DFmode:
2530 case E_V4DImode:
2531 classes[0] = X86_64_SSE_CLASS;
2532 classes[1] = X86_64_SSEUP_CLASS;
2533 classes[2] = X86_64_SSEUP_CLASS;
2534 classes[3] = X86_64_SSEUP_CLASS;
2535 return 4;
2536 case E_V8DFmode:
2537 case E_V16SFmode:
2538 case E_V32HFmode:
2539 case E_V32BFmode:
2540 case E_V8DImode:
2541 case E_V16SImode:
2542 case E_V32HImode:
2543 case E_V64QImode:
2544 classes[0] = X86_64_SSE_CLASS;
2545 classes[1] = X86_64_SSEUP_CLASS;
2546 classes[2] = X86_64_SSEUP_CLASS;
2547 classes[3] = X86_64_SSEUP_CLASS;
2548 classes[4] = X86_64_SSEUP_CLASS;
2549 classes[5] = X86_64_SSEUP_CLASS;
2550 classes[6] = X86_64_SSEUP_CLASS;
2551 classes[7] = X86_64_SSEUP_CLASS;
2552 return 8;
2553 case E_V4SFmode:
2554 case E_V4SImode:
2555 case E_V16QImode:
2556 case E_V8HImode:
2557 case E_V8HFmode:
2558 case E_V8BFmode:
2559 case E_V2DFmode:
2560 case E_V2DImode:
2561 classes[0] = X86_64_SSE_CLASS;
2562 classes[1] = X86_64_SSEUP_CLASS;
2563 return 2;
2564 case E_V1TImode:
2565 case E_V1DImode:
2566 case E_V2SFmode:
2567 case E_V2SImode:
2568 case E_V4HImode:
2569 case E_V4HFmode:
2570 case E_V4BFmode:
2571 case E_V2HFmode:
2572 case E_V2BFmode:
2573 case E_V8QImode:
2574 classes[0] = X86_64_SSE_CLASS;
2575 return 1;
2576 case E_BLKmode:
2577 case E_VOIDmode:
2578 return 0;
2579 default:
2580 gcc_assert (VECTOR_MODE_P (mode));
2581
2582 if (bytes > 16)
2583 return 0;
2584
2585 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2586
2587 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2588 classes[0] = X86_64_INTEGERSI_CLASS;
2589 else
2590 classes[0] = X86_64_INTEGER_CLASS;
2591 classes[1] = X86_64_INTEGER_CLASS;
2592 return 1 + (bytes > 8);
2593 }
2594}
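
/* Editorial worked example of the classification above, assuming the SysV
   x86-64 ABI:

     struct s { double d; long l; };  // 16 bytes, two eightbytes

   classify_argument returns 2 with classes[0] = X86_64_SSEDF_CLASS for the
   double and classes[1] = X86_64_INTEGER_CLASS for the long, so the struct
   is passed with D in an SSE register and L in an integer register rather
   than in memory.  */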
2595
2596/* Wrapper around classify_argument with the extra zero_width_bitfields
2597 argument, to diagnose GCC 12.1 ABI differences for C. */
2598
2599static int
2600classify_argument (machine_mode mode, const_tree type,
2601 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2602{
2603 int zero_width_bitfields = 0;
2604 static bool warned = false;
2605 int n = classify_argument (mode, type, classes, bit_offset,
2606 zero_width_bitfields);
2607 if (!zero_width_bitfields || warned || !warn_psabi)
2608 return n;
2609 enum x86_64_reg_class alt_classes[MAX_CLASSES];
2610 zero_width_bitfields = 2;
2611 if (classify_argument (mode, type, alt_classes, bit_offset,
2612 zero_width_bitfields) != n)
2613 zero_width_bitfields = 3;
2614 else
2615 for (int i = 0; i < n; i++)
2616 if (classes[i] != alt_classes[i])
2617 {
2618 zero_width_bitfields = 3;
2619 break;
2620 }
2621 if (zero_width_bitfields == 3)
2622 {
2623 warned = true;
2624 const char *url
2625 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
2626
2627 inform (input_location,
2628 "the ABI of passing C structures with zero-width bit-fields"
2629 " has changed in GCC %{12.1%}", url);
2630 }
2631 return n;
2632}
2633
2634/* Examine the argument and set the number of registers required in each
2635 class. Return true iff the parameter should be passed in memory. */
2636
2637static bool
2638examine_argument (machine_mode mode, const_tree type, int in_return,
2639 int *int_nregs, int *sse_nregs)
2640{
2641 enum x86_64_reg_class regclass[MAX_CLASSES];
2642 int n = classify_argument (mode, type, regclass, 0);
2643
2644 *int_nregs = 0;
2645 *sse_nregs = 0;
2646
2647 if (!n)
2648 return true;
2649 for (n--; n >= 0; n--)
2650 switch (regclass[n])
2651 {
2652 case X86_64_INTEGER_CLASS:
2653 case X86_64_INTEGERSI_CLASS:
2654 (*int_nregs)++;
2655 break;
2656 case X86_64_SSE_CLASS:
2657 case X86_64_SSEHF_CLASS:
2658 case X86_64_SSESF_CLASS:
2659 case X86_64_SSEDF_CLASS:
2660 (*sse_nregs)++;
2661 break;
2662 case X86_64_NO_CLASS:
2663 case X86_64_SSEUP_CLASS:
2664 break;
2665 case X86_64_X87_CLASS:
2666 case X86_64_X87UP_CLASS:
2667 case X86_64_COMPLEX_X87_CLASS:
2668 if (!in_return)
2669 return true;
2670 break;
2671 case X86_64_MEMORY_CLASS:
2672 gcc_unreachable ();
2673 }
2674
2675 return false;
2676}
2677
2678/* Construct container for the argument used by GCC interface. See
2679 FUNCTION_ARG for the detailed description. */
2680
2681static rtx
2682construct_container (machine_mode mode, machine_mode orig_mode,
2683 const_tree type, int in_return, int nintregs, int nsseregs,
2684 const int *intreg, int sse_regno)
2685{
2686 /* The following variables hold the static issued_error state. */
2687 static bool issued_sse_arg_error;
2688 static bool issued_sse_ret_error;
2689 static bool issued_x87_ret_error;
2690
2691 machine_mode tmpmode;
2692 int bytes
2693 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2694 enum x86_64_reg_class regclass[MAX_CLASSES];
2695 int n;
2696 int i;
2697 int nexps = 0;
2698 int needed_sseregs, needed_intregs;
2699 rtx exp[MAX_CLASSES];
2700 rtx ret;
2701
2702 n = classify_argument (mode, type, regclass, 0);
2703 if (!n)
2704 return NULL;
2705 if (examine_argument (mode, type, in_return, &needed_intregs,
2706 &needed_sseregs))
2707 return NULL;
2708 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2709 return NULL;
2710
2711 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2712 some less clueful developer tries to use floating-point anyway. */
2713 if (needed_sseregs
2714 && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
2715 {
2716 /* Return early if we shouldn't raise an error for invalid
2717 calls. */
2718 if (cfun != NULL && cfun->machine->silent_p)
2719 return NULL;
2720 if (in_return)
2721 {
2722 if (!issued_sse_ret_error)
2723 {
2724 if (VALID_SSE2_TYPE_MODE (mode))
2725 error ("SSE register return with SSE2 disabled");
2726 else
2727 error ("SSE register return with SSE disabled");
2728 issued_sse_ret_error = true;
2729 }
2730 }
2731 else if (!issued_sse_arg_error)
2732 {
2733 if (VALID_SSE2_TYPE_MODE (mode))
2734 error ("SSE register argument with SSE2 disabled");
2735 else
2736 error ("SSE register argument with SSE disabled");
2737 issued_sse_arg_error = true;
2738 }
2739 return NULL;
2740 }
2741
2742 /* Likewise, error if the ABI requires us to return values in the
2743 x87 registers and the user specified -mno-80387. */
2744 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
2745 for (i = 0; i < n; i++)
2746 if (regclass[i] == X86_64_X87_CLASS
2747 || regclass[i] == X86_64_X87UP_CLASS
2748 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
2749 {
2750 /* Return early if we shouldn't raise an error for invalid
2751 calls. */
2752 if (cfun != NULL && cfun->machine->silent_p)
2753 return NULL;
2754 if (!issued_x87_ret_error)
2755 {
2756 error ("x87 register return with x87 disabled");
2757 issued_x87_ret_error = true;
2758 }
2759 return NULL;
2760 }
2761
2762 /* First construct simple cases. Avoid SCmode, since we want to use a
2763 single register to pass this type. */
2764 if (n == 1 && mode != SCmode && mode != HCmode)
2765 switch (regclass[0])
2766 {
2767 case X86_64_INTEGER_CLASS:
2768 case X86_64_INTEGERSI_CLASS:
2769 return gen_rtx_REG (mode, intreg[0]);
2770 case X86_64_SSE_CLASS:
2771 case X86_64_SSEHF_CLASS:
2772 case X86_64_SSESF_CLASS:
2773 case X86_64_SSEDF_CLASS:
2774 if (mode != BLKmode)
2775 return gen_reg_or_parallel (mode, orig_mode,
2776 GET_SSE_REGNO (sse_regno));
2777 break;
2778 case X86_64_X87_CLASS:
2779 case X86_64_COMPLEX_X87_CLASS:
2780 return gen_rtx_REG (mode, FIRST_STACK_REG);
2781 case X86_64_NO_CLASS:
2782 /* Zero sized array, struct or class. */
2783 return NULL;
2784 default:
2785 gcc_unreachable ();
2786 }
2787 if (n == 2
2788 && regclass[0] == X86_64_SSE_CLASS
2789 && regclass[1] == X86_64_SSEUP_CLASS
2790 && mode != BLKmode)
2791 return gen_reg_or_parallel (mode, orig_mode,
2792 GET_SSE_REGNO (sse_regno));
2793 if (n == 4
2794 && regclass[0] == X86_64_SSE_CLASS
2795 && regclass[1] == X86_64_SSEUP_CLASS
2796 && regclass[2] == X86_64_SSEUP_CLASS
2797 && regclass[3] == X86_64_SSEUP_CLASS
2798 && mode != BLKmode)
2799 return gen_reg_or_parallel (mode, orig_mode,
2800 GET_SSE_REGNO (sse_regno));
2801 if (n == 8
2802 && regclass[0] == X86_64_SSE_CLASS
2803 && regclass[1] == X86_64_SSEUP_CLASS
2804 && regclass[2] == X86_64_SSEUP_CLASS
2805 && regclass[3] == X86_64_SSEUP_CLASS
2806 && regclass[4] == X86_64_SSEUP_CLASS
2807 && regclass[5] == X86_64_SSEUP_CLASS
2808 && regclass[6] == X86_64_SSEUP_CLASS
2809 && regclass[7] == X86_64_SSEUP_CLASS
2810 && mode != BLKmode)
2811 return gen_reg_or_parallel (mode, orig_mode,
2812 GET_SSE_REGNO (sse_regno));
2813 if (n == 2
2814 && regclass[0] == X86_64_X87_CLASS
2815 && regclass[1] == X86_64_X87UP_CLASS)
2816 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2817
2818 if (n == 2
2819 && regclass[0] == X86_64_INTEGER_CLASS
2820 && regclass[1] == X86_64_INTEGER_CLASS
2821 && (mode == CDImode || mode == TImode || mode == BLKmode)
2822 && intreg[0] + 1 == intreg[1])
2823 {
2824 if (mode == BLKmode)
2825 {
2826 /* Use TImode for BLKmode values in 2 integer registers. */
2827 exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
2828 gen_rtx_REG (TImode, intreg[0]),
2829 GEN_INT (0));
2830 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
2831 XVECEXP (ret, 0, 0) = exp[0];
2832 return ret;
2833 }
2834 else
2835 return gen_rtx_REG (mode, intreg[0]);
2836 }
2837
2838 /* Otherwise figure out the entries of the PARALLEL. */
2839 for (i = 0; i < n; i++)
2840 {
2841 int pos;
2842
2843 switch (regclass[i])
2844 {
2845 case X86_64_NO_CLASS:
2846 break;
2847 case X86_64_INTEGER_CLASS:
2848 case X86_64_INTEGERSI_CLASS:
2849 /* Merge TImodes on aligned occasions here too. */
2850 if (i * 8 + 8 > bytes)
2851 {
2852 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
2853 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
2854 /* We've requested a size (e.g. 24 bytes) for which no
2855 integer mode exists. Use DImode. */
2856 tmpmode = DImode;
2857 }
2858 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
2859 tmpmode = SImode;
2860 else
2861 tmpmode = DImode;
2862 exp [nexps++]
2863 = gen_rtx_EXPR_LIST (VOIDmode,
2864 gen_rtx_REG (tmpmode, *intreg),
2865 GEN_INT (i*8));
2866 intreg++;
2867 break;
2868 case X86_64_SSEHF_CLASS:
2869 tmpmode = (mode == BFmode ? BFmode : HFmode);
2870 exp [nexps++]
2871 = gen_rtx_EXPR_LIST (VOIDmode,
2872 gen_rtx_REG (tmpmode,
2873 GET_SSE_REGNO (sse_regno)),
2874 GEN_INT (i*8));
2875 sse_regno++;
2876 break;
2877 case X86_64_SSESF_CLASS:
2878 exp [nexps++]
2879 = gen_rtx_EXPR_LIST (VOIDmode,
2880 gen_rtx_REG (SFmode,
2881 GET_SSE_REGNO (sse_regno)),
2882 GEN_INT (i*8));
2883 sse_regno++;
2884 break;
2885 case X86_64_SSEDF_CLASS:
2886 exp [nexps++]
2887 = gen_rtx_EXPR_LIST (VOIDmode,
2888 gen_rtx_REG (DFmode,
2889 GET_SSE_REGNO (sse_regno)),
2890 GEN_INT (i*8));
2891 sse_regno++;
2892 break;
2893 case X86_64_SSE_CLASS:
2894 pos = i;
2895 switch (n)
2896 {
2897 case 1:
2898 tmpmode = DImode;
2899 break;
2900 case 2:
2901 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
2902 {
2903 tmpmode = TImode;
2904 i++;
2905 }
2906 else
2907 tmpmode = DImode;
2908 break;
2909 case 4:
2910 gcc_assert (i == 0
2911 && regclass[1] == X86_64_SSEUP_CLASS
2912 && regclass[2] == X86_64_SSEUP_CLASS
2913 && regclass[3] == X86_64_SSEUP_CLASS);
2914 tmpmode = OImode;
2915 i += 3;
2916 break;
2917 case 8:
2918 gcc_assert (i == 0
2919 && regclass[1] == X86_64_SSEUP_CLASS
2920 && regclass[2] == X86_64_SSEUP_CLASS
2921 && regclass[3] == X86_64_SSEUP_CLASS
2922 && regclass[4] == X86_64_SSEUP_CLASS
2923 && regclass[5] == X86_64_SSEUP_CLASS
2924 && regclass[6] == X86_64_SSEUP_CLASS
2925 && regclass[7] == X86_64_SSEUP_CLASS);
2926 tmpmode = XImode;
2927 i += 7;
2928 break;
2929 default:
2930 gcc_unreachable ();
2931 }
2932 exp [nexps++]
2933 = gen_rtx_EXPR_LIST (VOIDmode,
2934 gen_rtx_REG (tmpmode,
2935 GET_SSE_REGNO (sse_regno)),
2936 GEN_INT (pos*8));
2937 sse_regno++;
2938 break;
2939 default:
2940 gcc_unreachable ();
2941 }
2942 }
2943
2944 /* Empty aligned struct, union or class. */
2945 if (nexps == 0)
2946 return NULL;
2947
2948 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2949 for (i = 0; i < nexps; i++)
2950 XVECEXP (ret, 0, i) = exp [i];
2951 return ret;
2952}
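
/* Editorial sketch of the PARALLEL that construct_container builds for the
   two-eightbyte example above (struct { double d; long l; } as the first
   argument of a SysV call; the register choices are illustrative):

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. bytes 0-7 live in XMM0 and bytes 8-15 in RDI.  */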
2953
2954/* Update the data in CUM to advance over an argument of mode MODE
2955 and data type TYPE. (TYPE is null for libcalls where that information
2956 may not be available.)
2957
2958 Return the number of integer registers advanced over. */
2959
2960static int
2961function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2962 const_tree type, HOST_WIDE_INT bytes,
2963 HOST_WIDE_INT words)
2964{
2965 int res = 0;
2966 bool error_p = false;
2967
2968 if (TARGET_IAMCU)
2969 {
2970 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2971 bytes in registers. */
2972 if (!VECTOR_MODE_P (mode) && bytes <= 8)
2973 goto pass_in_reg;
2974 return res;
2975 }
2976
2977 switch (mode)
2978 {
2979 default:
2980 break;
2981
2982 case E_BLKmode:
2983 if (bytes < 0)
2984 break;
2985 /* FALLTHRU */
2986
2987 case E_DImode:
2988 case E_SImode:
2989 case E_HImode:
2990 case E_QImode:
2991pass_in_reg:
2992 cum->words += words;
2993 cum->nregs -= words;
2994 cum->regno += words;
2995 if (cum->nregs >= 0)
2996 res = words;
2997 if (cum->nregs <= 0)
2998 {
2999 cum->nregs = 0;
3000 cfun->machine->arg_reg_available = false;
3001 cum->regno = 0;
3002 }
3003 break;
3004
3005 case E_OImode:
3006 /* OImode shouldn't be used directly. */
3007 gcc_unreachable ();
3008
3009 case E_DFmode:
3010 if (cum->float_in_sse == -1)
3011 error_p = true;
3012 if (cum->float_in_sse < 2)
3013 break;
3014 /* FALLTHRU */
3015 case E_SFmode:
3016 if (cum->float_in_sse == -1)
3017 error_p = true;
3018 if (cum->float_in_sse < 1)
3019 break;
3020 /* FALLTHRU */
3021
3022 case E_V16HFmode:
3023 case E_V16BFmode:
3024 case E_V8SFmode:
3025 case E_V8SImode:
3026 case E_V64QImode:
3027 case E_V32HImode:
3028 case E_V16SImode:
3029 case E_V8DImode:
3030 case E_V32HFmode:
3031 case E_V32BFmode:
3032 case E_V16SFmode:
3033 case E_V8DFmode:
3034 case E_V32QImode:
3035 case E_V16HImode:
3036 case E_V4DFmode:
3037 case E_V4DImode:
3038 case E_TImode:
3039 case E_V16QImode:
3040 case E_V8HImode:
3041 case E_V4SImode:
3042 case E_V2DImode:
3043 case E_V8HFmode:
3044 case E_V8BFmode:
3045 case E_V4SFmode:
3046 case E_V2DFmode:
3047 if (!type || !AGGREGATE_TYPE_P (type))
3048 {
3049 cum->sse_words += words;
3050 cum->sse_nregs -= 1;
3051 cum->sse_regno += 1;
3052 if (cum->sse_nregs <= 0)
3053 {
3054 cum->sse_nregs = 0;
3055 cum->sse_regno = 0;
3056 }
3057 }
3058 break;
3059
3060 case E_V8QImode:
3061 case E_V4HImode:
3062 case E_V4HFmode:
3063 case E_V4BFmode:
3064 case E_V2SImode:
3065 case E_V2SFmode:
3066 case E_V1TImode:
3067 case E_V1DImode:
3068 if (!type || !AGGREGATE_TYPE_P (type))
3069 {
3070 cum->mmx_words += words;
3071 cum->mmx_nregs -= 1;
3072 cum->mmx_regno += 1;
3073 if (cum->mmx_nregs <= 0)
3074 {
3075 cum->mmx_nregs = 0;
3076 cum->mmx_regno = 0;
3077 }
3078 }
3079 break;
3080 }
3081 if (error_p)
3082 {
3083 cum->float_in_sse = 0;
3084 error ("calling %qD with SSE calling convention without "
3085 "SSE/SSE2 enabled", cum->decl);
3086 sorry ("this is a GCC bug that can be worked around by adding "
3087 "attribute used to function called");
3088 }
3089
3090 return res;
3091}
3092
3093static int
3094function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
3095 const_tree type, HOST_WIDE_INT words, bool named)
3096{
3097 int int_nregs, sse_nregs;
3098
3099 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
3100 if (!named && (VALID_AVX512F_REG_MODE (mode)
3101 || VALID_AVX256_REG_MODE (mode)))
3102 return 0;
3103
3104 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
3105 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3106 {
3107 cum->nregs -= int_nregs;
3108 cum->sse_nregs -= sse_nregs;
3109 cum->regno += int_nregs;
3110 cum->sse_regno += sse_nregs;
3111 return int_nregs;
3112 }
3113 else
3114 {
3115 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
3116 cum->words = ROUND_UP (cum->words, align);
3117 cum->words += words;
3118 return 0;
3119 }
3120}
3121
3122static int
3123function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3124 HOST_WIDE_INT words)
3125{
3126 /* Otherwise, this should be passed indirect. */
3127 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3128
3129 cum->words += words;
3130 if (cum->nregs > 0)
3131 {
3132 cum->nregs -= 1;
3133 cum->regno += 1;
3134 return 1;
3135 }
3136 return 0;
3137}
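
/* Editorial note on the MS ABI path above: every named parameter consumes
   exactly one of the four argument slots (RCX, RDX, R8, R9 / XMM0-XMM3)
   regardless of its class.  For example, in

     void h (int a, double b, int c);

   A is passed in ECX, B in XMM1 and C in R8D; anything whose size is not
   1, 2, 4 or 8 bytes is passed indirectly (see ix86_pass_by_reference).  */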
3138
3139/* Update the data in CUM to advance over argument ARG. */
3140
3141static void
3142ix86_function_arg_advance (cumulative_args_t cum_v,
3143 const function_arg_info &arg)
3144{
3145 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3146 machine_mode mode = arg.mode;
3147 HOST_WIDE_INT bytes, words;
3148 int nregs;
3149
3150 /* The argument of an interrupt handler is a special case and is
3151 handled in ix86_function_arg. */
3152 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3153 return;
3154
3155 bytes = arg.promoted_size_in_bytes ();
3156 words = CEIL (bytes, UNITS_PER_WORD);
3157
3158 if (arg.type)
3159 mode = type_natural_mode (arg.type, NULL, false);
3160
3161 if (TARGET_64BIT)
3162 {
3163 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3164
3165 if (call_abi == MS_ABI)
3166 nregs = function_arg_advance_ms_64 (cum, bytes, words);
3167 else
3168 nregs = function_arg_advance_64 (cum, mode, arg.type, words,
3169 arg.named);
3170 }
3171 else
3172 nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
3173
3174 if (!nregs)
3175 {
3176 /* Track if there are outgoing arguments on stack. */
3177 if (cum->caller)
3178 cfun->machine->outgoing_args_on_stack = true;
3179 }
3180}
3181
3182/* Define where to put the arguments to a function.
3183 Value is zero to push the argument on the stack,
3184 or a hard register in which to store the argument.
3185
3186 MODE is the argument's machine mode.
3187 TYPE is the data type of the argument (as a tree).
3188 This is null for libcalls where that information may
3189 not be available.
3190 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3191 the preceding args and about the function being called.
3192 NAMED is nonzero if this argument is a named parameter
3193 (otherwise it is an extra parameter matching an ellipsis). */
3194
3195static rtx
3196function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
3197 machine_mode orig_mode, const_tree type,
3198 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3199{
3200 bool error_p = false;
3201
3202 /* Avoid the AL settings for the Unix64 ABI. */
3203 if (mode == VOIDmode)
3204 return constm1_rtx;
3205
3206 if (TARGET_IAMCU)
3207 {
3208 /* Intel MCU psABI passes scalars and aggregates no larger than 8
3209 bytes in registers. */
3210 if (!VECTOR_MODE_P (mode) && bytes <= 8)
3211 goto pass_in_reg;
3212 return NULL_RTX;
3213 }
3214
3215 switch (mode)
3216 {
3217 default:
3218 break;
3219
3220 case E_BLKmode:
3221 if (bytes < 0)
3222 break;
3223 /* FALLTHRU */
3224 case E_DImode:
3225 case E_SImode:
3226 case E_HImode:
3227 case E_QImode:
3228pass_in_reg:
3229 if (words <= cum->nregs)
3230 {
3231 int regno = cum->regno;
3232
3233 /* Fastcall allocates the first two DWORD (SImode) or
3234 smaller arguments to ECX and EDX if they aren't
3235 aggregate types. */
3236 if (cum->fastcall)
3237 {
3238 if (mode == BLKmode
3239 || mode == DImode
3240 || (type && AGGREGATE_TYPE_P (type)))
3241 break;
3242
3243 /* ECX, not EAX, is the first allocated register. */
3244 if (regno == AX_REG)
3245 regno = CX_REG;
3246 }
3247 return gen_rtx_REG (mode, regno);
3248 }
3249 break;
3250
3251 case E_DFmode:
3252 if (cum->float_in_sse == -1)
3253 error_p = true;
3254 if (cum->float_in_sse < 2)
3255 break;
3256 /* FALLTHRU */
3257 case E_SFmode:
3258 if (cum->float_in_sse == -1)
3259 error_p = true;
3260 if (cum->float_in_sse < 1)
3261 break;
3262 /* FALLTHRU */
3263 case E_TImode:
3264 /* In 32bit, we pass TImode in xmm registers. */
3265 case E_V16QImode:
3266 case E_V8HImode:
3267 case E_V4SImode:
3268 case E_V2DImode:
3269 case E_V8HFmode:
3270 case E_V8BFmode:
3271 case E_V4SFmode:
3272 case E_V2DFmode:
3273 if (!type || !AGGREGATE_TYPE_P (type))
3274 {
3275 if (cum->sse_nregs)
3276 return gen_reg_or_parallel (mode, orig_mode,
3277 cum->sse_regno + FIRST_SSE_REG);
3278 }
3279 break;
3280
3281 case E_OImode:
3282 case E_XImode:
3283 /* OImode and XImode shouldn't be used directly. */
3284 gcc_unreachable ();
3285
3286 case E_V64QImode:
3287 case E_V32HImode:
3288 case E_V16SImode:
3289 case E_V8DImode:
3290 case E_V32HFmode:
3291 case E_V32BFmode:
3292 case E_V16SFmode:
3293 case E_V8DFmode:
3294 case E_V16HFmode:
3295 case E_V16BFmode:
3296 case E_V8SFmode:
3297 case E_V8SImode:
3298 case E_V32QImode:
3299 case E_V16HImode:
3300 case E_V4DFmode:
3301 case E_V4DImode:
3302 if (!type || !AGGREGATE_TYPE_P (type))
3303 {
3304 if (cum->sse_nregs)
3305 return gen_reg_or_parallel (mode, orig_mode,
3306 cum->sse_regno + FIRST_SSE_REG);
3307 }
3308 break;
3309
3310 case E_V8QImode:
3311 case E_V4HImode:
3312 case E_V4HFmode:
3313 case E_V4BFmode:
3314 case E_V2SImode:
3315 case E_V2SFmode:
3316 case E_V1TImode:
3317 case E_V1DImode:
3318 if (!type || !AGGREGATE_TYPE_P (type))
3319 {
3320 if (cum->mmx_nregs)
3321 return gen_reg_or_parallel (mode, orig_mode,
3322 cum->mmx_regno + FIRST_MMX_REG);
3323 }
3324 break;
3325 }
3326 if (error_p)
3327 {
3328 cum->float_in_sse = 0;
3329 error ("calling %qD with SSE calling convention without "
3330 "SSE/SSE2 enabled", cum->decl);
3331 sorry ("this is a GCC bug that can be worked around by adding "
3332 "attribute used to function called");
3333 }
3334
3335 return NULL_RTX;
3336}
3337
3338static rtx
3339function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3340 machine_mode orig_mode, const_tree type, bool named)
3341{
3342 /* Handle a hidden AL argument containing number of registers
3343 for varargs x86-64 functions. */
3344 if (mode == VOIDmode)
3345 return GEN_INT (cum->maybe_vaarg
3346 ? (cum->sse_nregs < 0
3347 ? X86_64_SSE_REGPARM_MAX
3348 : cum->sse_regno)
3349 : -1);
3350
3351 switch (mode)
3352 {
3353 default:
3354 break;
3355
3356 case E_V16HFmode:
3357 case E_V16BFmode:
3358 case E_V8SFmode:
3359 case E_V8SImode:
3360 case E_V32QImode:
3361 case E_V16HImode:
3362 case E_V4DFmode:
3363 case E_V4DImode:
3364 case E_V32HFmode:
3365 case E_V32BFmode:
3366 case E_V16SFmode:
3367 case E_V16SImode:
3368 case E_V64QImode:
3369 case E_V32HImode:
3370 case E_V8DFmode:
3371 case E_V8DImode:
3372 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
3373 if (!named)
3374 return NULL;
3375 break;
3376 }
3377
3378 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3379 cum->sse_nregs,
3380 &x86_64_int_parameter_registers [cum->regno],
3381 cum->sse_regno);
3382}
3383
3384static rtx
3385function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3386 machine_mode orig_mode, bool named, const_tree type,
3387 HOST_WIDE_INT bytes)
3388{
3389 unsigned int regno;
3390
3391 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
3392 We use the value -2 to specify that the current function call is MS ABI. */
3393 if (mode == VOIDmode)
3394 return GEN_INT (-2);
3395
3396 /* If we've run out of registers, it goes on the stack. */
3397 if (cum->nregs == 0)
3398 return NULL_RTX;
3399
3400 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3401
3402 /* Only floating point modes are passed in anything but integer regs. */
3403 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3404 {
3405 if (named)
3406 {
3407 if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
3408 regno = cum->regno + FIRST_SSE_REG;
3409 }
3410 else
3411 {
3412 rtx t1, t2;
3413
3414 /* Unnamed floating parameters are passed in both the
3415 SSE and integer registers. */
3416 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3417 t2 = gen_rtx_REG (mode, regno);
3418 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3419 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3420 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3421 }
3422 }
3423 /* Handle aggregate types passed in registers. */
3424 if (orig_mode == BLKmode)
3425 {
3426 if (bytes > 0 && bytes <= 8)
3427 mode = (bytes > 4 ? DImode : SImode);
3428 if (mode == BLKmode)
3429 mode = DImode;
3430 }
3431
3432 return gen_reg_or_parallel (mode, orig_mode, regno);
3433}
3434
3435/* Return where to put the arguments to a function.
3436 Return zero to push the argument on the stack, or a hard register in which to store the argument.
3437
3438 ARG describes the argument while CUM gives information about the
3439 preceding args and about the function being called. */
3440
3441static rtx
3442ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
3443{
3444 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3445 machine_mode mode = arg.mode;
3446 HOST_WIDE_INT bytes, words;
3447 rtx reg;
3448
3449 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3450 {
3451 gcc_assert (arg.type != NULL_TREE);
3452 if (POINTER_TYPE_P (arg.type))
3453 {
3454 /* This is the pointer argument. */
3455 gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
3456 /* It is at -WORD(AP) in the current frame in interrupt and
3457 exception handlers. */
3458 reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
3459 }
3460 else
3461 {
3462 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
3463 && TREE_CODE (arg.type) == INTEGER_TYPE
3464 && TYPE_MODE (arg.type) == word_mode);
3465 /* The error code is the word-mode integer argument at
3466 -2 * WORD(AP) in the current frame of the exception
3467 handler. */
3468 reg = gen_rtx_MEM (word_mode,
3469 plus_constant (Pmode,
3470 arg_pointer_rtx,
3471 -2 * UNITS_PER_WORD));
3472 }
3473 return reg;
3474 }
3475
3476 bytes = arg.promoted_size_in_bytes ();
3477 words = CEIL (bytes, UNITS_PER_WORD);
3478
3479 /* To simplify the code below, represent vector types with a vector mode
3480 even if MMX/SSE are not active. */
3481 if (arg.type && VECTOR_TYPE_P (arg.type))
3482 mode = type_natural_mode (arg.type, cum, false);
3483
3484 if (TARGET_64BIT)
3485 {
3486 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3487
3488 if (call_abi == MS_ABI)
3489 reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
3490 arg.type, bytes);
3491 else
3492 reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
3493 }
3494 else
3495 reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
3496
3497 /* Track if there are outgoing arguments on stack. */
3498 if (reg == NULL_RTX && cum->caller)
3499 cfun->machine->outgoing_args_on_stack = true;
3500
3501 return reg;
3502}
3503
3504/* A C expression that indicates when an argument must be passed by
3505 reference. If nonzero for an argument, a copy of that argument is
3506 made in memory and a pointer to the argument is passed instead of
3507 the argument itself. The pointer is passed in whatever way is
3508 appropriate for passing a pointer to that type. */
3509
3510static bool
3511ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
3512{
3513 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3514
3515 if (TARGET_64BIT)
3516 {
3517 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3518
3519 /* See Windows x64 Software Convention. */
3520 if (call_abi == MS_ABI)
3521 {
3522 HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
3523
3524 if (tree type = arg.type)
3525 {
3526 /* Arrays are passed by reference. */
3527 if (TREE_CODE (type) == ARRAY_TYPE)
3528 return true;
3529
3530 if (RECORD_OR_UNION_TYPE_P (type))
3531 {
3532 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3533 are passed by reference. */
3534 msize = int_size_in_bytes (type);
3535 }
3536 }
3537
3538 /* __m128 is passed by reference. */
3539 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
3540 }
3541 else if (arg.type && int_size_in_bytes (arg.type) == -1)
3542 return true;
3543 }
3544
3545 return false;
3546}
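
/* Editorial example of the MS ABI by-reference rule above: a struct whose
   size is not 1, 2, 4 or 8 bytes, e.g.

     struct s { int a, b, c; };       // 12 bytes

   is passed by reference under the Microsoft x64 convention, while the
   SysV ABI passes the same struct by value in two integer registers.  */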
3547
3548/* Return true when TYPE should be 128bit aligned for 32bit argument
3549 passing ABI. XXX: This function is obsolete and is only used for
3550 checking psABI compatibility with previous versions of GCC. */
3551
3552static bool
3553ix86_compat_aligned_value_p (const_tree type)
3554{
3555 machine_mode mode = TYPE_MODE (type);
3556 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
3557 || mode == TDmode
3558 || mode == TFmode
3559 || mode == TCmode)
3560 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3561 return true;
3562 if (TYPE_ALIGN (type) < 128)
3563 return false;
3564
3565 if (AGGREGATE_TYPE_P (type))
3566 {
3567 /* Walk the aggregates recursively. */
3568 switch (TREE_CODE (type))
3569 {
3570 case RECORD_TYPE:
3571 case UNION_TYPE:
3572 case QUAL_UNION_TYPE:
3573 {
3574 tree field;
3575
3576 /* Walk all the structure fields. */
3577 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3578 {
3579 if (TREE_CODE (field) == FIELD_DECL
3580 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
3581 return true;
3582 }
3583 break;
3584 }
3585
3586 case ARRAY_TYPE:
3587 /* Just for use if some languages pass arrays by value. */
3588 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
3589 return true;
3590 break;
3591
3592 default:
3593 gcc_unreachable ();
3594 }
3595 }
3596 return false;
3597}
3598
3599/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3600 XXX: This function is obsolete and is only used for checking psABI
3601 compatibility with previous versions of GCC. */
3602
3603static unsigned int
3604ix86_compat_function_arg_boundary (machine_mode mode,
3605 const_tree type, unsigned int align)
3606{
3607 /* In 32bit, only _Decimal128 and __float128 are aligned to their
3608 natural boundaries. */
3609 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
3610 {
3611 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3612 make an exception for SSE modes since these require 128bit
3613 alignment.
3614
3615 The handling here differs from field_alignment. ICC aligns MMX
3616 arguments to 4 byte boundaries, while structure fields are aligned
3617 to 8 byte boundaries. */
3618 if (!type)
3619 {
3620 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
3621 align = PARM_BOUNDARY;
3622 }
3623 else
3624 {
3625 if (!ix86_compat_aligned_value_p (type))
3626 align = PARM_BOUNDARY;
3627 }
3628 }
3629 if (align > BIGGEST_ALIGNMENT)
3630 align = BIGGEST_ALIGNMENT;
3631 return align;
3632}
3633
3634/* Return true when TYPE should be 128bit aligned for 32bit argument
3635 passing ABI. */
3636
3637static bool
3638ix86_contains_aligned_value_p (const_tree type)
3639{
3640 machine_mode mode = TYPE_MODE (type);
3641
3642 if (mode == XFmode || mode == XCmode)
3643 return false;
3644
3645 if (TYPE_ALIGN (type) < 128)
3646 return false;
3647
3648 if (AGGREGATE_TYPE_P (type))
3649 {
3650 /* Walk the aggregates recursively. */
3651 switch (TREE_CODE (type))
3652 {
3653 case RECORD_TYPE:
3654 case UNION_TYPE:
3655 case QUAL_UNION_TYPE:
3656 {
3657 tree field;
3658
3659 /* Walk all the structure fields. */
3660 for (field = TYPE_FIELDS (type);
3661 field;
3662 field = DECL_CHAIN (field))
3663 {
3664 if (TREE_CODE (field) == FIELD_DECL
3665 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
3666 return true;
3667 }
3668 break;
3669 }
3670
3671 case ARRAY_TYPE:
3672 /* Just for use if some languages pass arrays by value. */
3673 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
3674 return true;
3675 break;
3676
3677 default:
3678 gcc_unreachable ();
3679 }
3680 }
3681 else
3682 return TYPE_ALIGN (type) >= 128;
3683
3684 return false;
3685}
3686
3687/* Gives the alignment boundary, in bits, of an argument with the
3688 specified mode and type. */
3689
3690static unsigned int
3691ix86_function_arg_boundary (machine_mode mode, const_tree type)
3692{
3693 unsigned int align;
3694 if (type)
3695 {
3696 /* Since the main variant type is used for the call, we convert TYPE
3697 to its main variant. */
3698 type = TYPE_MAIN_VARIANT (type);
3699 align = TYPE_ALIGN (type);
3700 if (TYPE_EMPTY_P (type))
3701 return PARM_BOUNDARY;
3702 }
3703 else
3704 align = GET_MODE_ALIGNMENT (mode);
3705 if (align < PARM_BOUNDARY)
3706 align = PARM_BOUNDARY;
3707 else
3708 {
3709 static bool warned;
3710 unsigned int saved_align = align;
3711
3712 if (!TARGET_64BIT)
3713 {
3714 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3715 if (!type)
3716 {
3717 if (mode == XFmode || mode == XCmode)
3718 align = PARM_BOUNDARY;
3719 }
3720 else if (!ix86_contains_aligned_value_p (type))
3721 align = PARM_BOUNDARY;
3722
3723 if (align < 128)
3724 align = PARM_BOUNDARY;
3725 }
3726
3727 if (warn_psabi
3728 && !warned
3729 && align != ix86_compat_function_arg_boundary (mode, type,
3730 align: saved_align))
3731 {
3732 warned = true;
3733 inform (input_location,
3734 "the ABI for passing parameters with %d-byte"
3735 " alignment has changed in GCC 4.6",
3736 align / BITS_PER_UNIT);
3737 }
3738 }
3739
3740 return align;
3741}
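/* A worked example (illustrative only): in 32-bit mode an __m128 argument
   (TYPE_ALIGN == 128) keeps its 128-bit boundary, while a long double
   argument (XFmode) ends up at PARM_BOUNDARY, i.e. 32 bits, matching the
   i386 ABI note in the function above.  */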
3742
3743/* Return true if REGNO is a possible register number of a function value.  */
3744
3745static bool
3746ix86_function_value_regno_p (const unsigned int regno)
3747{
3748 switch (regno)
3749 {
3750 case AX_REG:
3751 return true;
3752 case DX_REG:
3753 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3754 case DI_REG:
3755 case SI_REG:
3756 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
3757
3758 /* Complex values are returned in %st(0)/%st(1) pair. */
3759 case ST0_REG:
3760 case ST1_REG:
3761 /* TODO: The function should depend on current function ABI but
3762 builtins.cc would need updating then. Therefore we use the
3763 default ABI. */
3764 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3765 return false;
3766 return TARGET_FLOAT_RETURNS_IN_80387;
3767
3768 /* Complex values are returned in %xmm0/%xmm1 pair. */
3769 case XMM0_REG:
3770 case XMM1_REG:
3771 return TARGET_SSE;
3772
3773 case MM0_REG:
3774 if (TARGET_MACHO || TARGET_64BIT)
3775 return false;
3776 return TARGET_MMX;
3777 }
3778
3779 return false;
3780}
3781
3782/* Check whether the register REGNO should be zeroed on X86.
3783 When ALL_SSE_ZEROED is true, all SSE registers have already been zeroed
3784 together, so there is no need to zero them again.
3785 When NEED_ZERO_MMX is true, MMX registers should be cleared. */
3786
3787static bool
3788zero_call_used_regno_p (const unsigned int regno,
3789 bool all_sse_zeroed,
3790 bool need_zero_mmx)
3791{
3792 return GENERAL_REGNO_P (regno)
3793 || (!all_sse_zeroed && SSE_REGNO_P (regno))
3794 || MASK_REGNO_P (regno)
3795 || (need_zero_mmx && MMX_REGNO_P (regno));
3796}
3797
3798/* Return the machine_mode that is used to zero register REGNO. */
3799
3800static machine_mode
3801zero_call_used_regno_mode (const unsigned int regno)
3802{
3803 /* NB: We only need to zero the lower 32 bits for integer registers
3804 and the lower 128 bits for vector registers, since the destination is
3805 zero-extended to the full register width. */
3806 if (GENERAL_REGNO_P (regno))
3807 return SImode;
3808 else if (SSE_REGNO_P (regno))
3809 return V4SFmode;
3810 else if (MASK_REGNO_P (regno))
3811 return HImode;
3812 else if (MMX_REGNO_P (regno))
3813 return V2SImode;
3814 else
3815 gcc_unreachable ();
3816}
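/* For example (illustrative): %eax is zeroed in SImode (a 32-bit xor clears
   the full 64-bit register via implicit zero extension), %xmm0 in V4SFmode,
   mask register %k0 in HImode and %mm0 in V2SImode, matching the cases
   above.  */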
3817
3818/* Generate a rtx to zero all vector registers together if possible,
3819 otherwise, return NULL. */
3820
3821static rtx
3822zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
3823{
3824 if (!TARGET_AVX)
3825 return NULL;
3826
3827 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3828 if ((LEGACY_SSE_REGNO_P (regno)
3829 || (TARGET_64BIT
3830 && (REX_SSE_REGNO_P (regno)
3831 || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
3832 && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3833 return NULL;
3834
3835 return gen_avx_vzeroall ();
3836}
3837
3838/* Generate insns to zero all st registers together.
3839 Return the number of st registers zeroed, or 0 when no zeroing insns are generated.
3840 Assume the number of st registers that are zeroed is num_of_st,
3841 we will emit the following sequence to zero them together:
3842 fldz; \
3843 fldz; \
3844 ...
3845 fldz; \
3846 fstp %%st(0); \
3847 fstp %%st(0); \
3848 ...
3849 fstp %%st(0);
3850 i.e., num_of_st fldz insns followed by num_of_st fstp insns to clear
3851 the stack and mark the stack slots empty.
3852
3853 How to compute the num_of_st:
3854 There is no direct mapping from stack registers to hard register
3855 numbers. If one stack register needs to be cleared, we don't know
3856 where in the stack the value remains. So, if any stack register
3857 needs to be cleared, the whole stack should be cleared. However,
3858 x87 stack registers that hold the return value should be excluded.
3859 x87 returns in the top register (the top two for complex values), so
3860 num_of_st should be 7 or 6 when x87 returns, otherwise it will be 8.
3861 Return the value of num_of_st. */
3862
3863
3864static int
3865zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
3866{
3867
3868 /* If the FPU is disabled, no need to zero all st registers. */
3869 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3870 return 0;
3871
3872 unsigned int num_of_st = 0;
3873 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3874 if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
3875 && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3876 {
3877 num_of_st++;
3878 break;
3879 }
3880
3881 if (num_of_st == 0)
3882 return 0;
3883
3884 bool return_with_x87 = false;
3885 return_with_x87 = (crtl->return_rtx
3886 && (STACK_REG_P (crtl->return_rtx)));
3887
3888 bool complex_return = false;
3889 complex_return = (crtl->return_rtx
3890 && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));
3891
3892 if (return_with_x87)
3893 if (complex_return)
3894 num_of_st = 6;
3895 else
3896 num_of_st = 7;
3897 else
3898 num_of_st = 8;
3899
3900 rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
3901 for (unsigned int i = 0; i < num_of_st; i++)
3902 emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));
3903
3904 for (unsigned int i = 0; i < num_of_st; i++)
3905 {
3906 rtx insn;
3907 insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
3908 add_reg_note (insn, REG_DEAD, st_reg);
3909 }
3910 return num_of_st;
3911}
3912
3913
3914/* When the routine exits in MMX mode, if any ST register needs
3915 to be zeroed, we should clear all MMX registers except
3916 RET_MMX_REGNO, which holds the return value. */
3917static bool
3918zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
3919 unsigned int ret_mmx_regno)
3920{
3921 bool need_zero_all_mm = false;
3922 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3923 if (STACK_REGNO_P (regno)
3924 && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3925 {
3926 need_zero_all_mm = true;
3927 break;
3928 }
3929
3930 if (!need_zero_all_mm)
3931 return false;
3932
3933 machine_mode mode = V2SImode;
3934 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
3935 if (regno != ret_mmx_regno)
3936 {
3937 rtx reg = gen_rtx_REG (mode, regno);
3938 emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
3939 }
3940 return true;
3941}
3942
3943/* TARGET_ZERO_CALL_USED_REGS. */
3944/* Generate a sequence of instructions that zero registers specified by
3945 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
3946 zeroed. */
3947static HARD_REG_SET
3948ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
3949{
3950 HARD_REG_SET zeroed_hardregs;
3951 bool all_sse_zeroed = false;
3952 int all_st_zeroed_num = 0;
3953 bool all_mm_zeroed = false;
3954
3955 CLEAR_HARD_REG_SET (zeroed_hardregs);
3956
3957 /* First, let's see whether we can zero all vector registers together. */
3958 rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
3959 if (zero_all_vec_insn)
3960 {
3961 emit_insn (zero_all_vec_insn);
3962 all_sse_zeroed = true;
3963 }
3964
3965 /* MM and ST registers share the same register set, so we should follow
3966 these rules to clear them:
3967 MMX exit mode x87 exit mode
3968 -------------|----------------------|---------------
3969 uses x87 reg | clear all MMX | clear all x87
3970 uses MMX reg | clear individual MMX | clear all x87
3971 x87 + MMX | clear all MMX | clear all x87
3972
3973 First, we should decide which mode (MMX mode or x87 mode) the function
3974 exits with. */
3975
3976 bool exit_with_mmx_mode = (crtl->return_rtx
3977 && (MMX_REG_P (crtl->return_rtx)));
3978
3979 if (!exit_with_mmx_mode)
3980 /* x87 exit mode, we should zero all st registers together. */
3981 {
3982 all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);
3983
3984 if (all_st_zeroed_num > 0)
3985 for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++)
3986 /* x87 stack registers that hold the return value should be excluded.
3987 x87 returns in the top (two for complex values) register. */
3988 if (all_st_zeroed_num == 8
3989 || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx))
3990 || (all_st_zeroed_num == 6
3991 && (regno == (REGNO (crtl->return_rtx) + 1)))))
3992 SET_HARD_REG_BIT (zeroed_hardregs, regno);
3993 }
3994 else
3995 /* MMX exit mode, check whether we can zero all mm registers. */
3996 {
3997 unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
3998 all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
3999 exit_mmx_regno);
4000 if (all_mm_zeroed)
4001 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
4002 if (regno != exit_mmx_regno)
4003 SET_HARD_REG_BIT (zeroed_hardregs, regno);
4004 }
4005
4006 /* Now, generate instructions to zero all the other registers. */
4007
4008 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4009 {
4010 if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
4011 continue;
4012 if (!zero_call_used_regno_p (regno, all_sse_zeroed,
4013 exit_with_mmx_mode && !all_mm_zeroed))
4014 continue;
4015
4016 SET_HARD_REG_BIT (zeroed_hardregs, regno);
4017
4018 machine_mode mode = zero_call_used_regno_mode (regno);
4019
4020 rtx reg = gen_rtx_REG (mode, regno);
4021 rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));
4022
4023 switch (mode)
4024 {
4025 case E_SImode:
4026 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
4027 {
4028 rtx clob = gen_rtx_CLOBBER (VOIDmode,
4029 gen_rtx_REG (CCmode,
4030 FLAGS_REG));
4031 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
4032 tmp,
4033 clob));
4034 }
4035 /* FALLTHRU. */
4036
4037 case E_V4SFmode:
4038 case E_HImode:
4039 case E_V2SImode:
4040 emit_insn (tmp);
4041 break;
4042
4043 default:
4044 gcc_unreachable ();
4045 }
4046 }
4047 return zeroed_hardregs;
4048}
4049
4050/* Define how to find the value returned by a function.
4051 VALTYPE is the data type of the value (as a tree).
4052 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4053 otherwise, FUNC is 0. */
4054
4055static rtx
4056function_value_32 (machine_mode orig_mode, machine_mode mode,
4057 const_tree fntype, const_tree fn)
4058{
4059 unsigned int regno;
4060
4061 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4062 we normally prevent this case when MMX is not available. However,
4063 some ABIs may require the result to be returned like DImode. */
4064 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4065 regno = FIRST_MMX_REG;
4066
4067 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4068 we prevent this case when SSE is not available. However, some ABIs
4069 may require the result to be returned like integer TImode. */
4070 else if (mode == TImode
4071 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4072 regno = FIRST_SSE_REG;
4073
4074 /* 32-byte vector modes in %ymm0. */
4075 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
4076 regno = FIRST_SSE_REG;
4077
4078 /* 64-byte vector modes in %zmm0. */
4079 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
4080 regno = FIRST_SSE_REG;
4081
4082 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4083 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4084 regno = FIRST_FLOAT_REG;
4085 else
4086 /* Most things go in %eax. */
4087 regno = AX_REG;
4088
4089 /* Return __bf16, _Float16 and _Complex _Float16 in an SSE register. */
4090 if (mode == HFmode || mode == BFmode)
4091 {
4092 if (!TARGET_SSE2)
4093 {
4094 error ("SSE register return with SSE2 disabled");
4095 regno = AX_REG;
4096 }
4097 else
4098 regno = FIRST_SSE_REG;
4099 }
4100
4101 if (mode == HCmode)
4102 {
4103 if (!TARGET_SSE2)
4104 error ("SSE register return with SSE2 disabled");
4105
4106 rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
4107 XVECEXP (ret, 0, 0)
4108 = gen_rtx_EXPR_LIST (VOIDmode,
4109 gen_rtx_REG (SImode,
4110 TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
4111 GEN_INT (0));
4112 return ret;
4113 }
4114
4115 /* Override FP return register with %xmm0 for local functions when
4116 SSE math is enabled or for functions with sseregparm attribute. */
4117 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4118 {
4119 int sse_level = ix86_function_sseregparm (fntype, fn, false);
4120 if (sse_level == -1)
4121 {
4122 error ("calling %qD with SSE calling convention without "
4123 "SSE/SSE2 enabled", fn);
4124 sorry ("this is a GCC bug that can be worked around by adding "
4125 "attribute used to function called");
4126 }
4127 else if ((sse_level >= 1 && mode == SFmode)
4128 || (sse_level == 2 && mode == DFmode))
4129 regno = FIRST_SSE_REG;
4130 }
4131
4132 /* OImode shouldn't be used directly. */
4133 gcc_assert (mode != OImode);
4134
4135 return gen_rtx_REG (orig_mode, regno);
4136}
4137
4138static rtx
4139function_value_64 (machine_mode orig_mode, machine_mode mode,
4140 const_tree valtype)
4141{
4142 rtx ret;
4143
4144 /* Handle libcalls, which don't provide a type node. */
4145 if (valtype == NULL)
4146 {
4147 unsigned int regno;
4148
4149 switch (mode)
4150 {
4151 case E_BFmode:
4152 case E_HFmode:
4153 case E_HCmode:
4154 case E_SFmode:
4155 case E_SCmode:
4156 case E_DFmode:
4157 case E_DCmode:
4158 case E_TFmode:
4159 case E_SDmode:
4160 case E_DDmode:
4161 case E_TDmode:
4162 regno = FIRST_SSE_REG;
4163 break;
4164 case E_XFmode:
4165 case E_XCmode:
4166 regno = FIRST_FLOAT_REG;
4167 break;
4168 case E_TCmode:
4169 return NULL;
4170 default:
4171 regno = AX_REG;
4172 }
4173
4174 return gen_rtx_REG (mode, regno);
4175 }
4176 else if (POINTER_TYPE_P (valtype))
4177 {
4178 /* Pointers are always returned in word_mode. */
4179 mode = word_mode;
4180 }
4181
4182 ret = construct_container (mode, orig_mode, valtype, 1,
4183 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
4184 x86_64_int_return_registers, 0);
4185
4186 /* For zero-sized structures, construct_container returns NULL, but we
4187 need to keep the rest of the compiler happy by returning a meaningful value. */
4188 if (!ret)
4189 ret = gen_rtx_REG (orig_mode, AX_REG);
4190
4191 return ret;
4192}
4193
4194static rtx
4195function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
4196 const_tree fntype, const_tree fn, const_tree valtype)
4197{
4198 unsigned int regno;
4199
4200 /* Floating point return values in %st(0)
4201 (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
4202 if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
4203 && (GET_MODE_SIZE (mode) > 8
4204 || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
4205 {
4206 regno = FIRST_FLOAT_REG;
4207 return gen_rtx_REG (orig_mode, regno);
4208 }
4209 else
4210 return function_value_32 (orig_mode, mode, fntype, fn);
4211}
4212
4213static rtx
4214function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
4215 const_tree valtype)
4216{
4217 unsigned int regno = AX_REG;
4218
4219 if (TARGET_SSE)
4220 {
4221 switch (GET_MODE_SIZE (mode))
4222 {
4223 case 16:
4224 if (valtype != NULL_TREE
4225 && !VECTOR_INTEGER_TYPE_P (valtype)
4227 && !INTEGRAL_TYPE_P (valtype)
4228 && !VECTOR_FLOAT_TYPE_P (valtype))
4229 break;
4230 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4231 && !COMPLEX_MODE_P (mode))
4232 regno = FIRST_SSE_REG;
4233 break;
4234 case 8:
4235 case 4:
4236 if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
4237 break;
4238 if (mode == SFmode || mode == DFmode)
4239 regno = FIRST_SSE_REG;
4240 break;
4241 default:
4242 break;
4243 }
4244 }
4245 return gen_rtx_REG (orig_mode, regno);
4246}
4247
4248static rtx
4249ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4250 machine_mode orig_mode, machine_mode mode)
4251{
4252 const_tree fn, fntype;
4253
4254 fn = NULL_TREE;
4255 if (fntype_or_decl && DECL_P (fntype_or_decl))
4256 fn = fntype_or_decl;
4257 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4258
4259 if (ix86_function_type_abi (fntype) == MS_ABI)
4260 {
4261 if (TARGET_64BIT)
4262 return function_value_ms_64 (orig_mode, mode, valtype);
4263 else
4264 return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
4265 }
4266 else if (TARGET_64BIT)
4267 return function_value_64 (orig_mode, mode, valtype);
4268 else
4269 return function_value_32 (orig_mode, mode, fntype, fn);
4270}
4271
4272static rtx
4273ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
4274{
4275 machine_mode mode, orig_mode;
4276
4277 orig_mode = TYPE_MODE (valtype);
4278 mode = type_natural_mode (valtype, NULL, true);
4279 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4280}
4281
4282/* Pointer function arguments and return values are promoted to
4283 word_mode for normal functions. */
4284
4285static machine_mode
4286ix86_promote_function_mode (const_tree type, machine_mode mode,
4287 int *punsignedp, const_tree fntype,
4288 int for_return)
4289{
4290 if (cfun->machine->func_type == TYPE_NORMAL
4291 && type != NULL_TREE
4292 && POINTER_TYPE_P (type))
4293 {
4294 *punsignedp = POINTERS_EXTEND_UNSIGNED;
4295 return word_mode;
4296 }
4297 return default_promote_function_mode (type, mode, punsignedp, fntype,
4298 for_return);
4299}
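/* Illustrative example (not a normative statement of the hook): on x32,
   where ptr_mode is SImode, a pointer argument or return value of a normal
   function is promoted to word_mode (DImode) with unsigned extension;
   non-TYPE_NORMAL functions such as interrupt handlers fall back to the
   default promotion.  */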
4300
4301/* Return true if a structure, union or array with MODE containing FIELD
4302 should be accessed using BLKmode. */
4303
4304static bool
4305ix86_member_type_forces_blk (const_tree field, machine_mode mode)
4306{
4307 /* Union with XFmode must be in BLKmode. */
4308 return (mode == XFmode
4309 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
4310 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
4311}
4312
4313rtx
4314ix86_libcall_value (machine_mode mode)
4315{
4316 return ix86_function_value_1 (NULL, NULL, mode, mode);
4317}
4318
4319/* Return true iff type is returned in memory. */
4320
4321static bool
4322ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4323{
4324 const machine_mode mode = type_natural_mode (type, NULL, true);
4325 HOST_WIDE_INT size;
4326
4327 if (TARGET_64BIT)
4328 {
4329 if (ix86_function_type_abi (fntype) == MS_ABI)
4330 {
4331 size = int_size_in_bytes (type);
4332
4333 /* __m128 is returned in xmm0. */
4334 if ((!type || VECTOR_INTEGER_TYPE_P (type)
4335 || INTEGRAL_TYPE_P (type)
4336 || VECTOR_FLOAT_TYPE_P (type))
4337 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4338 && !COMPLEX_MODE_P (mode)
4339 && (GET_MODE_SIZE (mode) == 16 || size == 16))
4340 return false;
4341
4342 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
4343 return size != 1 && size != 2 && size != 4 && size != 8;
4344 }
4345 else
4346 {
4347 int needed_intregs, needed_sseregs;
4348
4349 return examine_argument (mode, type, 1,
4350 &needed_intregs, &needed_sseregs);
4351 }
4352 }
4353 else
4354 {
4355 size = int_size_in_bytes (type);
4356
4357 /* Intel MCU psABI returns scalars and aggregates no larger than 8
4358 bytes in registers. */
4359 if (TARGET_IAMCU)
4360 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
4361
4362 if (mode == BLKmode)
4363 return true;
4364
4365 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4366 return false;
4367
4368 if (VECTOR_MODE_P (mode) || mode == TImode)
4369 {
4370 /* User-created vectors small enough to fit in EAX. */
4371 if (size < 8)
4372 return false;
4373
4374 /* Unless the ABI prescribes otherwise,
4375 MMX/3dNow values are returned in MM0 if available. */
4376
4377 if (size == 8)
4378 return TARGET_VECT8_RETURNS || !TARGET_MMX;
4379
4380 /* SSE values are returned in XMM0 if available. */
4381 if (size == 16)
4382 return !TARGET_SSE;
4383
4384 /* AVX values are returned in YMM0 if available. */
4385 if (size == 32)
4386 return !TARGET_AVX;
4387
4388 /* AVX512F values are returned in ZMM0 if available. */
4389 if (size == 64)
4390 return !TARGET_AVX512F || !TARGET_EVEX512;
4391 }
4392
4393 if (mode == XFmode)
4394 return false;
4395
4396 if (size > 12)
4397 return true;
4398
4399 /* OImode shouldn't be used directly. */
4400 gcc_assert (mode != OImode);
4401
4402 return false;
4403 }
4404}
4405
4406/* Implement TARGET_PUSH_ARGUMENT. */
4407
4408static bool
4409ix86_push_argument (unsigned int npush)
4410{
4411 /* If SSE2 is available, use vector moves to put large arguments onto the
4412 stack. NB: In 32-bit mode, use 8-byte vector moves. */
4413 return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8))
4414 && TARGET_PUSH_ARGS
4415 && !ACCUMULATE_OUTGOING_ARGS);
4416}
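/* Illustrative example: with SSE2 in 64-bit mode a 16-byte argument is not
   pushed; it is stored to the pre-allocated outgoing argument block with
   vector moves instead, while an 8-byte argument may still use push.  */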
4417
4418
4419/* Create the va_list data type. */
4420
4421static tree
4422ix86_build_builtin_va_list_64 (void)
4423{
4424 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4425
4426 record = lang_hooks.types.make_type (RECORD_TYPE);
4427 type_decl = build_decl (BUILTINS_LOCATION,
4428 TYPE_DECL, get_identifier ("__va_list_tag"), record);
4429
4430 f_gpr = build_decl (BUILTINS_LOCATION,
4431 FIELD_DECL, get_identifier ("gp_offset"),
4432 unsigned_type_node);
4433 f_fpr = build_decl (BUILTINS_LOCATION,
4434 FIELD_DECL, get_identifier ("fp_offset"),
4435 unsigned_type_node);
4436 f_ovf = build_decl (BUILTINS_LOCATION,
4437 FIELD_DECL, get_identifier ("overflow_arg_area"),
4438 ptr_type_node);
4439 f_sav = build_decl (BUILTINS_LOCATION,
4440 FIELD_DECL, get_identifier ("reg_save_area"),
4441 ptr_type_node);
4442
4443 va_list_gpr_counter_field = f_gpr;
4444 va_list_fpr_counter_field = f_fpr;
4445
4446 DECL_FIELD_CONTEXT (f_gpr) = record;
4447 DECL_FIELD_CONTEXT (f_fpr) = record;
4448 DECL_FIELD_CONTEXT (f_ovf) = record;
4449 DECL_FIELD_CONTEXT (f_sav) = record;
4450
4451 TYPE_STUB_DECL (record) = type_decl;
4452 TYPE_NAME (record) = type_decl;
4453 TYPE_FIELDS (record) = f_gpr;
4454 DECL_CHAIN (f_gpr) = f_fpr;
4455 DECL_CHAIN (f_fpr) = f_ovf;
4456 DECL_CHAIN (f_ovf) = f_sav;
4457
4458 layout_type (record);
4459
4460 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
4461 NULL_TREE, TYPE_ATTRIBUTES (record));
4462
4463 /* The correct type is an array type of one element. */
4464 return build_array_type (record, build_index_type (size_zero_node));
4465}
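/* For reference, the record built above corresponds to the familiar
   psABI C-level layout (sketch):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];  */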
4466
4467/* Set up the builtin va_list data type and, for 64-bit, the additional
4468 calling-convention-specific va_list data types. */
4469
4470static tree
4471ix86_build_builtin_va_list (void)
4472{
4473 if (TARGET_64BIT)
4474 {
4475 /* Initialize ABI specific va_list builtin types.
4476
4477 In lto1, we can encounter two va_list types:
4478 - one as a result of the type-merge across TUs, and
4479 - the one constructed here.
4480 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
4481 a type identity check in canonical_va_list_type based on
4482 TYPE_MAIN_VARIANT (which we used to have) will not work.
4483 Instead, we tag each va_list_type_node with its unique attribute, and
4484 look for the attribute in the type identity check in
4485 canonical_va_list_type.
4486
4487 Tagging sysv_va_list_type_node directly with the attribute is
4488 problematic since it's an array of one record, which will decay into a
4489 pointer to record when used as a parameter (see build_va_arg comments for
4490 an example), dropping the attribute in the process. So we tag the
4491 record instead. */
4492
4493 /* For SYSV_ABI we use an array of one record. */
4494 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
4495
4496 /* For MS_ABI we use plain pointer to argument area. */
4497 tree char_ptr_type = build_pointer_type (char_type_node);
4498 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
4499 TYPE_ATTRIBUTES (char_ptr_type));
4500 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
4501
4502 return ((ix86_abi == MS_ABI)
4503 ? ms_va_list_type_node
4504 : sysv_va_list_type_node);
4505 }
4506 else
4507 {
4508 /* For i386 we use plain pointer to argument area. */
4509 return build_pointer_type (char_type_node);
4510 }
4511}
4512
4513/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4514
4515static void
4516setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4517{
4518 rtx save_area, mem;
4519 alias_set_type set;
4520 int i, max;
4521
4522 /* GPR size of varargs save area. */
4523 if (cfun->va_list_gpr_size)
4524 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
4525 else
4526 ix86_varargs_gpr_size = 0;
4527
4528 /* FPR size of varargs save area. We don't need it if we don't pass
4529 anything in SSE registers. */
4530 if (TARGET_SSE && cfun->va_list_fpr_size)
4531 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
4532 else
4533 ix86_varargs_fpr_size = 0;
4534
4535 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
4536 return;
4537
4538 save_area = frame_pointer_rtx;
4539 set = get_varargs_alias_set ();
4540
4541 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4542 if (max > X86_64_REGPARM_MAX)
4543 max = X86_64_REGPARM_MAX;
4544
4545 for (i = cum->regno; i < max; i++)
4546 {
4547 mem = gen_rtx_MEM (word_mode,
4548 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
4549 MEM_NOTRAP_P (mem) = 1;
4550 set_mem_alias_set (mem, set);
4551 emit_move_insn (mem,
4552 gen_rtx_REG (word_mode,
4553 x86_64_int_parameter_registers[i]));
4554 }
4555
4556 if (ix86_varargs_fpr_size)
4557 {
4558 machine_mode smode;
4559 rtx_code_label *label;
4560 rtx test;
4561
4562 /* Now emit code to save SSE registers. The AX parameter contains number
4563 of SSE parameter registers used to call this function, though all we
4564 actually check here is the zero/non-zero status. */
4565
4566 label = gen_label_rtx ();
4567 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
4568 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
4569 label));
4570
4571 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4572 we used movdqa (i.e. TImode) instead? Perhaps even better would
4573 be if we could determine the real mode of the data, via a hook
4574 into pass_stdarg. Ignore all that for now. */
4575 smode = V4SFmode;
4576 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
4577 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
4578
4579 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
4580 if (max > X86_64_SSE_REGPARM_MAX)
4581 max = X86_64_SSE_REGPARM_MAX;
4582
4583 for (i = cum->sse_regno; i < max; ++i)
4584 {
4585 mem = plus_constant (Pmode, save_area,
4586 i * 16 + ix86_varargs_gpr_size);
4587 mem = gen_rtx_MEM (smode, mem);
4588 MEM_NOTRAP_P (mem) = 1;
4589 set_mem_alias_set (mem, set);
4590 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
4591
4592 emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
4593 }
4594
4595 emit_label (label);
4596 }
4597}
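/* Sketch of the register save area laid out above (assuming no named
   arguments were passed in registers, i.e. cum->regno == cum->sse_regno == 0):

     reg_save_area + 0   .. + 47    %rdi, %rsi, %rdx, %rcx, %r8, %r9
     reg_save_area + 48  .. + 175   %xmm0 .. %xmm7 (saved only when AL != 0)

   which matches the gp_offset/fp_offset initial values set in ix86_va_start
   below.  */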
4598
4599static void
4600setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4601{
4602 alias_set_type set = get_varargs_alias_set ();
4603 int i;
4604
4605 /* Reset to zero, as a sysv va_arg may have been used
4606 before. */
4607 ix86_varargs_gpr_size = 0;
4608 ix86_varargs_fpr_size = 0;
4609
4610 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4611 {
4612 rtx reg, mem;
4613
4614 mem = gen_rtx_MEM (Pmode,
4615 plus_constant (Pmode, virtual_incoming_args_rtx,
4616 i * UNITS_PER_WORD));
4617 MEM_NOTRAP_P (mem) = 1;
4618 set_mem_alias_set (mem, set);
4619
4620 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4621 emit_move_insn (mem, reg);
4622 }
4623}
4624
4625static void
4626ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4627 const function_arg_info &arg,
4628 int *, int no_rtl)
4629{
4630 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4631 CUMULATIVE_ARGS next_cum;
4632 tree fntype;
4633
4634 /* This argument doesn't appear to be used anymore. Which is good,
4635 because the old code here didn't suppress rtl generation. */
4636 gcc_assert (!no_rtl);
4637
4638 if (!TARGET_64BIT)
4639 return;
4640
4641 fntype = TREE_TYPE (current_function_decl);
4642
4643 /* For varargs, we do not want to skip the dummy va_dcl argument.
4644 For stdargs, we do want to skip the last named argument. */
4645 next_cum = *cum;
4646 if ((!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
4647 || arg.type != NULL_TREE)
4648 && stdarg_p (fntype))
4649 ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4650
4651 if (cum->call_abi == MS_ABI)
4652 setup_incoming_varargs_ms_64 (&next_cum);
4653 else
4654 setup_incoming_varargs_64 (&next_cum);
4655}
4656
4657/* Checks if TYPE is of kind va_list char *. */
4658
4659static bool
4660is_va_list_char_pointer (tree type)
4661{
4662 tree canonic;
4663
4664 /* For 32-bit it is always true. */
4665 if (!TARGET_64BIT)
4666 return true;
4667 canonic = ix86_canonical_va_list_type (type);
4668 return (canonic == ms_va_list_type_node
4669 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4670}
4671
4672/* Implement va_start. */
4673
4674static void
4675ix86_va_start (tree valist, rtx nextarg)
4676{
4677 HOST_WIDE_INT words, n_gpr, n_fpr;
4678 tree f_gpr, f_fpr, f_ovf, f_sav;
4679 tree gpr, fpr, ovf, sav, t;
4680 tree type;
4681 rtx ovf_rtx;
4682
4683 if (flag_split_stack
4684 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4685 {
4686 unsigned int scratch_regno;
4687
4688 /* When we are splitting the stack, we can't refer to the stack
4689 arguments using internal_arg_pointer, because they may be on
4690 the old stack. The split stack prologue will arrange to
4691 leave a pointer to the old stack arguments in a scratch
4692 register, which we here copy to a pseudo-register. The split
4693 stack prologue can't set the pseudo-register directly because
4694 it (the prologue) runs before any registers have been saved. */
4695
4696 scratch_regno = split_stack_prologue_scratch_regno ();
4697 if (scratch_regno != INVALID_REGNUM)
4698 {
4699 rtx reg;
4700 rtx_insn *seq;
4701
4702 reg = gen_reg_rtx (Pmode);
4703 cfun->machine->split_stack_varargs_pointer = reg;
4704
4705 start_sequence ();
4706 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
4707 seq = get_insns ();
4708 end_sequence ();
4709
4710 push_topmost_sequence ();
4711 emit_insn_after (seq, entry_of_function ());
4712 pop_topmost_sequence ();
4713 }
4714 }
4715
4716 /* Only the 64-bit target needs something special. */
4717 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4718 {
4719 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4720 std_expand_builtin_va_start (valist, nextarg);
4721 else
4722 {
4723 rtx va_r, next;
4724
4725 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
4726 next = expand_binop (ptr_mode, add_optab,
4727 cfun->machine->split_stack_varargs_pointer,
4728 crtl->args.arg_offset_rtx,
4729 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4730 convert_move (va_r, next, 0);
4731 }
4732 return;
4733 }
4734
4735 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4736 f_fpr = DECL_CHAIN (f_gpr);
4737 f_ovf = DECL_CHAIN (f_fpr);
4738 f_sav = DECL_CHAIN (f_ovf);
4739
4740 valist = build_simple_mem_ref (valist);
4741 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
4742 /* The following should be folded into the MEM_REF offset. */
4743 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
4744 f_gpr, NULL_TREE);
4745 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
4746 f_fpr, NULL_TREE);
4747 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
4748 f_ovf, NULL_TREE);
4749 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
4750 f_sav, NULL_TREE);
4751
4752 /* Count number of gp and fp argument registers used. */
4753 words = crtl->args.info.words;
4754 n_gpr = crtl->args.info.regno;
4755 n_fpr = crtl->args.info.sse_regno;
4756
4757 if (cfun->va_list_gpr_size)
4758 {
4759 type = TREE_TYPE (gpr);
4760 t = build2 (MODIFY_EXPR, type,
4761 gpr, build_int_cst (type, n_gpr * 8));
4762 TREE_SIDE_EFFECTS (t) = 1;
4763 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4764 }
4765
4766 if (TARGET_SSE && cfun->va_list_fpr_size)
4767 {
4768 type = TREE_TYPE (fpr);
4769 t = build2 (MODIFY_EXPR, type, fpr,
4770 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
4771 TREE_SIDE_EFFECTS (t) = 1;
4772 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4773 }
4774
4775 /* Find the overflow area. */
4776 type = TREE_TYPE (ovf);
4777 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4778 ovf_rtx = crtl->args.internal_arg_pointer;
4779 else
4780 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
4781 t = make_tree (type, ovf_rtx);
4782 if (words != 0)
4783 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
4784
4785 t = build2 (MODIFY_EXPR, type, ovf, t);
4786 TREE_SIDE_EFFECTS (t) = 1;
4787 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4788
4789 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
4790 {
4791 /* Find the register save area.
4792 The function prologue saves it right above the stack frame. */
4793 type = TREE_TYPE (sav);
4794 t = make_tree (type, frame_pointer_rtx);
4795 if (!ix86_varargs_gpr_size)
4796 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
4797
4798 t = build2 (MODIFY_EXPR, type, sav, t);
4799 TREE_SIDE_EFFECTS (t) = 1;
4800 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4801 }
4802}
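/* An illustrative va_start result for a prototype like "int f (int x, ...)"
   with SSE enabled: gp_offset is set to 8 (one GP register consumed by the
   named argument X), fp_offset to 48 (no SSE registers consumed, so it points
   at the start of the SSE part of the save area), overflow_arg_area to the
   first stack-passed argument, and reg_save_area to the block saved by
   setup_incoming_varargs_64 above.  */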
4803
4804/* Implement va_arg. */
4805
4806static tree
4807ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4808 gimple_seq *post_p)
4809{
4810 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4811 tree f_gpr, f_fpr, f_ovf, f_sav;
4812 tree gpr, fpr, ovf, sav, t;
4813 int size, rsize;
4814 tree lab_false, lab_over = NULL_TREE;
4815 tree addr, t2;
4816 rtx container;
4817 int indirect_p = 0;
4818 tree ptrtype;
4819 machine_mode nat_mode;
4820 unsigned int arg_boundary;
4821 unsigned int type_align;
4822
4823 /* Only the 64-bit target needs something special. */
4824 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4825 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4826
4827 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4828 f_fpr = DECL_CHAIN (f_gpr);
4829 f_ovf = DECL_CHAIN (f_fpr);
4830 f_sav = DECL_CHAIN (f_ovf);
4831
4832 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
4833 valist, f_gpr, NULL_TREE);
4834
4835 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4836 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4837 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4838
4839 indirect_p = pass_va_arg_by_reference (type);
4840 if (indirect_p)
4841 type = build_pointer_type (type);
4842 size = arg_int_size_in_bytes (type);
4843 rsize = CEIL (size, UNITS_PER_WORD);
4844
4845 nat_mode = type_natural_mode (type, NULL, false);
4846 switch (nat_mode)
4847 {
4848 case E_V16HFmode:
4849 case E_V16BFmode:
4850 case E_V8SFmode:
4851 case E_V8SImode:
4852 case E_V32QImode:
4853 case E_V16HImode:
4854 case E_V4DFmode:
4855 case E_V4DImode:
4856 case E_V32HFmode:
4857 case E_V32BFmode:
4858 case E_V16SFmode:
4859 case E_V16SImode:
4860 case E_V64QImode:
4861 case E_V32HImode:
4862 case E_V8DFmode:
4863 case E_V8DImode:
4864 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
4865 if (!TARGET_64BIT_MS_ABI)
4866 {
4867 container = NULL;
4868 break;
4869 }
4870 /* FALLTHRU */
4871
4872 default:
4873 container = construct_container (nat_mode, TYPE_MODE (type),
4874 type, 0, X86_64_REGPARM_MAX,
4875 X86_64_SSE_REGPARM_MAX, intreg,
4876 0);
4877 break;
4878 }
4879
4880 /* Pull the value out of the saved registers. */
4881
4882 addr = create_tmp_var (ptr_type_node, "addr");
4883 type_align = TYPE_ALIGN (type);
4884
4885 if (container)
4886 {
4887 int needed_intregs, needed_sseregs;
4888 bool need_temp;
4889 tree int_addr, sse_addr;
4890
4891 lab_false = create_artificial_label (UNKNOWN_LOCATION);
4892 lab_over = create_artificial_label (UNKNOWN_LOCATION);
4893
4894 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4895
4896 need_temp = (!REG_P (container)
4897 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4898 || TYPE_ALIGN (type) > 128));
4899
4900 /* In case we are passing a structure, verify that it is a consecutive block
4901 in the register save area. If not, we need to do moves. */
4902 if (!need_temp && !REG_P (container))
4903 {
4904 /* Verify that all registers are strictly consecutive */
4905 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4906 {
4907 int i;
4908
4909 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4910 {
4911 rtx slot = XVECEXP (container, 0, i);
4912 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4913 || INTVAL (XEXP (slot, 1)) != i * 16)
4914 need_temp = true;
4915 }
4916 }
4917 else
4918 {
4919 int i;
4920
4921 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4922 {
4923 rtx slot = XVECEXP (container, 0, i);
4924 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4925 || INTVAL (XEXP (slot, 1)) != i * 8)
4926 need_temp = true;
4927 }
4928 }
4929 }
4930 if (!need_temp)
4931 {
4932 int_addr = addr;
4933 sse_addr = addr;
4934 }
4935 else
4936 {
4937 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4938 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4939 }
4940
4941 /* First ensure that we fit completely in registers. */
4942 if (needed_intregs)
4943 {
4944 t = build_int_cst (TREE_TYPE (gpr),
4945 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
4946 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4947 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4948 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4949 gimplify_and_add (t, pre_p);
4950 }
4951 if (needed_sseregs)
4952 {
4953 t = build_int_cst (TREE_TYPE (fpr),
4954 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4955 + X86_64_REGPARM_MAX * 8);
4956 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4957 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4958 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4959 gimplify_and_add (t, pre_p);
4960 }
4961
4962 /* Compute index to start of area used for integer regs. */
4963 if (needed_intregs)
4964 {
4965 /* int_addr = gpr + sav; */
4966 t = fold_build_pointer_plus (sav, gpr);
4967 gimplify_assign (int_addr, t, pre_p);
4968 }
4969 if (needed_sseregs)
4970 {
4971 /* sse_addr = fpr + sav; */
4972 t = fold_build_pointer_plus (sav, fpr);
4973 gimplify_assign (sse_addr, t, pre_p);
4974 }
4975 if (need_temp)
4976 {
4977 int i, prev_size = 0;
4978 tree temp = create_tmp_var (type, "va_arg_tmp");
4979 TREE_ADDRESSABLE (temp) = 1;
4980
4981 /* addr = &temp; */
4982 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4983 gimplify_assign (addr, t, pre_p);
4984
4985 for (i = 0; i < XVECLEN (container, 0); i++)
4986 {
4987 rtx slot = XVECEXP (container, 0, i);
4988 rtx reg = XEXP (slot, 0);
4989 machine_mode mode = GET_MODE (reg);
4990 tree piece_type;
4991 tree addr_type;
4992 tree daddr_type;
4993 tree src_addr, src;
4994 int src_offset;
4995 tree dest_addr, dest;
4996 int cur_size = GET_MODE_SIZE (mode);
4997
4998 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
4999 prev_size = INTVAL (XEXP (slot, 1));
5000 if (prev_size + cur_size > size)
5001 {
5002 cur_size = size - prev_size;
5003 unsigned int nbits = cur_size * BITS_PER_UNIT;
5004 if (!int_mode_for_size (nbits, 1).exists (&mode))
5005 mode = QImode;
5006 }
5007 piece_type = lang_hooks.types.type_for_mode (mode, 1);
5008 if (mode == GET_MODE (reg))
5009 addr_type = build_pointer_type (piece_type);
5010 else
5011 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
5012 true);
5013 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
5014 true);
5015
5016 if (SSE_REGNO_P (REGNO (reg)))
5017 {
5018 src_addr = sse_addr;
5019 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5020 }
5021 else
5022 {
5023 src_addr = int_addr;
5024 src_offset = REGNO (reg) * 8;
5025 }
5026 src_addr = fold_convert (addr_type, src_addr);
5027 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
5028
5029 dest_addr = fold_convert (daddr_type, addr);
5030 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
5031 if (cur_size == GET_MODE_SIZE (mode))
5032 {
5033 src = build_va_arg_indirect_ref (src_addr);
5034 dest = build_va_arg_indirect_ref (dest_addr);
5035
5036 gimplify_assign (dest, src, pre_p);
5037 }
5038 else
5039 {
5040 tree copy
5041 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
5042 3, dest_addr, src_addr,
5043 size_int (cur_size));
5044 gimplify_and_add (copy, pre_p);
5045 }
5046 prev_size += cur_size;
5047 }
5048 }
5049
5050 if (needed_intregs)
5051 {
5052 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5053 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5054 gimplify_assign (gpr, t, pre_p);
5055 /* The GPR save area guarantees only 8-byte alignment. */
5056 if (!need_temp)
5057 type_align = MIN (type_align, 64);
5058 }
5059
5060 if (needed_sseregs)
5061 {
5062 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5063 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5064 gimplify_assign (unshare_expr (fpr), t, pre_p);
5065 }
5066
5067 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
5068
5069 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
5070 }
5071
5072 /* ... otherwise out of the overflow area. */
5073
5074 /* When we align a parameter on the stack for the caller, parameter
5075 alignment beyond MAX_SUPPORTED_STACK_ALIGNMENT is capped at
5076 MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
5077 here with the caller. */
5078 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
5079 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
5080 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
5081
5082 /* Care for on-stack alignment if needed. */
5083 if (arg_boundary <= 64 || size == 0)
5084 t = ovf;
5085 else
5086 {
5087 HOST_WIDE_INT align = arg_boundary / 8;
5088 t = fold_build_pointer_plus_hwi (ovf, align - 1);
5089 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5090 build_int_cst (TREE_TYPE (t), -align));
5091 }
5092
5093 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5094 gimplify_assign (addr, t, pre_p);
5095
5096 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
5097 gimplify_assign (unshare_expr (ovf), t, pre_p);
5098
5099 if (container)
5100 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
5101
5102 type = build_aligned_type (type, type_align);
5103 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
5104 addr = fold_convert (ptrtype, addr);
5105
5106 if (indirect_p)
5107 addr = build_va_arg_indirect_ref (addr);
5108 return build_va_arg_indirect_ref (addr);
5109}
5110
5111/* Return true if OPNUM's MEM should be matched
5112 in movabs* patterns. */
5113
5114bool
5115ix86_check_movabs (rtx insn, int opnum)
5116{
5117 rtx set, mem;
5118
5119 set = PATTERN (insn);
5120 if (GET_CODE (set) == PARALLEL)
5121 set = XVECEXP (set, 0, 0);
5122 gcc_assert (GET_CODE (set) == SET);
5123 mem = XEXP (set, opnum);
5124 while (SUBREG_P (mem))
5125 mem = SUBREG_REG (mem);
5126 gcc_assert (MEM_P (mem));
5127 return volatile_ok || !MEM_VOLATILE_P (mem);
5128}
5129
5130/* Return false if INSN contains a MEM with a non-default address space. */
5131bool
5132ix86_check_no_addr_space (rtx insn)
5133{
5134 subrtx_var_iterator::array_type array;
5135 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
5136 {
5137 rtx x = *iter;
5138 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
5139 return false;
5140 }
5141 return true;
5142}
5143
5144/* Initialize the table of extra 80387 mathematical constants. */
5145
5146static void
5147init_ext_80387_constants (void)
5148{
5149 static const char * cst[5] =
5150 {
5151 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5152 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5153 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5154 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5155 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5156 };
5157 int i;
5158
5159 for (i = 0; i < 5; i++)
5160 {
5161 real_from_string (&ext_80387_constants_table[i], cst[i]);
5162 /* Ensure each constant is rounded to XFmode precision. */
5163 real_convert (&ext_80387_constants_table[i],
5164 XFmode, &ext_80387_constants_table[i]);
5165 }
5166
5167 ext_80387_constants_init = 1;
5168}
5169
5170/* Return non-zero if the constant is something that
5171 can be loaded with a special instruction. */
5172
5173int
5174standard_80387_constant_p (rtx x)
5175{
5176 machine_mode mode = GET_MODE (x);
5177
5178 const REAL_VALUE_TYPE *r;
5179
5180 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
5181 return -1;
5182
5183 if (x == CONST0_RTX (mode))
5184 return 1;
5185 if (x == CONST1_RTX (mode))
5186 return 2;
5187
5188 r = CONST_DOUBLE_REAL_VALUE (x);
5189
5190 /* For XFmode constants, try to find a special 80387 instruction when
5191 optimizing for size or on those CPUs that benefit from them. */
5192 if (mode == XFmode
5193 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
5194 && !flag_rounding_math)
5195 {
5196 int i;
5197
5198 if (! ext_80387_constants_init)
5199 init_ext_80387_constants ();
5200
5201 for (i = 0; i < 5; i++)
5202 if (real_identical (r, &ext_80387_constants_table[i]))
5203 return i + 3;
5204 }
5205
5206 /* Load of the constant -0.0 or -1.0 will be split as
5207 fldz;fchs or fld1;fchs sequence. */
5208 if (real_isnegzero (r))
5209 return 8;
5210 if (real_identical (r, &dconstm1))
5211 return 9;
5212
5213 return 0;
5214}
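/* Examples of the encoding returned above (illustrative): 0.0 -> 1 (fldz),
   1.0 -> 2 (fld1), log10(2) -> 3, ln(2) -> 4, log2(e) -> 5, log2(10) -> 6,
   pi -> 7 (the XFmode table entries), -0.0 -> 8 and -1.0 -> 9 (split into
   fldz/fld1 followed by fchs).  */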
5215
5216/* Return the opcode of the special instruction to be used to load
5217 the constant X. */
5218
5219const char *
5220standard_80387_constant_opcode (rtx x)
5221{
5222 switch (standard_80387_constant_p (x))
5223 {
5224 case 1:
5225 return "fldz";
5226 case 2:
5227 return "fld1";
5228 case 3:
5229 return "fldlg2";
5230 case 4:
5231 return "fldln2";
5232 case 5:
5233 return "fldl2e";
5234 case 6:
5235 return "fldl2t";
5236 case 7:
5237 return "fldpi";
5238 case 8:
5239 case 9:
5240 return "#";
5241 default:
5242 gcc_unreachable ();
5243 }
5244}
5245
5246/* Return the CONST_DOUBLE representing the 80387 constant that is
5247 loaded by the specified special instruction. The argument IDX
5248 matches the return value from standard_80387_constant_p. */
5249
5250rtx
5251standard_80387_constant_rtx (int idx)
5252{
5253 int i;
5254
5255 if (! ext_80387_constants_init)
5256 init_ext_80387_constants ();
5257
5258 switch (idx)
5259 {
5260 case 3:
5261 case 4:
5262 case 5:
5263 case 6:
5264 case 7:
5265 i = idx - 3;
5266 break;
5267
5268 default:
5269 gcc_unreachable ();
5270 }
5271
5272 return const_double_from_real_value (ext_80387_constants_table[i],
5273 XFmode);
5274}
5275
5276/* Return 1 if X is all zero bits, 2 if X is all one bits
5277 and 3 if X is all one bits with zero extension,
5278 in a supported SSE/AVX vector mode; return 0 otherwise. */
5279
5280int
5281standard_sse_constant_p (rtx x, machine_mode pred_mode)
5282{
5283 machine_mode mode;
5284
5285 if (!TARGET_SSE)
5286 return 0;
5287
5288 mode = GET_MODE (x);
5289
5290 if (x == const0_rtx || const0_operand (x, mode))
5291 return 1;
5292
5293 if (x == constm1_rtx
5294 || vector_all_ones_operand (x, mode)
5295 || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5296 || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
5297 && float_vector_all_ones_operand (x, mode)))
5298 {
5299 /* VOIDmode integer constant, get mode from the predicate. */
5300 if (mode == VOIDmode)
5301 mode = pred_mode;
5302
5303 switch (GET_MODE_SIZE (mode))
5304 {
5305 case 64:
5306 if (TARGET_AVX512F && TARGET_EVEX512)
5307 return 2;
5308 break;
5309 case 32:
5310 if (TARGET_AVX2)
5311 return 2;
5312 break;
5313 case 16:
5314 if (TARGET_SSE2)
5315 return 2;
5316 break;
5317 case 0:
5318 /* VOIDmode */
5319 gcc_unreachable ();
5320 default:
5321 break;
5322 }
5323 }
5324
5325 if (vector_all_ones_zero_extend_half_operand (x, mode)
5326 || vector_all_ones_zero_extend_quarter_operand (x, mode))
5327 return 3;
5328
5329 return 0;
5330}
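/* Illustrative examples: a V4SImode vector of all zeros yields 1 on any SSE
   target; a V8SImode vector of all ones yields 2 only when AVX2 is enabled;
   a 512-bit constant whose low 256 bits are all ones and whose high bits are
   zero yields 3 (the zero-extended all-ones form).  */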
5331
5332/* Return the opcode of the special instruction to be used to load
5333 the constant operands[1] into operands[0]. */
5334
5335const char *
5336standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
5337{
5338 machine_mode mode;
5339 rtx x = operands[1];
5340
5341 gcc_assert (TARGET_SSE);
5342
5343 mode = GET_MODE (x);
5344
5345 if (x == const0_rtx || const0_operand (x, mode))
5346 {
5347 switch (get_attr_mode (insn))
5348 {
5349 case MODE_TI:
5350 if (!EXT_REX_SSE_REG_P (operands[0]))
5351 return "%vpxor\t%0, %d0";
5352 /* FALLTHRU */
5353 case MODE_XI:
5354 case MODE_OI:
5355 if (EXT_REX_SSE_REG_P (operands[0]))
5356 {
5357 if (TARGET_AVX512VL)
5358 return "vpxord\t%x0, %x0, %x0";
5359 else if (TARGET_EVEX512)
5360 return "vpxord\t%g0, %g0, %g0";
5361 else
5362 gcc_unreachable ();
5363 }
5364 return "vpxor\t%x0, %x0, %x0";
5365
5366 case MODE_V2DF:
5367 if (!EXT_REX_SSE_REG_P (operands[0]))
5368 return "%vxorpd\t%0, %d0";
5369 /* FALLTHRU */
5370 case MODE_V8DF:
5371 case MODE_V4DF:
5372 if (EXT_REX_SSE_REG_P (operands[0]))
5373 {
5374 if (TARGET_AVX512DQ)
5375 {
5376 if (TARGET_AVX512VL)
5377 return "vxorpd\t%x0, %x0, %x0";
5378 else if (TARGET_EVEX512)
5379 return "vxorpd\t%g0, %g0, %g0";
5380 else
5381 gcc_unreachable ();
5382 }
5383 else
5384 {
5385 if (TARGET_AVX512VL)
5386 return "vpxorq\t%x0, %x0, %x0";
5387 else if (TARGET_EVEX512)
5388 return "vpxorq\t%g0, %g0, %g0";
5389 else
5390 gcc_unreachable ();
5391 }
5392 }
5393 return "vxorpd\t%x0, %x0, %x0";
5394
5395 case MODE_V4SF:
5396 if (!EXT_REX_SSE_REG_P (operands[0]))
5397 return "%vxorps\t%0, %d0";
5398 /* FALLTHRU */
5399 case MODE_V16SF:
5400 case MODE_V8SF:
5401 if (EXT_REX_SSE_REG_P (operands[0]))
5402 {
5403 if (TARGET_AVX512DQ)
5404 {
5405 if (TARGET_AVX512VL)
5406 return "vxorps\t%x0, %x0, %x0";
5407 else if (TARGET_EVEX512)
5408 return "vxorps\t%g0, %g0, %g0";
5409 else
5410 gcc_unreachable ();
5411 }
5412 else
5413 {
5414 if (TARGET_AVX512VL)
5415 return "vpxord\t%x0, %x0, %x0";
5416 else if (TARGET_EVEX512)
5417 return "vpxord\t%g0, %g0, %g0";
5418 else
5419 gcc_unreachable ();
5420 }
5421 }
5422 return "vxorps\t%x0, %x0, %x0";
5423
5424 default:
5425 gcc_unreachable ();
5426 }
5427 }
5428 else if (x == constm1_rtx
5429 || vector_all_ones_operand (x, mode)
5430 || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5431 && float_vector_all_ones_operand (x, mode)))
5432 {
5433 enum attr_mode insn_mode = get_attr_mode (insn);
5434
5435 switch (insn_mode)
5436 {
5437 case MODE_XI:
5438 case MODE_V8DF:
5439 case MODE_V16SF:
5440 gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
5441 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5442
5443 case MODE_OI:
5444 case MODE_V4DF:
5445 case MODE_V8SF:
5446 gcc_assert (TARGET_AVX2);
5447 /* FALLTHRU */
5448 case MODE_TI:
5449 case MODE_V2DF:
5450 case MODE_V4SF:
5451 gcc_assert (TARGET_SSE2);
5452 if (EXT_REX_SSE_REG_P (operands[0]))
5453 {
5454 if (TARGET_AVX512VL)
5455 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
5456 else if (TARGET_EVEX512)
5457 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5458 else
5459 gcc_unreachable ();
5460 }
5461 return (TARGET_AVX
5462 ? "vpcmpeqd\t%0, %0, %0"
5463 : "pcmpeqd\t%0, %0");
5464
5465 default:
5466 gcc_unreachable ();
5467 }
5468 }
5469 else if (vector_all_ones_zero_extend_half_operand (x, mode))
5470 {
5471 if (GET_MODE_SIZE (mode) == 64)
5472 {
5473 gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
5474 return "vpcmpeqd\t%t0, %t0, %t0";
5475 }
5476 else if (GET_MODE_SIZE (mode) == 32)
5477 {
5478 gcc_assert (TARGET_AVX);
5479 return "vpcmpeqd\t%x0, %x0, %x0";
5480 }
5481 gcc_unreachable ();
5482 }
5483 else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
5484 {
5485 gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
5486 return "vpcmpeqd\t%x0, %x0, %x0";
5487 }
5488
5489 gcc_unreachable ();
5490}
5491
5492/* Returns true if INSN can be transformed from a memory load
5493 to a supported FP constant load. */
5494
5495bool
5496ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
5497{
5498 rtx src = find_constant_src (insn);
5499
5500 gcc_assert (REG_P (dst));
5501
5502 if (src == NULL
5503 || (SSE_REGNO_P (REGNO (dst))
5504 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
5505 || (!TARGET_AVX512VL
5506 && EXT_REX_SSE_REGNO_P (REGNO (dst))
5507 && standard_sse_constant_p (src, GET_MODE (dst)) == 1)
5508 || (STACK_REGNO_P (REGNO (dst))
5509 && standard_80387_constant_p (src) < 1))
5510 return false;
5511
5512 return true;
5513}
5514
5515/* Predicate for pre-reload splitters with associated instructions,
5516 which can match any time before the split1 pass (usually combine),
5517 then are unconditionally split in that pass and should not be
5518 matched again afterwards. */
5519
5520bool
5521ix86_pre_reload_split (void)
5522{
5523 return (can_create_pseudo_p ()
5524 && !(cfun->curr_properties & PROP_rtl_split_insns));
5525}
5526
5527/* Return the opcode of the TYPE_SSEMOV instruction. To move from
5528 or to xmm16-xmm31/ymm16-ymm31 registers, we either require
5529 TARGET_AVX512VL or a register-to-register move, which can
5530 be done with a zmm register move. */
5531
5532static const char *
5533ix86_get_ssemov (rtx *operands, unsigned size,
5534 enum attr_mode insn_mode, machine_mode mode)
5535{
5536 char buf[128];
5537 bool misaligned_p = (misaligned_operand (operands[0], mode)
5538 || misaligned_operand (operands[1], mode));
5539 bool evex_reg_p = (size == 64
5540 || EXT_REX_SSE_REG_P (operands[0])
5541 || EXT_REX_SSE_REG_P (operands[1]));
5542
5543 bool egpr_p = (TARGET_APX_EGPR
5544 && (x86_extended_rex2reg_mentioned_p (operands[0])
5545 || x86_extended_rex2reg_mentioned_p (operands[1])));
5546 bool egpr_vl = egpr_p && TARGET_AVX512VL;
5547
5548 machine_mode scalar_mode;
5549
5550 const char *opcode = NULL;
5551 enum
5552 {
5553 opcode_int,
5554 opcode_float,
5555 opcode_double
5556 } type = opcode_int;
5557
5558 switch (insn_mode)
5559 {
5560 case MODE_V16SF:
5561 case MODE_V8SF:
5562 case MODE_V4SF:
5563 scalar_mode = E_SFmode;
5564 type = opcode_float;
5565 break;
5566 case MODE_V8DF:
5567 case MODE_V4DF:
5568 case MODE_V2DF:
5569 scalar_mode = E_DFmode;
5570 type = opcode_double;
5571 break;
5572 case MODE_XI:
5573 case MODE_OI:
5574 case MODE_TI:
5575 scalar_mode = GET_MODE_INNER (mode);
5576 break;
5577 default:
5578 gcc_unreachable ();
5579 }
5580
5581 /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
5582 we can only use zmm register move without memory operand. */
5583 if (evex_reg_p
5584 && !TARGET_AVX512VL
5585 && GET_MODE_SIZE (mode) < 64)
5586 {
5587 /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
5588 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
5589 AVX512VL is disabled, LRA can still generate reg to
5590 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
5591 modes. */
5592 if (memory_operand (operands[0], mode)
5593 || memory_operand (operands[1], mode))
5594 gcc_unreachable ();
5595 size = 64;
5596 /* We need TARGET_EVEX512 to move into zmm register. */
5597 gcc_assert (TARGET_EVEX512);
5598 switch (type)
5599 {
5600 case opcode_int:
5601 if (scalar_mode == E_HFmode || scalar_mode == E_BFmode)
5602 opcode = (misaligned_p
5603 ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
5604 : "vmovdqa64");
5605 else
5606 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5607 break;
5608 case opcode_float:
5609 opcode = misaligned_p ? "vmovups" : "vmovaps";
5610 break;
5611 case opcode_double:
5612 opcode = misaligned_p ? "vmovupd" : "vmovapd";
5613 break;
5614 }
5615 }
5616 else if (SCALAR_FLOAT_MODE_P (scalar_mode))
5617 {
5618 switch (scalar_mode)
5619 {
5620 case E_HFmode:
5621 case E_BFmode:
5622 if (evex_reg_p || egpr_vl)
5623 opcode = (misaligned_p
5624 ? (TARGET_AVX512BW
5625 ? "vmovdqu16"
5626 : "vmovdqu64")
5627 : "vmovdqa64");
5628 else if (egpr_p)
5629 opcode = (misaligned_p
5630 ? (TARGET_AVX512BW
5631 ? "vmovdqu16"
5632 : "%vmovups")
5633 : "%vmovaps");
5634 else
5635 opcode = (misaligned_p
5636 ? (TARGET_AVX512BW
5637 ? "vmovdqu16"
5638 : "%vmovdqu")
5639 : "%vmovdqa");
5640 break;
5641 case E_SFmode:
5642 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5643 break;
5644 case E_DFmode:
5645 opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
5646 break;
5647 case E_TFmode:
5648 if (evex_reg_p || egpr_vl)
5649 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5650 else if (egpr_p)
5651 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5652 else
5653 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5654 break;
5655 default:
5656 gcc_unreachable ();
5657 }
5658 }
5659 else if (SCALAR_INT_MODE_P (scalar_mode))
5660 {
5661 switch (scalar_mode)
5662 {
5663 case E_QImode:
5664 if (evex_reg_p || egpr_vl)
5665 opcode = (misaligned_p
5666 ? (TARGET_AVX512BW
5667 ? "vmovdqu8"
5668 : "vmovdqu64")
5669 : "vmovdqa64");
5670 else if (egpr_p)
5671 opcode = (misaligned_p
5672 ? (TARGET_AVX512BW
5673 ? "vmovdqu8"
5674 : "%vmovups")
5675 : "%vmovaps");
5676 else
5677 opcode = (misaligned_p
5678 ? (TARGET_AVX512BW
5679 ? "vmovdqu8"
5680 : "%vmovdqu")
5681 : "%vmovdqa");
5682 break;
5683 case E_HImode:
5684 if (evex_reg_p || egpr_vl)
5685 opcode = (misaligned_p
5686 ? (TARGET_AVX512BW
5687 ? "vmovdqu16"
5688 : "vmovdqu64")
5689 : "vmovdqa64");
5690 else if (egpr_p)
5691 opcode = (misaligned_p
5692 ? (TARGET_AVX512BW
5693 ? "vmovdqu16"
5694 : "%vmovups")
5695 : "%vmovaps");
5696 else
5697 opcode = (misaligned_p
5698 ? (TARGET_AVX512BW
5699 ? "vmovdqu16"
5700 : "%vmovdqu")
5701 : "%vmovdqa");
5702 break;
5703 case E_SImode:
5704 if (evex_reg_p || egpr_vl)
5705 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5706 else if (egpr_p)
5707 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5708 else
5709 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5710 break;
5711 case E_DImode:
5712 case E_TImode:
5713 case E_OImode:
5714 if (evex_reg_p || egpr_vl)
5715 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5716 else if (egpr_p)
5717 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5718 else
5719 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5720 break;
5721 case E_XImode:
5722 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5723 break;
5724 default:
5725 gcc_unreachable ();
5726 }
5727 }
5728 else
5729 gcc_unreachable ();
5730
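  /* The x, t and g operand modifiers print the xmm, ymm and zmm names of
     the register operands, so SIZE selects the register width actually
     emitted; the {...|...} braces hold the AT&T and Intel syntax
     variants.  */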
5731 switch (size)
5732 {
5733 case 64:
      snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
5735 opcode);
5736 break;
5737 case 32:
      snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
5739 opcode);
5740 break;
5741 case 16:
      snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
5743 opcode);
5744 break;
5745 default:
5746 gcc_unreachable ();
5747 }
5748 output_asm_insn (buf, operands);
5749 return "";
5750}
5751
5752/* Return the template of the TYPE_SSEMOV instruction to move
5753 operands[1] into operands[0]. */
5754
5755const char *
5756ix86_output_ssemov (rtx_insn *insn, rtx *operands)
5757{
5758 machine_mode mode = GET_MODE (operands[0]);
5759 if (get_attr_type (insn) != TYPE_SSEMOV
5760 || mode != GET_MODE (operands[1]))
5761 gcc_unreachable ();
5762
5763 enum attr_mode insn_mode = get_attr_mode (insn);
5764
5765 switch (insn_mode)
5766 {
5767 case MODE_XI:
5768 case MODE_V8DF:
5769 case MODE_V16SF:
      return ix86_get_ssemov (operands, 64, insn_mode, mode);
5771
5772 case MODE_OI:
5773 case MODE_V4DF:
5774 case MODE_V8SF:
      return ix86_get_ssemov (operands, 32, insn_mode, mode);
5776
5777 case MODE_TI:
5778 case MODE_V2DF:
5779 case MODE_V4SF:
      return ix86_get_ssemov (operands, 16, insn_mode, mode);
5781
5782 case MODE_DI:
5783 /* Handle broken assemblers that require movd instead of movq. */
5784 if (GENERAL_REG_P (operands[0]))
5785 {
5786 if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5787 return "%vmovq\t{%1, %q0|%q0, %1}";
5788 else
5789 return "%vmovd\t{%1, %q0|%q0, %1}";
5790 }
5791 else if (GENERAL_REG_P (operands[1]))
5792 {
5793 if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5794 return "%vmovq\t{%q1, %0|%0, %q1}";
5795 else
5796 return "%vmovd\t{%q1, %0|%0, %q1}";
5797 }
5798 else
5799 return "%vmovq\t{%1, %0|%0, %1}";
5800
5801 case MODE_SI:
5802 if (GENERAL_REG_P (operands[0]))
5803 return "%vmovd\t{%1, %k0|%k0, %1}";
5804 else if (GENERAL_REG_P (operands[1]))
5805 return "%vmovd\t{%k1, %0|%0, %k1}";
5806 else
5807 return "%vmovd\t{%1, %0|%0, %1}";
5808
5809 case MODE_HI:
5810 if (GENERAL_REG_P (operands[0]))
5811 return "vmovw\t{%1, %k0|%k0, %1}";
5812 else if (GENERAL_REG_P (operands[1]))
5813 return "vmovw\t{%k1, %0|%0, %k1}";
5814 else
5815 return "vmovw\t{%1, %0|%0, %1}";
5816
5817 case MODE_DF:
5818 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5819 return "vmovsd\t{%d1, %0|%0, %d1}";
5820 else
5821 return "%vmovsd\t{%1, %0|%0, %1}";
5822
5823 case MODE_SF:
5824 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5825 return "vmovss\t{%d1, %0|%0, %d1}";
5826 else
5827 return "%vmovss\t{%1, %0|%0, %1}";
5828
5829 case MODE_HF:
5830 case MODE_BF:
5831 if (REG_P (operands[0]) && REG_P (operands[1]))
5832 return "vmovsh\t{%d1, %0|%0, %d1}";
5833 else
5834 return "vmovsh\t{%1, %0|%0, %1}";
5835
5836 case MODE_V1DF:
5837 gcc_assert (!TARGET_AVX);
5838 return "movlpd\t{%1, %0|%0, %1}";
5839
5840 case MODE_V2SF:
5841 if (TARGET_AVX && REG_P (operands[0]))
5842 return "vmovlps\t{%1, %d0|%d0, %1}";
5843 else
5844 return "%vmovlps\t{%1, %0|%0, %1}";
5845
5846 default:
5847 gcc_unreachable ();
5848 }
5849}
5850
/* Return true if OP contains a symbol reference.  */
5852
5853bool
5854symbolic_reference_mentioned_p (rtx op)
5855{
5856 const char *fmt;
5857 int i;
5858
5859 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5860 return true;
5861
5862 fmt = GET_RTX_FORMAT (GET_CODE (op));
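  /* In the RTX format string, 'e' marks an rtx operand and 'E' a vector
     of rtx operands.  */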
5863 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5864 {
5865 if (fmt[i] == 'E')
5866 {
5867 int j;
5868
5869 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5870 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5871 return true;
5872 }
5873
5874 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5875 return true;
5876 }
5877
5878 return false;
5879}
5880
5881/* Return true if it is appropriate to emit `ret' instructions in the
5882 body of a function. Do this only if the epilogue is simple, needing a
5883 couple of insns. Prior to reloading, we can't tell how many registers
5884 must be saved, so return false then. Return false if there is no frame
5885 marker to de-allocate. */
5886
5887bool
5888ix86_can_use_return_insn_p (void)
5889{
  if (ix86_function_ms_hook_prologue (current_function_decl))
5891 return false;
5892
  if (ix86_function_naked (current_function_decl))
5894 return false;
5895
5896 /* Don't use `ret' instruction in interrupt handler. */
5897 if (! reload_completed
5898 || frame_pointer_needed
5899 || cfun->machine->func_type != TYPE_NORMAL)
    return false;
5901
5902 /* Don't allow more than 32k pop, since that's all we can do
5903 with one instruction. */
5904 if (crtl->args.pops_args && crtl->args.size >= 32768)
    return false;
5906
5907 struct ix86_frame &frame = cfun->machine->frame;
5908 return (frame.stack_pointer_offset == UNITS_PER_WORD
5909 && (frame.nregs + frame.nsseregs) == 0);
5910}
5911
5912/* Return stack frame size. get_frame_size () returns used stack slots
5913 during compilation, which may be optimized out later. If stack frame
5914 is needed, stack_frame_required should be true. */
5915
5916static HOST_WIDE_INT
5917ix86_get_frame_size (void)
5918{
5919 if (cfun->machine->stack_frame_required)
5920 return get_frame_size ();
5921 else
5922 return 0;
5923}
5924
5925/* Value should be nonzero if functions must have frame pointers.
5926 Zero means the frame pointer need not be set up (and parms may
5927 be accessed via the stack pointer) in functions that seem suitable. */
5928
5929static bool
5930ix86_frame_pointer_required (void)
5931{
5932 /* If we accessed previous frames, then the generated code expects
5933 to be able to access the saved ebp value in our frame. */
5934 if (cfun->machine->accesses_prev_frame)
5935 return true;
5936
  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
5939 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5940 return true;
5941
  /* For older 32-bit runtimes, setjmp requires a valid frame pointer.  */
5943 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
5944 return true;
5945
  /* Win64 SEH: very large frames need a frame pointer, as the maximum
     stack allocation is 4GB.  */
5948 if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
5949 return true;
5950
  /* SSE saves require a frame pointer when the stack is misaligned.  */
5952 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
5953 return true;
5954
5955 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
5956 turns off the frame pointer by default. Turn it back on now if
5957 we've not got a leaf function. */
5958 if (TARGET_OMIT_LEAF_FRAME_POINTER
5959 && (!crtl->is_leaf
5960 || ix86_current_function_calls_tls_descriptor))
5961 return true;
5962
  /* Several versions of mcount for x86 assume that there is a
     frame, so we cannot allow profiling without a frame pointer.  */
5965 if (crtl->profile && !flag_fentry)
5966 return true;
5967
5968 return false;
5969}
5970
5971/* Record that the current function accesses previous call frames. */
5972
5973void
5974ix86_setup_frame_addresses (void)
5975{
5976 cfun->machine->accesses_prev_frame = 1;
5977}
5978
5979#ifndef USE_HIDDEN_LINKONCE
5980# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
5981# define USE_HIDDEN_LINKONCE 1
5982# else
5983# define USE_HIDDEN_LINKONCE 0
5984# endif
5985#endif
5986
5987/* Label count for call and return thunks. It is used to make unique
5988 labels in call and return thunks. */
5989static int indirectlabelno;
5990
5991/* True if call thunk function is needed. */
5992static bool indirect_thunk_needed = false;
5993
5994/* Bit masks of integer registers, which contain branch target, used
5995 by call thunk functions. */
5996static HARD_REG_SET indirect_thunks_used;
5997
5998/* True if return thunk function is needed. */
5999static bool indirect_return_needed = false;
6000
6001/* True if return thunk function via CX is needed. */
6002static bool indirect_return_via_cx;
6003
6004#ifndef INDIRECT_LABEL
6005# define INDIRECT_LABEL "LIND"
6006#endif
6007
6008/* Indicate what prefix is needed for an indirect branch. */
6009enum indirect_thunk_prefix
6010{
6011 indirect_thunk_prefix_none,
6012 indirect_thunk_prefix_nt
6013};
6014
6015/* Return the prefix needed for an indirect branch INSN. */
6016
6017enum indirect_thunk_prefix
6018indirect_thunk_need_prefix (rtx_insn *insn)
6019{
6020 enum indirect_thunk_prefix need_prefix;
6021 if ((cfun->machine->indirect_branch_type
6022 == indirect_branch_thunk_extern)
6023 && ix86_notrack_prefixed_insn_p (insn))
6024 {
6025 /* NOTRACK prefix is only used with external thunk so that it
6026 can be properly updated to support CET at run-time. */
6027 need_prefix = indirect_thunk_prefix_nt;
6028 }
6029 else
6030 need_prefix = indirect_thunk_prefix_none;
6031 return need_prefix;
6032}
6033
6034/* Fills in the label name that should be used for the indirect thunk. */
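/* For example: __x86_indirect_thunk, __x86_indirect_thunk_rax,
   __x86_return_thunk and, with the NT prefix, __x86_indirect_thunk_nt_rax.  */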
6035
6036static void
6037indirect_thunk_name (char name[32], unsigned int regno,
6038 enum indirect_thunk_prefix need_prefix,
6039 bool ret_p)
6040{
6041 if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
6042 gcc_unreachable ();
6043
6044 if (USE_HIDDEN_LINKONCE)
6045 {
6046 const char *prefix;
6047
6048 if (need_prefix == indirect_thunk_prefix_nt
6049 && regno != INVALID_REGNUM)
6050 {
6051 /* NOTRACK prefix is only used with external thunk via
6052 register so that NOTRACK prefix can be added to indirect
6053 branch via register to support CET at run-time. */
6054 prefix = "_nt";
6055 }
6056 else
6057 prefix = "";
6058
6059 const char *ret = ret_p ? "return" : "indirect";
6060
6061 if (regno != INVALID_REGNUM)
6062 {
6063 const char *reg_prefix;
6064 if (LEGACY_INT_REGNO_P (regno))
6065 reg_prefix = TARGET_64BIT ? "r" : "e";
6066 else
6067 reg_prefix = "";
	  sprintf (name, "__x86_%s_thunk%s_%s%s",
6069 ret, prefix, reg_prefix, reg_names[regno]);
6070 }
6071 else
	sprintf (name, "__x86_%s_thunk%s", ret, prefix);
6073 }
6074 else
6075 {
6076 if (regno != INVALID_REGNUM)
6077 ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
6078 else
6079 {
6080 if (ret_p)
6081 ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
6082 else
6083 ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
6084 }
6085 }
6086}
6087
/* Output a call and return thunk for indirect branch.  If REGNO !=
   INVALID_REGNUM, the function address is in REGNO and the call and
   return thunk looks like:
6090
6091 call L2
6092 L1:
6093 pause
6094 lfence
6095 jmp L1
6096 L2:
6097 mov %REG, (%sp)
6098 ret
6099
6100 Otherwise, the function address is on the top of stack and the
6101 call and return thunk looks like:
6102
6103 call L2
6104 L1:
6105 pause
6106 lfence
6107 jmp L1
6108 L2:
6109 lea WORD_SIZE(%sp), %sp
6110 ret
6111 */
6112
6113static void
6114output_indirect_thunk (unsigned int regno)
6115{
6116 char indirectlabel1[32];
6117 char indirectlabel2[32];
6118
6119 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
6120 indirectlabelno++);
6121 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
6122 indirectlabelno++);
6123
6124 /* Call */
  fputs ("\tcall\t", asm_out_file);
6126 assemble_name_raw (asm_out_file, indirectlabel2);
  fputc ('\n', asm_out_file);
6128
6129 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
6130
  /* AMD and Intel CPUs each prefer a different instruction as the loop
     filler; using both pause + lfence is a compromise solution.  */
  fprintf (asm_out_file, "\tpause\n\tlfence\n");
6134
6135 /* Jump. */
  fputs ("\tjmp\t", asm_out_file);
6137 assemble_name_raw (asm_out_file, indirectlabel1);
  fputc ('\n', asm_out_file);
6139
6140 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
6141
6142 /* The above call insn pushed a word to stack. Adjust CFI info. */
6143 if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
6144 {
6145 if (! dwarf2out_do_cfi_asm ())
6146 {
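	  /* Without .cfi directives we must advance the DWARF location
	     to the label after the call before recording the new CFA
	     offset.  */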
6147 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
6148 xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
6149 xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
	  vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
6151 }
6152 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
6153 xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
6154 xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
      vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
      dwarf2out_emit_cfi (xcfi);
6157 }
6158
6159 if (regno != INVALID_REGNUM)
6160 {
6161 /* MOV. */
6162 rtx xops[2];
6163 xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
6164 xops[1] = gen_rtx_REG (word_mode, regno);
6165 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
6166 }
6167 else
6168 {
6169 /* LEA. */
6170 rtx xops[2];
6171 xops[0] = stack_pointer_rtx;
6172 xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
6173 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
6174 }
6175
  fputs ("\tret\n", asm_out_file);
  if ((ix86_harden_sls & harden_sls_return))
    fputs ("\tint3\n", asm_out_file);
6179}
6180
/* Output a function with a call and return thunk for indirect branch.
6182 If REGNO != INVALID_REGNUM, the function address is in REGNO.
6183 Otherwise, the function address is on the top of stack. Thunk is
6184 used for function return if RET_P is true. */
6185
6186static void
6187output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
6188 unsigned int regno, bool ret_p)
6189{
6190 char name[32];
6191 tree decl;
6192
6193 /* Create __x86_indirect_thunk. */
6194 indirect_thunk_name (name, regno, need_prefix, ret_p);
6195 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6196 get_identifier (name),
6197 build_function_type_list (void_type_node, NULL_TREE));
6198 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6199 NULL_TREE, void_type_node);
6200 TREE_PUBLIC (decl) = 1;
6201 TREE_STATIC (decl) = 1;
6202 DECL_IGNORED_P (decl) = 1;
6203
6204#if TARGET_MACHO
6205 if (TARGET_MACHO)
6206 {
6207 switch_to_section (darwin_sections[picbase_thunk_section]);
6208 fputs ("\t.weak_definition\t", asm_out_file);
6209 assemble_name (asm_out_file, name);
6210 fputs ("\n\t.private_extern\t", asm_out_file);
6211 assemble_name (asm_out_file, name);
6212 putc ('\n', asm_out_file);
6213 ASM_OUTPUT_LABEL (asm_out_file, name);
6214 DECL_WEAK (decl) = 1;
6215 }
6216 else
6217#endif
6218 if (USE_HIDDEN_LINKONCE)
6219 {
6220 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6221
6222 targetm.asm_out.unique_section (decl, 0);
6223 switch_to_section (get_named_section (decl, NULL, 0));
6224
6225 targetm.asm_out.globalize_label (asm_out_file, name);
      fputs ("\t.hidden\t", asm_out_file);
6227 assemble_name (asm_out_file, name);
      putc ('\n', asm_out_file);
6229 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6230 }
6231 else
6232 {
6233 switch_to_section (text_section);
6234 ASM_OUTPUT_LABEL (asm_out_file, name);
6235 }
6236
6237 DECL_INITIAL (decl) = make_node (BLOCK);
6238 current_function_decl = decl;
6239 allocate_struct_function (decl, false);
6240 init_function_start (decl);
6241 /* We're about to hide the function body from callees of final_* by
6242 emitting it directly; tell them we're a thunk, if they care. */
6243 cfun->is_thunk = true;
6244 first_function_block_is_cold = false;
6245 /* Make sure unwind info is emitted for the thunk if needed. */
6246 final_start_function (emit_barrier (), asm_out_file, 1);
6247
6248 output_indirect_thunk (regno);
6249
6250 final_end_function ();
6251 init_insn_lengths ();
6252 free_after_compilation (cfun);
6253 set_cfun (NULL);
6254 current_function_decl = NULL;
6255}
6256
6257static int pic_labels_used;
6258
6259/* Fills in the label name that should be used for a pc thunk for
6260 the given register. */
6261
6262static void
6263get_pc_thunk_name (char name[32], unsigned int regno)
6264{
6265 gcc_assert (!TARGET_64BIT);
6266
6267 if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
6269 else
6270 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
6271}
6272
6273
/* Output the thunks accumulated during compilation: the indirect branch
   and return thunks, and the -fpic pc thunks, each of which loads its
   register with the return address of the caller and then returns.  */
6276
6277static void
6278ix86_code_end (void)
6279{
6280 rtx xops[2];
6281 unsigned int regno;
6282
6283 if (indirect_return_needed)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
				    INVALID_REGNUM, true);
  if (indirect_return_via_cx)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
				    CX_REG, true);
  if (indirect_thunk_needed)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
				    INVALID_REGNUM, false);
6292
6293 for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
6294 {
      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
	output_indirect_thunk_function (indirect_thunk_prefix_none,
					regno, false);
6298 }
6299
6300 for (regno = FIRST_REX2_INT_REG; regno <= LAST_REX2_INT_REG; regno++)
6301 {
      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
	output_indirect_thunk_function (indirect_thunk_prefix_none,
					regno, false);
6305 }
6306
6307 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
6308 {
6309 char name[32];
6310 tree decl;
6311
      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
	output_indirect_thunk_function (indirect_thunk_prefix_none,
					regno, false);
6315
6316 if (!(pic_labels_used & (1 << regno)))
6317 continue;
6318
6319 get_pc_thunk_name (name, regno);
6320
6321 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6322 get_identifier (name),
6323 build_function_type_list (void_type_node, NULL_TREE));
6324 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6325 NULL_TREE, void_type_node);
6326 TREE_PUBLIC (decl) = 1;
6327 TREE_STATIC (decl) = 1;
6328 DECL_IGNORED_P (decl) = 1;
6329
6330#if TARGET_MACHO
6331 if (TARGET_MACHO)
6332 {
6333 switch_to_section (darwin_sections[picbase_thunk_section]);
6334 fputs ("\t.weak_definition\t", asm_out_file);
6335 assemble_name (asm_out_file, name);
6336 fputs ("\n\t.private_extern\t", asm_out_file);
6337 assemble_name (asm_out_file, name);
6338 putc ('\n', asm_out_file);
6339 ASM_OUTPUT_LABEL (asm_out_file, name);
6340 DECL_WEAK (decl) = 1;
6341 }
6342 else
6343#endif
6344 if (USE_HIDDEN_LINKONCE)
6345 {
6346 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6347
6348 targetm.asm_out.unique_section (decl, 0);
6349 switch_to_section (get_named_section (decl, NULL, 0));
6350
6351 targetm.asm_out.globalize_label (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
6353 assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
6355 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6356 }
6357 else
6358 {
6359 switch_to_section (text_section);
6360 ASM_OUTPUT_LABEL (asm_out_file, name);
6361 }
6362
6363 DECL_INITIAL (decl) = make_node (BLOCK);
6364 current_function_decl = decl;
6365 allocate_struct_function (decl, false);
6366 init_function_start (decl);
6367 /* We're about to hide the function body from callees of final_* by
6368 emitting it directly; tell them we're a thunk, if they care. */
6369 cfun->is_thunk = true;
6370 first_function_block_is_cold = false;
6371 /* Make sure unwind info is emitted for the thunk if needed. */
6372 final_start_function (emit_barrier (), asm_out_file, 1);
6373
6374 /* Pad stack IP move with 4 instructions (two NOPs count
6375 as one instruction). */
6376 if (TARGET_PAD_SHORT_FUNCTION)
6377 {
6378 int i = 8;
6379
6380 while (i--)
	    fputs ("\tnop\n", asm_out_file);
6382 }
6383
6384 xops[0] = gen_rtx_REG (Pmode, regno);
6385 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
6386 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
6388 final_end_function ();
6389 init_insn_lengths ();
6390 free_after_compilation (cfun);
6391 set_cfun (NULL);
6392 current_function_decl = NULL;
6393 }
6394
6395 if (flag_split_stack)
6396 file_end_indicate_split_stack ();
6397}
6398
6399/* Emit code for the SET_GOT patterns. */
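/* For 32-bit PIC code this typically expands to a sequence like

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   where the pc thunk (emitted by ix86_code_end) loads the register with
   the return address taken from the stack.  */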
6400
6401const char *
6402output_set_got (rtx dest, rtx label)
6403{
6404 rtx xops[3];
6405
6406 xops[0] = dest;
6407
6408 if (TARGET_VXWORKS_RTP && flag_pic)
6409 {
6410 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6411 xops[2] = gen_rtx_MEM (Pmode,
6412 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
6413 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6414
6415 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6416 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6417 an unadorned address. */
6418 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6419 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
6420 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
6421 return "";
6422 }
6423
6424 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
6425
6426 if (flag_pic)
6427 {
6428 char name[32];
6429 get_pc_thunk_name (name, REGNO (dest));
6430 pic_labels_used |= 1 << REGNO (dest);
6431
6432 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
6433 xops[2] = gen_rtx_MEM (QImode, xops[2]);
6434 output_asm_insn ("%!call\t%X2", xops);
6435
6436#if TARGET_MACHO
6437 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
6438 This is what will be referenced by the Mach-O PIC subsystem. */
6439 if (machopic_should_output_picbase_label () || !label)
6440 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
6441
6442 /* When we are restoring the pic base at the site of a nonlocal label,
6443 and we decided to emit the pic base above, we will still output a
6444 local label used for calculating the correction offset (even though
6445 the offset will be 0 in that case). */
6446 if (label)
6447 targetm.asm_out.internal_label (asm_out_file, "L",
6448 CODE_LABEL_NUMBER (label));
6449#endif
6450 }
6451 else
6452 {
6453 if (TARGET_MACHO)
6454 /* We don't need a pic base, we're not producing pic. */
6455 gcc_unreachable ();
6456
6457 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
6458 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
6459 targetm.asm_out.internal_label (asm_out_file, "L",
6460 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
6461 }
6462
6463 if (!TARGET_MACHO)
6464 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
6465
6466 return "";
6467}
6468
/* Generate a "push" pattern for input ARG.  */
6470
6471rtx
6472gen_push (rtx arg, bool ppx_p)
6473{
6474 struct machine_function *m = cfun->machine;
6475
6476 if (m->fs.cfa_reg == stack_pointer_rtx)
6477 m->fs.cfa_offset += UNITS_PER_WORD;
6478 m->fs.sp_offset += UNITS_PER_WORD;
6479
6480 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6481 arg = gen_rtx_REG (word_mode, REGNO (arg));
6482
6483 rtx stack = gen_rtx_MEM (word_mode,
6484 gen_rtx_PRE_DEC (Pmode,
6485 stack_pointer_rtx));
6486 return ppx_p ? gen_pushp_di (stack, arg) : gen_rtx_SET (stack, arg);
6487}
6488
6489rtx
6490gen_pushfl (void)
6491{
6492 struct machine_function *m = cfun->machine;
6493 rtx flags, mem;
6494
6495 if (m->fs.cfa_reg == stack_pointer_rtx)
6496 m->fs.cfa_offset += UNITS_PER_WORD;
6497 m->fs.sp_offset += UNITS_PER_WORD;
6498
6499 flags = gen_rtx_REG (CCmode, FLAGS_REG);
6500
6501 mem = gen_rtx_MEM (word_mode,
6502 gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
6503
  return gen_pushfl2 (word_mode, mem, flags);
6505}
6506
/* Generate a "pop" pattern for input ARG.  */
6508
6509rtx
6510gen_pop (rtx arg, bool ppx_p)
6511{
6512 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6513 arg = gen_rtx_REG (word_mode, REGNO (arg));
6514
6515 rtx stack = gen_rtx_MEM (word_mode,
6516 gen_rtx_POST_INC (Pmode,
6517 stack_pointer_rtx));
6518
6519 return ppx_p ? gen_popp_di (arg, stack) : gen_rtx_SET (arg, stack);
6520}
6521
6522rtx
6523gen_popfl (void)
6524{
6525 rtx flags, mem;
6526
6527 flags = gen_rtx_REG (CCmode, FLAGS_REG);
6528
6529 mem = gen_rtx_MEM (word_mode,
6530 gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
6531
  return gen_popfl1 (word_mode, flags, mem);
6533}
6534
/* Generate a "push2" pattern that pushes REG1 and REG2 onto the stack at MEM.  */
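/* APX push2 stores two registers with a single instruction; callers only
   use it when the stack pointer is kept 16-byte aligned (see
   ix86_can_use_push2pop2 and ix86_pro_and_epilogue_can_use_push2pop2).  */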
6536rtx
6537gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false)
6538{
6539 struct machine_function *m = cfun->machine;
6540 const int offset = UNITS_PER_WORD * 2;
6541
6542 if (m->fs.cfa_reg == stack_pointer_rtx)
6543 m->fs.cfa_offset += offset;
6544 m->fs.sp_offset += offset;
6545
6546 if (REG_P (reg1) && GET_MODE (reg1) != word_mode)
6547 reg1 = gen_rtx_REG (word_mode, REGNO (reg1));
6548
6549 if (REG_P (reg2) && GET_MODE (reg2) != word_mode)
6550 reg2 = gen_rtx_REG (word_mode, REGNO (reg2));
6551
6552 return ppx_p ? gen_push2p_di (mem, reg1, reg2):
6553 gen_push2_di (mem, reg1, reg2);
6554}
6555
/* Return the number of an unused call-clobbered register if one is
   available for the entire function, or INVALID_REGNUM otherwise.  */
6558
6559static unsigned int
6560ix86_select_alt_pic_regnum (void)
6561{
6562 if (ix86_use_pseudo_pic_reg ())
6563 return INVALID_REGNUM;
6564
6565 if (crtl->is_leaf
6566 && !crtl->profile
6567 && !ix86_current_function_calls_tls_descriptor)
6568 {
6569 int i, drap;
6570 /* Can't use the same register for both PIC and DRAP. */
6571 if (crtl->drap_reg)
6572 drap = REGNO (crtl->drap_reg);
6573 else
6574 drap = -1;
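      /* Registers 0..2 are AX, DX and CX, the call-clobbered integer
	 registers in i386 register-number order.  */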
6575 for (i = 2; i >= 0; --i)
6576 if (i != drap && !df_regs_ever_live_p (i))
6577 return i;
6578 }
6579
6580 return INVALID_REGNUM;
6581}
6582
6583/* Return true if REGNO is used by the epilogue. */
6584
6585bool
6586ix86_epilogue_uses (int regno)
6587{
6588 /* If there are no caller-saved registers, we preserve all registers,
6589 except for MMX and x87 registers which aren't supported when saving
6590 and restoring registers. Don't explicitly save SP register since
6591 it is always preserved. */
6592 return (epilogue_completed
6593 && (cfun->machine->call_saved_registers
6594 == TYPE_NO_CALLER_SAVED_REGISTERS)
6595 && !fixed_regs[regno]
6596 && !STACK_REGNO_P (regno)
6597 && !MMX_REGNO_P (regno));
6598}
6599
6600/* Return nonzero if register REGNO can be used as a scratch register
6601 in peephole2. */
6602
6603static bool
6604ix86_hard_regno_scratch_ok (unsigned int regno)
6605{
6606 /* If there are no caller-saved registers, we can't use any register
6607 as a scratch register after epilogue and use REGNO as scratch
6608 register only if it has been used before to avoid saving and
6609 restoring it. */
6610 return ((cfun->machine->call_saved_registers
6611 != TYPE_NO_CALLER_SAVED_REGISTERS)
6612 || (!epilogue_completed
6613 && df_regs_ever_live_p (regno)));
6614}
6615
6616/* Return TRUE if we need to save REGNO. */
6617
6618bool
6619ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
6620{
6621 rtx reg;
6622
6623 switch (cfun->machine->call_saved_registers)
6624 {
6625 case TYPE_DEFAULT_CALL_SAVED_REGISTERS:
6626 break;
6627
6628 case TYPE_NO_CALLER_SAVED_REGISTERS:
6629 /* If there are no caller-saved registers, we preserve all
6630 registers, except for MMX and x87 registers which aren't
6631 supported when saving and restoring registers. Don't
6632 explicitly save SP register since it is always preserved.
6633
6634 Don't preserve registers used for function return value. */
6635 reg = crtl->return_rtx;
6636 if (reg)
6637 {
6638 unsigned int i = REGNO (reg);
6639 unsigned int nregs = REG_NREGS (reg);
6640 while (nregs-- > 0)
6641 if ((i + nregs) == regno)
6642 return false;
6643 }
6644
6645 return (df_regs_ever_live_p (regno)
6646 && !fixed_regs[regno]
6647 && !STACK_REGNO_P (regno)
6648 && !MMX_REGNO_P (regno)
6649 && (regno != HARD_FRAME_POINTER_REGNUM
6650 || !frame_pointer_needed));
6651
6652 case TYPE_NO_CALLEE_SAVED_REGISTERS:
6653 return false;
6654
6655 case TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP:
6656 if (regno != HARD_FRAME_POINTER_REGNUM)
6657 return false;
6658 break;
6659 }
6660
6661 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
6662 && pic_offset_table_rtx)
6663 {
6664 if (ix86_use_pseudo_pic_reg ())
6665 {
6666 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
6667 _mcount in prologue. */
6668 if (!TARGET_64BIT && flag_pic && crtl->profile)
6669 return true;
6670 }
6671 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6672 || crtl->profile
6673 || crtl->calls_eh_return
6674 || crtl->uses_const_pool
6675 || cfun->has_nonlocal_label)
6676 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
6677 }
6678
6679 if (crtl->calls_eh_return && maybe_eh_return)
6680 {
6681 unsigned i;
6682 for (i = 0; ; i++)
6683 {
6684 unsigned test = EH_RETURN_DATA_REGNO (i);
6685 if (test == INVALID_REGNUM)
6686 break;
6687 if (test == regno)
6688 return true;
6689 }
6690 }
6691
6692 if (ignore_outlined && cfun->machine->call_ms2sysv)
6693 {
6694 unsigned count = cfun->machine->call_ms2sysv_extra_regs
6695 + xlogue_layout::MIN_REGS;
6696 if (xlogue_layout::is_stub_managed_reg (regno, count))
6697 return false;
6698 }
6699
6700 if (crtl->drap_reg
6701 && regno == REGNO (crtl->drap_reg)
6702 && !cfun->machine->no_drap_save_restore)
6703 return true;
6704
6705 return (df_regs_ever_live_p (regno)
6706 && !call_used_or_fixed_reg_p (regno)
6707 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6708}
6709
/* Return the number of saved general purpose registers.  */
6711
6712static int
6713ix86_nsaved_regs (void)
6714{
6715 int nregs = 0;
6716 int regno;
6717
6718 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6720 nregs ++;
6721 return nregs;
6722}
6723
6724/* Return number of saved SSE registers. */
6725
6726static int
6727ix86_nsaved_sseregs (void)
6728{
6729 int nregs = 0;
6730 int regno;
6731
6732 if (!TARGET_64BIT_MS_ABI)
6733 return 0;
6734 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6736 nregs ++;
6737 return nregs;
6738}
6739
6740/* Given FROM and TO register numbers, say whether this elimination is
6741 allowed. If stack alignment is needed, we can only replace argument
6742 pointer with hard frame pointer, or replace frame pointer with stack
6743 pointer. Otherwise, frame pointer elimination is automatically
6744 handled and all other eliminations are valid. */
6745
6746static bool
6747ix86_can_eliminate (const int from, const int to)
6748{
6749 if (stack_realign_fp)
6750 return ((from == ARG_POINTER_REGNUM
6751 && to == HARD_FRAME_POINTER_REGNUM)
6752 || (from == FRAME_POINTER_REGNUM
6753 && to == STACK_POINTER_REGNUM));
6754 else
6755 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
6756}
6757
6758/* Return the offset between two registers, one to be eliminated, and the other
6759 its replacement, at the start of a routine. */
6760
6761HOST_WIDE_INT
6762ix86_initial_elimination_offset (int from, int to)
6763{
6764 struct ix86_frame &frame = cfun->machine->frame;
6765
6766 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6767 return frame.hard_frame_pointer_offset;
6768 else if (from == FRAME_POINTER_REGNUM
6769 && to == HARD_FRAME_POINTER_REGNUM)
6770 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6771 else
6772 {
6773 gcc_assert (to == STACK_POINTER_REGNUM);
6774
6775 if (from == ARG_POINTER_REGNUM)
6776 return frame.stack_pointer_offset;
6777
6778 gcc_assert (from == FRAME_POINTER_REGNUM);
6779 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6780 }
6781}
6782
6783/* Emits a warning for unsupported msabi to sysv pro/epilogues. */
6784void
6785warn_once_call_ms2sysv_xlogues (const char *feature)
6786{
6787 static bool warned_once = false;
6788 if (!warned_once)
6789 {
6790 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6791 feature);
6792 warned_once = true;
6793 }
6794}
6795
6796/* Return the probing interval for -fstack-clash-protection. */
6797
6798static HOST_WIDE_INT
6799get_probe_interval (void)
6800{
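  /* Both the --param value and STACK_CHECK_PROBE_INTERVAL_EXP are the
     log2 of the interval in bytes.  */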
6801 if (flag_stack_clash_protection)
6802 return (HOST_WIDE_INT_1U
6803 << param_stack_clash_protection_probe_interval);
6804 else
6805 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
6806}
6807
6808/* When using -fsplit-stack, the allocation routines set a field in
6809 the TCB to the bottom of the stack plus this much space, measured
6810 in bytes. */
6811
6812#define SPLIT_STACK_AVAILABLE 256
6813
6814/* Return true if push2/pop2 can be generated. */
6815
6816static bool
6817ix86_can_use_push2pop2 (void)
6818{
6819 /* Use push2/pop2 only if the incoming stack is 16-byte aligned. */
6820 unsigned int incoming_stack_boundary
6821 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
6822 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
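  /* Stack boundaries are measured in bits, so 128 here means 16 bytes.  */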
6823 return incoming_stack_boundary % 128 == 0;
6824}
6825
6826/* Helper function to determine whether push2/pop2 can be used in prologue or
6827 epilogue for register save/restore. */
6828static bool
6829ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
6830{
6831 if (!ix86_can_use_push2pop2 ())
6832 return false;
6833 int aligned = cfun->machine->fs.sp_offset % 16 == 0;
6834 return TARGET_APX_PUSH2POP2
6835 && !cfun->machine->frame.save_regs_using_mov
6836 && cfun->machine->func_type == TYPE_NORMAL
6837 && (nregs + aligned) >= 3;
6838}
6839
6840/* Fill structure ix86_frame about frame of currently computed function. */
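/* A rough sketch of the layout computed below, going from the incoming
   stack pointer towards lower addresses (all offsets are measured from
   the CFA towards the bottom of the frame):

	return address (plus error code for exception handlers)
	pushed static chain
	saved frame pointer		<- hard_frame_pointer_offset
	GP register save area		<- reg_save_offset
	SSE register save area		<- sse_reg_save_offset
	va_arg register save area
	local variables			<- frame_pointer_offset
	outgoing arguments
					<- stack_pointer_offset

   Stack realignment, the red zone and SEH adjust this further below.  */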
6841
6842static void
6843ix86_compute_frame_layout (void)
6844{
6845 struct ix86_frame *frame = &cfun->machine->frame;
6846 struct machine_function *m = cfun->machine;
6847 unsigned HOST_WIDE_INT stack_alignment_needed;
6848 HOST_WIDE_INT offset;
6849 unsigned HOST_WIDE_INT preferred_alignment;
6850 HOST_WIDE_INT size = ix86_get_frame_size ();
6851 HOST_WIDE_INT to_allocate;
6852
6853 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
6854 * ms_abi functions that call a sysv function. We now need to prune away
6855 * cases where it should be disabled. */
6856 if (TARGET_64BIT && m->call_ms2sysv)
6857 {
6858 gcc_assert (TARGET_64BIT_MS_ABI);
6859 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
6860 gcc_assert (!TARGET_SEH);
6861 gcc_assert (TARGET_SSE);
6862 gcc_assert (!ix86_using_red_zone ());
6863
6864 if (crtl->calls_eh_return)
6865 {
6866 gcc_assert (!reload_completed);
6867 m->call_ms2sysv = false;
	  warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
6869 }
6870
6871 else if (ix86_static_chain_on_stack)
6872 {
6873 gcc_assert (!reload_completed);
6874 m->call_ms2sysv = false;
	  warn_once_call_ms2sysv_xlogues ("static call chains");
6876 }
6877
6878 /* Finally, compute which registers the stub will manage. */
6879 else
6880 {
6881 unsigned count = xlogue_layout::count_stub_managed_regs ();
6882 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
6883 m->call_ms2sysv_pad_in = 0;
6884 }
6885 }
6886
6887 frame->nregs = ix86_nsaved_regs ();
6888 frame->nsseregs = ix86_nsaved_sseregs ();
6889
  /* The 64-bit MS ABI seems to require stack alignment to always be 16,
     except for function prologues, leaf functions and when the default
     incoming stack boundary is overridden at the command line or via the
     force_align_arg_pointer attribute.
6894
6895 Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
6896 at call sites, including profile function calls.
6897
6898 For APX push2/pop2, the stack also requires 128b alignment. */
  if ((ix86_pro_and_epilogue_can_use_push2pop2 (frame->nregs)
6900 && crtl->preferred_stack_boundary < 128)
6901 || (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
6902 && crtl->preferred_stack_boundary < 128)
6903 && (!crtl->is_leaf || cfun->calls_alloca != 0
6904 || ix86_current_function_calls_tls_descriptor
6905 || (TARGET_MACHO && crtl->profile)
6906 || ix86_incoming_stack_boundary < 128)))
6907 {
6908 crtl->preferred_stack_boundary = 128;
6909 if (crtl->stack_alignment_needed < 128)
6910 crtl->stack_alignment_needed = 128;
6911 }
6912
6913 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6914 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6915
6916 gcc_assert (!size || stack_alignment_needed);
6917 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6918 gcc_assert (preferred_alignment <= stack_alignment_needed);
6919
6920 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
6921 gcc_assert (TARGET_64BIT || !frame->nsseregs);
6922 if (TARGET_64BIT && m->call_ms2sysv)
6923 {
6924 gcc_assert (stack_alignment_needed >= 16);
6925 gcc_assert (!frame->nsseregs);
6926 }
6927
6928 /* For SEH we have to limit the amount of code movement into the prologue.
6929 At present we do this via a BLOCKAGE, at which point there's very little
6930 scheduling that can be done, which means that there's very little point
6931 in doing anything except PUSHs. */
6932 if (TARGET_SEH)
6933 m->use_fast_prologue_epilogue = false;
6934 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
6935 {
6936 int count = frame->nregs;
      struct cgraph_node *node = cgraph_node::get (current_function_decl);
6938
6939 /* The fast prologue uses move instead of push to save registers. This
6940 is significantly longer, but also executes faster as modern hardware
6941 can execute the moves in parallel, but can't do that for push/pop.
6942
	 Be careful about choosing which prologue to emit: when the
	 function takes many instructions to execute, we may use the slow
	 version, as well as when the function is known to be outside a
	 hot spot (this is known with profile feedback only).  Weight the
	 size of the function by the number of registers to save, as it is
	 cheap to use one or two push instructions but very slow to use
	 many of them.
6949
6950 Calling this hook multiple times with the same frame requirements
6951 must produce the same layout, since the RA might otherwise be
6952 unable to reach a fixed point or might fail its final sanity checks.
6953 This means that once we've assumed that a function does or doesn't
6954 have a particular size, we have to stick to that assumption
6955 regardless of how the function has changed since. */
6956 if (count)
6957 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6958 if (node->frequency < NODE_FREQUENCY_NORMAL
6959 || (flag_branch_probabilities
6960 && node->frequency < NODE_FREQUENCY_HOT))
6961 m->use_fast_prologue_epilogue = false;
6962 else
6963 {
6964 if (count != frame->expensive_count)
6965 {
6966 frame->expensive_count = count;
6967 frame->expensive_p = expensive_function_p (count);
6968 }
6969 m->use_fast_prologue_epilogue = !frame->expensive_p;
6970 }
6971 }
6972
6973 frame->save_regs_using_mov
6974 = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
6975
6976 /* Skip return address and error code in exception handler. */
6977 offset = INCOMING_FRAME_SP_OFFSET;
6978
6979 /* Skip pushed static chain. */
6980 if (ix86_static_chain_on_stack)
6981 offset += UNITS_PER_WORD;
6982
6983 /* Skip saved base pointer. */
6984 if (frame_pointer_needed)
6985 offset += UNITS_PER_WORD;
6986 frame->hfp_save_offset = offset;
6987
6988 /* The traditional frame pointer location is at the top of the frame. */
6989 frame->hard_frame_pointer_offset = offset;
6990
6991 /* Register save area */
6992 offset += frame->nregs * UNITS_PER_WORD;
6993 frame->reg_save_offset = offset;
6994
6995 /* Calculate the size of the va-arg area (not including padding, if any). */
6996 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
6997
6998 /* Also adjust stack_realign_offset for the largest alignment of
6999 stack slot actually used. */
7000 if (stack_realign_fp
7001 || (cfun->machine->max_used_stack_alignment != 0
7002 && (offset % cfun->machine->max_used_stack_alignment) != 0))
7003 {
7004 /* We may need a 16-byte aligned stack for the remainder of the
7005 register save area, but the stack frame for the local function
7006 may require a greater alignment if using AVX/2/512. In order
7007 to avoid wasting space, we first calculate the space needed for
7008 the rest of the register saves, add that to the stack pointer,
7009 and then realign the stack to the boundary of the start of the
7010 frame for the local function. */
7011 HOST_WIDE_INT space_needed = 0;
7012 HOST_WIDE_INT sse_reg_space_needed = 0;
7013
7014 if (TARGET_64BIT)
7015 {
7016 if (m->call_ms2sysv)
7017 {
7018 m->call_ms2sysv_pad_in = 0;
7019 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
7020 }
7021
7022 else if (frame->nsseregs)
7023 /* The only ABI that has saved SSE registers (Win64) also has a
7024 16-byte aligned default stack. However, many programs violate
7025 the ABI, and Wine64 forces stack realignment to compensate. */
7026 space_needed = frame->nsseregs * 16;
7027
7028 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
7029
7030 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
7031 rounding to be pedantic. */
7032 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
7033 }
7034 else
7035 space_needed = frame->va_arg_size;
7036
7037 /* Record the allocation size required prior to the realignment AND. */
7038 frame->stack_realign_allocate = space_needed;
7039
7040 /* The re-aligned stack starts at frame->stack_realign_offset. Values
7041 before this point are not directly comparable with values below
7042 this point. Use sp_valid_at to determine if the stack pointer is
7043 valid for a given offset, fp_valid_at for the frame pointer, or
7044 choose_baseaddr to have a base register chosen for you.
7045
7046 Note that the result of (frame->stack_realign_offset
7047 & (stack_alignment_needed - 1)) may not equal zero. */
7048 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
7049 frame->stack_realign_offset = offset - space_needed;
7050 frame->sse_reg_save_offset = frame->stack_realign_offset
7051 + sse_reg_space_needed;
7052 }
7053 else
7054 {
7055 frame->stack_realign_offset = offset;
7056
7057 if (TARGET_64BIT && m->call_ms2sysv)
7058 {
7059 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
7060 offset += xlogue_layout::get_instance ().get_stack_space_used ();
7061 }
7062
7063 /* Align and set SSE register save area. */
7064 else if (frame->nsseregs)
7065 {
7066 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
7067 required and the DRAP re-alignment boundary is at least 16 bytes,
7068 then we want the SSE register save area properly aligned. */
7069 if (ix86_incoming_stack_boundary >= 128
7070 || (stack_realign_drap && stack_alignment_needed >= 16))
7071 offset = ROUND_UP (offset, 16);
7072 offset += frame->nsseregs * 16;
7073 }
7074 frame->sse_reg_save_offset = offset;
7075 offset += frame->va_arg_size;
7076 }
7077
  /* Align the start of the frame for the local function.  When a
     function call is removed, the function may become a leaf function.
     But if arguments may be passed on the stack, we need to align the
     stack when there is no tail call.  */
7082 if (m->call_ms2sysv
7083 || frame->va_arg_size != 0
7084 || size != 0
7085 || !crtl->is_leaf
7086 || (!crtl->tail_call_emit
7087 && cfun->machine->outgoing_args_on_stack)
7088 || cfun->calls_alloca
7089 || ix86_current_function_calls_tls_descriptor)
7090 offset = ROUND_UP (offset, stack_alignment_needed);
7091
7092 /* Frame pointer points here. */
7093 frame->frame_pointer_offset = offset;
7094
7095 offset += size;
7096
  /* Add the outgoing arguments area.  It can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when the function calls alloca, since
     the alloca expander assumes that the last crtl->outgoing_args_size
     bytes of the stack frame are unused.  */
7102 if (ACCUMULATE_OUTGOING_ARGS
7103 && (!crtl->is_leaf || cfun->calls_alloca
7104 || ix86_current_function_calls_tls_descriptor))
7105 {
7106 offset += crtl->outgoing_args_size;
7107 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7108 }
7109 else
7110 frame->outgoing_arguments_size = 0;
7111
7112 /* Align stack boundary. Only needed if we're calling another function
7113 or using alloca. */
7114 if (!crtl->is_leaf || cfun->calls_alloca
7115 || ix86_current_function_calls_tls_descriptor)
7116 offset = ROUND_UP (offset, preferred_alignment);
7117
7118 /* We've reached end of stack frame. */
7119 frame->stack_pointer_offset = offset;
7120
7121 /* Size prologue needs to allocate. */
7122 to_allocate = offset - frame->sse_reg_save_offset;
7123
7124 if ((!to_allocate && frame->nregs <= 1)
7125 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
7126 /* If static stack checking is enabled and done with probes,
7127 the registers need to be saved before allocating the frame. */
7128 || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
7129 /* If stack clash probing needs a loop, then it needs a
7130 scratch register. But the returned register is only guaranteed
7131 to be safe to use after register saves are complete. So if
7132 stack clash protections are enabled and the allocated frame is
7133 larger than the probe interval, then use pushes to save
7134 callee saved registers. */
7135 || (flag_stack_clash_protection
7136 && !ix86_target_stack_probe ()
7137 && to_allocate > get_probe_interval ()))
7138 frame->save_regs_using_mov = false;
7139
7140 if (ix86_using_red_zone ()
7141 && crtl->sp_is_unchanging
7142 && crtl->is_leaf
7143 && !ix86_pc_thunk_call_expanded
7144 && !ix86_current_function_calls_tls_descriptor)
7145 {
7146 frame->red_zone_size = to_allocate;
7147 if (frame->save_regs_using_mov)
7148 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7149 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7150 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7151 }
7152 else
7153 frame->red_zone_size = 0;
7154 frame->stack_pointer_offset -= frame->red_zone_size;
7155
7156 /* The SEH frame pointer location is near the bottom of the frame.
7157 This is enforced by the fact that the difference between the
7158 stack pointer and the frame pointer is limited to 240 bytes in
7159 the unwind data structure. */
7160 if (TARGET_SEH)
7161 {
7162 /* Force the frame pointer to point at or below the lowest register save
7163 area, see the SEH code in config/i386/winnt.cc for the rationale. */
7164 frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
7165
7166 /* If we can leave the frame pointer where it is, do so; however return
7167 the establisher frame for __builtin_frame_address (0) or else if the
7168 frame overflows the SEH maximum frame size.
7169
7170 Note that the value returned by __builtin_frame_address (0) is quite
7171 constrained, because setjmp is piggybacked on the SEH machinery with
7172 recent versions of MinGW:
7173
7174 # elif defined(__SEH__)
7175 # if defined(__aarch64__) || defined(_ARM64_)
7176 # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
7177 # elif (__MINGW_GCC_VERSION < 40702)
7178 # define setjmp(BUF) _setjmp((BUF), mingw_getsp())
7179 # else
7180 # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
7181 # endif
7182
7183 and the second argument passed to _setjmp, if not null, is forwarded
7184 to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
7185 built an ExceptionRecord on the fly describing the setjmp buffer). */
7186 const HOST_WIDE_INT diff
7187 = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
7188 if (diff <= 255 && !crtl->accesses_prior_frames)
7189 {
7190 /* The resulting diff will be a multiple of 16 lower than 255,
7191 i.e. at most 240 as required by the unwind data structure. */
7192 frame->hard_frame_pointer_offset += (diff & 15);
7193 }
7194 else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
7195 {
7196 /* Ideally we'd determine what portion of the local stack frame
7197 (within the constraint of the lowest 240) is most heavily used.
7198 But without that complication, simply bias the frame pointer
7199 by 128 bytes so as to maximize the amount of the local stack
7200 frame that is addressable with 8-bit offsets. */
7201 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
7202 }
7203 else
7204 frame->hard_frame_pointer_offset = frame->hfp_save_offset;
7205 }
7206}
7207
7208/* This is semi-inlined memory_address_length, but simplified
7209 since we know that we're always dealing with reg+offset, and
7210 to avoid having to create and discard all that rtl. */
7211
7212static inline int
7213choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
7214{
7215 int len = 4;
7216
7217 if (offset == 0)
7218 {
7219 /* EBP and R13 cannot be encoded without an offset. */
7220 len = (regno == BP_REG || regno == R13_REG);
7221 }
7222 else if (IN_RANGE (offset, -128, 127))
7223 len = 1;
7224
7225 /* ESP and R12 must be encoded with a SIB byte. */
7226 if (regno == SP_REG || regno == R12_REG)
7227 len++;
7228
7229 return len;
7230}
7231
7232/* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
7233 the frame save area. The register is saved at CFA - CFA_OFFSET. */
7234
7235static bool
7236sp_valid_at (HOST_WIDE_INT cfa_offset)
7237{
7238 const struct machine_frame_state &fs = cfun->machine->fs;
7239 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
7240 {
7241 /* Validate that the cfa_offset isn't in a "no-man's land". */
7242 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
7243 return false;
7244 }
7245 return fs.sp_valid;
7246}
7247
7248/* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
7249 the frame save area. The register is saved at CFA - CFA_OFFSET. */
7250
7251static inline bool
7252fp_valid_at (HOST_WIDE_INT cfa_offset)
7253{
7254 const struct machine_frame_state &fs = cfun->machine->fs;
7255 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
7256 {
7257 /* Validate that the cfa_offset isn't in a "no-man's land". */
7258 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
7259 return false;
7260 }
7261 return fs.fp_valid;
7262}
7263
7264/* Choose a base register based upon alignment requested, speed and/or
7265 size. */
7266
7267static void
7268choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
7269 HOST_WIDE_INT &base_offset,
7270 unsigned int align_reqested, unsigned int *align)
7271{
7272 const struct machine_function *m = cfun->machine;
7273 unsigned int hfp_align;
7274 unsigned int drap_align;
7275 unsigned int sp_align;
7276 bool hfp_ok = fp_valid_at (cfa_offset);
7277 bool drap_ok = m->fs.drap_valid;
7278 bool sp_ok = sp_valid_at (cfa_offset);
7279
7280 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
7281
7282 /* Filter out any registers that don't meet the requested alignment
7283 criteria. */
7284 if (align_reqested)
7285 {
7286 if (m->fs.realigned)
7287 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
      /* SEH unwind code does not currently support REG_CFA_EXPRESSION
	 notes (which we would need to use a realigned stack pointer),
	 so disable this on SEH targets.  */
7291 else if (m->fs.sp_realigned)
7292 sp_align = crtl->stack_alignment_needed;
7293
7294 hfp_ok = hfp_ok && hfp_align >= align_reqested;
7295 drap_ok = drap_ok && drap_align >= align_reqested;
7296 sp_ok = sp_ok && sp_align >= align_reqested;
7297 }
7298
7299 if (m->use_fast_prologue_epilogue)
7300 {
7301 /* Choose the base register most likely to allow the most scheduling
7302 opportunities. Generally FP is valid throughout the function,
7303 while DRAP must be reloaded within the epilogue. But choose either
7304 over the SP due to increased encoding size. */
7305
7306 if (hfp_ok)
7307 {
7308 base_reg = hard_frame_pointer_rtx;
7309 base_offset = m->fs.fp_offset - cfa_offset;
7310 }
7311 else if (drap_ok)
7312 {
7313 base_reg = crtl->drap_reg;
7314 base_offset = 0 - cfa_offset;
7315 }
7316 else if (sp_ok)
7317 {
7318 base_reg = stack_pointer_rtx;
7319 base_offset = m->fs.sp_offset - cfa_offset;
7320 }
7321 }
7322 else
7323 {
7324 HOST_WIDE_INT toffset;
7325 int len = 16, tlen;
7326
7327 /* Choose the base register with the smallest address encoding.
7328 With a tie, choose FP > DRAP > SP. */
7329 if (sp_ok)
7330 {
7331 base_reg = stack_pointer_rtx;
7332 base_offset = m->fs.sp_offset - cfa_offset;
	  len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
7334 }
7335 if (drap_ok)
7336 {
7337 toffset = 0 - cfa_offset;
	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
7339 if (tlen <= len)
7340 {
7341 base_reg = crtl->drap_reg;
7342 base_offset = toffset;
7343 len = tlen;
7344 }
7345 }
7346 if (hfp_ok)
7347 {
7348 toffset = m->fs.fp_offset - cfa_offset;
	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
7350 if (tlen <= len)
7351 {
7352 base_reg = hard_frame_pointer_rtx;
7353 base_offset = toffset;
7354 }
7355 }
7356 }
7357
7358 /* Set the align return value. */
7359 if (align)
7360 {
7361 if (base_reg == stack_pointer_rtx)
7362 *align = sp_align;
7363 else if (base_reg == crtl->drap_reg)
7364 *align = drap_align;
7365 else if (base_reg == hard_frame_pointer_rtx)
7366 *align = hfp_align;
7367 }
7368}
7369
7370/* Return an RTX that points to CFA_OFFSET within the stack frame and
7371 the alignment of address. If ALIGN is non-null, it should point to
7372 an alignment value (in bits) that is preferred or zero and will
   receive the alignment of the base register that was selected,
   irrespective of whether or not CFA_OFFSET is a multiple of that
7375 alignment value. If it is possible for the base register offset to be
7376 non-immediate then SCRATCH_REGNO should specify a scratch register to
7377 use.
7378
7379 The valid base registers are taken from CFUN->MACHINE->FS. */
7380
7381static rtx
7382choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
7383 unsigned int scratch_regno = INVALID_REGNUM)
7384{
7385 rtx base_reg = NULL;
7386 HOST_WIDE_INT base_offset = 0;
7387
7388 /* If a specific alignment is requested, try to get a base register
7389 with that alignment first. */
7390 if (align && *align)
    choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
7392
7393 if (!base_reg)
    choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
7395
7396 gcc_assert (base_reg != NULL);
7397
7398 rtx base_offset_rtx = GEN_INT (base_offset);
7399
7400 if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
7401 {
7402 gcc_assert (scratch_regno != INVALID_REGNUM);
7403
7404 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
7405 emit_move_insn (scratch_reg, base_offset_rtx);
7406
7407 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
7408 }
7409
7410 return plus_constant (Pmode, base_reg, base_offset);
7411}
7412
7413/* Emit code to save registers in the prologue. */
7414
7415static void
7416ix86_emit_save_regs (void)
7417{
7418 int regno;
7419 rtx_insn *insn;
7420
7421 if (!TARGET_APX_PUSH2POP2
7422 || !ix86_can_use_push2pop2 ()
7423 || cfun->machine->func_type != TYPE_NORMAL)
7424 {
7425 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7426       if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7427 	{
7428 	  insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
7429 TARGET_APX_PPX));
7430 RTX_FRAME_RELATED_P (insn) = 1;
7431 }
7432 }
7433 else
7434 {
7435 int regno_list[2];
7436 regno_list[0] = regno_list[1] = -1;
7437 int loaded_regnum = 0;
7438 bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
7439
7440 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7441       if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7442 {
7443 if (aligned)
7444 {
7445 regno_list[loaded_regnum++] = regno;
7446 if (loaded_regnum == 2)
7447 {
7448 gcc_assert (regno_list[0] != -1
7449 && regno_list[1] != -1
7450 && regno_list[0] != regno_list[1]);
7451 const int offset = UNITS_PER_WORD * 2;
7452 rtx mem = gen_rtx_MEM (TImode,
7453 gen_rtx_PRE_DEC (Pmode,
7454 stack_pointer_rtx));
7455 insn = emit_insn (gen_push2 (mem,
7456 					   gen_rtx_REG (word_mode,
7457 							regno_list[0]),
7458 					   gen_rtx_REG (word_mode,
7459 							regno_list[1]),
7460 TARGET_APX_PPX));
7461 RTX_FRAME_RELATED_P (insn) = 1;
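		  /* dwarf2out cannot infer two register saves from the
		     single TImode PRE_DEC store, so describe the PUSH2
		     explicitly: element 0 of the SEQUENCE built below is
		     the stack pointer adjustment and elements 1 and 2
		     are the two word-sized register saves.  */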
7462 rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
7463
7464 for (int i = 0; i < 2; i++)
7465 {
7466 rtx dwarf_reg = gen_rtx_REG (word_mode,
7467 regno_list[i]);
7468 rtx sp_offset = plus_constant (Pmode,
7469 stack_pointer_rtx,
7470 + UNITS_PER_WORD
7471 * (1 - i));
7472 rtx tmp = gen_rtx_SET (gen_frame_mem (DImode,
7473 sp_offset),
7474 dwarf_reg);
7475 RTX_FRAME_RELATED_P (tmp) = 1;
7476 XVECEXP (dwarf, 0, i + 1) = tmp;
7477 }
7478 rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx,
7479 plus_constant (Pmode,
7480 stack_pointer_rtx,
7481 -offset));
7482 RTX_FRAME_RELATED_P (sp_tmp) = 1;
7483 XVECEXP (dwarf, 0, 0) = sp_tmp;
7484 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
7485
7486 loaded_regnum = 0;
7487 regno_list[0] = regno_list[1] = -1;
7488 }
7489 }
7490 else
7491 {
7492 	      insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
7493 TARGET_APX_PPX));
7494 RTX_FRAME_RELATED_P (insn) = 1;
7495 aligned = true;
7496 }
7497 }
7498 if (loaded_regnum == 1)
7499 {
7500 	  insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
7501 regno_list[0]),
7502 TARGET_APX_PPX));
7503 RTX_FRAME_RELATED_P (insn) = 1;
7504 }
7505 }
7506}
7507
7508/* Emit a single register save at CFA - CFA_OFFSET. */
7509
7510static void
7511ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
7512 HOST_WIDE_INT cfa_offset)
7513{
7514 struct machine_function *m = cfun->machine;
7515 rtx reg = gen_rtx_REG (mode, regno);
7516 rtx mem, addr, base, insn;
7517 unsigned int align = GET_MODE_ALIGNMENT (mode);
7518
7519   addr = choose_baseaddr (cfa_offset, &align);
7520 mem = gen_frame_mem (mode, addr);
7521
7522   /* The location alignment depends upon the base register. */
7523 align = MIN (GET_MODE_ALIGNMENT (mode), align);
7524 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
7525 set_mem_align (mem, align);
7526
7527 insn = emit_insn (gen_rtx_SET (mem, reg));
7528 RTX_FRAME_RELATED_P (insn) = 1;
7529
7530 base = addr;
7531 if (GET_CODE (base) == PLUS)
7532 base = XEXP (base, 0);
7533 gcc_checking_assert (REG_P (base));
7534
7535 /* When saving registers into a re-aligned local stack frame, avoid
7536 any tricky guessing by dwarf2out. */
7537 if (m->fs.realigned)
7538 {
7539 gcc_checking_assert (stack_realign_drap);
7540
7541 if (regno == REGNO (crtl->drap_reg))
7542 {
7543 /* A bit of a hack. We force the DRAP register to be saved in
7544 the re-aligned stack frame, which provides us with a copy
7545 of the CFA that will last past the prologue. Install it. */
7546 gcc_checking_assert (cfun->machine->fs.fp_valid);
7547 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7548 cfun->machine->fs.fp_offset - cfa_offset);
7549 mem = gen_rtx_MEM (mode, addr);
7550 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
7551 }
7552 else
7553 {
7554 /* The frame pointer is a stable reference within the
7555 aligned frame. Use it. */
7556 gcc_checking_assert (cfun->machine->fs.fp_valid);
7557 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7558 cfun->machine->fs.fp_offset - cfa_offset);
7559 mem = gen_rtx_MEM (mode, addr);
7560 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7561 }
7562 }
7563
7564 else if (base == stack_pointer_rtx && m->fs.sp_realigned
7565 && cfa_offset >= m->fs.sp_realigned_offset)
7566 {
7567 gcc_checking_assert (stack_realign_fp);
7568 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7569 }
7570
7571 /* The memory may not be relative to the current CFA register,
7572 which means that we may need to generate a new pattern for
7573 use by the unwind info. */
7574 else if (base != m->fs.cfa_reg)
7575 {
7576 addr = plus_constant (Pmode, m->fs.cfa_reg,
7577 m->fs.cfa_offset - cfa_offset);
7578 mem = gen_rtx_MEM (mode, addr);
7579 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
7580 }
7581}
7582
7583/* Emit code to save registers using MOV insns.
7584 First register is stored at CFA - CFA_OFFSET. */
7585static void
7586ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
7587{
7588 unsigned int regno;
7589
7590 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7591     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7592       {
7593 	ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
7594 cfa_offset -= UNITS_PER_WORD;
7595 }
7596}
7597
7598/* Emit code to save SSE registers using MOV insns.
7599 First register is stored at CFA - CFA_OFFSET. */
7600static void
7601ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
7602{
7603 unsigned int regno;
7604
7605 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7606     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7607 {
7608 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
7609 cfa_offset -= GET_MODE_SIZE (V4SFmode);
7610 }
7611}
7612
7613static GTY(()) rtx queued_cfa_restores;
7614
7615/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
7616 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
7617 Don't add the note if the previously saved value will be left untouched
7618 within stack red-zone till return, as unwinders can find the same value
7619 in the register and on the stack. */
7620
7621static void
7622ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
7623{
7624 if (!crtl->shrink_wrapped
7625 && cfa_offset <= cfun->machine->fs.red_zone_offset)
7626 return;
7627
7628 if (insn)
7629 {
7630 add_reg_note (insn, REG_CFA_RESTORE, reg);
7631 RTX_FRAME_RELATED_P (insn) = 1;
7632 }
7633 else
7634 queued_cfa_restores
7635 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
7636}
7637
7638/* Add queued REG_CFA_RESTORE notes if any to INSN. */
7639
7640static void
7641ix86_add_queued_cfa_restore_notes (rtx insn)
7642{
7643 rtx last;
7644 if (!queued_cfa_restores)
7645 return;
7646 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
7647 ;
7648 XEXP (last, 1) = REG_NOTES (insn);
7649 REG_NOTES (insn) = queued_cfa_restores;
7650 queued_cfa_restores = NULL_RTX;
7651 RTX_FRAME_RELATED_P (insn) = 1;
7652}
7653
7654/* Expand prologue or epilogue stack adjustment.
7655  The pattern exists to put a dependency on all ebp-based memory accesses.
7656  STYLE should be negative if instructions should be marked as frame related,
7657  zero if the %r11 register is live and cannot be freely used, and positive
7658  otherwise. */
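/* For illustration, the prologue later in this file allocates the frame
   with roughly

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (-allocate), -1,
				m->fs.cfa_reg == stack_pointer_rtx);

   i.e. a negative STYLE so that the adjustment is marked frame related.  */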
7659
7660static rtx
7661pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
7662 int style, bool set_cfa)
7663{
7664 struct machine_function *m = cfun->machine;
7665 rtx addend = offset;
7666 rtx insn;
7667 bool add_frame_related_expr = false;
7668
7669 if (!x86_64_immediate_operand (offset, Pmode))
7670 {
7671 /* r11 is used by indirect sibcall return as well, set before the
7672 epilogue and used after the epilogue. */
7673 if (style)
7674 addend = gen_rtx_REG (Pmode, R11_REG);
7675 else
7676 {
7677 gcc_assert (src != hard_frame_pointer_rtx
7678 && dest != hard_frame_pointer_rtx);
7679 addend = hard_frame_pointer_rtx;
7680 }
7681 emit_insn (gen_rtx_SET (addend, offset));
7682 if (style < 0)
7683 add_frame_related_expr = true;
7684 }
7685
7686 insn = emit_insn (gen_pro_epilogue_adjust_stack_add
7687 			    (Pmode, dest, src, addend));
7688 if (style >= 0)
7689 ix86_add_queued_cfa_restore_notes (insn);
7690
7691 if (set_cfa)
7692 {
7693 rtx r;
7694
7695 gcc_assert (m->fs.cfa_reg == src);
7696 m->fs.cfa_offset += INTVAL (offset);
7697 m->fs.cfa_reg = dest;
7698
7699 r = gen_rtx_PLUS (Pmode, src, offset);
7700 r = gen_rtx_SET (dest, r);
7701 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
7702 RTX_FRAME_RELATED_P (insn) = 1;
7703 }
7704 else if (style < 0)
7705 {
7706 RTX_FRAME_RELATED_P (insn) = 1;
7707 if (add_frame_related_expr)
7708 {
7709 rtx r = gen_rtx_PLUS (Pmode, src, offset);
7710 r = gen_rtx_SET (dest, r);
7711 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
7712 }
7713 }
7714
7715 if (dest == stack_pointer_rtx)
7716 {
7717 HOST_WIDE_INT ooffset = m->fs.sp_offset;
7718 bool valid = m->fs.sp_valid;
7719 bool realigned = m->fs.sp_realigned;
7720
7721 if (src == hard_frame_pointer_rtx)
7722 {
7723 valid = m->fs.fp_valid;
7724 realigned = false;
7725 ooffset = m->fs.fp_offset;
7726 }
7727 else if (src == crtl->drap_reg)
7728 {
7729 valid = m->fs.drap_valid;
7730 realigned = false;
7731 ooffset = 0;
7732 }
7733 else
7734 {
7735 	  /* Else there are two possibilities: SP itself, which we set
7736 	     up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
7737 	     taken care of by hand along the eh_return path. */
7738 gcc_checking_assert (src == stack_pointer_rtx
7739 || offset == const0_rtx);
7740 }
7741
7742 m->fs.sp_offset = ooffset - INTVAL (offset);
7743 m->fs.sp_valid = valid;
7744 m->fs.sp_realigned = realigned;
7745 }
7746 return insn;
7747}
7748
7749 /* Find an available register to be used as the dynamic realign argument
7750  pointer register. Such a register will be written in the prologue and
7751  used at the beginning of the body, so it must not be
7752  1. a parameter passing register.
7753  2. the GOT pointer.
7754 We reuse static-chain register if it is available. Otherwise, we
7755 use DI for i386 and R13 for x86-64. We chose R13 since it has
7756 shorter encoding.
7757
7758 Return: the regno of chosen register. */
7759
7760static unsigned int
7761find_drap_reg (void)
7762{
7763 tree decl = cfun->decl;
7764
7765 /* Always use callee-saved register if there are no caller-saved
7766 registers. */
7767 if (TARGET_64BIT)
7768 {
7769 	 /* Use R13 for a nested function or a function that needs a static
7770 	    chain. Since a function with a tail call may use any caller-saved
7771 	    register in the epilogue, DRAP must not use a caller-saved
7772 	    register in that case. */
7773 if (DECL_STATIC_CHAIN (decl)
7774 || (cfun->machine->call_saved_registers
7775 == TYPE_NO_CALLER_SAVED_REGISTERS)
7776 || crtl->tail_call_emit)
7777 return R13_REG;
7778
7779 return R10_REG;
7780 }
7781 else
7782 {
7783 	 /* Use DI for a nested function or a function that needs a static
7784 	    chain. Since a function with a tail call may use any caller-saved
7785 	    register in the epilogue, DRAP must not use a caller-saved
7786 	    register in that case. */
7787 if (DECL_STATIC_CHAIN (decl)
7788 || (cfun->machine->call_saved_registers
7789 == TYPE_NO_CALLER_SAVED_REGISTERS)
7790 || crtl->tail_call_emit
7791 || crtl->calls_eh_return)
7792 return DI_REG;
7793
7794 /* Reuse static chain register if it isn't used for parameter
7795 passing. */
7796 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
7797 {
7798 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
7799 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
7800 return CX_REG;
7801 }
7802 return DI_REG;
7803 }
7804}
7805
7806/* Return minimum incoming stack alignment. */
7807
7808static unsigned int
7809ix86_minimum_incoming_stack_boundary (bool sibcall)
7810{
7811 unsigned int incoming_stack_boundary;
7812
7813 /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
7814 if (cfun->machine->func_type != TYPE_NORMAL)
7815 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
7816 /* Prefer the one specified at command line. */
7817 else if (ix86_user_incoming_stack_boundary)
7818 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
7819   /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
7820      if -mstackrealign is used, this isn't a sibcall check, and the
7821      estimated stack alignment is 128 bits. */
7822 else if (!sibcall
7823 && ix86_force_align_arg_pointer
7824 && crtl->stack_alignment_estimated == 128)
7825 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7826 else
7827 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
7828
7829 /* Incoming stack alignment can be changed on individual functions
7830 via force_align_arg_pointer attribute. We use the smallest
7831 incoming stack boundary. */
7832 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
7833       && lookup_attribute ("force_align_arg_pointer",
7834 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7835 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7836
7837 /* The incoming stack frame has to be aligned at least at
7838 parm_stack_boundary. */
7839 if (incoming_stack_boundary < crtl->parm_stack_boundary)
7840 incoming_stack_boundary = crtl->parm_stack_boundary;
7841
7842 /* Stack at entrance of main is aligned by runtime. We use the
7843 smallest incoming stack boundary. */
7844 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
7845 && DECL_NAME (current_function_decl)
7846 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7847 && DECL_FILE_SCOPE_P (current_function_decl))
7848 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7849
7850 return incoming_stack_boundary;
7851}
7852
7853/* Update incoming stack boundary and estimated stack alignment. */
7854
7855static void
7856ix86_update_stack_boundary (void)
7857{
7858 ix86_incoming_stack_boundary
7859     = ix86_minimum_incoming_stack_boundary (false);
7860
7861 /* x86_64 vararg needs 16byte stack alignment for register save area. */
7862 if (TARGET_64BIT
7863 && cfun->stdarg
7864 && crtl->stack_alignment_estimated < 128)
7865 crtl->stack_alignment_estimated = 128;
7866
7867 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
7868 if (ix86_tls_descriptor_calls_expanded_in_cfun
7869 && crtl->preferred_stack_boundary < 128)
7870 crtl->preferred_stack_boundary = 128;
7871}
7872
7873/* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7874 needed or an rtx for DRAP otherwise. */
7875
7876static rtx
7877ix86_get_drap_rtx (void)
7878{
7879 /* We must use DRAP if there are outgoing arguments on stack or
7880      the stack pointer register is clobbered by an asm statement and
7881 ACCUMULATE_OUTGOING_ARGS is false. */
7882 if (ix86_force_drap
7883 || ((cfun->machine->outgoing_args_on_stack
7884 || crtl->sp_is_clobbered_by_asm)
7885 && !ACCUMULATE_OUTGOING_ARGS))
7886 crtl->need_drap = true;
7887
7888 if (stack_realign_drap)
7889 {
7890       /* Assign DRAP to vDRAP and return vDRAP. */
7891 unsigned int regno = find_drap_reg ();
7892 rtx drap_vreg;
7893 rtx arg_ptr;
7894 rtx_insn *seq, *insn;
7895
7896 arg_ptr = gen_rtx_REG (Pmode, regno);
7897 crtl->drap_reg = arg_ptr;
7898
7899 start_sequence ();
7900 drap_vreg = copy_to_reg (arg_ptr);
7901 seq = get_insns ();
7902 end_sequence ();
7903
7904     insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7905 if (!optimize)
7906 {
7907 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
7908 RTX_FRAME_RELATED_P (insn) = 1;
7909 }
7910 return drap_vreg;
7911 }
7912 else
7913 return NULL;
7914}
7915
7916/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7917
7918static rtx
7919ix86_internal_arg_pointer (void)
7920{
7921 return virtual_incoming_args_rtx;
7922}
7923
7924struct scratch_reg {
7925 rtx reg;
7926 bool saved;
7927};
7928
7929/* Return a short-lived scratch register for use on function entry.
7930 In 32-bit mode, it is valid only after the registers are saved
7931 in the prologue. This register must be released by means of
7932 release_scratch_register_on_entry once it is dead. */
7933
7934static void
7935get_scratch_register_on_entry (struct scratch_reg *sr)
7936{
7937 int regno;
7938
7939 sr->saved = false;
7940
7941 if (TARGET_64BIT)
7942 {
7943 /* We always use R11 in 64-bit mode. */
7944 regno = R11_REG;
7945 }
7946 else
7947 {
7948 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
7949 bool fastcall_p
7950 	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7951       bool thiscall_p
7952 	= lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7953       bool static_chain_p = DECL_STATIC_CHAIN (decl);
7954       int regparm = ix86_function_regparm (fntype, decl);
7955 int drap_regno
7956 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
7957
7958 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
7959 for the static chain register. */
7960 if ((regparm < 1 || (fastcall_p && !static_chain_p))
7961 && drap_regno != AX_REG)
7962 regno = AX_REG;
7963 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
7964 for the static chain register. */
7965 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
7966 regno = AX_REG;
7967 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
7968 regno = DX_REG;
7969 /* ecx is the static chain register. */
7970 else if (regparm < 3 && !fastcall_p && !thiscall_p
7971 && !static_chain_p
7972 && drap_regno != CX_REG)
7973 regno = CX_REG;
7974       else if (ix86_save_reg (BX_REG, true, false))
7975 regno = BX_REG;
7976 /* esi is the static chain register. */
7977 else if (!(regparm == 3 && static_chain_p)
7978 	       && ix86_save_reg (SI_REG, true, false))
7979 regno = SI_REG;
7980       else if (ix86_save_reg (DI_REG, true, false))
7981 regno = DI_REG;
7982 else
7983 {
7984 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
7985 sr->saved = true;
7986 }
7987 }
7988
7989 sr->reg = gen_rtx_REG (Pmode, regno);
7990 if (sr->saved)
7991 {
7992       rtx_insn *insn = emit_insn (gen_push (sr->reg));
7993 RTX_FRAME_RELATED_P (insn) = 1;
7994 }
7995}
7996
7997/* Release a scratch register obtained from the preceding function.
7998
7999 If RELEASE_VIA_POP is true, we just pop the register off the stack
8000 to release it. This is what non-Linux systems use with -fstack-check.
8001
8002 Otherwise we use OFFSET to locate the saved register and the
8003 allocated stack space becomes part of the local frame and is
8004 deallocated by the epilogue. */
8005
8006static void
8007release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
8008 bool release_via_pop)
8009{
8010 if (sr->saved)
8011 {
8012 if (release_via_pop)
8013 {
8014 struct machine_function *m = cfun->machine;
8015 	  rtx x, insn = emit_insn (gen_pop (sr->reg));
8016
8017 	  /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
8018 RTX_FRAME_RELATED_P (insn) = 1;
8019 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8020 x = gen_rtx_SET (stack_pointer_rtx, x);
8021 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
8022 m->fs.sp_offset -= UNITS_PER_WORD;
8023 }
8024 else
8025 {
8026 rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
8027 x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
8028 emit_insn (x);
8029 }
8030 }
8031}
8032
8033/* Emit code to adjust the stack pointer by SIZE bytes while probing it.
8034
8035 If INT_REGISTERS_SAVED is true, then integer registers have already been
8036 pushed on the stack.
8037
8038    If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
8039 beyond SIZE bytes.
8040
8041 This assumes no knowledge of the current probing state, i.e. it is never
8042 allowed to allocate more than PROBE_INTERVAL bytes of stack space without
8043 a suitable probe. */
8044
8045static void
8046ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
8047 const bool int_registers_saved,
8048 const bool protection_area)
8049{
8050 struct machine_function *m = cfun->machine;
8051
8052 /* If this function does not statically allocate stack space, then
8053 no probes are needed. */
8054 if (!size)
8055 {
8056 /* However, the allocation of space via pushes for register
8057 saves could be viewed as allocating space, but without the
8058 need to probe. */
8059 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
8060 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
8061 else
8062 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
8063 return;
8064 }
8065
8066 /* If we are a noreturn function, then we have to consider the
8067 possibility that we're called via a jump rather than a call.
8068
8069 Thus we don't have the implicit probe generated by saving the
8070 return address into the stack at the call. Thus, the stack
8071 pointer could be anywhere in the guard page. The safe thing
8072 to do is emit a probe now.
8073
8074 The probe can be avoided if we have already emitted any callee
8075 register saves into the stack or have a frame pointer (which will
8076 have been saved as well). Those saves will function as implicit
8077 probes.
8078
8079 ?!? This should be revamped to work like aarch64 and s390 where
8080 we track the offset from the most recent probe. Normally that
8081 offset would be zero. For a noreturn function we would reset
8082 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
8083 we just probe when we cross PROBE_INTERVAL. */
8084 if (TREE_THIS_VOLATILE (cfun->decl)
8085 && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
8086 {
8087 /* We can safely use any register here since we're just going to push
8088 its value and immediately pop it back. But we do try and avoid
8089 argument passing registers so as not to introduce dependencies in
8090 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
8091 rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
8092       rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
8093       rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
8094 m->fs.sp_offset -= UNITS_PER_WORD;
8095 if (m->fs.cfa_reg == stack_pointer_rtx)
8096 {
8097 m->fs.cfa_offset -= UNITS_PER_WORD;
8098 rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
8099 x = gen_rtx_SET (stack_pointer_rtx, x);
8100 add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
8101 RTX_FRAME_RELATED_P (insn_push) = 1;
8102 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8103 x = gen_rtx_SET (stack_pointer_rtx, x);
8104 add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
8105 RTX_FRAME_RELATED_P (insn_pop) = 1;
8106 }
8107 emit_insn (gen_blockage ());
8108 }
8109
8110 const HOST_WIDE_INT probe_interval = get_probe_interval ();
8111 const int dope = 4 * UNITS_PER_WORD;
8112
8113 /* If there is protection area, take it into account in the size. */
8114 if (protection_area)
8115 size += probe_interval + dope;
8116
8117 /* If we allocate less than the size of the guard statically,
8118 then no probing is necessary, but we do need to allocate
8119 the stack. */
8120 else if (size < (1 << param_stack_clash_protection_guard_size))
8121 {
8122 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8123 				 GEN_INT (-size), -1,
8124 				 m->fs.cfa_reg == stack_pointer_rtx);
8125 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
8126 return;
8127 }
8128
8129 /* We're allocating a large enough stack frame that we need to
8130 emit probes. Either emit them inline or in a loop depending
8131 on the size. */
8132 if (size <= 4 * probe_interval)
8133 {
8134 HOST_WIDE_INT i;
8135 for (i = probe_interval; i <= size; i += probe_interval)
8136 {
8137 /* Allocate PROBE_INTERVAL bytes. */
8138 rtx insn
8139 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8140 					 GEN_INT (-probe_interval), -1,
8141 					 m->fs.cfa_reg == stack_pointer_rtx);
8142 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
8143
8144 /* And probe at *sp. */
8145 emit_stack_probe (stack_pointer_rtx);
8146 emit_insn (gen_blockage ());
8147 }
8148
8149 /* We need to allocate space for the residual, but we do not need
8150 to probe the residual... */
8151 HOST_WIDE_INT residual = (i - probe_interval - size);
8152 if (residual)
8153 {
8154 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8155 				     GEN_INT (residual), -1,
8156 				     m->fs.cfa_reg == stack_pointer_rtx);
8157
8158 /* ...except if there is a protection area to maintain. */
8159 if (protection_area)
8160 emit_stack_probe (stack_pointer_rtx);
8161 }
8162
8163 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
8164 }
8165 else
8166 {
8167 /* We expect the GP registers to be saved when probes are used
8168 as the probing sequences might need a scratch register and
8169 the routine to allocate one assumes the integer registers
8170 have already been saved. */
8171 gcc_assert (int_registers_saved);
8172
8173 struct scratch_reg sr;
8174       get_scratch_register_on_entry (&sr);
8175
8176 /* If we needed to save a register, then account for any space
8177 that was pushed (we are not going to pop the register when
8178 we do the restore). */
8179 if (sr.saved)
8180 size -= UNITS_PER_WORD;
8181
8182 /* Step 1: round SIZE down to a multiple of the interval. */
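      /* PROBE_INTERVAL is a power of two, so ANDing SIZE with its
	 negation rounds SIZE down to a multiple of the interval; the
	 remainder, if any, is handled in step 4 below.  */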
8183 HOST_WIDE_INT rounded_size = size & -probe_interval;
8184
8185 /* Step 2: compute final value of the loop counter. Use lea if
8186 possible. */
8187 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
8188 rtx insn;
8189 if (address_no_seg_operand (addr, Pmode))
8190 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
8191 else
8192 {
8193 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
8194 insn = emit_insn (gen_rtx_SET (sr.reg,
8195 gen_rtx_PLUS (Pmode, sr.reg,
8196 stack_pointer_rtx)));
8197 }
8198 if (m->fs.cfa_reg == stack_pointer_rtx)
8199 {
8200 add_reg_note (insn, REG_CFA_DEF_CFA,
8201 plus_constant (Pmode, sr.reg,
8202 m->fs.cfa_offset + rounded_size));
8203 RTX_FRAME_RELATED_P (insn) = 1;
8204 }
8205
8206 /* Step 3: the loop. */
8207 rtx size_rtx = GEN_INT (rounded_size);
8208       insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
8209 						     size_rtx));
8210 if (m->fs.cfa_reg == stack_pointer_rtx)
8211 {
8212 m->fs.cfa_offset += rounded_size;
8213 add_reg_note (insn, REG_CFA_DEF_CFA,
8214 plus_constant (Pmode, stack_pointer_rtx,
8215 m->fs.cfa_offset));
8216 RTX_FRAME_RELATED_P (insn) = 1;
8217 }
8218 m->fs.sp_offset += rounded_size;
8219 emit_insn (gen_blockage ());
8220
8221 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
8222 is equal to ROUNDED_SIZE. */
8223
8224 if (size != rounded_size)
8225 {
8226 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8227 				     GEN_INT (rounded_size - size), -1,
8228 				     m->fs.cfa_reg == stack_pointer_rtx);
8229
8230 if (protection_area)
8231 emit_stack_probe (stack_pointer_rtx);
8232 }
8233
8234 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
8235
8236 /* This does not deallocate the space reserved for the scratch
8237 register. That will be deallocated in the epilogue. */
8238       release_scratch_register_on_entry (&sr, size, false);
8239 }
8240
8241 /* Adjust back to account for the protection area. */
8242 if (protection_area)
8243 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8244 			       GEN_INT (probe_interval + dope), -1,
8245 			       m->fs.cfa_reg == stack_pointer_rtx);
8246
8247 /* Make sure nothing is scheduled before we are done. */
8248 emit_insn (gen_blockage ());
8249}
8250
8251/* Adjust the stack pointer up to REG while probing it. */
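/* For illustration, the emitted loop looks roughly like this in AT&T
   syntax, assuming 64-bit mode, a %r11 scratch operand and the default
   4096-byte probe interval (the exact registers and suffixes depend on
   the operands):

	.LPSRL0:
	subq	$4096, %rsp
	orq	$0, (%rsp)
	cmpq	%r11, %rsp
	jne	.LPSRL0  */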
8252
8253const char *
8254output_adjust_stack_and_probe (rtx reg)
8255{
8256 static int labelno = 0;
8257 char loop_lab[32];
8258 rtx xops[2];
8259
8260 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
8261
8262 /* Loop. */
8263 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8264
8265 /* SP = SP + PROBE_INTERVAL. */
8266 xops[0] = stack_pointer_rtx;
8267 xops[1] = GEN_INT (get_probe_interval ());
8268 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
8269
8270 /* Probe at SP. */
8271 xops[1] = const0_rtx;
8272 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
8273
8274 /* Test if SP == LAST_ADDR. */
8275 xops[0] = stack_pointer_rtx;
8276 xops[1] = reg;
8277 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
8278
8279 /* Branch. */
8280   fputs ("\tjne\t", asm_out_file);
8281   assemble_name_raw (asm_out_file, loop_lab);
8282   fputc ('\n', asm_out_file);
8283
8284 return "";
8285}
8286
8287/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
8288 inclusive. These are offsets from the current stack pointer.
8289
8290 INT_REGISTERS_SAVED is true if integer registers have already been
8291 pushed on the stack. */
8292
8293static void
8294ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
8295 const bool int_registers_saved)
8296{
8297 const HOST_WIDE_INT probe_interval = get_probe_interval ();
8298
8299 /* See if we have a constant small number of probes to generate. If so,
8300 that's the easy case. The run-time loop is made up of 6 insns in the
8301  generic case while the compile-time loop is made up of n insns for
8302  n intervals. */
8303 if (size <= 6 * probe_interval)
8304 {
8305 HOST_WIDE_INT i;
8306
8307 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
8308 it exceeds SIZE. If only one probe is needed, this will not
8309 generate any code. Then probe at FIRST + SIZE. */
8310 for (i = probe_interval; i < size; i += probe_interval)
8311 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
8312 -(first + i)));
8313
8314 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
8315 -(first + size)));
8316 }
8317
8318 /* Otherwise, do the same as above, but in a loop. Note that we must be
8319 extra careful with variables wrapping around because we might be at
8320 the very top (or the very bottom) of the address space and we have
8321 to be able to handle this case properly; in particular, we use an
8322 equality test for the loop condition. */
8323 else
8324 {
8325 /* We expect the GP registers to be saved when probes are used
8326 as the probing sequences might need a scratch register and
8327 the routine to allocate one assumes the integer registers
8328 have already been saved. */
8329 gcc_assert (int_registers_saved);
8330
8331 HOST_WIDE_INT rounded_size, last;
8332 struct scratch_reg sr;
8333
8334       get_scratch_register_on_entry (&sr);
8335
8336
8337 /* Step 1: round SIZE to the previous multiple of the interval. */
8338
8339 rounded_size = ROUND_DOWN (size, probe_interval);
8340
8341
8342 /* Step 2: compute initial and final value of the loop counter. */
8343
8344 /* TEST_OFFSET = FIRST. */
8345 emit_move_insn (sr.reg, GEN_INT (-first));
8346
8347 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
8348 last = first + rounded_size;
8349
8350
8351 /* Step 3: the loop
8352
8353 do
8354 {
8355 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
8356 probe at TEST_ADDR
8357 }
8358 while (TEST_ADDR != LAST_ADDR)
8359
8360 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
8361 until it is equal to ROUNDED_SIZE. */
8362
8363 emit_insn
8364 	(gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));
8365
8366
8367 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
8368 that SIZE is equal to ROUNDED_SIZE. */
8369
8370 if (size != rounded_size)
8371 emit_stack_probe (plus_constant (Pmode,
8372 gen_rtx_PLUS (Pmode,
8373 stack_pointer_rtx,
8374 sr.reg),
8375 rounded_size - size));
8376
8377       release_scratch_register_on_entry (&sr, size, true);
8378 }
8379
8380 /* Make sure nothing is scheduled before we are done. */
8381 emit_insn (gen_blockage ());
8382}
8383
8384/* Probe a range of stack addresses from REG to END, inclusive. These are
8385 offsets from the current stack pointer. */
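/* For illustration, assuming 32-bit mode, a %ecx scratch register and the
   default 4096-byte probe interval, the emitted loop looks roughly like

	.LPSRL1:
	subl	$4096, %ecx
	orl	$0, (%esp,%ecx)
	cmpl	$-12288, %ecx
	jne	.LPSRL1

   where -12288 is just an example value for the constant END operand.  */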
8386
8387const char *
8388output_probe_stack_range (rtx reg, rtx end)
8389{
8390 static int labelno = 0;
8391 char loop_lab[32];
8392 rtx xops[3];
8393
8394 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
8395
8396 /* Loop. */
8397 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8398
8399 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
8400 xops[0] = reg;
8401 xops[1] = GEN_INT (get_probe_interval ());
8402 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
8403
8404 /* Probe at TEST_ADDR. */
8405 xops[0] = stack_pointer_rtx;
8406 xops[1] = reg;
8407 xops[2] = const0_rtx;
8408 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
8409
8410 /* Test if TEST_ADDR == LAST_ADDR. */
8411 xops[0] = reg;
8412 xops[1] = end;
8413 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
8414
8415 /* Branch. */
8416   fputs ("\tjne\t", asm_out_file);
8417   assemble_name_raw (asm_out_file, loop_lab);
8418   fputc ('\n', asm_out_file);
8419
8420 return "";
8421}
8422
8423/* Set stack_frame_required to false if stack frame isn't required.
8424 Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
8425 slot used if stack frame is required and CHECK_STACK_SLOT is true. */
8426
8427static void
8428ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
8429 bool check_stack_slot)
8430{
8431 HARD_REG_SET set_up_by_prologue, prologue_used;
8432 basic_block bb;
8433
8434   CLEAR_HARD_REG_SET (prologue_used);
8435   CLEAR_HARD_REG_SET (set_up_by_prologue);
8436   add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
8437   add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
8438   add_to_hard_reg_set (&set_up_by_prologue, Pmode,
8439 HARD_FRAME_POINTER_REGNUM);
8440
8441 /* The preferred stack alignment is the minimum stack alignment. */
8442 if (stack_alignment > crtl->preferred_stack_boundary)
8443 stack_alignment = crtl->preferred_stack_boundary;
8444
8445 bool require_stack_frame = false;
8446
8447 FOR_EACH_BB_FN (bb, cfun)
8448 {
8449 rtx_insn *insn;
8450 FOR_BB_INSNS (bb, insn)
8451 if (NONDEBUG_INSN_P (insn)
8452 && requires_stack_frame_p (insn, prologue_used,
8453 set_up_by_prologue))
8454 {
8455 require_stack_frame = true;
8456
8457 if (check_stack_slot)
8458 {
8459 /* Find the maximum stack alignment. */
8460 subrtx_iterator::array_type array;
8461 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
8462 if (MEM_P (*iter)
8463 && (reg_mentioned_p (stack_pointer_rtx,
8464 *iter)
8465 || reg_mentioned_p (frame_pointer_rtx,
8466 *iter)))
8467 {
8468 unsigned int alignment = MEM_ALIGN (*iter);
8469 if (alignment > stack_alignment)
8470 stack_alignment = alignment;
8471 }
8472 }
8473 }
8474 }
8475
8476 cfun->machine->stack_frame_required = require_stack_frame;
8477}
8478
8479/* Finalize stack_realign_needed and frame_pointer_needed flags, which
8480 will guide prologue/epilogue to be generated in correct form. */
8481
8482static void
8483ix86_finalize_stack_frame_flags (void)
8484{
8485   /* Check if stack realignment is really needed after reload, and
8486      store the result in cfun. */
8487 unsigned int incoming_stack_boundary
8488 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8489 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8490 unsigned int stack_alignment
8491 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
8492 ? crtl->max_used_stack_slot_alignment
8493 : crtl->stack_alignment_needed);
8494 unsigned int stack_realign
8495 = (incoming_stack_boundary < stack_alignment);
8496 bool recompute_frame_layout_p = false;
8497
8498 if (crtl->stack_realign_finalized)
8499 {
8500       /* After stack_realign_needed is finalized, we can no longer
8501 	 change it. */
8502 gcc_assert (crtl->stack_realign_needed == stack_realign);
8503 return;
8504 }
8505
8506 /* It is always safe to compute max_used_stack_alignment. We
8507 compute it only if 128-bit aligned load/store may be generated
8508 on misaligned stack slot which will lead to segfault. */
8509 bool check_stack_slot
8510 = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
8511 ix86_find_max_used_stack_alignment (stack_alignment,
8512 check_stack_slot);
8513
8514   /* If the only reason for frame_pointer_needed is that we conservatively
8515      assumed stack realignment might be needed or -fno-omit-frame-pointer
8516      is used, but in the end nothing that needed the stack alignment had
8517      been spilled and there was no stack access, clear frame_pointer_needed
8518      and say we don't need stack realignment.
8519
8520 When vector register is used for piecewise move and store, we don't
8521 increase stack_alignment_needed as there is no register spill for
8522 piecewise move and store. Since stack_realign_needed is set to true
8523 by checking stack_alignment_estimated which is updated by pseudo
8524 vector register usage, we also need to check stack_realign_needed to
8525 eliminate frame pointer. */
8526 if ((stack_realign
8527 || (!flag_omit_frame_pointer && optimize)
8528 || crtl->stack_realign_needed)
8529 && frame_pointer_needed
8530 && crtl->is_leaf
8531 && crtl->sp_is_unchanging
8532 && !ix86_current_function_calls_tls_descriptor
8533 && !crtl->accesses_prior_frames
8534 && !cfun->calls_alloca
8535 && !crtl->calls_eh_return
8536 /* See ira_setup_eliminable_regset for the rationale. */
8537 && !(STACK_CHECK_MOVING_SP
8538 && flag_stack_check
8539 && flag_exceptions
8540 && cfun->can_throw_non_call_exceptions)
8541 && !ix86_frame_pointer_required ()
8542 && ix86_get_frame_size () == 0
8543 && ix86_nsaved_sseregs () == 0
8544 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
8545 {
8546 if (cfun->machine->stack_frame_required)
8547 {
8548 /* Stack frame is required. If stack alignment needed is less
8549 than incoming stack boundary, don't realign stack. */
8550 stack_realign = incoming_stack_boundary < stack_alignment;
8551 if (!stack_realign)
8552 {
8553 crtl->max_used_stack_slot_alignment
8554 = incoming_stack_boundary;
8555 crtl->stack_alignment_needed
8556 = incoming_stack_boundary;
8557 /* Also update preferred_stack_boundary for leaf
8558 functions. */
8559 crtl->preferred_stack_boundary
8560 = incoming_stack_boundary;
8561 }
8562 }
8563 else
8564 {
8565 /* If drap has been set, but it actually isn't live at the
8566 start of the function, there is no reason to set it up. */
8567 if (crtl->drap_reg)
8568 {
8569 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
8570 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
8571 REGNO (crtl->drap_reg)))
8572 {
8573 crtl->drap_reg = NULL_RTX;
8574 crtl->need_drap = false;
8575 }
8576 }
8577 else
8578 cfun->machine->no_drap_save_restore = true;
8579
8580 frame_pointer_needed = false;
8581 stack_realign = false;
8582 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
8583 crtl->stack_alignment_needed = incoming_stack_boundary;
8584 crtl->stack_alignment_estimated = incoming_stack_boundary;
8585 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
8586 crtl->preferred_stack_boundary = incoming_stack_boundary;
8587 df_finish_pass (true);
8588 df_scan_alloc (NULL);
8589 df_scan_blocks ();
8590 df_compute_regs_ever_live (true);
8591 df_analyze ();
8592
8593 if (flag_var_tracking)
8594 {
8595 /* Since frame pointer is no longer available, replace it with
8596 stack pointer - UNITS_PER_WORD in debug insns. */
8597 df_ref ref, next;
8598 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
8599 ref; ref = next)
8600 {
8601 next = DF_REF_NEXT_REG (ref);
8602 if (!DF_REF_INSN_INFO (ref))
8603 continue;
8604
8605 /* Make sure the next ref is for a different instruction,
8606 so that we're not affected by the rescan. */
8607 rtx_insn *insn = DF_REF_INSN (ref);
8608 while (next && DF_REF_INSN (next) == insn)
8609 next = DF_REF_NEXT_REG (next);
8610
8611 if (DEBUG_INSN_P (insn))
8612 {
8613 bool changed = false;
8614 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
8615 {
8616 rtx *loc = DF_REF_LOC (ref);
8617 if (*loc == hard_frame_pointer_rtx)
8618 {
8619 *loc = plus_constant (Pmode,
8620 stack_pointer_rtx,
8621 -UNITS_PER_WORD);
8622 changed = true;
8623 }
8624 }
8625 if (changed)
8626 df_insn_rescan (insn);
8627 }
8628 }
8629 }
8630
8631 recompute_frame_layout_p = true;
8632 }
8633 }
8634 else if (crtl->max_used_stack_slot_alignment >= 128
8635 && cfun->machine->stack_frame_required)
8636 {
8637 	 /* We don't need to realign the stack. max_used_stack_alignment
8638 	    is used to decide how the stack frame should be aligned. This
8639 	    is independent of the psABI and of 32-bit vs 64-bit. */
8640 cfun->machine->max_used_stack_alignment
8641 = stack_alignment / BITS_PER_UNIT;
8642 }
8643
8644 if (crtl->stack_realign_needed != stack_realign)
8645 recompute_frame_layout_p = true;
8646 crtl->stack_realign_needed = stack_realign;
8647 crtl->stack_realign_finalized = true;
8648 if (recompute_frame_layout_p)
8649 ix86_compute_frame_layout ();
8650}
8651
8652/* Delete SET_GOT right after entry block if it is allocated to reg. */
8653
8654static void
8655ix86_elim_entry_set_got (rtx reg)
8656{
8657 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
8658 rtx_insn *c_insn = BB_HEAD (bb);
8659 if (!NONDEBUG_INSN_P (c_insn))
8660 c_insn = next_nonnote_nondebug_insn (c_insn);
8661 if (c_insn && NONJUMP_INSN_P (c_insn))
8662 {
8663       rtx pat = PATTERN (c_insn);
8664 if (GET_CODE (pat) == PARALLEL)
8665 {
8666 rtx set = XVECEXP (pat, 0, 0);
8667 if (GET_CODE (set) == SET
8668 && GET_CODE (SET_SRC (set)) == UNSPEC
8669 && XINT (SET_SRC (set), 1) == UNSPEC_SET_GOT
8670 && REGNO (SET_DEST (set)) == REGNO (reg))
8671 delete_insn (c_insn);
8672 }
8673 }
8674}
8675
8676static rtx
8677gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
8678{
8679 rtx addr, mem;
8680
8681 if (offset)
8682 addr = plus_constant (Pmode, frame_reg, offset);
8683 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
8684 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
8685}
8686
8687static inline rtx
8688gen_frame_load (rtx reg, rtx frame_reg, int offset)
8689{
8690   return gen_frame_set (reg, frame_reg, offset, false);
8691}
8692
8693static inline rtx
8694gen_frame_store (rtx reg, rtx frame_reg, int offset)
8695{
8696   return gen_frame_set (reg, frame_reg, offset, true);
8697}
8698
8699static void
8700ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
8701{
8702 struct machine_function *m = cfun->machine;
8703 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
8704 + m->call_ms2sysv_extra_regs;
8705 rtvec v = rtvec_alloc (ncregs + 1);
8706 unsigned int align, i, vi = 0;
8707 rtx_insn *insn;
8708 rtx sym, addr;
8709 rtx rax = gen_rtx_REG (word_mode, AX_REG);
8710 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
8711
8712 /* AL should only be live with sysv_abi. */
8713 gcc_assert (!ix86_eax_live_at_start_p ());
8714 gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
8715
8716   /* Set up RAX as the stub's base pointer. We use stack_realign_offset
8717      regardless of whether we've actually realigned the stack or not. */
8718 align = GET_MODE_ALIGNMENT (V4SFmode);
8719   addr = choose_baseaddr (frame.stack_realign_offset
8720 			  + xlogue.get_stub_ptr_offset (), &align, AX_REG);
8721 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
8722
8723 emit_insn (gen_rtx_SET (rax, addr));
8724
8725 /* Get the stub symbol. */
8726 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
8727 : XLOGUE_STUB_SAVE);
8728 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8729
8730 for (i = 0; i < ncregs; ++i)
8731 {
8732       const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
8733 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
8734 r.regno);
8735       RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
8736 }
8737
8738 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
8739
8740 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
8741 RTX_FRAME_RELATED_P (insn) = true;
8742}
8743
8744/* Generate and return an insn body to AND X with Y. */
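/* For illustration, the prologue uses this to align the stack pointer,
   roughly

     emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));

   which clears the low bits of the stack pointer when ALIGN_BYTES is a
   power of two.  */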
8745
8746static rtx_insn *
8747gen_and2_insn (rtx x, rtx y)
8748{
8749   enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
8750
8751 gcc_assert (insn_operand_matches (icode, 0, x));
8752 gcc_assert (insn_operand_matches (icode, 1, x));
8753 gcc_assert (insn_operand_matches (icode, 2, y));
8754
8755 return GEN_FCN (icode) (x, x, y);
8756}
8757
8758/* Expand the prologue into a bunch of separate insns. */
8759
8760void
8761ix86_expand_prologue (void)
8762{
8763 struct machine_function *m = cfun->machine;
8764 rtx insn, t;
8765 HOST_WIDE_INT allocate;
8766 bool int_registers_saved;
8767 bool sse_registers_saved;
8768 bool save_stub_call_needed;
8769 rtx static_chain = NULL_RTX;
8770
8771 ix86_last_zero_store_uid = 0;
8772   if (ix86_function_naked (current_function_decl))
8773 {
8774 if (flag_stack_usage_info)
8775 current_function_static_stack_size = 0;
8776 return;
8777 }
8778
8779 ix86_finalize_stack_frame_flags ();
8780
8781 /* DRAP should not coexist with stack_realign_fp */
8782 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8783
8784   memset (&m->fs, 0, sizeof (m->fs));
8785
8786 /* Initialize CFA state for before the prologue. */
8787 m->fs.cfa_reg = stack_pointer_rtx;
8788 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
8789
8790 /* Track SP offset to the CFA. We continue tracking this after we've
8791 swapped the CFA register away from SP. In the case of re-alignment
8792      this is fudged; we're interested in offsets within the local frame. */
8793 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8794 m->fs.sp_valid = true;
8795 m->fs.sp_realigned = false;
8796
8797 const struct ix86_frame &frame = cfun->machine->frame;
8798
8799   if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
8800 {
8801 /* We should have already generated an error for any use of
8802 ms_hook on a nested function. */
8803 gcc_checking_assert (!ix86_static_chain_on_stack);
8804
8805       /* Check if profiling is active and we shall use the
8806 	 profiling-before-prologue variant. If so, sorry. */
8807 if (crtl->profile && flag_fentry != 0)
8808 sorry ("%<ms_hook_prologue%> attribute is not compatible "
8809 "with %<-mfentry%> for 32-bit");
8810
8811 /* In ix86_asm_output_function_label we emitted:
8812 8b ff movl.s %edi,%edi
8813 55 push %ebp
8814 8b ec movl.s %esp,%ebp
8815
8816 This matches the hookable function prologue in Win32 API
8817 functions in Microsoft Windows XP Service Pack 2 and newer.
8818 Wine uses this to enable Windows apps to hook the Win32 API
8819 functions provided by Wine.
8820
8821 What that means is that we've already set up the frame pointer. */
8822
8823 if (frame_pointer_needed
8824 && !(crtl->drap_reg && crtl->stack_realign_needed))
8825 {
8826 rtx push, mov;
8827
8828 /* We've decided to use the frame pointer already set up.
8829 Describe this to the unwinder by pretending that both
8830 push and mov insns happen right here.
8831
8832 Putting the unwind info here at the end of the ms_hook
8833 is done so that we can make absolutely certain we get
8834 the required byte sequence at the start of the function,
8835 rather than relying on an assembler that can produce
8836 the exact encoding required.
8837
8838 However it does mean (in the unpatched case) that we have
8839 a 1 insn window where the asynchronous unwind info is
8840 incorrect. However, if we placed the unwind info at
8841 its correct location we would have incorrect unwind info
8842 in the patched case. Which is probably all moot since
8843 I don't expect Wine generates dwarf2 unwind info for the
8844 system libraries that use this feature. */
8845
8846 insn = emit_insn (gen_blockage ());
8847
8848 push = gen_push (hard_frame_pointer_rtx);
8849 mov = gen_rtx_SET (hard_frame_pointer_rtx,
8850 stack_pointer_rtx);
8851 RTX_FRAME_RELATED_P (push) = 1;
8852 RTX_FRAME_RELATED_P (mov) = 1;
8853
8854 RTX_FRAME_RELATED_P (insn) = 1;
8855 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8856 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
8857
8858 /* Note that gen_push incremented m->fs.cfa_offset, even
8859 though we didn't emit the push insn here. */
8860 m->fs.cfa_reg = hard_frame_pointer_rtx;
8861 m->fs.fp_offset = m->fs.cfa_offset;
8862 m->fs.fp_valid = true;
8863 }
8864 else
8865 {
8866 /* The frame pointer is not needed so pop %ebp again.
8867 This leaves us with a pristine state. */
8868 emit_insn (gen_pop (hard_frame_pointer_rtx));
8869 }
8870 }
8871
8872 /* The first insn of a function that accepts its static chain on the
8873 stack is to push the register that would be filled in by a direct
8874 call. This insn will be skipped by the trampoline. */
8875 else if (ix86_static_chain_on_stack)
8876 {
8877 static_chain = ix86_static_chain (cfun->decl, false);
8878       insn = emit_insn (gen_push (static_chain));
8879 emit_insn (gen_blockage ());
8880
8881 /* We don't want to interpret this push insn as a register save,
8882 only as a stack adjustment. The real copy of the register as
8883 a save will be done later, if needed. */
8884 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
8885 t = gen_rtx_SET (stack_pointer_rtx, t);
8886 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8887 RTX_FRAME_RELATED_P (insn) = 1;
8888 }
8889
8890   /* Emit prologue code to adjust stack alignment and set up DRAP, in case
8891      DRAP is needed and stack realignment is really needed after reload. */
8892 if (stack_realign_drap)
8893 {
8894 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8895
8896 /* Can't use DRAP in interrupt function. */
8897 if (cfun->machine->func_type != TYPE_NORMAL)
8898 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
8899 "in interrupt service routine. This may be worked "
8900 "around by avoiding functions with aggregate return.");
8901
8902 /* Only need to push parameter pointer reg if it is caller saved. */
8903 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8904 {
8905 /* Push arg pointer reg */
8906 insn = emit_insn (gen_push (crtl->drap_reg));
8907 RTX_FRAME_RELATED_P (insn) = 1;
8908 }
8909
8910 /* Grab the argument pointer. */
8911 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
8912 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8913 RTX_FRAME_RELATED_P (insn) = 1;
8914 m->fs.cfa_reg = crtl->drap_reg;
8915 m->fs.cfa_offset = 0;
8916
8917 /* Align the stack. */
8918 insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
8919 GEN_INT (-align_bytes)));
8920 RTX_FRAME_RELATED_P (insn) = 1;
8921
8922 /* Replicate the return address on the stack so that return
8923 address can be reached via (argp - 1) slot. This is needed
8924 to implement macro RETURN_ADDR_RTX and intrinsic function
8925 expand_builtin_return_addr etc. */
8926 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
8927 t = gen_frame_mem (word_mode, t);
8928       insn = emit_insn (gen_push (t));
8929 RTX_FRAME_RELATED_P (insn) = 1;
8930
8931 /* For the purposes of frame and register save area addressing,
8932 we've started over with a new frame. */
8933 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8934 m->fs.realigned = true;
8935
8936 if (static_chain)
8937 {
8938 /* Replicate static chain on the stack so that static chain
8939 can be reached via (argp - 2) slot. This is needed for
8940 nested function with stack realignment. */
8941 	  insn = emit_insn (gen_push (static_chain));
8942 RTX_FRAME_RELATED_P (insn) = 1;
8943 }
8944 }
8945
8946 int_registers_saved = (frame.nregs == 0);
8947 sse_registers_saved = (frame.nsseregs == 0);
8948 save_stub_call_needed = (m->call_ms2sysv);
8949 gcc_assert (sse_registers_saved || !save_stub_call_needed);
8950
8951 if (frame_pointer_needed && !m->fs.fp_valid)
8952 {
8953 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8954 slower on all targets. Also sdb didn't like it. */
8955 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8956 RTX_FRAME_RELATED_P (insn) = 1;
8957
8958 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
8959 {
8960 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8961 RTX_FRAME_RELATED_P (insn) = 1;
8962
8963 if (m->fs.cfa_reg == stack_pointer_rtx)
8964 m->fs.cfa_reg = hard_frame_pointer_rtx;
8965 m->fs.fp_offset = m->fs.sp_offset;
8966 m->fs.fp_valid = true;
8967 }
8968 }
8969
8970 if (!int_registers_saved)
8971 {
8972 /* If saving registers via PUSH, do so now. */
8973 if (!frame.save_regs_using_mov)
8974 {
8975 ix86_emit_save_regs ();
8976 int_registers_saved = true;
8977 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
8978 }
8979
8980 /* When using red zone we may start register saving before allocating
8981 the stack frame saving one cycle of the prologue. However, avoid
8982 doing this if we have to probe the stack; at least on x86_64 the
8983 stack probe can turn into a call that clobbers a red zone location. */
8984 else if (ix86_using_red_zone ()
8985 && (! TARGET_STACK_PROBE
8986 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
8987 {
8988 	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8989 cfun->machine->red_zone_used = true;
8990 int_registers_saved = true;
8991 }
8992 }
8993
8994 if (frame.red_zone_size != 0)
8995 cfun->machine->red_zone_used = true;
8996
8997 if (stack_realign_fp)
8998 {
8999 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9000 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9001
9002 /* Record last valid frame pointer offset. */
9003 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
9004
9005 /* The computation of the size of the re-aligned stack frame means
9006 that we must allocate the size of the register save area before
9007 performing the actual alignment. Otherwise we cannot guarantee
9008 that there's enough storage above the realignment point. */
9009 allocate = frame.reg_save_offset - m->fs.sp_offset
9010 + frame.stack_realign_allocate;
9011 if (allocate)
9012 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9013 				   GEN_INT (-allocate), -1, false);
9014
9015 /* Align the stack. */
9016 emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
9017 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
9018 m->fs.sp_realigned_offset = m->fs.sp_offset
9019 - frame.stack_realign_allocate;
9020 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
9021 Beyond this point, stack access should be done via choose_baseaddr or
9022 by using sp_valid_at and fp_valid_at to determine the correct base
9023 register. Henceforth, any CFA offset should be thought of as logical
9024 and not physical. */
9025 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
9026 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
9027 m->fs.sp_realigned = true;
9028
9029 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
9030 is needed to describe where a register is saved using a realigned
9031 stack pointer, so we need to invalidate the stack pointer for that
9032 target. */
9033 if (TARGET_SEH)
9034 m->fs.sp_valid = false;
9035
9036 /* If SP offset is non-immediate after allocation of the stack frame,
9037 then emit SSE saves or stub call prior to allocating the rest of the
9038 stack frame. This is less efficient for the out-of-line stub because
9039 we can't combine allocations across the call barrier, but it's better
9040 than using a scratch register. */
9041 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
9042 - m->fs.sp_realigned_offset),
9043 Pmode))
9044 {
9045 if (!sse_registers_saved)
9046 {
9047 	      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9048 sse_registers_saved = true;
9049 }
9050 else if (save_stub_call_needed)
9051 {
9052 ix86_emit_outlined_ms2sysv_save (frame);
9053 save_stub_call_needed = false;
9054 }
9055 }
9056 }
9057
9058 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
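  /* ALLOCATE is now whatever part of the frame has not already been
     allocated by the register pushes and realignment code above.  */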
9059
9060 if (flag_stack_usage_info)
9061 {
9062 /* We start to count from ARG_POINTER. */
9063 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9064
9065 /* If it was realigned, take into account the fake frame. */
9066 if (stack_realign_drap)
9067 {
9068 if (ix86_static_chain_on_stack)
9069 stack_size += UNITS_PER_WORD;
9070
9071 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9072 stack_size += UNITS_PER_WORD;
9073
9074 /* This over-estimates by 1 minimal-stack-alignment-unit but
9075 mitigates that by counting in the new return address slot. */
9076 current_function_dynamic_stack_size
9077 += crtl->stack_alignment_needed / BITS_PER_UNIT;
9078 }
9079
9080 current_function_static_stack_size = stack_size;
9081 }
9082
9083 /* On SEH target with very large frame size, allocate an area to save
9084 SSE registers (as the very large allocation won't be described). */
9085 if (TARGET_SEH
9086 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
9087 && !sse_registers_saved)
9088 {
9089 HOST_WIDE_INT sse_size
9090 = frame.sse_reg_save_offset - frame.reg_save_offset;
9091
9092 gcc_assert (int_registers_saved);
9093
9094 /* No need to do stack checking as the area will be immediately
9095 written. */
9096 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-sse_size), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
9099 allocate -= sse_size;
      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9101 sse_registers_saved = true;
9102 }
9103
9104 /* If stack clash protection is requested, then probe the stack, unless it
9105 is already probed on the target. */
9106 if (allocate >= 0
9107 && flag_stack_clash_protection
9108 && !ix86_target_stack_probe ())
9109 {
      ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
9111 allocate = 0;
9112 }
9113
9114 /* The stack has already been decremented by the instruction calling us
9115 so probe if the size is non-negative to preserve the protection area. */
9116 else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9117 {
9118 const HOST_WIDE_INT probe_interval = get_probe_interval ();
9119
9120 if (STACK_CHECK_MOVING_SP)
9121 {
9122 if (crtl->is_leaf
9123 && !cfun->calls_alloca
9124 && allocate <= probe_interval)
9125 ;
9126
9127 else
9128 {
	      ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
9130 allocate = 0;
9131 }
9132 }
9133
9134 else
9135 {
9136 HOST_WIDE_INT size = allocate;
9137
9138 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
9139 size = 0x80000000 - get_stack_check_protect () - 1;
9140
9141 if (TARGET_STACK_PROBE)
9142 {
9143 if (crtl->is_leaf && !cfun->calls_alloca)
9144 {
9145 if (size > probe_interval)
		ix86_emit_probe_stack_range (0, size, int_registers_saved);
9147 }
9148 else
	      ix86_emit_probe_stack_range (0,
					   size + get_stack_check_protect (),
					   int_registers_saved);
9152 }
9153 else
9154 {
9155 if (crtl->is_leaf && !cfun->calls_alloca)
9156 {
9157 if (size > probe_interval
9158 && size > get_stack_check_protect ())
		ix86_emit_probe_stack_range (get_stack_check_protect (),
					     (size
					      - get_stack_check_protect ()),
					     int_registers_saved);
9163 }
9164 else
	      ix86_emit_probe_stack_range (get_stack_check_protect (), size,
					   int_registers_saved);
9167 }
9168 }
9169 }
9170
9171 if (allocate == 0)
9172 ;
9173 else if (!ix86_target_stack_probe ()
9174 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9175 {
9176 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-allocate), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
9179 }
9180 else
9181 {
9182 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9183 rtx r10 = NULL;
9184 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
9185 bool eax_live = ix86_eax_live_at_start_p ();
9186 bool r10_live = false;
9187
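      /* The probe worker takes the allocation size in %eax, so if %eax
	 carries an incoming value it has to be saved around the call;
	 the same goes for %r10, the 64-bit static chain register, when
	 the function has a static chain.  */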
9188 if (TARGET_64BIT)
9189 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
9190
9191 if (eax_live)
9192 {
	  insn = emit_insn (gen_push (eax));
9194 allocate -= UNITS_PER_WORD;
9195 /* Note that SEH directives need to continue tracking the stack
9196 pointer even after the frame pointer has been set up. */
9197 if (sp_is_cfa_reg || TARGET_SEH)
9198 {
9199 if (sp_is_cfa_reg)
9200 m->fs.cfa_offset += UNITS_PER_WORD;
9201 RTX_FRAME_RELATED_P (insn) = 1;
9202 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9203 gen_rtx_SET (stack_pointer_rtx,
9204 plus_constant (Pmode,
9205 stack_pointer_rtx,
9206 -UNITS_PER_WORD)));
9207 }
9208 }
9209
9210 if (r10_live)
9211 {
9212 r10 = gen_rtx_REG (Pmode, R10_REG);
	  insn = emit_insn (gen_push (r10));
9214 allocate -= UNITS_PER_WORD;
9215 if (sp_is_cfa_reg || TARGET_SEH)
9216 {
9217 if (sp_is_cfa_reg)
9218 m->fs.cfa_offset += UNITS_PER_WORD;
9219 RTX_FRAME_RELATED_P (insn) = 1;
9220 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9221 gen_rtx_SET (stack_pointer_rtx,
9222 plus_constant (Pmode,
9223 stack_pointer_rtx,
9224 -UNITS_PER_WORD)));
9225 }
9226 }
9227
9228 emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
9230
9231 /* Use the fact that AX still contains ALLOCATE. */
9232 insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
			 (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
9234
9235 if (sp_is_cfa_reg || TARGET_SEH)
9236 {
9237 if (sp_is_cfa_reg)
9238 m->fs.cfa_offset += allocate;
9239 RTX_FRAME_RELATED_P (insn) = 1;
9240 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9241 gen_rtx_SET (stack_pointer_rtx,
9242 plus_constant (Pmode, stack_pointer_rtx,
9243 -allocate)));
9244 }
9245 m->fs.sp_offset += allocate;
9246
9247 /* Use stack_pointer_rtx for relative addressing so that code works for
9248 realigned stack. But this means that we need a blockage to prevent
9249 stores based on the frame pointer from being scheduled before. */
9250 if (r10_live && eax_live)
9251 {
9252 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
9253 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
9254 gen_frame_mem (word_mode, t));
9255 t = plus_constant (Pmode, t, UNITS_PER_WORD);
9256 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
9257 gen_frame_mem (word_mode, t));
9258 emit_insn (gen_memory_blockage ());
9259 }
9260 else if (eax_live || r10_live)
9261 {
9262 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
9263 emit_move_insn (gen_rtx_REG (word_mode,
9264 (eax_live ? AX_REG : R10_REG)),
9265 gen_frame_mem (word_mode, t));
9266 emit_insn (gen_memory_blockage ());
9267 }
9268 }
9269 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9270
  /* If we haven't already set up the frame pointer, do so now.  */
9272 if (frame_pointer_needed && !m->fs.fp_valid)
9273 {
9274 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
9275 GEN_INT (frame.stack_pointer_offset
9276 - frame.hard_frame_pointer_offset));
9277 insn = emit_insn (insn);
9278 RTX_FRAME_RELATED_P (insn) = 1;
9279 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
9280
9281 if (m->fs.cfa_reg == stack_pointer_rtx)
9282 m->fs.cfa_reg = hard_frame_pointer_rtx;
9283 m->fs.fp_offset = frame.hard_frame_pointer_offset;
9284 m->fs.fp_valid = true;
9285 }
9286
9287 if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9289 if (!sse_registers_saved)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9291 else if (save_stub_call_needed)
9292 ix86_emit_outlined_ms2sysv_save (frame);
9293
  /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
     in the prologue.  */
9296 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
9297 {
9298 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
9299 insn = emit_insn (gen_set_got (pic));
9300 RTX_FRAME_RELATED_P (insn) = 1;
9301 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
9302 emit_insn (gen_prologue_use (pic));
      /* Delete any SET_GOT that has already been emitted and allocated to
	 REAL_PIC_OFFSET_TABLE_REGNUM.  */
      ix86_elim_entry_set_got (pic);
9306 }
9307
9308 if (crtl->drap_reg && !crtl->stack_realign_needed)
9309 {
      /* vDRAP was set up, but after reload it turns out that stack
	 realignment isn't necessary; emit the prologue code that sets up
	 DRAP without the stack realignment adjustment.  */
      t = choose_baseaddr (0, NULL);
9314 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
9315 }
9316
9317 /* Prevent instructions from being scheduled into register save push
9318 sequence when access to the redzone area is done through frame pointer.
9319 The offset between the frame pointer and the stack pointer is calculated
9320 relative to the value of the stack pointer at the end of the function
9321 prologue, and moving instructions that access redzone area via frame
9322 pointer inside push sequence violates this assumption. */
9323 if (frame_pointer_needed && frame.red_zone_size)
9324 emit_insn (gen_memory_blockage ());
9325
9326 /* SEH requires that the prologue end within 256 bytes of the start of
9327 the function. Prevent instruction schedules that would extend that.
9328 Further, prevent alloca modifications to the stack pointer from being
9329 combined with prologue modifications. */
9330 if (TARGET_SEH)
9331 emit_insn (gen_prologue_use (stack_pointer_rtx));
9332}
9333
9334/* Emit code to restore REG using a POP or POPP insn. */
9335
9336static void
9337ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p)
9338{
9339 struct machine_function *m = cfun->machine;
  rtx_insn *insn = emit_insn (gen_pop (reg, ppx_p));
9341
  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9343 m->fs.sp_offset -= UNITS_PER_WORD;
9344
9345 if (m->fs.cfa_reg == crtl->drap_reg
9346 && REGNO (reg) == REGNO (crtl->drap_reg))
9347 {
9348 /* Previously we'd represented the CFA as an expression
9349 like *(%ebp - 8). We've just popped that value from
9350 the stack, which means we need to reset the CFA to
9351 the drap register. This will remain until we restore
9352 the stack pointer. */
9353 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9354 RTX_FRAME_RELATED_P (insn) = 1;
9355
9356 /* This means that the DRAP register is valid for addressing too. */
9357 m->fs.drap_valid = true;
9358 return;
9359 }
9360
9361 if (m->fs.cfa_reg == stack_pointer_rtx)
9362 {
9363 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9364 x = gen_rtx_SET (stack_pointer_rtx, x);
9365 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9366 RTX_FRAME_RELATED_P (insn) = 1;
9367
9368 m->fs.cfa_offset -= UNITS_PER_WORD;
9369 }
9370
9371 /* When the frame pointer is the CFA, and we pop it, we are
9372 swapping back to the stack pointer as the CFA. This happens
9373 for stack frames that don't allocate other data, so we assume
9374 the stack pointer is now pointing at the return address, i.e.
9375 the function entry state, which makes the offset be 1 word. */
9376 if (reg == hard_frame_pointer_rtx)
9377 {
9378 m->fs.fp_valid = false;
9379 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9380 {
9381 m->fs.cfa_reg = stack_pointer_rtx;
9382 m->fs.cfa_offset -= UNITS_PER_WORD;
9383
9384 add_reg_note (insn, REG_CFA_DEF_CFA,
9385 plus_constant (Pmode, stack_pointer_rtx,
9386 m->fs.cfa_offset));
9387 RTX_FRAME_RELATED_P (insn) = 1;
9388 }
9389 }
9390}
9391
9392/* Emit code to restore REG using a POP2 insn. */
9393static void
9394ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false)
9395{
9396 struct machine_function *m = cfun->machine;
9397 const int offset = UNITS_PER_WORD * 2;
9398 rtx_insn *insn;
9399
9400 rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
9401 stack_pointer_rtx));
9402
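  /* POP2 restores two registers from a single 16-byte (TImode) stack slot;
     the PPX-hinted variant additionally marks the pop as half of a balanced
     push/pop pair, which APX hardware can presumably exploit.  */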
9403 if (ppx_p)
9404 insn = emit_insn (gen_pop2p_di (reg1, mem, reg2));
9405 else
9406 insn = emit_insn (gen_pop2_di (reg1, mem, reg2));
9407
9408 RTX_FRAME_RELATED_P (insn) = 1;
9409
9410 rtx dwarf = NULL_RTX;
9411 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg1, dwarf);
9412 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg2, dwarf);
9413 REG_NOTES (insn) = dwarf;
9414 m->fs.sp_offset -= offset;
9415
9416 if (m->fs.cfa_reg == crtl->drap_reg
9417 && (REGNO (reg1) == REGNO (crtl->drap_reg)
9418 || REGNO (reg2) == REGNO (crtl->drap_reg)))
9419 {
9420 /* Previously we'd represented the CFA as an expression
9421 like *(%ebp - 8). We've just popped that value from
9422 the stack, which means we need to reset the CFA to
9423 the drap register. This will remain until we restore
9424 the stack pointer. */
9425 add_reg_note (insn, REG_CFA_DEF_CFA,
9426 REGNO (reg1) == REGNO (crtl->drap_reg) ? reg1 : reg2);
9427 RTX_FRAME_RELATED_P (insn) = 1;
9428
9429 /* This means that the DRAP register is valid for addressing too. */
9430 m->fs.drap_valid = true;
9431 return;
9432 }
9433
9434 if (m->fs.cfa_reg == stack_pointer_rtx)
9435 {
9436 rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
9437 x = gen_rtx_SET (stack_pointer_rtx, x);
9438 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9439 RTX_FRAME_RELATED_P (insn) = 1;
9440
9441 m->fs.cfa_offset -= offset;
9442 }
9443
9444 /* When the frame pointer is the CFA, and we pop it, we are
9445 swapping back to the stack pointer as the CFA. This happens
9446 for stack frames that don't allocate other data, so we assume
9447 the stack pointer is now pointing at the return address, i.e.
9448 the function entry state, which makes the offset be 1 word. */
9449 if (reg1 == hard_frame_pointer_rtx || reg2 == hard_frame_pointer_rtx)
9450 {
9451 m->fs.fp_valid = false;
9452 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9453 {
9454 m->fs.cfa_reg = stack_pointer_rtx;
9455 m->fs.cfa_offset -= offset;
9456
9457 add_reg_note (insn, REG_CFA_DEF_CFA,
9458 plus_constant (Pmode, stack_pointer_rtx,
9459 m->fs.cfa_offset));
9460 RTX_FRAME_RELATED_P (insn) = 1;
9461 }
9462 }
9463}
9464
9465/* Emit code to restore saved registers using POP insns. */
9466
9467static void
9468ix86_emit_restore_regs_using_pop (bool ppx_p)
9469{
9470 unsigned int regno;
9471
9472 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p);
9475}
9476
9477/* Emit code to restore saved registers using POP2 insns. */
9478
9479static void
9480ix86_emit_restore_regs_using_pop2 (void)
9481{
9482 int regno;
9483 int regno_list[2];
9484 regno_list[0] = regno_list[1] = -1;
9485 int loaded_regnum = 0;
9486 bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
9487
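  /* Pair up the registers to be restored.  POP2 reads a 16-byte stack slot,
     so whenever the stack pointer is not 16-byte aligned the first register
     is restored with an ordinary POP to realign it; after that the remaining
     saved registers are popped two at a time.  */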
9488 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
9490 {
9491 if (aligned)
9492 {
9493 regno_list[loaded_regnum++] = regno;
9494 if (loaded_regnum == 2)
9495 {
9496 gcc_assert (regno_list[0] != -1
9497 && regno_list[1] != -1
9498 && regno_list[0] != regno_list[1]);
9499
	      ix86_emit_restore_reg_using_pop2 (gen_rtx_REG (word_mode,
							      regno_list[0]),
						gen_rtx_REG (word_mode,
							     regno_list[1]),
						TARGET_APX_PPX);
9505 loaded_regnum = 0;
9506 regno_list[0] = regno_list[1] = -1;
9507 }
9508 }
9509 else
9510 {
	  ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno),
					   TARGET_APX_PPX);
9513 aligned = true;
9514 }
9515 }
9516
9517 if (loaded_regnum == 1)
    ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]),
				     TARGET_APX_PPX);
9520}
9521
9522/* Emit code and notes for the LEAVE instruction. If insn is non-null,
9523 omits the emit and only attaches the notes. */
9524
9525static void
9526ix86_emit_leave (rtx_insn *insn)
9527{
9528 struct machine_function *m = cfun->machine;
9529
9530 if (!insn)
    insn = emit_insn (gen_leave (word_mode));
9532
9533 ix86_add_queued_cfa_restore_notes (insn);
9534
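  /* LEAVE is equivalent to restoring the stack pointer from the frame
     pointer and then popping the saved frame pointer, hence the frame
     state updates below: the stack pointer becomes valid again and the
     frame pointer no longer is.  */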
9535 gcc_assert (m->fs.fp_valid);
9536 m->fs.sp_valid = true;
9537 m->fs.sp_realigned = false;
9538 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9539 m->fs.fp_valid = false;
9540
9541 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9542 {
9543 m->fs.cfa_reg = stack_pointer_rtx;
9544 m->fs.cfa_offset = m->fs.sp_offset;
9545
9546 add_reg_note (insn, REG_CFA_DEF_CFA,
9547 plus_constant (Pmode, stack_pointer_rtx,
9548 m->fs.sp_offset));
9549 RTX_FRAME_RELATED_P (insn) = 1;
9550 }
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
			     m->fs.fp_offset);
9553}
9554
9555/* Emit code to restore saved registers using MOV insns.
9556 First register is restored from CFA - CFA_OFFSET. */
9557static void
9558ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9559 bool maybe_eh_return)
9560{
9561 struct machine_function *m = cfun->machine;
9562 unsigned int regno;
9563
9564 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
9566 {
9567 rtx reg = gen_rtx_REG (word_mode, regno);
9568 rtx mem;
9569 rtx_insn *insn;
9570
9571 mem = choose_baseaddr (cfa_offset, NULL);
9572 mem = gen_frame_mem (word_mode, mem);
9573 insn = emit_move_insn (reg, mem);
9574
9575 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9576 {
9577 /* Previously we'd represented the CFA as an expression
9578 like *(%ebp - 8). We've just popped that value from
9579 the stack, which means we need to reset the CFA to
9580 the drap register. This will remain until we restore
9581 the stack pointer. */
9582 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9583 RTX_FRAME_RELATED_P (insn) = 1;
9584
9585 /* This means that the DRAP register is valid for addressing. */
9586 m->fs.drap_valid = true;
9587 }
9588 else
9589 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
9590
9591 cfa_offset -= UNITS_PER_WORD;
9592 }
9593}
9594
9595/* Emit code to restore saved registers using MOV insns.
9596 First register is restored from CFA - CFA_OFFSET. */
9597static void
9598ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
9599 bool maybe_eh_return)
9600{
9601 unsigned int regno;
9602
9603 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
9605 {
9606 rtx reg = gen_rtx_REG (V4SFmode, regno);
9607 rtx mem;
9608 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
9609
	mem = choose_baseaddr (cfa_offset, &align);
9611 mem = gen_rtx_MEM (V4SFmode, mem);
9612
	/* The location alignment depends upon the base register.  */
9614 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
9615 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
9616 set_mem_align (mem, align);
9617 emit_insn (gen_rtx_SET (reg, mem));
9618
9619 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
9620
9621 cfa_offset -= GET_MODE_SIZE (V4SFmode);
9622 }
9623}
9624
9625static void
9626ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
9627 bool use_call, int style)
9628{
9629 struct machine_function *m = cfun->machine;
9630 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
9631 + m->call_ms2sysv_extra_regs;
9632 rtvec v;
9633 unsigned int elems_needed, align, i, vi = 0;
9634 rtx_insn *insn;
9635 rtx sym, tmp;
9636 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
9637 rtx r10 = NULL_RTX;
9638 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
9639 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
9640 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
9641 rtx rsi_frame_load = NULL_RTX;
9642 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
9643 enum xlogue_stub stub;
9644
9645 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
9646
9647 /* If using a realigned stack, we should never start with padding. */
9648 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
9649
9650 /* Setup RSI as the stub's base pointer. */
9651 align = GET_MODE_ALIGNMENT (V4SFmode);
  tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
9653 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
9654
9655 emit_insn (gen_rtx_SET (rsi, tmp));
9656
9657 /* Get a symbol for the stub. */
9658 if (frame_pointer_needed)
9659 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
9660 : XLOGUE_STUB_RESTORE_HFP_TAIL;
9661 else
9662 stub = use_call ? XLOGUE_STUB_RESTORE
9663 : XLOGUE_STUB_RESTORE_TAIL;
9664 sym = xlogue.get_stub_rtx (stub);
9665
9666 elems_needed = ncregs;
9667 if (use_call)
9668 elems_needed += 1;
9669 else
9670 elems_needed += frame_pointer_needed ? 5 : 3;
9671 v = rtvec_alloc (elems_needed);
9672
9673 /* We call the epilogue stub when we need to pop incoming args or we are
9674 doing a sibling call as the tail. Otherwise, we will emit a jmp to the
9675 epilogue stub and it is the tail-call. */
9676 if (use_call)
9677 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
9678 else
9679 {
9680 RTVEC_ELT (v, vi++) = ret_rtx;
9681 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
9682 if (frame_pointer_needed)
9683 {
9684 rtx rbp = gen_rtx_REG (DImode, BP_REG);
9685 gcc_assert (m->fs.fp_valid);
9686 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
9687
9688 tmp = plus_constant (DImode, rbp, 8);
9689 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
9690 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
9691 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
9692 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
9693 }
9694 else
9695 {
9696 /* If no hard frame pointer, we set R10 to the SP restore value. */
9697 gcc_assert (!m->fs.fp_valid);
9698 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
9699 gcc_assert (m->fs.sp_valid);
9700
9701 r10 = gen_rtx_REG (DImode, R10_REG);
9702 tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
9703 emit_insn (gen_rtx_SET (r10, tmp));
9704
9705 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
9706 }
9707 }
9708
9709 /* Generate frame load insns and restore notes. */
9710 for (i = 0; i < ncregs; ++i)
9711 {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
9713 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
9714 rtx reg, frame_load;
9715
9716 reg = gen_rtx_REG (mode, r.regno);
      frame_load = gen_frame_load (reg, rsi, r.offset);
9718
9719 /* Save RSI frame load insn & note to add last. */
9720 if (r.regno == SI_REG)
9721 {
9722 gcc_assert (!rsi_frame_load);
9723 rsi_frame_load = frame_load;
9724 rsi_restore_offset = r.offset;
9725 }
9726 else
9727 {
9728 RTVEC_ELT (v, vi++) = frame_load;
	  ix86_add_cfa_restore_note (NULL, reg, r.offset);
9730 }
9731 }
9732
9733 /* Add RSI frame load & restore note at the end. */
9734 gcc_assert (rsi_frame_load);
9735 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
9736 RTVEC_ELT (v, vi++) = rsi_frame_load;
  ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
			     rsi_restore_offset);
9739
9740 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
9741 if (!use_call && !frame_pointer_needed)
9742 {
9743 gcc_assert (m->fs.sp_valid);
9744 gcc_assert (!m->fs.sp_realigned);
9745
9746 /* At this point, R10 should point to frame.stack_realign_offset. */
9747 if (m->fs.cfa_reg == stack_pointer_rtx)
9748 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
9749 m->fs.sp_offset = frame.stack_realign_offset;
9750 }
9751
9752 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
9753 tmp = gen_rtx_PARALLEL (VOIDmode, v);
9754 if (use_call)
9755 insn = emit_insn (tmp);
9756 else
9757 {
9758 insn = emit_jump_insn (tmp);
9759 JUMP_LABEL (insn) = ret_rtx;
9760
9761 if (frame_pointer_needed)
9762 ix86_emit_leave (insn);
9763 else
9764 {
9765 /* Need CFA adjust note. */
9766 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
9767 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
9768 }
9769 }
9770
9771 RTX_FRAME_RELATED_P (insn) = true;
9772 ix86_add_queued_cfa_restore_notes (insn);
9773
9774 /* If we're not doing a tail-call, we need to adjust the stack. */
9775 if (use_call && m->fs.sp_valid)
9776 {
9777 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
9778 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9779 GEN_INT (dealloc), style,
				 m->fs.cfa_reg == stack_pointer_rtx);
9781 }
9782}
9783
9784/* Restore function stack, frame, and registers. */
9785
9786void
9787ix86_expand_epilogue (int style)
9788{
9789 struct machine_function *m = cfun->machine;
9790 struct machine_frame_state frame_state_save = m->fs;
9791 bool restore_regs_via_mov;
9792 bool using_drap;
9793 bool restore_stub_is_tail = false;
9794
  if (ix86_function_naked (current_function_decl))
9796 {
9797 /* The program should not reach this point. */
9798 emit_insn (gen_ud2 ());
9799 return;
9800 }
9801
9802 ix86_finalize_stack_frame_flags ();
9803 const struct ix86_frame &frame = cfun->machine->frame;
9804
9805 m->fs.sp_realigned = stack_realign_fp;
9806 m->fs.sp_valid = stack_realign_fp
9807 || !frame_pointer_needed
9808 || crtl->sp_is_unchanging;
9809 gcc_assert (!m->fs.sp_valid
9810 || m->fs.sp_offset == frame.stack_pointer_offset);
9811
9812 /* The FP must be valid if the frame pointer is present. */
9813 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
9814 gcc_assert (!m->fs.fp_valid
9815 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
9816
9817 /* We must have *some* valid pointer to the stack frame. */
9818 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
9819
9820 /* The DRAP is never valid at this point. */
9821 gcc_assert (!m->fs.drap_valid);
9822
9823 /* See the comment about red zone and frame
9824 pointer usage in ix86_expand_prologue. */
9825 if (frame_pointer_needed && frame.red_zone_size)
9826 emit_insn (gen_memory_blockage ());
9827
9828 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
9829 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
9830
9831 /* Determine the CFA offset of the end of the red-zone. */
9832 m->fs.red_zone_offset = 0;
9833 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
9834 {
9835 /* The red-zone begins below return address and error code in
9836 exception handler. */
9837 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
9838
9839 /* When the register save area is in the aligned portion of
9840 the stack, determine the maximum runtime displacement that
9841 matches up with the aligned frame. */
9842 if (stack_realign_drap)
9843 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
9844 + UNITS_PER_WORD);
9845 }
9846
9847 HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
9848
9849 /* Special care must be taken for the normal return case of a function
9850 using eh_return: the eax and edx registers are marked as saved, but
9851 not restored along this path. Adjust the save location to match. */
9852 if (crtl->calls_eh_return && style != 2)
9853 reg_save_offset -= 2 * UNITS_PER_WORD;
9854
9855 /* EH_RETURN requires the use of moves to function properly. */
9856 if (crtl->calls_eh_return)
9857 restore_regs_via_mov = true;
9858 /* SEH requires the use of pops to identify the epilogue. */
9859 else if (TARGET_SEH)
9860 restore_regs_via_mov = false;
  /* If we're only restoring one register and sp cannot be used, then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  */
  else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
9865 restore_regs_via_mov = true;
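  /* Likewise prefer moves when the tuning asks for an epilogue using moves
     and the fast prologue/epilogue heuristic applies, either because more
     than one register must be restored or because the stack pointer is not
     yet at the register save area.  */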
9866 else if (TARGET_EPILOGUE_USING_MOVE
9867 && cfun->machine->use_fast_prologue_epilogue
9868 && (frame.nregs > 1
9869 || m->fs.sp_offset != reg_save_offset))
9870 restore_regs_via_mov = true;
9871 else if (frame_pointer_needed
9872 && !frame.nregs
9873 && m->fs.sp_offset != reg_save_offset)
9874 restore_regs_via_mov = true;
9875 else if (frame_pointer_needed
9876 && TARGET_USE_LEAVE
9877 && cfun->machine->use_fast_prologue_epilogue
9878 && frame.nregs == 1)
9879 restore_regs_via_mov = true;
9880 else
9881 restore_regs_via_mov = false;
9882
9883 if (restore_regs_via_mov || frame.nsseregs)
9884 {
9885 /* Ensure that the entire register save area is addressable via
9886 the stack pointer, if we will restore SSE regs via sp. */
9887 if (TARGET_64BIT
9888 && m->fs.sp_offset > 0x7fffffff
	  && sp_valid_at (frame.stack_realign_offset + 1)
9890 && (frame.nsseregs + frame.nregs) != 0)
9891 {
9892 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9893 GEN_INT (m->fs.sp_offset
9894 - frame.sse_reg_save_offset),
9895 style,
				     m->fs.cfa_reg == stack_pointer_rtx);
9897 }
9898 }
9899
9900 /* If there are any SSE registers to restore, then we have to do it
9901 via moves, since there's obviously no pop for SSE regs. */
9902 if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
					  style == 2);
9905
9906 if (m->call_ms2sysv)
9907 {
9908 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
9909
9910 /* We cannot use a tail-call for the stub if:
9911 1. We have to pop incoming args,
9912 2. We have additional int regs to restore, or
9913 3. A sibling call will be the tail-call, or
9914 4. We are emitting an eh_return_internal epilogue.
9915
	 TODO: Item 4 has not yet been tested!
9917
9918 If any of the above are true, we will call the stub rather than
9919 jump to it. */
9920 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
      ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
9922 }
9923
  /* If using an out-of-line stub that is a tail call, then... */
9925 if (m->call_ms2sysv && restore_stub_is_tail)
9926 {
      /* TODO: paranoid tests. (remove eventually)  */
9928 gcc_assert (m->fs.sp_valid);
9929 gcc_assert (!m->fs.sp_realigned);
9930 gcc_assert (!m->fs.fp_valid);
9931 gcc_assert (!m->fs.realigned);
9932 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
9933 gcc_assert (!crtl->drap_reg);
9934 gcc_assert (!frame.nregs);
9935 }
9936 else if (restore_regs_via_mov)
9937 {
9938 rtx t;
9939
9940 if (frame.nregs)
	ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
9942
9943 /* eh_return epilogues need %ecx added to the stack pointer. */
9944 if (style == 2)
9945 {
9946 rtx sa = EH_RETURN_STACKADJ_RTX;
9947 rtx_insn *insn;
9948
9949 /* Stack realignment doesn't work with eh_return. */
9950 if (crtl->stack_realign_needed)
9951 sorry ("Stack realignment not supported with "
9952 "%<__builtin_eh_return%>");
9953
9954 /* regparm nested functions don't work with eh_return. */
9955 if (ix86_static_chain_on_stack)
9956 sorry ("regparm nested function not supported with "
9957 "%<__builtin_eh_return%>");
9958
9959 if (frame_pointer_needed)
9960 {
9961 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9962 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
9963 emit_insn (gen_rtx_SET (sa, t));
9964
9965 /* NB: eh_return epilogues must restore the frame pointer
9966 in word_mode since the upper 32 bits of RBP register
9967 can have any values. */
9968 t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
9969 rtx frame_reg = gen_rtx_REG (word_mode,
9970 HARD_FRAME_POINTER_REGNUM);
9971 insn = emit_move_insn (frame_reg, t);
9972
9973 /* Note that we use SA as a temporary CFA, as the return
9974 address is at the proper place relative to it. We
9975 pretend this happens at the FP restore insn because
9976 prior to this insn the FP would be stored at the wrong
9977 offset relative to SA, and after this insn we have no
9978 other reasonable register to use for the CFA. We don't
9979 bother resetting the CFA to the SP for the duration of
9980 the return insn, unless the control flow instrumentation
9981 is done. In this case the SP is used later and we have
9982 to reset CFA to SP. */
9983 add_reg_note (insn, REG_CFA_DEF_CFA,
9984 plus_constant (Pmode, sa, UNITS_PER_WORD));
9985 ix86_add_queued_cfa_restore_notes (insn);
9986 add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
9987 RTX_FRAME_RELATED_P (insn) = 1;
9988
9989 m->fs.cfa_reg = sa;
9990 m->fs.cfa_offset = UNITS_PER_WORD;
9991 m->fs.fp_valid = false;
9992
	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
9994 const0_rtx, style,
9995 flag_cf_protection);
9996 }
9997 else
9998 {
9999 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10000 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
10001 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
10002 ix86_add_queued_cfa_restore_notes (insn);
10003
10004 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10005 if (m->fs.cfa_offset != UNITS_PER_WORD)
10006 {
10007 m->fs.cfa_offset = UNITS_PER_WORD;
10008 add_reg_note (insn, REG_CFA_DEF_CFA,
10009 plus_constant (Pmode, stack_pointer_rtx,
10010 UNITS_PER_WORD));
10011 RTX_FRAME_RELATED_P (insn) = 1;
10012 }
10013 }
10014 m->fs.sp_offset = UNITS_PER_WORD;
10015 m->fs.sp_valid = true;
10016 m->fs.sp_realigned = false;
10017 }
10018 }
10019 else
10020 {
10021 /* SEH requires that the function end with (1) a stack adjustment
10022 if necessary, (2) a sequence of pops, and (3) a return or
10023 jump instruction. Prevent insns from the function body from
10024 being scheduled into this sequence. */
10025 if (TARGET_SEH)
10026 {
10027 /* Prevent a catch region from being adjacent to the standard
10028 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
10029 nor several other flags that would be interesting to test are
10030 set up yet. */
10031 if (flag_non_call_exceptions)
10032 emit_insn (gen_nops (const1_rtx));
10033 else
10034 emit_insn (gen_blockage ());
10035 }
10036
10037 /* First step is to deallocate the stack frame so that we can
10038 pop the registers. If the stack pointer was realigned, it needs
10039 to be restored now. Also do it on SEH target for very large
10040 frame as the emitted instructions aren't allowed by the ABI
10041 in epilogues. */
10042 if (!m->fs.sp_valid || m->fs.sp_realigned
10043 || (TARGET_SEH
10044 && (m->fs.sp_offset - reg_save_offset
10045 >= SEH_MAX_FRAME_SIZE)))
10046 {
10047 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10048 GEN_INT (m->fs.fp_offset
10049 - reg_save_offset),
				     style, false);
10051 }
10052 else if (m->fs.sp_offset != reg_save_offset)
10053 {
10054 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10055 GEN_INT (m->fs.sp_offset
10056 - reg_save_offset),
10057 style,
				     m->fs.cfa_reg == stack_pointer_rtx);
10059 }
10060
10061 if (TARGET_APX_PUSH2POP2
10062 && ix86_can_use_push2pop2 ()
10063 && m->func_type == TYPE_NORMAL)
10064 ix86_emit_restore_regs_using_pop2 ();
10065 else
10066 ix86_emit_restore_regs_using_pop (TARGET_APX_PPX);
10067 }
10068
  /* If we used a frame pointer and haven't already got rid of it,
     then do so now.  */
10071 if (m->fs.fp_valid)
10072 {
10073 /* If the stack pointer is valid and pointing at the frame
10074 pointer store address, then we only need a pop. */
      if (sp_valid_at (frame.hfp_save_offset)
10076 && m->fs.sp_offset == frame.hfp_save_offset)
10077 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10078 /* Leave results in shorter dependency chains on CPUs that are
10079 able to grok it fast. */
10080 else if (TARGET_USE_LEAVE
10081 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
10082 || !cfun->machine->use_fast_prologue_epilogue)
10083 ix86_emit_leave (NULL);
10084 else
10085 {
10086 pro_epilogue_adjust_stack (stack_pointer_rtx,
10087 hard_frame_pointer_rtx,
10088 const0_rtx, style, set_cfa: !using_drap);
10089 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10090 }
10091 }
10092
10093 if (using_drap)
10094 {
10095 int param_ptr_offset = UNITS_PER_WORD;
10096 rtx_insn *insn;
10097
10098 gcc_assert (stack_realign_drap);
10099
10100 if (ix86_static_chain_on_stack)
10101 param_ptr_offset += UNITS_PER_WORD;
10102 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
10103 param_ptr_offset += UNITS_PER_WORD;
10104
10105 insn = emit_insn (gen_rtx_SET
10106 (stack_pointer_rtx,
10107 plus_constant (Pmode, crtl->drap_reg,
10108 -param_ptr_offset)));
10109 m->fs.cfa_reg = stack_pointer_rtx;
10110 m->fs.cfa_offset = param_ptr_offset;
10111 m->fs.sp_offset = param_ptr_offset;
10112 m->fs.realigned = false;
10113
10114 add_reg_note (insn, REG_CFA_DEF_CFA,
10115 plus_constant (Pmode, stack_pointer_rtx,
10116 param_ptr_offset));
10117 RTX_FRAME_RELATED_P (insn) = 1;
10118
10119 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
10120 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10121 }
10122
10123 /* At this point the stack pointer must be valid, and we must have
10124 restored all of the registers. We may not have deallocated the
10125 entire stack frame. We've delayed this until now because it may
10126 be possible to merge the local stack deallocation with the
10127 deallocation forced by ix86_static_chain_on_stack. */
10128 gcc_assert (m->fs.sp_valid);
10129 gcc_assert (!m->fs.sp_realigned);
10130 gcc_assert (!m->fs.fp_valid);
10131 gcc_assert (!m->fs.realigned);
10132 if (m->fs.sp_offset != UNITS_PER_WORD)
10133 {
10134 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10135 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
				 style, true);
10137 }
10138 else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());
10140
10141 /* Sibcall epilogues don't want a return instruction. */
10142 if (style == 0)
10143 {
10144 m->fs = frame_state_save;
10145 return;
10146 }
10147
10148 if (cfun->machine->func_type != TYPE_NORMAL)
10149 emit_jump_insn (gen_interrupt_return ());
10150 else if (crtl->args.pops_args && crtl->args.size)
10151 {
10152 rtx popc = GEN_INT (crtl->args.pops_args);
10153
10154 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10155 address, do explicit add, and jump indirectly to the caller. */
10156
10157 if (crtl->args.pops_args >= 65536)
10158 {
10159 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10160 rtx_insn *insn;
10161
10162 /* There is no "pascal" calling convention in any 64bit ABI. */
10163 gcc_assert (!TARGET_64BIT);
10164
	  insn = emit_insn (gen_pop (ecx));
10166 m->fs.cfa_offset -= UNITS_PER_WORD;
10167 m->fs.sp_offset -= UNITS_PER_WORD;
10168
10169 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10170 x = gen_rtx_SET (stack_pointer_rtx, x);
10171 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10172 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
10173 RTX_FRAME_RELATED_P (insn) = 1;
10174
10175 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     popc, -1, true);
10177 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10178 }
10179 else
10180 emit_jump_insn (gen_simple_return_pop_internal (popc));
10181 }
10182 else if (!m->call_ms2sysv || !restore_stub_is_tail)
10183 {
10184 /* In case of return from EH a simple return cannot be used
10185 as a return address will be compared with a shadow stack
10186 return address. Use indirect jump instead. */
10187 if (style == 2 && flag_cf_protection)
10188 {
10189 /* Register used in indirect jump must be in word_mode. But
10190 Pmode may not be the same as word_mode for x32. */
10191 rtx ecx = gen_rtx_REG (word_mode, CX_REG);
10192 rtx_insn *insn;
10193
	  insn = emit_insn (gen_pop (ecx));
10195 m->fs.cfa_offset -= UNITS_PER_WORD;
10196 m->fs.sp_offset -= UNITS_PER_WORD;
10197
10198 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10199 x = gen_rtx_SET (stack_pointer_rtx, x);
10200 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10201 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
10202 RTX_FRAME_RELATED_P (insn) = 1;
10203
10204 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10205 }
10206 else
10207 emit_jump_insn (gen_simple_return_internal ());
10208 }
10209
10210 /* Restore the state back to the state from the prologue,
10211 so that it's correct for the next epilogue. */
10212 m->fs = frame_state_save;
10213}
10214
10215/* Reset from the function's potential modifications. */
10216
10217static void
10218ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
10219{
10220 if (pic_offset_table_rtx
10221 && !ix86_use_pseudo_pic_reg ())
10222 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10223
10224 if (TARGET_MACHO)
10225 {
10226 rtx_insn *insn = get_last_insn ();
10227 rtx_insn *deleted_debug_label = NULL;
10228
10229 /* Mach-O doesn't support labels at the end of objects, so if
10230 it looks like we might want one, take special action.
10231 First, collect any sequence of deleted debug labels. */
10232 while (insn
10233 && NOTE_P (insn)
10234 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10235 {
10236 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
10237 notes only, instead set their CODE_LABEL_NUMBER to -1,
10238 otherwise there would be code generation differences
10239 in between -g and -g0. */
10240 if (NOTE_P (insn) && NOTE_KIND (insn)
10241 == NOTE_INSN_DELETED_DEBUG_LABEL)
10242 deleted_debug_label = insn;
10243 insn = PREV_INSN (insn);
10244 }
10245
10246 /* If we have:
10247 label:
10248 barrier
10249 then this needs to be detected, so skip past the barrier. */
10250
10251 if (insn && BARRIER_P (insn))
10252 insn = PREV_INSN (insn);
10253
10254 /* Up to now we've only seen notes or barriers. */
10255 if (insn)
10256 {
10257 if (LABEL_P (insn)
10258 || (NOTE_P (insn)
10259 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
10260 /* Trailing label. */
	    fputs ("\tnop\n", file);
10262 else if (cfun && ! cfun->is_thunk)
10263 {
10264 /* See if we have a completely empty function body, skipping
10265 the special case of the picbase thunk emitted as asm. */
10266 while (insn && ! INSN_P (insn))
10267 insn = PREV_INSN (insn);
10268 /* If we don't find any insns, we've got an empty function body;
	     i.e. completely empty, without a return or branch.  This is
10270 taken as the case where a function body has been removed
10271 because it contains an inline __builtin_unreachable(). GCC
10272 declares that reaching __builtin_unreachable() means UB so
10273 we're not obliged to do anything special; however, we want
10274 non-zero-sized function bodies. To meet this, and help the
10275 user out, let's trap the case. */
10276 if (insn == NULL)
		fputs ("\tud2\n", file);
10278 }
10279 }
10280 else if (deleted_debug_label)
10281 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
10282 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
10283 CODE_LABEL_NUMBER (insn) = -1;
10284 }
10285}
10286
10287/* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */
10288
10289void
10290ix86_print_patchable_function_entry (FILE *file,
10291 unsigned HOST_WIDE_INT patch_area_size,
10292 bool record_p)
10293{
10294 if (cfun->machine->function_label_emitted)
10295 {
10296 /* NB: When ix86_print_patchable_function_entry is called after
	 function label has been emitted, we have inserted or queued
10298 a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
10299 place. There is nothing to do here. */
10300 return;
10301 }
10302
10303 default_print_patchable_function_entry (file, patch_area_size,
10304 record_p);
10305}
10306
10307/* Output patchable area. NB: default_print_patchable_function_entry
10308 isn't available in i386.md. */
10309
10310void
10311ix86_output_patchable_area (unsigned int patch_area_size,
10312 bool record_p)
10313{
10314 default_print_patchable_function_entry (asm_out_file,
10315 patch_area_size,
10316 record_p);
10317}
10318
10319/* Return a scratch register to use in the split stack prologue. The
10320 split stack prologue is used for -fsplit-stack. It is the first
10321 instructions in the function, even before the regular prologue.
10322 The scratch register can be any caller-saved register which is not
10323 used for parameters or for the static chain. */
10324
10325static unsigned int
10326split_stack_prologue_scratch_regno (void)
10327{
10328 if (TARGET_64BIT)
10329 return R11_REG;
10330 else
10331 {
10332 bool is_fastcall, is_thiscall;
10333 int regparm;
10334
      is_fastcall = (lookup_attribute ("fastcall",
10336 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10337 != NULL);
      is_thiscall = (lookup_attribute ("thiscall",
10339 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10340 != NULL);
10341 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10342
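      /* Pick a register that the 32-bit calling convention in use does not
	 need for argument passing or the static chain: fastcall passes its
	 first two arguments in ECX and EDX, thiscall passes `this' in ECX,
	 and regparm functions consume EAX, EDX and ECX in that order.  */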
10343 if (is_fastcall)
10344 {
10345 if (DECL_STATIC_CHAIN (cfun->decl))
10346 {
10347 sorry ("%<-fsplit-stack%> does not support fastcall with "
10348 "nested function");
10349 return INVALID_REGNUM;
10350 }
10351 return AX_REG;
10352 }
10353 else if (is_thiscall)
10354 {
10355 if (!DECL_STATIC_CHAIN (cfun->decl))
10356 return DX_REG;
10357 return AX_REG;
10358 }
10359 else if (regparm < 3)
10360 {
10361 if (!DECL_STATIC_CHAIN (cfun->decl))
10362 return CX_REG;
10363 else
10364 {
10365 if (regparm >= 2)
10366 {
10367 sorry ("%<-fsplit-stack%> does not support 2 register "
10368 "parameters for a nested function");
10369 return INVALID_REGNUM;
10370 }
10371 return DX_REG;
10372 }
10373 }
10374 else
10375 {
10376 /* FIXME: We could make this work by pushing a register
10377 around the addition and comparison. */
10378 sorry ("%<-fsplit-stack%> does not support 3 register parameters");
10379 return INVALID_REGNUM;
10380 }
10381 }
10382}
10383
/* A SYMBOL_REF for the function which allocates new stack space for
   -fsplit-stack.  */
10386
10387static GTY(()) rtx split_stack_fn;
10388
10389/* A SYMBOL_REF for the more stack function when using the large
10390 model. */
10391
10392static GTY(()) rtx split_stack_fn_large;
10393
10394/* Return location of the stack guard value in the TLS block. */
10395
10396rtx
10397ix86_split_stack_guard (void)
10398{
10399 int offset;
10400 addr_space_t as = DEFAULT_TLS_SEG_REG;
10401 rtx r;
10402
10403 gcc_assert (flag_split_stack);
10404
10405#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
10406 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
10407#else
10408 gcc_unreachable ();
10409#endif
10410
10411 r = GEN_INT (offset);
10412 r = gen_const_mem (Pmode, r);
10413 set_mem_addr_space (r, as);
10414
10415 return r;
10416}
10417
10418/* Handle -fsplit-stack. These are the first instructions in the
10419 function, even before the regular prologue. */
10420
10421void
10422ix86_expand_split_stack_prologue (void)
10423{
10424 HOST_WIDE_INT allocate;
10425 unsigned HOST_WIDE_INT args_size;
10426 rtx_code_label *label;
10427 rtx limit, current, allocate_rtx, call_fusage;
10428 rtx_insn *call_insn;
10429 unsigned int scratch_regno = INVALID_REGNUM;
10430 rtx scratch_reg = NULL_RTX;
10431 rtx_code_label *varargs_label = NULL;
10432 rtx fn;
10433
10434 gcc_assert (flag_split_stack && reload_completed);
10435
10436 ix86_finalize_stack_frame_flags ();
10437 struct ix86_frame &frame = cfun->machine->frame;
10438 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10439
10440 /* This is the label we will branch to if we have enough stack
10441 space. We expect the basic block reordering pass to reverse this
10442 branch if optimizing, so that we branch in the unlikely case. */
10443 label = gen_label_rtx ();
10444
10445 /* We need to compare the stack pointer minus the frame size with
10446 the stack boundary in the TCB. The stack boundary always gives
10447 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10448 can compare directly. Otherwise we need to do an addition. */
10449
10450 limit = ix86_split_stack_guard ();
10451
10452 if (allocate >= SPLIT_STACK_AVAILABLE
10453 || flag_force_indirect_call)
10454 {
10455 scratch_regno = split_stack_prologue_scratch_regno ();
10456 if (scratch_regno == INVALID_REGNUM)
10457 return;
10458 }
10459
10460 if (allocate >= SPLIT_STACK_AVAILABLE)
10461 {
10462 rtx offset;
10463
10464 /* We need a scratch register to hold the stack pointer minus
10465 the required frame size. Since this is the very start of the
10466 function, the scratch register can be any caller-saved
10467 register which is not used for parameters. */
10468 offset = GEN_INT (- allocate);
10469
10470 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10471 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10472 {
10473 /* We don't use gen_add in this case because it will
10474 want to split to lea, but when not optimizing the insn
10475 will not be split after this point. */
10476 emit_insn (gen_rtx_SET (scratch_reg,
10477 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10478 offset)));
10479 }
10480 else
10481 {
10482 emit_move_insn (scratch_reg, offset);
10483 emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
10484 }
10485 current = scratch_reg;
10486 }
10487 else
10488 current = stack_pointer_rtx;
10489
10490 ix86_expand_branch (GEU, current, limit, label);
10491 rtx_insn *jump_insn = get_last_insn ();
10492 JUMP_LABEL (jump_insn) = label;
10493
10494 /* Mark the jump as very likely to be taken. */
10495 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
10496
10497 if (split_stack_fn == NULL_RTX)
10498 {
10499 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10500 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
10501 }
10502 fn = split_stack_fn;
10503
10504 /* Get more stack space. We pass in the desired stack space and the
10505 size of the arguments to copy to the new stack. In 32-bit mode
10506 we push the parameters; __morestack will return on a new stack
10507 anyhow. In 64-bit mode we pass the parameters in r10 and
10508 r11. */
10509 allocate_rtx = GEN_INT (allocate);
10510 args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
10511 call_fusage = NULL_RTX;
10512 rtx pop = NULL_RTX;
10513 if (TARGET_64BIT)
10514 {
10515 rtx reg10, reg11;
10516
10517 reg10 = gen_rtx_REG (DImode, R10_REG);
10518 reg11 = gen_rtx_REG (DImode, R11_REG);
10519
10520 /* If this function uses a static chain, it will be in %r10.
10521 Preserve it across the call to __morestack. */
10522 if (DECL_STATIC_CHAIN (cfun->decl))
10523 {
10524 rtx rax;
10525
10526 rax = gen_rtx_REG (word_mode, AX_REG);
10527 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
	  use_reg (&call_fusage, rax);
10529 }
10530
10531 if (flag_force_indirect_call
10532 || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
10533 {
10534 HOST_WIDE_INT argval;
10535
10536 if (split_stack_fn_large == NULL_RTX)
10537 {
10538 split_stack_fn_large
10539 = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
10540 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
10541 }
10542
10543 fn = split_stack_fn_large;
10544
10545 if (ix86_cmodel == CM_LARGE_PIC)
10546 {
10547 rtx_code_label *label;
10548 rtx x;
10549
10550 gcc_assert (Pmode == DImode);
10551
10552 label = gen_label_rtx ();
10553 emit_label (label);
10554 LABEL_PRESERVE_P (label) = 1;
10555 emit_insn (gen_set_rip_rex64 (reg10, label));
10556 emit_insn (gen_set_got_offset_rex64 (reg11, label));
10557 emit_insn (gen_add2_insn (reg10, reg11));
10558 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fn), UNSPEC_GOT);
10559 x = gen_rtx_CONST (Pmode, x);
10560 emit_move_insn (reg11, x);
10561 x = gen_rtx_PLUS (Pmode, reg10, reg11);
10562 x = gen_const_mem (Pmode, x);
10563 fn = copy_to_suggested_reg (x, reg11, Pmode);
10564 }
10565 else if (ix86_cmodel == CM_LARGE)
10566 fn = copy_to_suggested_reg (fn, reg11, Pmode);
10567
10568 /* When using the large model we need to load the address
10569 into a register, and we've run out of registers. So we
10570 switch to a different calling convention, and we call a
10571 different function: __morestack_large. We pass the
10572 argument size in the upper 32 bits of r10 and pass the
10573 frame size in the lower 32 bits. */
10574 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
10575 gcc_assert ((args_size & 0xffffffff) == args_size);
10576
10577 argval = ((args_size << 16) << 16) + allocate;
10578 emit_move_insn (reg10, GEN_INT (argval));
10579 }
10580 else
10581 {
10582 emit_move_insn (reg10, allocate_rtx);
10583 emit_move_insn (reg11, GEN_INT (args_size));
	  use_reg (&call_fusage, reg11);
10585 }
10586
      use_reg (&call_fusage, reg10);
10588 }
10589 else
10590 {
10591 if (flag_force_indirect_call && flag_pic)
10592 {
10593 rtx x;
10594
10595 gcc_assert (Pmode == SImode);
10596
10597 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10598
10599 emit_insn (gen_set_got (scratch_reg));
10600 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn),
10601 UNSPEC_GOT);
10602 x = gen_rtx_CONST (Pmode, x);
10603 x = gen_rtx_PLUS (Pmode, scratch_reg, x);
10604 x = gen_const_mem (Pmode, x);
10605 fn = copy_to_suggested_reg (x, scratch_reg, Pmode);
10606 }
10607
10608 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
10609 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
      insn = emit_insn (gen_push (allocate_rtx));
10611 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
10612 pop = GEN_INT (2 * UNITS_PER_WORD);
10613 }
10614
10615 if (flag_force_indirect_call && !register_operand (fn, VOIDmode))
10616 {
10617 scratch_reg = gen_rtx_REG (word_mode, scratch_regno);
10618
10619 if (GET_MODE (fn) != word_mode)
10620 fn = gen_rtx_ZERO_EXTEND (word_mode, fn);
10621
10622 fn = copy_to_suggested_reg (fn, scratch_reg, word_mode);
10623 }
10624
10625 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
10626 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10627 pop, false);
10628 add_function_usage_to (call_insn, call_fusage);
10629 if (!TARGET_64BIT)
10630 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
10631 /* Indicate that this function can't jump to non-local gotos. */
10632 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
10633
10634 /* In order to make call/return prediction work right, we now need
10635 to execute a return instruction. See
10636 libgcc/config/i386/morestack.S for the details on how this works.
10637
10638 For flow purposes gcc must not see this as a return
10639 instruction--we need control flow to continue at the subsequent
10640 label. Therefore, we use an unspec. */
10641 gcc_assert (crtl->args.pops_args < 65536);
10642 rtx_insn *ret_insn
10643 = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
10644
10645 if ((flag_cf_protection & CF_BRANCH))
10646 {
10647 /* Insert ENDBR since __morestack will jump back here via indirect
10648 call. */
10649 rtx cet_eb = gen_nop_endbr ();
10650 emit_insn_after (cet_eb, ret_insn);
10651 }
10652
10653 /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling __morestack.  */
10655 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
10656 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
10657 gen_rtx_REG (word_mode, AX_REG));
10658
10659 /* If this function calls va_start, we need to store a pointer to
10660 the arguments on the old stack, because they may not have been
10661 all copied to the new stack. At this point the old stack can be
10662 found at the frame pointer value used by __morestack, because
10663 __morestack has set that up before calling back to us. Here we
10664 store that pointer in a scratch register, and in
10665 ix86_expand_prologue we store the scratch register in a stack
10666 slot. */
10667 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10668 {
10669 rtx frame_reg;
10670 int words;
10671
10672 scratch_regno = split_stack_prologue_scratch_regno ();
10673 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10674 frame_reg = gen_rtx_REG (Pmode, BP_REG);
10675
10676 /* 64-bit:
10677 fp -> old fp value
10678 return address within this function
10679 return address of caller of this function
10680 stack arguments
10681 So we add three words to get to the stack arguments.
10682
10683 32-bit:
10684 fp -> old fp value
10685 return address within this function
10686 first argument to __morestack
10687 second argument to __morestack
10688 return address of caller of this function
10689 stack arguments
10690 So we add five words to get to the stack arguments.
10691 */
10692 words = TARGET_64BIT ? 3 : 5;
10693 emit_insn (gen_rtx_SET (scratch_reg,
10694 plus_constant (Pmode, frame_reg,
10695 words * UNITS_PER_WORD)));
10696
10697 varargs_label = gen_label_rtx ();
10698 emit_jump_insn (gen_jump (varargs_label));
10699 JUMP_LABEL (get_last_insn ()) = varargs_label;
10700
10701 emit_barrier ();
10702 }
10703
10704 emit_label (label);
10705 LABEL_NUSES (label) = 1;
10706
10707 /* If this function calls va_start, we now have to set the scratch
10708 register for the case where we do not call __morestack. In this
10709 case we need to set it based on the stack pointer. */
10710 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10711 {
10712 emit_insn (gen_rtx_SET (scratch_reg,
10713 plus_constant (Pmode, stack_pointer_rtx,
10714 UNITS_PER_WORD)));
10715
10716 emit_label (varargs_label);
10717 LABEL_NUSES (varargs_label) = 1;
10718 }
10719}
10720
10721/* We may have to tell the dataflow pass that the split stack prologue
10722 is initializing a scratch register. */
10723
10724static void
10725ix86_live_on_entry (bitmap regs)
10726{
10727 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10728 {
10729 gcc_assert (flag_split_stack);
10730 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
10731 }
10732}
10733
10734/* Extract the parts of an RTL expression that is a valid memory address
10735 for an instruction. Return false if the structure of the address is
10736 grossly off. */
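/* As an illustrative example (with A and B standing in for arbitrary
   registers), a scaled-index address such as
     (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 16))
   decomposes into out->base = A, out->index = B, out->scale = 4 and
   out->disp = (const_int 16), i.e. the x86 form A + B*4 + 16.  */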
10737
10738bool
10739ix86_decompose_address (rtx addr, struct ix86_address *out)
10740{
10741 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10742 rtx base_reg, index_reg;
10743 HOST_WIDE_INT scale = 1;
10744 rtx scale_rtx = NULL_RTX;
10745 rtx tmp;
10746 addr_space_t seg = ADDR_SPACE_GENERIC;
10747
10748 /* Allow zero-extended SImode addresses,
10749 they will be emitted with addr32 prefix. */
10750 if (TARGET_64BIT && GET_MODE (addr) == DImode)
10751 {
10752 if (GET_CODE (addr) == ZERO_EXTEND
10753 && GET_MODE (XEXP (addr, 0)) == SImode)
10754 {
10755 addr = XEXP (addr, 0);
10756 if (CONST_INT_P (addr))
10757 return false;
10758 }
10759 else if (GET_CODE (addr) == AND
10760 && const_32bit_mask (XEXP (addr, 1), DImode))
10761 {
10762 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
10763 if (addr == NULL_RTX)
10764 return false;
10765
10766 if (CONST_INT_P (addr))
10767 return false;
10768 }
10769 else if (GET_CODE (addr) == AND)
10770 {
10771 /* For ASHIFT inside AND, combine will not generate
10772 canonical zero-extend. Merge mask for AND and shift_count
10773 to check if it is canonical zero-extend. */
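/* E.g. for (and (ashift X (const_int 2)) (const_int 0xfffffffc))
   the mask 0xfffffffc ORed with (1 << 2) - 1 == 0x3 yields 0xffffffff,
   so the AND acts as a zero-extension of the shifted SImode value.  */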
10774 tmp = XEXP (addr, 0);
10775 rtx mask = XEXP (addr, 1);
10776 if (tmp && GET_CODE (tmp) == ASHIFT)
10777 {
10778 rtx shift_val = XEXP (tmp, 1);
10779 if (CONST_INT_P (mask) && CONST_INT_P (shift_val)
10780 && (((unsigned HOST_WIDE_INT) INTVAL (mask)
10781 | ((HOST_WIDE_INT_1U << INTVAL (shift_val)) - 1))
10782 == 0xffffffff))
10783 {
10784 addr = lowpart_subreg (SImode, XEXP (addr, 0),
10785 DImode);
10786 }
10787 }
10788
10789 }
10790 }
10791
10792 /* Allow SImode subregs of DImode addresses,
10793 they will be emitted with addr32 prefix. */
10794 if (TARGET_64BIT && GET_MODE (addr) == SImode)
10795 {
10796 if (SUBREG_P (addr)
10797 && GET_MODE (SUBREG_REG (addr)) == DImode)
10798 {
10799 addr = SUBREG_REG (addr);
10800 if (CONST_INT_P (addr))
10801 return false;
10802 }
10803 }
10804
10805 if (REG_P (addr))
10806 base = addr;
10807 else if (SUBREG_P (addr))
10808 {
10809 if (REG_P (SUBREG_REG (addr)))
10810 base = addr;
10811 else
10812 return false;
10813 }
10814 else if (GET_CODE (addr) == PLUS)
10815 {
10816 rtx addends[4], op;
10817 int n = 0, i;
10818
10819 op = addr;
10820 do
10821 {
10822 if (n >= 4)
10823 return false;
10824 addends[n++] = XEXP (op, 1);
10825 op = XEXP (op, 0);
10826 }
10827 while (GET_CODE (op) == PLUS);
10828 if (n >= 4)
10829 return false;
10830 addends[n] = op;
10831
10832 for (i = n; i >= 0; --i)
10833 {
10834 op = addends[i];
10835 switch (GET_CODE (op))
10836 {
10837 case MULT:
10838 if (index)
10839 return false;
10840 index = XEXP (op, 0);
10841 scale_rtx = XEXP (op, 1);
10842 break;
10843
10844 case ASHIFT:
10845 if (index)
10846 return false;
10847 index = XEXP (op, 0);
10848 tmp = XEXP (op, 1);
10849 if (!CONST_INT_P (tmp))
10850 return false;
10851 scale = INTVAL (tmp);
10852 if ((unsigned HOST_WIDE_INT) scale > 3)
10853 return false;
10854 scale = 1 << scale;
10855 break;
10856
10857 case ZERO_EXTEND:
10858 op = XEXP (op, 0);
10859 if (GET_CODE (op) != UNSPEC)
10860 return false;
10861 /* FALLTHRU */
10862
10863 case UNSPEC:
10864 if (XINT (op, 1) == UNSPEC_TP
10865 && TARGET_TLS_DIRECT_SEG_REFS
10866 && seg == ADDR_SPACE_GENERIC)
10867 seg = DEFAULT_TLS_SEG_REG;
10868 else
10869 return false;
10870 break;
10871
10872 case SUBREG:
10873 if (!REG_P (SUBREG_REG (op)))
10874 return false;
10875 /* FALLTHRU */
10876
10877 case REG:
10878 if (!base)
10879 base = op;
10880 else if (!index)
10881 index = op;
10882 else
10883 return false;
10884 break;
10885
10886 case CONST:
10887 case CONST_INT:
10888 case SYMBOL_REF:
10889 case LABEL_REF:
10890 if (disp)
10891 return false;
10892 disp = op;
10893 break;
10894
10895 default:
10896 return false;
10897 }
10898 }
10899 }
10900 else if (GET_CODE (addr) == MULT)
10901 {
10902 index = XEXP (addr, 0); /* index*scale */
10903 scale_rtx = XEXP (addr, 1);
10904 }
10905 else if (GET_CODE (addr) == ASHIFT)
10906 {
10907 /* We're called for lea too, which implements ashift on occasion. */
10908 index = XEXP (addr, 0);
10909 tmp = XEXP (addr, 1);
10910 if (!CONST_INT_P (tmp))
10911 return false;
10912 scale = INTVAL (tmp);
10913 if ((unsigned HOST_WIDE_INT) scale > 3)
10914 return false;
10915 scale = 1 << scale;
10916 }
10917 else
10918 disp = addr; /* displacement */
10919
10920 if (index)
10921 {
10922 if (REG_P (index))
10923 ;
10924 else if (SUBREG_P (index)
10925 && REG_P (SUBREG_REG (index)))
10926 ;
10927 else
10928 return false;
10929 }
10930
10931 /* Extract the integral value of scale. */
10932 if (scale_rtx)
10933 {
10934 if (!CONST_INT_P (scale_rtx))
10935 return false;
10936 scale = INTVAL (scale_rtx);
10937 }
10938
10939 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
10940 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
10941
10942 /* Avoid useless 0 displacement. */
10943 if (disp == const0_rtx && (base || index))
10944 disp = NULL_RTX;
10945
10946 /* Allow arg pointer and stack pointer as index if there is no scaling. */
10947 if (base_reg && index_reg && scale == 1
10948 && (REGNO (index_reg) == ARG_POINTER_REGNUM
10949 || REGNO (index_reg) == FRAME_POINTER_REGNUM
10950 || REGNO (index_reg) == SP_REG))
10951 {
10952 std::swap (base, index);
10953 std::swap (base_reg, index_reg);
10954 }
10955
10956 /* Special case: %ebp cannot be encoded as a base without a displacement.
10957 Similarly %r13. */
10958 if (!disp && base_reg
10959 && (REGNO (base_reg) == ARG_POINTER_REGNUM
10960 || REGNO (base_reg) == FRAME_POINTER_REGNUM
10961 || REGNO (base_reg) == BP_REG
10962 || REGNO (base_reg) == R13_REG))
10963 disp = const0_rtx;
10964
10965 /* Special case: on K6, [%esi] makes the instruction vector decoded.
10966 Avoid this by transforming to [%esi+0].
10967 Reload calls address legitimization without cfun defined, so we need
10968 to test cfun for being non-NULL. */
10969 if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun)
10970 && base_reg && !index_reg && !disp
10971 && REGNO (base_reg) == SI_REG)
10972 disp = const0_rtx;
10973
10974 /* Special case: encode reg+reg instead of reg*2. */
10975 if (!base && index && scale == 2)
10976 base = index, base_reg = index_reg, scale = 1;
10977
10978 /* Special case: scaling cannot be encoded without base or displacement. */
10979 if (!base && !disp && index && scale != 1)
10980 disp = const0_rtx;
10981
10982 out->base = base;
10983 out->index = index;
10984 out->disp = disp;
10985 out->scale = scale;
10986 out->seg = seg;
10987
10988 return true;
10989}
10990
10991/* Return cost of the memory address x.
10992 For i386, it is better to use a complex address than let gcc copy
10993 the address into a reg and make a new pseudo. But not if the address
10994 requires two regs - that would mean more pseudos with longer
10995 lifetimes. */
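/* Rough illustration of the costs computed below: a purely constant or
   symbolic address costs 1, an address using one pseudo register
   (e.g. (plus (reg) (const_int 8))) costs 2, and one using two pseudos
   (base plus index) costs 3, plus the K6 penalty where it applies.  */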
10996static int
10997ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
10998{
10999 struct ix86_address parts;
11000 int cost = 1;
11001 int ok = ix86_decompose_address (x, &parts);
11002
11003 gcc_assert (ok);
11004
11005 if (parts.base && SUBREG_P (parts.base))
11006 parts.base = SUBREG_REG (parts.base);
11007 if (parts.index && SUBREG_P (parts.index))
11008 parts.index = SUBREG_REG (parts.index);
11009
11010 /* Attempt to minimize number of registers in the address by increasing
11011 address cost for each used register. We don't increase address cost
11012 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
11013 is not invariant itself it most likely means that base or index is not
11014 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
11015 which is not profitable for x86. */
11016 if (parts.base
11017 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11018 && (current_pass->type == GIMPLE_PASS
11019 || !pic_offset_table_rtx
11020 || !REG_P (parts.base)
11021 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
11022 cost++;
11023
11024 if (parts.index
11025 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11026 && (current_pass->type == GIMPLE_PASS
11027 || !pic_offset_table_rtx
11028 || !REG_P (parts.index)
11029 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
11030 cost++;
11031
11032 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11033 since its predecode logic can't detect the length of instructions
11034 and it degenerates to vector decoded. Increase cost of such
11035 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11036 to split such addresses or even refuse such addresses at all.
11037
11038 Following addressing modes are affected:
11039 [base+scale*index]
11040 [scale*index+disp]
11041 [base+index]
11042
11043 The first and last case may be avoidable by explicitly coding the zero in
11044 memory address, but I don't have AMD-K6 machine handy to check this
11045 theory. */
11046
11047 if (TARGET_CPU_P (K6)
11048 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11049 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11050 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11051 cost += 10;
11052
11053 return cost;
11054}
11055
11056/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11057 this is used to form addresses to local data when -fPIC is in
11058 use. */
11059
11060static bool
11061darwin_local_data_pic (rtx disp)
11062{
11063 return (GET_CODE (disp) == UNSPEC
11064 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11065}
11066
11067/* True if the function symbol operand X should be loaded from GOT.
11068 If CALL_P is true, X is a call operand.
11069
11070 NB: -mno-direct-extern-access doesn't force load from GOT for
11071 call.
11072
11073 NB: In 32-bit mode, only non-PIC is allowed in inline assembly
11074 statements, since a PIC register could not be available at the
11075 call site. */
11076
11077bool
11078ix86_force_load_from_GOT_p (rtx x, bool call_p)
11079{
11080 return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))
11081 && !TARGET_PECOFF && !TARGET_MACHO
11082 && (!flag_pic || this_is_asm_operands)
11083 && ix86_cmodel != CM_LARGE
11084 && ix86_cmodel != CM_LARGE_PIC
11085 && GET_CODE (x) == SYMBOL_REF
11086 && ((!call_p
11087 && (!ix86_direct_extern_access
11088 || (SYMBOL_REF_DECL (x)
11089 && lookup_attribute ("nodirect_extern_access",
11090 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))
11091 || (SYMBOL_REF_FUNCTION_P (x)
11092 && (!flag_plt
11093 || (SYMBOL_REF_DECL (x)
11094 && lookup_attribute ("noplt",
11095 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))))
11096 && !SYMBOL_REF_LOCAL_P (x));
11097}
11098
11099/* Determine if a given RTX is a valid constant. We already know this
11100 satisfies CONSTANT_P. */
11101
11102static bool
11103ix86_legitimate_constant_p (machine_mode mode, rtx x)
11104{
11105 switch (GET_CODE (x))
11106 {
11107 case CONST:
11108 x = XEXP (x, 0);
11109
11110 if (GET_CODE (x) == PLUS)
11111 {
11112 if (!CONST_INT_P (XEXP (x, 1)))
11113 return false;
11114 x = XEXP (x, 0);
11115 }
11116
11117 if (TARGET_MACHO && darwin_local_data_pic (x))
11118 return true;
11119
11120 /* Only some unspecs are valid as "constants". */
11121 if (GET_CODE (x) == UNSPEC)
11122 switch (XINT (x, 1))
11123 {
11124 case UNSPEC_GOT:
11125 case UNSPEC_GOTOFF:
11126 case UNSPEC_PLTOFF:
11127 return TARGET_64BIT;
11128 case UNSPEC_TPOFF:
11129 case UNSPEC_NTPOFF:
11130 x = XVECEXP (x, 0, 0);
11131 return (GET_CODE (x) == SYMBOL_REF
11132 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11133 case UNSPEC_DTPOFF:
11134 x = XVECEXP (x, 0, 0);
11135 return (GET_CODE (x) == SYMBOL_REF
11136 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11137 default:
11138 return false;
11139 }
11140
11141 /* We must have drilled down to a symbol. */
11142 if (GET_CODE (x) == LABEL_REF)
11143 return true;
11144 if (GET_CODE (x) != SYMBOL_REF)
11145 return false;
11146 /* FALLTHRU */
11147
11148 case SYMBOL_REF:
11149 /* TLS symbols are never valid. */
11150 if (SYMBOL_REF_TLS_MODEL (x))
11151 return false;
11152
11153 /* DLLIMPORT symbols are never valid. */
11154 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11155 && SYMBOL_REF_DLLIMPORT_P (x))
11156 return false;
11157
11158#if TARGET_MACHO
11159 /* mdynamic-no-pic */
11160 if (MACHO_DYNAMIC_NO_PIC_P)
11161 return machopic_symbol_defined_p (x);
11162#endif
11163
11164 /* External function address should be loaded
11165 via the GOT slot to avoid PLT. */
11166 if (ix86_force_load_from_GOT_p (x))
11167 return false;
11168
11169 break;
11170
11171 CASE_CONST_SCALAR_INT:
11172 if (ix86_endbr_immediate_operand (x, VOIDmode))
11173 return false;
11174
11175 switch (mode)
11176 {
11177 case E_TImode:
11178 if (TARGET_64BIT)
11179 return true;
11180 /* FALLTHRU */
11181 case E_OImode:
11182 case E_XImode:
11183 if (!standard_sse_constant_p (x, mode)
11184 && GET_MODE_SIZE (TARGET_AVX512F && TARGET_EVEX512
11185 ? XImode
11186 : (TARGET_AVX
11187 ? OImode
11188 : (TARGET_SSE2
11189 ? TImode : DImode))) < GET_MODE_SIZE (mode))
11190 return false;
11191 default:
11192 break;
11193 }
11194 break;
11195
11196 case CONST_VECTOR:
11197 if (!standard_sse_constant_p (x, mode))
11198 return false;
11199 break;
11200
11201 case CONST_DOUBLE:
11202 if (mode == E_BFmode)
11203 return false;
11204
11205 default:
11206 break;
11207 }
11208
11209 /* Otherwise we handle everything else in the move patterns. */
11210 return true;
11211}
11212
11213/* Determine if it's legal to put X into the constant pool. This
11214 is not possible for the address of thread-local symbols, which
11215 is checked above. */
11216
11217static bool
11218ix86_cannot_force_const_mem (machine_mode mode, rtx x)
11219{
11220 /* We can put any immediate constant in memory. */
11221 switch (GET_CODE (x))
11222 {
11223 CASE_CONST_ANY:
11224 return false;
11225
11226 default:
11227 break;
11228 }
11229
11230 return !ix86_legitimate_constant_p (mode, x);
11231}
11232
11233/* Nonzero if the symbol is marked as dllimport, or as stub-variable,
11234 otherwise zero. */
11235
11236static bool
11237is_imported_p (rtx x)
11238{
11239 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
11240 || GET_CODE (x) != SYMBOL_REF)
11241 return false;
11242
11243 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
11244}
11245
11246
11247/* Nonzero if the constant value X is a legitimate general operand
11248 when generating PIC code. It is given that flag_pic is on and
11249 that X satisfies CONSTANT_P. */
11250
11251bool
11252legitimate_pic_operand_p (rtx x)
11253{
11254 rtx inner;
11255
11256 switch (GET_CODE (x))
11257 {
11258 case CONST:
11259 inner = XEXP (x, 0);
11260 if (GET_CODE (inner) == PLUS
11261 && CONST_INT_P (XEXP (inner, 1)))
11262 inner = XEXP (inner, 0);
11263
11264 /* Only some unspecs are valid as "constants". */
11265 if (GET_CODE (inner) == UNSPEC)
11266 switch (XINT (inner, 1))
11267 {
11268 case UNSPEC_GOT:
11269 case UNSPEC_GOTOFF:
11270 case UNSPEC_PLTOFF:
11271 return TARGET_64BIT;
11272 case UNSPEC_TPOFF:
11273 x = XVECEXP (inner, 0, 0);
11274 return (GET_CODE (x) == SYMBOL_REF
11275 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11276 case UNSPEC_MACHOPIC_OFFSET:
11277 return legitimate_pic_address_disp_p (x);
11278 default:
11279 return false;
11280 }
11281 /* FALLTHRU */
11282
11283 case SYMBOL_REF:
11284 case LABEL_REF:
11285 return legitimate_pic_address_disp_p (x);
11286
11287 default:
11288 return true;
11289 }
11290}
11291
11292/* Determine if a given CONST RTX is a valid memory displacement
11293 in PIC mode. */
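/* A typical accepted 32-bit PIC displacement is
   (const (unspec [(symbol_ref "sym")] UNSPEC_GOTOFF)), possibly with a
   CONST_INT added inside the CONST; most other symbolic forms are
   rejected here.  */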
11294
11295bool
11296legitimate_pic_address_disp_p (rtx disp)
11297{
11298 bool saw_plus;
11299
11300 /* In 64bit mode we can allow direct addresses of symbols and labels
11301 when they are not dynamic symbols. */
11302 if (TARGET_64BIT)
11303 {
11304 rtx op0 = disp, op1;
11305
11306 switch (GET_CODE (disp))
11307 {
11308 case LABEL_REF:
11309 return true;
11310
11311 case CONST:
11312 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11313 break;
11314 op0 = XEXP (XEXP (disp, 0), 0);
11315 op1 = XEXP (XEXP (disp, 0), 1);
11316 if (!CONST_INT_P (op1))
11317 break;
11318 if (GET_CODE (op0) == UNSPEC
11319 && (XINT (op0, 1) == UNSPEC_DTPOFF
11320 || XINT (op0, 1) == UNSPEC_NTPOFF)
11321 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
11322 return true;
11323 if (INTVAL (op1) >= 16*1024*1024
11324 || INTVAL (op1) < -16*1024*1024)
11325 break;
11326 if (GET_CODE (op0) == LABEL_REF)
11327 return true;
11328 if (GET_CODE (op0) == CONST
11329 && GET_CODE (XEXP (op0, 0)) == UNSPEC
11330 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
11331 return true;
11332 if (GET_CODE (op0) == UNSPEC
11333 && XINT (op0, 1) == UNSPEC_PCREL)
11334 return true;
11335 if (GET_CODE (op0) != SYMBOL_REF)
11336 break;
11337 /* FALLTHRU */
11338
11339 case SYMBOL_REF:
11340 /* TLS references should always be enclosed in UNSPEC.
11341 The dllimported symbol always needs to be resolved. */
11342 if (SYMBOL_REF_TLS_MODEL (op0)
11343 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
11344 return false;
11345
11346 if (TARGET_PECOFF)
11347 {
11348 if (is_imported_p (op0))
11349 return true;
11350
11351 if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
11352 break;
11353
11354 /* Non-external-weak function symbols need to be resolved only
11355 for the large model. Non-external symbols don't need to be
11356 resolved for large and medium models. For the small model,
11357 we don't need to resolve anything here. */
11358 if ((ix86_cmodel != CM_LARGE_PIC
11359 && SYMBOL_REF_FUNCTION_P (op0)
11360 && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
11361 || !SYMBOL_REF_EXTERNAL_P (op0)
11362 || ix86_cmodel == CM_SMALL_PIC)
11363 return true;
11364 }
11365 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
11366 && (SYMBOL_REF_LOCAL_P (op0)
11367 || ((ix86_direct_extern_access
11368 && !(SYMBOL_REF_DECL (op0)
11369 && lookup_attribute ("nodirect_extern_access",
11370 DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
11371 && HAVE_LD_PIE_COPYRELOC
11372 && flag_pie
11373 && !SYMBOL_REF_WEAK (op0)
11374 && !SYMBOL_REF_FUNCTION_P (op0)))
11375 && ix86_cmodel != CM_LARGE_PIC)
11376 return true;
11377 break;
11378
11379 default:
11380 break;
11381 }
11382 }
11383 if (GET_CODE (disp) != CONST)
11384 return false;
11385 disp = XEXP (disp, 0);
11386
11387 if (TARGET_64BIT)
11388 {
11389 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
11390 of GOT table references. We should not need these anyway. */
11391 if (GET_CODE (disp) != UNSPEC
11392 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11393 && XINT (disp, 1) != UNSPEC_GOTOFF
11394 && XINT (disp, 1) != UNSPEC_PCREL
11395 && XINT (disp, 1) != UNSPEC_PLTOFF))
11396 return false;
11397
11398 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11399 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11400 return false;
11401 return true;
11402 }
11403
11404 saw_plus = false;
11405 if (GET_CODE (disp) == PLUS)
11406 {
11407 if (!CONST_INT_P (XEXP (disp, 1)))
11408 return false;
11409 disp = XEXP (disp, 0);
11410 saw_plus = true;
11411 }
11412
11413 if (TARGET_MACHO && darwin_local_data_pic (disp))
11414 return true;
11415
11416 if (GET_CODE (disp) != UNSPEC)
11417 return false;
11418
11419 switch (XINT (disp, 1))
11420 {
11421 case UNSPEC_GOT:
11422 if (saw_plus)
11423 return false;
11424 /* We need to check for both symbols and labels because VxWorks loads
11425 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11426 details. */
11427 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11428 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11429 case UNSPEC_GOTOFF:
11430 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11431 While the ABI also specifies a 32bit relocation, we don't produce it in
11432 the small PIC model at all. */
11433 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11434 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11435 && !TARGET_64BIT)
11436 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11437 return false;
11438 case UNSPEC_GOTTPOFF:
11439 case UNSPEC_GOTNTPOFF:
11440 case UNSPEC_INDNTPOFF:
11441 if (saw_plus)
11442 return false;
11443 disp = XVECEXP (disp, 0, 0);
11444 return (GET_CODE (disp) == SYMBOL_REF
11445 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11446 case UNSPEC_NTPOFF:
11447 disp = XVECEXP (disp, 0, 0);
11448 return (GET_CODE (disp) == SYMBOL_REF
11449 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11450 case UNSPEC_DTPOFF:
11451 disp = XVECEXP (disp, 0, 0);
11452 return (GET_CODE (disp) == SYMBOL_REF
11453 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11454 }
11455
11456 return false;
11457}
11458
11459/* Determine if op is a suitable RTX for an address register.
11460 Return naked register if a register or a register subreg is
11461 found, otherwise return NULL_RTX. */
11462
11463static rtx
11464ix86_validate_address_register (rtx op)
11465{
11466 machine_mode mode = GET_MODE (op);
11467
11468 /* Only SImode or DImode registers can form the address. */
11469 if (mode != SImode && mode != DImode)
11470 return NULL_RTX;
11471
11472 if (REG_P (op))
11473 return op;
11474 else if (SUBREG_P (op))
11475 {
11476 rtx reg = SUBREG_REG (op);
11477
11478 if (!REG_P (reg))
11479 return NULL_RTX;
11480
11481 mode = GET_MODE (reg);
11482
11483 /* Don't allow SUBREGs that span more than a word. It can
11484 lead to spill failures when the register is one word out
11485 of a two word structure. */
11486 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11487 return NULL_RTX;
11488
11489 /* Allow only SUBREGs of non-eliminable hard registers. */
11490 if (register_no_elim_operand (reg, mode))
11491 return reg;
11492 }
11493
11494 /* Op is not a register. */
11495 return NULL_RTX;
11496}
11497
11498/* Determine which memory address register set insn can use. */
11499
11500static enum attr_addr
11501ix86_memory_address_reg_class (rtx_insn* insn)
11502{
11503 /* LRA can do some initialization with NULL insn,
11504 return maximum register class in this case. */
11505 enum attr_addr addr_rclass = ADDR_GPR32;
11506
11507 if (!insn)
11508 return addr_rclass;
11509
11510 if (asm_noperands (PATTERN (insn)) >= 0
11511 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
11512 return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;
11513
11514 /* Return maximum register class for unrecognized instructions. */
11515 if (INSN_CODE (insn) < 0)
11516 return addr_rclass;
11517
11518 /* Try to recognize the insn before calling get_attr_addr.
11519 Save current recog_data and current alternative. */
11520 struct recog_data_d saved_recog_data = recog_data;
11521 int saved_alternative = which_alternative;
11522
11523 /* Update recog_data for processing of alternatives. */
11524 extract_insn_cached (insn);
11525
11526 /* If the current alternative is not set, loop through the enabled
11527 alternatives and get the most limited register class. */
11528 if (saved_alternative == -1)
11529 {
11530 alternative_mask enabled = get_enabled_alternatives (insn);
11531
11532 for (int i = 0; i < recog_data.n_alternatives; i++)
11533 {
11534 if (!TEST_BIT (enabled, i))
11535 continue;
11536
11537 which_alternative = i;
11538 addr_rclass = MIN (addr_rclass, get_attr_addr (insn));
11539 }
11540 }
11541 else
11542 {
11543 which_alternative = saved_alternative;
11544 addr_rclass = get_attr_addr (insn);
11545 }
11546
11547 recog_data = saved_recog_data;
11548 which_alternative = saved_alternative;
11549
11550 return addr_rclass;
11551}
11552
11553/* Return memory address register class insn can use. */
11554
11555enum reg_class
11556ix86_insn_base_reg_class (rtx_insn* insn)
11557{
11558 switch (ix86_memory_address_reg_class (insn))
11559 {
11560 case ADDR_GPR8:
11561 return LEGACY_GENERAL_REGS;
11562 case ADDR_GPR16:
11563 return GENERAL_GPR16;
11564 case ADDR_GPR32:
11565 break;
11566 default:
11567 gcc_unreachable ();
11568 }
11569
11570 return BASE_REG_CLASS;
11571}
11572
11573bool
11574ix86_regno_ok_for_insn_base_p (int regno, rtx_insn* insn)
11575{
11576 switch (ix86_memory_address_reg_class (insn))
11577 {
11578 case ADDR_GPR8:
11579 return LEGACY_INT_REGNO_P (regno);
11580 case ADDR_GPR16:
11581 return GENERAL_GPR16_REGNO_P (regno);
11582 case ADDR_GPR32:
11583 break;
11584 default:
11585 gcc_unreachable ();
11586 }
11587
11588 return GENERAL_REGNO_P (regno);
11589}
11590
11591enum reg_class
11592ix86_insn_index_reg_class (rtx_insn* insn)
11593{
11594 switch (ix86_memory_address_reg_class (insn))
11595 {
11596 case ADDR_GPR8:
11597 return LEGACY_INDEX_REGS;
11598 case ADDR_GPR16:
11599 return INDEX_GPR16;
11600 case ADDR_GPR32:
11601 break;
11602 default:
11603 gcc_unreachable ();
11604 }
11605
11606 return INDEX_REG_CLASS;
11607}
11608
11609/* Recognizes RTL expressions that are valid memory addresses for an
11610 instruction. The MODE argument is the machine mode for the MEM
11611 expression that wants to use this address.
11612
11613 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11614 convert common non-canonical forms to canonical form so that they will
11615 be recognized. */
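/* For instance, (plus (reg) (mult (reg) (const_int 4))) is accepted,
   while a scale of 3, as in (plus (reg) (mult (reg) (const_int 3))),
   is rejected below because only scales of 1, 2, 4 and 8 can be
   encoded.  */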
11616
11617static bool
11618ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
11619 code_helper = ERROR_MARK)
11620{
11621 struct ix86_address parts;
11622 rtx base, index, disp;
11623 HOST_WIDE_INT scale;
11624 addr_space_t seg;
11625
11626 if (ix86_decompose_address (addr, &parts) == 0)
11627 /* Decomposition failed. */
11628 return false;
11629
11630 base = parts.base;
11631 index = parts.index;
11632 disp = parts.disp;
11633 scale = parts.scale;
11634 seg = parts.seg;
11635
11636 /* Validate base register. */
11637 if (base)
11638 {
11639 rtx reg = ix86_validate_address_register (base);
11640
11641 if (reg == NULL_RTX)
11642 return false;
11643
11644 unsigned int regno = REGNO (reg);
11645 if ((strict && !REGNO_OK_FOR_BASE_P (regno))
11646 || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
11647 /* Base is not valid. */
11648 return false;
11649 }
11650
11651 /* Validate index register. */
11652 if (index)
11653 {
11654 rtx reg = ix86_validate_address_register (index);
11655
11656 if (reg == NULL_RTX)
11657 return false;
11658
11659 unsigned int regno = REGNO (reg);
11660 if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
11661 || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
11662 /* Index is not valid. */
11663 return false;
11664 }
11665
11666 /* Index and base should have the same mode. */
11667 if (base && index
11668 && GET_MODE (base) != GET_MODE (index))
11669 return false;
11670
11671 /* Address override works only on the (%reg) part of %fs:(%reg). */
11672 if (seg != ADDR_SPACE_GENERIC
11673 && ((base && GET_MODE (base) != word_mode)
11674 || (index && GET_MODE (index) != word_mode)))
11675 return false;
11676
11677 /* Validate scale factor. */
11678 if (scale != 1)
11679 {
11680 if (!index)
11681 /* Scale without index. */
11682 return false;
11683
11684 if (scale != 2 && scale != 4 && scale != 8)
11685 /* Scale is not a valid multiplier. */
11686 return false;
11687 }
11688
11689 /* Validate displacement. */
11690 if (disp)
11691 {
11692 if (ix86_endbr_immediate_operand (disp, VOIDmode))
11693 return false;
11694
11695 if (GET_CODE (disp) == CONST
11696 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11697 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11698 switch (XINT (XEXP (disp, 0), 1))
11699 {
11700 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
11701 when used. While the ABI also specifies 32bit relocations, we
11702 don't produce them at all and use IP relative instead.
11703 Allow GOT in 32bit mode for both PIC and non-PIC if symbol
11704 should be loaded via GOT. */
11705 case UNSPEC_GOT:
11706 if (!TARGET_64BIT
11707 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
11708 goto is_legitimate_pic;
11709 /* FALLTHRU */
11710 case UNSPEC_GOTOFF:
11711 gcc_assert (flag_pic);
11712 if (!TARGET_64BIT)
11713 goto is_legitimate_pic;
11714
11715 /* 64bit address unspec. */
11716 return false;
11717
11718 case UNSPEC_GOTPCREL:
11719 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
11720 goto is_legitimate_pic;
11721 /* FALLTHRU */
11722 case UNSPEC_PCREL:
11723 gcc_assert (flag_pic);
11724 goto is_legitimate_pic;
11725
11726 case UNSPEC_GOTTPOFF:
11727 case UNSPEC_GOTNTPOFF:
11728 case UNSPEC_INDNTPOFF:
11729 case UNSPEC_NTPOFF:
11730 case UNSPEC_DTPOFF:
11731 break;
11732
11733 default:
11734 /* Invalid address unspec. */
11735 return false;
11736 }
11737
11738 else if (SYMBOLIC_CONST (disp)
11739 && (flag_pic
11740#if TARGET_MACHO
11741 || (MACHOPIC_INDIRECT
11742 && !machopic_operand_p (disp))
11743#endif
11744 ))
11745 {
11746
11747 is_legitimate_pic:
11748 if (TARGET_64BIT && (index || base))
11749 {
11750 /* foo@dtpoff(%rX) is ok. */
11751 if (GET_CODE (disp) != CONST
11752 || GET_CODE (XEXP (disp, 0)) != PLUS
11753 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11754 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11755 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11756 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11757 /* Non-constant pic memory reference. */
11758 return false;
11759 }
11760 else if ((!TARGET_MACHO || flag_pic)
11761 && ! legitimate_pic_address_disp_p (disp))
11762 /* Displacement is an invalid pic construct. */
11763 return false;
11764#if TARGET_MACHO
11765 else if (MACHO_DYNAMIC_NO_PIC_P
11766 && !ix86_legitimate_constant_p (Pmode, disp))
11767 /* displacement must be referenced via non_lazy_pointer */
11768 return false;
11769#endif
11770
11771 /* This code used to verify that a symbolic pic displacement
11772 includes the pic_offset_table_rtx register.
11773
11774 While this is a good idea, unfortunately these constructs may
11775 be created by "adds using lea" optimization for incorrect
11776 code like:
11777
11778 int a;
11779 int foo(int i)
11780 {
11781 return *(&a+i);
11782 }
11783
11784 This code is nonsensical, but results in addressing
11785 GOT table with pic_offset_table_rtx base. We can't
11786 just refuse it easily, since it gets matched by the
11787 "addsi3" pattern, which later gets split to lea when the
11788 output register differs from the input. While this
11789 could be handled by a separate addsi pattern for this case
11790 that never results in lea, disabling this test seems to be
11791 the easier and correct fix for the crash. */
11792 }
11793 else if (GET_CODE (disp) != LABEL_REF
11794 && !CONST_INT_P (disp)
11795 && (GET_CODE (disp) != CONST
11796 || !ix86_legitimate_constant_p (Pmode, disp))
11797 && (GET_CODE (disp) != SYMBOL_REF
11798 || !ix86_legitimate_constant_p (Pmode, disp)))
11799 /* Displacement is not constant. */
11800 return false;
11801 else if (TARGET_64BIT
11802 && !x86_64_immediate_operand (disp, VOIDmode))
11803 /* Displacement is out of range. */
11804 return false;
11805 /* In x32 mode, constant addresses are sign extended to 64bit, so
11806 we have to prevent addresses from 0x80000000 to 0xffffffff. */
11807 else if (TARGET_X32 && !(index || base)
11808 && CONST_INT_P (disp)
11809 && val_signbit_known_set_p (SImode, INTVAL (disp)))
11810 return false;
11811 }
11812
11813 /* Everything looks valid. */
11814 return true;
11815}
11816
11817/* Determine if a given RTX is a valid constant address. */
11818
11819bool
11820constant_address_p (rtx x)
11821{
11822 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11823}
11824
11825/* Return a unique alias set for the GOT. */
11826
11827alias_set_type
11828ix86_GOT_alias_set (void)
11829{
11830 static alias_set_type set = -1;
11831 if (set == -1)
11832 set = new_alias_set ();
11833 return set;
11834}
11835
11836/* Return a legitimate reference for ORIG (an address) using the
11837 register REG. If REG is 0, a new pseudo is generated.
11838
11839 There are two types of references that must be handled:
11840
11841 1. Global data references must load the address from the GOT, via
11842 the PIC reg. An insn is emitted to do this load, and the reg is
11843 returned.
11844
11845 2. Static data references, constant pool addresses, and code labels
11846 compute the address as an offset from the GOT, whose base is in
11847 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11848 differentiate them from global data objects. The returned
11849 address is the PIC reg + an unspec constant.
11850
11851 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11852 reg also appears in the address. */
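/* Concretely, on a 32-bit ELF target a global symbol SYM typically becomes
   a load through the GOT,
     (mem (plus pic_offset_table_rtx (const (unspec [SYM] UNSPEC_GOT)))),
   while local data becomes a GOT-relative offset,
     (plus pic_offset_table_rtx (const (unspec [SYM] UNSPEC_GOTOFF))),
   matching the two cases handled in the body below.  */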
11853
11854rtx
11855legitimize_pic_address (rtx orig, rtx reg)
11856{
11857 rtx addr = orig;
11858 rtx new_rtx = orig;
11859
11860#if TARGET_MACHO
11861 if (TARGET_MACHO && !TARGET_64BIT)
11862 {
11863 if (reg == 0)
11864 reg = gen_reg_rtx (Pmode);
11865 /* Use the generic Mach-O PIC machinery. */
11866 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11867 }
11868#endif
11869
11870 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11871 {
11872 rtx tmp = legitimize_pe_coff_symbol (addr, true);
11873 if (tmp)
11874 return tmp;
11875 }
11876
11877 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11878 new_rtx = addr;
11879 else if ((!TARGET_64BIT
11880 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
11881 && !TARGET_PECOFF
11882 && gotoff_operand (addr, Pmode))
11883 {
11884 /* This symbol may be referenced via a displacement
11885 from the PIC base address (@GOTOFF). */
11886 if (GET_CODE (addr) == CONST)
11887 addr = XEXP (addr, 0);
11888
11889 if (GET_CODE (addr) == PLUS)
11890 {
11891 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11892 UNSPEC_GOTOFF);
11893 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11894 }
11895 else
11896 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11897
11898 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11899
11900 if (TARGET_64BIT)
11901 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11902
11903 if (reg != 0)
11904 {
11905 gcc_assert (REG_P (reg));
11906 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
11907 new_rtx, reg, 1, OPTAB_DIRECT);
11908 }
11909 else
11910 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11911 }
11912 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11913 /* We can't always use @GOTOFF for text labels
11914 on VxWorks, see gotoff_operand. */
11915 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11916 {
11917 rtx tmp = legitimize_pe_coff_symbol (addr, true);
11918 if (tmp)
11919 return tmp;
11920
11921 /* For x64 PE-COFF there is no GOT table,
11922 so we use address directly. */
11923 if (TARGET_64BIT && TARGET_PECOFF)
11924 {
11925 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
11926 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11927 }
11928 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11929 {
11930 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
11931 UNSPEC_GOTPCREL);
11932 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11933 new_rtx = gen_const_mem (Pmode, new_rtx);
11934 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11935 }
11936 else
11937 {
11938 /* This symbol must be referenced via a load
11939 from the Global Offset Table (@GOT). */
11940 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11941 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11942
11943 if (TARGET_64BIT)
11944 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11945
11946 if (reg != 0)
11947 {
11948 gcc_assert (REG_P (reg));
11949 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
11950 new_rtx, reg, 1, OPTAB_DIRECT);
11951 }
11952 else
11953 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11954
11955 new_rtx = gen_const_mem (Pmode, new_rtx);
11956 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11957 }
11958
11959 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11960 }
11961 else
11962 {
11963 if (CONST_INT_P (addr)
11964 && !x86_64_immediate_operand (addr, VOIDmode))
11965 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
11966 else if (GET_CODE (addr) == CONST)
11967 {
11968 addr = XEXP (addr, 0);
11969
11970 /* We must match stuff we generate before. Assume the only
11971 unspecs that can get here are ours. Not that we could do
11972 anything with them anyway.... */
11973 if (GET_CODE (addr) == UNSPEC
11974 || (GET_CODE (addr) == PLUS
11975 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11976 return orig;
11977 gcc_assert (GET_CODE (addr) == PLUS);
11978 }
11979
11980 if (GET_CODE (addr) == PLUS)
11981 {
11982 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11983
11984 /* Check first to see if this is a constant
11985 offset from a @GOTOFF symbol reference. */
11986 if (!TARGET_PECOFF
11987 && gotoff_operand (op0, Pmode)
11988 && CONST_INT_P (op1))
11989 {
11990 if (!TARGET_64BIT)
11991 {
11992 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11993 UNSPEC_GOTOFF);
11994 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11995 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11996
11997 if (reg != 0)
11998 {
11999 gcc_assert (REG_P (reg));
12000 new_rtx = expand_simple_binop (Pmode, PLUS,
12001 pic_offset_table_rtx,
12002 new_rtx, reg, 1,
12003 OPTAB_DIRECT);
12004 }
12005 else
12006 new_rtx
12007 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12008 }
12009 else
12010 {
12011 if (INTVAL (op1) < -16*1024*1024
12012 || INTVAL (op1) >= 16*1024*1024)
12013 {
12014 if (!x86_64_immediate_operand (op1, Pmode))
12015 op1 = force_reg (Pmode, op1);
12016
12017 new_rtx
12018 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12019 }
12020 }
12021 }
12022 else
12023 {
12024 rtx base = legitimize_pic_address (op0, reg);
12025 machine_mode mode = GET_MODE (base);
12026 new_rtx
12027 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
12028
12029 if (CONST_INT_P (new_rtx))
12030 {
12031 if (INTVAL (new_rtx) < -16*1024*1024
12032 || INTVAL (new_rtx) >= 16*1024*1024)
12033 {
12034 if (!x86_64_immediate_operand (new_rtx, mode))
12035 new_rtx = force_reg (mode, new_rtx);
12036
12037 new_rtx
12038 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
12039 }
12040 else
12041 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
12042 }
12043 else
12044 {
12045 /* For %rip addressing, we have to use
12046 just disp32, not base nor index. */
12047 if (TARGET_64BIT
12048 && (GET_CODE (base) == SYMBOL_REF
12049 || GET_CODE (base) == LABEL_REF))
12050 base = force_reg (mode, base);
12051 if (GET_CODE (new_rtx) == PLUS
12052 && CONSTANT_P (XEXP (new_rtx, 1)))
12053 {
12054 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
12055 new_rtx = XEXP (new_rtx, 1);
12056 }
12057 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
12058 }
12059 }
12060 }
12061 }
12062 return new_rtx;
12063}
12064
12065/* Load the thread pointer. If TO_REG is true, force it into a register. */
12066
12067static rtx
12068get_thread_pointer (machine_mode tp_mode, bool to_reg)
12069{
12070 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12071
12072 if (GET_MODE (tp) != tp_mode)
12073 {
12074 gcc_assert (GET_MODE (tp) == SImode);
12075 gcc_assert (tp_mode == DImode);
12076
12077 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
12078 }
12079
12080 if (to_reg)
12081 tp = copy_to_mode_reg (tp_mode, tp);
12082
12083 return tp;
12084}
12085
12086/* Construct the SYMBOL_REF for the tls_get_addr function. */
12087
12088static GTY(()) rtx ix86_tls_symbol;
12089
12090static rtx
12091ix86_tls_get_addr (void)
12092{
12093 if (!ix86_tls_symbol)
12094 {
12095 const char *sym
12096 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12097 ? "___tls_get_addr" : "__tls_get_addr");
12098
12099 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12100 }
12101
12102 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
12103 {
12104 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
12105 UNSPEC_PLTOFF);
12106 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
12107 gen_rtx_CONST (Pmode, unspec));
12108 }
12109
12110 return ix86_tls_symbol;
12111}
12112
12113/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12114
12115static GTY(()) rtx ix86_tls_module_base_symbol;
12116
12117rtx
12118ix86_tls_module_base (void)
12119{
12120 if (!ix86_tls_module_base_symbol)
12121 {
12122 ix86_tls_module_base_symbol
12123 = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
12124
12125 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12126 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12127 }
12128
12129 return ix86_tls_module_base_symbol;
12130}
12131
12132/* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12133 false if we expect this to be used for a memory address and true if
12134 we expect to load the address into a register. */
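/* As one concrete case: for TLS_MODEL_LOCAL_EXEC with GNU TLS (or in
   64-bit mode) the result is simply
     (plus <thread pointer> (const (unspec [x] UNSPEC_NTPOFF))),
   i.e. the thread pointer plus an x@tpoff style offset; the other models
   additionally go through __tls_get_addr or the GOT as coded below.  */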
12135
12136rtx
12137legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12138{
12139 rtx dest, base, off;
12140 rtx pic = NULL_RTX, tp = NULL_RTX;
12141 machine_mode tp_mode = Pmode;
12142 int type;
12143
12144 /* Fall back to the global dynamic model if the tool chain cannot support local
12145 dynamic. */
12146 if (TARGET_SUN_TLS && !TARGET_64BIT
12147 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
12148 && model == TLS_MODEL_LOCAL_DYNAMIC)
12149 model = TLS_MODEL_GLOBAL_DYNAMIC;
12150
12151 switch (model)
12152 {
12153 case TLS_MODEL_GLOBAL_DYNAMIC:
12154 if (!TARGET_64BIT)
12155 {
12156 if (flag_pic && !TARGET_PECOFF)
12157 pic = pic_offset_table_rtx;
12158 else
12159 {
12160 pic = gen_reg_rtx (Pmode);
12161 emit_insn (gen_set_got (pic));
12162 }
12163 }
12164
12165 if (TARGET_GNU2_TLS)
12166 {
12167 dest = gen_reg_rtx (ptr_mode);
12168 if (TARGET_64BIT)
12169 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
12170 else
12171 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12172
12173 tp = get_thread_pointer (ptr_mode, true);
12174 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
12175 if (GET_MODE (dest) != Pmode)
12176 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
12177 dest = force_reg (Pmode, dest);
12178
12179 if (GET_MODE (x) != Pmode)
12180 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12181
12182 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12183 }
12184 else
12185 {
12186 rtx caddr = ix86_tls_get_addr ();
12187
12188 dest = gen_reg_rtx (Pmode);
12189 if (TARGET_64BIT)
12190 {
12191 rtx rax = gen_rtx_REG (Pmode, AX_REG);
12192 rtx_insn *insns;
12193
12194 start_sequence ();
12195 emit_call_insn
12196 (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
12197 insns = get_insns ();
12198 end_sequence ();
12199
12200 if (GET_MODE (x) != Pmode)
12201 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12202
12203 RTL_CONST_CALL_P (insns) = 1;
12204 emit_libcall_block (insns, dest, rax, x);
12205 }
12206 else
12207 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12208 }
12209 break;
12210
12211 case TLS_MODEL_LOCAL_DYNAMIC:
12212 if (!TARGET_64BIT)
12213 {
12214 if (flag_pic)
12215 pic = pic_offset_table_rtx;
12216 else
12217 {
12218 pic = gen_reg_rtx (Pmode);
12219 emit_insn (gen_set_got (pic));
12220 }
12221 }
12222
12223 if (TARGET_GNU2_TLS)
12224 {
12225 rtx tmp = ix86_tls_module_base ();
12226
12227 base = gen_reg_rtx (ptr_mode);
12228 if (TARGET_64BIT)
12229 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
12230 else
12231 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12232
12233 tp = get_thread_pointer (ptr_mode, true);
12234 if (GET_MODE (base) != Pmode)
12235 base = gen_rtx_ZERO_EXTEND (Pmode, base);
12236 base = force_reg (Pmode, base);
12237 }
12238 else
12239 {
12240 rtx caddr = ix86_tls_get_addr ();
12241
12242 base = gen_reg_rtx (Pmode);
12243 if (TARGET_64BIT)
12244 {
12245 rtx rax = gen_rtx_REG (Pmode, AX_REG);
12246 rtx_insn *insns;
12247 rtx eqv;
12248
12249 start_sequence ();
12250 emit_call_insn
12251 (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
12252 insns = get_insns ();
12253 end_sequence ();
12254
12255 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12256 share the LD_BASE result with other LD model accesses. */
12257 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12258 UNSPEC_TLS_LD_BASE);
12259
12260 RTL_CONST_CALL_P (insns) = 1;
12261 emit_libcall_block (insns, base, rax, eqv);
12262 }
12263 else
12264 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12265 }
12266
12267 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12268 off = gen_rtx_CONST (Pmode, off);
12269
12270 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12271
12272 if (TARGET_GNU2_TLS)
12273 {
12274 if (GET_MODE (tp) != Pmode)
12275 {
12276 dest = lowpart_subreg (ptr_mode, dest, Pmode);
12277 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
12278 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
12279 }
12280 else
12281 dest = gen_rtx_PLUS (Pmode, tp, dest);
12282 dest = force_reg (Pmode, dest);
12283
12284 if (GET_MODE (x) != Pmode)
12285 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12286
12287 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12288 }
12289 break;
12290
12291 case TLS_MODEL_INITIAL_EXEC:
12292 if (TARGET_64BIT)
12293 {
12294 if (TARGET_SUN_TLS && !TARGET_X32)
12295 {
12296 /* The Sun linker took the AMD64 TLS spec literally
12297 and can only handle %rax as destination of the
12298 initial executable code sequence. */
12299
12300 dest = gen_reg_rtx (DImode);
12301 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12302 return dest;
12303 }
12304
12305 /* Generate DImode references to avoid %fs:(%reg32)
12306 problems and linker IE->LE relaxation bug. */
12307 tp_mode = DImode;
12308 pic = NULL;
12309 type = UNSPEC_GOTNTPOFF;
12310 }
12311 else if (flag_pic)
12312 {
12313 pic = pic_offset_table_rtx;
12314 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12315 }
12316 else if (!TARGET_ANY_GNU_TLS)
12317 {
12318 pic = gen_reg_rtx (Pmode);
12319 emit_insn (gen_set_got (pic));
12320 type = UNSPEC_GOTTPOFF;
12321 }
12322 else
12323 {
12324 pic = NULL;
12325 type = UNSPEC_INDNTPOFF;
12326 }
12327
12328 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
12329 off = gen_rtx_CONST (tp_mode, off);
12330 if (pic)
12331 off = gen_rtx_PLUS (tp_mode, pic, off);
12332 off = gen_const_mem (tp_mode, off);
12333 set_mem_alias_set (off, ix86_GOT_alias_set ());
12334
12335 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12336 {
12337 base = get_thread_pointer (tp_mode,
12338 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12339 off = force_reg (tp_mode, off);
12340 dest = gen_rtx_PLUS (tp_mode, base, off);
12341 if (tp_mode != Pmode)
12342 dest = convert_to_mode (Pmode, dest, 1);
12343 }
12344 else
12345 {
12346 base = get_thread_pointer (Pmode, true);
12347 dest = gen_reg_rtx (Pmode);
12348 emit_insn (gen_sub3_insn (dest, base, off));
12349 }
12350 break;
12351
12352 case TLS_MODEL_LOCAL_EXEC:
12353 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12354 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12355 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12356 off = gen_rtx_CONST (Pmode, off);
12357
12358 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12359 {
12360 base = get_thread_pointer (Pmode,
12361 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12362 return gen_rtx_PLUS (Pmode, base, off);
12363 }
12364 else
12365 {
12366 base = get_thread_pointer (Pmode, true);
12367 dest = gen_reg_rtx (Pmode);
12368 emit_insn (gen_sub3_insn (dest, base, off));
12369 }
12370 break;
12371
12372 default:
12373 gcc_unreachable ();
12374 }
12375
12376 return dest;
12377}
12378
12379/* Return true if the TLS address requires insn using integer registers.
12380 It's used to prevent KMOV/VMOV in TLS code sequences which require integer
12381 MOV instructions, refer to PR103275. */
12382bool
12383ix86_gpr_tls_address_pattern_p (rtx mem)
12384{
12385 gcc_assert (MEM_P (mem));
12386
12387 rtx addr = XEXP (mem, 0);
12388 subrtx_var_iterator::array_type array;
12389 FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL)
12390 {
12391 rtx op = *iter;
12392 if (GET_CODE (op) == UNSPEC)
12393 switch (XINT (op, 1))
12394 {
12395 case UNSPEC_GOTNTPOFF:
12396 return true;
12397 case UNSPEC_TPOFF:
12398 if (!TARGET_64BIT)
12399 return true;
12400 break;
12401 default:
12402 break;
12403 }
12404 }
12405
12406 return false;
12407}
12408
12409/* Return true if OP refers to a TLS address. */
12410bool
12411ix86_tls_address_pattern_p (rtx op)
12412{
12413 subrtx_var_iterator::array_type array;
12414 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
12415 {
12416 rtx op = *iter;
12417 if (MEM_P (op))
12418 {
12419 rtx *x = &XEXP (op, 0);
12420 while (GET_CODE (*x) == PLUS)
12421 {
12422 int i;
12423 for (i = 0; i < 2; i++)
12424 {
12425 rtx u = XEXP (*x, i);
12426 if (GET_CODE (u) == ZERO_EXTEND)
12427 u = XEXP (u, 0);
12428 if (GET_CODE (u) == UNSPEC
12429 && XINT (u, 1) == UNSPEC_TP)
12430 return true;
12431 }
12432 x = &XEXP (*x, 0);
12433 }
12434
12435 iter.skip_subrtxes ();
12436 }
12437 }
12438
12439 return false;
12440}
12441
12442/* Rewrite *LOC so that it refers to a default TLS address space. */
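/* E.g. an address of the form (plus (unspec [(const_int 0)] UNSPEC_TP) (reg))
   loses the UNSPEC_TP term and the containing MEM is instead given the
   %fs/%gs based DEFAULT_TLS_SEG_REG address space.  */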
12443void
12444ix86_rewrite_tls_address_1 (rtx *loc)
12445{
12446 subrtx_ptr_iterator::array_type array;
12447 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
12448 {
12449 rtx *loc = *iter;
12450 if (MEM_P (*loc))
12451 {
12452 rtx addr = XEXP (*loc, 0);
12453 rtx *x = &addr;
12454 while (GET_CODE (*x) == PLUS)
12455 {
12456 int i;
12457 for (i = 0; i < 2; i++)
12458 {
12459 rtx u = XEXP (*x, i);
12460 if (GET_CODE (u) == ZERO_EXTEND)
12461 u = XEXP (u, 0);
12462 if (GET_CODE (u) == UNSPEC
12463 && XINT (u, 1) == UNSPEC_TP)
12464 {
12465 addr_space_t as = DEFAULT_TLS_SEG_REG;
12466
12467 *x = XEXP (*x, 1 - i);
12468
12469 *loc = replace_equiv_address_nv (*loc, addr, true);
12470 set_mem_addr_space (*loc, as);
12471 return;
12472 }
12473 }
12474 x = &XEXP (*x, 0);
12475 }
12476
12477 iter.skip_subrtxes ();
12478 }
12479 }
12480}
12481
12482/* Rewrite an instruction pattern involving a TLS address
12483 so that it refers to a default TLS address space. */
12484rtx
12485ix86_rewrite_tls_address (rtx pattern)
12486{
12487 pattern = copy_insn (pattern);
12488 ix86_rewrite_tls_address_1 (&pattern);
12489 return pattern;
12490}
12491
12492/* Create or return the unique __imp_DECL dllimport symbol corresponding
12493 to symbol DECL if BEIMPORT is true. Otherwise create or return the
12494 unique refptr-DECL symbol corresponding to symbol DECL. */
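/* For illustration: for a dllimport'ed DECL named "foo" this yields a
   reference through the import pointer, roughly
     (mem (symbol_ref "*__imp_foo")),
   and in the !BEIMPORT case a "*refptr.foo"-style stub instead; the exact
   prefix spelling depends on the user label prefix (see below).  */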
12495
12496struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
12497{
12498 static inline hashval_t hash (tree_map *m) { return m->hash; }
12499 static inline bool
12500 equal (tree_map *a, tree_map *b)
12501 {
12502 return a->base.from == b->base.from;
12503 }
12504
12505 static int
12506 keep_cache_entry (tree_map *&m)
12507 {
12508 return ggc_marked_p (m->base.from);
12509 }
12510};
12511
12512static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
12513
12514static tree
12515get_dllimport_decl (tree decl, bool beimport)
12516{
12517 struct tree_map *h, in;
12518 const char *name;
12519 const char *prefix;
12520 size_t namelen, prefixlen;
12521 char *imp_name;
12522 tree to;
12523 rtx rtl;
12524
12525 if (!dllimport_map)
12526 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
12527
12528 in.hash = htab_hash_pointer (decl);
12529 in.base.from = decl;
12530 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
12531 h = *loc;
12532 if (h)
12533 return h->to;
12534
12535 *loc = h = ggc_alloc<tree_map> ();
12536 h->hash = in.hash;
12537 h->base.from = decl;
12538 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12539 VAR_DECL, NULL, ptr_type_node);
12540 DECL_ARTIFICIAL (to) = 1;
12541 DECL_IGNORED_P (to) = 1;
12542 DECL_EXTERNAL (to) = 1;
12543 TREE_READONLY (to) = 1;
12544
12545 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12546 name = targetm.strip_name_encoding (name);
12547 if (beimport)
12548 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12549 ? "*__imp_" : "*__imp__";
12550 else
12551 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
12552 namelen = strlen (name);
12553 prefixlen = strlen (prefix);
12554 imp_name = (char *) alloca (namelen + prefixlen + 1);
12555 memcpy (imp_name, prefix, prefixlen);
12556 memcpy (imp_name + prefixlen, name, namelen + 1);
12557
12558 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12559 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12560 SET_SYMBOL_REF_DECL (rtl, to);
12561 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
12562 if (!beimport)
12563 {
12564 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
12565#ifdef SUB_TARGET_RECORD_STUB
12566 SUB_TARGET_RECORD_STUB (name);
12567#endif
12568 }
12569
12570 rtl = gen_const_mem (Pmode, rtl);
12571 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12572
12573 SET_DECL_RTL (to, rtl);
12574 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12575
12576 return to;
12577}
12578
12579/* Expand SYMBOL into its corresponding far-address symbol.
12580 WANT_REG is true if we require the result be a register. */
12581
12582static rtx
12583legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
12584{
12585 tree imp_decl;
12586 rtx x;
12587
12588 gcc_assert (SYMBOL_REF_DECL (symbol));
12589 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
12590
12591 x = DECL_RTL (imp_decl);
12592 if (want_reg)
12593 x = force_reg (Pmode, x);
12594 return x;
12595}
12596
12597/* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12598 true if we require the result be a register. */
12599
12600static rtx
12601legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12602{
12603 tree imp_decl;
12604 rtx x;
12605
12606 gcc_assert (SYMBOL_REF_DECL (symbol));
12607 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
12608
12609 x = DECL_RTL (imp_decl);
12610 if (want_reg)
12611 x = force_reg (Pmode, x);
12612 return x;
12613}
12614
12615/* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
12616 is true if we require the result be a register. */
12617
12618rtx
12619legitimize_pe_coff_symbol (rtx addr, bool inreg)
12620{
12621 if (!TARGET_PECOFF)
12622 return NULL_RTX;
12623
12624 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12625 {
12626 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12627 return legitimize_dllimport_symbol (addr, inreg);
12628 if (GET_CODE (addr) == CONST
12629 && GET_CODE (XEXP (addr, 0)) == PLUS
12630 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12631 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12632 {
12633 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
12634 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12635 }
12636 }
12637
12638 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
12639 return NULL_RTX;
12640 if (GET_CODE (addr) == SYMBOL_REF
12641 && !is_imported_p (x: addr)
12642 && SYMBOL_REF_EXTERNAL_P (addr)
12643 && SYMBOL_REF_DECL (addr))
12644 return legitimize_pe_coff_extern_decl (addr, inreg);
12645
12646 if (GET_CODE (addr) == CONST
12647 && GET_CODE (XEXP (addr, 0)) == PLUS
12648 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12649 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
12650 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
12651 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
12652 {
12653 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
12654 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12655 }
12656 return NULL_RTX;
12657}
12658
12659/* Try machine-dependent ways of modifying an illegitimate address
12660 to be legitimate. If we find one, return the new, valid address.
12661 This macro is used in only one place: `memory_address' in explow.cc.
12662
12663 OLDX is the address as it was before break_out_memory_refs was called.
12664 In some cases it is useful to look at this to decide what needs to be done.
12665
12666 It is always safe for this hook to do nothing. It exists to recognize
12667 opportunities to optimize the output.
12668
12669 For the 80386, we handle X+REG by loading X into a register R and
12670 using R+REG. R will go in a general reg and indexing will be used.
12671 However, if REG is a broken-out memory address or multiplication,
12672 nothing needs to be done because REG can certainly go in a general reg.
12673
12674 When -fpic is used, special handling is needed for symbolic references.
12675 See comments by legitimize_pic_address in i386.cc for details. */
12676
12677static rtx
12678ix86_legitimize_address (rtx x, rtx, machine_mode mode)
12679{
12680 bool changed = false;
12681 unsigned log;
12682
12683 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12684 if (log)
12685    return legitimize_tls_address (x, (enum tls_model) log, false);
12686 if (GET_CODE (x) == CONST
12687 && GET_CODE (XEXP (x, 0)) == PLUS
12688 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12689 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12690 {
12691 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12692				      (enum tls_model) log, false);
12693 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12694 }
12695
12696 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12697 {
12698      rtx tmp = legitimize_pe_coff_symbol (x, true);
12699 if (tmp)
12700 return tmp;
12701 }
12702
12703 if (flag_pic && SYMBOLIC_CONST (x))
12704    return legitimize_pic_address (x, 0);
12705
12706#if TARGET_MACHO
12707 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12708 return machopic_indirect_data_reference (x, 0);
12709#endif
12710
12711  /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
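  /* E.g. (ashift (reg) (const_int 2)) becomes (mult (reg) (const_int 4)),
     matching the scaled-index form that x86 addressing modes accept.  */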
12712 if (GET_CODE (x) == ASHIFT
12713 && CONST_INT_P (XEXP (x, 1))
12714 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12715 {
12716 changed = true;
12717 log = INTVAL (XEXP (x, 1));
12718 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12719 GEN_INT (1 << log));
12720 }
12721
12722 if (GET_CODE (x) == PLUS)
12723 {
12724 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12725
12726 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12727 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12728 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12729 {
12730 changed = true;
12731 log = INTVAL (XEXP (XEXP (x, 0), 1));
12732 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12733 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12734 GEN_INT (1 << log));
12735 }
12736
12737 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12738 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12739 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12740 {
12741 changed = true;
12742 log = INTVAL (XEXP (XEXP (x, 1), 1));
12743 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12744 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12745 GEN_INT (1 << log));
12746 }
12747
12748 /* Put multiply first if it isn't already. */
12749 if (GET_CODE (XEXP (x, 1)) == MULT)
12750 {
12751 std::swap (XEXP (x, 0), XEXP (x, 1));
12752 changed = true;
12753 }
12754
12755 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12756 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12757 created by virtual register instantiation, register elimination, and
12758 similar optimizations. */
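      /* For instance,
	   (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 8)))
	 is rebuilt as
	   (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8)),
	 i.e. the base + index*scale + disp shape the address recognizer
	 expects.  */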
12759 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12760 {
12761 changed = true;
12762 x = gen_rtx_PLUS (Pmode,
12763 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12764 XEXP (XEXP (x, 1), 0)),
12765 XEXP (XEXP (x, 1), 1));
12766 }
12767
12768 /* Canonicalize
12769 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12770 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12771 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12772 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12773 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12774 && CONSTANT_P (XEXP (x, 1)))
12775 {
12776 rtx constant;
12777 rtx other = NULL_RTX;
12778
12779 if (CONST_INT_P (XEXP (x, 1)))
12780 {
12781 constant = XEXP (x, 1);
12782 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12783 }
12784 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12785 {
12786 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12787 other = XEXP (x, 1);
12788 }
12789 else
12790 constant = 0;
12791
12792 if (constant)
12793 {
12794 changed = true;
12795 x = gen_rtx_PLUS (Pmode,
12796 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12797 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12798 plus_constant (Pmode, other,
12799 INTVAL (constant)));
12800 }
12801 }
12802
12803      if (changed && ix86_legitimate_address_p (mode, x, false))
12804 return x;
12805
12806 if (GET_CODE (XEXP (x, 0)) == MULT)
12807 {
12808 changed = true;
12809 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
12810 }
12811
12812 if (GET_CODE (XEXP (x, 1)) == MULT)
12813 {
12814 changed = true;
12815 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
12816 }
12817
12818 if (changed
12819 && REG_P (XEXP (x, 1))
12820 && REG_P (XEXP (x, 0)))
12821 return x;
12822
12823 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12824 {
12825 changed = true;
12826	  x = legitimize_pic_address (x, 0);
12827 }
12828
12829      if (changed && ix86_legitimate_address_p (mode, x, false))
12830 return x;
12831
12832 if (REG_P (XEXP (x, 0)))
12833 {
12834 rtx temp = gen_reg_rtx (Pmode);
12835 rtx val = force_operand (XEXP (x, 1), temp);
12836 if (val != temp)
12837 {
12838 val = convert_to_mode (Pmode, val, 1);
12839 emit_move_insn (temp, val);
12840 }
12841
12842 XEXP (x, 1) = temp;
12843 return x;
12844 }
12845
12846 else if (REG_P (XEXP (x, 1)))
12847 {
12848 rtx temp = gen_reg_rtx (Pmode);
12849 rtx val = force_operand (XEXP (x, 0), temp);
12850 if (val != temp)
12851 {
12852 val = convert_to_mode (Pmode, val, 1);
12853 emit_move_insn (temp, val);
12854 }
12855
12856 XEXP (x, 0) = temp;
12857 return x;
12858 }
12859 }
12860
12861 return x;
12862}
12863
12864/* Print an integer constant expression in assembler syntax. Addition
12865 and subtraction are the only arithmetic that may appear in these
12866 expressions. FILE is the stdio stream to write to, X is the rtx, and
12867 CODE is the operand print code from the output string. */
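/* For example, in 32-bit PIC code a GOT-offset reference is printed as
   "foo@GOTOFF", and with operand code 'P' a call target is printed as
   "foo@PLT"; the UNSPEC cases below select the assembler suffix.  */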
12868
12869static void
12870output_pic_addr_const (FILE *file, rtx x, int code)
12871{
12872 char buf[256];
12873
12874 switch (GET_CODE (x))
12875 {
12876 case PC:
12877 gcc_assert (flag_pic);
12878 putc (c: '.', stream: file);
12879 break;
12880
12881 case SYMBOL_REF:
12882 if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
12883 output_addr_const (file, x);
12884 else
12885 {
12886 const char *name = XSTR (x, 0);
12887
12888 /* Mark the decl as referenced so that cgraph will
12889 output the function. */
12890 if (SYMBOL_REF_DECL (x))
12891 mark_decl_referenced (SYMBOL_REF_DECL (x));
12892
12893#if TARGET_MACHO
12894 if (MACHOPIC_INDIRECT
12895 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12896 name = machopic_indirection_name (x, /*stub_p=*/true);
12897#endif
12898 assemble_name (file, name);
12899 }
12900 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
12901 && code == 'P' && ix86_call_use_plt_p (x))
12902 fputs (s: "@PLT", stream: file);
12903 break;
12904
12905 case LABEL_REF:
12906 x = XEXP (x, 0);
12907 /* FALLTHRU */
12908 case CODE_LABEL:
12909 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12910 assemble_name (asm_out_file, buf);
12911 break;
12912
12913 CASE_CONST_SCALAR_INT:
12914 output_addr_const (file, x);
12915 break;
12916
12917 case CONST:
12918 /* This used to output parentheses around the expression,
12919 but that does not work on the 386 (either ATT or BSD assembler). */
12920 output_pic_addr_const (file, XEXP (x, 0), code);
12921 break;
12922
12923 case CONST_DOUBLE:
12924 /* We can't handle floating point constants;
12925 TARGET_PRINT_OPERAND must handle them. */
12926 output_operand_lossage ("floating constant misused");
12927 break;
12928
12929 case PLUS:
12930 /* Some assemblers need integer constants to appear first. */
12931 if (CONST_INT_P (XEXP (x, 0)))
12932 {
12933 output_pic_addr_const (file, XEXP (x, 0), code);
12934 putc (c: '+', stream: file);
12935 output_pic_addr_const (file, XEXP (x, 1), code);
12936 }
12937 else
12938 {
12939 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12940 output_pic_addr_const (file, XEXP (x, 1), code);
12941 putc (c: '+', stream: file);
12942 output_pic_addr_const (file, XEXP (x, 0), code);
12943 }
12944 break;
12945
12946 case MINUS:
12947 if (!TARGET_MACHO)
12948 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', stream: file);
12949 output_pic_addr_const (file, XEXP (x, 0), code);
12950 putc (c: '-', stream: file);
12951 output_pic_addr_const (file, XEXP (x, 1), code);
12952 if (!TARGET_MACHO)
12953 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', stream: file);
12954 break;
12955
12956 case UNSPEC:
12957 gcc_assert (XVECLEN (x, 0) == 1);
12958 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12959 switch (XINT (x, 1))
12960 {
12961 case UNSPEC_GOT:
12962 fputs (s: "@GOT", stream: file);
12963 break;
12964 case UNSPEC_GOTOFF:
12965 fputs (s: "@GOTOFF", stream: file);
12966 break;
12967 case UNSPEC_PLTOFF:
12968 fputs (s: "@PLTOFF", stream: file);
12969 break;
12970 case UNSPEC_PCREL:
12971 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12972 "(%rip)" : "[rip]", stream: file);
12973 break;
12974 case UNSPEC_GOTPCREL:
12975 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12976 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", stream: file);
12977 break;
12978 case UNSPEC_GOTTPOFF:
12979 /* FIXME: This might be @TPOFF in Sun ld too. */
12980 fputs (s: "@gottpoff", stream: file);
12981 break;
12982 case UNSPEC_TPOFF:
12983 fputs (s: "@tpoff", stream: file);
12984 break;
12985 case UNSPEC_NTPOFF:
12986 if (TARGET_64BIT)
12987 fputs (s: "@tpoff", stream: file);
12988 else
12989 fputs (s: "@ntpoff", stream: file);
12990 break;
12991 case UNSPEC_DTPOFF:
12992 fputs (s: "@dtpoff", stream: file);
12993 break;
12994 case UNSPEC_GOTNTPOFF:
12995 if (TARGET_64BIT)
12996 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12997 "@gottpoff(%rip)": "@gottpoff[rip]", stream: file);
12998 else
12999 fputs (s: "@gotntpoff", stream: file);
13000 break;
13001 case UNSPEC_INDNTPOFF:
13002 fputs (s: "@indntpoff", stream: file);
13003 break;
13004#if TARGET_MACHO
13005 case UNSPEC_MACHOPIC_OFFSET:
13006 putc ('-', file);
13007 machopic_output_function_base_name (file);
13008 break;
13009#endif
13010 default:
13011 output_operand_lossage ("invalid UNSPEC as operand");
13012 break;
13013 }
13014 break;
13015
13016 default:
13017 output_operand_lossage ("invalid expression as operand");
13018 }
13019}
13020
13021/* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13022 We need to emit DTP-relative relocations. */
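/* E.g. with the usual ASM_LONG directive this emits ".long foo@dtpoff" for
   SIZE == 4 and ".long foo@dtpoff, 0" for SIZE == 8, where the upper 32 bits
   of the 64-bit DTP-relative value are emitted as zero.  */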
13023
13024static void ATTRIBUTE_UNUSED
13025i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13026{
13027 fputs (ASM_LONG, stream: file);
13028 output_addr_const (file, x);
13029 fputs (s: "@dtpoff", stream: file);
13030 switch (size)
13031 {
13032 case 4:
13033 break;
13034 case 8:
13035 fputs (s: ", 0", stream: file);
13036 break;
13037 default:
13038 gcc_unreachable ();
13039 }
13040}
13041
13042/* Return true if X is a representation of the PIC register. This copes
13043 with calls from ix86_find_base_term, where the register might have
13044 been replaced by a cselib value. */
13045
13046static bool
13047ix86_pic_register_p (rtx x)
13048{
13049 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13050 return (pic_offset_table_rtx
13051 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13052 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
13053 return true;
13054 else if (!REG_P (x))
13055 return false;
13056 else if (pic_offset_table_rtx)
13057 {
13058 if (REGNO (x) == REGNO (pic_offset_table_rtx))
13059 return true;
13060 if (HARD_REGISTER_P (x)
13061 && !HARD_REGISTER_P (pic_offset_table_rtx)
13062 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
13063 return true;
13064 return false;
13065 }
13066 else
13067 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13068}
13069
13070/* Helper function for ix86_delegitimize_address.
13071 Attempt to delegitimize TLS local-exec accesses. */
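/* E.g. a local-exec access of the form %gs:foo@NTPOFF (%fs: in 64-bit mode),
   possibly with a base, index and constant addend, is turned back into plain
   `foo' plus those components, undoing legitimize_tls_address.  */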
13072
13073static rtx
13074ix86_delegitimize_tls_address (rtx orig_x)
13075{
13076 rtx x = orig_x, unspec;
13077 struct ix86_address addr;
13078
13079 if (!TARGET_TLS_DIRECT_SEG_REFS)
13080 return orig_x;
13081 if (MEM_P (x))
13082 x = XEXP (x, 0);
13083 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13084 return orig_x;
13085  if (ix86_decompose_address (x, &addr) == 0
13086 || addr.seg != DEFAULT_TLS_SEG_REG
13087 || addr.disp == NULL_RTX
13088 || GET_CODE (addr.disp) != CONST)
13089 return orig_x;
13090 unspec = XEXP (addr.disp, 0);
13091 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13092 unspec = XEXP (unspec, 0);
13093 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13094 return orig_x;
13095 x = XVECEXP (unspec, 0, 0);
13096 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13097 if (unspec != XEXP (addr.disp, 0))
13098 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13099 if (addr.index)
13100 {
13101 rtx idx = addr.index;
13102 if (addr.scale != 1)
13103 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13104 x = gen_rtx_PLUS (Pmode, idx, x);
13105 }
13106 if (addr.base)
13107 x = gen_rtx_PLUS (Pmode, addr.base, x);
13108 if (MEM_P (orig_x))
13109 x = replace_equiv_address_nv (orig_x, x);
13110 return x;
13111}
13112
13113/* In the name of slightly smaller debug output, and to cater to
13114 general assembler lossage, recognize PIC+GOTOFF and turn it back
13115 into a direct symbol reference.
13116
13117 On Darwin, this is necessary to avoid a crash, because Darwin
13118 has a different PIC label for each routine but the DWARF debugging
13119 information is not associated with any particular routine, so it's
13120 necessary to remove references to the PIC label from RTL stored by
13121 the DWARF output code.
13122
13123 This helper is used in the normal ix86_delegitimize_address
13124 entrypoint (e.g. used in the target delegitimization hook) and
13125 in ix86_find_base_term.  As a compile-time memory optimization, we
13126 avoid allocating rtxes that would not change the outcome for
13127 the callers (find_base_value and find_base_term). */
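/* A typical -m32 example:
     (plus (reg ebx) (const (unspec [foo] UNSPEC_GOTOFF)))
   delegitimizes back to plain `foo', with any register or constant addend
   re-attached around the result.  */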
13128
13129static inline rtx
13130ix86_delegitimize_address_1 (rtx x, bool base_term_p)
13131{
13132 rtx orig_x = delegitimize_mem_from_attrs (x);
13133 /* addend is NULL or some rtx if x is something+GOTOFF where
13134 something doesn't include the PIC register. */
13135 rtx addend = NULL_RTX;
13136 /* reg_addend is NULL or a multiple of some register. */
13137 rtx reg_addend = NULL_RTX;
13138 /* const_addend is NULL or a const_int. */
13139 rtx const_addend = NULL_RTX;
13140 /* This is the result, or NULL. */
13141 rtx result = NULL_RTX;
13142
13143 x = orig_x;
13144
13145 if (MEM_P (x))
13146 x = XEXP (x, 0);
13147
13148 if (TARGET_64BIT)
13149 {
13150 if (GET_CODE (x) == CONST
13151 && GET_CODE (XEXP (x, 0)) == PLUS
13152 && GET_MODE (XEXP (x, 0)) == Pmode
13153 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13154 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
13155 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
13156 {
13157 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
13158 base. A CONST can't be arg_pointer_rtx based. */
13159 if (base_term_p && MEM_P (orig_x))
13160 return orig_x;
13161 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
13162 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
13163 if (MEM_P (orig_x))
13164 x = replace_equiv_address_nv (orig_x, x);
13165 return x;
13166 }
13167
13168 if (GET_CODE (x) == CONST
13169 && GET_CODE (XEXP (x, 0)) == UNSPEC
13170 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
13171 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
13172 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
13173 {
13174 x = XVECEXP (XEXP (x, 0), 0, 0);
13175 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
13176 {
13177	      x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
13178 if (x == NULL_RTX)
13179 return orig_x;
13180 }
13181 return x;
13182 }
13183
13184 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
13185 return ix86_delegitimize_tls_address (orig_x);
13186
13187 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
13188 and -mcmodel=medium -fpic. */
13189 }
13190
13191 if (GET_CODE (x) != PLUS
13192 || GET_CODE (XEXP (x, 1)) != CONST)
13193 return ix86_delegitimize_tls_address (orig_x);
13194
13195 if (ix86_pic_register_p (XEXP (x, 0)))
13196 /* %ebx + GOT/GOTOFF */
13197 ;
13198 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13199 {
13200 /* %ebx + %reg * scale + GOT/GOTOFF */
13201 reg_addend = XEXP (x, 0);
13202 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13203 reg_addend = XEXP (reg_addend, 1);
13204 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13205 reg_addend = XEXP (reg_addend, 0);
13206 else
13207 {
13208 reg_addend = NULL_RTX;
13209 addend = XEXP (x, 0);
13210 }
13211 }
13212 else
13213 addend = XEXP (x, 0);
13214
13215 x = XEXP (XEXP (x, 1), 0);
13216 if (GET_CODE (x) == PLUS
13217 && CONST_INT_P (XEXP (x, 1)))
13218 {
13219 const_addend = XEXP (x, 1);
13220 x = XEXP (x, 0);
13221 }
13222
13223 if (GET_CODE (x) == UNSPEC
13224 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13225 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
13226 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
13227 && !MEM_P (orig_x) && !addend)))
13228 result = XVECEXP (x, 0, 0);
13229
13230  if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
13231 && !MEM_P (orig_x))
13232 result = XVECEXP (x, 0, 0);
13233
13234 if (! result)
13235 return ix86_delegitimize_tls_address (orig_x);
13236
13237 /* For (PLUS something CONST_INT) both find_base_{value,term} just
13238 recurse on the first operand. */
13239 if (const_addend && !base_term_p)
13240 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13241 if (reg_addend)
13242 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13243 if (addend)
13244 {
13245 /* If the rest of original X doesn't involve the PIC register, add
13246 addend and subtract pic_offset_table_rtx. This can happen e.g.
13247 for code like:
13248 leal (%ebx, %ecx, 4), %ecx
13249 ...
13250 movl foo@GOTOFF(%ecx), %edx
13251 in which case we return (%ecx - %ebx) + foo
13252 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
13253 and reload has completed. Don't do the latter for debug,
13254 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
13255 if (pic_offset_table_rtx
13256 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
13257 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13258 pic_offset_table_rtx),
13259 result);
13260 else if (base_term_p
13261 && pic_offset_table_rtx
13262 && !TARGET_MACHO
13263 && !TARGET_VXWORKS_RTP)
13264 {
13265 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
13266 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
13267 result = gen_rtx_PLUS (Pmode, tmp, result);
13268 }
13269 else
13270 return orig_x;
13271 }
13272 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13273 {
13274      result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
13275 if (result == NULL_RTX)
13276 return orig_x;
13277 }
13278 return result;
13279}
13280
13281/* The normal (non-base-term) use of the helper above. */
13282
13283static rtx
13284ix86_delegitimize_address (rtx x)
13285{
13286  return ix86_delegitimize_address_1 (x, false);
13287}
13288
13289/* If X is a machine specific address (i.e. a symbol or label being
13290 referenced as a displacement from the GOT implemented using an
13291 UNSPEC), then return the base term. Otherwise return X. */
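/* E.g. for TARGET_64BIT,
     (const (plus (unspec [foo] UNSPEC_GOTPCREL) (const_int 8)))
   has base term `foo'; 32-bit PIC addresses are instead handled via
   ix86_delegitimize_address_1 with BASE_TERM_P set.  */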
13292
13293rtx
13294ix86_find_base_term (rtx x)
13295{
13296 rtx term;
13297
13298 if (TARGET_64BIT)
13299 {
13300 if (GET_CODE (x) != CONST)
13301 return x;
13302 term = XEXP (x, 0);
13303 if (GET_CODE (term) == PLUS
13304 && CONST_INT_P (XEXP (term, 1)))
13305 term = XEXP (term, 0);
13306 if (GET_CODE (term) != UNSPEC
13307 || (XINT (term, 1) != UNSPEC_GOTPCREL
13308 && XINT (term, 1) != UNSPEC_PCREL))
13309 return x;
13310
13311 return XVECEXP (term, 0, 0);
13312 }
13313
13314  return ix86_delegitimize_address_1 (x, true);
13315}
13316
13317/* Return true if X shouldn't be emitted into the debug info.
13318 Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
13319 symbol easily into the .debug_info section, so rather than delegitimizing
13320 we assemble it as @gotoff.
13321 Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
13322 assembles that as the _GLOBAL_OFFSET_TABLE_-. expression. */
13323
13324static bool
13325ix86_const_not_ok_for_debug_p (rtx x)
13326{
13327 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
13328 return true;
13329
13330 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
13331 return true;
13332
13333 return false;
13334}
13335
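/* Output to FILE the one- to three-letter condition suffix (e.g. "e", "ne",
   "b", "nbe") for comparison CODE evaluated in flags mode MODE.  REVERSE
   inverts the condition first; FP selects the alternate spellings used for
   floating-point compares (e.g. "nbe" instead of "a", "u" instead of "p").  */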
13336static void
13337put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
13338 bool fp, FILE *file)
13339{
13340 const char *suffix;
13341
13342 if (mode == CCFPmode)
13343 {
13344 code = ix86_fp_compare_code_to_integer (code);
13345 mode = CCmode;
13346 }
13347 if (reverse)
13348 code = reverse_condition (code);
13349
13350 switch (code)
13351 {
13352 case EQ:
13353 gcc_assert (mode != CCGZmode);
13354 switch (mode)
13355 {
13356 case E_CCAmode:
13357 suffix = "a";
13358 break;
13359 case E_CCCmode:
13360 suffix = "c";
13361 break;
13362 case E_CCOmode:
13363 suffix = "o";
13364 break;
13365 case E_CCPmode:
13366 suffix = "p";
13367 break;
13368 case E_CCSmode:
13369 suffix = "s";
13370 break;
13371 default:
13372 suffix = "e";
13373 break;
13374 }
13375 break;
13376 case NE:
13377 gcc_assert (mode != CCGZmode);
13378 switch (mode)
13379 {
13380 case E_CCAmode:
13381 suffix = "na";
13382 break;
13383 case E_CCCmode:
13384 suffix = "nc";
13385 break;
13386 case E_CCOmode:
13387 suffix = "no";
13388 break;
13389 case E_CCPmode:
13390 suffix = "np";
13391 break;
13392 case E_CCSmode:
13393 suffix = "ns";
13394 break;
13395 default:
13396 suffix = "ne";
13397 break;
13398 }
13399 break;
13400 case GT:
13401 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13402 suffix = "g";
13403 break;
13404 case GTU:
13405 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13406 Those same assemblers have the same but opposite lossage on cmov. */
13407 if (mode == CCmode)
13408 suffix = fp ? "nbe" : "a";
13409 else
13410 gcc_unreachable ();
13411 break;
13412 case LT:
13413 switch (mode)
13414 {
13415 case E_CCNOmode:
13416 case E_CCGOCmode:
13417 suffix = "s";
13418 break;
13419
13420 case E_CCmode:
13421 case E_CCGCmode:
13422 case E_CCGZmode:
13423 suffix = "l";
13424 break;
13425
13426 default:
13427 gcc_unreachable ();
13428 }
13429 break;
13430 case LTU:
13431 if (mode == CCmode || mode == CCGZmode)
13432 suffix = "b";
13433 else if (mode == CCCmode)
13434 suffix = fp ? "b" : "c";
13435 else
13436 gcc_unreachable ();
13437 break;
13438 case GE:
13439 switch (mode)
13440 {
13441 case E_CCNOmode:
13442 case E_CCGOCmode:
13443 suffix = "ns";
13444 break;
13445
13446 case E_CCmode:
13447 case E_CCGCmode:
13448 case E_CCGZmode:
13449 suffix = "ge";
13450 break;
13451
13452 default:
13453 gcc_unreachable ();
13454 }
13455 break;
13456 case GEU:
13457 if (mode == CCmode || mode == CCGZmode)
13458 suffix = "nb";
13459 else if (mode == CCCmode)
13460 suffix = fp ? "nb" : "nc";
13461 else
13462 gcc_unreachable ();
13463 break;
13464 case LE:
13465 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13466 suffix = "le";
13467 break;
13468 case LEU:
13469 if (mode == CCmode)
13470 suffix = "be";
13471 else
13472 gcc_unreachable ();
13473 break;
13474 case UNORDERED:
13475 suffix = fp ? "u" : "p";
13476 break;
13477 case ORDERED:
13478 suffix = fp ? "nu" : "np";
13479 break;
13480 default:
13481 gcc_unreachable ();
13482 }
13483 fputs (s: suffix, stream: file);
13484}
13485
13486/* Print the name of register X to FILE based on its machine mode and number.
13487 If CODE is 'w', pretend the mode is HImode.
13488 If CODE is 'b', pretend the mode is QImode.
13489 If CODE is 'k', pretend the mode is SImode.
13490 If CODE is 'q', pretend the mode is DImode.
13491 If CODE is 'x', pretend the mode is V4SFmode.
13492 If CODE is 't', pretend the mode is V8SFmode.
13493 If CODE is 'g', pretend the mode is V16SFmode.
13494 If CODE is 'h', pretend the reg is the 'high' byte register.
13495 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13496 If CODE is 'd', duplicate the operand for AVX instruction.
13497 If CODE is 'V', print naked full integer register name without %.
13498 */
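/* For instance, in AT&T syntax hard register %eax printed with CODE 'w'
   comes out as "%ax", with CODE 'b' as "%al" and with CODE 'h' as "%ah",
   while %xmm3 printed with CODE 't' gains the 'y' prefix and comes out
   as "%ymm3".  */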
13499
13500void
13501print_reg (rtx x, int code, FILE *file)
13502{
13503 const char *reg;
13504 int msize;
13505 unsigned int regno;
13506 bool duplicated;
13507
13508 if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
13509 putc (c: '%', stream: file);
13510
13511 if (x == pc_rtx)
13512 {
13513 gcc_assert (TARGET_64BIT);
13514 fputs (s: "rip", stream: file);
13515 return;
13516 }
13517
13518 if (code == 'y' && STACK_TOP_P (x))
13519 {
13520 fputs (s: "st(0)", stream: file);
13521 return;
13522 }
13523
13524 if (code == 'w')
13525 msize = 2;
13526 else if (code == 'b')
13527 msize = 1;
13528 else if (code == 'k')
13529 msize = 4;
13530 else if (code == 'q')
13531 msize = 8;
13532 else if (code == 'h')
13533 msize = 0;
13534 else if (code == 'x')
13535 msize = 16;
13536 else if (code == 't')
13537 msize = 32;
13538 else if (code == 'g')
13539 msize = 64;
13540 else
13541 msize = GET_MODE_SIZE (GET_MODE (x));
13542
13543 regno = REGNO (x);
13544
13545 if (regno == ARG_POINTER_REGNUM
13546 || regno == FRAME_POINTER_REGNUM
13547 || regno == FPSR_REG)
13548 {
13549 output_operand_lossage
13550 ("invalid use of register '%s'", reg_names[regno]);
13551 return;
13552 }
13553 else if (regno == FLAGS_REG)
13554 {
13555 output_operand_lossage ("invalid use of asm flag output");
13556 return;
13557 }
13558
13559 if (code == 'V')
13560 {
13561 if (GENERAL_REGNO_P (regno))
13562 msize = GET_MODE_SIZE (word_mode);
13563 else
13564 error ("%<V%> modifier on non-integer register");
13565 }
13566
13567 duplicated = code == 'd' && TARGET_AVX;
13568
13569 switch (msize)
13570 {
13571 case 16:
13572 case 12:
13573 case 8:
13574 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
13575 warning (0, "unsupported size for integer register");
13576 /* FALLTHRU */
13577 case 4:
13578 if (LEGACY_INT_REGNO_P (regno))
13579 putc (c: msize > 4 && TARGET_64BIT ? 'r' : 'e', stream: file);
13580 /* FALLTHRU */
13581 case 2:
13582 normal:
13583 reg = hi_reg_name[regno];
13584 break;
13585 case 1:
13586 if (regno >= ARRAY_SIZE (qi_reg_name))
13587 goto normal;
13588 if (!ANY_QI_REGNO_P (regno))
13589 error ("unsupported size for integer register");
13590 reg = qi_reg_name[regno];
13591 break;
13592 case 0:
13593 if (regno >= ARRAY_SIZE (qi_high_reg_name))
13594 goto normal;
13595 reg = qi_high_reg_name[regno];
13596 break;
13597 case 32:
13598 case 64:
13599 if (SSE_REGNO_P (regno))
13600 {
13601 gcc_assert (!duplicated);
13602 putc (c: msize == 32 ? 'y' : 'z', stream: file);
13603 reg = hi_reg_name[regno] + 1;
13604 break;
13605 }
13606 goto normal;
13607 default:
13608 gcc_unreachable ();
13609 }
13610
13611 fputs (s: reg, stream: file);
13612
13613  /* Irritatingly, AMD extended registers use a
13614     different naming convention: "r%d[bwd]". */
13615 if (REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
13616 {
13617 gcc_assert (TARGET_64BIT);
13618 switch (msize)
13619 {
13620 case 0:
13621 error ("extended registers have no high halves");
13622 break;
13623 case 1:
13624 putc (c: 'b', stream: file);
13625 break;
13626 case 2:
13627 putc (c: 'w', stream: file);
13628 break;
13629 case 4:
13630 putc (c: 'd', stream: file);
13631 break;
13632 case 8:
13633 /* no suffix */
13634 break;
13635 default:
13636 error ("unsupported operand size for extended register");
13637 break;
13638 }
13639 return;
13640 }
13641
13642 if (duplicated)
13643 {
13644 if (ASSEMBLER_DIALECT == ASM_ATT)
13645 fprintf (stream: file, format: ", %%%s", reg);
13646 else
13647 fprintf (stream: file, format: ", %s", reg);
13648 }
13649}
13650
13651/* Meaning of CODE:
13652 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13653 C -- print opcode suffix for set/cmov insn.
13654 c -- like C, but print reversed condition
13655 F,f -- likewise, but for floating-point.
13656 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13657 otherwise nothing
13658 R -- print embedded rounding and sae.
13659 r -- print only sae.
13660 z -- print the opcode suffix for the size of the current operand.
13661 Z -- likewise, with special suffixes for x87 instructions.
13662 * -- print a star (in certain assembler syntax)
13663 A -- print an absolute memory reference.
13664 E -- print address with DImode register names if TARGET_64BIT.
13665 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13666 s -- print a shift double count, followed by the assembler's argument
13667 delimiter.
13668 b -- print the QImode name of the register for the indicated operand.
13669 %b0 would print %al if operands[0] is reg 0.
13670 w -- likewise, print the HImode name of the register.
13671 k -- likewise, print the SImode name of the register.
13672 q -- likewise, print the DImode name of the register.
13673 x -- likewise, print the V4SFmode name of the register.
13674 t -- likewise, print the V8SFmode name of the register.
13675 g -- likewise, print the V16SFmode name of the register.
13676 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13677 y -- print "st(0)" instead of "st" as a register.
13678 d -- print duplicated register operand for AVX instruction.
13679 D -- print condition for SSE cmp instruction.
13680 P -- if PIC, print an @PLT suffix. For -fno-plt, load function
13681 address from GOT.
13682 p -- print raw symbol name.
13683 X -- don't print any sort of PIC '@' suffix for a symbol.
13684 & -- print some in-use local-dynamic symbol name.
13685 H -- print a memory address offset by 8; used for sse high-parts
13686 Y -- print condition for XOP pcom* instruction.
13687 V -- print naked full integer register name without %.
13688 + -- print a branch hint as 'cs' or 'ds' prefix
13689 ; -- print a semicolon (after prefixes due to bug in older gas).
13690 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13691 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
13692 M -- print addr32 prefix for TARGET_X32 with VSIB address.
13693 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
13694 N -- print maskz if it's constant 0 operand.
13695 */
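/* For example, "%k1" in an insn template prints operand 1 using its SImode
   register name (e.g. "%eax"), "%z0" appends the size suffix implied by
   operand 0's mode (b/w/l/q in AT&T syntax), and "%~" expands to the single
   letter 'i' or 'f' so one template can name either the integer or the
   floating-point form of an AVX instruction.  */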
13696
13697void
13698ix86_print_operand (FILE *file, rtx x, int code)
13699{
13700 if (code)
13701 {
13702 switch (code)
13703 {
13704 case 'A':
13705 switch (ASSEMBLER_DIALECT)
13706 {
13707 case ASM_ATT:
13708 putc (c: '*', stream: file);
13709 break;
13710
13711 case ASM_INTEL:
13712 /* Intel syntax. For absolute addresses, registers should not
13713	     be surrounded by brackets. */
13714 if (!REG_P (x))
13715 {
13716 putc (c: '[', stream: file);
13717 ix86_print_operand (file, x, code: 0);
13718 putc (c: ']', stream: file);
13719 return;
13720 }
13721 break;
13722
13723 default:
13724 gcc_unreachable ();
13725 }
13726
13727 ix86_print_operand (file, x, code: 0);
13728 return;
13729
13730 case 'E':
13731 /* Wrap address in an UNSPEC to declare special handling. */
13732 if (TARGET_64BIT)
13733 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
13734
13735 output_address (VOIDmode, x);
13736 return;
13737
13738 case 'L':
13739 if (ASSEMBLER_DIALECT == ASM_ATT)
13740 putc (c: 'l', stream: file);
13741 return;
13742
13743 case 'W':
13744 if (ASSEMBLER_DIALECT == ASM_ATT)
13745 putc (c: 'w', stream: file);
13746 return;
13747
13748 case 'B':
13749 if (ASSEMBLER_DIALECT == ASM_ATT)
13750 putc (c: 'b', stream: file);
13751 return;
13752
13753 case 'Q':
13754 if (ASSEMBLER_DIALECT == ASM_ATT)
13755 putc (c: 'l', stream: file);
13756 return;
13757
13758 case 'S':
13759 if (ASSEMBLER_DIALECT == ASM_ATT)
13760 putc (c: 's', stream: file);
13761 return;
13762
13763 case 'T':
13764 if (ASSEMBLER_DIALECT == ASM_ATT)
13765 putc (c: 't', stream: file);
13766 return;
13767
13768 case 'O':
13769#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13770 if (ASSEMBLER_DIALECT != ASM_ATT)
13771 return;
13772
13773 switch (GET_MODE_SIZE (GET_MODE (x)))
13774 {
13775 case 2:
13776 putc ('w', file);
13777 break;
13778
13779 case 4:
13780 putc ('l', file);
13781 break;
13782
13783 case 8:
13784 putc ('q', file);
13785 break;
13786
13787 default:
13788 output_operand_lossage ("invalid operand size for operand "
13789 "code 'O'");
13790 return;
13791 }
13792
13793 putc ('.', file);
13794#endif
13795 return;
13796
13797 case 'z':
13798 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13799 {
13800 /* Opcodes don't get size suffixes if using Intel opcodes. */
13801 if (ASSEMBLER_DIALECT == ASM_INTEL)
13802 return;
13803
13804 switch (GET_MODE_SIZE (GET_MODE (x)))
13805 {
13806 case 1:
13807 putc (c: 'b', stream: file);
13808 return;
13809
13810 case 2:
13811 putc (c: 'w', stream: file);
13812 return;
13813
13814 case 4:
13815 putc (c: 'l', stream: file);
13816 return;
13817
13818 case 8:
13819 putc (c: 'q', stream: file);
13820 return;
13821
13822 default:
13823 output_operand_lossage ("invalid operand size for operand "
13824 "code 'z'");
13825 return;
13826 }
13827 }
13828
13829 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13830 {
13831 if (this_is_asm_operands)
13832 warning_for_asm (this_is_asm_operands,
13833 "non-integer operand used with operand code %<z%>");
13834 else
13835 warning (0, "non-integer operand used with operand code %<z%>");
13836 }
13837 /* FALLTHRU */
13838
13839 case 'Z':
13840 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13841 if (ASSEMBLER_DIALECT == ASM_INTEL)
13842 return;
13843
13844 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13845 {
13846 switch (GET_MODE_SIZE (GET_MODE (x)))
13847 {
13848 case 2:
13849#ifdef HAVE_AS_IX86_FILDS
13850 putc (c: 's', stream: file);
13851#endif
13852 return;
13853
13854 case 4:
13855 putc (c: 'l', stream: file);
13856 return;
13857
13858 case 8:
13859#ifdef HAVE_AS_IX86_FILDQ
13860 putc (c: 'q', stream: file);
13861#else
13862 fputs ("ll", file);
13863#endif
13864 return;
13865
13866 default:
13867 break;
13868 }
13869 }
13870 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13871 {
13872 /* 387 opcodes don't get size suffixes
13873 if the operands are registers. */
13874 if (STACK_REG_P (x))
13875 return;
13876
13877 switch (GET_MODE_SIZE (GET_MODE (x)))
13878 {
13879 case 4:
13880 putc (c: 's', stream: file);
13881 return;
13882
13883 case 8:
13884 putc (c: 'l', stream: file);
13885 return;
13886
13887 case 12:
13888 case 16:
13889 putc (c: 't', stream: file);
13890 return;
13891
13892 default:
13893 break;
13894 }
13895 }
13896 else
13897 {
13898 output_operand_lossage ("invalid operand type used with "
13899 "operand code '%c'", code);
13900 return;
13901 }
13902
13903 output_operand_lossage ("invalid operand size for operand code '%c'",
13904 code);
13905 return;
13906
13907 case 'd':
13908 case 'b':
13909 case 'w':
13910 case 'k':
13911 case 'q':
13912 case 'h':
13913 case 't':
13914 case 'g':
13915 case 'y':
13916 case 'x':
13917 case 'X':
13918 case 'P':
13919 case 'p':
13920 case 'V':
13921 break;
13922
13923 case 's':
13924 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13925 {
13926 ix86_print_operand (file, x, code: 0);
13927 fputs (s: ", ", stream: file);
13928 }
13929 return;
13930
13931 case 'Y':
13932 switch (GET_CODE (x))
13933 {
13934 case NE:
13935 fputs (s: "neq", stream: file);
13936 break;
13937 case EQ:
13938 fputs (s: "eq", stream: file);
13939 break;
13940 case GE:
13941 case GEU:
13942 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", stream: file);
13943 break;
13944 case GT:
13945 case GTU:
13946 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", stream: file);
13947 break;
13948 case LE:
13949 case LEU:
13950 fputs (s: "le", stream: file);
13951 break;
13952 case LT:
13953 case LTU:
13954 fputs (s: "lt", stream: file);
13955 break;
13956 case UNORDERED:
13957 fputs (s: "unord", stream: file);
13958 break;
13959 case ORDERED:
13960 fputs (s: "ord", stream: file);
13961 break;
13962 case UNEQ:
13963 fputs (s: "ueq", stream: file);
13964 break;
13965 case UNGE:
13966 fputs (s: "nlt", stream: file);
13967 break;
13968 case UNGT:
13969 fputs (s: "nle", stream: file);
13970 break;
13971 case UNLE:
13972 fputs (s: "ule", stream: file);
13973 break;
13974 case UNLT:
13975 fputs (s: "ult", stream: file);
13976 break;
13977 case LTGT:
13978 fputs (s: "une", stream: file);
13979 break;
13980 default:
13981 output_operand_lossage ("operand is not a condition code, "
13982 "invalid operand code 'Y'");
13983 return;
13984 }
13985 return;
13986
13987 case 'D':
13988	  /* Little bit of braindamage here.  The SSE compare instructions
13989	     use completely different names for the comparisons than the
13990	     fp conditional moves do. */
13991 switch (GET_CODE (x))
13992 {
13993 case UNEQ:
13994 if (TARGET_AVX)
13995 {
13996 fputs (s: "eq_us", stream: file);
13997 break;
13998 }
13999 /* FALLTHRU */
14000 case EQ:
14001 fputs (s: "eq", stream: file);
14002 break;
14003 case UNLT:
14004 if (TARGET_AVX)
14005 {
14006 fputs (s: "nge", stream: file);
14007 break;
14008 }
14009 /* FALLTHRU */
14010 case LT:
14011 fputs (s: "lt", stream: file);
14012 break;
14013 case UNLE:
14014 if (TARGET_AVX)
14015 {
14016 fputs (s: "ngt", stream: file);
14017 break;
14018 }
14019 /* FALLTHRU */
14020 case LE:
14021 fputs (s: "le", stream: file);
14022 break;
14023 case UNORDERED:
14024 fputs (s: "unord", stream: file);
14025 break;
14026 case LTGT:
14027 if (TARGET_AVX)
14028 {
14029 fputs (s: "neq_oq", stream: file);
14030 break;
14031 }
14032 /* FALLTHRU */
14033 case NE:
14034 fputs (s: "neq", stream: file);
14035 break;
14036 case GE:
14037 if (TARGET_AVX)
14038 {
14039 fputs (s: "ge", stream: file);
14040 break;
14041 }
14042 /* FALLTHRU */
14043 case UNGE:
14044 fputs (s: "nlt", stream: file);
14045 break;
14046 case GT:
14047 if (TARGET_AVX)
14048 {
14049 fputs (s: "gt", stream: file);
14050 break;
14051 }
14052 /* FALLTHRU */
14053 case UNGT:
14054 fputs (s: "nle", stream: file);
14055 break;
14056 case ORDERED:
14057 fputs (s: "ord", stream: file);
14058 break;
14059 default:
14060 output_operand_lossage ("operand is not a condition code, "
14061 "invalid operand code 'D'");
14062 return;
14063 }
14064 return;
14065
14066 case 'F':
14067 case 'f':
14068#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14069 if (ASSEMBLER_DIALECT == ASM_ATT)
14070 putc ('.', file);
14071 gcc_fallthrough ();
14072#endif
14073
14074 case 'C':
14075 case 'c':
14076 if (!COMPARISON_P (x))
14077 {
14078 output_operand_lossage ("operand is not a condition code, "
14079 "invalid operand code '%c'", code);
14080 return;
14081 }
14082 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
14083 reverse: code == 'c' || code == 'f',
14084 fp: code == 'F' || code == 'f',
14085 file);
14086 return;
14087
14088 case 'H':
14089 if (!offsettable_memref_p (x))
14090 {
14091 output_operand_lossage ("operand is not an offsettable memory "
14092 "reference, invalid operand code 'H'");
14093 return;
14094 }
14095 /* It doesn't actually matter what mode we use here, as we're
14096 only going to use this for printing. */
14097 x = adjust_address_nv (x, DImode, 8);
14098 /* Output 'qword ptr' for intel assembler dialect. */
14099 if (ASSEMBLER_DIALECT == ASM_INTEL)
14100 code = 'q';
14101 break;
14102
14103 case 'K':
14104 if (!CONST_INT_P (x))
14105 {
14106 output_operand_lossage ("operand is not an integer, invalid "
14107 "operand code 'K'");
14108 return;
14109 }
14110
14111 if (INTVAL (x) & IX86_HLE_ACQUIRE)
14112#ifdef HAVE_AS_IX86_HLE
14113 fputs (s: "xacquire ", stream: file);
14114#else
14115 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
14116#endif
14117 else if (INTVAL (x) & IX86_HLE_RELEASE)
14118#ifdef HAVE_AS_IX86_HLE
14119 fputs (s: "xrelease ", stream: file);
14120#else
14121 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
14122#endif
14123	  /* We do not want to print the value of the operand. */
14124 return;
14125
14126 case 'N':
14127 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
14128 fputs (s: "{z}", stream: file);
14129 return;
14130
14131 case 'r':
14132 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
14133 {
14134 output_operand_lossage ("operand is not a specific integer, "
14135 "invalid operand code 'r'");
14136 return;
14137 }
14138
14139 if (ASSEMBLER_DIALECT == ASM_INTEL)
14140 fputs (s: ", ", stream: file);
14141
14142 fputs (s: "{sae}", stream: file);
14143
14144 if (ASSEMBLER_DIALECT == ASM_ATT)
14145 fputs (s: ", ", stream: file);
14146
14147 return;
14148
14149 case 'R':
14150 if (!CONST_INT_P (x))
14151 {
14152 output_operand_lossage ("operand is not an integer, invalid "
14153 "operand code 'R'");
14154 return;
14155 }
14156
14157 if (ASSEMBLER_DIALECT == ASM_INTEL)
14158 fputs (s: ", ", stream: file);
14159
14160 switch (INTVAL (x))
14161 {
14162 case ROUND_NEAREST_INT | ROUND_SAE:
14163 fputs (s: "{rn-sae}", stream: file);
14164 break;
14165 case ROUND_NEG_INF | ROUND_SAE:
14166 fputs (s: "{rd-sae}", stream: file);
14167 break;
14168 case ROUND_POS_INF | ROUND_SAE:
14169 fputs (s: "{ru-sae}", stream: file);
14170 break;
14171 case ROUND_ZERO | ROUND_SAE:
14172 fputs (s: "{rz-sae}", stream: file);
14173 break;
14174 default:
14175 output_operand_lossage ("operand is not a specific integer, "
14176 "invalid operand code 'R'");
14177 }
14178
14179 if (ASSEMBLER_DIALECT == ASM_ATT)
14180 fputs (s: ", ", stream: file);
14181
14182 return;
14183
14184 case '*':
14185 if (ASSEMBLER_DIALECT == ASM_ATT)
14186 putc (c: '*', stream: file);
14187 return;
14188
14189 case '&':
14190 {
14191 const char *name = get_some_local_dynamic_name ();
14192 if (name == NULL)
14193 output_operand_lossage ("'%%&' used without any "
14194 "local dynamic TLS references");
14195 else
14196 assemble_name (file, name);
14197 return;
14198 }
14199
14200 case '+':
14201 {
14202 rtx x;
14203
14204 if (!optimize
14205 || optimize_function_for_size_p (cfun)
14206 || !TARGET_BRANCH_PREDICTION_HINTS)
14207 return;
14208
14209 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14210 if (x)
14211 {
14212 int pred_val = profile_probability::from_reg_br_prob_note
14213 (XINT (x, 0)).to_reg_br_prob_base ();
14214
14215 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14216 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14217 {
14218 bool taken = pred_val > REG_BR_PROB_BASE / 2;
14219 bool cputaken
14220 = final_forward_branch_p (current_output_insn) == 0;
14221
14222 /* Emit hints only in the case default branch prediction
14223 heuristics would fail. */
14224 if (taken != cputaken)
14225 {
14226 /* We use 3e (DS) prefix for taken branches and
14227 2e (CS) prefix for not taken branches. */
14228 if (taken)
14229 fputs (s: "ds ; ", stream: file);
14230 else
14231 fputs (s: "cs ; ", stream: file);
14232 }
14233 }
14234 }
14235 return;
14236 }
14237
14238 case ';':
14239#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14240 putc (';', file);
14241#endif
14242 return;
14243
14244 case '~':
14245 putc (TARGET_AVX2 ? 'i' : 'f', stream: file);
14246 return;
14247
14248 case 'M':
14249 if (TARGET_X32)
14250 {
14251	    /* NB: 32-bit indices in a VSIB address are sign-extended
14252	       to 64 bits.  In x32, a 32-bit address like 0xf7fa3010 is
14253	       sign-extended to 0xfffffffff7fa3010, which is an invalid
14254	       address.  Add the addr32 prefix if there is neither a base
14255	       register nor a symbol. */
14256 bool ok;
14257 struct ix86_address parts;
14258 ok = ix86_decompose_address (addr: x, out: &parts);
14259 gcc_assert (ok && parts.index == NULL_RTX);
14260 if (parts.base == NULL_RTX
14261 && (parts.disp == NULL_RTX
14262 || !symbolic_operand (parts.disp,
14263 GET_MODE (parts.disp))))
14264 fputs (s: "addr32 ", stream: file);
14265 }
14266 return;
14267
14268 case '^':
14269 if (TARGET_64BIT && Pmode != word_mode)
14270 fputs (s: "addr32 ", stream: file);
14271 return;
14272
14273 case '!':
14274 if (ix86_notrack_prefixed_insn_p (current_output_insn))
14275 fputs (s: "notrack ", stream: file);
14276 return;
14277
14278 default:
14279 output_operand_lossage ("invalid operand code '%c'", code);
14280 }
14281 }
14282
14283 if (REG_P (x))
14284 print_reg (x, code, file);
14285
14286 else if (MEM_P (x))
14287 {
14288 rtx addr = XEXP (x, 0);
14289
14290 /* No `byte ptr' prefix for call instructions ... */
14291 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
14292 {
14293 machine_mode mode = GET_MODE (x);
14294 const char *size;
14295
14296 /* Check for explicit size override codes. */
14297 if (code == 'b')
14298 size = "BYTE";
14299 else if (code == 'w')
14300 size = "WORD";
14301 else if (code == 'k')
14302 size = "DWORD";
14303 else if (code == 'q')
14304 size = "QWORD";
14305 else if (code == 'x')
14306 size = "XMMWORD";
14307 else if (code == 't')
14308 size = "YMMWORD";
14309 else if (code == 'g')
14310 size = "ZMMWORD";
14311 else if (mode == BLKmode)
14312 /* ... or BLKmode operands, when not overridden. */
14313 size = NULL;
14314 else
14315 switch (GET_MODE_SIZE (mode))
14316 {
14317 case 1: size = "BYTE"; break;
14318 case 2: size = "WORD"; break;
14319 case 4: size = "DWORD"; break;
14320 case 8: size = "QWORD"; break;
14321 case 12: size = "TBYTE"; break;
14322 case 16:
14323 if (mode == XFmode)
14324 size = "TBYTE";
14325 else
14326 size = "XMMWORD";
14327 break;
14328 case 32: size = "YMMWORD"; break;
14329 case 64: size = "ZMMWORD"; break;
14330 default:
14331 gcc_unreachable ();
14332 }
14333 if (size)
14334 {
14335 fputs (s: size, stream: file);
14336 fputs (s: " PTR ", stream: file);
14337 }
14338 }
14339
14340 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
14341 output_operand_lossage ("invalid constraints for operand");
14342 else
14343 ix86_print_operand_address_as
14344 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
14345 }
14346
14347 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
14348 {
14349 long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
14350 REAL_MODE_FORMAT (HFmode));
14351 if (ASSEMBLER_DIALECT == ASM_ATT)
14352 putc (c: '$', stream: file);
14353 fprintf (stream: file, format: "0x%04x", (unsigned int) l);
14354 }
14355
14356 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
14357 {
14358 long l;
14359
14360 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14361
14362 if (ASSEMBLER_DIALECT == ASM_ATT)
14363 putc (c: '$', stream: file);
14364      /* Sign-extend the 32-bit SFmode immediate to 8 bytes. */
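      /* E.g. -1.0f has the single-precision bit pattern 0xbf800000, so with
	 'q' it is printed sign-extended as 0xffffffffbf800000.  */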
14365 if (code == 'q')
14366 fprintf (stream: file, format: "0x%08" HOST_LONG_LONG_FORMAT "x",
14367 (unsigned long long) (int) l);
14368 else
14369 fprintf (stream: file, format: "0x%08x", (unsigned int) l);
14370 }
14371
14372 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
14373 {
14374 long l[2];
14375
14376 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14377
14378 if (ASSEMBLER_DIALECT == ASM_ATT)
14379 putc (c: '$', stream: file);
14380 fprintf (stream: file, format: "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14381 }
14382
14383 /* These float cases don't actually occur as immediate operands. */
14384 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
14385 {
14386 char dstr[30];
14387
14388 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14389 fputs (s: dstr, stream: file);
14390 }
14391
14392 /* Print bcst_mem_operand. */
14393 else if (GET_CODE (x) == VEC_DUPLICATE)
14394 {
14395 machine_mode vmode = GET_MODE (x);
14396 /* Must be bcst_memory_operand. */
14397 gcc_assert (bcst_mem_operand (x, vmode));
14398
14399 rtx mem = XEXP (x,0);
14400 ix86_print_operand (file, x: mem, code: 0);
14401
14402 switch (vmode)
14403 {
14404 case E_V2DImode:
14405 case E_V2DFmode:
14406 fputs (s: "{1to2}", stream: file);
14407 break;
14408 case E_V4SImode:
14409 case E_V4SFmode:
14410 case E_V4DImode:
14411 case E_V4DFmode:
14412 fputs (s: "{1to4}", stream: file);
14413 break;
14414 case E_V8SImode:
14415 case E_V8SFmode:
14416 case E_V8DFmode:
14417 case E_V8DImode:
14418 case E_V8HFmode:
14419 fputs (s: "{1to8}", stream: file);
14420 break;
14421 case E_V16SFmode:
14422 case E_V16SImode:
14423 case E_V16HFmode:
14424 fputs (s: "{1to16}", stream: file);
14425 break;
14426 case E_V32HFmode:
14427 fputs (s: "{1to32}", stream: file);
14428 break;
14429 default:
14430 gcc_unreachable ();
14431 }
14432 }
14433
14434 else
14435 {
14436 /* We have patterns that allow zero sets of memory, for instance.
14437 In 64-bit mode, we should probably support all 8-byte vectors,
14438 since we can in fact encode that into an immediate. */
14439 if (GET_CODE (x) == CONST_VECTOR)
14440 {
14441 if (x != CONST0_RTX (GET_MODE (x)))
14442 output_operand_lossage ("invalid vector immediate");
14443 x = const0_rtx;
14444 }
14445
14446 if (code == 'P')
14447 {
14448 if (ix86_force_load_from_GOT_p (x, call_p: true))
14449 {
14450 /* For inline assembly statement, load function address
14451 from GOT with 'P' operand modifier to avoid PLT. */
14452 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14453 (TARGET_64BIT
14454 ? UNSPEC_GOTPCREL
14455 : UNSPEC_GOT));
14456 x = gen_rtx_CONST (Pmode, x);
14457 x = gen_const_mem (Pmode, x);
14458 ix86_print_operand (file, x, code: 'A');
14459 return;
14460 }
14461 }
14462 else if (code != 'p')
14463 {
14464 if (CONST_INT_P (x))
14465 {
14466 if (ASSEMBLER_DIALECT == ASM_ATT)
14467 putc (c: '$', stream: file);
14468 }
14469 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14470 || GET_CODE (x) == LABEL_REF)
14471 {
14472 if (ASSEMBLER_DIALECT == ASM_ATT)
14473 putc (c: '$', stream: file);
14474 else
14475 fputs (s: "OFFSET FLAT:", stream: file);
14476 }
14477 }
14478 if (CONST_INT_P (x))
14479 fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14480 else if (flag_pic || MACHOPIC_INDIRECT)
14481 output_pic_addr_const (file, x, code);
14482 else
14483 output_addr_const (file, x);
14484 }
14485}
14486
14487static bool
14488ix86_print_operand_punct_valid_p (unsigned char code)
14489{
14490 return (code == '*' || code == '+' || code == '&' || code == ';'
14491 || code == '~' || code == '^' || code == '!');
14492}
14493
14494/* Print a memory operand whose address is ADDR. */
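/* The same decomposed address is rendered in either dialect, e.g. AT&T
   "disp(%base,%index,scale)" versus Intel "[base+index*scale+disp]".
   RAW suppresses the segment prefix and the RIP-relative shorthand.  */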
14495
14496static void
14497ix86_print_operand_address_as (FILE *file, rtx addr,
14498 addr_space_t as, bool raw)
14499{
14500 struct ix86_address parts;
14501 rtx base, index, disp;
14502 int scale;
14503 int ok;
14504 bool vsib = false;
14505 int code = 0;
14506
14507 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
14508 {
14509 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), out: &parts);
14510 gcc_assert (parts.index == NULL_RTX);
14511 parts.index = XVECEXP (addr, 0, 1);
14512 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
14513 addr = XVECEXP (addr, 0, 0);
14514 vsib = true;
14515 }
14516 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
14517 {
14518 gcc_assert (TARGET_64BIT);
14519 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), out: &parts);
14520 code = 'q';
14521 }
14522 else
14523 ok = ix86_decompose_address (addr, out: &parts);
14524
14525 gcc_assert (ok);
14526
14527 base = parts.base;
14528 index = parts.index;
14529 disp = parts.disp;
14530 scale = parts.scale;
14531
14532 if (ADDR_SPACE_GENERIC_P (as))
14533 as = parts.seg;
14534 else
14535 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
14536
14537 if (!ADDR_SPACE_GENERIC_P (as) && !raw)
14538 {
14539 if (ASSEMBLER_DIALECT == ASM_ATT)
14540 putc (c: '%', stream: file);
14541
14542 switch (as)
14543 {
14544 case ADDR_SPACE_SEG_FS:
14545 fputs (s: "fs:", stream: file);
14546 break;
14547 case ADDR_SPACE_SEG_GS:
14548 fputs (s: "gs:", stream: file);
14549 break;
14550 default:
14551 gcc_unreachable ();
14552 }
14553 }
14554
14555  /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
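  /* E.g. this prints "foo(%rip)" so the assembler emits RIP-relative
     addressing, which in 64-bit mode is one byte shorter than an absolute
     32-bit address (the latter typically needs an extra SIB byte).  */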
14556 if (TARGET_64BIT && !base && !index && !raw)
14557 {
14558 rtx symbol = disp;
14559
14560 if (GET_CODE (disp) == CONST
14561 && GET_CODE (XEXP (disp, 0)) == PLUS
14562 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14563 symbol = XEXP (XEXP (disp, 0), 0);
14564
14565 if (GET_CODE (symbol) == LABEL_REF
14566 || (GET_CODE (symbol) == SYMBOL_REF
14567 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14568 base = pc_rtx;
14569 }
14570
14571 if (!base && !index)
14572 {
14573      /* A displacement-only address requires special attention. */
14574 if (CONST_INT_P (disp))
14575 {
14576 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
14577 fputs (s: "ds:", stream: file);
14578 fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14579 }
14580 /* Load the external function address via the GOT slot to avoid PLT. */
14581 else if (GET_CODE (disp) == CONST
14582 && GET_CODE (XEXP (disp, 0)) == UNSPEC
14583 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
14584 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
14585 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
14586 output_pic_addr_const (file, x: disp, code: 0);
14587 else if (flag_pic)
14588 output_pic_addr_const (file, x: disp, code: 0);
14589 else
14590 output_addr_const (file, disp);
14591 }
14592 else
14593 {
14594 /* Print SImode register names to force addr32 prefix. */
14595 if (SImode_address_operand (addr, VOIDmode))
14596 {
14597 if (flag_checking)
14598 {
14599 gcc_assert (TARGET_64BIT);
14600 switch (GET_CODE (addr))
14601 {
14602 case SUBREG:
14603 gcc_assert (GET_MODE (addr) == SImode);
14604 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
14605 break;
14606 case ZERO_EXTEND:
14607 case AND:
14608 gcc_assert (GET_MODE (addr) == DImode);
14609 break;
14610 default:
14611 gcc_unreachable ();
14612 }
14613 }
14614 gcc_assert (!code);
14615 code = 'k';
14616 }
14617 else if (code == 0
14618 && TARGET_X32
14619 && disp
14620 && CONST_INT_P (disp)
14621 && INTVAL (disp) < -16*1024*1024)
14622 {
14623 /* X32 runs in 64-bit mode, where displacement, DISP, in
14624 address DISP(%r64), is encoded as 32-bit immediate sign-
14625 extended from 32-bit to 64-bit. For -0x40000300(%r64),
14626 address is %r64 + 0xffffffffbffffd00. When %r64 <
14627 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
14628 which is invalid for x32. The correct address is %r64
14629 - 0x40000300 == 0xf7ffdd64. To properly encode
14630 -0x40000300(%r64) for x32, we zero-extend negative
14631 displacement by forcing addr32 prefix which truncates
14632 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
14633 zero-extend all negative displacements, including -1(%rsp).
14634 However, for small negative displacements, sign-extension
14635 won't cause overflow. We only zero-extend negative
14636	     displacements if they are < -16*1024*1024, which is also the bound used
14637 to check legitimate address displacements for PIC. */
14638 code = 'k';
14639 }
14640
14641 /* Since the upper 32 bits of RSP are always zero for x32,
14642 we can encode %esp as %rsp to avoid 0x67 prefix if
14643 there is no index register. */
14644 if (TARGET_X32 && Pmode == SImode
14645 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
14646 code = 'q';
14647
14648 if (ASSEMBLER_DIALECT == ASM_ATT)
14649 {
14650 if (disp)
14651 {
14652 if (flag_pic)
14653 output_pic_addr_const (file, x: disp, code: 0);
14654 else if (GET_CODE (disp) == LABEL_REF)
14655 output_asm_label (disp);
14656 else
14657 output_addr_const (file, disp);
14658 }
14659
14660 putc (c: '(', stream: file);
14661 if (base)
14662 print_reg (x: base, code, file);
14663 if (index)
14664 {
14665 putc (c: ',', stream: file);
14666 print_reg (x: index, code: vsib ? 0 : code, file);
14667 if (scale != 1 || vsib)
14668 fprintf (stream: file, format: ",%d", scale);
14669 }
14670 putc (c: ')', stream: file);
14671 }
14672 else
14673 {
14674 rtx offset = NULL_RTX;
14675
14676 if (disp)
14677 {
14678 /* Pull out the offset of a symbol; print any symbol itself. */
14679 if (GET_CODE (disp) == CONST
14680 && GET_CODE (XEXP (disp, 0)) == PLUS
14681 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14682 {
14683 offset = XEXP (XEXP (disp, 0), 1);
14684 disp = gen_rtx_CONST (VOIDmode,
14685 XEXP (XEXP (disp, 0), 0));
14686 }
14687
14688 if (flag_pic)
14689 output_pic_addr_const (file, x: disp, code: 0);
14690 else if (GET_CODE (disp) == LABEL_REF)
14691 output_asm_label (disp);
14692 else if (CONST_INT_P (disp))
14693 offset = disp;
14694 else
14695 output_addr_const (file, disp);
14696 }
14697
14698 putc (c: '[', stream: file);
14699 if (base)
14700 {
14701 print_reg (x: base, code, file);
14702 if (offset)
14703 {
14704 if (INTVAL (offset) >= 0)
14705 putc (c: '+', stream: file);
14706 fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14707 }
14708 }
14709 else if (offset)
14710 fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14711 else
14712 putc (c: '0', stream: file);
14713
14714 if (index)
14715 {
14716 putc (c: '+', stream: file);
14717 print_reg (x: index, code: vsib ? 0 : code, file);
14718 if (scale != 1 || vsib)
14719 fprintf (stream: file, format: "*%d", scale);
14720 }
14721 putc (c: ']', stream: file);
14722 }
14723 }
14724}
14725
14726static void
14727ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
14728{
14729 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
14730 output_operand_lossage ("invalid constraints for operand");
14731 else
14732 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, raw: false);
14733}
14734
14735/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14736
14737static bool
14738i386_asm_output_addr_const_extra (FILE *file, rtx x)
14739{
14740 rtx op;
14741
14742 if (GET_CODE (x) != UNSPEC)
14743 return false;
14744
14745 op = XVECEXP (x, 0, 0);
14746 switch (XINT (x, 1))
14747 {
14748 case UNSPEC_GOTOFF:
14749 output_addr_const (file, op);
14750 fputs (s: "@gotoff", stream: file);
14751 break;
14752 case UNSPEC_GOTTPOFF:
14753 output_addr_const (file, op);
14754 /* FIXME: This might be @TPOFF in Sun ld. */
14755 fputs (s: "@gottpoff", stream: file);
14756 break;
14757 case UNSPEC_TPOFF:
14758 output_addr_const (file, op);
14759 fputs (s: "@tpoff", stream: file);
14760 break;
14761 case UNSPEC_NTPOFF:
14762 output_addr_const (file, op);
14763 if (TARGET_64BIT)
14764 fputs (s: "@tpoff", stream: file);
14765 else
14766 fputs (s: "@ntpoff", stream: file);
14767 break;
14768 case UNSPEC_DTPOFF:
14769 output_addr_const (file, op);
14770 fputs (s: "@dtpoff", stream: file);
14771 break;
14772 case UNSPEC_GOTNTPOFF:
14773 output_addr_const (file, op);
14774 if (TARGET_64BIT)
14775 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14776 "@gottpoff(%rip)" : "@gottpoff[rip]", stream: file);
14777 else
14778 fputs (s: "@gotntpoff", stream: file);
14779 break;
14780 case UNSPEC_INDNTPOFF:
14781 output_addr_const (file, op);
14782 fputs (s: "@indntpoff", stream: file);
14783 break;
14784#if TARGET_MACHO
14785 case UNSPEC_MACHOPIC_OFFSET:
14786 output_addr_const (file, op);
14787 putc ('-', file);
14788 machopic_output_function_base_name (file);
14789 break;
14790#endif
14791
14792 default:
14793 return false;
14794 }
14795
14796 return true;
14797}
14798
14799
14800/* Output code to perform a 387 binary operation in INSN, one of PLUS,
14801 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14802 is the expression of the binary operation. The output may either be
14803 emitted here, or returned to the caller, like all output_* functions.
14804
14805 There is no guarantee that the operands are the same mode, as they
14806 might be within FLOAT or FLOAT_EXTEND expressions. */
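
/* As an illustrative example (not exhaustive): for an SSE addition in
   SFmode the template is assembled as "%v" + "add" + "ss" plus the
   operand string, i.e. "%vaddss\t{%2, %0|%0, %2}" without AVX; for an
   i387 addition with a memory source it becomes "fadd%Z2\t%2".  */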
14807
14808#ifndef SYSV386_COMPAT
14809/* Set to 1 for compatibility with brain-damaged assemblers. No-one
14810 wants to fix the assemblers because that causes incompatibility
14811 with gcc. No-one wants to fix gcc because that causes
14812 incompatibility with assemblers... You can use the option of
14813 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14814#define SYSV386_COMPAT 1
14815#endif
14816
14817const char *
14818output_387_binary_op (rtx_insn *insn, rtx *operands)
14819{
14820 static char buf[40];
14821 const char *p;
14822 bool is_sse
14823 = (SSE_REG_P (operands[0])
14824 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
14825
14826 if (is_sse)
14827 p = "%v";
14828 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14829 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14830 p = "fi";
14831 else
14832 p = "f";
14833
14834 strcpy (dest: buf, src: p);
14835
14836 switch (GET_CODE (operands[3]))
14837 {
14838 case PLUS:
14839 p = "add"; break;
14840 case MINUS:
14841 p = "sub"; break;
14842 case MULT:
14843 p = "mul"; break;
14844 case DIV:
14845 p = "div"; break;
14846 default:
14847 gcc_unreachable ();
14848 }
14849
14850 strcat (dest: buf, src: p);
14851
14852 if (is_sse)
14853 {
14854 p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
14855 strcat (dest: buf, src: p);
14856
14857 if (TARGET_AVX)
14858 p = "\t{%2, %1, %0|%0, %1, %2}";
14859 else
14860 p = "\t{%2, %0|%0, %2}";
14861
14862 strcat (dest: buf, src: p);
14863 return buf;
14864 }
14865
  /* Even if we do not want to check the inputs, this documents the input
     constraints, which helps in understanding the following code.  */
14868 if (flag_checking)
14869 {
14870 if (STACK_REG_P (operands[0])
14871 && ((REG_P (operands[1])
14872 && REGNO (operands[0]) == REGNO (operands[1])
14873 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14874 || (REG_P (operands[2])
14875 && REGNO (operands[0]) == REGNO (operands[2])
14876 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14877 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14878 ; /* ok */
14879 else
14880 gcc_unreachable ();
14881 }
14882
14883 switch (GET_CODE (operands[3]))
14884 {
14885 case MULT:
14886 case PLUS:
14887 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14888 std::swap (a&: operands[1], b&: operands[2]);
14889
      /* We know operands[0] == operands[1].  */
14891
14892 if (MEM_P (operands[2]))
14893 {
14894 p = "%Z2\t%2";
14895 break;
14896 }
14897
14898 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14899 {
14900 if (STACK_TOP_P (operands[0]))
14901 /* How is it that we are storing to a dead operand[2]?
14902 Well, presumably operands[1] is dead too. We can't
14903 store the result to st(0) as st(0) gets popped on this
14904 instruction. Instead store to operands[2] (which I
14905 think has to be st(1)). st(1) will be popped later.
14906 gcc <= 2.8.1 didn't have this check and generated
14907 assembly code that the Unixware assembler rejected. */
14908 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14909 else
14910 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14911 break;
14912 }
14913
14914 if (STACK_TOP_P (operands[0]))
14915 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14916 else
14917 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14918 break;
14919
14920 case MINUS:
14921 case DIV:
14922 if (MEM_P (operands[1]))
14923 {
14924 p = "r%Z1\t%1";
14925 break;
14926 }
14927
14928 if (MEM_P (operands[2]))
14929 {
14930 p = "%Z2\t%2";
14931 break;
14932 }
14933
14934 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14935 {
14936#if SYSV386_COMPAT
14937 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14938 derived assemblers, confusingly reverse the direction of
14939 the operation for fsub{r} and fdiv{r} when the
14940 destination register is not st(0). The Intel assembler
14941 doesn't have this brain damage. Read !SYSV386_COMPAT to
14942 figure out what the hardware really does. */
14943 if (STACK_TOP_P (operands[0]))
14944 p = "{p\t%0, %2|rp\t%2, %0}";
14945 else
14946 p = "{rp\t%2, %0|p\t%0, %2}";
14947#else
14948 if (STACK_TOP_P (operands[0]))
14949 /* As above for fmul/fadd, we can't store to st(0). */
14950 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14951 else
14952 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14953#endif
14954 break;
14955 }
14956
14957 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14958 {
14959#if SYSV386_COMPAT
14960 if (STACK_TOP_P (operands[0]))
14961 p = "{rp\t%0, %1|p\t%1, %0}";
14962 else
14963 p = "{p\t%1, %0|rp\t%0, %1}";
14964#else
14965 if (STACK_TOP_P (operands[0]))
14966 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14967 else
14968 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14969#endif
14970 break;
14971 }
14972
14973 if (STACK_TOP_P (operands[0]))
14974 {
14975 if (STACK_TOP_P (operands[1]))
14976 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14977 else
14978 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14979 break;
14980 }
14981 else if (STACK_TOP_P (operands[1]))
14982 {
14983#if SYSV386_COMPAT
14984 p = "{\t%1, %0|r\t%0, %1}";
14985#else
14986 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14987#endif
14988 }
14989 else
14990 {
14991#if SYSV386_COMPAT
14992 p = "{r\t%2, %0|\t%0, %2}";
14993#else
14994 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14995#endif
14996 }
14997 break;
14998
14999 default:
15000 gcc_unreachable ();
15001 }
15002
15003 strcat (dest: buf, src: p);
15004 return buf;
15005}
15006
15007/* Return needed mode for entity in optimize_mode_switching pass. */
15008
15009static int
15010ix86_dirflag_mode_needed (rtx_insn *insn)
15011{
15012 if (CALL_P (insn))
15013 {
15014 if (cfun->machine->func_type == TYPE_NORMAL)
15015 return X86_DIRFLAG_ANY;
15016 else
15017 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
15018 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
15019 }
15020
15021 if (recog_memoized (insn) < 0)
15022 return X86_DIRFLAG_ANY;
15023
15024 if (get_attr_type (insn) == TYPE_STR)
15025 {
15026 /* Emit cld instruction if stringops are used in the function. */
15027 if (cfun->machine->func_type == TYPE_NORMAL)
15028 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
15029 else
15030 return X86_DIRFLAG_RESET;
15031 }
15032
15033 return X86_DIRFLAG_ANY;
15034}
15035
/* Check if a 256bit or 512bit AVX register is referenced inside EXP.  */
15037
15038static bool
15039ix86_check_avx_upper_register (const_rtx exp)
15040{
15041 return (SSE_REG_P (exp)
15042 && !EXT_REX_SSE_REG_P (exp)
15043 && GET_MODE_BITSIZE (GET_MODE (exp)) > 128);
15044}
15045
15046/* Check if a 256bit or 512bit AVX register is referenced in stores. */
15047
15048static void
15049ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
15050{
15051 if (ix86_check_avx_upper_register (exp: dest))
15052 {
15053 bool *used = (bool *) data;
15054 *used = true;
15055 }
15056}
15057
15058/* Return needed mode for entity in optimize_mode_switching pass. */
15059
15060static int
15061ix86_avx_u128_mode_needed (rtx_insn *insn)
15062{
15063 if (DEBUG_INSN_P (insn))
15064 return AVX_U128_ANY;
15065
15066 if (CALL_P (insn))
15067 {
15068 rtx link;
15069
15070 /* Needed mode is set to AVX_U128_CLEAN if there are
15071 no 256bit or 512bit modes used in function arguments. */
15072 for (link = CALL_INSN_FUNCTION_USAGE (insn);
15073 link;
15074 link = XEXP (link, 1))
15075 {
15076 if (GET_CODE (XEXP (link, 0)) == USE)
15077 {
15078 rtx arg = XEXP (XEXP (link, 0), 0);
15079
15080 if (ix86_check_avx_upper_register (exp: arg))
15081 return AVX_U128_DIRTY;
15082 }
15083 }
15084
      /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit
	 or 512bit registers used in the function return register.  */
15087 bool avx_upper_reg_found = false;
15088 note_stores (insn, ix86_check_avx_upper_stores,
15089 &avx_upper_reg_found);
15090 if (avx_upper_reg_found)
15091 return AVX_U128_DIRTY;
15092
15093 /* If the function is known to preserve some SSE registers,
15094 RA and previous passes can legitimately rely on that for
15095 modes wider than 256 bits. It's only safe to issue a
15096 vzeroupper if all SSE registers are clobbered. */
15097 const function_abi &abi = insn_callee_abi (insn);
15098 if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
	    /* It should be safe to issue a vzeroupper before sibling_call_p.
	       Also there is no mode_exit for a sibling call, so a
	       vzeroupper could be missing for that.  */
15102 || !(SIBLING_CALL_P (insn)
15103 || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
15104 y: abi.mode_clobbers (V4DImode))))
15105 return AVX_U128_ANY;
15106
15107 return AVX_U128_CLEAN;
15108 }
15109
15110 subrtx_iterator::array_type array;
15111
15112 rtx set = single_set (insn);
15113 if (set)
15114 {
15115 rtx dest = SET_DEST (set);
15116 rtx src = SET_SRC (set);
15117 if (ix86_check_avx_upper_register (exp: dest))
15118 {
	  /* This is a YMM/ZMM load.  Return AVX_U128_DIRTY if the
	     source isn't zero.  */
15121 if (standard_sse_constant_p (x: src, GET_MODE (dest)) != 1)
15122 return AVX_U128_DIRTY;
15123 else
15124 return AVX_U128_ANY;
15125 }
15126 else
15127 {
15128 FOR_EACH_SUBRTX (iter, array, src, NONCONST)
15129 if (ix86_check_avx_upper_register (exp: *iter))
15130 return AVX_U128_DIRTY;
15131 }
15132
15133 /* This isn't YMM/ZMM load/store. */
15134 return AVX_U128_ANY;
15135 }
15136
  /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
     Hardware changes state only when a 256bit register is written to,
     but we need to prevent the compiler from moving the optimal insertion
     point above an eventual read from a 256bit or 512bit register.  */
15141 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
15142 if (ix86_check_avx_upper_register (exp: *iter))
15143 return AVX_U128_DIRTY;
15144
15145 return AVX_U128_ANY;
15146}
15147
15148/* Return mode that i387 must be switched into
15149 prior to the execution of insn. */
15150
15151static int
15152ix86_i387_mode_needed (int entity, rtx_insn *insn)
15153{
15154 enum attr_i387_cw mode;
15155
  /* The mode UNINITIALIZED is used to store the control word after a
     function call or ASM pattern.  The mode ANY specifies that the insn
     has no requirements on the control word and makes no changes to the
     bits we are interested in.  */
15160
15161 if (CALL_P (insn)
15162 || (NONJUMP_INSN_P (insn)
15163 && (asm_noperands (PATTERN (insn)) >= 0
15164 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15165 return I387_CW_UNINITIALIZED;
15166
15167 if (recog_memoized (insn) < 0)
15168 return I387_CW_ANY;
15169
15170 mode = get_attr_i387_cw (insn);
15171
15172 switch (entity)
15173 {
15174 case I387_ROUNDEVEN:
15175 if (mode == I387_CW_ROUNDEVEN)
15176 return mode;
15177 break;
15178
15179 case I387_TRUNC:
15180 if (mode == I387_CW_TRUNC)
15181 return mode;
15182 break;
15183
15184 case I387_FLOOR:
15185 if (mode == I387_CW_FLOOR)
15186 return mode;
15187 break;
15188
15189 case I387_CEIL:
15190 if (mode == I387_CW_CEIL)
15191 return mode;
15192 break;
15193
15194 default:
15195 gcc_unreachable ();
15196 }
15197
15198 return I387_CW_ANY;
15199}
15200
15201/* Return mode that entity must be switched into
15202 prior to the execution of insn. */
15203
15204static int
15205ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
15206{
15207 switch (entity)
15208 {
15209 case X86_DIRFLAG:
15210 return ix86_dirflag_mode_needed (insn);
15211 case AVX_U128:
15212 return ix86_avx_u128_mode_needed (insn);
15213 case I387_ROUNDEVEN:
15214 case I387_TRUNC:
15215 case I387_FLOOR:
15216 case I387_CEIL:
15217 return ix86_i387_mode_needed (entity, insn);
15218 default:
15219 gcc_unreachable ();
15220 }
15221 return 0;
15222}
15223
15224/* Calculate mode of upper 128bit AVX registers after the insn. */
15225
15226static int
15227ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
15228{
15229 rtx pat = PATTERN (insn);
15230
15231 if (vzeroupper_pattern (pat, VOIDmode)
15232 || vzeroall_pattern (pat, VOIDmode))
15233 return AVX_U128_CLEAN;
15234
15235 /* We know that state is clean after CALL insn if there are no
15236 256bit or 512bit registers used in the function return register. */
15237 if (CALL_P (insn))
15238 {
15239 bool avx_upper_reg_found = false;
15240 note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
15241
15242 if (avx_upper_reg_found)
15243 return AVX_U128_DIRTY;
15244
      /* If the function doesn't clobber any sse registers, or clobbers
	 only their 128-bit parts, then a vzeroupper isn't issued before
	 the function exit.  The status is then ANY rather than CLEAN
	 after the function.  */
15248 const function_abi &abi = insn_callee_abi (insn);
15249 if (!(SIBLING_CALL_P (insn)
15250 || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
15251 y: abi.mode_clobbers (V4DImode))))
15252 return AVX_U128_ANY;
15253
15254 return AVX_U128_CLEAN;
15255 }
15256
15257 /* Otherwise, return current mode. Remember that if insn
15258 references AVX 256bit or 512bit registers, the mode was already
15259 changed to DIRTY from MODE_NEEDED. */
15260 return mode;
15261}
15262
15263/* Return the mode that an insn results in. */
15264
15265static int
15266ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
15267{
15268 switch (entity)
15269 {
15270 case X86_DIRFLAG:
15271 return mode;
15272 case AVX_U128:
15273 return ix86_avx_u128_mode_after (mode, insn);
15274 case I387_ROUNDEVEN:
15275 case I387_TRUNC:
15276 case I387_FLOOR:
15277 case I387_CEIL:
15278 return mode;
15279 default:
15280 gcc_unreachable ();
15281 }
15282}
15283
15284static int
15285ix86_dirflag_mode_entry (void)
15286{
15287 /* For TARGET_CLD or in the interrupt handler we can't assume
15288 direction flag state at function entry. */
15289 if (TARGET_CLD
15290 || cfun->machine->func_type != TYPE_NORMAL)
15291 return X86_DIRFLAG_ANY;
15292
15293 return X86_DIRFLAG_RESET;
15294}
15295
15296static int
15297ix86_avx_u128_mode_entry (void)
15298{
15299 tree arg;
15300
15301 /* Entry mode is set to AVX_U128_DIRTY if there are
15302 256bit or 512bit modes used in function arguments. */
15303 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
15304 arg = TREE_CHAIN (arg))
15305 {
15306 rtx incoming = DECL_INCOMING_RTL (arg);
15307
15308 if (incoming && ix86_check_avx_upper_register (exp: incoming))
15309 return AVX_U128_DIRTY;
15310 }
15311
15312 return AVX_U128_CLEAN;
15313}
15314
15315/* Return a mode that ENTITY is assumed to be
15316 switched to at function entry. */
15317
15318static int
15319ix86_mode_entry (int entity)
15320{
15321 switch (entity)
15322 {
15323 case X86_DIRFLAG:
15324 return ix86_dirflag_mode_entry ();
15325 case AVX_U128:
15326 return ix86_avx_u128_mode_entry ();
15327 case I387_ROUNDEVEN:
15328 case I387_TRUNC:
15329 case I387_FLOOR:
15330 case I387_CEIL:
15331 return I387_CW_ANY;
15332 default:
15333 gcc_unreachable ();
15334 }
15335}
15336
15337static int
15338ix86_avx_u128_mode_exit (void)
15339{
15340 rtx reg = crtl->return_rtx;
15341
  /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
     or 512bit modes used in the function return register.  */
15344 if (reg && ix86_check_avx_upper_register (exp: reg))
15345 return AVX_U128_DIRTY;
15346
15347 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
15348 modes used in function arguments, otherwise return AVX_U128_CLEAN.
15349 */
15350 return ix86_avx_u128_mode_entry ();
15351}
15352
15353/* Return a mode that ENTITY is assumed to be
15354 switched to at function exit. */
15355
15356static int
15357ix86_mode_exit (int entity)
15358{
15359 switch (entity)
15360 {
15361 case X86_DIRFLAG:
15362 return X86_DIRFLAG_ANY;
15363 case AVX_U128:
15364 return ix86_avx_u128_mode_exit ();
15365 case I387_ROUNDEVEN:
15366 case I387_TRUNC:
15367 case I387_FLOOR:
15368 case I387_CEIL:
15369 return I387_CW_ANY;
15370 default:
15371 gcc_unreachable ();
15372 }
15373}
15374
15375static int
15376ix86_mode_priority (int, int n)
15377{
15378 return n;
15379}
15380
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  The current control word is saved to the
   SLOT_CW_STORED stack slot, and a copy adjusted for MODE is stored
   in the stack slot corresponding to MODE.  */
15384
15385static void
15386emit_i387_cw_initialization (int mode)
15387{
15388 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15389 rtx new_mode;
15390
15391 enum ix86_stack_slot slot;
15392
15393 rtx reg = gen_reg_rtx (HImode);
15394
15395 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15396 emit_move_insn (reg, copy_rtx (stored_mode));
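
  /* Bits 11:10 of the x87 control word select the rounding mode:
     00 round to nearest (even), 01 round down toward -inf,
     10 round up toward +inf, 11 round toward zero (truncate).
     The masks below clear and/or set exactly those two bits.  */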
15397
15398 switch (mode)
15399 {
15400 case I387_CW_ROUNDEVEN:
15401 /* round to nearest */
15402 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15403 slot = SLOT_CW_ROUNDEVEN;
15404 break;
15405
15406 case I387_CW_TRUNC:
15407 /* round toward zero (truncate) */
15408 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15409 slot = SLOT_CW_TRUNC;
15410 break;
15411
15412 case I387_CW_FLOOR:
15413 /* round down toward -oo */
15414 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15415 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15416 slot = SLOT_CW_FLOOR;
15417 break;
15418
15419 case I387_CW_CEIL:
15420 /* round up toward +oo */
15421 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15422 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15423 slot = SLOT_CW_CEIL;
15424 break;
15425
15426 default:
15427 gcc_unreachable ();
15428 }
15429
15430 gcc_assert (slot < MAX_386_STACK_LOCALS);
15431
15432 new_mode = assign_386_stack_local (HImode, slot);
15433 emit_move_insn (new_mode, reg);
15434}
15435
15436/* Generate one or more insns to set ENTITY to MODE. */
15437
15438static void
15439ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
15440 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
15441{
15442 switch (entity)
15443 {
15444 case X86_DIRFLAG:
15445 if (mode == X86_DIRFLAG_RESET)
15446 emit_insn (gen_cld ());
15447 break;
15448 case AVX_U128:
15449 if (mode == AVX_U128_CLEAN)
15450 ix86_expand_avx_vzeroupper ();
15451 break;
15452 case I387_ROUNDEVEN:
15453 case I387_TRUNC:
15454 case I387_FLOOR:
15455 case I387_CEIL:
15456 if (mode != I387_CW_ANY
15457 && mode != I387_CW_UNINITIALIZED)
15458 emit_i387_cw_initialization (mode);
15459 break;
15460 default:
15461 gcc_unreachable ();
15462 }
15463}
15464
15465/* Output code for INSN to convert a float to a signed int. OPERANDS
15466 are the insn operands. The output may be [HSD]Imode and the input
15467 operand may be [SDX]Fmode. */
15468
15469const char *
15470output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
15471{
15472 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
15473 bool dimode_p = GET_MODE (operands[0]) == DImode;
15474 int round_mode = get_attr_i387_cw (insn);
15475
15476 static char buf[40];
15477 const char *p;
15478
15479 /* Jump through a hoop or two for DImode, since the hardware has no
15480 non-popping instruction. We used to do this a different way, but
15481 that was somewhat fragile and broke with post-reload splitters. */
15482 if ((dimode_p || fisttp) && !stack_top_dies)
15483 output_asm_insn ("fld\t%y1", operands);
15484
15485 gcc_assert (STACK_TOP_P (operands[1]));
15486 gcc_assert (MEM_P (operands[0]));
15487 gcc_assert (GET_MODE (operands[1]) != TFmode);
15488
15489 if (fisttp)
15490 return "fisttp%Z0\t%0";
15491
15492 strcpy (dest: buf, src: "fist");
15493
15494 if (round_mode != I387_CW_ANY)
15495 output_asm_insn ("fldcw\t%3", operands);
15496
15497 p = "p%Z0\t%0";
15498 strcat (dest: buf, src: p + !(stack_top_dies || dimode_p));
15499
15500 output_asm_insn (buf, operands);
15501
15502 if (round_mode != I387_CW_ANY)
15503 output_asm_insn ("fldcw\t%2", operands);
15504
15505 return "";
15506}
15507
15508/* Output code for x87 ffreep insn. The OPNO argument, which may only
15509 have the values zero or one, indicates the ffreep insn's operand
15510 from the OPERANDS array. */
15511
15512static const char *
15513output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15514{
15515 if (TARGET_USE_FFREEP)
15516#ifdef HAVE_AS_IX86_FFREEP
15517 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15518#else
15519 {
15520 static char retval[32];
15521 int regno = REGNO (operands[opno]);
15522
15523 gcc_assert (STACK_REGNO_P (regno));
15524
15525 regno -= FIRST_STACK_REG;
15526
15527 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15528 return retval;
15529 }
15530#endif
15531
15532 return opno ? "fstp\t%y1" : "fstp\t%y0";
15533}
15534
15535
15536/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15537 should be used. UNORDERED_P is true when fucom should be used. */
15538
15539const char *
15540output_fp_compare (rtx_insn *insn, rtx *operands,
15541 bool eflags_p, bool unordered_p)
15542{
15543 rtx *xops = eflags_p ? &operands[0] : &operands[1];
15544 bool stack_top_dies;
15545
15546 static char buf[40];
15547 const char *p;
15548
15549 gcc_assert (STACK_TOP_P (xops[0]));
15550
15551 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
15552
15553 if (eflags_p)
15554 {
15555 p = unordered_p ? "fucomi" : "fcomi";
15556 strcpy (dest: buf, src: p);
15557
15558 p = "p\t{%y1, %0|%0, %y1}";
15559 strcat (dest: buf, src: p + !stack_top_dies);
15560
15561 return buf;
15562 }
15563
15564 if (STACK_REG_P (xops[1])
15565 && stack_top_dies
15566 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
15567 {
15568 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
15569
      /* If both the top of the 387 stack and the other operand (also
	 a stack register) die, then this must be a `fcompp' float
	 compare.  */
15573 p = unordered_p ? "fucompp" : "fcompp";
15574 strcpy (dest: buf, src: p);
15575 }
15576 else if (const0_operand (xops[1], VOIDmode))
15577 {
15578 gcc_assert (!unordered_p);
15579 strcpy (dest: buf, src: "ftst");
15580 }
15581 else
15582 {
15583 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
15584 {
15585 gcc_assert (!unordered_p);
15586 p = "ficom";
15587 }
15588 else
15589 p = unordered_p ? "fucom" : "fcom";
15590
15591 strcpy (dest: buf, src: p);
15592
15593 p = "p%Z2\t%y2";
15594 strcat (dest: buf, src: p + !stack_top_dies);
15595 }
15596
15597 output_asm_insn (buf, operands);
15598 return "fnstsw\t%0";
15599}
15600
15601void
15602ix86_output_addr_vec_elt (FILE *file, int value)
15603{
15604 const char *directive = ASM_LONG;
15605
15606#ifdef ASM_QUAD
15607 if (TARGET_LP64)
15608 directive = ASM_QUAD;
15609#else
15610 gcc_assert (!TARGET_64BIT);
15611#endif
15612
15613 fprintf (stream: file, format: "%s%s%d\n", directive, LPREFIX, value);
15614}
15615
15616void
15617ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15618{
15619 const char *directive = ASM_LONG;
15620
15621#ifdef ASM_QUAD
15622 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15623 directive = ASM_QUAD;
15624#else
15625 gcc_assert (!TARGET_64BIT);
15626#endif
15627 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15628 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15629 fprintf (stream: file, format: "%s%s%d-%s%d\n",
15630 directive, LPREFIX, value, LPREFIX, rel);
15631#if TARGET_MACHO
15632 else if (TARGET_MACHO)
15633 {
15634 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15635 machopic_output_function_base_name (file);
15636 putc ('\n', file);
15637 }
15638#endif
15639 else if (HAVE_AS_GOTOFF_IN_DATA)
15640 fprintf (stream: file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15641 else
15642 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15643 GOT_SYMBOL_NAME, LPREFIX, value);
15644}
15645
15646#define LEA_MAX_STALL (3)
15647#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
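
/* Distances below are measured in half-cycles, so LEA_SEARCH_THRESHOLD
   corresponds to a window of LEA_MAX_STALL full cycles around INSN.  */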
15648
/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to the next cycle if there is some dependency.  */
15653
15654static unsigned int
15655increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
15656{
15657 df_ref def, use;
15658
15659 if (!prev || !next)
15660 return distance + (distance & 1) + 2;
15661
15662 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
15663 return distance + 1;
15664
15665 FOR_EACH_INSN_USE (use, next)
15666 FOR_EACH_INSN_DEF (def, prev)
15667 if (!DF_REF_IS_ARTIFICIAL (def)
15668 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
15669 return distance + (distance & 1) + 2;
15670
15671 return distance + 1;
15672}
15673
15674/* Function checks if instruction INSN defines register number
15675 REGNO1 or REGNO2. */
15676
15677bool
15678insn_defines_reg (unsigned int regno1, unsigned int regno2,
15679 rtx_insn *insn)
15680{
15681 df_ref def;
15682
15683 FOR_EACH_INSN_DEF (def, insn)
15684 if (DF_REF_REG_DEF_P (def)
15685 && !DF_REF_IS_ARTIFICIAL (def)
15686 && (regno1 == DF_REF_REGNO (def)
15687 || regno2 == DF_REF_REGNO (def)))
15688 return true;
15689
15690 return false;
15691}
15692
15693/* Function checks if instruction INSN uses register number
15694 REGNO as a part of address expression. */
15695
15696static bool
15697insn_uses_reg_mem (unsigned int regno, rtx insn)
15698{
15699 df_ref use;
15700
15701 FOR_EACH_INSN_USE (use, insn)
15702 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
15703 return true;
15704
15705 return false;
15706}
15707
15708/* Search backward for non-agu definition of register number REGNO1
15709 or register number REGNO2 in basic block starting from instruction
15710 START up to head of basic block or instruction INSN.
15711
15712 Function puts true value into *FOUND var if definition was found
15713 and false otherwise.
15714
15715 Distance in half-cycles between START and found instruction or head
15716 of BB is added to DISTANCE and returned. */
15717
15718static int
15719distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
15720 rtx_insn *insn, int distance,
15721 rtx_insn *start, bool *found)
15722{
15723 basic_block bb = start ? BLOCK_FOR_INSN (insn: start) : NULL;
15724 rtx_insn *prev = start;
15725 rtx_insn *next = NULL;
15726
15727 *found = false;
15728
15729 while (prev
15730 && prev != insn
15731 && distance < LEA_SEARCH_THRESHOLD)
15732 {
15733 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
15734 {
15735 distance = increase_distance (prev, next, distance);
15736 if (insn_defines_reg (regno1, regno2, insn: prev))
15737 {
15738 if (recog_memoized (insn: prev) < 0
15739 || get_attr_type (prev) != TYPE_LEA)
15740 {
15741 *found = true;
15742 return distance;
15743 }
15744 }
15745
15746 next = prev;
15747 }
15748 if (prev == BB_HEAD (bb))
15749 break;
15750
15751 prev = PREV_INSN (insn: prev);
15752 }
15753
15754 return distance;
15755}
15756
15757/* Search backward for non-agu definition of register number REGNO1
15758 or register number REGNO2 in INSN's basic block until
15759 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15760 2. Reach neighbor BBs boundary, or
15761 3. Reach agu definition.
15762 Returns the distance between the non-agu definition point and INSN.
15763 If no definition point, returns -1. */
15764
15765static int
15766distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15767 rtx_insn *insn)
15768{
15769 basic_block bb = BLOCK_FOR_INSN (insn);
15770 int distance = 0;
15771 bool found = false;
15772
15773 if (insn != BB_HEAD (bb))
15774 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
15775 distance, start: PREV_INSN (insn),
15776 found: &found);
15777
15778 if (!found && distance < LEA_SEARCH_THRESHOLD)
15779 {
15780 edge e;
15781 edge_iterator ei;
15782 bool simple_loop = false;
15783
15784 FOR_EACH_EDGE (e, ei, bb->preds)
15785 if (e->src == bb)
15786 {
15787 simple_loop = true;
15788 break;
15789 }
15790
15791 if (simple_loop)
15792 distance = distance_non_agu_define_in_bb (regno1, regno2,
15793 insn, distance,
15794 BB_END (bb), found: &found);
15795 else
15796 {
15797 int shortest_dist = -1;
15798 bool found_in_bb = false;
15799
15800 FOR_EACH_EDGE (e, ei, bb->preds)
15801 {
15802 int bb_dist
15803 = distance_non_agu_define_in_bb (regno1, regno2,
15804 insn, distance,
15805 BB_END (e->src),
15806 found: &found_in_bb);
15807 if (found_in_bb)
15808 {
15809 if (shortest_dist < 0)
15810 shortest_dist = bb_dist;
15811 else if (bb_dist > 0)
15812 shortest_dist = MIN (bb_dist, shortest_dist);
15813
15814 found = true;
15815 }
15816 }
15817
15818 distance = shortest_dist;
15819 }
15820 }
15821
15822 if (!found)
15823 return -1;
15824
15825 return distance >> 1;
15826}
15827
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in a memory address, added
   to DISTANCE.  Return -1 if REGNO is set.

   Put true into *FOUND if a register usage was found and
   false otherwise.
   Put true into *REDEFINED if a register redefinition was
   found and false otherwise.  */
15836
15837static int
15838distance_agu_use_in_bb (unsigned int regno,
15839 rtx_insn *insn, int distance, rtx_insn *start,
15840 bool *found, bool *redefined)
15841{
15842 basic_block bb = NULL;
15843 rtx_insn *next = start;
15844 rtx_insn *prev = NULL;
15845
15846 *found = false;
15847 *redefined = false;
15848
15849 if (start != NULL_RTX)
15850 {
15851 bb = BLOCK_FOR_INSN (insn: start);
15852 if (start != BB_HEAD (bb))
15853 /* If insn and start belong to the same bb, set prev to insn,
15854 so the call to increase_distance will increase the distance
15855 between insns by 1. */
15856 prev = insn;
15857 }
15858
15859 while (next
15860 && next != insn
15861 && distance < LEA_SEARCH_THRESHOLD)
15862 {
15863 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
15864 {
15865 distance = increase_distance(prev, next, distance);
15866 if (insn_uses_reg_mem (regno, insn: next))
15867 {
15868 /* Return DISTANCE if OP0 is used in memory
15869 address in NEXT. */
15870 *found = true;
15871 return distance;
15872 }
15873
15874 if (insn_defines_reg (regno1: regno, INVALID_REGNUM, insn: next))
15875 {
15876 /* Return -1 if OP0 is set in NEXT. */
15877 *redefined = true;
15878 return -1;
15879 }
15880
15881 prev = next;
15882 }
15883
15884 if (next == BB_END (bb))
15885 break;
15886
15887 next = NEXT_INSN (insn: next);
15888 }
15889
15890 return distance;
15891}
15892
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in a memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
15896
15897static int
15898distance_agu_use (unsigned int regno0, rtx_insn *insn)
15899{
15900 basic_block bb = BLOCK_FOR_INSN (insn);
15901 int distance = 0;
15902 bool found = false;
15903 bool redefined = false;
15904
15905 if (insn != BB_END (bb))
15906 distance = distance_agu_use_in_bb (regno: regno0, insn, distance,
15907 start: NEXT_INSN (insn),
15908 found: &found, redefined: &redefined);
15909
15910 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
15911 {
15912 edge e;
15913 edge_iterator ei;
15914 bool simple_loop = false;
15915
15916 FOR_EACH_EDGE (e, ei, bb->succs)
15917 if (e->dest == bb)
15918 {
15919 simple_loop = true;
15920 break;
15921 }
15922
15923 if (simple_loop)
15924 distance = distance_agu_use_in_bb (regno: regno0, insn,
15925 distance, BB_HEAD (bb),
15926 found: &found, redefined: &redefined);
15927 else
15928 {
15929 int shortest_dist = -1;
15930 bool found_in_bb = false;
15931 bool redefined_in_bb = false;
15932
15933 FOR_EACH_EDGE (e, ei, bb->succs)
15934 {
15935 int bb_dist
15936 = distance_agu_use_in_bb (regno: regno0, insn,
15937 distance, BB_HEAD (e->dest),
15938 found: &found_in_bb, redefined: &redefined_in_bb);
15939 if (found_in_bb)
15940 {
15941 if (shortest_dist < 0)
15942 shortest_dist = bb_dist;
15943 else if (bb_dist > 0)
15944 shortest_dist = MIN (bb_dist, shortest_dist);
15945
15946 found = true;
15947 }
15948 }
15949
15950 distance = shortest_dist;
15951 }
15952 }
15953
15954 if (!found || redefined)
15955 return -1;
15956
15957 return distance >> 1;
15958}
15959
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma between choosing LEA or ADD:
   Negative value: ADD is preferred over LEA
   Zero: Neutral
   Positive value: LEA is preferred over ADD.  */
15965#define IX86_LEA_PRIORITY 0
15966
/* Return true if using the lea INSN has a performance advantage
   over a sequence of instructions.  The instruction sequence has
   SPLIT_COST cycles higher latency than the lea latency.  */
15970
15971static bool
15972ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
15973 unsigned int regno2, int split_cost, bool has_scale)
15974{
15975 int dist_define, dist_use;
15976
  /* For processors other than Bonnell, if using a 2-source or
     3-source LEA for a non-destructive destination, or because the
     ability to use SCALE is wanted, the use of LEA is justified.  */
15980 if (!TARGET_CPU_P (BONNELL))
15981 {
15982 if (has_scale)
15983 return true;
15984 if (split_cost < 1)
15985 return false;
15986 if (regno0 == regno1 || regno0 == regno2)
15987 return false;
15988 return true;
15989 }
15990
15991 /* Remember recog_data content. */
15992 struct recog_data_d recog_data_save = recog_data;
15993
15994 dist_define = distance_non_agu_define (regno1, regno2, insn);
15995 dist_use = distance_agu_use (regno0, insn);
15996
  /* distance_non_agu_define can call get_attr_type, which can call
     recog_memoized; restore recog_data back to its previous content.  */
15999 recog_data = recog_data_save;
16000
16001 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
16002 {
      /* If there is no non-AGU operand definition, no AGU
	 operand usage and the split cost is 0, then both the lea
	 and non-lea variants have the same priority.  Currently
	 we prefer lea for 64-bit code and non-lea for 32-bit
	 code.  */
16008 if (dist_use < 0 && split_cost == 0)
16009 return TARGET_64BIT || IX86_LEA_PRIORITY;
16010 else
16011 return true;
16012 }
16013
  /* With a longer definition distance, lea is preferable.
     Here we adjust it to take into account the splitting cost and
     the lea priority.  */
16017 dist_define += split_cost + IX86_LEA_PRIORITY;
16018
  /* If there is no use in a memory address then we just check
     that the split cost exceeds the AGU stall.  */
16021 if (dist_use < 0)
16022 return dist_define > LEA_MAX_STALL;
16023
  /* If this insn has both a backward non-agu dependence and a forward
     agu dependence, the one with the shorter distance takes effect.  */
16026 return dist_define >= dist_use;
16027}
16028
16029/* Return true if we need to split op0 = op1 + op2 into a sequence of
16030 move and add to avoid AGU stalls. */
16031
16032bool
16033ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
16034{
16035 unsigned int regno0, regno1, regno2;
16036
16037 /* Check if we need to optimize. */
16038 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16039 return false;
16040
16041 regno0 = true_regnum (operands[0]);
16042 regno1 = true_regnum (operands[1]);
16043 regno2 = true_regnum (operands[2]);
16044
  /* We need to split only adds with a non-destructive
     destination operand.  */
16047 if (regno0 == regno1 || regno0 == regno2)
16048 return false;
16049 else
16050 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost: 1, has_scale: false);
16051}
16052
16053/* Return true if we should emit lea instruction instead of mov
16054 instruction. */
16055
16056bool
16057ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
16058{
16059 unsigned int regno0, regno1;
16060
16061 /* Check if we need to optimize. */
16062 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16063 return false;
16064
16065 /* Use lea for reg to reg moves only. */
16066 if (!REG_P (operands[0]) || !REG_P (operands[1]))
16067 return false;
16068
16069 regno0 = true_regnum (operands[0]);
16070 regno1 = true_regnum (operands[1]);
16071
16072 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, split_cost: 0, has_scale: false);
16073}
16074
16075/* Return true if we need to split lea into a sequence of
16076 instructions to avoid AGU stalls during peephole2. */
16077
16078bool
16079ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
16080{
16081 unsigned int regno0, regno1, regno2;
16082 int split_cost;
16083 struct ix86_address parts;
16084 int ok;
16085
16086 /* The "at least two components" test below might not catch simple
16087 move or zero extension insns if parts.base is non-NULL and parts.disp
16088 is const0_rtx as the only components in the address, e.g. if the
16089 register is %rbp or %r13. As this test is much cheaper and moves or
16090 zero extensions are the common case, do this check first. */
16091 if (REG_P (operands[1])
16092 || (SImode_address_operand (operands[1], VOIDmode)
16093 && REG_P (XEXP (operands[1], 0))))
16094 return false;
16095
16096 ok = ix86_decompose_address (addr: operands[1], out: &parts);
16097 gcc_assert (ok);
16098
16099 /* There should be at least two components in the address. */
16100 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
16101 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
16102 return false;
16103
  /* We should not split into an add if a non-legitimate PIC
     operand is used as the displacement.  */
16106 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
16107 return false;
16108
  regno0 = true_regnum (operands[0]);
16110 regno1 = INVALID_REGNUM;
16111 regno2 = INVALID_REGNUM;
16112
16113 if (parts.base)
16114 regno1 = true_regnum (parts.base);
16115 if (parts.index)
16116 regno2 = true_regnum (parts.index);
16117
  /* Use add for a = a + b and a = b + a since it is faster and shorter
     than lea for most processors.  For processors like BONNELL, if
     the destination register of the LEA holds an actual address which
     will be used soon, LEA is better and otherwise ADD is better.  */
16122 if (!TARGET_CPU_P (BONNELL)
16123 && parts.scale == 1
16124 && (!parts.disp || parts.disp == const0_rtx)
16125 && (regno0 == regno1 || regno0 == regno2))
16126 return true;
16127
16128 /* Split with -Oz if the encoding requires fewer bytes. */
16129 if (optimize_size > 1
16130 && parts.scale > 1
16131 && !parts.base
16132 && (!parts.disp || parts.disp == const0_rtx))
16133 return true;
16134
  /* Check if we need to optimize.  */
16136 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
16137 return false;
16138
16139 split_cost = 0;
16140
16141 /* Compute how many cycles we will add to execution time
16142 if split lea into a sequence of instructions. */
16143 if (parts.base || parts.index)
16144 {
      /* Have to use a mov instruction if the non-destructive
	 destination form is used.  */
16147 if (regno1 != regno0 && regno2 != regno0)
16148 split_cost += 1;
16149
16150 /* Have to add index to base if both exist. */
16151 if (parts.base && parts.index)
16152 split_cost += 1;
16153
16154 /* Have to use shift and adds if scale is 2 or greater. */
16155 if (parts.scale > 1)
16156 {
16157 if (regno0 != regno1)
16158 split_cost += 1;
16159 else if (regno2 == regno0)
16160 split_cost += 4;
16161 else
16162 split_cost += parts.scale;
16163 }
16164
      /* Have to use an add instruction with an immediate if
	 disp is non-zero.  */
16167 if (parts.disp && parts.disp != const0_rtx)
16168 split_cost += 1;
16169
16170 /* Subtract the price of lea. */
16171 split_cost -= 1;
16172 }
16173
16174 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
16175 has_scale: parts.scale > 1);
16176}
16177
/* Return true if it is ok to optimize an ADD operation to an LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For processors like BONNELL, if the
   destination register of the LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */
16183
16184bool
16185ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
16186{
16187 unsigned int regno0 = true_regnum (operands[0]);
16188 unsigned int regno1 = true_regnum (operands[1]);
16189 unsigned int regno2 = true_regnum (operands[2]);
16190
  /* If a = b + c, (a != b && a != c), we must use the lea form.  */
16192 if (regno0 != regno1 && regno0 != regno2)
16193 return true;
16194
16195 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16196 return false;
16197
16198 return ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost: 0, has_scale: false);
16199}
16200
16201/* Return true if destination reg of SET_BODY is shift count of
16202 USE_BODY. */
16203
16204static bool
16205ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16206{
16207 rtx set_dest;
16208 rtx shift_rtx;
16209 int i;
16210
16211 /* Retrieve destination of SET_BODY. */
16212 switch (GET_CODE (set_body))
16213 {
16214 case SET:
16215 set_dest = SET_DEST (set_body);
16216 if (!set_dest || !REG_P (set_dest))
16217 return false;
16218 break;
16219 case PARALLEL:
16220 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16221 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16222 use_body))
16223 return true;
16224 /* FALLTHROUGH */
16225 default:
16226 return false;
16227 }
16228
16229 /* Retrieve shift count of USE_BODY. */
16230 switch (GET_CODE (use_body))
16231 {
16232 case SET:
16233 shift_rtx = XEXP (use_body, 1);
16234 break;
16235 case PARALLEL:
16236 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16237 if (ix86_dep_by_shift_count_body (set_body,
16238 XVECEXP (use_body, 0, i)))
16239 return true;
16240 /* FALLTHROUGH */
16241 default:
16242 return false;
16243 }
16244
16245 if (shift_rtx
16246 && (GET_CODE (shift_rtx) == ASHIFT
16247 || GET_CODE (shift_rtx) == LSHIFTRT
16248 || GET_CODE (shift_rtx) == ASHIFTRT
16249 || GET_CODE (shift_rtx) == ROTATE
16250 || GET_CODE (shift_rtx) == ROTATERT))
16251 {
16252 rtx shift_count = XEXP (shift_rtx, 1);
16253
16254 /* Return true if shift count is dest of SET_BODY. */
16255 if (REG_P (shift_count))
16256 {
	  /* Add this check since it can be invoked before register
	     allocation in the pre-reload scheduler.  */
16259 if (reload_completed
16260 && true_regnum (set_dest) == true_regnum (shift_count))
16261 return true;
16262 else if (REGNO(set_dest) == REGNO(shift_count))
16263 return true;
16264 }
16265 }
16266
16267 return false;
16268}
16269
16270/* Return true if destination reg of SET_INSN is shift count of
16271 USE_INSN. */
16272
16273bool
16274ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16275{
16276 return ix86_dep_by_shift_count_body (set_body: PATTERN (insn: set_insn),
16277 use_body: PATTERN (insn: use_insn));
16278}
16279
16280/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16281 are ok, keeping in mind the possible movddup alternative. */
16282
16283bool
16284ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16285{
16286 if (MEM_P (operands[0]))
16287 return rtx_equal_p (operands[0], operands[1 + high]);
16288 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16289 return false;
16290 return true;
16291}
16292
16293/* A subroutine of ix86_build_signbit_mask. If VECT is true,
16294 then replicate the value for all elements of the vector
16295 register. */
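
/* For example, for V4SImode (where VECT must be true) the result is
   { VALUE, VALUE, VALUE, VALUE }; for a float mode such as V4SFmode
   with VECT false, only element 0 is VALUE and the rest are zero.  */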
16296
16297rtx
16298ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
16299{
16300 int i, n_elt;
16301 rtvec v;
16302 machine_mode scalar_mode;
16303
16304 switch (mode)
16305 {
16306 case E_V64QImode:
16307 case E_V32QImode:
16308 case E_V16QImode:
16309 case E_V32HImode:
16310 case E_V16HImode:
16311 case E_V8HImode:
16312 case E_V16SImode:
16313 case E_V8SImode:
16314 case E_V4SImode:
16315 case E_V2SImode:
16316 case E_V8DImode:
16317 case E_V4DImode:
16318 case E_V2DImode:
16319 gcc_assert (vect);
16320 /* FALLTHRU */
16321 case E_V2HFmode:
16322 case E_V4HFmode:
16323 case E_V8HFmode:
16324 case E_V16HFmode:
16325 case E_V32HFmode:
16326 case E_V16SFmode:
16327 case E_V8SFmode:
16328 case E_V4SFmode:
16329 case E_V2SFmode:
16330 case E_V8DFmode:
16331 case E_V4DFmode:
16332 case E_V2DFmode:
16333 n_elt = GET_MODE_NUNITS (mode);
16334 v = rtvec_alloc (n_elt);
16335 scalar_mode = GET_MODE_INNER (mode);
16336
16337 RTVEC_ELT (v, 0) = value;
16338
16339 for (i = 1; i < n_elt; ++i)
16340 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
16341
16342 return gen_rtx_CONST_VECTOR (mode, v);
16343
16344 default:
16345 gcc_unreachable ();
16346 }
16347}
16348
16349/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16350 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16351 for an SSE register. If VECT is true, then replicate the mask for
16352 all elements of the vector register. If INVERT is true, then create
16353 a mask excluding the sign bit. */
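
/* For example, for V4SFmode with VECT true this yields a vector with
   0x80000000 (just the sign bit) in every lane; with INVERT true it
   yields 0x7fffffff in every lane instead.  */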
16354
16355rtx
16356ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
16357{
16358 machine_mode vec_mode, imode;
16359 wide_int w;
16360 rtx mask, v;
16361
16362 switch (mode)
16363 {
16364 case E_V2HFmode:
16365 case E_V4HFmode:
16366 case E_V8HFmode:
16367 case E_V16HFmode:
16368 case E_V32HFmode:
16369 vec_mode = mode;
16370 imode = HImode;
16371 break;
16372
16373 case E_V16SImode:
16374 case E_V16SFmode:
16375 case E_V8SImode:
16376 case E_V4SImode:
16377 case E_V8SFmode:
16378 case E_V4SFmode:
16379 case E_V2SFmode:
16380 case E_V2SImode:
16381 vec_mode = mode;
16382 imode = SImode;
16383 break;
16384
16385 case E_V8DImode:
16386 case E_V4DImode:
16387 case E_V2DImode:
16388 case E_V8DFmode:
16389 case E_V4DFmode:
16390 case E_V2DFmode:
16391 vec_mode = mode;
16392 imode = DImode;
16393 break;
16394
16395 case E_TImode:
16396 case E_TFmode:
16397 vec_mode = VOIDmode;
16398 imode = TImode;
16399 break;
16400
16401 default:
16402 gcc_unreachable ();
16403 }
16404
16405 machine_mode inner_mode = GET_MODE_INNER (mode);
16406 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
16407 GET_MODE_BITSIZE (inner_mode));
16408 if (invert)
16409 w = wi::bit_not (x: w);
16410
16411 /* Force this value into the low part of a fp vector constant. */
16412 mask = immed_wide_int_const (w, imode);
16413 mask = gen_lowpart (inner_mode, mask);
16414
16415 if (vec_mode == VOIDmode)
16416 return force_reg (inner_mode, mask);
16417
16418 v = ix86_build_const_vector (mode: vec_mode, vect, value: mask);
16419 return force_reg (vec_mode, v);
16420}
16421
16422/* Return HOST_WIDE_INT for const vector OP in MODE. */
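
/* Elements are packed starting at the least significant end, so e.g.
   a V4QImode vector { 1, 2, 3, 4 } yields 0x04030201.  */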
16423
16424HOST_WIDE_INT
16425ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
16426{
16427 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16428 gcc_unreachable ();
16429
16430 int nunits = GET_MODE_NUNITS (mode);
16431 wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
16432 machine_mode innermode = GET_MODE_INNER (mode);
16433 unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);
16434
16435 switch (mode)
16436 {
16437 case E_V2QImode:
16438 case E_V4QImode:
16439 case E_V2HImode:
16440 case E_V8QImode:
16441 case E_V4HImode:
16442 case E_V2SImode:
16443 for (int i = 0; i < nunits; ++i)
16444 {
16445 int v = INTVAL (XVECEXP (op, 0, i));
16446 wide_int wv = wi::shwi (val: v, precision: innermode_bits);
16447 val = wi::insert (x: val, y: wv, innermode_bits * i, innermode_bits);
16448 }
16449 break;
16450 case E_V2HFmode:
16451 case E_V2BFmode:
16452 case E_V4HFmode:
16453 case E_V4BFmode:
16454 case E_V2SFmode:
16455 for (int i = 0; i < nunits; ++i)
16456 {
16457 rtx x = XVECEXP (op, 0, i);
16458 int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
16459 REAL_MODE_FORMAT (innermode));
16460 wide_int wv = wi::shwi (val: v, precision: innermode_bits);
16461 val = wi::insert (x: val, y: wv, innermode_bits * i, innermode_bits);
16462 }
16463 break;
16464 default:
16465 gcc_unreachable ();
16466 }
16467
16468 return val.to_shwi ();
16469}
16470
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes and whether the
   CC mode is at least as constrained as REQ_MODE.  */
16474
16475bool
16476ix86_match_ccmode (rtx insn, machine_mode req_mode)
16477{
16478 rtx set;
16479 machine_mode set_mode;
16480
16481 set = PATTERN (insn);
16482 if (GET_CODE (set) == PARALLEL)
16483 set = XVECEXP (set, 0, 0);
16484 gcc_assert (GET_CODE (set) == SET);
16485 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16486
16487 set_mode = GET_MODE (SET_DEST (set));
16488 switch (set_mode)
16489 {
16490 case E_CCNOmode:
16491 if (req_mode != CCNOmode
16492 && (req_mode != CCmode
16493 || XEXP (SET_SRC (set), 1) != const0_rtx))
16494 return false;
16495 break;
16496 case E_CCmode:
16497 if (req_mode == CCGCmode)
16498 return false;
16499 /* FALLTHRU */
16500 case E_CCGCmode:
16501 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16502 return false;
16503 /* FALLTHRU */
16504 case E_CCGOCmode:
16505 if (req_mode == CCZmode)
16506 return false;
16507 /* FALLTHRU */
16508 case E_CCZmode:
16509 break;
16510
16511 case E_CCGZmode:
16512
16513 case E_CCAmode:
16514 case E_CCCmode:
16515 case E_CCOmode:
16516 case E_CCPmode:
16517 case E_CCSmode:
16518 if (set_mode != req_mode)
16519 return false;
16520 break;
16521
16522 default:
16523 gcc_unreachable ();
16524 }
16525
16526 return GET_MODE (SET_SRC (set)) == set_mode;
16527}
16528
16529machine_mode
16530ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16531{
16532 machine_mode mode = GET_MODE (op0);
16533
16534 if (SCALAR_FLOAT_MODE_P (mode))
16535 {
16536 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16537 return CCFPmode;
16538 }
16539
16540 switch (code)
16541 {
16542 /* Only zero flag is needed. */
16543 case EQ: /* ZF=0 */
16544 case NE: /* ZF!=0 */
16545 return CCZmode;
16546 /* Codes needing carry flag. */
16547 case GEU: /* CF=0 */
16548 case LTU: /* CF=1 */
16549 rtx geu;
16550 /* Detect overflow checks. They need just the carry flag. */
16551 if (GET_CODE (op0) == PLUS
16552 && (rtx_equal_p (op1, XEXP (op0, 0))
16553 || rtx_equal_p (op1, XEXP (op0, 1))))
16554 return CCCmode;
16555 /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
16556 Match LTU of op0
16557 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
16558 and op1
16559 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
16560 where CC_CCC is either CC or CCC. */
16561 else if (code == LTU
16562 && GET_CODE (op0) == NEG
16563 && GET_CODE (geu = XEXP (op0, 0)) == GEU
16564 && REG_P (XEXP (geu, 0))
16565 && (GET_MODE (XEXP (geu, 0)) == CCCmode
16566 || GET_MODE (XEXP (geu, 0)) == CCmode)
16567 && REGNO (XEXP (geu, 0)) == FLAGS_REG
16568 && XEXP (geu, 1) == const0_rtx
16569 && GET_CODE (op1) == LTU
16570 && REG_P (XEXP (op1, 0))
16571 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
16572 && REGNO (XEXP (op1, 0)) == FLAGS_REG
16573 && XEXP (op1, 1) == const0_rtx)
16574 return CCCmode;
16575 /* Similarly for *x86_cmc pattern.
16576 Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
16577 and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
16578 It is sufficient to test that the operand modes are CCCmode. */
16579 else if (code == LTU
16580 && GET_CODE (op0) == NEG
16581 && GET_CODE (XEXP (op0, 0)) == LTU
16582 && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
16583 && GET_CODE (op1) == GEU
16584 && GET_MODE (XEXP (op1, 0)) == CCCmode)
16585 return CCCmode;
16586 else
16587 return CCmode;
16588 case GTU: /* CF=0 & ZF=0 */
16589 case LEU: /* CF=1 | ZF=1 */
16590 return CCmode;
16591 /* Codes possibly doable only with sign flag when
16592 comparing against zero. */
16593 case GE: /* SF=OF or SF=0 */
16594 case LT: /* SF<>OF or SF=1 */
16595 if (op1 == const0_rtx)
16596 return CCGOCmode;
16597 else
16598 /* For other cases Carry flag is not required. */
16599 return CCGCmode;
    /* Codes doable only with the sign flag when comparing
       against zero, but for which we miss the jump instruction,
       so we need to use relational tests against the overflow
       flag, which thus needs to be zero.  */
16604 case GT: /* ZF=0 & SF=OF */
16605 case LE: /* ZF=1 | SF<>OF */
16606 if (op1 == const0_rtx)
16607 return CCNOmode;
16608 else
16609 return CCGCmode;
16610 default:
16611 /* CCmode should be used in all other cases. */
16612 return CCmode;
16613 }
16614}
16615
16616/* Return TRUE or FALSE depending on whether the ptest instruction
16617 INSN has source and destination with suitable matching CC modes. */
16618
16619bool
16620ix86_match_ptest_ccmode (rtx insn)
16621{
16622 rtx set, src;
16623 machine_mode set_mode;
16624
16625 set = PATTERN (insn);
16626 gcc_assert (GET_CODE (set) == SET);
16627 src = SET_SRC (set);
16628 gcc_assert (GET_CODE (src) == UNSPEC
16629 && XINT (src, 1) == UNSPEC_PTEST);
16630
16631 set_mode = GET_MODE (src);
16632 if (set_mode != CCZmode
16633 && set_mode != CCCmode
16634 && set_mode != CCmode)
16635 return false;
16636 return GET_MODE (SET_DEST (set)) == set_mode;
16637}
16638
16639/* Return the fixed registers used for condition codes. */
16640
16641static bool
16642ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16643{
16644 *p1 = FLAGS_REG;
16645 *p2 = INVALID_REGNUM;
16646 return true;
16647}
16648
16649/* If two condition code modes are compatible, return a condition code
16650 mode which is compatible with both. Otherwise, return
16651 VOIDmode. */
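
/* For example, combining CCZmode with CCNOmode yields CCNOmode, since a
   comparison computed in CCNOmode provides everything a CCZmode user
   needs; CCFPmode is compatible only with itself.  */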
16652
16653static machine_mode
16654ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
16655{
16656 if (m1 == m2)
16657 return m1;
16658
16659 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16660 return VOIDmode;
16661
16662 if ((m1 == CCGCmode && m2 == CCGOCmode)
16663 || (m1 == CCGOCmode && m2 == CCGCmode))
16664 return CCGCmode;
16665
16666 if ((m1 == CCNOmode && m2 == CCGOCmode)
16667 || (m1 == CCGOCmode && m2 == CCNOmode))
16668 return CCNOmode;
16669
16670 if (m1 == CCZmode
16671 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
16672 return m2;
16673 else if (m2 == CCZmode
16674 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
16675 return m1;
16676
16677 switch (m1)
16678 {
16679 default:
16680 gcc_unreachable ();
16681
16682 case E_CCmode:
16683 case E_CCGCmode:
16684 case E_CCGOCmode:
16685 case E_CCNOmode:
16686 case E_CCAmode:
16687 case E_CCCmode:
16688 case E_CCOmode:
16689 case E_CCPmode:
16690 case E_CCSmode:
16691 case E_CCZmode:
16692 switch (m2)
16693 {
16694 default:
16695 return VOIDmode;
16696
16697 case E_CCmode:
16698 case E_CCGCmode:
16699 case E_CCGOCmode:
16700 case E_CCNOmode:
16701 case E_CCAmode:
16702 case E_CCCmode:
16703 case E_CCOmode:
16704 case E_CCPmode:
16705 case E_CCSmode:
16706 case E_CCZmode:
16707 return CCmode;
16708 }
16709
16710 case E_CCFPmode:
16711 /* These are only compatible with themselves, which we already
16712 checked above. */
16713 return VOIDmode;
16714 }
16715}
16716
16717/* Return strategy to use for floating-point. We assume that fcomi is always
16718 preferable where available, since that is also true when looking at size
16719 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
16720
16721enum ix86_fpcmp_strategy
16722ix86_fp_comparison_strategy (enum rtx_code)
16723{
16724 /* Do fcomi/sahf based test when profitable. */
16725
16726 if (TARGET_CMOVE)
16727 return IX86_FPCMP_COMI;
16728
16729 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
16730 return IX86_FPCMP_SAHF;
16731
16732 return IX86_FPCMP_ARITH;
16733}
16734
16735/* Convert comparison codes we use to represent FP comparison to integer
16736 code that will result in proper branch. Return UNKNOWN if no such code
16737 is available. */
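/* For example, after a comi/ucomi-style comparison the flags look like an
   unsigned comparison (ZF/CF), so a floating-point GT is tested with the
   GTU condition (ja) and UNLT with LTU (jb). */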
16738
16739enum rtx_code
16740ix86_fp_compare_code_to_integer (enum rtx_code code)
16741{
16742 switch (code)
16743 {
16744 case GT:
16745 return GTU;
16746 case GE:
16747 return GEU;
16748 case ORDERED:
16749 case UNORDERED:
16750 return code;
16751 case UNEQ:
16752 return EQ;
16753 case UNLT:
16754 return LTU;
16755 case UNLE:
16756 return LEU;
16757 case LTGT:
16758 return NE;
16759 default:
16760 return UNKNOWN;
16761 }
16762}
16763
16764/* Zero-extend EXP, which may be in SImode, into a Pmode register. */
16765rtx
16766ix86_zero_extend_to_Pmode (rtx exp)
16767{
16768 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
16769}
16770
16771/* Return true if the function is called via PLT. */
16772
16773bool
16774ix86_call_use_plt_p (rtx call_op)
16775{
16776 if (SYMBOL_REF_LOCAL_P (call_op))
16777 {
16778 if (SYMBOL_REF_DECL (call_op)
16779 && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL)
16780 {
16781 /* NB: All ifunc functions must be called via PLT. */
16782 cgraph_node *node
16783 = cgraph_node::get (SYMBOL_REF_DECL (call_op));
16784 if (node && node->ifunc_resolver)
16785 return true;
16786 }
16787 return false;
16788 }
16789 return true;
16790}
16791
16792/* Implement TARGET_IFUNC_REF_LOCAL_OK. If this hook returns true,
16793 the PLT entry will be used as the function address for local IFUNC
16794 functions. When the PIC register is needed for PLT call, indirect
16795 call via the PLT entry will fail since the PIC register may not be
16796 set up properly for indirect call. In this case, we should return
16797 false. */
16798
16799static bool
16800ix86_ifunc_ref_local_ok (void)
16801{
16802 return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC);
16803}
16804
16805/* Return true if the function being called was marked with attribute
16806 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
16807 to handle the non-PIC case in the backend because there is no easy
16808 interface for the front-end to force non-PLT calls to use the GOT.
16809 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
16810 to call the function marked "noplt" indirectly. */
16811
16812static bool
16813ix86_nopic_noplt_attribute_p (rtx call_op)
16814{
16815 if (flag_pic || ix86_cmodel == CM_LARGE
16816 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
16817 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
16818 || SYMBOL_REF_LOCAL_P (call_op))
16819 return false;
16820
16821 tree symbol_decl = SYMBOL_REF_DECL (call_op);
16822
16823 if (!flag_plt
16824 || (symbol_decl != NULL_TREE
16825 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
16826 return true;
16827
16828 return false;
16829}
16830
16831/* Helper to output the jmp/call. */
16832static void
16833ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
16834{
16835 if (thunk_name != NULL)
16836 {
16837 if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
16838 && ix86_indirect_branch_cs_prefix)
16839 fprintf (asm_out_file, "\tcs\n");
16840 fprintf (asm_out_file, "\tjmp\t");
16841 assemble_name (asm_out_file, thunk_name);
16842 putc ('\n', asm_out_file);
16843 if ((ix86_harden_sls & harden_sls_indirect_jmp))
16844 fputs ("\tint3\n", asm_out_file);
16845 }
16846 else
16847 output_indirect_thunk (regno);
16848}
16849
16850/* Output indirect branch via a call and return thunk. CALL_OP is a
16851 register which contains the branch target. XASM is the assembly
16852 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
16853 A normal call is converted to:
16854
16855 call __x86_indirect_thunk_reg
16856
16857 and a tail call is converted to:
16858
16859 jmp __x86_indirect_thunk_reg
16860 */
16861
16862static void
16863ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
16864{
16865 char thunk_name_buf[32];
16866 char *thunk_name;
16867 enum indirect_thunk_prefix need_prefix
16868 = indirect_thunk_need_prefix (current_output_insn);
16869 int regno = REGNO (call_op);
16870
16871 if (cfun->machine->indirect_branch_type
16872 != indirect_branch_thunk_inline)
16873 {
16874 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
16875 SET_HARD_REG_BIT (indirect_thunks_used, regno);
16876
16877 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
16878 thunk_name = thunk_name_buf;
16879 }
16880 else
16881 thunk_name = NULL;
16882
16883 if (sibcall_p)
16884 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16885 else
16886 {
16887 if (thunk_name != NULL)
16888 {
16889 if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
16890 && ix86_indirect_branch_cs_prefix)
16891 fprintf (asm_out_file, "\tcs\n");
16892 fprintf (asm_out_file, "\tcall\t");
16893 assemble_name (asm_out_file, thunk_name);
16894 putc ('\n', asm_out_file);
16895 return;
16896 }
16897
16898 char indirectlabel1[32];
16899 char indirectlabel2[32];
16900
16901 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
16902 INDIRECT_LABEL,
16903 indirectlabelno++);
16904 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
16905 INDIRECT_LABEL,
16906 indirectlabelno++);
16907
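/* For the inline-thunk case a normal (non-sibcall) call is emitted as
	jmp	.LIND2
   .LIND1:
	<inline indirect branch through the register>
   .LIND2:
	call	.LIND1
   so the return address pushed by the call points just past the call insn. */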
16908 /* Jump. */
16909 fputs ("\tjmp\t", asm_out_file);
16910 assemble_name_raw (asm_out_file, indirectlabel2);
16911 fputc ('\n', asm_out_file);
16912
16913 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
16914
16915 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16916
16917 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
16918
16919 /* Call. */
16920 fputs ("\tcall\t", asm_out_file);
16921 assemble_name_raw (asm_out_file, indirectlabel1);
16922 fputc ('\n', asm_out_file);
16923 }
16924}
16925
16926/* Output indirect branch via a call and return thunk. CALL_OP is
16927 the branch target. XASM is the assembly template for CALL_OP.
16928 Branch is a tail call if SIBCALL_P is true. A normal call is
16929 converted to:
16930
16931 jmp L2
16932 L1:
16933 push CALL_OP
16934 jmp __x86_indirect_thunk
16935 L2:
16936 call L1
16937
16938 and a tail call is converted to:
16939
16940 push CALL_OP
16941 jmp __x86_indirect_thunk
16942 */
16943
16944static void
16945ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
16946 bool sibcall_p)
16947{
16948 char thunk_name_buf[32];
16949 char *thunk_name;
16950 char push_buf[64];
16951 enum indirect_thunk_prefix need_prefix
16952 = indirect_thunk_need_prefix (current_output_insn);
16953 int regno = -1;
16954
16955 if (cfun->machine->indirect_branch_type
16956 != indirect_branch_thunk_inline)
16957 {
16958 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
16959 indirect_thunk_needed = true;
16960 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
16961 thunk_name = thunk_name_buf;
16962 }
16963 else
16964 thunk_name = NULL;
16965
16966 snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
16967 TARGET_64BIT ? 'q' : 'l', xasm);
16968
16969 if (sibcall_p)
16970 {
16971 output_asm_insn (push_buf, &call_op);
16972 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16973 }
16974 else
16975 {
16976 char indirectlabel1[32];
16977 char indirectlabel2[32];
16978
16979 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
16980 INDIRECT_LABEL,
16981 indirectlabelno++);
16982 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
16983 INDIRECT_LABEL,
16984 indirectlabelno++);
16985
16986 /* Jump. */
16987 fputs ("\tjmp\t", asm_out_file);
16988 assemble_name_raw (asm_out_file, indirectlabel2);
16989 fputc ('\n', asm_out_file);
16990
16991 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
16992
16993 /* An external function may be called via GOT, instead of PLT. */
16994 if (MEM_P (call_op))
16995 {
16996 struct ix86_address parts;
16997 rtx addr = XEXP (call_op, 0);
16998 if (ix86_decompose_address (addr, &parts)
16999 && parts.base == stack_pointer_rtx)
17000 {
17001 /* Since call will adjust stack by -UNITS_PER_WORD,
17002 we must convert "disp(stack, index, scale)" to
17003 "disp+UNITS_PER_WORD(stack, index, scale)". */
17004 if (parts.index)
17005 {
17006 addr = gen_rtx_MULT (Pmode, parts.index,
17007 GEN_INT (parts.scale));
17008 addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
17009 addr);
17010 }
17011 else
17012 addr = stack_pointer_rtx;
17013
17014 rtx disp;
17015 if (parts.disp != NULL_RTX)
17016 disp = plus_constant (Pmode, parts.disp,
17017 UNITS_PER_WORD);
17018 else
17019 disp = GEN_INT (UNITS_PER_WORD);
17020
17021 addr = gen_rtx_PLUS (Pmode, addr, disp);
17022 call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
17023 }
17024 }
17025
17026 output_asm_insn (push_buf, &call_op);
17027
17028 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17029
17030 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
17031
17032 /* Call. */
17033 fputs ("\tcall\t", asm_out_file);
17034 assemble_name_raw (asm_out_file, indirectlabel1);
17035 fputc ('\n', asm_out_file);
17036 }
17037}
17038
17039/* Output indirect branch via a call and return thunk. CALL_OP is
17040 the branch target. XASM is the assembly template for CALL_OP.
17041 Branch is a tail call if SIBCALL_P is true. */
17042
17043static void
17044ix86_output_indirect_branch (rtx call_op, const char *xasm,
17045 bool sibcall_p)
17046{
17047 if (REG_P (call_op))
17048 ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
17049 else
17050 ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
17051}
17052
17053/* Output indirect jump. CALL_OP is the jump target. */
17054
17055const char *
17056ix86_output_indirect_jmp (rtx call_op)
17057{
17058 if (cfun->machine->indirect_branch_type != indirect_branch_keep)
17059 {
17060 /* We can't have red-zone since "call" in the indirect thunk
17061 pushes the return address onto the stack, destroying the red zone. */
17062 if (ix86_red_zone_used)
17063 gcc_unreachable ();
17064
17065 ix86_output_indirect_branch (call_op, "%0", true);
17066 }
17067 else
17068 output_asm_insn ("%!jmp\t%A0", &call_op);
17069 return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "";
17070}
17071
17072/* Output return instrumentation for current function if needed. */
17073
17074static void
17075output_return_instrumentation (void)
17076{
17077 if (ix86_instrument_return != instrument_return_none
17078 && flag_fentry
17079 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
17080 {
17081 if (ix86_flag_record_return)
17082 fprintf (asm_out_file, "1:\n");
17083 switch (ix86_instrument_return)
17084 {
17085 case instrument_return_call:
17086 fprintf (asm_out_file, "\tcall\t__return__\n");
17087 break;
17088 case instrument_return_nop5:
17089 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
17090 fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
17091 break;
17092 case instrument_return_none:
17093 break;
17094 }
17095
17096 if (ix86_flag_record_return)
17097 {
17098 fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
17099 fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
17100 fprintf (asm_out_file, "\t.previous\n");
17101 }
17102 }
17103}
17104
17105/* Output a function return. Add a REP prefix to RET if LONG_P is true
17106 and the function return sequence is kept (not replaced by a thunk). */
17107
17108const char *
17109ix86_output_function_return (bool long_p)
17110{
17111 output_return_instrumentation ();
17112
17113 if (cfun->machine->function_return_type != indirect_branch_keep)
17114 {
17115 char thunk_name[32];
17116 enum indirect_thunk_prefix need_prefix
17117 = indirect_thunk_need_prefix (current_output_insn);
17118
17119 if (cfun->machine->function_return_type
17120 != indirect_branch_thunk_inline)
17121 {
17122 bool need_thunk = (cfun->machine->function_return_type
17123 == indirect_branch_thunk);
17124 indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
17125 true);
17126 indirect_return_needed |= need_thunk;
17127 fprintf (asm_out_file, "\tjmp\t");
17128 assemble_name (asm_out_file, thunk_name);
17129 putc ('\n', asm_out_file);
17130 }
17131 else
17132 output_indirect_thunk (INVALID_REGNUM);
17133
17134 return "";
17135 }
17136
17137 output_asm_insn (long_p ? "rep%; ret" : "ret", nullptr);
17138 return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
17139}
17140
17141/* Output indirect function return. RET_OP is the function return
17142 target. */
17143
17144const char *
17145ix86_output_indirect_function_return (rtx ret_op)
17146{
17147 if (cfun->machine->function_return_type != indirect_branch_keep)
17148 {
17149 char thunk_name[32];
17150 enum indirect_thunk_prefix need_prefix
17151 = indirect_thunk_need_prefix (current_output_insn);
17152 unsigned int regno = REGNO (ret_op);
17153 gcc_assert (regno == CX_REG);
17154
17155 if (cfun->machine->function_return_type
17156 != indirect_branch_thunk_inline)
17157 {
17158 bool need_thunk = (cfun->machine->function_return_type
17159 == indirect_branch_thunk);
17160 indirect_thunk_name (thunk_name, regno, need_prefix, true);
17161
17162 if (need_thunk)
17163 {
17164 indirect_return_via_cx = true;
17165 SET_HARD_REG_BIT (indirect_thunks_used, CX_REG);
17166 }
17167 fprintf (asm_out_file, "\tjmp\t");
17168 assemble_name (asm_out_file, thunk_name);
17169 putc ('\n', asm_out_file);
17170 }
17171 else
17172 output_indirect_thunk (regno);
17173 }
17174 else
17175 {
17176 output_asm_insn ("%!jmp\t%A0", &ret_op);
17177 if (ix86_harden_sls & harden_sls_indirect_jmp)
17178 fputs ("\tint3\n", asm_out_file);
17179 }
17180 return "";
17181}
17182
17183/* Output the assembly for a call instruction. */
17184
17185const char *
17186ix86_output_call_insn (rtx_insn *insn, rtx call_op)
17187{
17188 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
17189 bool output_indirect_p
17190 = (!TARGET_SEH
17191 && cfun->machine->indirect_branch_type != indirect_branch_keep);
17192 bool seh_nop_p = false;
17193 const char *xasm;
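  /* Note: the {att|intel} braces in the templates below are assembler
     dialect alternatives; output_asm_insn keeps the text before '|' for
     AT&T output and the text after it for Intel syntax. */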
17194
17195 if (SIBLING_CALL_P (insn))
17196 {
17197 output_return_instrumentation ();
17198 if (direct_p)
17199 {
17200 if (ix86_nopic_noplt_attribute_p (call_op))
17201 {
17202 direct_p = false;
17203 if (TARGET_64BIT)
17204 {
17205 if (output_indirect_p)
17206 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17207 else
17208 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17209 }
17210 else
17211 {
17212 if (output_indirect_p)
17213 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
17214 else
17215 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
17216 }
17217 }
17218 else
17219 xasm = "%!jmp\t%P0";
17220 }
17221 /* SEH epilogue detection requires the indirect branch case
17222 to include REX.W. */
17223 else if (TARGET_SEH)
17224 xasm = "%!rex.W jmp\t%A0";
17225 else
17226 {
17227 if (output_indirect_p)
17228 xasm = "%0";
17229 else
17230 xasm = "%!jmp\t%A0";
17231 }
17232
17233 if (output_indirect_p && !direct_p)
17234 ix86_output_indirect_branch (call_op, xasm, true);
17235 else
17236 {
17237 output_asm_insn (xasm, &call_op);
17238 if (!direct_p
17239 && (ix86_harden_sls & harden_sls_indirect_jmp))
17240 return "int3";
17241 }
17242 return "";
17243 }
17244
17245 /* SEH unwinding can require an extra nop to be emitted in several
17246 circumstances. Determine if we have one of those. */
17247 if (TARGET_SEH)
17248 {
17249 rtx_insn *i;
17250
17251 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
17252 {
17253 /* Prevent a catch region from being adjacent to a jump that would
17254 be interpreted as an epilogue sequence by the unwinder. */
17255 if (JUMP_P(i) && CROSSING_JUMP_P (i))
17256 {
17257 seh_nop_p = true;
17258 break;
17259 }
17260
17261 /* If we get to another real insn, we don't need the nop. */
17262 if (INSN_P (i))
17263 break;
17264
17265 /* If we get to the epilogue note, prevent a catch region from
17266 being adjacent to the standard epilogue sequence. Note that,
17267 if non-call exceptions are enabled, we already did it during
17268 epilogue expansion, or else, if the insn can throw internally,
17269 we already did it during the reorg pass. */
17270 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
17271 && !flag_non_call_exceptions
17272 && !can_throw_internal (insn))
17273 {
17274 seh_nop_p = true;
17275 break;
17276 }
17277 }
17278
17279 /* If we didn't find a real insn following the call, prevent the
17280 unwinder from looking into the next function. */
17281 if (i == NULL)
17282 seh_nop_p = true;
17283 }
17284
17285 if (direct_p)
17286 {
17287 if (ix86_nopic_noplt_attribute_p (call_op))
17288 {
17289 direct_p = false;
17290 if (TARGET_64BIT)
17291 {
17292 if (output_indirect_p)
17293 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17294 else
17295 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17296 }
17297 else
17298 {
17299 if (output_indirect_p)
17300 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
17301 else
17302 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
17303 }
17304 }
17305 else
17306 xasm = "%!call\t%P0";
17307 }
17308 else
17309 {
17310 if (output_indirect_p)
17311 xasm = "%0";
17312 else
17313 xasm = "%!call\t%A0";
17314 }
17315
17316 if (output_indirect_p && !direct_p)
17317 ix86_output_indirect_branch (call_op, xasm, false);
17318 else
17319 output_asm_insn (xasm, &call_op);
17320
17321 if (seh_nop_p)
17322 return "nop";
17323
17324 return "";
17325}
17326
17327/* Return a MEM corresponding to a stack slot with mode MODE.
17328 Allocate a new slot if necessary.
17329
17330 The RTL for a function can have several slots available: N is
17331 which slot to use. */
17332
17333rtx
17334assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
17335{
17336 struct stack_local_entry *s;
17337
17338 gcc_assert (n < MAX_386_STACK_LOCALS);
17339
17340 for (s = ix86_stack_locals; s; s = s->next)
17341 if (s->mode == mode && s->n == n)
17342 return validize_mem (copy_rtx (s->rtl));
17343
17344 int align = 0;
17345 /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
17346 alignment with -m32 -mpreferred-stack-boundary=2. */
17347 if (mode == DImode
17348 && !TARGET_64BIT
17349 && n == SLOT_FLOATxFDI_387
17350 && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode))
17351 align = 32;
17352 s = ggc_alloc<stack_local_entry> ();
17353 s->n = n;
17354 s->mode = mode;
17355 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align);
17356
17357 s->next = ix86_stack_locals;
17358 ix86_stack_locals = s;
17359 return validize_mem (copy_rtx (s->rtl));
17360}
17361
17362static void
17363ix86_instantiate_decls (void)
17364{
17365 struct stack_local_entry *s;
17366
17367 for (s = ix86_stack_locals; s; s = s->next)
17368 if (s->rtl != NULL_RTX)
17369 instantiate_decl_rtl (s->rtl);
17370}
17371
17372/* Check whether x86 address PARTS is a pc-relative address. */
17373
17374bool
17375ix86_rip_relative_addr_p (struct ix86_address *parts)
17376{
17377 rtx base, index, disp;
17378
17379 base = parts->base;
17380 index = parts->index;
17381 disp = parts->disp;
17382
17383 if (disp && !base && !index)
17384 {
17385 if (TARGET_64BIT)
17386 {
17387 rtx symbol = disp;
17388
17389 if (GET_CODE (disp) == CONST)
17390 symbol = XEXP (disp, 0);
17391 if (GET_CODE (symbol) == PLUS
17392 && CONST_INT_P (XEXP (symbol, 1)))
17393 symbol = XEXP (symbol, 0);
17394
17395 if (GET_CODE (symbol) == LABEL_REF
17396 || (GET_CODE (symbol) == SYMBOL_REF
17397 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
17398 || (GET_CODE (symbol) == UNSPEC
17399 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
17400 || XINT (symbol, 1) == UNSPEC_PCREL
17401 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
17402 return true;
17403 }
17404 }
17405 return false;
17406}
17407
17408/* Calculate the length of the memory address in the instruction encoding.
17409 Includes addr32 prefix, does not include the one-byte modrm, opcode,
17410 or other prefixes. We never generate the addr32 prefix for LEA insns. */
17411
17412int
17413memory_address_length (rtx addr, bool lea)
17414{
17415 struct ix86_address parts;
17416 rtx base, index, disp;
17417 int len;
17418 int ok;
17419
17420 if (GET_CODE (addr) == PRE_DEC
17421 || GET_CODE (addr) == POST_INC
17422 || GET_CODE (addr) == PRE_MODIFY
17423 || GET_CODE (addr) == POST_MODIFY)
17424 return 0;
17425
17426 ok = ix86_decompose_address (addr, &parts);
17427 gcc_assert (ok);
17428
17429 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
17430
17431 /* If this is not LEA instruction, add the length of addr32 prefix. */
17432 if (TARGET_64BIT && !lea
17433 && (SImode_address_operand (addr, VOIDmode)
17434 || (parts.base && GET_MODE (parts.base) == SImode)
17435 || (parts.index && GET_MODE (parts.index) == SImode)))
17436 len++;
17437
17438 base = parts.base;
17439 index = parts.index;
17440 disp = parts.disp;
17441
17442 if (base && SUBREG_P (base))
17443 base = SUBREG_REG (base);
17444 if (index && SUBREG_P (index))
17445 index = SUBREG_REG (index);
17446
17447 gcc_assert (base == NULL_RTX || REG_P (base));
17448 gcc_assert (index == NULL_RTX || REG_P (index));
17449
17450 /* Rule of thumb:
17451 - esp as the base always wants an index,
17452 - ebp as the base always wants a displacement,
17453 - r12 as the base always wants an index,
17454 - r13 as the base always wants a displacement. */
17455
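/* Informally, for example (64-bit, no segment override): (%rax) costs 0
   extra bytes, 4(%rsp) costs 2 (disp8 plus the SIB byte forced by %rsp as
   base), foo(%rip) costs 4 (disp32 only), and a plain disp32 with no base
   or index costs 5 (disp32 plus the SIB byte needed to avoid the
   rip-relative encoding). */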
17456 /* Register Indirect. */
17457 if (base && !index && !disp)
17458 {
17459 /* esp (for its index) and ebp (for its displacement) need
17460 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
17461 code. */
17462 if (base == arg_pointer_rtx
17463 || base == frame_pointer_rtx
17464 || REGNO (base) == SP_REG
17465 || REGNO (base) == BP_REG
17466 || REGNO (base) == R12_REG
17467 || REGNO (base) == R13_REG)
17468 len++;
17469 }
17470
17471 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
17472 is not disp32, but disp32(%rip), so for disp32
17473 SIB byte is needed, unless print_operand_address
17474 optimizes it into disp32(%rip) or (%rip) is implied
17475 by UNSPEC. */
17476 else if (disp && !base && !index)
17477 {
17478 len += 4;
17479 if (!ix86_rip_relative_addr_p (&parts))
17480 len++;
17481 }
17482 else
17483 {
17484 /* Find the length of the displacement constant. */
17485 if (disp)
17486 {
17487 if (base && satisfies_constraint_K (disp))
17488 len += 1;
17489 else
17490 len += 4;
17491 }
17492 /* ebp always wants a displacement. Similarly r13. */
17493 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
17494 len++;
17495
17496 /* An index requires the two-byte modrm form.... */
17497 if (index
17498 /* ...like esp (or r12), which always wants an index. */
17499 || base == arg_pointer_rtx
17500 || base == frame_pointer_rtx
17501 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
17502 len++;
17503 }
17504
17505 return len;
17506}
17507
17508/* Compute default value for "length_immediate" attribute. When SHORTFORM
17509 is set, expect that the insn has an 8-bit immediate alternative. */
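/* For example, "addl $3, %eax" with a matching 8-bit alternative gets
   length_immediate 1, while "addl $0x12345678, %eax" gets 4; DImode
   immediates are also 4 because they are encoded as sign-extended
   32-bit values. */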
17510int
17511ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
17512{
17513 int len = 0;
17514 int i;
17515 extract_insn_cached (insn);
17516 for (i = recog_data.n_operands - 1; i >= 0; --i)
17517 if (CONSTANT_P (recog_data.operand[i]))
17518 {
17519 enum attr_mode mode = get_attr_mode (insn);
17520
17521 gcc_assert (!len);
17522 if (shortform && CONST_INT_P (recog_data.operand[i]))
17523 {
17524 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
17525 switch (mode)
17526 {
17527 case MODE_QI:
17528 len = 1;
17529 continue;
17530 case MODE_HI:
17531 ival = trunc_int_for_mode (ival, HImode);
17532 break;
17533 case MODE_SI:
17534 ival = trunc_int_for_mode (ival, SImode);
17535 break;
17536 default:
17537 break;
17538 }
17539 if (IN_RANGE (ival, -128, 127))
17540 {
17541 len = 1;
17542 continue;
17543 }
17544 }
17545 switch (mode)
17546 {
17547 case MODE_QI:
17548 len = 1;
17549 break;
17550 case MODE_HI:
17551 len = 2;
17552 break;
17553 case MODE_SI:
17554 len = 4;
17555 break;
17556 /* Immediates for DImode instructions are encoded
17557 as 32bit sign extended values. */
17558 case MODE_DI:
17559 len = 4;
17560 break;
17561 default:
17562 fatal_insn ("unknown insn mode", insn);
17563 }
17564 }
17565 return len;
17566}
17567
17568/* Compute default value for "length_address" attribute. */
17569int
17570ix86_attr_length_address_default (rtx_insn *insn)
17571{
17572 int i;
17573
17574 if (get_attr_type (insn) == TYPE_LEA)
17575 {
17576 rtx set = PATTERN (insn), addr;
17577
17578 if (GET_CODE (set) == PARALLEL)
17579 set = XVECEXP (set, 0, 0);
17580
17581 gcc_assert (GET_CODE (set) == SET);
17582
17583 addr = SET_SRC (set);
17584
17585 return memory_address_length (addr, true);
17586 }
17587
17588 extract_insn_cached (insn);
17589 for (i = recog_data.n_operands - 1; i >= 0; --i)
17590 {
17591 rtx op = recog_data.operand[i];
17592 if (MEM_P (op))
17593 {
17594 constrain_operands_cached (insn, reload_completed);
17595 if (which_alternative != -1)
17596 {
17597 const char *constraints = recog_data.constraints[i];
17598 int alt = which_alternative;
17599
17600 while (*constraints == '=' || *constraints == '+')
17601 constraints++;
17602 while (alt-- > 0)
17603 while (*constraints++ != ',')
17604 ;
17605 /* Skip ignored operands. */
17606 if (*constraints == 'X')
17607 continue;
17608 }
17609
17610 int len = memory_address_length (XEXP (op, 0), false);
17611
17612 /* Account for segment prefix for non-default addr spaces. */
17613 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
17614 len++;
17615
17616 return len;
17617 }
17618 }
17619 return 0;
17620}
17621
17622/* Compute default value for "length_vex" attribute. It includes
17623 2 or 3 byte VEX prefix and 1 opcode byte. */
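/* Roughly: the 2-byte VEX form (C5 ..) can only encode the 0F opcode map,
   cannot set VEX.W and cannot extend the base/index register fields, so
   anything needing those falls back to the 3-byte form (C4 .. ..); operands
   using the APX/REX2 extended GPRs cannot be encoded in VEX at all and are
   assumed to take the 4-byte extended EVEX prefix. */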
17624
17625int
17626ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
17627 bool has_vex_w)
17628{
17629 int i, reg_only = 2 + 1;
17630 bool has_mem = false;
17631
17632 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
17633 byte VEX prefix. */
17634 if (!has_0f_opcode || has_vex_w)
17635 return 3 + 1;
17636
17637 /* We can always use 2 byte VEX prefix in 32bit. */
17638 if (!TARGET_64BIT)
17639 return 2 + 1;
17640
17641 extract_insn_cached (insn);
17642
17643 for (i = recog_data.n_operands - 1; i >= 0; --i)
17644 if (REG_P (recog_data.operand[i]))
17645 {
17646 /* REX.W bit uses 3 byte VEX prefix.
17647 With REX2/APX registers, VEX is promoted to the 4-byte extended EVEX prefix. */
17648 if (GET_MODE (recog_data.operand[i]) == DImode
17649 && GENERAL_REG_P (recog_data.operand[i]))
17650 return 3 + 1;
17651
17652 /* REX.B bit requires 3-byte VEX. Right here we don't know which
17653 operand will be encoded using VEX.B, so be conservative.
17654 With REX2/APX registers, VEX is promoted to the 4-byte extended EVEX prefix. */
17655 if (REX_INT_REGNO_P (recog_data.operand[i])
17656 || REX2_INT_REGNO_P (recog_data.operand[i])
17657 || REX_SSE_REGNO_P (recog_data.operand[i]))
17658 reg_only = 3 + 1;
17659 }
17660 else if (MEM_P (recog_data.operand[i]))
17661 {
17662 /* REX2.X or REX2.B bits require the 4-byte extended EVEX prefix. */
17663 if (x86_extended_rex2reg_mentioned_p (recog_data.operand[i]))
17664 return 4;
17665
17666 /* REX.X or REX.B bits use 3 byte VEX prefix. */
17667 if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
17668 return 3 + 1;
17669
17670 has_mem = true;
17671 }
17672
17673 return has_mem ? 2 + 1 : reg_only;
17674}
17675
17676
17677static bool
17678ix86_class_likely_spilled_p (reg_class_t);
17679
17680/* Return true if the LHS of INSN is a HW function argument register; set
17681 *IS_SPILLED to true if it is a likely-spilled HW register. */
17682static bool
17683insn_is_function_arg (rtx insn, bool* is_spilled)
17684{
17685 rtx dst;
17686
17687 if (!NONDEBUG_INSN_P (insn))
17688 return false;
17689 /* Call instructions are not movable; ignore them. */
17690 if (CALL_P (insn))
17691 return false;
17692 insn = PATTERN (insn);
17693 if (GET_CODE (insn) == PARALLEL)
17694 insn = XVECEXP (insn, 0, 0);
17695 if (GET_CODE (insn) != SET)
17696 return false;
17697 dst = SET_DEST (insn);
17698 if (REG_P (dst) && HARD_REGISTER_P (dst)
17699 && ix86_function_arg_regno_p (REGNO (dst)))
17700 {
17701 /* Is it likely spilled HW register? */
17702 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
17703 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
17704 *is_spilled = true;
17705 return true;
17706 }
17707 return false;
17708}
17709
17710/* Add output dependencies for a chain of adjacent function-argument moves,
17711 but only if there is a move to a likely-spilled HW register. Return the
17712 first argument if at least one dependence was added, or NULL otherwise. */
17713static rtx_insn *
17714add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
17715{
17716 rtx_insn *insn;
17717 rtx_insn *last = call;
17718 rtx_insn *first_arg = NULL;
17719 bool is_spilled = false;
17720
17721 head = PREV_INSN (head);
17722
17723 /* Find the argument-passing instruction nearest to the call. */
17724 while (true)
17725 {
17726 last = PREV_INSN (last);
17727 if (last == head)
17728 return NULL;
17729 if (!NONDEBUG_INSN_P (last))
17730 continue;
17731 if (insn_is_function_arg (last, &is_spilled))
17732 break;
17733 return NULL;
17734 }
17735
17736 first_arg = last;
17737 while (true)
17738 {
17739 insn = PREV_INSN (last);
17740 if (!INSN_P (insn))
17741 break;
17742 if (insn == head)
17743 break;
17744 if (!NONDEBUG_INSN_P (insn))
17745 {
17746 last = insn;
17747 continue;
17748 }
17749 if (insn_is_function_arg (insn, &is_spilled))
17750 {
17751 /* Add an output dependence between two function arguments if the chain
17752 of output arguments contains likely-spilled HW registers. */
17753 if (is_spilled)
17754 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
17755 first_arg = last = insn;
17756 }
17757 else
17758 break;
17759 }
17760 if (!is_spilled)
17761 return NULL;
17762 return first_arg;
17763}
17764
17765/* Add output or anti dependency from insn to first_arg to restrict its code
17766 motion. */
17767static void
17768avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
17769{
17770 rtx set;
17771 rtx tmp;
17772
17773 set = single_set (insn);
17774 if (!set)
17775 return;
17776 tmp = SET_DEST (set);
17777 if (REG_P (tmp))
17778 {
17779 /* Add output dependency to the first function argument. */
17780 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
17781 return;
17782 }
17783 /* Add anti dependency. */
17784 add_dependence (first_arg, insn, REG_DEP_ANTI);
17785}
17786
17787/* Avoid cross-block motion of a function argument by adding a dependency
17788 from the first non-jump instruction in BB. */
17789static void
17790add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
17791{
17792 rtx_insn *insn = BB_END (bb);
17793
17794 while (insn)
17795 {
17796 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
17797 {
17798 rtx set = single_set (insn);
17799 if (set)
17800 {
17801 avoid_func_arg_motion (arg, insn);
17802 return;
17803 }
17804 }
17805 if (insn == BB_HEAD (bb))
17806 return;
17807 insn = PREV_INSN (insn);
17808 }
17809}
17810
17811/* Hook for pre-reload schedule - avoid motion of function arguments
17812 passed in likely spilled HW registers. */
17813static void
17814ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
17815{
17816 rtx_insn *insn;
17817 rtx_insn *first_arg = NULL;
17818 if (reload_completed)
17819 return;
17820 while (head != tail && DEBUG_INSN_P (head))
17821 head = NEXT_INSN (head);
17822 for (insn = tail; insn != head; insn = PREV_INSN (insn))
17823 if (INSN_P (insn) && CALL_P (insn))
17824 {
17825 first_arg = add_parameter_dependencies (insn, head);
17826 if (first_arg)
17827 {
17828 /* Add a dependee for the first argument to predecessors, but only
17829 if the region contains more than one block. */
17830 basic_block bb = BLOCK_FOR_INSN (insn);
17831 int rgn = CONTAINING_RGN (bb->index);
17832 int nr_blks = RGN_NR_BLOCKS (rgn);
17833 /* Skip trivial regions and region head blocks that can have
17834 predecessors outside of region. */
17835 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
17836 {
17837 edge e;
17838 edge_iterator ei;
17839
17840 /* Regions are SCCs with the exception of selective
17841 scheduling with pipelining of outer blocks enabled.
17842 So also check that immediate predecessors of a non-head
17843 block are in the same region. */
17844 FOR_EACH_EDGE (e, ei, bb->preds)
17845 {
17846 /* Avoid creating loop-carried dependencies by using the
17847 topological ordering in the region. */
17848 if (rgn == CONTAINING_RGN (e->src->index)
17849 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
17850 add_dependee_for_func_arg (first_arg, e->src);
17851 }
17852 }
17853 insn = first_arg;
17854 if (insn == head)
17855 break;
17856 }
17857 }
17858 else if (first_arg)
17859 avoid_func_arg_motion (first_arg, insn);
17860}
17861
17862/* Hook for pre-reload schedule - set priority of moves from likely spilled
17863 HW registers to the maximum, to schedule them as soon as possible. These are
17864 moves from function argument registers at the top of the function entry
17865 and moves from function return value registers after call. */
17866static int
17867ix86_adjust_priority (rtx_insn *insn, int priority)
17868{
17869 rtx set;
17870
17871 if (reload_completed)
17872 return priority;
17873
17874 if (!NONDEBUG_INSN_P (insn))
17875 return priority;
17876
17877 set = single_set (insn);
17878 if (set)
17879 {
17880 rtx tmp = SET_SRC (set);
17881 if (REG_P (tmp)
17882 && HARD_REGISTER_P (tmp)
17883 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
17884 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
17885 return current_sched_info->sched_max_insns_priority;
17886 }
17887
17888 return priority;
17889}
17890
17891/* Prepare for scheduling pass. */
17892static void
17893ix86_sched_init_global (FILE *, int, int)
17894{
17895 /* Install scheduling hooks for current CPU. Some of these hooks are used
17896 in time-critical parts of the scheduler, so we only set them up when
17897 they are actually used. */
17898 switch (ix86_tune)
17899 {
17900 case PROCESSOR_CORE2:
17901 case PROCESSOR_NEHALEM:
17902 case PROCESSOR_SANDYBRIDGE:
17903 case PROCESSOR_HASWELL:
17904 case PROCESSOR_TREMONT:
17905 case PROCESSOR_ALDERLAKE:
17906 case PROCESSOR_GENERIC:
17907 /* Do not perform multipass scheduling for pre-reload schedule
17908 to save compile time. */
17909 if (reload_completed)
17910 {
17911 ix86_core2i7_init_hooks ();
17912 break;
17913 }
17914 /* Fall through. */
17915 default:
17916 targetm.sched.dfa_post_advance_cycle = NULL;
17917 targetm.sched.first_cycle_multipass_init = NULL;
17918 targetm.sched.first_cycle_multipass_begin = NULL;
17919 targetm.sched.first_cycle_multipass_issue = NULL;
17920 targetm.sched.first_cycle_multipass_backtrack = NULL;
17921 targetm.sched.first_cycle_multipass_end = NULL;
17922 targetm.sched.first_cycle_multipass_fini = NULL;
17923 break;
17924 }
17925}
17926
17927
17928/* Implement TARGET_STATIC_RTX_ALIGNMENT. */
17929
17930static HOST_WIDE_INT
17931ix86_static_rtx_alignment (machine_mode mode)
17932{
17933 if (mode == DFmode)
17934 return 64;
17935 if (ALIGN_MODE_128 (mode))
17936 return MAX (128, GET_MODE_ALIGNMENT (mode));
17937 return GET_MODE_ALIGNMENT (mode);
17938}
17939
17940/* Implement TARGET_CONSTANT_ALIGNMENT. */
17941
17942static HOST_WIDE_INT
17943ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
17944{
17945 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
17946 || TREE_CODE (exp) == INTEGER_CST)
17947 {
17948 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
17949 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
17950 return MAX (mode_align, align);
17951 }
17952 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
17953 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
17954 return BITS_PER_WORD;
17955
17956 return align;
17957}
17958
17959/* Implement TARGET_EMPTY_RECORD_P. */
17960
17961static bool
17962ix86_is_empty_record (const_tree type)
17963{
17964 if (!TARGET_64BIT)
17965 return false;
17966 return default_is_empty_record (type);
17967}
17968
17969/* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
17970
17971static void
17972ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
17973{
17974 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
17975
17976 if (!cum->warn_empty)
17977 return;
17978
17979 if (!TYPE_EMPTY_P (type))
17980 return;
17981
17982 /* Don't warn if the function isn't visible outside of the TU. */
17983 if (cum->decl && !TREE_PUBLIC (cum->decl))
17984 return;
17985
17986 const_tree ctx = get_ultimate_context (cum->decl);
17987 if (ctx != NULL_TREE
17988 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
17989 return;
17990
17991 /* If the actual size of the type is zero, then there is no change
17992 in how objects of this size are passed. */
17993 if (int_size_in_bytes (type) == 0)
17994 return;
17995
17996 warning (OPT_Wabi, "empty class %qT parameter passing ABI "
17997 "changes in %<-fabi-version=12%> (GCC 8)", type);
17998
17999 /* Only warn once. */
18000 cum->warn_empty = false;
18001}
18002
18003/* This hook returns name of multilib ABI. */
18004
18005static const char *
18006ix86_get_multilib_abi_name (void)
18007{
18008 if (!(TARGET_64BIT_P (ix86_isa_flags)))
18009 return "i386";
18010 else if (TARGET_X32_P (ix86_isa_flags))
18011 return "x32";
18012 else
18013 return "x86_64";
18014}
18015
18016/* Compute the alignment for a variable for Intel MCU psABI. TYPE is
18017 the data type, and ALIGN is the alignment that the object would
18018 ordinarily have. */
18019
18020static int
18021iamcu_alignment (tree type, int align)
18022{
18023 machine_mode mode;
18024
18025 if (align < 32 || TYPE_USER_ALIGN (type))
18026 return align;
18027
18028 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
18029 bytes. */
18030 type = strip_array_types (type);
18031 if (TYPE_ATOMIC (type))
18032 return align;
18033
18034 mode = TYPE_MODE (type);
18035 switch (GET_MODE_CLASS (mode))
18036 {
18037 case MODE_INT:
18038 case MODE_COMPLEX_INT:
18039 case MODE_COMPLEX_FLOAT:
18040 case MODE_FLOAT:
18041 case MODE_DECIMAL_FLOAT:
18042 return 32;
18043 default:
18044 return align;
18045 }
18046}
18047
18048/* Compute the alignment for a static variable.
18049 TYPE is the data type, and ALIGN is the alignment that
18050 the object would ordinarily have. The value of this function is used
18051 instead of that alignment to align the object. */
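/* For example, with the default cache-line-oriented tuning a global
   aggregate of at least ix86_tune_cost->prefetch_block bytes (commonly 64)
   is aligned to that size, and on x86-64 any array of 16 bytes or more is
   given at least 16-byte alignment as the psABI requires. */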
18052
18053int
18054ix86_data_alignment (tree type, unsigned int align, bool opt)
18055{
18056 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
18057 for symbols from other compilation units or symbols that don't need
18058 to bind locally. In order to preserve some ABI compatibility with
18059 those compilers, ensure we don't decrease alignment from what we
18060 used to assume. */
18061
18062 unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
18063
18064 /* A data structure equal to or greater than the size of a cache line
18065 (64 bytes in the Pentium 4 and other recent Intel processors, including
18066 processors based on Intel Core microarchitecture) should be aligned
18067 so that its base address is a multiple of a cache line size. */
18068
18069 unsigned int max_align
18070 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
18071
18072 if (max_align < BITS_PER_WORD)
18073 max_align = BITS_PER_WORD;
18074
18075 switch (ix86_align_data_type)
18076 {
18077 case ix86_align_data_type_abi: opt = false; break;
18078 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
18079 case ix86_align_data_type_cacheline: break;
18080 }
18081
18082 if (TARGET_IAMCU)
18083 align = iamcu_alignment (type, align);
18084
18085 if (opt
18086 && AGGREGATE_TYPE_P (type)
18087 && TYPE_SIZE (type)
18088 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
18089 {
18090 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
18091 && align < max_align_compat)
18092 align = max_align_compat;
18093 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
18094 && align < max_align)
18095 align = max_align;
18096 }
18097
18098 /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
18099 to a 16-byte boundary. */
18100 if (TARGET_64BIT)
18101 {
18102 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
18103 && TYPE_SIZE (type)
18104 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18105 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
18106 && align < 128)
18107 return 128;
18108 }
18109
18110 if (!opt)
18111 return align;
18112
18113 if (TREE_CODE (type) == ARRAY_TYPE)
18114 {
18115 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18116 return 64;
18117 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18118 return 128;
18119 }
18120 else if (TREE_CODE (type) == COMPLEX_TYPE)
18121 {
18122
18123 if (TYPE_MODE (type) == DCmode && align < 64)
18124 return 64;
18125 if ((TYPE_MODE (type) == XCmode
18126 || TYPE_MODE (type) == TCmode) && align < 128)
18127 return 128;
18128 }
18129 else if (RECORD_OR_UNION_TYPE_P (type)
18130 && TYPE_FIELDS (type))
18131 {
18132 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18133 return 64;
18134 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18135 return 128;
18136 }
18137 else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
18138 || TREE_CODE (type) == INTEGER_TYPE)
18139 {
18140 if (TYPE_MODE (type) == DFmode && align < 64)
18141 return 64;
18142 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18143 return 128;
18144 }
18145
18146 return align;
18147}
18148
18149/* Implement TARGET_LOWER_LOCAL_DECL_ALIGNMENT. */
18150static void
18151ix86_lower_local_decl_alignment (tree decl)
18152{
18153 unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
18154 DECL_ALIGN (decl), true);
18155 if (new_align < DECL_ALIGN (decl))
18156 SET_DECL_ALIGN (decl, new_align);
18157}
18158
18159/* Compute the alignment for a local variable or a stack slot. EXP is
18160 the data type or decl itself, MODE is the widest mode available and
18161 ALIGN is the alignment that the object would ordinarily have. The
18162 value of this macro is used instead of that alignment to align the
18163 object. */
18164
18165unsigned int
18166ix86_local_alignment (tree exp, machine_mode mode,
18167 unsigned int align, bool may_lower)
18168{
18169 tree type, decl;
18170
18171 if (exp && DECL_P (exp))
18172 {
18173 type = TREE_TYPE (exp);
18174 decl = exp;
18175 }
18176 else
18177 {
18178 type = exp;
18179 decl = NULL;
18180 }
18181
18182 /* Don't do dynamic stack realignment for long long objects with
18183 -mpreferred-stack-boundary=2. */
18184 if (may_lower
18185 && !TARGET_64BIT
18186 && align == 64
18187 && ix86_preferred_stack_boundary < 64
18188 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
18189 && (!type || (!TYPE_USER_ALIGN (type)
18190 && !TYPE_ATOMIC (strip_array_types (type))))
18191 && (!decl || !DECL_USER_ALIGN (decl)))
18192 align = 32;
18193
18194 /* If TYPE is NULL, we are allocating a stack slot for caller-save
18195 register in MODE. We will return the largest alignment of XF
18196 and DF. */
18197 if (!type)
18198 {
18199 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
18200 align = GET_MODE_ALIGNMENT (DFmode);
18201 return align;
18202 }
18203
18204 /* Don't increase alignment for Intel MCU psABI. */
18205 if (TARGET_IAMCU)
18206 return align;
18207
18208 /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
18209 to a 16-byte boundary. Exact wording is:
18210
18211 An array uses the same alignment as its elements, except that a local or
18212 global array variable of length at least 16 bytes or
18213 a C99 variable-length array variable always has alignment of at least 16 bytes.
18214
18215 This was added to allow use of aligned SSE instructions on arrays. This
18216 rule is meant for static storage (where compiler cannot do the analysis
18217 by itself). We follow it for automatic variables only when convenient.
18218 We fully control everything in the function compiled and functions from
18219 other units cannot rely on the alignment.
18220
18221 Exclude va_list type. It is the common case of local array where
18222 we cannot benefit from the alignment.
18223
18224 TODO: Probably one should optimize for size only when var is not escaping. */
18225 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
18226 && TARGET_SSE)
18227 {
18228 if (AGGREGATE_TYPE_P (type)
18229 && (va_list_type_node == NULL_TREE
18230 || (TYPE_MAIN_VARIANT (type)
18231 != TYPE_MAIN_VARIANT (va_list_type_node)))
18232 && TYPE_SIZE (type)
18233 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18234 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
18235 && align < 128)
18236 return 128;
18237 }
18238 if (TREE_CODE (type) == ARRAY_TYPE)
18239 {
18240 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18241 return 64;
18242 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18243 return 128;
18244 }
18245 else if (TREE_CODE (type) == COMPLEX_TYPE)
18246 {
18247 if (TYPE_MODE (type) == DCmode && align < 64)
18248 return 64;
18249 if ((TYPE_MODE (type) == XCmode
18250 || TYPE_MODE (type) == TCmode) && align < 128)
18251 return 128;
18252 }
18253 else if (RECORD_OR_UNION_TYPE_P (type)
18254 && TYPE_FIELDS (type))
18255 {
18256 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18257 return 64;
18258 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18259 return 128;
18260 }
18261 else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
18262 || TREE_CODE (type) == INTEGER_TYPE)
18263 {
18264
18265 if (TYPE_MODE (type) == DFmode && align < 64)
18266 return 64;
18267 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18268 return 128;
18269 }
18270 return align;
18271}
18272
18273/* Compute the minimum required alignment for dynamic stack realignment
18274 purposes for a local variable, parameter or a stack slot. EXP is
18275 the data type or decl itself, MODE is its mode and ALIGN is the
18276 alignment that the object would ordinarily have. */
18277
18278unsigned int
18279ix86_minimum_alignment (tree exp, machine_mode mode,
18280 unsigned int align)
18281{
18282 tree type, decl;
18283
18284 if (exp && DECL_P (exp))
18285 {
18286 type = TREE_TYPE (exp);
18287 decl = exp;
18288 }
18289 else
18290 {
18291 type = exp;
18292 decl = NULL;
18293 }
18294
18295 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
18296 return align;
18297
18298 /* Don't do dynamic stack realignment for long long objects with
18299 -mpreferred-stack-boundary=2. */
18300 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
18301 && (!type || (!TYPE_USER_ALIGN (type)
18302 && !TYPE_ATOMIC (strip_array_types (type))))
18303 && (!decl || !DECL_USER_ALIGN (decl)))
18304 {
18305 gcc_checking_assert (!TARGET_STV);
18306 return 32;
18307 }
18308
18309 return align;
18310}
18311
18312/* Find a location for the static chain incoming to a nested function.
18313 This is a register, unless all free registers are used by arguments. */
18314
18315static rtx
18316ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
18317{
18318 unsigned regno;
18319
18320 if (TARGET_64BIT)
18321 {
18322 /* We always use R10 in 64-bit mode. */
18323 regno = R10_REG;
18324 }
18325 else
18326 {
18327 const_tree fntype, fndecl;
18328 unsigned int ccvt;
18329
18330 /* By default in 32-bit mode we use ECX to pass the static chain. */
18331 regno = CX_REG;
18332
18333 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
18334 {
18335 fntype = TREE_TYPE (fndecl_or_type);
18336 fndecl = fndecl_or_type;
18337 }
18338 else
18339 {
18340 fntype = fndecl_or_type;
18341 fndecl = NULL;
18342 }
18343
18344 ccvt = ix86_get_callcvt (fntype);
18345 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
18346 {
18347 /* Fastcall functions use ecx/edx for arguments, which leaves
18348 us with EAX for the static chain.
18349 Thiscall functions use ecx for arguments, which also
18350 leaves us with EAX for the static chain. */
18351 regno = AX_REG;
18352 }
18353 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
18354 {
18355 /* Thiscall functions use ecx for arguments, which leaves
18356 us with EAX and EDX for the static chain.
18357 For ABI compatibility we use EAX. */
18358 regno = AX_REG;
18359 }
18360 else if (ix86_function_regparm (fntype, fndecl) == 3)
18361 {
18362 /* For regparm 3, we have no free call-clobbered registers in
18363 which to store the static chain. In order to implement this,
18364 we have the trampoline push the static chain to the stack.
18365 However, we can't push a value below the return address when
18366 we call the nested function directly, so we have to use an
18367 alternate entry point. For this we use ESI, and have the
18368 alternate entry point push ESI, so that things appear the
18369 same once we're executing the nested function. */
18370 if (incoming_p)
18371 {
18372 if (fndecl == current_function_decl
18373 && !ix86_static_chain_on_stack)
18374 {
18375 gcc_assert (!reload_completed);
18376 ix86_static_chain_on_stack = true;
18377 }
18378 return gen_frame_mem (SImode,
18379 plus_constant (Pmode,
18380 arg_pointer_rtx, -8));
18381 }
18382 regno = SI_REG;
18383 }
18384 }
18385
18386 return gen_rtx_REG (Pmode, regno);
18387}
18388
18389/* Emit RTL insns to initialize the variable parts of a trampoline.
18390 FNDECL is the decl of the target address; M_TRAMP is a MEM for
18391 the trampoline, and CHAIN_VALUE is an RTX for the static chain
18392 to be passed to the target function. */
18393
18394static void
18395ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
18396{
18397 rtx mem, fnaddr;
18398 int opcode;
18399 int offset = 0;
18400 bool need_endbr = (flag_cf_protection & CF_BRANCH);
18401
18402 fnaddr = XEXP (DECL_RTL (fndecl), 0);
18403
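  /* For TARGET_64BIT the stores below lay down, roughly:
	[f3 0f 1e fa]		endbr64				(optional)
	49 bb <imm64>		movabs $fnaddr, %r11	(or 41 bb <imm32> movl)
	49 ba <imm64>		movabs $chain, %r10	(or 41 ba <imm32> movl)
	49 ff e3 90		jmp *%r11; nop  */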
18404 if (TARGET_64BIT)
18405 {
18406 int size;
18407
18408 if (need_endbr)
18409 {
18410 /* Insert ENDBR64. */
18411 mem = adjust_address (m_tramp, SImode, offset);
18412 emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
18413 offset += 4;
18414 }
18415
18416 /* Load the function address to r11. Try to load address using
18417 the shorter movl instead of movabs. We may want to support
18418 movq for kernel mode, but kernel does not use trampolines at
18419 the moment. FNADDR is a 32bit address and may not be in
18420 DImode when ptr_mode == SImode. Always use movl in this
18421 case. */
18422 if (ptr_mode == SImode
18423 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
18424 {
18425 fnaddr = copy_addr_to_reg (fnaddr);
18426
18427 mem = adjust_address (m_tramp, HImode, offset);
18428 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
18429
18430 mem = adjust_address (m_tramp, SImode, offset + 2);
18431 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
18432 offset += 6;
18433 }
18434 else
18435 {
18436 mem = adjust_address (m_tramp, HImode, offset);
18437 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
18438
18439 mem = adjust_address (m_tramp, DImode, offset + 2);
18440 emit_move_insn (mem, fnaddr);
18441 offset += 10;
18442 }
18443
18444 /* Load static chain using movabs to r10. Use the shorter movl
18445 instead of movabs when ptr_mode == SImode. */
18446 if (ptr_mode == SImode)
18447 {
18448 opcode = 0xba41;
18449 size = 6;
18450 }
18451 else
18452 {
18453 opcode = 0xba49;
18454 size = 10;
18455 }
18456
18457 mem = adjust_address (m_tramp, HImode, offset);
18458 emit_move_insn (mem, gen_int_mode (opcode, HImode));
18459
18460 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
18461 emit_move_insn (mem, chain_value);
18462 offset += size;
18463
18464 /* Jump to r11; the last (unused) byte is a nop, only there to
18465 pad the write out to a single 32-bit store. */
18466 mem = adjust_address (m_tramp, SImode, offset);
18467 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
18468 offset += 4;
18469 }
18470 else
18471 {
18472 rtx disp, chain;
18473
18474 /* Depending on the static chain location, either load a register
18475 with a constant, or push the constant to the stack. All of the
18476 instructions are the same size. */
18477 chain = ix86_static_chain (fndecl, true);
18478 if (REG_P (chain))
18479 {
18480 switch (REGNO (chain))
18481 {
18482 case AX_REG:
18483 opcode = 0xb8; break;
18484 case CX_REG:
18485 opcode = 0xb9; break;
18486 default:
18487 gcc_unreachable ();
18488 }
18489 }
18490 else
18491 opcode = 0x68;
18492
18493 if (need_endbr)
18494 {
18495 /* Insert ENDBR32. */
18496 mem = adjust_address (m_tramp, SImode, offset);
18497 emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
18498 offset += 4;
18499 }
18500
18501 mem = adjust_address (m_tramp, QImode, offset);
18502 emit_move_insn (mem, gen_int_mode (opcode, QImode));
18503
18504 mem = adjust_address (m_tramp, SImode, offset + 1);
18505 emit_move_insn (mem, chain_value);
18506 offset += 5;
18507
18508 mem = adjust_address (m_tramp, QImode, offset);
18509 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
18510
18511 mem = adjust_address (m_tramp, SImode, offset + 1);
18512
18513 /* Compute offset from the end of the jmp to the target function.
18514 In the case in which the trampoline stores the static chain on
18515 the stack, we need to skip the first insn which pushes the
18516 (call-saved) register static chain; this push is 1 byte. */
18517 offset += 5;
18518 int skip = MEM_P (chain) ? 1 : 0;
18519 /* Skip ENDBR32 at the entry of the target function. */
18520 if (need_endbr
18521 && !cgraph_node::get (fndecl)->only_called_directly_p ())
18522 skip += 4;
18523 disp = expand_binop (SImode, sub_optab, fnaddr,
18524 plus_constant (Pmode, XEXP (m_tramp, 0),
18525 offset - skip),
18526 NULL_RTX, 1, OPTAB_DIRECT);
18527 emit_move_insn (mem, disp);
18528 }
18529
18530 gcc_assert (offset <= TRAMPOLINE_SIZE);
18531
18532#ifdef HAVE_ENABLE_EXECUTE_STACK
18533#ifdef CHECK_EXECUTE_STACK_ENABLED
18534 if (CHECK_EXECUTE_STACK_ENABLED)
18535#endif
18536 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
18537 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
18538#endif
18539}
18540
18541static bool
18542ix86_allocate_stack_slots_for_args (void)
18543{
18544 /* Naked functions should not allocate stack slots for arguments. */
18545 return !ix86_function_naked (fn: current_function_decl);
18546}
18547
18548static bool
18549ix86_warn_func_return (tree decl)
18550{
18551 /* Naked functions are implemented entirely in assembly, including the
18552 return sequence, so suppress warnings about this. */
18553 return !ix86_function_naked (fn: decl);
18554}
18555
18556 /* Return the shift count of a vector-by-scalar shift builtin from its
18557 second argument ARG1. */
18558static tree
18559ix86_vector_shift_count (tree arg1)
18560{
18561 if (tree_fits_uhwi_p (arg1))
18562 return arg1;
18563 else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
18564 {
18565 /* The count argument is weird: it is passed in as one of various
18566 128-bit (or 64-bit) vector types, and its low 64 bits are the count. */
18567 unsigned char buf[16];
18568 int len = native_encode_expr (arg1, buf, 16);
18569 if (len == 0)
18570 return NULL_TREE;
18571 tree t = native_interpret_expr (uint64_type_node, buf, len);
18572 if (t && tree_fits_uhwi_p (t))
18573 return t;
18574 }
18575 return NULL_TREE;
18576}
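/* For example, the count operand of _mm_sll_epi32 is a full __m128i even
   though only its low 64 bits matter; given a V2DI constant { 3, 0 } the
   helper above therefore returns a shift count of 3.  */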
18577
18578 /* Return true if ARG_MASK is all ones; ELEMS is the number of elements
18579 of the corresponding vector. */
18580static bool
18581ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
18582{
18583 if (TREE_CODE (arg_mask) != INTEGER_CST)
18584 return false;
18585
18586 unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
18587 if (elems == HOST_BITS_PER_WIDE_INT)
18588 return mask == HOST_WIDE_INT_M1U;
18589 if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
18590 return false;
18591
18592 return true;
18593}
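/* For instance, with ELEMS == 8 this returns true for any mask whose low
   eight bits are all set (0xff, 0x1ff, ...) and false for e.g. 0x7f; bits
   above the low ELEMS bits are ignored.  */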
18594
18595static tree
18596ix86_fold_builtin (tree fndecl, int n_args,
18597 tree *args, bool ignore ATTRIBUTE_UNUSED)
18598{
18599 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
18600 {
18601 enum ix86_builtins fn_code
18602 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (decl: fndecl);
18603 enum rtx_code rcode;
18604 bool is_vshift;
18605 unsigned HOST_WIDE_INT mask;
18606
18607 switch (fn_code)
18608 {
18609 case IX86_BUILTIN_CPU_IS:
18610 case IX86_BUILTIN_CPU_SUPPORTS:
18611 gcc_assert (n_args == 1);
18612 return fold_builtin_cpu (fndecl, args);
18613
18614 case IX86_BUILTIN_NANQ:
18615 case IX86_BUILTIN_NANSQ:
18616 {
18617 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18618 const char *str = c_getstr (*args);
18619 int quiet = fn_code == IX86_BUILTIN_NANQ;
18620 REAL_VALUE_TYPE real;
18621
18622 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
18623 return build_real (type, real);
18624 return NULL_TREE;
18625 }
18626
18627 case IX86_BUILTIN_INFQ:
18628 case IX86_BUILTIN_HUGE_VALQ:
18629 {
18630 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18631 REAL_VALUE_TYPE inf;
18632 real_inf (&inf);
18633 return build_real (type, inf);
18634 }
18635
18636 case IX86_BUILTIN_TZCNT16:
18637 case IX86_BUILTIN_CTZS:
18638 case IX86_BUILTIN_TZCNT32:
18639 case IX86_BUILTIN_TZCNT64:
18640 gcc_assert (n_args == 1);
18641 if (TREE_CODE (args[0]) == INTEGER_CST)
18642 {
18643 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18644 tree arg = args[0];
18645 if (fn_code == IX86_BUILTIN_TZCNT16
18646 || fn_code == IX86_BUILTIN_CTZS)
18647 arg = fold_convert (short_unsigned_type_node, arg);
18648 if (integer_zerop (arg))
18649 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
18650 else
18651 return fold_const_call (CFN_CTZ, type, arg);
18652 }
18653 break;
18654
18655 case IX86_BUILTIN_LZCNT16:
18656 case IX86_BUILTIN_CLZS:
18657 case IX86_BUILTIN_LZCNT32:
18658 case IX86_BUILTIN_LZCNT64:
18659 gcc_assert (n_args == 1);
18660 if (TREE_CODE (args[0]) == INTEGER_CST)
18661 {
18662 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18663 tree arg = args[0];
18664 if (fn_code == IX86_BUILTIN_LZCNT16
18665 || fn_code == IX86_BUILTIN_CLZS)
18666 arg = fold_convert (short_unsigned_type_node, arg);
18667 if (integer_zerop (arg))
18668 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
18669 else
18670 return fold_const_call (CFN_CLZ, type, arg);
18671 }
18672 break;
18673
18674 case IX86_BUILTIN_BEXTR32:
18675 case IX86_BUILTIN_BEXTR64:
18676 case IX86_BUILTIN_BEXTRI32:
18677 case IX86_BUILTIN_BEXTRI64:
18678 gcc_assert (n_args == 2);
18679 if (tree_fits_uhwi_p (args[1]))
18680 {
18681 unsigned HOST_WIDE_INT res = 0;
18682 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
18683 unsigned int start = tree_to_uhwi (args[1]);
18684 unsigned int len = (start & 0xff00) >> 8;
18685 start &= 0xff;
18686 if (start >= prec || len == 0)
18687 res = 0;
18688 else if (!tree_fits_uhwi_p (args[0]))
18689 break;
18690 else
18691 res = tree_to_uhwi (args[0]) >> start;
18692 if (len > prec)
18693 len = prec;
18694 if (len < HOST_BITS_PER_WIDE_INT)
18695 res &= (HOST_WIDE_INT_1U << len) - 1;
18696 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18697 }
18698 break;
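/* Worked example of the decoding above: __builtin_ia32_bextr_u32
   (0x12345678, 0x0804) selects len == 8 bits starting at bit 4, so it
   folds to (0x12345678 >> 4) & 0xff == 0x67.  */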
18699
18700 case IX86_BUILTIN_BZHI32:
18701 case IX86_BUILTIN_BZHI64:
18702 gcc_assert (n_args == 2);
18703 if (tree_fits_uhwi_p (args[1]))
18704 {
18705 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
18706 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
18707 return args[0];
18708 if (idx == 0)
18709 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
18710 if (!tree_fits_uhwi_p (args[0]))
18711 break;
18712 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
18713 res &= ~(HOST_WIDE_INT_M1U << idx);
18714 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18715 }
18716 break;
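/* For example, bzhi of X with index 8 folds to X & 0xff, an index of 0
   folds to 0, and an index of at least the precision returns X unchanged,
   matching the three cases handled above.  */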
18717
18718 case IX86_BUILTIN_PDEP32:
18719 case IX86_BUILTIN_PDEP64:
18720 gcc_assert (n_args == 2);
18721 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
18722 {
18723 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
18724 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
18725 unsigned HOST_WIDE_INT res = 0;
18726 unsigned HOST_WIDE_INT m, k = 1;
18727 for (m = 1; m; m <<= 1)
18728 if ((mask & m) != 0)
18729 {
18730 if ((src & k) != 0)
18731 res |= m;
18732 k <<= 1;
18733 }
18734 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18735 }
18736 break;
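/* Worked example of the deposit loop above: pdep with src == 0x5 and
   mask == 0xd8 places the low bits of SRC at the set bit positions of
   MASK (bits 3, 4, 6, 7), giving 0x48.  */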
18737
18738 case IX86_BUILTIN_PEXT32:
18739 case IX86_BUILTIN_PEXT64:
18740 gcc_assert (n_args == 2);
18741 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
18742 {
18743 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
18744 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
18745 unsigned HOST_WIDE_INT res = 0;
18746 unsigned HOST_WIDE_INT m, k = 1;
18747 for (m = 1; m; m <<= 1)
18748 if ((mask & m) != 0)
18749 {
18750 if ((src & m) != 0)
18751 res |= k;
18752 k <<= 1;
18753 }
18754 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18755 }
18756 break;
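/* Worked example of the extract loop above: pext with src == 0xb4 and
   mask == 0xd8 gathers the SRC bits at the set positions of MASK
   (bits 3, 4, 6, 7) into the low bits of the result, giving 0xa.  */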
18757
18758 case IX86_BUILTIN_MOVMSKPS:
18759 case IX86_BUILTIN_PMOVMSKB:
18760 case IX86_BUILTIN_MOVMSKPD:
18761 case IX86_BUILTIN_PMOVMSKB128:
18762 case IX86_BUILTIN_MOVMSKPD256:
18763 case IX86_BUILTIN_MOVMSKPS256:
18764 case IX86_BUILTIN_PMOVMSKB256:
18765 gcc_assert (n_args == 1);
18766 if (TREE_CODE (args[0]) == VECTOR_CST)
18767 {
18768 HOST_WIDE_INT res = 0;
18769 for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
18770 {
18771 tree e = VECTOR_CST_ELT (args[0], i);
18772 if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
18773 {
18774 if (wi::neg_p (x: wi::to_wide (t: e)))
18775 res |= HOST_WIDE_INT_1 << i;
18776 }
18777 else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
18778 {
18779 if (TREE_REAL_CST (e).sign)
18780 res |= HOST_WIDE_INT_1 << i;
18781 }
18782 else
18783 return NULL_TREE;
18784 }
18785 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
18786 }
18787 break;
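/* For instance, movmskps on the constant vector { -1.0f, 2.0f, -3.0f, 4.0f }
   folds to 0x5: one result bit per element, set iff that element's sign
   bit is set.  */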
18788
18789 case IX86_BUILTIN_PSLLD:
18790 case IX86_BUILTIN_PSLLD128:
18791 case IX86_BUILTIN_PSLLD128_MASK:
18792 case IX86_BUILTIN_PSLLD256:
18793 case IX86_BUILTIN_PSLLD256_MASK:
18794 case IX86_BUILTIN_PSLLD512:
18795 case IX86_BUILTIN_PSLLDI:
18796 case IX86_BUILTIN_PSLLDI128:
18797 case IX86_BUILTIN_PSLLDI128_MASK:
18798 case IX86_BUILTIN_PSLLDI256:
18799 case IX86_BUILTIN_PSLLDI256_MASK:
18800 case IX86_BUILTIN_PSLLDI512:
18801 case IX86_BUILTIN_PSLLQ:
18802 case IX86_BUILTIN_PSLLQ128:
18803 case IX86_BUILTIN_PSLLQ128_MASK:
18804 case IX86_BUILTIN_PSLLQ256:
18805 case IX86_BUILTIN_PSLLQ256_MASK:
18806 case IX86_BUILTIN_PSLLQ512:
18807 case IX86_BUILTIN_PSLLQI:
18808 case IX86_BUILTIN_PSLLQI128:
18809 case IX86_BUILTIN_PSLLQI128_MASK:
18810 case IX86_BUILTIN_PSLLQI256:
18811 case IX86_BUILTIN_PSLLQI256_MASK:
18812 case IX86_BUILTIN_PSLLQI512:
18813 case IX86_BUILTIN_PSLLW:
18814 case IX86_BUILTIN_PSLLW128:
18815 case IX86_BUILTIN_PSLLW128_MASK:
18816 case IX86_BUILTIN_PSLLW256:
18817 case IX86_BUILTIN_PSLLW256_MASK:
18818 case IX86_BUILTIN_PSLLW512_MASK:
18819 case IX86_BUILTIN_PSLLWI:
18820 case IX86_BUILTIN_PSLLWI128:
18821 case IX86_BUILTIN_PSLLWI128_MASK:
18822 case IX86_BUILTIN_PSLLWI256:
18823 case IX86_BUILTIN_PSLLWI256_MASK:
18824 case IX86_BUILTIN_PSLLWI512_MASK:
18825 rcode = ASHIFT;
18826 is_vshift = false;
18827 goto do_shift;
18828 case IX86_BUILTIN_PSRAD:
18829 case IX86_BUILTIN_PSRAD128:
18830 case IX86_BUILTIN_PSRAD128_MASK:
18831 case IX86_BUILTIN_PSRAD256:
18832 case IX86_BUILTIN_PSRAD256_MASK:
18833 case IX86_BUILTIN_PSRAD512:
18834 case IX86_BUILTIN_PSRADI:
18835 case IX86_BUILTIN_PSRADI128:
18836 case IX86_BUILTIN_PSRADI128_MASK:
18837 case IX86_BUILTIN_PSRADI256:
18838 case IX86_BUILTIN_PSRADI256_MASK:
18839 case IX86_BUILTIN_PSRADI512:
18840 case IX86_BUILTIN_PSRAQ128_MASK:
18841 case IX86_BUILTIN_PSRAQ256_MASK:
18842 case IX86_BUILTIN_PSRAQ512:
18843 case IX86_BUILTIN_PSRAQI128_MASK:
18844 case IX86_BUILTIN_PSRAQI256_MASK:
18845 case IX86_BUILTIN_PSRAQI512:
18846 case IX86_BUILTIN_PSRAW:
18847 case IX86_BUILTIN_PSRAW128:
18848 case IX86_BUILTIN_PSRAW128_MASK:
18849 case IX86_BUILTIN_PSRAW256:
18850 case IX86_BUILTIN_PSRAW256_MASK:
18851 case IX86_BUILTIN_PSRAW512:
18852 case IX86_BUILTIN_PSRAWI:
18853 case IX86_BUILTIN_PSRAWI128:
18854 case IX86_BUILTIN_PSRAWI128_MASK:
18855 case IX86_BUILTIN_PSRAWI256:
18856 case IX86_BUILTIN_PSRAWI256_MASK:
18857 case IX86_BUILTIN_PSRAWI512:
18858 rcode = ASHIFTRT;
18859 is_vshift = false;
18860 goto do_shift;
18861 case IX86_BUILTIN_PSRLD:
18862 case IX86_BUILTIN_PSRLD128:
18863 case IX86_BUILTIN_PSRLD128_MASK:
18864 case IX86_BUILTIN_PSRLD256:
18865 case IX86_BUILTIN_PSRLD256_MASK:
18866 case IX86_BUILTIN_PSRLD512:
18867 case IX86_BUILTIN_PSRLDI:
18868 case IX86_BUILTIN_PSRLDI128:
18869 case IX86_BUILTIN_PSRLDI128_MASK:
18870 case IX86_BUILTIN_PSRLDI256:
18871 case IX86_BUILTIN_PSRLDI256_MASK:
18872 case IX86_BUILTIN_PSRLDI512:
18873 case IX86_BUILTIN_PSRLQ:
18874 case IX86_BUILTIN_PSRLQ128:
18875 case IX86_BUILTIN_PSRLQ128_MASK:
18876 case IX86_BUILTIN_PSRLQ256:
18877 case IX86_BUILTIN_PSRLQ256_MASK:
18878 case IX86_BUILTIN_PSRLQ512:
18879 case IX86_BUILTIN_PSRLQI:
18880 case IX86_BUILTIN_PSRLQI128:
18881 case IX86_BUILTIN_PSRLQI128_MASK:
18882 case IX86_BUILTIN_PSRLQI256:
18883 case IX86_BUILTIN_PSRLQI256_MASK:
18884 case IX86_BUILTIN_PSRLQI512:
18885 case IX86_BUILTIN_PSRLW:
18886 case IX86_BUILTIN_PSRLW128:
18887 case IX86_BUILTIN_PSRLW128_MASK:
18888 case IX86_BUILTIN_PSRLW256:
18889 case IX86_BUILTIN_PSRLW256_MASK:
18890 case IX86_BUILTIN_PSRLW512:
18891 case IX86_BUILTIN_PSRLWI:
18892 case IX86_BUILTIN_PSRLWI128:
18893 case IX86_BUILTIN_PSRLWI128_MASK:
18894 case IX86_BUILTIN_PSRLWI256:
18895 case IX86_BUILTIN_PSRLWI256_MASK:
18896 case IX86_BUILTIN_PSRLWI512:
18897 rcode = LSHIFTRT;
18898 is_vshift = false;
18899 goto do_shift;
18900 case IX86_BUILTIN_PSLLVV16HI:
18901 case IX86_BUILTIN_PSLLVV16SI:
18902 case IX86_BUILTIN_PSLLVV2DI:
18903 case IX86_BUILTIN_PSLLVV2DI_MASK:
18904 case IX86_BUILTIN_PSLLVV32HI:
18905 case IX86_BUILTIN_PSLLVV4DI:
18906 case IX86_BUILTIN_PSLLVV4DI_MASK:
18907 case IX86_BUILTIN_PSLLVV4SI:
18908 case IX86_BUILTIN_PSLLVV4SI_MASK:
18909 case IX86_BUILTIN_PSLLVV8DI:
18910 case IX86_BUILTIN_PSLLVV8HI:
18911 case IX86_BUILTIN_PSLLVV8SI:
18912 case IX86_BUILTIN_PSLLVV8SI_MASK:
18913 rcode = ASHIFT;
18914 is_vshift = true;
18915 goto do_shift;
18916 case IX86_BUILTIN_PSRAVQ128:
18917 case IX86_BUILTIN_PSRAVQ256:
18918 case IX86_BUILTIN_PSRAVV16HI:
18919 case IX86_BUILTIN_PSRAVV16SI:
18920 case IX86_BUILTIN_PSRAVV32HI:
18921 case IX86_BUILTIN_PSRAVV4SI:
18922 case IX86_BUILTIN_PSRAVV4SI_MASK:
18923 case IX86_BUILTIN_PSRAVV8DI:
18924 case IX86_BUILTIN_PSRAVV8HI:
18925 case IX86_BUILTIN_PSRAVV8SI:
18926 case IX86_BUILTIN_PSRAVV8SI_MASK:
18927 rcode = ASHIFTRT;
18928 is_vshift = true;
18929 goto do_shift;
18930 case IX86_BUILTIN_PSRLVV16HI:
18931 case IX86_BUILTIN_PSRLVV16SI:
18932 case IX86_BUILTIN_PSRLVV2DI:
18933 case IX86_BUILTIN_PSRLVV2DI_MASK:
18934 case IX86_BUILTIN_PSRLVV32HI:
18935 case IX86_BUILTIN_PSRLVV4DI:
18936 case IX86_BUILTIN_PSRLVV4DI_MASK:
18937 case IX86_BUILTIN_PSRLVV4SI:
18938 case IX86_BUILTIN_PSRLVV4SI_MASK:
18939 case IX86_BUILTIN_PSRLVV8DI:
18940 case IX86_BUILTIN_PSRLVV8HI:
18941 case IX86_BUILTIN_PSRLVV8SI:
18942 case IX86_BUILTIN_PSRLVV8SI_MASK:
18943 rcode = LSHIFTRT;
18944 is_vshift = true;
18945 goto do_shift;
18946
18947 do_shift:
18948 gcc_assert (n_args >= 2);
18949 if (TREE_CODE (args[0]) != VECTOR_CST)
18950 break;
18951 mask = HOST_WIDE_INT_M1U;
18952 if (n_args > 2)
18953 {
18954 /* This is a masked shift. */
18955 if (!tree_fits_uhwi_p (args[n_args - 1])
18956 || TREE_SIDE_EFFECTS (args[n_args - 2]))
18957 break;
18958 mask = tree_to_uhwi (args[n_args - 1]);
18959 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
18960 mask |= HOST_WIDE_INT_M1U << elems;
18961 if (mask != HOST_WIDE_INT_M1U
18962 && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
18963 break;
18964 if (mask == (HOST_WIDE_INT_M1U << elems))
18965 return args[n_args - 2];
18966 }
18967 if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
18968 break;
18969 if (tree tem = (is_vshift ? integer_one_node
18970 : ix86_vector_shift_count (arg1: args[1])))
18971 {
18972 unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
18973 unsigned HOST_WIDE_INT prec
18974 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
18975 if (count == 0 && mask == HOST_WIDE_INT_M1U)
18976 return args[0];
18977 if (count >= prec)
18978 {
18979 if (rcode == ASHIFTRT)
18980 count = prec - 1;
18981 else if (mask == HOST_WIDE_INT_M1U)
18982 return build_zero_cst (TREE_TYPE (args[0]));
18983 }
18984 tree countt = NULL_TREE;
18985 if (!is_vshift)
18986 {
18987 if (count >= prec)
18988 countt = integer_zero_node;
18989 else
18990 countt = build_int_cst (integer_type_node, count);
18991 }
18992 tree_vector_builder builder;
18993 if (mask != HOST_WIDE_INT_M1U || is_vshift)
18994 builder.new_vector (TREE_TYPE (args[0]),
18995 npatterns: TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
18996 nelts_per_pattern: 1);
18997 else
18998 builder.new_unary_operation (TREE_TYPE (args[0]), vec: args[0],
18999 allow_stepped_p: false);
19000 unsigned int cnt = builder.encoded_nelts ();
19001 for (unsigned int i = 0; i < cnt; ++i)
19002 {
19003 tree elt = VECTOR_CST_ELT (args[0], i);
19004 if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
19005 return NULL_TREE;
19006 tree type = TREE_TYPE (elt);
19007 if (rcode == LSHIFTRT)
19008 elt = fold_convert (unsigned_type_for (type), elt);
19009 if (is_vshift)
19010 {
19011 countt = VECTOR_CST_ELT (args[1], i);
19012 if (TREE_CODE (countt) != INTEGER_CST
19013 || TREE_OVERFLOW (countt))
19014 return NULL_TREE;
19015 if (wi::neg_p (x: wi::to_wide (t: countt))
19016 || wi::to_widest (t: countt) >= prec)
19017 {
19018 if (rcode == ASHIFTRT)
19019 countt = build_int_cst (TREE_TYPE (countt),
19020 prec - 1);
19021 else
19022 {
19023 elt = build_zero_cst (TREE_TYPE (elt));
19024 countt = build_zero_cst (TREE_TYPE (countt));
19025 }
19026 }
19027 }
19028 else if (count >= prec)
19029 elt = build_zero_cst (TREE_TYPE (elt));
19030 elt = const_binop (rcode == ASHIFT
19031 ? LSHIFT_EXPR : RSHIFT_EXPR,
19032 TREE_TYPE (elt), elt, countt);
19033 if (!elt || TREE_CODE (elt) != INTEGER_CST)
19034 return NULL_TREE;
19035 if (rcode == LSHIFTRT)
19036 elt = fold_convert (type, elt);
19037 if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
19038 {
19039 elt = VECTOR_CST_ELT (args[n_args - 2], i);
19040 if (TREE_CODE (elt) != INTEGER_CST
19041 || TREE_OVERFLOW (elt))
19042 return NULL_TREE;
19043 }
19044 builder.quick_push (obj: elt);
19045 }
19046 return builder.build ();
19047 }
19048 break;
19049
19050 default:
19051 break;
19052 }
19053 }
19054
19055#ifdef SUBTARGET_FOLD_BUILTIN
19056 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
19057#endif
19058
19059 return NULL_TREE;
19060}
19061
19062/* Fold a MD builtin (use ix86_fold_builtin for folding into
19063 constant) in GIMPLE. */
19064
19065bool
19066ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
19067{
19068 gimple *stmt = gsi_stmt (i: *gsi), *g;
19069 gimple_seq stmts = NULL;
19070 tree fndecl = gimple_call_fndecl (gs: stmt);
19071 gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
19072 int n_args = gimple_call_num_args (gs: stmt);
19073 enum ix86_builtins fn_code
19074 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (decl: fndecl);
19075 tree decl = NULL_TREE;
19076 tree arg0, arg1, arg2;
19077 enum rtx_code rcode;
19078 enum tree_code tcode;
19079 unsigned HOST_WIDE_INT count;
19080 bool is_vshift;
19081 unsigned HOST_WIDE_INT elems;
19082 location_t loc;
19083
19084 /* Don't fold when there is an ISA mismatch. */
19085 if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
19086 return false;
19087
19088 switch (fn_code)
19089 {
19090 case IX86_BUILTIN_TZCNT32:
19091 decl = builtin_decl_implicit (fncode: BUILT_IN_CTZ);
19092 goto fold_tzcnt_lzcnt;
19093
19094 case IX86_BUILTIN_TZCNT64:
19095 decl = builtin_decl_implicit (fncode: BUILT_IN_CTZLL);
19096 goto fold_tzcnt_lzcnt;
19097
19098 case IX86_BUILTIN_LZCNT32:
19099 decl = builtin_decl_implicit (fncode: BUILT_IN_CLZ);
19100 goto fold_tzcnt_lzcnt;
19101
19102 case IX86_BUILTIN_LZCNT64:
19103 decl = builtin_decl_implicit (fncode: BUILT_IN_CLZLL);
19104 goto fold_tzcnt_lzcnt;
19105
19106 fold_tzcnt_lzcnt:
19107 gcc_assert (n_args == 1);
19108 arg0 = gimple_call_arg (gs: stmt, index: 0);
19109 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (gs: stmt))
19110 {
19111 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
19112 /* If arg0 is provably non-zero, optimize into the generic
19113 __builtin_c[tl]z{,ll} functions, which the middle end handles
19114 better. */
19115 if (!expr_not_equal_to (t: arg0, wi::zero (precision: prec)))
19116 return false;
19117
19118 loc = gimple_location (g: stmt);
19119 g = gimple_build_call (decl, 1, arg0);
19120 gimple_set_location (g, location: loc);
19121 tree lhs = make_ssa_name (integer_type_node);
19122 gimple_call_set_lhs (gs: g, lhs);
19123 gsi_insert_before (gsi, g, GSI_SAME_STMT);
19124 g = gimple_build_assign (gimple_call_lhs (gs: stmt), NOP_EXPR, lhs);
19125 gimple_set_location (g, location: loc);
19126 gsi_replace (gsi, g, false);
19127 return true;
19128 }
19129 break;
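/* E.g. when X is known to be non-zero (say because of a dominating X != 0
   test), a call to the 32-bit tzcnt builtin with an lhs is rewritten here
   roughly into
     _1 = __builtin_ctz (X);
     lhs = (<lhs type>) _1;
   which the middle end can optimize further.  */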
19130
19131 case IX86_BUILTIN_BZHI32:
19132 case IX86_BUILTIN_BZHI64:
19133 gcc_assert (n_args == 2);
19134 arg1 = gimple_call_arg (gs: stmt, index: 1);
19135 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (gs: stmt))
19136 {
19137 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
19138 arg0 = gimple_call_arg (gs: stmt, index: 0);
19139 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
19140 break;
19141 loc = gimple_location (g: stmt);
19142 g = gimple_build_assign (gimple_call_lhs (gs: stmt), arg0);
19143 gimple_set_location (g, location: loc);
19144 gsi_replace (gsi, g, false);
19145 return true;
19146 }
19147 break;
19148
19149 case IX86_BUILTIN_PDEP32:
19150 case IX86_BUILTIN_PDEP64:
19151 case IX86_BUILTIN_PEXT32:
19152 case IX86_BUILTIN_PEXT64:
19153 gcc_assert (n_args == 2);
19154 arg1 = gimple_call_arg (gs: stmt, index: 1);
19155 if (integer_all_onesp (arg1) && gimple_call_lhs (gs: stmt))
19156 {
19157 loc = gimple_location (g: stmt);
19158 arg0 = gimple_call_arg (gs: stmt, index: 0);
19159 g = gimple_build_assign (gimple_call_lhs (gs: stmt), arg0);
19160 gimple_set_location (g, location: loc);
19161 gsi_replace (gsi, g, false);
19162 return true;
19163 }
19164 break;
19165
19166 case IX86_BUILTIN_PBLENDVB256:
19167 case IX86_BUILTIN_BLENDVPS256:
19168 case IX86_BUILTIN_BLENDVPD256:
19169 /* pcmpeqb/d/q is under AVX2; without AVX2 it is lowered to scalar
19170 operations by the veclower pass and not combined back. */
19171 if (!TARGET_AVX2)
19172 break;
19173
19174 /* FALLTHRU. */
19175 case IX86_BUILTIN_BLENDVPD:
19176 /* blendvpd is under SSE4.1 but pcmpgtq is under SSE4.2;
19177 without SSE4.2 it is lowered to scalar operations by the
19178 veclower pass and not combined back. */
19179 if (!TARGET_SSE4_2)
19180 break;
19181 /* FALLTHRU. */
19182 case IX86_BUILTIN_PBLENDVB128:
19183 case IX86_BUILTIN_BLENDVPS:
19184 gcc_assert (n_args == 3);
19185 arg0 = gimple_call_arg (gs: stmt, index: 0);
19186 arg1 = gimple_call_arg (gs: stmt, index: 1);
19187 arg2 = gimple_call_arg (gs: stmt, index: 2);
19188 if (gimple_call_lhs (gs: stmt))
19189 {
19190 loc = gimple_location (g: stmt);
19191 tree type = TREE_TYPE (arg2);
19192 if (VECTOR_FLOAT_TYPE_P (type))
19193 {
19194 tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
19195 ? intSI_type_node : intDI_type_node;
19196 type = get_same_sized_vectype (itype, type);
19197 }
19198 else
19199 type = signed_type_for (type);
19200 arg2 = gimple_build (seq: &stmts, code: VIEW_CONVERT_EXPR, type, ops: arg2);
19201 tree zero_vec = build_zero_cst (type);
19202 tree cmp_type = truth_type_for (type);
19203 tree cmp = gimple_build (seq: &stmts, code: LT_EXPR, type: cmp_type, ops: arg2, ops: zero_vec);
19204 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19205 g = gimple_build_assign (gimple_call_lhs (gs: stmt),
19206 VEC_COND_EXPR, cmp,
19207 arg1, arg0);
19208 gimple_set_location (g, location: loc);
19209 gsi_replace (gsi, g, false);
19210 }
19211 else
19212 gsi_replace (gsi, gimple_build_nop (), false);
19213 return true;
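/* In effect, a call such as _mm_blendv_ps (a, b, m) with an lhs becomes a
   VEC_COND_EXPR that selects b wherever the corresponding element of m has
   its sign bit set and a elsewhere, after viewing m as a signed integer
   vector and comparing it against zero.  */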
19214
19216 case IX86_BUILTIN_PCMPEQB128:
19217 case IX86_BUILTIN_PCMPEQW128:
19218 case IX86_BUILTIN_PCMPEQD128:
19219 case IX86_BUILTIN_PCMPEQQ:
19220 case IX86_BUILTIN_PCMPEQB256:
19221 case IX86_BUILTIN_PCMPEQW256:
19222 case IX86_BUILTIN_PCMPEQD256:
19223 case IX86_BUILTIN_PCMPEQQ256:
19224 tcode = EQ_EXPR;
19225 goto do_cmp;
19226
19227 case IX86_BUILTIN_PCMPGTB128:
19228 case IX86_BUILTIN_PCMPGTW128:
19229 case IX86_BUILTIN_PCMPGTD128:
19230 case IX86_BUILTIN_PCMPGTQ:
19231 case IX86_BUILTIN_PCMPGTB256:
19232 case IX86_BUILTIN_PCMPGTW256:
19233 case IX86_BUILTIN_PCMPGTD256:
19234 case IX86_BUILTIN_PCMPGTQ256:
19235 tcode = GT_EXPR;
19236
19237 do_cmp:
19238 gcc_assert (n_args == 2);
19239 arg0 = gimple_call_arg (gs: stmt, index: 0);
19240 arg1 = gimple_call_arg (gs: stmt, index: 1);
19241 if (gimple_call_lhs (gs: stmt))
19242 {
19243 loc = gimple_location (g: stmt);
19244 tree type = TREE_TYPE (arg0);
19245 tree zero_vec = build_zero_cst (type);
19246 tree minus_one_vec = build_minus_one_cst (type);
19247 tree cmp_type = truth_type_for (type);
19248 tree cmp = gimple_build (seq: &stmts, code: tcode, type: cmp_type, ops: arg0, ops: arg1);
19249 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19250 g = gimple_build_assign (gimple_call_lhs (gs: stmt),
19251 VEC_COND_EXPR, cmp,
19252 minus_one_vec, zero_vec);
19253 gimple_set_location (g, location: loc);
19254 gsi_replace (gsi, g, false);
19255 }
19256 else
19257 gsi_replace (gsi, gimple_build_nop (), false);
19258 return true;
19259
19260 case IX86_BUILTIN_PSLLD:
19261 case IX86_BUILTIN_PSLLD128:
19262 case IX86_BUILTIN_PSLLD128_MASK:
19263 case IX86_BUILTIN_PSLLD256:
19264 case IX86_BUILTIN_PSLLD256_MASK:
19265 case IX86_BUILTIN_PSLLD512:
19266 case IX86_BUILTIN_PSLLDI:
19267 case IX86_BUILTIN_PSLLDI128:
19268 case IX86_BUILTIN_PSLLDI128_MASK:
19269 case IX86_BUILTIN_PSLLDI256:
19270 case IX86_BUILTIN_PSLLDI256_MASK:
19271 case IX86_BUILTIN_PSLLDI512:
19272 case IX86_BUILTIN_PSLLQ:
19273 case IX86_BUILTIN_PSLLQ128:
19274 case IX86_BUILTIN_PSLLQ128_MASK:
19275 case IX86_BUILTIN_PSLLQ256:
19276 case IX86_BUILTIN_PSLLQ256_MASK:
19277 case IX86_BUILTIN_PSLLQ512:
19278 case IX86_BUILTIN_PSLLQI:
19279 case IX86_BUILTIN_PSLLQI128:
19280 case IX86_BUILTIN_PSLLQI128_MASK:
19281 case IX86_BUILTIN_PSLLQI256:
19282 case IX86_BUILTIN_PSLLQI256_MASK:
19283 case IX86_BUILTIN_PSLLQI512:
19284 case IX86_BUILTIN_PSLLW:
19285 case IX86_BUILTIN_PSLLW128:
19286 case IX86_BUILTIN_PSLLW128_MASK:
19287 case IX86_BUILTIN_PSLLW256:
19288 case IX86_BUILTIN_PSLLW256_MASK:
19289 case IX86_BUILTIN_PSLLW512_MASK:
19290 case IX86_BUILTIN_PSLLWI:
19291 case IX86_BUILTIN_PSLLWI128:
19292 case IX86_BUILTIN_PSLLWI128_MASK:
19293 case IX86_BUILTIN_PSLLWI256:
19294 case IX86_BUILTIN_PSLLWI256_MASK:
19295 case IX86_BUILTIN_PSLLWI512_MASK:
19296 rcode = ASHIFT;
19297 is_vshift = false;
19298 goto do_shift;
19299 case IX86_BUILTIN_PSRAD:
19300 case IX86_BUILTIN_PSRAD128:
19301 case IX86_BUILTIN_PSRAD128_MASK:
19302 case IX86_BUILTIN_PSRAD256:
19303 case IX86_BUILTIN_PSRAD256_MASK:
19304 case IX86_BUILTIN_PSRAD512:
19305 case IX86_BUILTIN_PSRADI:
19306 case IX86_BUILTIN_PSRADI128:
19307 case IX86_BUILTIN_PSRADI128_MASK:
19308 case IX86_BUILTIN_PSRADI256:
19309 case IX86_BUILTIN_PSRADI256_MASK:
19310 case IX86_BUILTIN_PSRADI512:
19311 case IX86_BUILTIN_PSRAQ128_MASK:
19312 case IX86_BUILTIN_PSRAQ256_MASK:
19313 case IX86_BUILTIN_PSRAQ512:
19314 case IX86_BUILTIN_PSRAQI128_MASK:
19315 case IX86_BUILTIN_PSRAQI256_MASK:
19316 case IX86_BUILTIN_PSRAQI512:
19317 case IX86_BUILTIN_PSRAW:
19318 case IX86_BUILTIN_PSRAW128:
19319 case IX86_BUILTIN_PSRAW128_MASK:
19320 case IX86_BUILTIN_PSRAW256:
19321 case IX86_BUILTIN_PSRAW256_MASK:
19322 case IX86_BUILTIN_PSRAW512:
19323 case IX86_BUILTIN_PSRAWI:
19324 case IX86_BUILTIN_PSRAWI128:
19325 case IX86_BUILTIN_PSRAWI128_MASK:
19326 case IX86_BUILTIN_PSRAWI256:
19327 case IX86_BUILTIN_PSRAWI256_MASK:
19328 case IX86_BUILTIN_PSRAWI512:
19329 rcode = ASHIFTRT;
19330 is_vshift = false;
19331 goto do_shift;
19332 case IX86_BUILTIN_PSRLD:
19333 case IX86_BUILTIN_PSRLD128:
19334 case IX86_BUILTIN_PSRLD128_MASK:
19335 case IX86_BUILTIN_PSRLD256:
19336 case IX86_BUILTIN_PSRLD256_MASK:
19337 case IX86_BUILTIN_PSRLD512:
19338 case IX86_BUILTIN_PSRLDI:
19339 case IX86_BUILTIN_PSRLDI128:
19340 case IX86_BUILTIN_PSRLDI128_MASK:
19341 case IX86_BUILTIN_PSRLDI256:
19342 case IX86_BUILTIN_PSRLDI256_MASK:
19343 case IX86_BUILTIN_PSRLDI512:
19344 case IX86_BUILTIN_PSRLQ:
19345 case IX86_BUILTIN_PSRLQ128:
19346 case IX86_BUILTIN_PSRLQ128_MASK:
19347 case IX86_BUILTIN_PSRLQ256:
19348 case IX86_BUILTIN_PSRLQ256_MASK:
19349 case IX86_BUILTIN_PSRLQ512:
19350 case IX86_BUILTIN_PSRLQI:
19351 case IX86_BUILTIN_PSRLQI128:
19352 case IX86_BUILTIN_PSRLQI128_MASK:
19353 case IX86_BUILTIN_PSRLQI256:
19354 case IX86_BUILTIN_PSRLQI256_MASK:
19355 case IX86_BUILTIN_PSRLQI512:
19356 case IX86_BUILTIN_PSRLW:
19357 case IX86_BUILTIN_PSRLW128:
19358 case IX86_BUILTIN_PSRLW128_MASK:
19359 case IX86_BUILTIN_PSRLW256:
19360 case IX86_BUILTIN_PSRLW256_MASK:
19361 case IX86_BUILTIN_PSRLW512:
19362 case IX86_BUILTIN_PSRLWI:
19363 case IX86_BUILTIN_PSRLWI128:
19364 case IX86_BUILTIN_PSRLWI128_MASK:
19365 case IX86_BUILTIN_PSRLWI256:
19366 case IX86_BUILTIN_PSRLWI256_MASK:
19367 case IX86_BUILTIN_PSRLWI512:
19368 rcode = LSHIFTRT;
19369 is_vshift = false;
19370 goto do_shift;
19371 case IX86_BUILTIN_PSLLVV16HI:
19372 case IX86_BUILTIN_PSLLVV16SI:
19373 case IX86_BUILTIN_PSLLVV2DI:
19374 case IX86_BUILTIN_PSLLVV2DI_MASK:
19375 case IX86_BUILTIN_PSLLVV32HI:
19376 case IX86_BUILTIN_PSLLVV4DI:
19377 case IX86_BUILTIN_PSLLVV4DI_MASK:
19378 case IX86_BUILTIN_PSLLVV4SI:
19379 case IX86_BUILTIN_PSLLVV4SI_MASK:
19380 case IX86_BUILTIN_PSLLVV8DI:
19381 case IX86_BUILTIN_PSLLVV8HI:
19382 case IX86_BUILTIN_PSLLVV8SI:
19383 case IX86_BUILTIN_PSLLVV8SI_MASK:
19384 rcode = ASHIFT;
19385 is_vshift = true;
19386 goto do_shift;
19387 case IX86_BUILTIN_PSRAVQ128:
19388 case IX86_BUILTIN_PSRAVQ256:
19389 case IX86_BUILTIN_PSRAVV16HI:
19390 case IX86_BUILTIN_PSRAVV16SI:
19391 case IX86_BUILTIN_PSRAVV32HI:
19392 case IX86_BUILTIN_PSRAVV4SI:
19393 case IX86_BUILTIN_PSRAVV4SI_MASK:
19394 case IX86_BUILTIN_PSRAVV8DI:
19395 case IX86_BUILTIN_PSRAVV8HI:
19396 case IX86_BUILTIN_PSRAVV8SI:
19397 case IX86_BUILTIN_PSRAVV8SI_MASK:
19398 rcode = ASHIFTRT;
19399 is_vshift = true;
19400 goto do_shift;
19401 case IX86_BUILTIN_PSRLVV16HI:
19402 case IX86_BUILTIN_PSRLVV16SI:
19403 case IX86_BUILTIN_PSRLVV2DI:
19404 case IX86_BUILTIN_PSRLVV2DI_MASK:
19405 case IX86_BUILTIN_PSRLVV32HI:
19406 case IX86_BUILTIN_PSRLVV4DI:
19407 case IX86_BUILTIN_PSRLVV4DI_MASK:
19408 case IX86_BUILTIN_PSRLVV4SI:
19409 case IX86_BUILTIN_PSRLVV4SI_MASK:
19410 case IX86_BUILTIN_PSRLVV8DI:
19411 case IX86_BUILTIN_PSRLVV8HI:
19412 case IX86_BUILTIN_PSRLVV8SI:
19413 case IX86_BUILTIN_PSRLVV8SI_MASK:
19414 rcode = LSHIFTRT;
19415 is_vshift = true;
19416 goto do_shift;
19417
19418 do_shift:
19419 gcc_assert (n_args >= 2);
19420 if (!gimple_call_lhs (gs: stmt))
19421 {
19422 gsi_replace (gsi, gimple_build_nop (), false);
19423 return true;
19424 }
19425 arg0 = gimple_call_arg (gs: stmt, index: 0);
19426 arg1 = gimple_call_arg (gs: stmt, index: 1);
19427 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
19428 /* For a masked shift, only optimize if the mask is all ones. */
19429 if (n_args > 2
19430 && !ix86_masked_all_ones (elems, arg_mask: gimple_call_arg (gs: stmt, index: n_args - 1)))
19431 break;
19432 if (is_vshift)
19433 {
19434 if (TREE_CODE (arg1) != VECTOR_CST)
19435 break;
19436 count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
19437 if (integer_zerop (arg1))
19438 count = 0;
19439 else if (rcode == ASHIFTRT)
19440 break;
19441 else
19442 for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
19443 {
19444 tree elt = VECTOR_CST_ELT (arg1, i);
19445 if (!wi::neg_p (x: wi::to_wide (t: elt))
19446 && wi::to_widest (t: elt) < count)
19447 return false;
19448 }
19449 }
19450 else
19451 {
19452 arg1 = ix86_vector_shift_count (arg1);
19453 if (!arg1)
19454 break;
19455 count = tree_to_uhwi (arg1);
19456 }
19457 if (count == 0)
19458 {
19459 /* Just return the first argument for shift by 0. */
19460 loc = gimple_location (g: stmt);
19461 g = gimple_build_assign (gimple_call_lhs (gs: stmt), arg0);
19462 gimple_set_location (g, location: loc);
19463 gsi_replace (gsi, g, false);
19464 return true;
19465 }
19466 if (rcode != ASHIFTRT
19467 && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
19468 {
19469 /* For shift counts equal to or greater than the precision, the
19470 result is zero, except for arithmetic right shifts. */
19471 loc = gimple_location (g: stmt);
19472 g = gimple_build_assign (gimple_call_lhs (gs: stmt),
19473 build_zero_cst (TREE_TYPE (arg0)));
19474 gimple_set_location (g, location: loc);
19475 gsi_replace (gsi, g, false);
19476 return true;
19477 }
19478 break;
19479
19480 case IX86_BUILTIN_SHUFPD512:
19481 case IX86_BUILTIN_SHUFPS512:
19482 case IX86_BUILTIN_SHUFPD:
19483 case IX86_BUILTIN_SHUFPD256:
19484 case IX86_BUILTIN_SHUFPS:
19485 case IX86_BUILTIN_SHUFPS256:
19486 arg0 = gimple_call_arg (gs: stmt, index: 0);
19487 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
19488 /* This is a masked shuffle. Only optimize if the mask is all ones. */
19489 if (n_args > 3
19490 && !ix86_masked_all_ones (elems,
19491 arg_mask: gimple_call_arg (gs: stmt, index: n_args - 1)))
19492 break;
19493 arg2 = gimple_call_arg (gs: stmt, index: 2);
19494 if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (gs: stmt))
19495 {
19496 unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2);
19497 /* Check valid imm, refer to gcc.target/i386/testimm-10.c. */
19498 if (shuffle_mask > 255)
19499 return false;
19500
19501 machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
19502 loc = gimple_location (g: stmt);
19503 tree itype = (imode == E_DFmode
19504 ? long_long_integer_type_node : integer_type_node);
19505 tree vtype = build_vector_type (itype, elems);
19506 tree_vector_builder elts (vtype, elems, 1);
19507
19509 /* Transform the integer shuffle_mask into the vector perm_mask used
19510 by vec_perm_expr; refer to shufp[sd]256/512 in sse.md. */
19511 for (unsigned i = 0; i != elems; i++)
19512 {
19513 unsigned sel_idx;
19514 /* For DFmode, Imm[1:0] (and, if VL > 128, Imm[3:2], Imm[5:4],
19515 Imm[7:6]) provides one 1-bit select control per element of the
19516 destination. */
19517 if (imode == E_DFmode)
19518 sel_idx = (i & 1) * elems + (i & ~1)
19519 + ((shuffle_mask >> i) & 1);
19520 else
19521 {
19522 /* For SFmode, Imm[7:0] (reused for every 128-bit lane when VL > 128)
19523 provides four 2-bit select controls, one per element of each lane. */
19524 unsigned j = i % 4;
19525 sel_idx = ((i >> 1) & 1) * elems + (i & ~3)
19526 + ((shuffle_mask >> 2 * j) & 3);
19527 }
19528 elts.quick_push (obj: build_int_cst (itype, sel_idx));
19529 }
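/* Concrete example: for the V2DF shufpd case with shuffle_mask == 1 the
   loop above builds the perm_mask { 1, 2 }, i.e. element 0 comes from
   arg0[1] and element 1 from arg1[0], just as _mm_shuffle_pd (a, b, 1)
   produces { a[1], b[0] }.  */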
19530
19531 tree perm_mask = elts.build ();
19532 arg1 = gimple_call_arg (gs: stmt, index: 1);
19533 g = gimple_build_assign (gimple_call_lhs (gs: stmt),
19534 VEC_PERM_EXPR,
19535 arg0, arg1, perm_mask);
19536 gimple_set_location (g, location: loc);
19537 gsi_replace (gsi, g, false);
19538 return true;
19539 }
19540 // Do not error yet, the constant could be propagated later?
19541 break;
19542
19543 case IX86_BUILTIN_PABSB:
19544 case IX86_BUILTIN_PABSW:
19545 case IX86_BUILTIN_PABSD:
19546 /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */
19547 if (!TARGET_MMX_WITH_SSE)
19548 break;
19549 /* FALLTHRU. */
19550 case IX86_BUILTIN_PABSB128:
19551 case IX86_BUILTIN_PABSB256:
19552 case IX86_BUILTIN_PABSB512:
19553 case IX86_BUILTIN_PABSW128:
19554 case IX86_BUILTIN_PABSW256:
19555 case IX86_BUILTIN_PABSW512:
19556 case IX86_BUILTIN_PABSD128:
19557 case IX86_BUILTIN_PABSD256:
19558 case IX86_BUILTIN_PABSD512:
19559 case IX86_BUILTIN_PABSQ128:
19560 case IX86_BUILTIN_PABSQ256:
19561 case IX86_BUILTIN_PABSQ512:
19562 case IX86_BUILTIN_PABSB128_MASK:
19563 case IX86_BUILTIN_PABSB256_MASK:
19564 case IX86_BUILTIN_PABSW128_MASK:
19565 case IX86_BUILTIN_PABSW256_MASK:
19566 case IX86_BUILTIN_PABSD128_MASK:
19567 case IX86_BUILTIN_PABSD256_MASK:
19568 gcc_assert (n_args >= 1);
19569 if (!gimple_call_lhs (gs: stmt))
19570 {
19571 gsi_replace (gsi, gimple_build_nop (), false);
19572 return true;
19573 }
19574 arg0 = gimple_call_arg (gs: stmt, index: 0);
19575 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
19576 /* For masked ABS, only optimize if the mask is all ones. */
19577 if (n_args > 1
19578 && !ix86_masked_all_ones (elems, arg_mask: gimple_call_arg (gs: stmt, index: n_args - 1)))
19579 break;
19580 {
19581 tree utype, ures, vce;
19582 utype = unsigned_type_for (TREE_TYPE (arg0));
19583 /* PABSB/W/D/Q store the unsigned result in dst; use ABSU_EXPR
19584 instead of ABS_EXPR to handle the overflow case (TYPE_MIN). */
19585 ures = gimple_build (seq: &stmts, code: ABSU_EXPR, type: utype, ops: arg0);
19586 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19587 loc = gimple_location (g: stmt);
19588 vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
19589 g = gimple_build_assign (gimple_call_lhs (gs: stmt),
19590 VIEW_CONVERT_EXPR, vce);
19591 gsi_replace (gsi, g, false);
19592 }
19593 return true;
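/* For example, a V4SI element equal to INT_MIN stays 0x80000000 under
   ABSU_EXPR, which is exactly what pabsd produces, whereas a signed
   ABS_EXPR of INT_MIN would be an overflow.  */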
19594
19595 default:
19596 break;
19597 }
19598
19599 return false;
19600}
19601
19602/* Handler for an SVML-style interface to
19603 a library with vectorized intrinsics. */
19604
19605tree
19606ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
19607{
19608 char name[20];
19609 tree fntype, new_fndecl, args;
19610 unsigned arity;
19611 const char *bname;
19612 machine_mode el_mode, in_mode;
19613 int n, in_n;
19614
19615 /* The SVML is suitable for unsafe math only. */
19616 if (!flag_unsafe_math_optimizations)
19617 return NULL_TREE;
19618
19619 el_mode = TYPE_MODE (TREE_TYPE (type_out));
19620 n = TYPE_VECTOR_SUBPARTS (node: type_out);
19621 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19622 in_n = TYPE_VECTOR_SUBPARTS (node: type_in);
19623 if (el_mode != in_mode
19624 || n != in_n)
19625 return NULL_TREE;
19626
19627 switch (fn)
19628 {
19629 CASE_CFN_EXP:
19630 CASE_CFN_LOG:
19631 CASE_CFN_LOG10:
19632 CASE_CFN_POW:
19633 CASE_CFN_TANH:
19634 CASE_CFN_TAN:
19635 CASE_CFN_ATAN:
19636 CASE_CFN_ATAN2:
19637 CASE_CFN_ATANH:
19638 CASE_CFN_CBRT:
19639 CASE_CFN_SINH:
19640 CASE_CFN_SIN:
19641 CASE_CFN_ASINH:
19642 CASE_CFN_ASIN:
19643 CASE_CFN_COSH:
19644 CASE_CFN_COS:
19645 CASE_CFN_ACOSH:
19646 CASE_CFN_ACOS:
19647 if ((el_mode != DFmode || n != 2)
19648 && (el_mode != SFmode || n != 4))
19649 return NULL_TREE;
19650 break;
19651
19652 default:
19653 return NULL_TREE;
19654 }
19655
19656 tree fndecl = mathfn_built_in (el_mode == DFmode
19657 ? double_type_node : float_type_node, fn);
19658 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
19659
19660 if (DECL_FUNCTION_CODE (decl: fndecl) == BUILT_IN_LOGF)
19661 strcpy (dest: name, src: "vmlsLn4");
19662 else if (DECL_FUNCTION_CODE (decl: fndecl) == BUILT_IN_LOG)
19663 strcpy (dest: name, src: "vmldLn2");
19664 else if (n == 4)
19665 {
19666 sprintf (s: name, format: "vmls%s", bname+10);
19667 name[strlen (s: name)-1] = '4';
19668 }
19669 else
19670 sprintf (s: name, format: "vmld%s2", bname+10);
19671
19672 /* Convert to uppercase. */
19673 name[4] &= ~0x20;
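/* For example, for CFN_SIN with 4 x SFmode the builtin name is
   "__builtin_sinf", so the code above forms "vmlssinf", overwrites the last
   character to get "vmlssin4" and upcases name[4], yielding the SVML entry
   point "vmlsSin4"; the 2 x DFmode variant comes out as "vmldSin2".  */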
19674
19675 arity = 0;
19676 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
19677 arity++;
19678
19679 if (arity == 1)
19680 fntype = build_function_type_list (type_out, type_in, NULL);
19681 else
19682 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
19683
19684 /* Build a function declaration for the vectorized function. */
19685 new_fndecl = build_decl (BUILTINS_LOCATION,
19686 FUNCTION_DECL, get_identifier (name), fntype);
19687 TREE_PUBLIC (new_fndecl) = 1;
19688 DECL_EXTERNAL (new_fndecl) = 1;
19689 DECL_IS_NOVOPS (new_fndecl) = 1;
19690 TREE_READONLY (new_fndecl) = 1;
19691
19692 return new_fndecl;
19693}
19694
19695/* Handler for an ACML-style interface to
19696 a library with vectorized intrinsics. */
19697
19698tree
19699ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
19700{
19701 char name[20] = "__vr.._";
19702 tree fntype, new_fndecl, args;
19703 unsigned arity;
19704 const char *bname;
19705 machine_mode el_mode, in_mode;
19706 int n, in_n;
19707
19708 /* The ACML is 64-bit only and suitable for unsafe math only, as
19709 it does not correctly support parts of IEEE arithmetic with the
19710 required precision, such as denormals. */
19711 if (!TARGET_64BIT
19712 || !flag_unsafe_math_optimizations)
19713 return NULL_TREE;
19714
19715 el_mode = TYPE_MODE (TREE_TYPE (type_out));
19716 n = TYPE_VECTOR_SUBPARTS (node: type_out);
19717 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19718 in_n = TYPE_VECTOR_SUBPARTS (node: type_in);
19719 if (el_mode != in_mode
19720 || n != in_n)
19721 return NULL_TREE;
19722
19723 switch (fn)
19724 {
19725 CASE_CFN_SIN:
19726 CASE_CFN_COS:
19727 CASE_CFN_EXP:
19728 CASE_CFN_LOG:
19729 CASE_CFN_LOG2:
19730 CASE_CFN_LOG10:
19731 if (el_mode == DFmode && n == 2)
19732 {
19733 name[4] = 'd';
19734 name[5] = '2';
19735 }
19736 else if (el_mode == SFmode && n == 4)
19737 {
19738 name[4] = 's';
19739 name[5] = '4';
19740 }
19741 else
19742 return NULL_TREE;
19743 break;
19744
19745 default:
19746 return NULL_TREE;
19747 }
19748
19749 tree fndecl = mathfn_built_in (el_mode == DFmode
19750 ? double_type_node : float_type_node, fn);
19751 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
19752 sprintf (s: name + 7, format: "%s", bname+10);
19753
19754 arity = 0;
19755 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
19756 arity++;
19757
19758 if (arity == 1)
19759 fntype = build_function_type_list (type_out, type_in, NULL);
19760 else
19761 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
19762
19763 /* Build a function declaration for the vectorized function. */
19764 new_fndecl = build_decl (BUILTINS_LOCATION,
19765 FUNCTION_DECL, get_identifier (name), fntype);
19766 TREE_PUBLIC (new_fndecl) = 1;
19767 DECL_EXTERNAL (new_fndecl) = 1;
19768 DECL_IS_NOVOPS (new_fndecl) = 1;
19769 TREE_READONLY (new_fndecl) = 1;
19770
19771 return new_fndecl;
19772}
19773
19774 /* Return a decl of a function that implements a scatter store with
19775 register type VECTYPE, index type INDEX_TYPE and scale SCALE.
19776 Return NULL_TREE if it is not available. */
19777
19778static tree
19779ix86_vectorize_builtin_scatter (const_tree vectype,
19780 const_tree index_type, int scale)
19781{
19782 bool si;
19783 enum ix86_builtins code;
19784 const machine_mode mode = TYPE_MODE (TREE_TYPE (vectype));
19785
19786 if (!TARGET_AVX512F)
19787 return NULL_TREE;
19788
19789 if (!TARGET_EVEX512 && GET_MODE_SIZE (mode) == 64)
19790 return NULL_TREE;
19791
19792 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
19793 ? !TARGET_USE_SCATTER_2PARTS
19794 : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
19795 ? !TARGET_USE_SCATTER_4PARTS
19796 : !TARGET_USE_SCATTER_8PARTS))
19797 return NULL_TREE;
19798
19799 if ((TREE_CODE (index_type) != INTEGER_TYPE
19800 && !POINTER_TYPE_P (index_type))
19801 || (TYPE_MODE (index_type) != SImode
19802 && TYPE_MODE (index_type) != DImode))
19803 return NULL_TREE;
19804
19805 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
19806 return NULL_TREE;
19807
19808 /* The v*scatter* insns sign-extend the index to pointer mode. */
19809 if (TYPE_PRECISION (index_type) < POINTER_SIZE
19810 && TYPE_UNSIGNED (index_type))
19811 return NULL_TREE;
19812
19813 /* Scale can be 1, 2, 4 or 8. */
19814 if (scale <= 0
19815 || scale > 8
19816 || (scale & (scale - 1)) != 0)
19817 return NULL_TREE;
19818
19819 si = TYPE_MODE (index_type) == SImode;
19820 switch (TYPE_MODE (vectype))
19821 {
19822 case E_V8DFmode:
19823 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
19824 break;
19825 case E_V8DImode:
19826 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
19827 break;
19828 case E_V16SFmode:
19829 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
19830 break;
19831 case E_V16SImode:
19832 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
19833 break;
19834 case E_V4DFmode:
19835 if (TARGET_AVX512VL)
19836 code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
19837 else
19838 return NULL_TREE;
19839 break;
19840 case E_V4DImode:
19841 if (TARGET_AVX512VL)
19842 code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
19843 else
19844 return NULL_TREE;
19845 break;
19846 case E_V8SFmode:
19847 if (TARGET_AVX512VL)
19848 code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
19849 else
19850 return NULL_TREE;
19851 break;
19852 case E_V8SImode:
19853 if (TARGET_AVX512VL)
19854 code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
19855 else
19856 return NULL_TREE;
19857 break;
19858 case E_V2DFmode:
19859 if (TARGET_AVX512VL)
19860 code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
19861 else
19862 return NULL_TREE;
19863 break;
19864 case E_V2DImode:
19865 if (TARGET_AVX512VL)
19866 code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
19867 else
19868 return NULL_TREE;
19869 break;
19870 case E_V4SFmode:
19871 if (TARGET_AVX512VL)
19872 code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
19873 else
19874 return NULL_TREE;
19875 break;
19876 case E_V4SImode:
19877 if (TARGET_AVX512VL)
19878 code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
19879 else
19880 return NULL_TREE;
19881 break;
19882 default:
19883 return NULL_TREE;
19884 }
19885
19886 return get_ix86_builtin (c: code);
19887}
19888
19889/* Return true if it is safe to use the rsqrt optabs to optimize
19890 1.0/sqrt. */
19891
19892static bool
19893use_rsqrt_p (machine_mode mode)
19894{
19895 return ((mode == HFmode
19896 || (TARGET_SSE && TARGET_SSE_MATH))
19897 && flag_finite_math_only
19898 && !flag_trapping_math
19899 && flag_unsafe_math_optimizations);
19900}
19901
19902/* Helper for avx_vpermilps256_operand et al. This is also used by
19903 the expansion functions to turn the parallel back into a mask.
19904 The return value is 0 for no match and the imm8+1 for a match. */
19905
19906int
19907avx_vpermilp_parallel (rtx par, machine_mode mode)
19908{
19909 unsigned i, nelt = GET_MODE_NUNITS (mode);
19910 unsigned mask = 0;
19911 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
19912
19913 if (XVECLEN (par, 0) != (int) nelt)
19914 return 0;
19915
19916 /* Validate that all of the elements are constants, and not totally
19917 out of range. Copy the data into an integral array to make the
19918 subsequent checks easier. */
19919 for (i = 0; i < nelt; ++i)
19920 {
19921 rtx er = XVECEXP (par, 0, i);
19922 unsigned HOST_WIDE_INT ei;
19923
19924 if (!CONST_INT_P (er))
19925 return 0;
19926 ei = INTVAL (er);
19927 if (ei >= nelt)
19928 return 0;
19929 ipar[i] = ei;
19930 }
19931
19932 switch (mode)
19933 {
19934 case E_V8DFmode:
19935 /* In the 512-bit DFmode case, we can only move elements within
19936 a 128-bit lane. First fill the second part of the mask,
19937 then fallthru. */
19938 for (i = 4; i < 6; ++i)
19939 {
19940 if (ipar[i] < 4 || ipar[i] >= 6)
19941 return 0;
19942 mask |= (ipar[i] - 4) << i;
19943 }
19944 for (i = 6; i < 8; ++i)
19945 {
19946 if (ipar[i] < 6)
19947 return 0;
19948 mask |= (ipar[i] - 6) << i;
19949 }
19950 /* FALLTHRU */
19951
19952 case E_V4DFmode:
19953 /* In the 256-bit DFmode case, we can only move elements within
19954 a 128-bit lane. */
19955 for (i = 0; i < 2; ++i)
19956 {
19957 if (ipar[i] >= 2)
19958 return 0;
19959 mask |= ipar[i] << i;
19960 }
19961 for (i = 2; i < 4; ++i)
19962 {
19963 if (ipar[i] < 2)
19964 return 0;
19965 mask |= (ipar[i] - 2) << i;
19966 }
19967 break;
19968
19969 case E_V16SFmode:
19970 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
19971 must mirror the permutation in the lower 256 bits. */
19972 for (i = 0; i < 8; ++i)
19973 if (ipar[i] + 8 != ipar[i + 8])
19974 return 0;
19975 /* FALLTHRU */
19976
19977 case E_V8SFmode:
19978 /* In the 256-bit SFmode case, we have full freedom of
19979 movement within the low 128-bit lane, but the high 128-bit
19980 lane must mirror exactly the same pattern. */
19981 for (i = 0; i < 4; ++i)
19982 if (ipar[i] + 4 != ipar[i + 4])
19983 return 0;
19984 nelt = 4;
19985 /* FALLTHRU */
19986
19987 case E_V2DFmode:
19988 case E_V4SFmode:
19989 /* In the 128-bit case, we have full freedom in the placement of
19990 the elements from the source operand. */
19991 for (i = 0; i < nelt; ++i)
19992 mask |= ipar[i] << (i * (nelt / 2));
19993 break;
19994
19995 default:
19996 gcc_unreachable ();
19997 }
19998
19999 /* Make sure success has a non-zero value by adding one. */
20000 return mask + 1;
20001}
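/* For example, a V4SF parallel selecting (2, 3, 0, 1) encodes two bits per
   element into mask 0x4e, so the function returns 0x4f (imm8 + 1); that is
   the immediate a vpermilps swapping the two halves of each 128-bit lane
   would use.  */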
20002
20003/* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
20004 the expansion functions to turn the parallel back into a mask.
20005 The return value is 0 for no match and the imm8+1 for a match. */
20006
20007int
20008avx_vperm2f128_parallel (rtx par, machine_mode mode)
20009{
20010 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
20011 unsigned mask = 0;
20012 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
20013
20014 if (XVECLEN (par, 0) != (int) nelt)
20015 return 0;
20016
20017 /* Validate that all of the elements are constants, and not totally
20018 out of range. Copy the data into an integral array to make the
20019 subsequent checks easier. */
20020 for (i = 0; i < nelt; ++i)
20021 {
20022 rtx er = XVECEXP (par, 0, i);
20023 unsigned HOST_WIDE_INT ei;
20024
20025 if (!CONST_INT_P (er))
20026 return 0;
20027 ei = INTVAL (er);
20028 if (ei >= 2 * nelt)
20029 return 0;
20030 ipar[i] = ei;
20031 }
20032
20033 /* Validate that each half of the permute selects consecutive elements. */
20034 for (i = 0; i < nelt2 - 1; ++i)
20035 if (ipar[i] + 1 != ipar[i + 1])
20036 return 0;
20037 for (i = nelt2; i < nelt - 1; ++i)
20038 if (ipar[i] + 1 != ipar[i + 1])
20039 return 0;
20040
20041 /* Reconstruct the mask. */
20042 for (i = 0; i < 2; ++i)
20043 {
20044 unsigned e = ipar[i * nelt2];
20045 if (e % nelt2)
20046 return 0;
20047 e /= nelt2;
20048 mask |= e << (i * 4);
20049 }
20050
20051 /* Make sure success has a non-zero value by adding one. */
20052 return mask + 1;
20053}
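/* For example, a V8SF parallel selecting (8, 9, 10, 11, 0, 1, 2, 3) takes
   the low 128-bit lane of the second operand followed by the low lane of
   the first, which encodes to imm8 0x02; the function returns 0x03.  */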
20054
20055/* Return a mask of VPTERNLOG operands that do not affect output. */
20056
20057int
20058vpternlog_redundant_operand_mask (rtx pternlog_imm)
20059{
20060 int mask = 0;
20061 int imm8 = INTVAL (pternlog_imm);
20062
20063 if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F))
20064 mask |= 1;
20065 if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
20066 mask |= 2;
20067 if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
20068 mask |= 4;
20069
20070 return mask;
20071}
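/* For example, imm8 0xf0 is "copy the first operand": flipping the second
   or third operand never changes the result, so the checks above yield
   2 | 4 == 6.  (Bit (a << 2 | b << 1 | c) of imm8 is the VPTERNLOG result
   for operand bits a, b and c, so comparing the two imm8 halves selected
   by one operand tests whether that operand matters.)  */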
20072
20073/* Eliminate false dependencies on operands that do not affect output
20074 by substituting other operands of a VPTERNLOG. */
20075
20076void
20077substitute_vpternlog_operands (rtx *operands)
20078{
20079 int mask = vpternlog_redundant_operand_mask (pternlog_imm: operands[4]);
20080
20081 if (mask & 1) /* The first operand is redundant. */
20082 operands[1] = operands[2];
20083
20084 if (mask & 2) /* The second operand is redundant. */
20085 operands[2] = operands[1];
20086
20087 if (mask & 4) /* The third operand is redundant. */
20088 operands[3] = operands[1];
20089 else if (REG_P (operands[3]))
20090 {
20091 if (mask & 1)
20092 operands[1] = operands[3];
20093 if (mask & 2)
20094 operands[2] = operands[3];
20095 }
20096}
20097
20098/* Return a register priority for hard reg REGNO. */
20099static int
20100ix86_register_priority (int hard_regno)
20101{
20102 /* ebp and r13 as a base always want a displacement, and r12 as a
20103 base always wants an index. So discourage their use in an
20104 address. */
20105 if (hard_regno == R12_REG || hard_regno == R13_REG)
20106 return 0;
20107 if (hard_regno == BP_REG)
20108 return 1;
20109 /* New x86-64 int registers result in bigger code size. Discourage them. */
20110 if (REX_INT_REGNO_P (hard_regno))
20111 return 2;
20112 if (REX2_INT_REGNO_P (hard_regno))
20113 return 2;
20114 /* New x86-64 SSE registers result in bigger code size. Discourage them. */
20115 if (REX_SSE_REGNO_P (hard_regno))
20116 return 2;
20117 if (EXT_REX_SSE_REGNO_P (hard_regno))
20118 return 1;
20119 /* Usage of AX register results in smaller code. Prefer it. */
20120 if (hard_regno == AX_REG)
20121 return 4;
20122 return 3;
20123}
20124
20125/* Implement TARGET_PREFERRED_RELOAD_CLASS.
20126
20127 Put float CONST_DOUBLE in the constant pool instead of fp regs.
20128 QImode must go into class Q_REGS.
20129 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
20130 movdf to do mem-to-mem moves through integer regs. */
20131
20132static reg_class_t
20133ix86_preferred_reload_class (rtx x, reg_class_t regclass)
20134{
20135 machine_mode mode = GET_MODE (x);
20136
20137 /* We're only allowed to return a subclass of CLASS. Many of the
20138 following checks fail for NO_REGS, so eliminate that early. */
20139 if (regclass == NO_REGS)
20140 return NO_REGS;
20141
20142 /* All classes can load zeros. */
20143 if (x == CONST0_RTX (mode))
20144 return regclass;
20145
20146 /* Force constants into memory if we are loading a (nonzero) constant into
20147 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
20148 instructions to load from a constant. */
20149 if (CONSTANT_P (x)
20150 && (MAYBE_MMX_CLASS_P (regclass)
20151 || MAYBE_SSE_CLASS_P (regclass)
20152 || MAYBE_MASK_CLASS_P (regclass)))
20153 return NO_REGS;
20154
20155 /* Floating-point constants need more complex checks. */
20156 if (CONST_DOUBLE_P (x))
20157 {
20158 /* General regs can load everything. */
20159 if (INTEGER_CLASS_P (regclass))
20160 return regclass;
20161
20162 /* Floats can load 0 and 1 plus some others. Note that we eliminated
20163 zero above. We only want to wind up preferring 80387 registers if
20164 we plan on doing computation with them. */
20165 if (IS_STACK_MODE (mode)
20166 && standard_80387_constant_p (x) > 0)
20167 {
20168 /* Limit class to FP regs. */
20169 if (FLOAT_CLASS_P (regclass))
20170 return FLOAT_REGS;
20171 }
20172
20173 return NO_REGS;
20174 }
20175
20176 /* Prefer SSE if we can use them for math. Also allow integer regs
20177 when moves between register units are cheap. */
20178 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20179 {
20180 if (TARGET_INTER_UNIT_MOVES_FROM_VEC
20181 && TARGET_INTER_UNIT_MOVES_TO_VEC
20182 && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
20183 return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20184 else
20185 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20186 }
20187
20188 /* Generally when we see PLUS here, it's the function invariant
20189 (plus soft-fp const_int), which can only be computed into general
20190 regs. */
20191 if (GET_CODE (x) == PLUS)
20192 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
20193
20194 /* QImode constants are easy to load, but non-constant QImode data
20195 must go into Q_REGS or ALL_MASK_REGS. */
20196 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
20197 {
20198 if (Q_CLASS_P (regclass))
20199 return regclass;
20200 else if (reg_class_subset_p (Q_REGS, regclass))
20201 return Q_REGS;
20202 else if (MASK_CLASS_P (regclass))
20203 return regclass;
20204 else
20205 return NO_REGS;
20206 }
20207
20208 return regclass;
20209}
20210
20211/* Discourage putting floating-point values in SSE registers unless
20212 SSE math is being used, and likewise for the 387 registers. */
20213static reg_class_t
20214ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
20215{
20216 /* Restrict the output reload class to the register bank that we are doing
20217 math on. If we would like not to return a subset of CLASS, reject this
20218 alternative: if reload cannot do this, it will still use its choice. */
20219 machine_mode mode = GET_MODE (x);
20220 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20221 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
20222
20223 if (IS_STACK_MODE (mode))
20224 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
20225
20226 return regclass;
20227}
20228
20229static reg_class_t
20230ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
20231 machine_mode mode, secondary_reload_info *sri)
20232{
20233 /* Double-word spills from general registers to non-offsettable memory
20234 references (zero-extended addresses) require special handling. */
20235 if (TARGET_64BIT
20236 && MEM_P (x)
20237 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
20238 && INTEGER_CLASS_P (rclass)
20239 && !offsettable_memref_p (x))
20240 {
20241 sri->icode = (in_p
20242 ? CODE_FOR_reload_noff_load
20243 : CODE_FOR_reload_noff_store);
20244 /* Add the cost of moving address to a temporary. */
20245 sri->extra_cost = 1;
20246
20247 return NO_REGS;
20248 }
20249
20250 /* QImode spills from non-QI registers require an
20251 intermediate register on 32-bit targets. */
20252 if (mode == QImode
20253 && ((!TARGET_64BIT && !in_p
20254 && INTEGER_CLASS_P (rclass)
20255 && MAYBE_NON_Q_CLASS_P (rclass))
20256 || (!TARGET_AVX512DQ
20257 && MAYBE_MASK_CLASS_P (rclass))))
20258 {
20259 int regno = true_regnum (x);
20260
20261 /* Return Q_REGS if the operand is in memory. */
20262 if (regno == -1)
20263 return Q_REGS;
20264
20265 return NO_REGS;
20266 }
20267
20268 /* Require a move to a GPR first, and then a store to memory. */
20269 if ((mode == HFmode || mode == HImode || mode == V2QImode
20270 || mode == BFmode)
20271 && !TARGET_SSE4_1
20272 && SSE_CLASS_P (rclass)
20273 && !in_p && MEM_P (x))
20274 {
20275 sri->extra_cost = 1;
20276 return GENERAL_REGS;
20277 }
20278
20279 /* This condition handles corner case where an expression involving
20280 pointers gets vectorized. We're trying to use the address of a
20281 stack slot as a vector initializer.
20282
20283 (set (reg:V2DI 74 [ vect_cst_.2 ])
20284 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
20285
20286 Eventually frame gets turned into sp+offset like this:
20287
20288 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20289 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
20290 (const_int 392 [0x188]))))
20291
20292 That later gets turned into:
20293
20294 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20295 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
20296 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
20297
20298 We'll have the following reload recorded:
20299
20300 Reload 0: reload_in (DI) =
20301 (plus:DI (reg/f:DI 7 sp)
20302 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
20303 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20304 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
20305 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
20306 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20307 reload_reg_rtx: (reg:V2DI 22 xmm1)
20308
20309 Which isn't going to work since SSE instructions can't handle scalar
20310 additions. Returning GENERAL_REGS forces the addition into integer
20311 register and reload can handle subsequent reloads without problems. */
20312
20313 if (in_p && GET_CODE (x) == PLUS
20314 && SSE_CLASS_P (rclass)
20315 && SCALAR_INT_MODE_P (mode))
20316 return GENERAL_REGS;
20317
20318 return NO_REGS;
20319}
20320
20321/* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
20322
20323static bool
20324ix86_class_likely_spilled_p (reg_class_t rclass)
20325{
20326 switch (rclass)
20327 {
20328 case AREG:
20329 case DREG:
20330 case CREG:
20331 case BREG:
20332 case AD_REGS:
20333 case SIREG:
20334 case DIREG:
20335 case SSE_FIRST_REG:
20336 case FP_TOP_REG:
20337 case FP_SECOND_REG:
20338 return true;
20339
20340 default:
20341 break;
20342 }
20343
20344 return false;
20345}
20346
20347/* Return true if a set of DST by the expression SRC should be allowed.
20348 This prevents complex sets of likely_spilled hard regs before reload. */
20349
20350bool
20351ix86_hardreg_mov_ok (rtx dst, rtx src)
20352{
20353 /* Avoid complex sets of likely_spilled hard registers before reload. */
20354 if (REG_P (dst) && HARD_REGISTER_P (dst)
20355 && !REG_P (src) && !MEM_P (src)
20356 && !(VECTOR_MODE_P (GET_MODE (dst))
20357	     ? standard_sse_constant_p (src, GET_MODE (dst))
20358 : x86_64_immediate_operand (src, GET_MODE (dst)))
20359 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
20360 && !reload_completed)
20361 return false;
20362 return true;
20363}
20364
20365/* If we are copying between registers from different register sets
20366 (e.g. FP and integer), we may need a memory location.
20367
20368 The function can't work reliably when one of the CLASSES is a class
20369 containing registers from multiple sets. We avoid this by never combining
20370 different sets in a single alternative in the machine description.
20371 Ensure that this constraint holds to avoid unexpected surprises.
20372
20373 When STRICT is false, we are being called from REGISTER_MOVE_COST,
20374 so do not enforce these sanity checks.
20375
20376 To optimize register_move_cost performance, define inline variant. */
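/* A worked example, assuming a 32-bit target (UNITS_PER_WORD == 4): a DImode
   copy between SSE_REGS and GENERAL_REGS differs in SSE_CLASS_P, and even
   with SSE2 available GET_MODE_SIZE (DImode) == 8 exceeds UNITS_PER_WORD,
   so the function returns true and the copy is routed through a stack slot.  */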
20377
20378static inline bool
20379inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
20380 reg_class_t class2, int strict)
20381{
20382 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
20383 return false;
20384
20385 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
20386 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
20387 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
20388 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
20389 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
20390 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
20391 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
20392 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
20393 {
20394 gcc_assert (!strict || lra_in_progress);
20395 return true;
20396 }
20397
20398 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
20399 return true;
20400
20401 /* ??? This is a lie. We do have moves between mmx/general, and for
20402 mmx/sse2. But by saying we need secondary memory we discourage the
20403 register allocator from using the mmx registers unless needed. */
20404 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20405 return true;
20406
20407 /* Between mask and general, we have moves no larger than word size. */
20408 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
20409 {
20410 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
20411 || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20412 return true;
20413 }
20414
20415 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20416 {
20417 /* SSE1 doesn't have any direct moves from other classes. */
20418 if (!TARGET_SSE2)
20419 return true;
20420
20421 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
20422 return true;
20423
20424 int msize = GET_MODE_SIZE (mode);
20425
20426 /* Between SSE and general, we have moves no larger than word size. */
20427 if (msize > UNITS_PER_WORD)
20428 return true;
20429
20430	  /* In addition to SImode moves, HImode moves are supported for SSE2 and above;
20431	     use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16.  */
20432 int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);
20433
20434 if (msize < minsize)
20435 return true;
20436
20437 /* If the target says that inter-unit moves are more expensive
20438 than moving through memory, then don't generate them. */
20439 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
20440 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
20441 return true;
20442 }
20443
20444 return false;
20445}
20446
20447/* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
20448
20449static bool
20450ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
20451 reg_class_t class2)
20452{
20453  return inline_secondary_memory_needed (mode, class1, class2, true);
20454}
20455
20456/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
20457
20458 get_secondary_mem widens integral modes to BITS_PER_WORD.
20459 There is no need to emit full 64 bit move on 64 bit targets
20460 for integral modes that can be moved using 32 bit move. */
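/* For instance, a QImode value spilled between a mask register and a GPR is
   widened here to SImode, so the secondary memory slot is accessed with a
   single 32-bit load/store rather than a byte move.  */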
20461
20462static machine_mode
20463ix86_secondary_memory_needed_mode (machine_mode mode)
20464{
20465 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
20466 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
20467 return mode;
20468}
20469
20470/* Implement the TARGET_CLASS_MAX_NREGS hook.
20471
20472 On the 80386, this is the size of MODE in words,
20473 except in the FP regs, where a single reg is always enough. */
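/* A couple of examples: TImode in GENERAL_REGS needs CEIL (16, UNITS_PER_WORD)
   registers (4 on ia32, 2 on x86-64) and XFmode needs 3 (ia32) or 2 (x86-64),
   while any non-complex mode in an FP, SSE or mask class needs just one.  */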
20474
20475static unsigned char
20476ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
20477{
20478 if (MAYBE_INTEGER_CLASS_P (rclass))
20479 {
20480 if (mode == XFmode)
20481 return (TARGET_64BIT ? 2 : 3);
20482 else if (mode == XCmode)
20483 return (TARGET_64BIT ? 4 : 6);
20484 else
20485 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
20486 }
20487 else
20488 {
20489 if (COMPLEX_MODE_P (mode))
20490 return 2;
20491 else
20492 return 1;
20493 }
20494}
20495
20496/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
20497
20498static bool
20499ix86_can_change_mode_class (machine_mode from, machine_mode to,
20500 reg_class_t regclass)
20501{
20502 if (from == to)
20503 return true;
20504
20505 /* x87 registers can't do subreg at all, as all values are reformatted
20506 to extended precision. */
20507 if (MAYBE_FLOAT_CLASS_P (regclass))
20508 return false;
20509
20510 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
20511 {
20512 /* Vector registers do not support QI or HImode loads. If we don't
20513 disallow a change to these modes, reload will assume it's ok to
20514 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
20515 the vec_dupv4hi pattern.
20516 NB: SSE2 can load 16bit data to sse register via pinsrw. */
20517 int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
20518 if (GET_MODE_SIZE (from) < mov_size
20519 || GET_MODE_SIZE (to) < mov_size)
20520 return false;
20521 }
20522
20523 return true;
20524}
20525
20526/* Return index of MODE in the sse load/store tables. */
20527
20528static inline int
20529sse_store_index (machine_mode mode)
20530{
20531 /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
20532 costs to processor_costs, which requires changes to all entries in
20533 processor cost table. */
20534 if (mode == E_HFmode)
20535 mode = E_SFmode;
20536
20537 switch (GET_MODE_SIZE (mode))
20538 {
20539 case 4:
20540 return 0;
20541 case 8:
20542 return 1;
20543 case 16:
20544 return 2;
20545 case 32:
20546 return 3;
20547 case 64:
20548 return 4;
20549 default:
20550 return -1;
20551 }
20552}
20553
20554/* Return the cost of moving data of mode M between a
20555 register and memory. A value of 2 is the default; this cost is
20556 relative to those in `REGISTER_MOVE_COST'.
20557
20558   This function is used extensively by register_move_cost, which is used to
20559   build tables at startup, so keep it inline.
20560   When IN is 2, return the maximum of the in and out move costs.
20561
20562 If moving between registers and memory is more expensive than
20563 between two registers, you should define this macro to express the
20564 relative cost.
20565
20566 Model also increased moving costs of QImode registers in non
20567 Q_REGS classes.
20568 */
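/* Example: a DFmode value in FLOAT_REGS with IN == 2 costs
   MAX (fp_load[1], fp_store[1]); an 8-byte integer on a 32-bit target falls
   through to the default case and is charged the word-sized load/store cost
   multiplied by CEIL (8, UNITS_PER_WORD) == 2.  */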
20569static inline int
20570inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
20571{
20572 int cost;
20573
20574 if (FLOAT_CLASS_P (regclass))
20575 {
20576 int index;
20577 switch (mode)
20578 {
20579 case E_SFmode:
20580 index = 0;
20581 break;
20582 case E_DFmode:
20583 index = 1;
20584 break;
20585 case E_XFmode:
20586 index = 2;
20587 break;
20588 default:
20589 return 100;
20590 }
20591 if (in == 2)
20592 return MAX (ix86_cost->hard_register.fp_load [index],
20593 ix86_cost->hard_register.fp_store [index]);
20594 return in ? ix86_cost->hard_register.fp_load [index]
20595 : ix86_cost->hard_register.fp_store [index];
20596 }
20597 if (SSE_CLASS_P (regclass))
20598 {
20599 int index = sse_store_index (mode);
20600 if (index == -1)
20601 return 100;
20602 if (in == 2)
20603 return MAX (ix86_cost->hard_register.sse_load [index],
20604 ix86_cost->hard_register.sse_store [index]);
20605 return in ? ix86_cost->hard_register.sse_load [index]
20606 : ix86_cost->hard_register.sse_store [index];
20607 }
20608 if (MASK_CLASS_P (regclass))
20609 {
20610 int index;
20611 switch (GET_MODE_SIZE (mode))
20612 {
20613 case 1:
20614 index = 0;
20615 break;
20616 case 2:
20617 index = 1;
20618 break;
20619 /* DImode loads and stores assumed to cost the same as SImode. */
20620 case 4:
20621 case 8:
20622 index = 2;
20623 break;
20624 default:
20625 return 100;
20626 }
20627
20628 if (in == 2)
20629 return MAX (ix86_cost->hard_register.mask_load[index],
20630 ix86_cost->hard_register.mask_store[index]);
20631      return in ? ix86_cost->hard_register.mask_load[index]
20632		: ix86_cost->hard_register.mask_store[index];
20633 }
20634 if (MMX_CLASS_P (regclass))
20635 {
20636 int index;
20637 switch (GET_MODE_SIZE (mode))
20638 {
20639 case 4:
20640 index = 0;
20641 break;
20642 case 8:
20643 index = 1;
20644 break;
20645 default:
20646 return 100;
20647 }
20648 if (in == 2)
20649 return MAX (ix86_cost->hard_register.mmx_load [index],
20650 ix86_cost->hard_register.mmx_store [index]);
20651 return in ? ix86_cost->hard_register.mmx_load [index]
20652 : ix86_cost->hard_register.mmx_store [index];
20653 }
20654 switch (GET_MODE_SIZE (mode))
20655 {
20656 case 1:
20657 if (Q_CLASS_P (regclass) || TARGET_64BIT)
20658 {
20659 if (!in)
20660 return ix86_cost->hard_register.int_store[0];
20661 if (TARGET_PARTIAL_REG_DEPENDENCY
20662 && optimize_function_for_speed_p (cfun))
20663 cost = ix86_cost->hard_register.movzbl_load;
20664 else
20665 cost = ix86_cost->hard_register.int_load[0];
20666 if (in == 2)
20667 return MAX (cost, ix86_cost->hard_register.int_store[0]);
20668 return cost;
20669 }
20670 else
20671 {
20672 if (in == 2)
20673 return MAX (ix86_cost->hard_register.movzbl_load,
20674 ix86_cost->hard_register.int_store[0] + 4);
20675 if (in)
20676 return ix86_cost->hard_register.movzbl_load;
20677 else
20678 return ix86_cost->hard_register.int_store[0] + 4;
20679 }
20680 break;
20681 case 2:
20682 {
20683 int cost;
20684 if (in == 2)
20685 cost = MAX (ix86_cost->hard_register.int_load[1],
20686 ix86_cost->hard_register.int_store[1]);
20687 else
20688 cost = in ? ix86_cost->hard_register.int_load[1]
20689 : ix86_cost->hard_register.int_store[1];
20690
20691 if (mode == E_HFmode)
20692 {
20693 /* Prefer SSE over GPR for HFmode. */
20694 int sse_cost;
20695 int index = sse_store_index (mode);
20696 if (in == 2)
20697 sse_cost = MAX (ix86_cost->hard_register.sse_load[index],
20698 ix86_cost->hard_register.sse_store[index]);
20699 else
20700 sse_cost = (in
20701 ? ix86_cost->hard_register.sse_load [index]
20702 : ix86_cost->hard_register.sse_store [index]);
20703 if (sse_cost >= cost)
20704 cost = sse_cost + 1;
20705 }
20706 return cost;
20707 }
20708 default:
20709 if (in == 2)
20710 cost = MAX (ix86_cost->hard_register.int_load[2],
20711 ix86_cost->hard_register.int_store[2]);
20712 else if (in)
20713 cost = ix86_cost->hard_register.int_load[2];
20714 else
20715 cost = ix86_cost->hard_register.int_store[2];
20716 /* Multiply with the number of GPR moves needed. */
20717 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
20718 }
20719}
20720
20721static int
20722ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
20723{
20724  return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
20725}
20726
20727
20728/* Return the cost of moving data from a register in class CLASS1 to
20729 one in class CLASS2.
20730
20731 It is not required that the cost always equal 2 when FROM is the same as TO;
20732 on some machines it is expensive to move between registers if they are not
20733 general registers. */
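/* Informally, when a secondary memory slot is required the result is roughly
   1 + memory move cost of CLASS1 + memory move cost of CLASS2, with an extra
   penalty of 20 for the memory-size-mismatch stall and another 20 for FP/MMX
   copies that have to switch the register file mode.  */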
20734
20735static int
20736ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
20737 reg_class_t class2_i)
20738{
20739 enum reg_class class1 = (enum reg_class) class1_i;
20740 enum reg_class class2 = (enum reg_class) class2_i;
20741
20742 /* In case we require secondary memory, compute cost of the store followed
20743 by load. In order to avoid bad register allocation choices, we need
20744 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
20745
20746  if (inline_secondary_memory_needed (mode, class1, class2, false))
20747 {
20748 int cost = 1;
20749
20750      cost += inline_memory_move_cost (mode, class1, 2);
20751      cost += inline_memory_move_cost (mode, class2, 2);
20752
20753 /* In case of copying from general_purpose_register we may emit multiple
20754 stores followed by single load causing memory size mismatch stall.
20755 Count this as arbitrarily high cost of 20. */
20756 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
20757 && TARGET_MEMORY_MISMATCH_STALL
20758 && targetm.class_max_nregs (class1, mode)
20759 > targetm.class_max_nregs (class2, mode))
20760 cost += 20;
20761
20762 /* In the case of FP/MMX moves, the registers actually overlap, and we
20763 have to switch modes in order to treat them differently. */
20764 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
20765 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
20766 cost += 20;
20767
20768 return cost;
20769 }
20770
20771 /* Moves between MMX and non-MMX units require secondary memory. */
20772 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20773 gcc_unreachable ();
20774
20775 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20776 return (SSE_CLASS_P (class1)
20777 ? ix86_cost->hard_register.sse_to_integer
20778 : ix86_cost->hard_register.integer_to_sse);
20779
20780 /* Moves between mask register and GPR. */
20781 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
20782 {
20783 return (MASK_CLASS_P (class1)
20784 ? ix86_cost->hard_register.mask_to_integer
20785 : ix86_cost->hard_register.integer_to_mask);
20786 }
20787 /* Moving between mask registers. */
20788 if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
20789 return ix86_cost->hard_register.mask_move;
20790
20791 if (MAYBE_FLOAT_CLASS_P (class1))
20792 return ix86_cost->hard_register.fp_move;
20793 if (MAYBE_SSE_CLASS_P (class1))
20794 {
20795 if (GET_MODE_BITSIZE (mode) <= 128)
20796 return ix86_cost->hard_register.xmm_move;
20797 if (GET_MODE_BITSIZE (mode) <= 256)
20798 return ix86_cost->hard_register.ymm_move;
20799 return ix86_cost->hard_register.zmm_move;
20800 }
20801 if (MAYBE_MMX_CLASS_P (class1))
20802 return ix86_cost->hard_register.mmx_move;
20803 return 2;
20804}
20805
20806/* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
20807 words of a value of mode MODE but can be less for certain modes in
20808 special long registers.
20809
20810 Actually there are no two word move instructions for consecutive
20811 registers. And only registers 0-3 may have mov byte instructions
20812 applied to them. */
20813
20814static unsigned int
20815ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
20816{
20817 if (GENERAL_REGNO_P (regno))
20818 {
20819 if (mode == XFmode)
20820 return TARGET_64BIT ? 2 : 3;
20821 if (mode == XCmode)
20822 return TARGET_64BIT ? 4 : 6;
20823 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
20824 }
20825 if (COMPLEX_MODE_P (mode))
20826 return 2;
20827 /* Register pair for mask registers. */
20828 if (mode == P2QImode || mode == P2HImode)
20829 return 2;
20830 if (mode == V64SFmode || mode == V64SImode)
20831 return 4;
20832 return 1;
20833}
20834
20835/* Implement REGMODE_NATURAL_SIZE(MODE). */
20836unsigned int
20837ix86_regmode_natural_size (machine_mode mode)
20838{
20839 if (mode == P2HImode || mode == P2QImode)
20840 return GET_MODE_SIZE (mode) / 2;
20841 return UNITS_PER_WORD;
20842}
20843
20844/* Implement TARGET_HARD_REGNO_MODE_OK. */
20845
20846static bool
20847ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
20848{
20849  /* Flags can hold only CCmode values, and only flags can hold them.  */
20850 if (CC_REGNO_P (regno))
20851 return GET_MODE_CLASS (mode) == MODE_CC;
20852 if (GET_MODE_CLASS (mode) == MODE_CC
20853 || GET_MODE_CLASS (mode) == MODE_RANDOM)
20854 return false;
20855 if (STACK_REGNO_P (regno))
20856 return VALID_FP_MODE_P (mode);
20857 if (MASK_REGNO_P (regno))
20858 {
20859 /* Register pair only starts at even register number. */
20860 if ((mode == P2QImode || mode == P2HImode))
20861 return MASK_PAIR_REGNO_P(regno);
20862
20863 return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
20864 || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
20865 }
20866
20867 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
20868 return false;
20869
20870 if (SSE_REGNO_P (regno))
20871 {
20872 /* We implement the move patterns for all vector modes into and
20873 out of SSE registers, even when no operation instructions
20874 are available. */
20875
20876 /* For AVX-512 we allow, regardless of regno:
20877 - XI mode
20878 - any of 512-bit wide vector mode
20879 - any scalar mode. */
20880 if (TARGET_AVX512F
20881 && ((VALID_AVX512F_REG_OR_XI_MODE (mode) && TARGET_EVEX512)
20882 || VALID_AVX512F_SCALAR_MODE (mode)))
20883 return true;
20884
20885 /* For AVX-5124FMAPS or AVX-5124VNNIW
20886 allow V64SF and V64SI modes for special regnos. */
20887 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
20888 && (mode == V64SFmode || mode == V64SImode)
20889 && MOD4_SSE_REGNO_P (regno))
20890 return true;
20891
20892 /* TODO check for QI/HI scalars. */
20893 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
20894 if (TARGET_AVX512VL
20895 && (VALID_AVX256_REG_OR_OI_MODE (mode)
20896 || VALID_AVX512VL_128_REG_MODE (mode)))
20897 return true;
20898
20899 /* xmm16-xmm31 are only available for AVX-512. */
20900 if (EXT_REX_SSE_REGNO_P (regno))
20901 return false;
20902
20903 /* Use pinsrw/pextrw to mov 16-bit data from/to sse to/from integer. */
20904 if (TARGET_SSE2 && mode == HImode)
20905 return true;
20906
20907 /* OImode and AVX modes are available only when AVX is enabled. */
20908 return ((TARGET_AVX
20909 && VALID_AVX256_REG_OR_OI_MODE (mode))
20910 || VALID_SSE_REG_MODE (mode)
20911 || VALID_SSE2_REG_MODE (mode)
20912 || VALID_MMX_REG_MODE (mode)
20913 || VALID_MMX_REG_MODE_3DNOW (mode));
20914 }
20915 if (MMX_REGNO_P (regno))
20916 {
20917 /* We implement the move patterns for 3DNOW modes even in MMX mode,
20918 so if the register is available at all, then we can move data of
20919 the given mode into or out of it. */
20920 return (VALID_MMX_REG_MODE (mode)
20921 || VALID_MMX_REG_MODE_3DNOW (mode));
20922 }
20923
20924 if (mode == QImode)
20925 {
20926 /* Take care for QImode values - they can be in non-QI regs,
20927 but then they do cause partial register stalls. */
20928 if (ANY_QI_REGNO_P (regno))
20929 return true;
20930 if (!TARGET_PARTIAL_REG_STALL)
20931 return true;
20932 /* LRA checks if the hard register is OK for the given mode.
20933 QImode values can live in non-QI regs, so we allow all
20934 registers here. */
20935 if (lra_in_progress)
20936 return true;
20937 return !can_create_pseudo_p ();
20938 }
20939 /* We handle both integer and floats in the general purpose registers. */
20940 else if (VALID_INT_MODE_P (mode)
20941 || VALID_FP_MODE_P (mode))
20942 return true;
20943 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
20944 on to use that value in smaller contexts, this can easily force a
20945 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20946 supporting DImode, allow it. */
20947 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
20948 return true;
20949
20950 return false;
20951}
20952
20953/* Implement TARGET_INSN_CALLEE_ABI. */
20954
20955const predefined_function_abi &
20956ix86_insn_callee_abi (const rtx_insn *insn)
20957{
20958 unsigned int abi_id = 0;
20959 rtx pat = PATTERN (insn);
20960 if (vzeroupper_pattern (pat, VOIDmode))
20961 abi_id = ABI_VZEROUPPER;
20962
20963 return function_abis[abi_id];
20964}
20965
20966/* Initialize function_abis with corresponding abi_id,
20967 currently only handle vzeroupper. */
20968void
20969ix86_initialize_callee_abi (unsigned int abi_id)
20970{
20971 gcc_assert (abi_id == ABI_VZEROUPPER);
20972 predefined_function_abi &vzeroupper_abi = function_abis[abi_id];
20973 if (!vzeroupper_abi.initialized_p ())
20974 {
20975 HARD_REG_SET full_reg_clobbers;
20976      CLEAR_HARD_REG_SET (full_reg_clobbers);
20977 vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers);
20978 }
20979}
20980
20981void
20982ix86_expand_avx_vzeroupper (void)
20983{
20984 /* Initialize vzeroupper_abi here. */
20985 ix86_initialize_callee_abi (ABI_VZEROUPPER);
20986 rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ());
20987 /* Return false for non-local goto in can_nonlocal_goto. */
20988  make_reg_eh_region_note (insn, 0, INT_MIN);
20989 /* Flag used for call_insn indicates it's a fake call. */
20990 RTX_FLAG (insn, used) = 1;
20991}
20992
20993
20994/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
20995 saves SSE registers across calls is Win64 (thus no need to check the
20996 current ABI here), and with AVX enabled Win64 only guarantees that
20997 the low 16 bytes are saved. */
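/* For example, a 32-byte value (say V8SFmode) live in xmm6 across a Win64
   call is only partially preserved, since just the low 16 bytes are callee
   saved; for the vzeroupper ABI only the registers whose upper halves
   vzeroupper actually clears are reported as partially clobbered.  */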
20998
20999static bool
21000ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno,
21001 machine_mode mode)
21002{
21003 /* Special ABI for vzeroupper which only clobber higher part of sse regs. */
21004 if (abi_id == ABI_VZEROUPPER)
21005 return (GET_MODE_SIZE (mode) > 16
21006 && ((TARGET_64BIT && REX_SSE_REGNO_P (regno))
21007 || LEGACY_SSE_REGNO_P (regno)));
21008
21009 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
21010}
21011
21012/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
21013 tieable integer mode. */
21014
21015static bool
21016ix86_tieable_integer_mode_p (machine_mode mode)
21017{
21018 switch (mode)
21019 {
21020 case E_HImode:
21021 case E_SImode:
21022 return true;
21023
21024 case E_QImode:
21025 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
21026
21027 case E_DImode:
21028 return TARGET_64BIT;
21029
21030 default:
21031 return false;
21032 }
21033}
21034
21035/* Implement TARGET_MODES_TIEABLE_P.
21036
21037 Return true if MODE1 is accessible in a register that can hold MODE2
21038 without copying. That is, all register classes that can hold MODE2
21039 can also hold MODE1. */
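/* For example, V4SFmode and V2DImode (both 16 bytes wide and both restricted
   to SSE registers) are tieable, while DImode ties with SImode only on
   64-bit targets.  */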
21040
21041static bool
21042ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
21043{
21044 if (mode1 == mode2)
21045 return true;
21046
21047  if (ix86_tieable_integer_mode_p (mode1)
21048      && ix86_tieable_integer_mode_p (mode2))
21049 return true;
21050
21051 /* MODE2 being XFmode implies fp stack or general regs, which means we
21052 can tie any smaller floating point modes to it. Note that we do not
21053 tie this with TFmode. */
21054 if (mode2 == XFmode)
21055 return mode1 == SFmode || mode1 == DFmode;
21056
21057 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
21058 that we can tie it with SFmode. */
21059 if (mode2 == DFmode)
21060 return mode1 == SFmode;
21061
21062 /* If MODE2 is only appropriate for an SSE register, then tie with
21063 any other mode acceptable to SSE registers. */
21064 if (GET_MODE_SIZE (mode2) == 64
21065      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
21066    return (GET_MODE_SIZE (mode1) == 64
21067	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
21068 if (GET_MODE_SIZE (mode2) == 32
21069      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
21070    return (GET_MODE_SIZE (mode1) == 32
21071	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
21072 if (GET_MODE_SIZE (mode2) == 16
21073      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
21074    return (GET_MODE_SIZE (mode1) == 16
21075	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
21076
21077 /* If MODE2 is appropriate for an MMX register, then tie
21078 with any other mode acceptable to MMX registers. */
21079 if (GET_MODE_SIZE (mode2) == 8
21080      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
21081    return (GET_MODE_SIZE (mode1) == 8
21082	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
21083
21084 /* SCmode and DImode can be tied. */
21085 if ((mode1 == E_SCmode && mode2 == E_DImode)
21086 || (mode1 == E_DImode && mode2 == E_SCmode))
21087 return TARGET_64BIT;
21088
21089 /* [SD]Cmode and V2[SD]Fmode modes can be tied. */
21090 if ((mode1 == E_SCmode && mode2 == E_V2SFmode)
21091 || (mode1 == E_V2SFmode && mode2 == E_SCmode)
21092 || (mode1 == E_DCmode && mode2 == E_V2DFmode)
21093 || (mode1 == E_V2DFmode && mode2 == E_DCmode))
21094 return true;
21095
21096 return false;
21097}
21098
21099/* Return the cost of moving between two registers of mode MODE. */
21100
21101static int
21102ix86_set_reg_reg_cost (machine_mode mode)
21103{
21104 unsigned int units = UNITS_PER_WORD;
21105
21106 switch (GET_MODE_CLASS (mode))
21107 {
21108 default:
21109 break;
21110
21111 case MODE_CC:
21112 units = GET_MODE_SIZE (CCmode);
21113 break;
21114
21115 case MODE_FLOAT:
21116 if ((TARGET_SSE && mode == TFmode)
21117 || (TARGET_80387 && mode == XFmode)
21118 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
21119 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
21120 units = GET_MODE_SIZE (mode);
21121 break;
21122
21123 case MODE_COMPLEX_FLOAT:
21124 if ((TARGET_SSE && mode == TCmode)
21125 || (TARGET_80387 && mode == XCmode)
21126 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
21127 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
21128 units = GET_MODE_SIZE (mode);
21129 break;
21130
21131 case MODE_VECTOR_INT:
21132 case MODE_VECTOR_FLOAT:
21133 if ((TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
21134 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
21135 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
21136 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
21137 || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
21138 && VALID_MMX_REG_MODE (mode)))
21139 units = GET_MODE_SIZE (mode);
21140 }
21141
21142 /* Return the cost of moving between two registers of mode MODE,
21143 assuming that the move will be in pieces of at most UNITS bytes. */
21144 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
21145}
21146
21147/* Return cost of vector operation in MODE given that scalar version has
21148 COST. */
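/* E.g. on a target with TARGET_AVX256_SPLIT_REGS, a 256-bit vector operation
   is internally executed as two 128-bit halves, so a scalar cost C is scaled
   to C * 256 / 128 == 2 * C.  */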
21149
21150static int
21151ix86_vec_cost (machine_mode mode, int cost)
21152{
21153 if (!VECTOR_MODE_P (mode))
21154 return cost;
21155
21156 if (GET_MODE_BITSIZE (mode) == 128
21157 && TARGET_SSE_SPLIT_REGS)
21158 return cost * GET_MODE_BITSIZE (mode) / 64;
21159 else if (GET_MODE_BITSIZE (mode) > 128
21160 && TARGET_AVX256_SPLIT_REGS)
21161 return cost * GET_MODE_BITSIZE (mode) / 128;
21162 else if (GET_MODE_BITSIZE (mode) > 256
21163 && TARGET_AVX512_SPLIT_REGS)
21164 return cost * GET_MODE_BITSIZE (mode) / 256;
21165 return cost;
21166}
21167
21168/* Return cost of vec_widen_<s>mult_hi/lo_<mode>,
21169   vec_widen_<s>mult_hi/lo_<mode> is only available for the VI124_AVX2 modes.  */
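/* As an example, a signed V8HImode widening multiply is costed as
   basic_cost = 2 * mulss + 4 * sse_op plus extra_cost = 2 * sse_op for the
   sign handling, with the sum scaled by ix86_vec_cost.  */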
21170static int
21171ix86_widen_mult_cost (const struct processor_costs *cost,
21172 enum machine_mode mode, bool uns_p)
21173{
21174 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
21175 int extra_cost = 0;
21176 int basic_cost = 0;
21177 switch (mode)
21178 {
21179 case V8HImode:
21180 case V16HImode:
21181 if (!uns_p || mode == V16HImode)
21182 extra_cost = cost->sse_op * 2;
21183 basic_cost = cost->mulss * 2 + cost->sse_op * 4;
21184 break;
21185 case V4SImode:
21186 case V8SImode:
21187 /* pmulhw/pmullw can be used. */
21188 basic_cost = cost->mulss * 2 + cost->sse_op * 2;
21189 break;
21190 case V2DImode:
21191	  /* pmuludq is used under SSE2, pmuldq under SSE4.1; sign_extend
21192	     without SSE4.1 requires an extra 4 mul, 4 add, 4 cmp and 2 shift.  */
21193 if (!TARGET_SSE4_1 && !uns_p)
21194 extra_cost = (cost->mulss + cost->addss + cost->sse_op) * 4
21195 + cost->sse_op * 2;
21196 /* Fallthru. */
21197 case V4DImode:
21198 basic_cost = cost->mulss * 2 + cost->sse_op * 4;
21199 break;
21200 default:
21201 /* Not implemented. */
21202 return 100;
21203 }
21204  return ix86_vec_cost (mode, basic_cost + extra_cost);
21205}
21206
21207/* Return cost of multiplication in MODE. */
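/* For instance, a V4SImode multiply without SSE4.1 is emulated and costed as
   2 * mulss + 5 * sse_op (scaled by ix86_vec_cost), whereas a scalar SImode
   multiply costs mult_init[MODE_INDEX (SImode)] plus 7 * mult_bit.  */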
21208
21209static int
21210ix86_multiplication_cost (const struct processor_costs *cost,
21211 enum machine_mode mode)
21212{
21213 machine_mode inner_mode = mode;
21214 if (VECTOR_MODE_P (mode))
21215 inner_mode = GET_MODE_INNER (mode);
21216
21217 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21218 return inner_mode == DFmode ? cost->mulsd : cost->mulss;
21219 else if (X87_FLOAT_MODE_P (mode))
21220 return cost->fmul;
21221 else if (FLOAT_MODE_P (mode))
21222 return ix86_vec_cost (mode,
21223			  inner_mode == DFmode ? cost->mulsd : cost->mulss);
21224 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21225 {
21226 int nmults, nops;
21227 /* Cost of reading the memory. */
21228 int extra;
21229
21230 switch (mode)
21231 {
21232 case V4QImode:
21233 case V8QImode:
21234 /* Partial V*QImode is emulated with 4-6 insns. */
21235 nmults = 1;
21236 nops = 3;
21237 extra = 0;
21238
21239 if (TARGET_AVX512BW && TARGET_AVX512VL)
21240 ;
21241 else if (TARGET_AVX2)
21242 nops += 2;
21243 else if (TARGET_XOP)
21244 extra += cost->sse_load[2];
21245 else
21246 {
21247 nops += 1;
21248 extra += cost->sse_load[2];
21249 }
21250 goto do_qimode;
21251
21252 case V16QImode:
21253 /* V*QImode is emulated with 4-11 insns. */
21254 nmults = 1;
21255 nops = 3;
21256 extra = 0;
21257
21258 if (TARGET_AVX2 && !TARGET_PREFER_AVX128)
21259 {
21260 if (!(TARGET_AVX512BW && TARGET_AVX512VL))
21261 nops += 3;
21262 }
21263 else if (TARGET_XOP)
21264 {
21265 nmults += 1;
21266 nops += 2;
21267 extra += cost->sse_load[2];
21268 }
21269 else
21270 {
21271 nmults += 1;
21272 nops += 4;
21273 extra += cost->sse_load[2];
21274 }
21275 goto do_qimode;
21276
21277 case V32QImode:
21278 nmults = 1;
21279 nops = 3;
21280 extra = 0;
21281
21282 if (!TARGET_AVX512BW || TARGET_PREFER_AVX256)
21283 {
21284 nmults += 1;
21285 nops += 4;
21286 extra += cost->sse_load[3] * 2;
21287 }
21288 goto do_qimode;
21289
21290 case V64QImode:
21291 nmults = 2;
21292 nops = 9;
21293 extra = cost->sse_load[3] * 2 + cost->sse_load[4] * 2;
21294
21295 do_qimode:
21296	  return ix86_vec_cost (mode, cost->mulss * nmults
21297 + cost->sse_op * nops) + extra;
21298
21299 case V4SImode:
21300 /* pmulld is used in this case. No emulation is needed. */
21301 if (TARGET_SSE4_1)
21302 goto do_native;
21303 /* V4SImode is emulated with 7 insns. */
21304 else
21305	    return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
21306
21307 case V2DImode:
21308 case V4DImode:
21309 /* vpmullq is used in this case. No emulation is needed. */
21310 if (TARGET_AVX512DQ && TARGET_AVX512VL)
21311 goto do_native;
21312 /* V*DImode is emulated with 6-8 insns. */
21313 else if (TARGET_XOP && mode == V2DImode)
21314	    return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 4);
21315 /* FALLTHRU */
21316 case V8DImode:
21317 /* vpmullq is used in this case. No emulation is needed. */
21318 if (TARGET_AVX512DQ && mode == V8DImode)
21319 goto do_native;
21320 else
21321	    return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
21322
21323 default:
21324 do_native:
21325	  return ix86_vec_cost (mode, cost->mulss);
21326 }
21327 }
21328 else
21329 return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
21330}
21331
21332/* Return cost of division in MODE.  */
21333
21334static int
21335ix86_division_cost (const struct processor_costs *cost,
21336 enum machine_mode mode)
21337{
21338 machine_mode inner_mode = mode;
21339 if (VECTOR_MODE_P (mode))
21340 inner_mode = GET_MODE_INNER (mode);
21341
21342 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21343 return inner_mode == DFmode ? cost->divsd : cost->divss;
21344 else if (X87_FLOAT_MODE_P (mode))
21345 return cost->fdiv;
21346 else if (FLOAT_MODE_P (mode))
21347 return ix86_vec_cost (mode,
21348			   inner_mode == DFmode ? cost->divsd : cost->divss);
21349 else
21350 return cost->divide[MODE_INDEX (mode)];
21351}
21352
21353/* Return cost of shift in MODE.
21354 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
21355   AND_IN_OP1 specifies whether op1 is an AND, and SHIFT_AND_TRUNCATE
21356   whether op1 is a SUBREG of an AND.
21357
21358 SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */
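/* For example, a DImode shift on a 32-bit target (mode wider than a word) by
   a constant count larger than 32 costs shift_const + COSTS_N_INSNS (2),
   while a constant count of at most 32 costs 2 * shift_const.  */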
21359
21360static int
21361ix86_shift_rotate_cost (const struct processor_costs *cost,
21362 enum rtx_code code,
21363 enum machine_mode mode, bool constant_op1,
21364 HOST_WIDE_INT op1_val,
21365 bool and_in_op1,
21366 bool shift_and_truncate,
21367 bool *skip_op0, bool *skip_op1)
21368{
21369 if (skip_op0)
21370 *skip_op0 = *skip_op1 = false;
21371
21372 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21373 {
21374 int count;
21375 /* Cost of reading the memory. */
21376 int extra;
21377
21378 switch (mode)
21379 {
21380 case V4QImode:
21381 case V8QImode:
21382 if (TARGET_AVX2)
21383 /* Use vpbroadcast. */
21384 extra = cost->sse_op;
21385 else
21386 extra = cost->sse_load[2];
21387
21388 if (constant_op1)
21389 {
21390 if (code == ASHIFTRT)
21391 {
21392 count = 4;
21393 extra *= 2;
21394 }
21395 else
21396 count = 2;
21397 }
21398 else if (TARGET_AVX512BW && TARGET_AVX512VL)
21399	    return ix86_vec_cost (mode, cost->sse_op * 4);
21400 else if (TARGET_SSE4_1)
21401 count = 5;
21402 else if (code == ASHIFTRT)
21403 count = 6;
21404 else
21405 count = 5;
21406	  return ix86_vec_cost (mode, cost->sse_op * count) + extra;
21407
21408 case V16QImode:
21409 if (TARGET_XOP)
21410 {
21411 /* For XOP we use vpshab, which requires a broadcast of the
21412 value to the variable shift insn. For constants this
21413 means a V16Q const in mem; even when we can perform the
21414 shift with one insn set the cost to prefer paddb. */
21415 if (constant_op1)
21416 {
21417 extra = cost->sse_load[2];
21418	      return ix86_vec_cost (mode, cost->sse_op) + extra;
21419 }
21420 else
21421 {
21422 count = (code == ASHIFT) ? 3 : 4;
21423	      return ix86_vec_cost (mode, cost->sse_op * count);
21424 }
21425 }
21426 /* FALLTHRU */
21427 case V32QImode:
21428 if (TARGET_AVX2)
21429 /* Use vpbroadcast. */
21430 extra = cost->sse_op;
21431 else
21432 extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3];
21433
21434 if (constant_op1)
21435 {
21436 if (code == ASHIFTRT)
21437 {
21438 count = 4;
21439 extra *= 2;
21440 }
21441 else
21442 count = 2;
21443 }
21444 else if (TARGET_AVX512BW
21445 && ((mode == V32QImode && !TARGET_PREFER_AVX256)
21446 || (mode == V16QImode && TARGET_AVX512VL
21447 && !TARGET_PREFER_AVX128)))
21448	    return ix86_vec_cost (mode, cost->sse_op * 4);
21449 else if (TARGET_AVX2
21450 && mode == V16QImode && !TARGET_PREFER_AVX128)
21451 count = 6;
21452 else if (TARGET_SSE4_1)
21453 count = 9;
21454 else if (code == ASHIFTRT)
21455 count = 10;
21456 else
21457 count = 9;
21458	  return ix86_vec_cost (mode, cost->sse_op * count) + extra;
21459
21460 case V2DImode:
21461 case V4DImode:
21462 /* V*DImode arithmetic right shift is emulated. */
21463 if (code == ASHIFTRT && !TARGET_AVX512VL)
21464 {
21465 if (constant_op1)
21466 {
21467 if (op1_val == 63)
21468 count = TARGET_SSE4_2 ? 1 : 2;
21469 else if (TARGET_XOP)
21470 count = 2;
21471 else if (TARGET_SSE4_1)
21472 count = 3;
21473 else
21474 count = 4;
21475 }
21476 else if (TARGET_XOP)
21477 count = 3;
21478 else if (TARGET_SSE4_2)
21479 count = 4;
21480 else
21481 count = 5;
21482
21483	      return ix86_vec_cost (mode, cost->sse_op * count);
21484 }
21485 /* FALLTHRU */
21486 default:
21487	  return ix86_vec_cost (mode, cost->sse_op);
21488 }
21489 }
21490
21491 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21492 {
21493 if (constant_op1)
21494 {
21495 if (op1_val > 32)
21496 return cost->shift_const + COSTS_N_INSNS (2);
21497 else
21498 return cost->shift_const * 2;
21499 }
21500 else
21501 {
21502 if (and_in_op1)
21503 return cost->shift_var * 2;
21504 else
21505 return cost->shift_var * 6 + COSTS_N_INSNS (2);
21506 }
21507 }
21508 else
21509 {
21510 if (constant_op1)
21511 return cost->shift_const;
21512 else if (shift_and_truncate)
21513 {
21514 if (skip_op0)
21515 *skip_op0 = *skip_op1 = true;
21516 /* Return the cost after shift-and truncation. */
21517 return cost->shift_var;
21518 }
21519 else
21520 return cost->shift_var;
21521 }
21522}
21523
21524/* Compute a (partial) cost for rtx X. Return true if the complete
21525 cost has been computed, and false if subexpressions should be
21526 scanned. In either case, *TOTAL contains the cost result. */
21527
21528static bool
21529ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
21530 int *total, bool speed)
21531{
21532 rtx mask;
21533 enum rtx_code code = GET_CODE (x);
21534 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
21535 const struct processor_costs *cost
21536 = speed ? ix86_tune_cost : &ix86_size_cost;
21537 int src_cost;
21538
21539 switch (code)
21540 {
21541 case SET:
21542 if (register_operand (SET_DEST (x), VOIDmode)
21543 && register_operand (SET_SRC (x), VOIDmode))
21544 {
21545 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
21546 return true;
21547 }
21548
21549 if (register_operand (SET_SRC (x), VOIDmode))
21550 /* Avoid potentially incorrect high cost from rtx_costs
21551 for non-tieable SUBREGs. */
21552 src_cost = 0;
21553 else
21554 {
21555 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
21556
21557 if (CONSTANT_P (SET_SRC (x)))
21558 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
21559 a small value, possibly zero for cheap constants. */
21560 src_cost += COSTS_N_INSNS (1);
21561 }
21562
21563 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
21564 return true;
21565
21566 case CONST_INT:
21567 case CONST:
21568 case LABEL_REF:
21569 case SYMBOL_REF:
21570 if (x86_64_immediate_operand (x, VOIDmode))
21571 *total = 0;
21572 else
21573 *total = 1;
21574 return true;
21575
21576 case CONST_DOUBLE:
21577 if (IS_STACK_MODE (mode))
21578 switch (standard_80387_constant_p (x))
21579 {
21580 case -1:
21581 case 0:
21582 break;
21583 case 1: /* 0.0 */
21584 *total = 1;
21585 return true;
21586 default: /* Other constants */
21587 *total = 2;
21588 return true;
21589 }
21590 /* FALLTHRU */
21591
21592 case CONST_VECTOR:
21593      switch (standard_sse_constant_p (x, mode))
21594 {
21595 case 0:
21596 break;
21597 case 1: /* 0: xor eliminates false dependency */
21598 *total = 0;
21599 return true;
21600 default: /* -1: cmp contains false dependency */
21601 *total = 1;
21602 return true;
21603 }
21604 /* FALLTHRU */
21605
21606 case CONST_WIDE_INT:
21607 /* Fall back to (MEM (SYMBOL_REF)), since that's where
21608 it'll probably end up. Add a penalty for size. */
21609 *total = (COSTS_N_INSNS (1)
21610 + (!TARGET_64BIT && flag_pic)
21611 + (GET_MODE_SIZE (mode) <= 4
21612 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
21613 return true;
21614
21615 case ZERO_EXTEND:
21616      /* The zero extension is often completely free on x86_64, so make
21617 it as cheap as possible. */
21618 if (TARGET_64BIT && mode == DImode
21619 && GET_MODE (XEXP (x, 0)) == SImode)
21620 *total = 1;
21621 else if (TARGET_ZERO_EXTEND_WITH_AND)
21622 *total = cost->add;
21623 else
21624 *total = cost->movzx;
21625 return false;
21626
21627 case SIGN_EXTEND:
21628 *total = cost->movsx;
21629 return false;
21630
21631 case ASHIFT:
21632 if (SCALAR_INT_MODE_P (mode)
21633 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
21634 && CONST_INT_P (XEXP (x, 1)))
21635 {
21636 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
21637 if (value == 1)
21638 {
21639 *total = cost->add;
21640 return false;
21641 }
21642 if ((value == 2 || value == 3)
21643 && cost->lea <= cost->shift_const)
21644 {
21645 *total = cost->lea;
21646 return false;
21647 }
21648 }
21649 /* FALLTHRU */
21650
21651 case ROTATE:
21652 case ASHIFTRT:
21653 case LSHIFTRT:
21654 case ROTATERT:
21655 bool skip_op0, skip_op1;
21656 *total = ix86_shift_rotate_cost (cost, code, mode,
21657 CONSTANT_P (XEXP (x, 1)),
21658 CONST_INT_P (XEXP (x, 1))
21659 ? INTVAL (XEXP (x, 1)) : -1,
21660 GET_CODE (XEXP (x, 1)) == AND,
21661 SUBREG_P (XEXP (x, 1))
21662 && GET_CODE (XEXP (XEXP (x, 1),
21663 0)) == AND,
21664				       &skip_op0, &skip_op1);
21665 if (skip_op0 || skip_op1)
21666 {
21667 if (!skip_op0)
21668 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
21669 if (!skip_op1)
21670 *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
21671 return true;
21672 }
21673 return false;
21674
21675 case FMA:
21676 {
21677 rtx sub;
21678
21679 gcc_assert (FLOAT_MODE_P (mode));
21680 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
21681
21682 *total = ix86_vec_cost (mode,
21683 GET_MODE_INNER (mode) == SFmode
21684 ? cost->fmass : cost->fmasd);
21685 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
21686
21687 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
21688 sub = XEXP (x, 0);
21689 if (GET_CODE (sub) == NEG)
21690 sub = XEXP (sub, 0);
21691 *total += rtx_cost (sub, mode, FMA, 0, speed);
21692
21693 sub = XEXP (x, 2);
21694 if (GET_CODE (sub) == NEG)
21695 sub = XEXP (sub, 0);
21696 *total += rtx_cost (sub, mode, FMA, 2, speed);
21697 return true;
21698 }
21699
21700 case MULT:
21701 if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
21702 {
21703 rtx op0 = XEXP (x, 0);
21704 rtx op1 = XEXP (x, 1);
21705 int nbits;
21706 if (CONST_INT_P (XEXP (x, 1)))
21707 {
21708 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
21709 for (nbits = 0; value != 0; value &= value - 1)
21710 nbits++;
21711 }
21712 else
21713 /* This is arbitrary. */
21714 nbits = 7;
21715
21716 /* Compute costs correctly for widening multiplication. */
21717 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
21718 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
21719 == GET_MODE_SIZE (mode))
21720 {
21721 int is_mulwiden = 0;
21722 machine_mode inner_mode = GET_MODE (op0);
21723
21724 if (GET_CODE (op0) == GET_CODE (op1))
21725 is_mulwiden = 1, op1 = XEXP (op1, 0);
21726 else if (CONST_INT_P (op1))
21727 {
21728 if (GET_CODE (op0) == SIGN_EXTEND)
21729 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
21730 == INTVAL (op1);
21731 else
21732 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
21733 }
21734
21735 if (is_mulwiden)
21736 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
21737 }
21738
21739 int mult_init;
21740 // Double word multiplication requires 3 mults and 2 adds.
21741 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21742 {
21743 mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)]
21744 + 2 * cost->add;
21745 nbits *= 3;
21746 }
21747 else mult_init = cost->mult_init[MODE_INDEX (mode)];
21748
21749 *total = (mult_init
21750 + nbits * cost->mult_bit
21751 + rtx_cost (op0, mode, outer_code, opno, speed)
21752 + rtx_cost (op1, mode, outer_code, opno, speed));
21753
21754 return true;
21755 }
21756 *total = ix86_multiplication_cost (cost, mode);
21757 return false;
21758
21759 case DIV:
21760 case UDIV:
21761 case MOD:
21762 case UMOD:
21763 *total = ix86_division_cost (cost, mode);
21764 return false;
21765
21766 case PLUS:
21767 if (GET_MODE_CLASS (mode) == MODE_INT
21768 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
21769 {
21770 if (GET_CODE (XEXP (x, 0)) == PLUS
21771 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
21772 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
21773 && CONSTANT_P (XEXP (x, 1)))
21774 {
21775 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
21776 if (val == 2 || val == 4 || val == 8)
21777 {
21778 *total = cost->lea;
21779 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
21780 outer_code, opno, speed);
21781 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
21782 outer_code, opno, speed);
21783 *total += rtx_cost (XEXP (x, 1), mode,
21784 outer_code, opno, speed);
21785 return true;
21786 }
21787 }
21788 else if (GET_CODE (XEXP (x, 0)) == MULT
21789 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
21790 {
21791 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
21792 if (val == 2 || val == 4 || val == 8)
21793 {
21794 *total = cost->lea;
21795 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21796 outer_code, opno, speed);
21797 *total += rtx_cost (XEXP (x, 1), mode,
21798 outer_code, opno, speed);
21799 return true;
21800 }
21801 }
21802 else if (GET_CODE (XEXP (x, 0)) == PLUS)
21803 {
21804 rtx op = XEXP (XEXP (x, 0), 0);
21805
21806 /* Add with carry, ignore the cost of adding a carry flag. */
21807 if (ix86_carry_flag_operator (op, mode)
21808 || ix86_carry_flag_unset_operator (op, mode))
21809 *total = cost->add;
21810 else
21811 {
21812 *total = cost->lea;
21813 *total += rtx_cost (op, mode,
21814 outer_code, opno, speed);
21815 }
21816
21817 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
21818 outer_code, opno, speed);
21819 *total += rtx_cost (XEXP (x, 1), mode,
21820 outer_code, opno, speed);
21821 return true;
21822 }
21823 }
21824 /* FALLTHRU */
21825
21826 case MINUS:
21827 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
21828 if (GET_MODE_CLASS (mode) == MODE_INT
21829 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
21830 && GET_CODE (XEXP (x, 0)) == MINUS
21831 && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)
21832 || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode)))
21833 {
21834 *total = cost->add;
21835 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21836 outer_code, opno, speed);
21837 *total += rtx_cost (XEXP (x, 1), mode,
21838 outer_code, opno, speed);
21839 return true;
21840 }
21841
21842 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21843 *total = cost->addss;
21844 else if (X87_FLOAT_MODE_P (mode))
21845 *total = cost->fadd;
21846 else if (FLOAT_MODE_P (mode))
21847	*total = ix86_vec_cost (mode, cost->addss);
21848      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21849	*total = ix86_vec_cost (mode, cost->sse_op);
21850 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21851 *total = cost->add * 2;
21852 else
21853 *total = cost->add;
21854 return false;
21855
21856 case IOR:
21857 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
21858 || SSE_FLOAT_MODE_P (mode))
21859 {
21860 /* (ior (not ...) ...) can be a single insn in AVX512. */
21861 if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
21862 && ((TARGET_EVEX512
21863 && GET_MODE_SIZE (mode) == 64)
21864 || (TARGET_AVX512VL
21865 && (GET_MODE_SIZE (mode) == 32
21866 || GET_MODE_SIZE (mode) == 16))))
21867 {
21868 rtx right = GET_CODE (XEXP (x, 1)) != NOT
21869 ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);
21870
21871	      *total = ix86_vec_cost (mode, cost->sse_op)
21872 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21873 outer_code, opno, speed)
21874 + rtx_cost (right, mode, outer_code, opno, speed);
21875 return true;
21876 }
21877	  *total = ix86_vec_cost (mode, cost->sse_op);
21878 }
21879 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21880 *total = cost->add * 2;
21881 else
21882 *total = cost->add;
21883 return false;
21884
21885 case XOR:
21886 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
21887 || SSE_FLOAT_MODE_P (mode))
21888	*total = ix86_vec_cost (mode, cost->sse_op);
21889 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21890 *total = cost->add * 2;
21891 else
21892 *total = cost->add;
21893 return false;
21894
21895 case AND:
21896 if (address_no_seg_operand (x, mode))
21897 {
21898 *total = cost->lea;
21899 return true;
21900 }
21901 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
21902 || SSE_FLOAT_MODE_P (mode))
21903 {
21904 /* pandn is a single instruction. */
21905 if (GET_CODE (XEXP (x, 0)) == NOT)
21906 {
21907 rtx right = XEXP (x, 1);
21908
21909 /* (and (not ...) (not ...)) can be a single insn in AVX512. */
21910 if (GET_CODE (right) == NOT && TARGET_AVX512F
21911 && ((TARGET_EVEX512
21912 && GET_MODE_SIZE (mode) == 64)
21913 || (TARGET_AVX512VL
21914 && (GET_MODE_SIZE (mode) == 32
21915 || GET_MODE_SIZE (mode) == 16))))
21916 right = XEXP (right, 0);
21917
21918	      *total = ix86_vec_cost (mode, cost->sse_op)
21919 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21920 outer_code, opno, speed)
21921 + rtx_cost (right, mode, outer_code, opno, speed);
21922 return true;
21923 }
21924 else if (GET_CODE (XEXP (x, 1)) == NOT)
21925 {
21926	      *total = ix86_vec_cost (mode, cost->sse_op)
21927 + rtx_cost (XEXP (x, 0), mode,
21928 outer_code, opno, speed)
21929 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
21930 outer_code, opno, speed);
21931 return true;
21932 }
21933	  *total = ix86_vec_cost (mode, cost->sse_op);
21934 }
21935 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21936 {
21937 if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
21938 {
21939 *total = cost->add * 2
21940 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21941 outer_code, opno, speed)
21942 + rtx_cost (XEXP (x, 1), mode,
21943 outer_code, opno, speed);
21944 return true;
21945 }
21946 else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT)
21947 {
21948 *total = cost->add * 2
21949 + rtx_cost (XEXP (x, 0), mode,
21950 outer_code, opno, speed)
21951 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
21952 outer_code, opno, speed);
21953 return true;
21954 }
21955 *total = cost->add * 2;
21956 }
21957 else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
21958 {
21959 *total = cost->add
21960 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21961 outer_code, opno, speed)
21962 + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
21963 return true;
21964 }
21965 else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT)
21966 {
21967 *total = cost->add
21968 + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
21969 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
21970 outer_code, opno, speed);
21971 return true;
21972 }
21973 else
21974 *total = cost->add;
21975 return false;
21976
21977 case NOT:
21978 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21979 {
21980 /* (not (xor ...)) can be a single insn in AVX512. */
21981 if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
21982 && ((TARGET_EVEX512
21983 && GET_MODE_SIZE (mode) == 64)
21984 || (TARGET_AVX512VL
21985 && (GET_MODE_SIZE (mode) == 32
21986 || GET_MODE_SIZE (mode) == 16))))
21987 {
21988	      *total = ix86_vec_cost (mode, cost->sse_op)
21989 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21990 outer_code, opno, speed)
21991 + rtx_cost (XEXP (XEXP (x, 0), 1), mode,
21992 outer_code, opno, speed);
21993 return true;
21994 }
21995
21996 // vnot is pxor -1.
21997	  *total = ix86_vec_cost (mode, cost->sse_op) + 1;
21998 }
21999 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22000 *total = cost->add * 2;
22001 else
22002 *total = cost->add;
22003 return false;
22004
22005 case NEG:
22006 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
22007 *total = cost->sse_op;
22008 else if (X87_FLOAT_MODE_P (mode))
22009 *total = cost->fchs;
22010 else if (FLOAT_MODE_P (mode))
22011	*total = ix86_vec_cost (mode, cost->sse_op);
22012      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22013	*total = ix86_vec_cost (mode, cost->sse_op);
22014 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22015 *total = cost->add * 3;
22016 else
22017 *total = cost->add;
22018 return false;
22019
22020 case COMPARE:
22021 rtx op0, op1;
22022 op0 = XEXP (x, 0);
22023 op1 = XEXP (x, 1);
22024 if (GET_CODE (op0) == ZERO_EXTRACT
22025 && XEXP (op0, 1) == const1_rtx
22026 && CONST_INT_P (XEXP (op0, 2))
22027 && op1 == const0_rtx)
22028 {
22029 /* This kind of construct is implemented using test[bwl].
22030 Treat it as if we had an AND. */
22031 mode = GET_MODE (XEXP (op0, 0));
22032 *total = (cost->add
22033 + rtx_cost (XEXP (op0, 0), mode, outer_code,
22034 opno, speed)
22035 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
22036 return true;
22037 }
22038
22039 if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
22040 {
22041 /* This is an overflow detection, count it as a normal compare. */
22042 *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
22043 return true;
22044 }
22045
22046 rtx geu;
22047 /* Match x
22048 (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
22049 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */
22050 if (mode == CCCmode
22051 && GET_CODE (op0) == NEG
22052 && GET_CODE (geu = XEXP (op0, 0)) == GEU
22053 && REG_P (XEXP (geu, 0))
22054 && (GET_MODE (XEXP (geu, 0)) == CCCmode
22055 || GET_MODE (XEXP (geu, 0)) == CCmode)
22056 && REGNO (XEXP (geu, 0)) == FLAGS_REG
22057 && XEXP (geu, 1) == const0_rtx
22058 && GET_CODE (op1) == LTU
22059 && REG_P (XEXP (op1, 0))
22060 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
22061 && REGNO (XEXP (op1, 0)) == FLAGS_REG
22062 && XEXP (op1, 1) == const0_rtx)
22063 {
22064 /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */
22065 *total = 0;
22066 return true;
22067 }
22068 /* Match x
22069 (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
22070 (geu:QI (reg:CCC FLAGS_REG) (const_int 0))) */
22071 if (mode == CCCmode
22072 && GET_CODE (op0) == NEG
22073 && GET_CODE (XEXP (op0, 0)) == LTU
22074 && REG_P (XEXP (XEXP (op0, 0), 0))
22075 && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
22076 && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG
22077 && XEXP (XEXP (op0, 0), 1) == const0_rtx
22078 && GET_CODE (op1) == GEU
22079 && REG_P (XEXP (op1, 0))
22080 && GET_MODE (XEXP (op1, 0)) == CCCmode
22081 && REGNO (XEXP (op1, 0)) == FLAGS_REG
22082 && XEXP (op1, 1) == const0_rtx)
22083 {
22084 /* This is *x86_cmc. */
22085 if (!speed)
22086 *total = COSTS_N_BYTES (1);
22087 else if (TARGET_SLOW_STC)
22088 *total = COSTS_N_INSNS (2);
22089 else
22090 *total = COSTS_N_INSNS (1);
22091 return true;
22092 }
22093
22094 if (SCALAR_INT_MODE_P (GET_MODE (op0))
22095 && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
22096 {
22097 if (op1 == const0_rtx)
22098 *total = cost->add
22099 + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed);
22100 else
22101 *total = 3*cost->add
22102 + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed)
22103 + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed);
22104 return true;
22105 }
22106
22107 /* The embedded comparison operand is completely free. */
22108 if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
22109 *total = 0;
22110
22111 return false;
22112
22113 case FLOAT_EXTEND:
22114 if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
22115 *total = 0;
22116 else
22117    *total = ix86_vec_cost (mode, cost->addss);
22118 return false;
22119
22120 case FLOAT_TRUNCATE:
22121 if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
22122 *total = cost->fadd;
22123 else
22124    *total = ix86_vec_cost (mode, cost->addss);
22125 return false;
22126
22127 case ABS:
22128 /* SSE requires memory load for the constant operand. It may make
22129 sense to account for this. Of course the constant operand may or
22130 may not be reused. */
22131 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
22132 *total = cost->sse_op;
22133 else if (X87_FLOAT_MODE_P (mode))
22134 *total = cost->fabs;
22135 else if (FLOAT_MODE_P (mode))
22136    *total = ix86_vec_cost (mode, cost->sse_op);
22137 return false;
22138
22139 case SQRT:
22140 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
22141 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
22142 else if (X87_FLOAT_MODE_P (mode))
22143 *total = cost->fsqrt;
22144 else if (FLOAT_MODE_P (mode))
22145 *total = ix86_vec_cost (mode,
22146			    mode == SFmode ? cost->sqrtss : cost->sqrtsd);
22147 return false;
22148
22149 case UNSPEC:
22150 if (XINT (x, 1) == UNSPEC_TP)
22151 *total = 0;
22152 else if (XINT (x, 1) == UNSPEC_VTERNLOG)
22153 {
22154 *total = cost->sse_op;
22155 return true;
22156 }
22157 else if (XINT (x, 1) == UNSPEC_PTEST)
22158 {
22159 *total = cost->sse_op;
22160 rtx test_op0 = XVECEXP (x, 0, 0);
22161 if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
22162 return false;
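	  /* PTEST sets ZF and CF from the AND and ANDN of its operands, so
	     when the tested value is itself an AND (or ANDN) the logical
	     operation is folded into the ptest; charge only for the AND's
	     operands.  */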
22163 if (GET_CODE (test_op0) == AND)
22164 {
22165 rtx and_op0 = XEXP (test_op0, 0);
22166 if (GET_CODE (and_op0) == NOT)
22167 and_op0 = XEXP (and_op0, 0);
22168 *total += rtx_cost (and_op0, GET_MODE (and_op0),
22169 AND, 0, speed)
22170 + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
22171 AND, 1, speed);
22172 }
22173 else
22174 *total = rtx_cost (test_op0, GET_MODE (test_op0),
22175 UNSPEC, 0, speed);
22176 return true;
22177 }
22178 return false;
22179
22180 case VEC_SELECT:
22181 case VEC_CONCAT:
22182 case VEC_DUPLICATE:
22183 /* ??? Assume all of these vector manipulation patterns are
22184 recognizable. In which case they all pretty much have the
22185 same cost. */
22186 *total = cost->sse_op;
22187 return true;
22188 case VEC_MERGE:
22189 mask = XEXP (x, 2);
      /* This is a masked instruction; assume the same cost as the
	 non-masked variant.  */
22192 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
22193 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
22194 else
22195 *total = cost->sse_op;
22196 return true;
22197
22198 case MEM:
22199 /* An insn that accesses memory is slightly more expensive
22200 than one that does not. */
22201 if (speed)
22202 *total += 1;
22203 return false;
22204
22205 case ZERO_EXTRACT:
22206 if (XEXP (x, 1) == const1_rtx
22207 && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND
22208 && GET_MODE (XEXP (x, 2)) == SImode
22209 && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode)
22210 {
22211 /* Ignore cost of zero extension and masking of last argument. */
22212 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
22213 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
22214 *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed);
22215 return true;
22216 }
22217 return false;
22218
22219 case IF_THEN_ELSE:
22220 if (TARGET_XOP
22221 && VECTOR_MODE_P (mode)
22222 && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32))
22223 {
22224 /* vpcmov. */
22225 *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6);
22226 if (!REG_P (XEXP (x, 0)))
22227 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
22228 if (!REG_P (XEXP (x, 1)))
22229 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
22230 if (!REG_P (XEXP (x, 2)))
22231 *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
22232 return true;
22233 }
22234 else if (TARGET_CMOVE
22235 && SCALAR_INT_MODE_P (mode)
22236 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
22237 {
22238 /* cmov. */
22239 *total = COSTS_N_INSNS (1);
22240 if (!REG_P (XEXP (x, 0)))
22241 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
22242 if (!REG_P (XEXP (x, 1)))
22243 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
22244 if (!REG_P (XEXP (x, 2)))
22245 *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
22246 return true;
22247 }
22248 return false;
22249
22250 default:
22251 return false;
22252 }
22253}
22254
22255#if TARGET_MACHO
22256
22257static int current_machopic_label_num;
22258
22259/* Given a symbol name and its associated stub, write out the
22260 definition of the stub. */
22261
22262void
22263machopic_output_stub (FILE *file, const char *symb, const char *stub)
22264{
22265 unsigned int length;
22266 char *binder_name, *symbol_name, lazy_ptr_name[32];
22267 int label = ++current_machopic_label_num;
22268
22269 /* For 64-bit we shouldn't get here. */
22270 gcc_assert (!TARGET_64BIT);
22271
22272 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22273 symb = targetm.strip_name_encoding (symb);
22274
22275 length = strlen (stub);
22276 binder_name = XALLOCAVEC (char, length + 32);
22277 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22278
22279 length = strlen (symb);
22280 symbol_name = XALLOCAVEC (char, length + 32);
22281 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22282
22283 sprintf (lazy_ptr_name, "L%d$lz", label);
22284
22285 if (MACHOPIC_ATT_STUB)
22286 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
22287 else if (MACHOPIC_PURE)
22288 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
22289 else
22290 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22291
22292 fprintf (file, "%s:\n", stub);
22293 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22294
22295 if (MACHOPIC_ATT_STUB)
22296 {
22297 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
22298 }
22299 else if (MACHOPIC_PURE)
22300 {
22301 /* PIC stub. */
22302 /* 25-byte PIC stub using "CALL get_pc_thunk". */
22303 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
22304 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
22305 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
22306 label, lazy_ptr_name, label);
22307 fprintf (file, "\tjmp\t*%%ecx\n");
22308 }
22309 else
22310 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
22311
22312 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
22313 it needs no stub-binding-helper. */
22314 if (MACHOPIC_ATT_STUB)
22315 return;
22316
22317 fprintf (file, "%s:\n", binder_name);
22318
22319 if (MACHOPIC_PURE)
22320 {
22321 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
22322 fprintf (file, "\tpushl\t%%ecx\n");
22323 }
22324 else
22325 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
22326
22327 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
22328
22329 /* N.B. Keep the correspondence of these
22330 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
22331 old-pic/new-pic/non-pic stubs; altering this will break
22332 compatibility with existing dylibs. */
22333 if (MACHOPIC_PURE)
22334 {
22335 /* 25-byte PIC stub using "CALL get_pc_thunk". */
22336 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
22337 }
22338 else
22339 /* 16-byte -mdynamic-no-pic stub. */
22340 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
22341
22342 fprintf (file, "%s:\n", lazy_ptr_name);
22343 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22344 fprintf (file, ASM_LONG "%s\n", binder_name);
22345}
22346#endif /* TARGET_MACHO */
22347
22348/* Order the registers for register allocator. */
22349
22350void
22351x86_order_regs_for_local_alloc (void)
22352{
22353 int pos = 0;
22354 int i;
22355
22356 /* First allocate the local general purpose registers. */
22357 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
22359 reg_alloc_order [pos++] = i;
22360
22361 /* Global general purpose registers. */
22362 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
22364 reg_alloc_order [pos++] = i;
22365
22366 /* x87 registers come first in case we are doing FP math
22367 using them. */
22368 if (!TARGET_SSE_MATH)
22369 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22370 reg_alloc_order [pos++] = i;
22371
22372 /* SSE registers. */
22373 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
22374 reg_alloc_order [pos++] = i;
22375 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
22376 reg_alloc_order [pos++] = i;
22377
22378 /* Extended REX SSE registers. */
22379 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
22380 reg_alloc_order [pos++] = i;
22381
22382 /* Mask register. */
22383 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
22384 reg_alloc_order [pos++] = i;
22385
22386 /* x87 registers. */
22387 if (TARGET_SSE_MATH)
22388 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22389 reg_alloc_order [pos++] = i;
22390
22391 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
22392 reg_alloc_order [pos++] = i;
22393
  /* Initialize the rest of the array, as some registers are never
     allocated at all.  */
22396 while (pos < FIRST_PSEUDO_REGISTER)
22397 reg_alloc_order [pos++] = 0;
22398}
22399
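/* Return true if bit-fields in RECORD_TYPE should be laid out using the
   Microsoft layout rules: either -mms-bitfields is in effect and the type
   is not marked "gcc_struct", or the type is marked "ms_struct".  */
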
22400static bool
22401ix86_ms_bitfield_layout_p (const_tree record_type)
22402{
22403 return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
22406}
22407
22408/* Returns an expression indicating where the this parameter is
22409 located on entry to the FUNCTION. */
22410
22411static rtx
22412x86_this_parameter (tree function)
22413{
22414 tree type = TREE_TYPE (function);
22415 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
22416 int nregs;
22417
22418 if (TARGET_64BIT)
22419 {
22420 const int *parm_regs;
22421
      if (ix86_function_type_abi (type) == MS_ABI)
22423 parm_regs = x86_64_ms_abi_int_parameter_registers;
22424 else
22425 parm_regs = x86_64_int_parameter_registers;
22426 return gen_rtx_REG (Pmode, parm_regs[aggr]);
22427 }
22428
  nregs = ix86_function_regparm (type, function);
22430
22431 if (nregs > 0 && !stdarg_p (type))
22432 {
22433 int regno;
22434 unsigned int ccvt = ix86_get_callcvt (type);
22435
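      /* With fastcall the first two integer arguments go in %ecx and %edx,
	 so "this" is in %ecx, or in %edx when a hidden aggregate-return
	 pointer occupies %ecx.  With thiscall "this" is in %ecx, unless the
	 hidden return pointer takes it, in which case "this" is passed on
	 the stack.  */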
22436 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
22437 regno = aggr ? DX_REG : CX_REG;
22438 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
22439 {
22440 regno = CX_REG;
22441 if (aggr)
22442 return gen_rtx_MEM (SImode,
22443 plus_constant (Pmode, stack_pointer_rtx, 4));
22444 }
22445 else
22446 {
22447 regno = AX_REG;
22448 if (aggr)
22449 {
22450 regno = DX_REG;
22451 if (nregs == 1)
22452 return gen_rtx_MEM (SImode,
22453 plus_constant (Pmode,
22454 stack_pointer_rtx, 4));
22455 }
22456 }
22457 return gen_rtx_REG (SImode, regno);
22458 }
22459
22460 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
22461 aggr ? 8 : 4));
22462}
22463
22464/* Determine whether x86_output_mi_thunk can succeed. */
22465
22466static bool
22467x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
22468 const_tree function)
22469{
22470 /* 64-bit can handle anything. */
22471 if (TARGET_64BIT)
22472 return true;
22473
22474 /* For 32-bit, everything's fine if we have one free register. */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
22476 return true;
22477
22478 /* Need a free register for vcall_offset. */
22479 if (vcall_offset)
22480 return false;
22481
22482 /* Need a free register for GOT references. */
22483 if (flag_pic && !targetm.binds_local_p (function))
22484 return false;
22485
22486 /* Otherwise ok. */
22487 return true;
22488}
22489
22490/* Output the assembler code for a thunk function. THUNK_DECL is the
22491 declaration for the thunk function itself, FUNCTION is the decl for
22492 the target function. DELTA is an immediate constant offset to be
22493 added to THIS. If VCALL_OFFSET is nonzero, the word at
22494 *(*this + vcall_offset) should be added to THIS. */
22495
22496static void
22497x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
22498 HOST_WIDE_INT vcall_offset, tree function)
22499{
22500 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
22501 rtx this_param = x86_this_parameter (function);
22502 rtx this_reg, tmp, fnaddr;
22503 unsigned int tmp_regno;
22504 rtx_insn *insn;
22505 int saved_flag_force_indirect_call = flag_force_indirect_call;
22506
22507 if (TARGET_64BIT)
22508 tmp_regno = R10_REG;
22509 else
22510 {
22511 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
22512 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
22513 tmp_regno = AX_REG;
22514 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
22515 tmp_regno = DX_REG;
22516 else
22517 tmp_regno = CX_REG;
22518
22519 if (flag_pic)
22520 flag_force_indirect_call = 0;
22521 }
22522
22523 emit_note (NOTE_INSN_PROLOGUE_END);
22524
  /* If CET branch protection is enabled, insert an ENDBR instruction.  */
22526 if ((flag_cf_protection & CF_BRANCH))
22527 emit_insn (gen_nop_endbr ());
22528
22529 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
22530 pull it in now and let DELTA benefit. */
22531 if (REG_P (this_param))
22532 this_reg = this_param;
22533 else if (vcall_offset)
22534 {
22535 /* Put the this parameter into %eax. */
22536 this_reg = gen_rtx_REG (Pmode, AX_REG);
22537 emit_move_insn (this_reg, this_param);
22538 }
22539 else
22540 this_reg = NULL_RTX;
22541
22542 /* Adjust the this parameter by a fixed constant. */
22543 if (delta)
22544 {
22545 rtx delta_rtx = GEN_INT (delta);
22546 rtx delta_dst = this_reg ? this_reg : this_param;
22547
22548 if (TARGET_64BIT)
22549 {
22550 if (!x86_64_general_operand (delta_rtx, Pmode))
22551 {
22552 tmp = gen_rtx_REG (Pmode, tmp_regno);
22553 emit_move_insn (tmp, delta_rtx);
22554 delta_rtx = tmp;
22555 }
22556 }
22557
      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
22559 }
22560
22561 /* Adjust the this parameter by a value stored in the vtable. */
22562 if (vcall_offset)
22563 {
22564 rtx vcall_addr, vcall_mem, this_mem;
22565
22566 tmp = gen_rtx_REG (Pmode, tmp_regno);
22567
22568 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
22569 if (Pmode != ptr_mode)
22570 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
22571 emit_move_insn (tmp, this_mem);
22572
22573 /* Adjust the this parameter. */
22574 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
22575 if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
22577 {
22578 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
22579 emit_move_insn (tmp2, GEN_INT (vcall_offset));
22580 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
22581 }
22582
22583 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
22584 if (Pmode != ptr_mode)
22585 emit_insn (gen_addsi_1_zext (this_reg,
22586 gen_rtx_REG (ptr_mode,
22587 REGNO (this_reg)),
22588 vcall_mem));
22589 else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
22591 }
22592
22593 /* If necessary, drop THIS back to its stack slot. */
22594 if (this_reg && this_reg != this_param)
22595 emit_move_insn (this_param, this_reg);
22596
22597 fnaddr = XEXP (DECL_RTL (function), 0);
22598 if (TARGET_64BIT)
22599 {
22600 if (!flag_pic || targetm.binds_local_p (function)
22601 || TARGET_PECOFF)
22602 ;
22603 else
22604 {
22605 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
22606 tmp = gen_rtx_CONST (Pmode, tmp);
22607 fnaddr = gen_const_mem (Pmode, tmp);
22608 }
22609 }
22610 else
22611 {
22612 if (!flag_pic || targetm.binds_local_p (function))
22613 ;
22614#if TARGET_MACHO
22615 else if (TARGET_MACHO)
22616 {
22617 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
22618 fnaddr = XEXP (fnaddr, 0);
22619 }
22620#endif /* TARGET_MACHO */
22621 else
22622 {
22623 tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);
22625
22626 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
22627 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
22628 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
22629 fnaddr = gen_const_mem (Pmode, fnaddr);
22630 }
22631 }
22632
22633 /* Our sibling call patterns do not allow memories, because we have no
22634 predicate that can distinguish between frame and non-frame memory.
22635 For our purposes here, we can get away with (ab)using a jump pattern,
22636 because we're going to do no optimization. */
22637 if (MEM_P (fnaddr))
22638 {
22639 if (sibcall_insn_operand (fnaddr, word_mode))
22640 {
22641 fnaddr = XEXP (DECL_RTL (function), 0);
22642 tmp = gen_rtx_MEM (QImode, fnaddr);
22643 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
22644 tmp = emit_call_insn (tmp);
22645 SIBLING_CALL_P (tmp) = 1;
22646 }
22647 else
22648 emit_jump_insn (gen_indirect_jump (fnaddr));
22649 }
22650 else
22651 {
22652 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
22653 {
	  // CM_LARGE_PIC always uses a pseudo PIC register, which is
	  // uninitialized here.  Since FUNCTION is local and calling it
	  // doesn't go through the PLT, we use scratch register %r11 as
	  // the PIC register and initialize it here.
22658 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
22659 ix86_init_large_pic_reg (tmp_regno);
	  fnaddr = legitimize_pic_address (fnaddr,
					   gen_rtx_REG (Pmode, tmp_regno));
22662 }
22663
22664 if (!sibcall_insn_operand (fnaddr, word_mode))
22665 {
22666 tmp = gen_rtx_REG (word_mode, tmp_regno);
22667 if (GET_MODE (fnaddr) != word_mode)
22668 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
22669 emit_move_insn (tmp, fnaddr);
22670 fnaddr = tmp;
22671 }
22672
22673 tmp = gen_rtx_MEM (QImode, fnaddr);
22674 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
22675 tmp = emit_call_insn (tmp);
22676 SIBLING_CALL_P (tmp) = 1;
22677 }
22678 emit_barrier ();
22679
22680 /* Emit just enough of rest_of_compilation to get the insns emitted. */
22681 insn = get_insns ();
22682 shorten_branches (insn);
22683 assemble_start_function (thunk_fndecl, fnname);
22684 final_start_function (insn, file, 1);
22685 final (insn, file, 1);
22686 final_end_function ();
22687 assemble_end_function (thunk_fndecl, fnname);
22688
22689 flag_force_indirect_call = saved_flag_force_indirect_call;
22690}
22691
22692static void
22693x86_file_start (void)
22694{
22695 default_file_start ();
22696 if (TARGET_16BIT)
    fputs ("\t.code16gcc\n", asm_out_file);
22698#if TARGET_MACHO
22699 darwin_file_start ();
22700#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
22707}
22708
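/* Return the alignment, in bits, to use for a field of TYPE whose computed
   alignment is COMPUTED.  On ia32 without -malign-double, the alignment of
   double, long long and similar types is capped at 32 bits.  */
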
22709int
22710x86_field_alignment (tree type, int computed)
22711{
22712 machine_mode mode;
22713
22714 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
22715 return computed;
22716 if (TARGET_IAMCU)
    return iamcu_alignment (type, computed);
22718 type = strip_array_types (type);
22719 mode = TYPE_MODE (type);
22720 if (mode == DFmode || mode == DCmode
22721 || GET_MODE_CLASS (mode) == MODE_INT
22722 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
22723 {
22724 if (TYPE_ATOMIC (type) && computed > 32)
22725 {
22726 static bool warned;
22727
22728 if (!warned && warn_psabi)
22729 {
22730 const char *url
22731 = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";
22732
22733 warned = true;
22734 inform (input_location, "the alignment of %<_Atomic %T%> "
22735 "fields changed in %{GCC 11.1%}",
22736 TYPE_MAIN_VARIANT (type), url);
22737 }
22738 }
22739 else
22740 return MIN (32, computed);
22741 }
22742 return computed;
22743}
22744
22745/* Print call to TARGET to FILE. */
22746
22747static void
22748x86_print_call_or_nop (FILE *file, const char *target)
22749{
  if (flag_nop_mcount || !strcmp (target, "nop"))
    /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
    fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
  else
    fprintf (file, "1:\tcall\t%s\n", target);
22755}
22756
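/* If the current function carries a "fentry_name" attribute, store the
   attribute's string value in *NAME and return true.  */
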
22757static bool
22758current_fentry_name (const char **name)
22759{
  tree attr = lookup_attribute ("fentry_name",
				DECL_ATTRIBUTES (current_function_decl));
22762 if (!attr)
22763 return false;
22764 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
22765 return true;
22766}
22767
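/* If the current function carries a "fentry_section" attribute, store the
   attribute's string value in *NAME and return true.  */
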
22768static bool
22769current_fentry_section (const char **name)
22770{
  tree attr = lookup_attribute ("fentry_section",
				DECL_ATTRIBUTES (current_function_decl));
22773 if (!attr)
22774 return false;
22775 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
22776 return true;
22777}
22778
/* Return a caller-saved register that isn't live, or a callee-saved
   register that has been saved on the stack in the prologue, for use
   by the profiling code emitted at function entry.  */
22782
22783static int
22784x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
22785{
  /* Use %r10 if the profiler is emitted before the prologue or if %r10
     isn't used as the DRAP register.  */
22788 if (ix86_profile_before_prologue ()
22789 || !crtl->drap_reg
22790 || REGNO (crtl->drap_reg) != R10_REG)
22791 return R10_REG;
22792
22793 /* The profiler is emitted after the prologue. If there is a
22794 caller-saved register which isn't live or a callee-saved
22795 register saved on stack in the prologue, use it. */
22796
22797 bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
22798
22799 int i;
22800 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22801 if (GENERAL_REGNO_P (i)
22802 && i != R10_REG
22803#ifdef NO_PROFILE_COUNTERS
22804 && (r11_ok || i != R11_REG)
22805#else
22806 && i != R11_REG
22807#endif
	&& TEST_HARD_REG_BIT (accessible_reg_set, i)
	&& (ix86_save_reg (i, true, true)
22810 || (call_used_regs[i]
22811 && !fixed_regs[i]
22812 && !REGNO_REG_SET_P (reg_live, i))))
22813 return i;
22814
22815 sorry ("no register available for profiling %<-mcmodel=large%s%>",
22816 ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
22817
22818 return R10_REG;
22819}
22820
22821/* Output assembler code to FILE to increment profiler label # LABELNO
22822 for profiling a function entry. */
22823void
22824x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
22825{
22826 if (cfun->machine->insn_queued_at_entrance)
22827 {
22828 if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
	fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
22830 unsigned int patch_area_size
22831 = crtl->patch_area_size - crtl->patch_area_entry;
22832 if (patch_area_size)
22833 ix86_output_patchable_area (patch_area_size,
22834 crtl->patch_area_entry == 0);
22835 }
22836
22837 const char *mcount_name = MCOUNT_NAME;
22838
  if (current_fentry_name (&mcount_name))
22840 ;
22841 else if (fentry_name)
22842 mcount_name = fentry_name;
22843 else if (flag_fentry)
22844 mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
22845
22846 if (TARGET_64BIT)
22847 {
22848#ifndef NO_PROFILE_COUNTERS
22849 if (ASSEMBLER_DIALECT == ASM_INTEL)
22850 fprintf (file, "\tlea\tr11, %sP%d[rip]\n", LPREFIX, labelno);
22851 else
22852 fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
22853#endif
22854
22855 int scratch;
22856 const char *reg;
22857 char legacy_reg[4] = { 0 };
22858
22859 if (!TARGET_PECOFF)
22860 {
22861 switch (ix86_cmodel)
22862 {
22863 case CM_LARGE:
	      scratch = x86_64_select_profile_regnum (true);
22865 reg = hi_reg_name[scratch];
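	      /* For legacy registers hi_reg_name gives the name without
		 the 'r' prefix (e.g. "ax"); prepend 'r' to form the
		 64-bit register name.  */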
22866 if (LEGACY_INT_REGNO_P (scratch))
22867 {
22868 legacy_reg[0] = 'r';
22869 legacy_reg[1] = reg[0];
22870 legacy_reg[2] = reg[1];
22871 reg = legacy_reg;
22872 }
22873 if (ASSEMBLER_DIALECT == ASM_INTEL)
		fprintf (file, "1:\tmovabs\t%s, OFFSET FLAT:%s\n"
			       "\tcall\t%s\n", reg, mcount_name, reg);
	      else
		fprintf (file, "1:\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n",
			 mcount_name, reg, reg);
22879 break;
22880 case CM_LARGE_PIC:
22881#ifdef NO_PROFILE_COUNTERS
	      scratch = x86_64_select_profile_regnum (false);
22883 reg = hi_reg_name[scratch];
22884 if (LEGACY_INT_REGNO_P (scratch))
22885 {
22886 legacy_reg[0] = 'r';
22887 legacy_reg[1] = reg[0];
22888 legacy_reg[2] = reg[1];
22889 reg = legacy_reg;
22890 }
22891 if (ASSEMBLER_DIALECT == ASM_INTEL)
22892 {
		  fprintf (file, "1:movabs\tr11, "
				 "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
		  fprintf (file, "\tlea\t%s, 1b[rip]\n", reg);
		  fprintf (file, "\tadd\t%s, r11\n", reg);
		  fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
			   mcount_name);
		  fprintf (file, "\tadd\t%s, r11\n", reg);
		  fprintf (file, "\tcall\t%s\n", reg);
		  break;
		}
	      fprintf (file,
		       "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
	      fprintf (file, "\tleaq\t1b(%%rip), %%%s\n", reg);
	      fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
	      fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
	      fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
	      fprintf (file, "\tcall\t*%%%s\n", reg);
22910#else
22911 sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
22912#endif
22913 break;
22914 case CM_SMALL_PIC:
22915 case CM_MEDIUM_PIC:
22916 if (!ix86_direct_extern_access)
22917 {
22918 if (ASSEMBLER_DIALECT == ASM_INTEL)
		  fprintf (file, "1:\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
			   mcount_name);
		else
		  fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n",
			   mcount_name);
22924 break;
22925 }
22926 /* fall through */
22927 default:
	      x86_print_call_or_nop (file, mcount_name);
22929 break;
22930 }
22931 }
22932 else
	x86_print_call_or_nop (file, mcount_name);
22934 }
22935 else if (flag_pic)
22936 {
22937#ifndef NO_PROFILE_COUNTERS
22938 if (ASSEMBLER_DIALECT == ASM_INTEL)
22939 fprintf (file,
22940 "\tlea\t" PROFILE_COUNT_REGISTER ", %sP%d@GOTOFF[ebx]\n",
22941 LPREFIX, labelno);
22942 else
22943 fprintf (file,
22944 "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n",
22945 LPREFIX, labelno);
22946#endif
22947 if (ASSEMBLER_DIALECT == ASM_INTEL)
	fprintf (file, "1:\tcall\t[DWORD PTR %s@GOT[ebx]]\n", mcount_name);
      else
	fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
22951 }
22952 else
22953 {
22954#ifndef NO_PROFILE_COUNTERS
22955 if (ASSEMBLER_DIALECT == ASM_INTEL)
22956 fprintf (file,
22957 "\tmov\t" PROFILE_COUNT_REGISTER ", OFFSET FLAT:%sP%d\n",
22958 LPREFIX, labelno);
22959 else
22960 fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n",
22961 LPREFIX, labelno);
22962#endif
      x86_print_call_or_nop (file, mcount_name);
22964 }
22965
22966 if (flag_record_mcount
      || lookup_attribute ("fentry_section",
			   DECL_ATTRIBUTES (current_function_decl)))
22969 {
22970 const char *sname = "__mcount_loc";
22971
      if (current_fentry_section (&sname))
22973 ;
22974 else if (fentry_section)
22975 sname = fentry_section;
22976
      fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
      fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
      fprintf (file, "\t.previous\n");
22980 }
22981}
22982
22983/* We don't have exact information about the insn sizes, but we may assume
22984 quite safely that we are informed about all 1 byte insns and memory
22985 address sizes. This is enough to eliminate unnecessary padding in
22986 99% of cases. */
22987
22988int
22989ix86_min_insn_size (rtx_insn *insn)
22990{
22991 int l = 0, len;
22992
22993 if (!INSN_P (insn) || !active_insn_p (insn))
22994 return 0;
22995
  /* Discard the alignments we have emitted ourselves, and jump
     instructions.  */
22997 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
22998 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
22999 return 0;
23000
  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
23003 if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
23005 && !SIBLING_CALL_P (insn))
23006 return 5;
23007 len = get_attr_length (insn);
23008 if (len <= 1)
23009 return 1;
23010
23011 /* For normal instructions we rely on get_attr_length being exact,
23012 with a few exceptions. */
23013 if (!JUMP_P (insn))
23014 {
23015 enum attr_type type = get_attr_type (insn);
23016
23017 switch (type)
23018 {
23019 case TYPE_MULTI:
23020 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
23021 || asm_noperands (PATTERN (insn)) >= 0)
23022 return 0;
23023 break;
23024 case TYPE_OTHER:
23025 case TYPE_FCMP:
23026 break;
23027 default:
23028 /* Otherwise trust get_attr_length. */
23029 return len;
23030 }
23031
23032 l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
23034 l = 4;
23035 }
23036 if (l)
23037 return 1+l;
23038 else
23039 return 2;
23040}
23041
23042#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
23043
/* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
   16 byte window.  */
23046
23047static void
23048ix86_avoid_jump_mispredicts (void)
23049{
23050 rtx_insn *insn, *start = get_insns ();
23051 int nbytes = 0, njumps = 0;
23052 bool isjump = false;
23053
23054 /* Look for all minimal intervals of instructions containing 4 jumps.
23055 The intervals are bounded by START and INSN. NBYTES is the total
23056 size of instructions in the interval including INSN and not including
     START.  When NBYTES is smaller than 16 bytes, it is possible
     that the ends of START and INSN end up in the same 16 byte page.
23059
23060 The smallest offset in the page INSN can start is the case where START
23061 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
23062 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
23063
23064 Don't consider asm goto as jump, while it can contain a jump, it doesn't
23065 have to, control transfer to label(s) can be performed through other
23066 means, and also we estimate minimum length of all asm stmts as 0. */
23067 for (insn = start; insn; insn = NEXT_INSN (insn))
23068 {
23069 int min_size;
23070
23071 if (LABEL_P (insn))
23072 {
23073 align_flags alignment = label_to_alignment (insn);
23074 int align = alignment.levels[0].log;
23075 int max_skip = alignment.levels[0].maxskip;
23076
23077 if (max_skip > 15)
23078 max_skip = 15;
23079 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
23080 already in the current 16 byte page, because otherwise
23081 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
23082 bytes to reach 16 byte boundary. */
23083 if (align <= 0
23084 || (align <= 3 && max_skip != (1 << align) - 1))
23085 max_skip = 0;
23086 if (dump_file)
	  fprintf (dump_file, "Label %i with max_skip %i\n",
		   INSN_UID (insn), max_skip);
23089 if (max_skip)
23090 {
23091 while (nbytes + max_skip >= 16)
23092 {
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
		      || CALL_P (start))
		    njumps--, isjump = true;
		  else
		    isjump = false;
		  nbytes -= ix86_min_insn_size (start);
23100 }
23101 }
23102 continue;
23103 }
23104
23105 min_size = ix86_min_insn_size (insn);
23106 nbytes += min_size;
23107 if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
23110 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
23111 || CALL_P (insn))
23112 njumps++;
23113 else
23114 continue;
23115
23116 while (njumps > 3)
23117 {
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
	      || CALL_P (start))
	    njumps--, isjump = true;
	  else
	    isjump = false;
	  nbytes -= ix86_min_insn_size (start);
23125 }
23126 gcc_assert (njumps >= 0);
23127 if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);
23130
23131 if (njumps == 3 && isjump && nbytes < 16)
23132 {
23133 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
23134
23135 if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
23138 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
23139 }
23140 }
23141}
23142#endif
23143
/* AMD Athlon works faster when RET is not the destination of a conditional
   jump or directly preceded by another jump instruction.  We avoid the
   penalty by inserting a NOP just before the RET instructions in such
   cases.  */
23148static void
23149ix86_pad_returns (void)
23150{
23151 edge e;
23152 edge_iterator ei;
23153
23154 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
23155 {
23156 basic_block bb = e->src;
23157 rtx_insn *ret = BB_END (bb);
23158 rtx_insn *prev;
23159 bool replace = false;
23160
23161 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
23162 || optimize_bb_for_size_p (bb))
23163 continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
23165 if (active_insn_p (prev) || LABEL_P (prev))
23166 break;
23167 if (prev && LABEL_P (prev))
23168 {
23169 edge e;
23170 edge_iterator ei;
23171
23172 FOR_EACH_EDGE (e, ei, bb->preds)
23173 if (EDGE_FREQUENCY (e) && e->src->index >= 0
23174 && !(e->flags & EDGE_FALLTHRU))
23175 {
23176 replace = true;
23177 break;
23178 }
23179 }
23180 if (!replace)
23181 {
23182 prev = prev_active_insn (ret);
23183 if (prev
23184 && ((JUMP_P (prev) && any_condjump_p (prev))
23185 || CALL_P (prev)))
23186 replace = true;
	  /* Empty functions get a branch mispredict even when
	     the jump destination is not visible to us.  */
23189 if (!prev && !optimize_function_for_size_p (cfun))
23190 replace = true;
23191 }
23192 if (replace)
23193 {
23194 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
23195 delete_insn (ret);
23196 }
23197 }
23198}
23199
23200/* Count the minimum number of instructions in BB. Return 4 if the
23201 number of instructions >= 4. */
23202
23203static int
23204ix86_count_insn_bb (basic_block bb)
23205{
23206 rtx_insn *insn;
23207 int insn_count = 0;
23208
23209 /* Count number of instructions in this block. Return 4 if the number
23210 of instructions >= 4. */
23211 FOR_BB_INSNS (bb, insn)
23212 {
23213 /* Only happen in exit blocks. */
23214 if (JUMP_P (insn)
23215 && ANY_RETURN_P (PATTERN (insn)))
23216 break;
23217
23218 if (NONDEBUG_INSN_P (insn)
23219 && GET_CODE (PATTERN (insn)) != USE
23220 && GET_CODE (PATTERN (insn)) != CLOBBER)
23221 {
23222 insn_count++;
23223 if (insn_count >= 4)
23224 return insn_count;
23225 }
23226 }
23227
23228 return insn_count;
23229}
23230
23231
23232/* Count the minimum number of instructions in code path in BB.
23233 Return 4 if the number of instructions >= 4. */
23234
23235static int
23236ix86_count_insn (basic_block bb)
23237{
23238 edge e;
23239 edge_iterator ei;
23240 int min_prev_count;
23241
23242 /* Only bother counting instructions along paths with no
23243 more than 2 basic blocks between entry and exit. Given
23244 that BB has an edge to exit, determine if a predecessor
23245 of BB has an edge from entry. If so, compute the number
23246 of instructions in the predecessor block. If there
23247 happen to be multiple such blocks, compute the minimum. */
23248 min_prev_count = 4;
23249 FOR_EACH_EDGE (e, ei, bb->preds)
23250 {
23251 edge prev_e;
23252 edge_iterator prev_ei;
23253
23254 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
23255 {
23256 min_prev_count = 0;
23257 break;
23258 }
23259 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
23260 {
23261 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
23262 {
	      int count = ix86_count_insn_bb (e->src);
23264 if (count < min_prev_count)
23265 min_prev_count = count;
23266 break;
23267 }
23268 }
23269 }
23270
23271 if (min_prev_count < 4)
23272 min_prev_count += ix86_count_insn_bb (bb);
23273
23274 return min_prev_count;
23275}
23276
23277/* Pad short function to 4 instructions. */
23278
23279static void
23280ix86_pad_short_function (void)
23281{
23282 edge e;
23283 edge_iterator ei;
23284
23285 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
23286 {
23287 rtx_insn *ret = BB_END (e->src);
23288 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
23289 {
	  int insn_count = ix86_count_insn (e->src);
23291
23292 /* Pad short function. */
23293 if (insn_count < 4)
23294 {
23295 rtx_insn *insn = ret;
23296
23297 /* Find epilogue. */
23298 while (insn
23299 && (!NOTE_P (insn)
23300 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
23301 insn = PREV_INSN (insn);
23302
23303 if (!insn)
23304 insn = ret;
23305
23306 /* Two NOPs count as one instruction. */
23307 insn_count = 2 * (4 - insn_count);
23308 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
23309 }
23310 }
23311 }
23312}
23313
23314/* Fix up a Windows system unwinder issue. If an EH region falls through into
23315 the epilogue, the Windows system unwinder will apply epilogue logic and
23316 produce incorrect offsets. This can be avoided by adding a nop between
23317 the last insn that can throw and the first insn of the epilogue. */
23318
23319static void
23320ix86_seh_fixup_eh_fallthru (void)
23321{
23322 edge e;
23323 edge_iterator ei;
23324
23325 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
23326 {
23327 rtx_insn *insn, *next;
23328
23329 /* Find the beginning of the epilogue. */
23330 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
23331 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
23332 break;
23333 if (insn == NULL)
23334 continue;
23335
23336 /* We only care about preceding insns that can throw. */
23337 insn = prev_active_insn (insn);
23338 if (insn == NULL || !can_throw_internal (insn))
23339 continue;
23340
23341 /* Do not separate calls from their debug information. */
      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
23343 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
23344 insn = next;
23345 else
23346 break;
23347
23348 emit_insn_after (gen_nops (const1_rtx), insn);
23349 }
23350}
/* Split a vector load from a PARM_DECL into elemental loads to avoid STLF
   stalls.  */
23353static void
23354ix86_split_stlf_stall_load ()
23355{
23356 rtx_insn* insn, *start = get_insns ();
23357 unsigned window = 0;
23358
23359 for (insn = start; insn; insn = NEXT_INSN (insn))
23360 {
23361 if (!NONDEBUG_INSN_P (insn))
23362 continue;
23363 window++;
      /* With 64 independent vaddps %xmm18, %xmm19, %xmm20 insns (no
	 dependence between each other, just to fill the pipeline) inserted
	 before the stalled load, the STLF-stall case is as fast as the
	 no-stall case on CLX.  Since the CFG is freed before machine_reorg,
	 just do a rough calculation of the window according to the insn
	 layout.  */
23369 if (window > (unsigned) x86_stlf_window_ninsns)
23370 return;
23371
23372 if (any_uncondjump_p (insn)
23373 || ANY_RETURN_P (PATTERN (insn))
23374 || CALL_P (insn))
23375 return;
23376
23377 rtx set = single_set (insn);
23378 if (!set)
23379 continue;
23380 rtx src = SET_SRC (set);
23381 if (!MEM_P (src)
23382 /* Only handle V2DFmode load since it doesn't need any scratch
23383 register. */
23384 || GET_MODE (src) != E_V2DFmode
23385 || !MEM_EXPR (src)
23386 || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
23387 continue;
23388
23389 rtx zero = CONST0_RTX (V2DFmode);
23390 rtx dest = SET_DEST (set);
23391 rtx m = adjust_address (src, DFmode, 0);
23392 rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
23393 emit_insn_before (loadlpd, insn);
23394 m = adjust_address (src, DFmode, 8);
23395 rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
23396 if (dump_file && (dump_flags & TDF_DETAILS))
23397 {
	  fputs ("Due to potential STLF stall, split instruction:\n",
		 dump_file);
23400 print_rtl_single (dump_file, insn);
	  fputs ("To:\n", dump_file);
23402 print_rtl_single (dump_file, loadlpd);
23403 print_rtl_single (dump_file, loadhpd);
23404 }
23405 PATTERN (insn) = loadhpd;
23406 INSN_CODE (insn) = -1;
23407 gcc_assert (recog_memoized (insn) != -1);
23408 }
23409}
23410
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and a pass to avoid four jumps in a single 16 byte window.  */
23413static void
23414ix86_reorg (void)
23415{
23416 /* We are freeing block_for_insn in the toplev to keep compatibility
23417 with old MDEP_REORGS that are not CFG based. Recompute it now. */
23418 compute_bb_for_insn ();
23419
23420 if (TARGET_SEH && current_function_has_exception_handlers ())
23421 ix86_seh_fixup_eh_fallthru ();
23422
23423 if (optimize && optimize_function_for_speed_p (cfun))
23424 {
23425 if (TARGET_SSE2)
23426 ix86_split_stlf_stall_load ();
23427 if (TARGET_PAD_SHORT_FUNCTION)
23428 ix86_pad_short_function ();
23429 else if (TARGET_PAD_RETURNS)
23430 ix86_pad_returns ();
23431#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
23432 if (TARGET_FOUR_JUMP_LIMIT)
23433 ix86_avoid_jump_mispredicts ();
23434#endif
23435 }
23436}
23437
/* Return nonzero when a QImode register that must be represented via a REX
   prefix is used.  */
23440bool
23441x86_extended_QIreg_mentioned_p (rtx_insn *insn)
23442{
23443 int i;
23444 extract_insn_cached (insn);
23445 for (i = 0; i < recog_data.n_operands; i++)
23446 if (GENERAL_REG_P (recog_data.operand[i])
23447 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
23448 return true;
23449 return false;
23450}
23451
23452/* Return true when INSN mentions register that must be encoded using REX
23453 prefix. */
23454bool
23455x86_extended_reg_mentioned_p (rtx insn)
23456{
23457 subrtx_iterator::array_type array;
23458 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
23459 {
23460 const_rtx x = *iter;
23461 if (REG_P (x)
23462 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))
23463 || REX2_INT_REGNO_P (REGNO (x))))
23464 return true;
23465 }
23466 return false;
23467}
23468
23469/* Return true when INSN mentions register that must be encoded using REX2
23470 prefix. */
23471bool
23472x86_extended_rex2reg_mentioned_p (rtx insn)
23473{
23474 subrtx_iterator::array_type array;
23475 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
23476 {
23477 const_rtx x = *iter;
23478 if (REG_P (x) && REX2_INT_REGNO_P (REGNO (x)))
23479 return true;
23480 }
23481 return false;
23482}
23483
/* Return true when the rtx operands mention a register that must be encoded
   using the EVEX prefix.  */
23486bool
23487x86_evex_reg_mentioned_p (rtx operands[], int nops)
23488{
23489 int i;
23490 for (i = 0; i < nops; i++)
23491 if (EXT_REX_SSE_REG_P (operands[i])
	|| x86_extended_rex2reg_mentioned_p (operands[i]))
23493 return true;
23494 return false;
23495}
23496
23497/* If profitable, negate (without causing overflow) integer constant
23498 of mode MODE at location LOC. Return true in this case. */
23499bool
23500x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
23501{
23502 HOST_WIDE_INT val;
23503
23504 if (!CONST_INT_P (*loc))
23505 return false;
23506
23507 switch (mode)
23508 {
23509 case E_DImode:
23510 /* DImode x86_64 constants must fit in 32 bits. */
23511 gcc_assert (x86_64_immediate_operand (*loc, mode));
23512
23513 mode = SImode;
23514 break;
23515
23516 case E_SImode:
23517 case E_HImode:
23518 case E_QImode:
23519 break;
23520
23521 default:
23522 gcc_unreachable ();
23523 }
23524
23525 /* Avoid overflows. */
23526 if (mode_signbit_p (mode, *loc))
23527 return false;
23528
23529 val = INTVAL (*loc);
23530
23531 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
23532 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
23533 if ((val < 0 && val != -128)
23534 || val == 128)
23535 {
23536 *loc = GEN_INT (-val);
23537 return true;
23538 }
23539
23540 return false;
23541}
23542
23543/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23544 optabs would emit if we didn't have TFmode patterns. */
23545
23546void
23547x86_emit_floatuns (rtx operands[2])
23548{
23549 rtx_code_label *neglab, *donelab;
23550 rtx i0, i1, f0, in, out;
23551 machine_mode mode, inmode;
23552
23553 inmode = GET_MODE (operands[1]);
23554 gcc_assert (inmode == SImode || inmode == DImode);
23555
23556 out = operands[0];
23557 in = force_reg (inmode, operands[1]);
23558 mode = GET_MODE (out);
23559 neglab = gen_label_rtx ();
23560 donelab = gen_label_rtx ();
23561 f0 = gen_reg_rtx (mode);
23562
23563 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23564
23565 expand_float (out, in, 0);
23566
23567 emit_jump_insn (gen_jump (donelab));
23568 emit_barrier ();
23569
23570 emit_label (neglab);
23571
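  /* The input has its sign bit set when treated as signed: halve it while
     keeping the low bit so the rounding is preserved, convert, and then
     double the result to recover the original magnitude.  */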
23572 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23573 1, OPTAB_DIRECT);
23574 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23575 1, OPTAB_DIRECT);
23576 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23577
23578 expand_float (f0, i0, 0);
23579
23580 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
23581
23582 emit_label (donelab);
23583}
23584
23585/* Return the diagnostic message string if conversion from FROMTYPE to
23586 TOTYPE is not allowed, NULL otherwise. */
23587
23588static const char *
23589ix86_invalid_conversion (const_tree fromtype, const_tree totype)
23590{
23591 machine_mode from_mode = element_mode (fromtype);
23592 machine_mode to_mode = element_mode (totype);
23593
23594 if (!TARGET_SSE2 && from_mode != to_mode)
23595 {
      /* Do not allow conversions to/from BFmode/HFmode scalar types
	 when TARGET_SSE2 is not available.  */
23598 if (from_mode == BFmode)
23599 return N_("invalid conversion from type %<__bf16%> "
23600 "without option %<-msse2%>");
23601 if (from_mode == HFmode)
23602 return N_("invalid conversion from type %<_Float16%> "
23603 "without option %<-msse2%>");
23604 if (to_mode == BFmode)
23605 return N_("invalid conversion to type %<__bf16%> "
23606 "without option %<-msse2%>");
23607 if (to_mode == HFmode)
23608 return N_("invalid conversion to type %<_Float16%> "
23609 "without option %<-msse2%>");
23610 }
23611
  /* Warn about silent implicit conversion between __bf16 and short,
     since __bfloat16 has been redefined as the real __bf16 type rather
     than short since GCC 13.  */
23615 if (element_mode (fromtype) != element_mode (totype)
23616 && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT))
23617 {
23618 /* Warn for silent implicit conversion where user may expect
23619 a bitcast. */
23620 if ((TYPE_MODE (fromtype) == BFmode
23621 && TYPE_MODE (totype) == HImode)
23622 || (TYPE_MODE (totype) == BFmode
23623 && TYPE_MODE (fromtype) == HImode))
23624 warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
23625 "to real %<__bf16%> since GCC 13.1, be careful of "
23626 "implicit conversion between %<__bf16%> and %<short%>; "
23627 "an explicit bitcast may be needed here");
23628 }
23629
23630 /* Conversion allowed. */
23631 return NULL;
23632}
23633
23634/* Return the diagnostic message string if the unary operation OP is
23635 not permitted on TYPE, NULL otherwise. */
23636
23637static const char *
23638ix86_invalid_unary_op (int op, const_tree type)
23639{
23640 machine_mode mmode = element_mode (type);
23641 /* Reject all single-operand operations on BFmode/HFmode except for &
23642 when TARGET_SSE2 is not available. */
23643 if (!TARGET_SSE2 && op != ADDR_EXPR)
23644 {
23645 if (mmode == BFmode)
23646 return N_("operation not permitted on type %<__bf16%> "
23647 "without option %<-msse2%>");
23648 if (mmode == HFmode)
23649 return N_("operation not permitted on type %<_Float16%> "
23650 "without option %<-msse2%>");
23651 }
23652
23653 /* Operation allowed. */
23654 return NULL;
23655}
23656
23657/* Return the diagnostic message string if the binary operation OP is
23658 not permitted on TYPE1 and TYPE2, NULL otherwise. */
23659
23660static const char *
23661ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
23662 const_tree type2)
23663{
23664 machine_mode type1_mode = element_mode (type1);
23665 machine_mode type2_mode = element_mode (type2);
23666 /* Reject all 2-operand operations on BFmode or HFmode
23667 when TARGET_SSE2 is not available. */
23668 if (!TARGET_SSE2)
23669 {
23670 if (type1_mode == BFmode || type2_mode == BFmode)
23671 return N_("operation not permitted on type %<__bf16%> "
23672 "without option %<-msse2%>");
23673
23674 if (type1_mode == HFmode || type2_mode == HFmode)
23675 return N_("operation not permitted on type %<_Float16%> "
23676 "without option %<-msse2%>");
23677 }
23678
23679 /* Operation allowed. */
23680 return NULL;
23681}
23682
23683
23684/* Target hook for scalar_mode_supported_p. */
23685static bool
23686ix86_scalar_mode_supported_p (scalar_mode mode)
23687{
23688 if (DECIMAL_FLOAT_MODE_P (mode))
23689 return default_decimal_float_supported_p ();
23690 else if (mode == TFmode)
23691 return true;
23692 else if (mode == HFmode || mode == BFmode)
23693 return true;
23694 else
23695 return default_scalar_mode_supported_p (mode);
23696}
23697
/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
   if MODE is HFmode or BFmode, and punt to the generic implementation
   otherwise.  */
23700
23701static bool
23702ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
23703{
23704 /* NB: Always return TRUE for HFmode so that the _Float16 type will
23705 be defined by the C front-end for AVX512FP16 intrinsics. We will
23706 issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
23707 enabled. */
23708 return ((mode == HFmode || mode == BFmode)
23709 ? true
23710 : default_libgcc_floating_mode_supported_p (mode));
23711}
23712
23713/* Implements target hook vector_mode_supported_p. */
23714static bool
23715ix86_vector_mode_supported_p (machine_mode mode)
23716{
23717 /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
23718 either. */
23719 if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
23720 return false;
23721 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
23722 return true;
23723 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
23724 return true;
23725 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
23726 return true;
23727 if (TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
23728 return true;
23729 if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
23730 && VALID_MMX_REG_MODE (mode))
23731 return true;
23732 if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
23733 && VALID_MMX_REG_MODE_3DNOW (mode))
23734 return true;
23735 if (mode == V2QImode)
23736 return true;
23737 return false;
23738}
23739
23740/* Target hook for c_mode_for_suffix. */
23741static machine_mode
23742ix86_c_mode_for_suffix (char suffix)
23743{
23744 if (suffix == 'q')
23745 return TFmode;
23746 if (suffix == 'w')
23747 return XFmode;
23748
23749 return VOIDmode;
23750}
23751
23752/* Helper function to map common constraints to non-EGPR ones.
   All related constraints have a j prefix: j plus an upper-case letter
   means the constraint is strictly EGPR enabled, while j plus a
   lower-case letter indicates the constraint is strictly gpr16 only.

   The "g" constraint is special: it is split into "jr", "jm" and "i",
   as there is no corresponding general constraint defined for the
   backend.

   Here is the full list mapping constraints that may involve
   gpr to their j-prefixed forms.
23762
23763 "g" -> "jrjmi"
23764 "r" -> "jr"
23765 "m" -> "jm"
23766 "<" -> "j<"
23767 ">" -> "j>"
23768 "o" -> "jo"
23769 "V" -> "jV"
23770 "p" -> "jp"
23771 "Bm" -> "ja"
23772*/
23773
23774static void map_egpr_constraints (vec<const char *> &constraints)
23775{
23776 for (size_t i = 0; i < constraints.length(); i++)
23777 {
23778 const char *cur = constraints[i];
23779
      if (startswith (cur, "=@cc"))
23781 continue;
23782
      int len = strlen (cur);
23784 auto_vec<char> buf;
23785
23786 for (int j = 0; j < len; j++)
23787 {
23788 switch (cur[j])
23789 {
23790 case 'g':
	      buf.safe_push ('j');
	      buf.safe_push ('r');
	      buf.safe_push ('j');
	      buf.safe_push ('m');
	      buf.safe_push ('i');
23796 break;
23797 case 'r':
23798 case 'm':
23799 case '<':
23800 case '>':
23801 case 'o':
23802 case 'V':
23803 case 'p':
	      buf.safe_push ('j');
	      buf.safe_push (cur[j]);
23806 break;
23807 case 'B':
23808 if (cur[j + 1] == 'm')
23809 {
		  buf.safe_push ('j');
		  buf.safe_push ('a');
23812 j++;
23813 }
23814 else
23815 {
		  buf.safe_push (cur[j]);
		  buf.safe_push (cur[j + 1]);
23818 j++;
23819 }
23820 break;
23821 case 'T':
23822 case 'Y':
23823 case 'W':
23824 case 'j':
	      buf.safe_push (cur[j]);
	      buf.safe_push (cur[j + 1]);
23827 j++;
23828 break;
23829 default:
	      buf.safe_push (cur[j]);
23831 break;
23832 }
23833 }
      buf.safe_push ('\0');
23835 constraints[i] = xstrdup (buf.address ());
23836 }
23837}
23838
23839/* Worker function for TARGET_MD_ASM_ADJUST.
23840
23841 We implement asm flag outputs, and maintain source compatibility
23842 with the old cc0-based compiler. */
23843
23844static rtx_insn *
23845ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
23846 vec<machine_mode> & /*input_modes*/,
23847 vec<const char *> &constraints, vec<rtx> &/*uses*/,
23848 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
23849 location_t loc)
23850{
23851 bool saw_asm_flag = false;
23852
23853 start_sequence ();
23854
23855 if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
23856 map_egpr_constraints (constraints);
23857
23858 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
23859 {
23860 const char *con = constraints[i];
      if (!startswith (con, "=@cc"))
23862 continue;
23863 con += 4;
      if (strchr (con, ',') != NULL)
23865 {
23866 error_at (loc, "alternatives not allowed in %<asm%> flag output");
23867 continue;
23868 }
23869
23870 bool invert = false;
23871 if (con[0] == 'n')
23872 invert = true, con++;
23873
23874 machine_mode mode = CCmode;
23875 rtx_code code = UNKNOWN;
23876
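      /* Map the "=@cc<cond>" suffix onto the flags-register mode and the
	 RTL comparison code that reads the corresponding condition.  */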
23877 switch (con[0])
23878 {
23879 case 'a':
23880 if (con[1] == 0)
23881 mode = CCAmode, code = EQ;
23882 else if (con[1] == 'e' && con[2] == 0)
23883 mode = CCCmode, code = NE;
23884 break;
23885 case 'b':
23886 if (con[1] == 0)
23887 mode = CCCmode, code = EQ;
23888 else if (con[1] == 'e' && con[2] == 0)
23889 mode = CCAmode, code = NE;
23890 break;
23891 case 'c':
23892 if (con[1] == 0)
23893 mode = CCCmode, code = EQ;
23894 break;
23895 case 'e':
23896 if (con[1] == 0)
23897 mode = CCZmode, code = EQ;
23898 break;
23899 case 'g':
23900 if (con[1] == 0)
23901 mode = CCGCmode, code = GT;
23902 else if (con[1] == 'e' && con[2] == 0)
23903 mode = CCGCmode, code = GE;
23904 break;
23905 case 'l':
23906 if (con[1] == 0)
23907 mode = CCGCmode, code = LT;
23908 else if (con[1] == 'e' && con[2] == 0)
23909 mode = CCGCmode, code = LE;
23910 break;
23911 case 'o':
23912 if (con[1] == 0)
23913 mode = CCOmode, code = EQ;
23914 break;
23915 case 'p':
23916 if (con[1] == 0)
23917 mode = CCPmode, code = EQ;
23918 break;
23919 case 's':
23920 if (con[1] == 0)
23921 mode = CCSmode, code = EQ;
23922 break;
23923 case 'z':
23924 if (con[1] == 0)
23925 mode = CCZmode, code = EQ;
23926 break;
23927 }
23928 if (code == UNKNOWN)
23929 {
23930 error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
23931 continue;
23932 }
23933 if (invert)
23934 code = reverse_condition (code);
23935
23936 rtx dest = outputs[i];
23937 if (!saw_asm_flag)
23938 {
23939 /* This is the first asm flag output. Here we put the flags
23940 register in as the real output and adjust the condition to
23941 allow it. */
23942 constraints[i] = "=Bf";
23943 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
23944 saw_asm_flag = true;
23945 }
23946 else
23947 {
23948 /* We don't need the flags register as output twice. */
23949 constraints[i] = "=X";
23950 outputs[i] = gen_rtx_SCRATCH (SImode);
23951 }
23952
23953 rtx x = gen_rtx_REG (mode, FLAGS_REG);
23954 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
23955
23956 machine_mode dest_mode = GET_MODE (dest);
23957 if (!SCALAR_INT_MODE_P (dest_mode))
23958 {
23959 error_at (loc, "invalid type for %<asm%> flag output");
23960 continue;
23961 }
23962
23963 if (dest_mode == QImode)
23964 emit_insn (gen_rtx_SET (dest, x));
23965 else
23966 {
23967 rtx reg = gen_reg_rtx (QImode);
23968 emit_insn (gen_rtx_SET (reg, x));
23969
23970 reg = convert_to_mode (dest_mode, reg, 1);
23971 emit_move_insn (dest, reg);
23972 }
23973 }
23974
23975 rtx_insn *seq = get_insns ();
23976 end_sequence ();
23977
23978 if (saw_asm_flag)
23979 return seq;
23980 else
23981 {
23982 /* If we had no asm flag outputs, clobber the flags. */
      clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
      SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
23985 return NULL;
23986 }
23987}
23988
23989/* Implements target vector targetm.asm.encode_section_info. */
23990
23991static void ATTRIBUTE_UNUSED
23992ix86_encode_section_info (tree decl, rtx rtl, int first)
23993{
23994 default_encode_section_info (decl, rtl, first);
23995
  if (ix86_in_large_data_p (decl))
23997 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
23998}
23999
24000/* Worker function for REVERSE_CONDITION. */
24001
24002enum rtx_code
24003ix86_reverse_condition (enum rtx_code code, machine_mode mode)
24004{
24005 return (mode == CCFPmode
24006 ? reverse_condition_maybe_unordered (code)
24007 : reverse_condition (code));
24008}
24009
24010/* Output code to perform an x87 FP register move, from OPERANDS[1]
24011 to OPERANDS[0]. */
24012
24013const char *
24014output_387_reg_move (rtx_insn *insn, rtx *operands)
24015{
24016 if (REG_P (operands[0]))
24017 {
24018 if (REG_P (operands[1])
24019 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24020 {
24021 if (REGNO (operands[0]) == FIRST_STACK_REG)
24022 return output_387_ffreep (operands, opno: 0);
24023 return "fstp\t%y0";
24024 }
24025 if (STACK_TOP_P (operands[0]))
24026 return "fld%Z1\t%y1";
24027 return "fst\t%y0";
24028 }
24029 else if (MEM_P (operands[0]))
24030 {
24031 gcc_assert (REG_P (operands[1]));
24032 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24033 return "fstp%Z0\t%y0";
24034 else
24035 {
24036 /* There is no non-popping store to memory for XFmode.
24037 So if we need one, follow the store with a load. */
24038 if (GET_MODE (operands[0]) == XFmode)
24039 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
24040 else
24041 return "fst%Z0\t%y0";
24042 }
24043 }
24044 else
24045    gcc_unreachable ();
24046}
24047#ifdef TARGET_SOLARIS
24048/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
24049
24050static void
24051i386_solaris_elf_named_section (const char *name, unsigned int flags,
24052 tree decl)
24053{
24054 /* With Binutils 2.15, the "@unwind" marker must be specified on
24055 every occurrence of the ".eh_frame" section, not just the first
24056 one. */
24057 if (TARGET_64BIT
24058 && strcmp (name, ".eh_frame") == 0)
24059 {
24060 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
24061 flags & SECTION_WRITE ? "aw" : "a");
24062 return;
24063 }
24064
24065#ifndef USE_GAS
24066 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
24067 {
24068 solaris_elf_asm_comdat_section (name, flags, decl);
24069 return;
24070 }
24071
24072 /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
24073 SPARC assembler. One cannot mix single-letter flags and #exclude, so
24074 only emit the latter here. */
24075 if (flags & SECTION_EXCLUDE)
24076 {
24077 fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
24078 return;
24079 }
24080#endif
24081
24082 default_elf_asm_named_section (name, flags, decl);
24083}
24084#endif /* TARGET_SOLARIS */
24085
24086/* Return the mangling of TYPE if it is an extended fundamental type. */
24087
24088static const char *
24089ix86_mangle_type (const_tree type)
24090{
24091 type = TYPE_MAIN_VARIANT (type);
24092
24093 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
24094 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
24095 return NULL;
24096
24097 if (type == float128_type_node || type == float64x_type_node)
24098 return NULL;
24099
24100 switch (TYPE_MODE (type))
24101 {
24102 case E_BFmode:
24103 return "DF16b";
24104 case E_HFmode:
24105 /* _Float16 is "DF16_".
24106 Align with clang's decision in https://reviews.llvm.org/D33719. */
24107 return "DF16_";
24108 case E_TFmode:
24109 /* __float128 is "g". */
24110 return "g";
24111 case E_XFmode:
24112 /* "long double" or __float80 is "e". */
24113 return "e";
24114 default:
24115 return NULL;
24116 }
24117}
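/* Illustration (an assumption about the Itanium C++ ABI encoding, not
   taken from this file): with the usual _Z prefix, a C++ function
   "void f (T)" mangles as _Z1fg for T = __float128, _Z1fe for
   T = long double and _Z1fDF16_ for T = _Float16, matching the codes
   returned above.  */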
24118
24119/* Create C++ tinfo symbols for fundamental types that are only
24120   conditionally available.  */
24121
24122static void
24123ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
24124{
24125 extern tree ix86_float16_type_node;
24126 extern tree ix86_bf16_type_node;
24127
24128 if (!TARGET_SSE2)
24129 {
24130 if (!float16_type_node)
24131 float16_type_node = ix86_float16_type_node;
24132 if (!bfloat16_type_node)
24133 bfloat16_type_node = ix86_bf16_type_node;
24134 callback (float16_type_node);
24135 callback (bfloat16_type_node);
24136 float16_type_node = NULL_TREE;
24137 bfloat16_type_node = NULL_TREE;
24138 }
24139}
24140
24141static GTY(()) tree ix86_tls_stack_chk_guard_decl;
24142
24143static tree
24144ix86_stack_protect_guard (void)
24145{
24146 if (TARGET_SSP_TLS_GUARD)
24147 {
24148 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
24149 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
24150 tree type = build_qualified_type (type_node, qual);
24151 tree t;
24152
24153 if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
24154 {
24155 t = ix86_tls_stack_chk_guard_decl;
24156
24157 if (t == NULL)
24158 {
24159 rtx x;
24160
24161 t = build_decl
24162 (UNKNOWN_LOCATION, VAR_DECL,
24163 get_identifier (ix86_stack_protector_guard_symbol_str),
24164 type);
24165 TREE_STATIC (t) = 1;
24166 TREE_PUBLIC (t) = 1;
24167 DECL_EXTERNAL (t) = 1;
24168 TREE_USED (t) = 1;
24169 TREE_THIS_VOLATILE (t) = 1;
24170 DECL_ARTIFICIAL (t) = 1;
24171 DECL_IGNORED_P (t) = 1;
24172
24173 /* Do not share RTL as the declaration is visible outside of
24174		 the current function.  */
24175 x = DECL_RTL (t);
24176 RTX_FLAG (x, used) = 1;
24177
24178 ix86_tls_stack_chk_guard_decl = t;
24179 }
24180 }
24181 else
24182 {
24183 tree asptrtype = build_pointer_type (type);
24184
24185 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
24186 t = build2 (MEM_REF, asptrtype, t,
24187 build_int_cst (asptrtype, 0));
24188 TREE_THIS_VOLATILE (t) = 1;
24189 }
24190
24191 return t;
24192 }
24193
24194 return default_stack_protect_guard ();
24195}
24196
24197/* For 32-bit code we can save PIC register setup by using
24198 __stack_chk_fail_local hidden function instead of calling
24199 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
24200 register, so it is better to call __stack_chk_fail directly. */
24201
24202static tree ATTRIBUTE_UNUSED
24203ix86_stack_protect_fail (void)
24204{
24205 return TARGET_64BIT
24206 ? default_external_stack_protect_fail ()
24207 : default_hidden_stack_protect_fail ();
24208}
24209
24210/* Select a format to encode pointers in exception handling data. CODE
24211 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
24212 true if the symbol may be affected by dynamic relocations.
24213
24214 ??? All x86 object file formats are capable of representing this.
24215 After all, the relocation needed is the same as for the call insn.
24216 Whether or not a particular assembler allows us to enter such, I
24217 guess we'll have to see. */
24218
24219int
24220asm_preferred_eh_data_format (int code, int global)
24221{
24222 /* PE-COFF is effectively always -fPIC because of the .reloc section. */
24223 if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access)
24224 {
24225 int type = DW_EH_PE_sdata8;
24226 if (ptr_mode == SImode
24227 || ix86_cmodel == CM_SMALL_PIC
24228 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
24229 type = DW_EH_PE_sdata4;
24230 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
24231 }
24232
24233 if (ix86_cmodel == CM_SMALL
24234 || (ix86_cmodel == CM_MEDIUM && code))
24235 return DW_EH_PE_udata4;
24236
24237 return DW_EH_PE_absptr;
24238}
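/* Worked example, for illustration: 64-bit -fPIC code with the default
   small PIC model gets DW_EH_PE_indirect | DW_EH_PE_pcrel |
   DW_EH_PE_sdata4 for global symbols and DW_EH_PE_pcrel |
   DW_EH_PE_sdata4 for local ones, while non-PIC small-model code
   typically uses DW_EH_PE_udata4.  */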
24239
24240/* Implement targetm.vectorize.builtin_vectorization_cost. */
24241static int
24242ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
24243 tree vectype, int)
24244{
24245 bool fp = false;
24246 machine_mode mode = TImode;
24247 int index;
24248 if (vectype != NULL)
24249 {
24250 fp = FLOAT_TYPE_P (vectype);
24251 mode = TYPE_MODE (vectype);
24252 }
24253
24254 switch (type_of_cost)
24255 {
24256 case scalar_stmt:
24257 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
24258
24259 case scalar_load:
24260      /* Load/store costs are relative to the register move cost, which is 2.
24261	 Recompute them with COSTS_N_INSNS so everything has the same base.  */
24262 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
24263 : ix86_cost->int_load [2]) / 2;
24264
24265 case scalar_store:
24266 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
24267 : ix86_cost->int_store [2]) / 2;
24268
24269 case vector_stmt:
24270 return ix86_vec_cost (mode,
24271 cost: fp ? ix86_cost->addss : ix86_cost->sse_op);
24272
24273 case vector_load:
24274 index = sse_store_index (mode);
24275 /* See PR82713 - we may end up being called on non-vector type. */
24276 if (index < 0)
24277 index = 2;
24278 return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
24279
24280 case vector_store:
24281 index = sse_store_index (mode);
24282 /* See PR82713 - we may end up being called on non-vector type. */
24283 if (index < 0)
24284 index = 2;
24285 return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
24286
24287 case vec_to_scalar:
24288 case scalar_to_vec:
24289 return ix86_vec_cost (mode, cost: ix86_cost->sse_op);
24290
24291 /* We should have separate costs for unaligned loads and gather/scatter.
24292 Do that incrementally. */
24293 case unaligned_load:
24294 index = sse_store_index (mode);
24295 /* See PR82713 - we may end up being called on non-vector type. */
24296 if (index < 0)
24297 index = 2;
24298 return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
24299
24300 case unaligned_store:
24301 index = sse_store_index (mode);
24302 /* See PR82713 - we may end up being called on non-vector type. */
24303 if (index < 0)
24304 index = 2;
24305 return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
24306
24307 case vector_gather_load:
24308 return ix86_vec_cost (mode,
24309 COSTS_N_INSNS
24310 (ix86_cost->gather_static
24311 + ix86_cost->gather_per_elt
24312 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
24313
24314 case vector_scatter_store:
24315 return ix86_vec_cost (mode,
24316 COSTS_N_INSNS
24317 (ix86_cost->scatter_static
24318 + ix86_cost->scatter_per_elt
24319 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
24320
24321 case cond_branch_taken:
24322 return ix86_cost->cond_taken_branch_cost;
24323
24324 case cond_branch_not_taken:
24325 return ix86_cost->cond_not_taken_branch_cost;
24326
24327 case vec_perm:
24328 case vec_promote_demote:
24329 return ix86_vec_cost (mode, cost: ix86_cost->sse_op);
24330
24331 case vec_construct:
24332 {
24333 int n = TYPE_VECTOR_SUBPARTS (node: vectype);
24334	/* N - 1 element inserts into an SSE vector; the possible
24335	   GPR -> XMM move is accounted for in add_stmt_cost.  */
24336 if (GET_MODE_BITSIZE (mode) <= 128)
24337 return (n - 1) * ix86_cost->sse_op;
24338 /* One vinserti128 for combining two SSE vectors for AVX256. */
24339 else if (GET_MODE_BITSIZE (mode) == 256)
24340 return ((n - 2) * ix86_cost->sse_op
24341 + ix86_vec_cost (mode, cost: ix86_cost->addss));
24342 /* One vinserti64x4 and two vinserti128 for combining SSE
24343 and AVX256 vectors to AVX512. */
24344 else if (GET_MODE_BITSIZE (mode) == 512)
24345 return ((n - 4) * ix86_cost->sse_op
24346 + 3 * ix86_vec_cost (mode, cost: ix86_cost->addss));
24347 gcc_unreachable ();
24348 }
24349
24350 default:
24351 gcc_unreachable ();
24352 }
24353}
24354
24355
24356/* This function returns the calling-ABI-specific va_list type node,
24357   i.e. the va_list type appropriate for FNDECL.  */
24358
24359static tree
24360ix86_fn_abi_va_list (tree fndecl)
24361{
24362 if (!TARGET_64BIT)
24363 return va_list_type_node;
24364 gcc_assert (fndecl != NULL_TREE);
24365
24366 if (ix86_function_abi (fndecl: (const_tree) fndecl) == MS_ABI)
24367 return ms_va_list_type_node;
24368 else
24369 return sysv_va_list_type_node;
24370}
24371
24372/* Returns the canonical va_list type specified by TYPE.  If there
24373   is no valid TYPE provided, it returns NULL_TREE.  */
24374
24375static tree
24376ix86_canonical_va_list_type (tree type)
24377{
24378 if (TARGET_64BIT)
24379 {
24380 if (lookup_attribute (attr_name: "ms_abi va_list", TYPE_ATTRIBUTES (type)))
24381 return ms_va_list_type_node;
24382
24383 if ((TREE_CODE (type) == ARRAY_TYPE
24384 && integer_zerop (array_type_nelts (type)))
24385 || POINTER_TYPE_P (type))
24386 {
24387 tree elem_type = TREE_TYPE (type);
24388 if (TREE_CODE (elem_type) == RECORD_TYPE
24389 && lookup_attribute (attr_name: "sysv_abi va_list",
24390 TYPE_ATTRIBUTES (elem_type)))
24391 return sysv_va_list_type_node;
24392 }
24393
24394 return NULL_TREE;
24395 }
24396
24397 return std_canonical_va_list_type (type);
24398}
24399
24400/* Iterate through the target-specific builtin types for va_list.
24401 IDX denotes the iterator, *PTREE is set to the result type of
24402 the va_list builtin, and *PNAME to its internal type.
24403 Returns zero if there is no element for this index, otherwise
24404 IDX should be increased upon the next call.
24405 Note, do not iterate a base builtin's name like __builtin_va_list.
24406 Used from c_common_nodes_and_builtins. */
24407
24408static int
24409ix86_enum_va_list (int idx, const char **pname, tree *ptree)
24410{
24411 if (TARGET_64BIT)
24412 {
24413 switch (idx)
24414 {
24415 default:
24416 break;
24417
24418 case 0:
24419 *ptree = ms_va_list_type_node;
24420 *pname = "__builtin_ms_va_list";
24421 return 1;
24422
24423 case 1:
24424 *ptree = sysv_va_list_type_node;
24425 *pname = "__builtin_sysv_va_list";
24426 return 1;
24427 }
24428 }
24429
24430 return 0;
24431}
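/* Usage sketch, for illustration only (assuming the __builtin_ms_va_*
   builtins provided for the MS ABI): the two builtin types registered
   above let x86-64 code handle the "other" ABI's variadic arguments,
   e.g.

     __attribute__((ms_abi)) int
     sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       int s = 0;
       for (int i = 0; i < n; i++)
	 s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/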
24432
24433#undef TARGET_SCHED_DISPATCH
24434#define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
24435#undef TARGET_SCHED_DISPATCH_DO
24436#define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
24437#undef TARGET_SCHED_REASSOCIATION_WIDTH
24438#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
24439#undef TARGET_SCHED_REORDER
24440#define TARGET_SCHED_REORDER ix86_atom_sched_reorder
24441#undef TARGET_SCHED_ADJUST_PRIORITY
24442#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
24443#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
24444#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
24445 ix86_dependencies_evaluation_hook
24446
24447
24448/* Implementation of the reassociation_width target hook, used by
24449   the reassoc pass to identify the parallelism level in a
24450   reassociated tree.  The statement's tree_code is passed in OP
24451   and the argument type is passed in MODE.  */
24452
24453static int
24454ix86_reassociation_width (unsigned int op, machine_mode mode)
24455{
24456 int width = 1;
24457 /* Vector part. */
24458 if (VECTOR_MODE_P (mode))
24459 {
24460 int div = 1;
24461 if (INTEGRAL_MODE_P (mode))
24462 width = ix86_cost->reassoc_vec_int;
24463 else if (FLOAT_MODE_P (mode))
24464 width = ix86_cost->reassoc_vec_fp;
24465
24466 if (width == 1)
24467 return 1;
24468
24469      /* Integer vector instructions execute in the FP unit and can
24470	 execute 3 additions and one multiplication per cycle.  */
24471 if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
24472 || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4
24473 || ix86_tune == PROCESSOR_ZNVER5)
24474 && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
24475 return 1;
24476
24477      /* Account for targets that split wide vectors into multiple parts.  */
24478 if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256)
24479 div = GET_MODE_BITSIZE (mode) / 256;
24480 else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
24481 div = GET_MODE_BITSIZE (mode) / 128;
24482 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
24483 div = GET_MODE_BITSIZE (mode) / 64;
24484 width = (width + div - 1) / div;
24485 }
24486 /* Scalar part. */
24487 else if (INTEGRAL_MODE_P (mode))
24488 width = ix86_cost->reassoc_int;
24489 else if (FLOAT_MODE_P (mode))
24490 width = ix86_cost->reassoc_fp;
24491
24492  /* Avoid using too many registers in 32-bit mode.  */
24493 if (!TARGET_64BIT && width > 2)
24494 width = 2;
24495 return width;
24496}
24497
24498/* ??? No autovectorization into MMX or 3DNOW until we can reliably
24499 place emms and femms instructions. */
24500
24501static machine_mode
24502ix86_preferred_simd_mode (scalar_mode mode)
24503{
24504 if (!TARGET_SSE)
24505 return word_mode;
24506
24507 switch (mode)
24508 {
24509 case E_QImode:
24510 if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24511 return V64QImode;
24512 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24513 return V32QImode;
24514 else
24515 return V16QImode;
24516
24517 case E_HImode:
24518 if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24519 return V32HImode;
24520 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24521 return V16HImode;
24522 else
24523 return V8HImode;
24524
24525 case E_SImode:
24526 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24527 return V16SImode;
24528 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24529 return V8SImode;
24530 else
24531 return V4SImode;
24532
24533 case E_DImode:
24534 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24535 return V8DImode;
24536 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24537 return V4DImode;
24538 else
24539 return V2DImode;
24540
24541 case E_HFmode:
24542 if (TARGET_AVX512FP16)
24543 {
24544 if (TARGET_AVX512VL)
24545 {
24546 if (TARGET_PREFER_AVX128)
24547 return V8HFmode;
24548 else if (TARGET_PREFER_AVX256 || !TARGET_EVEX512)
24549 return V16HFmode;
24550 }
24551 if (TARGET_EVEX512)
24552 return V32HFmode;
24553 }
24554 return word_mode;
24555
24556 case E_SFmode:
24557 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24558 return V16SFmode;
24559 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24560 return V8SFmode;
24561 else
24562 return V4SFmode;
24563
24564 case E_DFmode:
24565 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24566 return V8DFmode;
24567 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24568 return V4DFmode;
24569 else if (TARGET_SSE2)
24570 return V2DFmode;
24571 /* FALLTHRU */
24572
24573 default:
24574 return word_mode;
24575 }
24576}
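/* For illustration: with -mavx2 (and no AVX-512) the switch above maps
   SImode to V8SImode and SFmode to V8SFmode, so the vectorizer starts
   with 8-lane 256-bit vectors, whereas plain -msse2 yields V4SImode
   and V4SFmode.  */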
24577
24578/* If AVX is enabled then try vectorizing with both 256bit and 128bit
24579 vectors. If AVX512F is enabled then try vectorizing with 512bit,
24580 256bit and 128bit vectors. */
24581
24582static unsigned int
24583ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
24584{
24585 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24586 {
24587 modes->safe_push (V64QImode);
24588 modes->safe_push (V32QImode);
24589 modes->safe_push (V16QImode);
24590 }
24591 else if (TARGET_AVX512F && TARGET_EVEX512 && all)
24592 {
24593 modes->safe_push (V32QImode);
24594 modes->safe_push (V16QImode);
24595 modes->safe_push (V64QImode);
24596 }
24597 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24598 {
24599 modes->safe_push (V32QImode);
24600 modes->safe_push (V16QImode);
24601 }
24602 else if (TARGET_AVX && all)
24603 {
24604 modes->safe_push (V16QImode);
24605 modes->safe_push (V32QImode);
24606 }
24607 else if (TARGET_SSE2)
24608 modes->safe_push (V16QImode);
24609
24610 if (TARGET_MMX_WITH_SSE)
24611 modes->safe_push (V8QImode);
24612
24613 if (TARGET_SSE2)
24614 modes->safe_push (V4QImode);
24615
24616 return 0;
24617}
24618
24619/* Implementation of targetm.vectorize.get_mask_mode.  */
24620
24621static opt_machine_mode
24622ix86_get_mask_mode (machine_mode data_mode)
24623{
24624 unsigned vector_size = GET_MODE_SIZE (data_mode);
24625 unsigned nunits = GET_MODE_NUNITS (data_mode);
24626 unsigned elem_size = vector_size / nunits;
24627
24628 /* Scalar mask case. */
24629 if ((TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64)
24630 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
24631 /* AVX512FP16 only supports vector comparison
24632 to kmask for _Float16. */
24633 || (TARGET_AVX512VL && TARGET_AVX512FP16
24634 && GET_MODE_INNER (data_mode) == E_HFmode))
24635 {
24636 if (elem_size == 4
24637 || elem_size == 8
24638 || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
24639 return smallest_int_mode_for_size (size: nunits);
24640 }
24641
24642 scalar_int_mode elem_mode
24643 = smallest_int_mode_for_size (size: elem_size * BITS_PER_UNIT);
24644
24645 gcc_assert (elem_size * nunits == vector_size);
24646
24647 return mode_for_vector (elem_mode, nunits);
24648}
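/* Example, for illustration: for a V16SF data mode with AVX512F the
   scalar-mask branch above returns HImode (a 16-bit kmask, one bit per
   lane); without AVX-512 a V4SF data mode falls through and yields the
   V4SImode vector mask instead.  */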
24649
24650
24651
24652/* Return class of registers which could be used for pseudo of MODE
24653 and of class RCLASS for spilling instead of memory. Return NO_REGS
24654 if it is not possible or non-profitable. */
24655
24656/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
24657
24658static reg_class_t
24659ix86_spill_class (reg_class_t rclass, machine_mode mode)
24660{
24661 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
24662 && TARGET_SSE2
24663 && TARGET_INTER_UNIT_MOVES_TO_VEC
24664 && TARGET_INTER_UNIT_MOVES_FROM_VEC
24665 && (mode == SImode || (TARGET_64BIT && mode == DImode))
24666 && INTEGER_CLASS_P (rclass))
24667 return ALL_SSE_REGS;
24668 return NO_REGS;
24669}
24670
24671/* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
24672 but returns a lower bound. */
24673
24674static unsigned int
24675ix86_max_noce_ifcvt_seq_cost (edge e)
24676{
24677 bool predictable_p = predictable_edge_p (e);
24678 if (predictable_p)
24679 {
24680 if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
24681 return param_max_rtl_if_conversion_predictable_cost;
24682 }
24683 else
24684 {
24685 if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
24686 return param_max_rtl_if_conversion_unpredictable_cost;
24687 }
24688
24689 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
24690}
24691
24692/* Return true if SEQ is a good candidate as a replacement for the
24693 if-convertible sequence described in IF_INFO. */
24694
24695static bool
24696ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
24697{
24698 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
24699 {
24700 int cmov_cnt = 0;
24701 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
24702 Maybe we should allow even more conditional moves as long as they
24703 are used far enough not to stall the CPU, or also consider
24704 IF_INFO->TEST_BB succ edge probabilities. */
24705 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
24706 {
24707 rtx set = single_set (insn);
24708 if (!set)
24709 continue;
24710 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
24711 continue;
24712 rtx src = SET_SRC (set);
24713 machine_mode mode = GET_MODE (src);
24714 if (GET_MODE_CLASS (mode) != MODE_INT
24715 && GET_MODE_CLASS (mode) != MODE_FLOAT)
24716 continue;
24717 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
24718 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
24719 continue;
24720 /* insn is CMOV or FCMOV. */
24721 if (++cmov_cnt > 1)
24722 return false;
24723 }
24724 }
24725 return default_noce_conversion_profitable_p (seq, if_info);
24726}
24727
24728/* x86-specific vector costs. */
24729class ix86_vector_costs : public vector_costs
24730{
24731public:
24732 ix86_vector_costs (vec_info *, bool);
24733
24734 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
24735 stmt_vec_info stmt_info, slp_tree node,
24736 tree vectype, int misalign,
24737 vect_cost_model_location where) override;
24738 void finish_cost (const vector_costs *) override;
24739
24740private:
24741
24742 /* Estimate register pressure of the vectorized code. */
24743 void ix86_vect_estimate_reg_pressure ();
24744  /* Number of GENERAL_REGS/SSE_REGS used in the vectorizer; used to
24745     estimate register pressure.
24746 ??? Currently it's only used by vec_construct/scalar_to_vec
24747 where we know it's not loaded from memory. */
24748 unsigned m_num_gpr_needed[3];
24749 unsigned m_num_sse_needed[3];
24750};
24751
24752ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
24753 : vector_costs (vinfo, costing_for_scalar),
24754 m_num_gpr_needed (),
24755 m_num_sse_needed ()
24756{
24757}
24758
24759/* Implement targetm.vectorize.create_costs. */
24760
24761static vector_costs *
24762ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
24763{
24764 return new ix86_vector_costs (vinfo, costing_for_scalar);
24765}
24766
24767unsigned
24768ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
24769 stmt_vec_info stmt_info, slp_tree node,
24770 tree vectype, int misalign,
24771 vect_cost_model_location where)
24772{
24773 unsigned retval = 0;
24774 bool scalar_p
24775 = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
24776  int stmt_cost = -1;
24777
24778 bool fp = false;
24779 machine_mode mode = scalar_p ? SImode : TImode;
24780
24781 if (vectype != NULL)
24782 {
24783 fp = FLOAT_TYPE_P (vectype);
24784 mode = TYPE_MODE (vectype);
24785 if (scalar_p)
24786 mode = TYPE_MODE (TREE_TYPE (vectype));
24787 }
24788
24789 if ((kind == vector_stmt || kind == scalar_stmt)
24790 && stmt_info
24791 && stmt_info->stmt && gimple_code (g: stmt_info->stmt) == GIMPLE_ASSIGN)
24792 {
24793 tree_code subcode = gimple_assign_rhs_code (gs: stmt_info->stmt);
24794 /*machine_mode inner_mode = mode;
24795 if (VECTOR_MODE_P (mode))
24796 inner_mode = GET_MODE_INNER (mode);*/
24797
24798 switch (subcode)
24799 {
24800 case PLUS_EXPR:
24801 case POINTER_PLUS_EXPR:
24802 case MINUS_EXPR:
24803 if (kind == scalar_stmt)
24804 {
24805 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
24806 stmt_cost = ix86_cost->addss;
24807 else if (X87_FLOAT_MODE_P (mode))
24808 stmt_cost = ix86_cost->fadd;
24809 else
24810 stmt_cost = ix86_cost->add;
24811 }
24812 else
24813 stmt_cost = ix86_vec_cost (mode, cost: fp ? ix86_cost->addss
24814 : ix86_cost->sse_op);
24815 break;
24816
24817 case MULT_EXPR:
24818 /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
24819 take it as MULT_EXPR. */
24820 case MULT_HIGHPART_EXPR:
24821 stmt_cost = ix86_multiplication_cost (cost: ix86_cost, mode);
24822 break;
24823 /* There's no direct instruction for WIDEN_MULT_EXPR,
24824 take emulation into account. */
24825 case WIDEN_MULT_EXPR:
24826 stmt_cost = ix86_widen_mult_cost (cost: ix86_cost, mode,
24827 TYPE_UNSIGNED (vectype));
24828 break;
24829
24830 case NEGATE_EXPR:
24831 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
24832 stmt_cost = ix86_cost->sse_op;
24833 else if (X87_FLOAT_MODE_P (mode))
24834 stmt_cost = ix86_cost->fchs;
24835 else if (VECTOR_MODE_P (mode))
24836 stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
24837 else
24838 stmt_cost = ix86_cost->add;
24839 break;
24840 case TRUNC_DIV_EXPR:
24841 case CEIL_DIV_EXPR:
24842 case FLOOR_DIV_EXPR:
24843 case ROUND_DIV_EXPR:
24844 case TRUNC_MOD_EXPR:
24845 case CEIL_MOD_EXPR:
24846 case FLOOR_MOD_EXPR:
24847 case RDIV_EXPR:
24848 case ROUND_MOD_EXPR:
24849 case EXACT_DIV_EXPR:
24850 stmt_cost = ix86_division_cost (cost: ix86_cost, mode);
24851 break;
24852
24853 case RSHIFT_EXPR:
24854 case LSHIFT_EXPR:
24855 case LROTATE_EXPR:
24856 case RROTATE_EXPR:
24857 {
24858 tree op1 = gimple_assign_rhs1 (gs: stmt_info->stmt);
24859 tree op2 = gimple_assign_rhs2 (gs: stmt_info->stmt);
24860 stmt_cost = ix86_shift_rotate_cost
24861 (cost: ix86_cost,
24862 code: (subcode == RSHIFT_EXPR
24863 && !TYPE_UNSIGNED (TREE_TYPE (op1)))
24864 ? ASHIFTRT : LSHIFTRT, mode,
24865 TREE_CODE (op2) == INTEGER_CST,
24866 op1_val: cst_and_fits_in_hwi (op2)
24867 ? int_cst_value (op2) : -1,
24868 and_in_op1: false, shift_and_truncate: false, NULL, NULL);
24869 }
24870 break;
24871 case NOP_EXPR:
24872 /* Only sign-conversions are free. */
24873 if (tree_nop_conversion_p
24874 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
24875 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
24876 stmt_cost = 0;
24877 break;
24878
24879 case BIT_IOR_EXPR:
24880 case ABS_EXPR:
24881 case ABSU_EXPR:
24882 case MIN_EXPR:
24883 case MAX_EXPR:
24884 case BIT_XOR_EXPR:
24885 case BIT_AND_EXPR:
24886 case BIT_NOT_EXPR:
24887 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
24888 stmt_cost = ix86_cost->sse_op;
24889 else if (VECTOR_MODE_P (mode))
24890 stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
24891 else
24892 stmt_cost = ix86_cost->add;
24893 break;
24894 default:
24895 break;
24896 }
24897 }
24898
24899 combined_fn cfn;
24900 if ((kind == vector_stmt || kind == scalar_stmt)
24901 && stmt_info
24902 && stmt_info->stmt
24903 && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
24904 switch (cfn)
24905 {
24906 case CFN_FMA:
24907 stmt_cost = ix86_vec_cost (mode,
24908 cost: mode == SFmode ? ix86_cost->fmass
24909 : ix86_cost->fmasd);
24910 break;
24911 case CFN_MULH:
24912 stmt_cost = ix86_multiplication_cost (cost: ix86_cost, mode);
24913 break;
24914 default:
24915 break;
24916 }
24917
24918 /* If we do elementwise loads into a vector then we are bound by
24919 latency and execution resources for the many scalar loads
24920 (AGU and load ports). Try to account for this by scaling the
24921 construction cost by the number of elements involved. */
24922 if ((kind == vec_construct || kind == vec_to_scalar)
24923 && stmt_info
24924 && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
24925 || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
24926 && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
24927 && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)))
24928 != INTEGER_CST))
24929 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER))
24930 {
24931 stmt_cost = ix86_builtin_vectorization_cost (type_of_cost: kind, vectype, misalign);
24932 stmt_cost *= (TYPE_VECTOR_SUBPARTS (node: vectype) + 1);
24933 }
24934 else if ((kind == vec_construct || kind == scalar_to_vec)
24935 && node
24936 && SLP_TREE_DEF_TYPE (node) == vect_external_def)
24937 {
24938 stmt_cost = ix86_builtin_vectorization_cost (type_of_cost: kind, vectype, misalign);
24939 unsigned i;
24940 tree op;
24941 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
24942 if (TREE_CODE (op) == SSA_NAME)
24943 TREE_VISITED (op) = 0;
24944 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
24945 {
24946 if (TREE_CODE (op) != SSA_NAME
24947 || TREE_VISITED (op))
24948 continue;
24949 TREE_VISITED (op) = 1;
24950 gimple *def = SSA_NAME_DEF_STMT (op);
24951 tree tem;
24952 if (is_gimple_assign (gs: def)
24953 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
24954 && ((tem = gimple_assign_rhs1 (gs: def)), true)
24955 && TREE_CODE (tem) == SSA_NAME
24956 /* A sign-change expands to nothing. */
24957 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (def)),
24958 TREE_TYPE (tem)))
24959 def = SSA_NAME_DEF_STMT (tem);
24960 /* When the component is loaded from memory we can directly
24961 move it to a vector register, otherwise we have to go
24962 via a GPR or via vpinsr which involves similar cost.
24963 Likewise with a BIT_FIELD_REF extracting from a vector
24964 register we can hope to avoid using a GPR. */
24965 if (!is_gimple_assign (gs: def)
24966 || ((!gimple_assign_load_p (def)
24967 || (!TARGET_SSE4_1
24968 && GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) == 1))
24969 && (gimple_assign_rhs_code (gs: def) != BIT_FIELD_REF
24970 || !VECTOR_TYPE_P (TREE_TYPE
24971 (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))))
24972 {
24973 if (fp)
24974 m_num_sse_needed[where]++;
24975 else
24976 {
24977 m_num_gpr_needed[where]++;
24978 stmt_cost += ix86_cost->sse_to_integer;
24979 }
24980 }
24981 }
24982 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
24983 if (TREE_CODE (op) == SSA_NAME)
24984 TREE_VISITED (op) = 0;
24985 }
24986 if (stmt_cost == -1)
24987 stmt_cost = ix86_builtin_vectorization_cost (type_of_cost: kind, vectype, misalign);
24988
24989 /* Penalize DFmode vector operations for Bonnell. */
24990 if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
24991 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
24992 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
24993
24994 /* Statements in an inner loop relative to the loop being
24995 vectorized are weighted more heavily. The value here is
24996 arbitrary and could potentially be improved with analysis. */
24997 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
24998
24999 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
25000     for Silvermont as it has an out-of-order integer pipeline and can execute
25001     2 scalar instructions per tick, but has an in-order SIMD pipeline.  */
25002 if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
25003 || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
25004 && stmt_info && stmt_info->stmt)
25005 {
25006 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
25007 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
25008 retval = (retval * 17) / 10;
25009 }
25010
25011 m_costs[where] += retval;
25012
25013 return retval;
25014}
25015
25016void
25017ix86_vector_costs::ix86_vect_estimate_reg_pressure ()
25018{
25019 unsigned gpr_spill_cost = COSTS_N_INSNS (ix86_cost->int_store [2]) / 2;
25020 unsigned sse_spill_cost = COSTS_N_INSNS (ix86_cost->sse_store[0]) / 2;
25021
25022  /* ??? Lacking a better way to count available FP registers, use SSE_REGS.  */
25023 unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8;
25024 for (unsigned i = 0; i != 3; i++)
25025 {
25026 if (m_num_gpr_needed[i] > target_avail_regs)
25027 m_costs[i] += gpr_spill_cost * (m_num_gpr_needed[i] - target_avail_regs);
25028      /* Only measure SSE register pressure.  */
25029 if (TARGET_SSE && (m_num_sse_needed[i] > target_avail_sse))
25030 m_costs[i] += sse_spill_cost * (m_num_sse_needed[i] - target_avail_sse);
25031 }
25032}
25033
25034void
25035ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
25036{
25037 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (p: m_vinfo);
25038 if (loop_vinfo && !m_costing_for_scalar)
25039 {
25040 /* We are currently not asking the vectorizer to compare costs
25041 between different vector mode sizes. When using predication
25042	 that will end up always choosing the preferred mode size even
25043 if there's a smaller mode covering all lanes. Test for this
25044 situation and artificially reject the larger mode attempt.
25045 ??? We currently lack masked ops for sub-SSE sized modes,
25046 so we could restrict this rejection to AVX and AVX512 modes
25047 but error on the safe side for now. */
25048 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
25049 && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
25050 && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
25051 && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
25052 > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
25053 m_costs[vect_body] = INT_MAX;
25054 }
25055
25056 ix86_vect_estimate_reg_pressure ();
25057
25058 vector_costs::finish_cost (scalar_costs);
25059}
25060
25061/* Validate target specific memory model bits in VAL. */
25062
25063static unsigned HOST_WIDE_INT
25064ix86_memmodel_check (unsigned HOST_WIDE_INT val)
25065{
25066 enum memmodel model = memmodel_from_int (val);
25067 bool strong;
25068
25069 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
25070 |MEMMODEL_MASK)
25071 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
25072 {
25073 warning (OPT_Winvalid_memory_model,
25074 "unknown architecture specific memory model");
25075 return MEMMODEL_SEQ_CST;
25076 }
25077 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
25078 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
25079 {
25080 warning (OPT_Winvalid_memory_model,
25081 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
25082 "memory model");
25083 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
25084 }
25085 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
25086 {
25087 warning (OPT_Winvalid_memory_model,
25088 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
25089 "memory model");
25090 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
25091 }
25092 return val;
25093}
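/* Illustrative use of the HLE bits checked above, a sketch rather than
   a recommendation:

     while (__atomic_exchange_n (&lock, 1,
				 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     .. critical section ..
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Combining HLE_ACQUIRE with a memory model weaker than ACQUIRE (or
   HLE_RELEASE with one weaker than RELEASE) triggers the warnings
   emitted here.  */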
25094
25095/* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
25096 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
25097 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
25098 or number of vecsize_mangle variants that should be emitted. */
25099
25100static int
25101ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
25102 struct cgraph_simd_clone *clonei,
25103 tree base_type, int num,
25104 bool explicit_p)
25105{
25106 int ret = 1;
25107
25108 if (clonei->simdlen
25109 && (clonei->simdlen < 2
25110 || clonei->simdlen > 1024
25111 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
25112 {
25113 if (explicit_p)
25114 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
25115 "unsupported simdlen %wd", clonei->simdlen.to_constant ());
25116 return 0;
25117 }
25118
25119 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
25120 if (TREE_CODE (ret_type) != VOID_TYPE)
25121 switch (TYPE_MODE (ret_type))
25122 {
25123 case E_QImode:
25124 case E_HImode:
25125 case E_SImode:
25126 case E_DImode:
25127 case E_SFmode:
25128 case E_DFmode:
25129 /* case E_SCmode: */
25130 /* case E_DCmode: */
25131 if (!AGGREGATE_TYPE_P (ret_type))
25132 break;
25133 /* FALLTHRU */
25134 default:
25135 if (explicit_p)
25136 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
25137 "unsupported return type %qT for simd", ret_type);
25138 return 0;
25139 }
25140
25141 tree t;
25142 int i;
25143 tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
25144 bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
25145
25146 for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
25147 t && t != void_list_node; t = TREE_CHAIN (t), i++)
25148 {
25149 tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
25150 switch (TYPE_MODE (arg_type))
25151 {
25152 case E_QImode:
25153 case E_HImode:
25154 case E_SImode:
25155 case E_DImode:
25156 case E_SFmode:
25157 case E_DFmode:
25158 /* case E_SCmode: */
25159 /* case E_DCmode: */
25160 if (!AGGREGATE_TYPE_P (arg_type))
25161 break;
25162 /* FALLTHRU */
25163 default:
25164 if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
25165 break;
25166 if (explicit_p)
25167 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
25168 "unsupported argument type %qT for simd", arg_type);
25169 return 0;
25170 }
25171 }
25172
25173 if (!TREE_PUBLIC (node->decl) || !explicit_p)
25174 {
25175 /* If the function isn't exported, we can pick up just one ISA
25176 for the clones. */
25177 if (TARGET_AVX512F && TARGET_EVEX512)
25178 clonei->vecsize_mangle = 'e';
25179 else if (TARGET_AVX2)
25180 clonei->vecsize_mangle = 'd';
25181 else if (TARGET_AVX)
25182 clonei->vecsize_mangle = 'c';
25183 else
25184 clonei->vecsize_mangle = 'b';
25185 ret = 1;
25186 }
25187 else
25188 {
25189 clonei->vecsize_mangle = "bcde"[num];
25190 ret = 4;
25191 }
25192 clonei->mask_mode = VOIDmode;
25193 switch (clonei->vecsize_mangle)
25194 {
25195 case 'b':
25196 clonei->vecsize_int = 128;
25197 clonei->vecsize_float = 128;
25198 break;
25199 case 'c':
25200 clonei->vecsize_int = 128;
25201 clonei->vecsize_float = 256;
25202 break;
25203 case 'd':
25204 clonei->vecsize_int = 256;
25205 clonei->vecsize_float = 256;
25206 break;
25207 case 'e':
25208 clonei->vecsize_int = 512;
25209 clonei->vecsize_float = 512;
25210 if (TYPE_MODE (base_type) == QImode)
25211 clonei->mask_mode = DImode;
25212 else
25213 clonei->mask_mode = SImode;
25214 break;
25215 }
25216 if (clonei->simdlen == 0)
25217 {
25218 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
25219 clonei->simdlen = clonei->vecsize_int;
25220 else
25221 clonei->simdlen = clonei->vecsize_float;
25222 clonei->simdlen = clonei->simdlen
25223 / GET_MODE_BITSIZE (TYPE_MODE (base_type));
25224 }
25225 else if (clonei->simdlen > 16)
25226 {
25227 /* For compatibility with ICC, use the same upper bounds
25228 for simdlen. In particular, for CTYPE below, use the return type,
25229	 unless the function returns void, in which case use the characteristic
25230	 type.  If it is possible for the given SIMDLEN to pass a CTYPE value
25231	 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
25232	 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
25233	 emit the corresponding clone.  */
25234 tree ctype = ret_type;
25235 if (VOID_TYPE_P (ret_type))
25236 ctype = base_type;
25237 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
25238 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
25239 cnt /= clonei->vecsize_int;
25240 else
25241 cnt /= clonei->vecsize_float;
25242 if (cnt > (TARGET_64BIT ? 16 : 8))
25243 {
25244 if (explicit_p)
25245 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
25246 "unsupported simdlen %wd",
25247 clonei->simdlen.to_constant ());
25248 return 0;
25249 }
25250 }
25251 return ret;
25252}
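/* For illustration: the vecsize_mangle letters chosen above become the
   ISA letter of the x86 vector-function-ABI names, so an exported

     #pragma omp declare simd notinbranch
     float f (float x);

   typically gets clones along the lines of _ZGVbN4_f, _ZGVcN8_f,
   _ZGVdN8_f and _ZGVeN16_f (the exact simdlen depends on the
   characteristic type; this is a sketch, not a normative list).  */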
25253
25254/* If SIMD clone NODE can't be used in a vectorized loop
25255   in the current function, return -1, otherwise return the badness of using it
25256 (0 if it is most desirable from vecsize_mangle point of view, 1
25257 slightly less desirable, etc.). */
25258
25259static int
25260ix86_simd_clone_usable (struct cgraph_node *node)
25261{
25262 switch (node->simdclone->vecsize_mangle)
25263 {
25264 case 'b':
25265 if (!TARGET_SSE2)
25266 return -1;
25267 if (!TARGET_AVX)
25268 return 0;
25269 return (TARGET_AVX512F && TARGET_EVEX512) ? 3 : TARGET_AVX2 ? 2 : 1;
25270 case 'c':
25271 if (!TARGET_AVX)
25272 return -1;
25273 return (TARGET_AVX512F && TARGET_EVEX512) ? 2 : TARGET_AVX2 ? 1 : 0;
25274 case 'd':
25275 if (!TARGET_AVX2)
25276 return -1;
25277 return (TARGET_AVX512F && TARGET_EVEX512) ? 1 : 0;
25278 case 'e':
25279 if (!TARGET_AVX512F || !TARGET_EVEX512)
25280 return -1;
25281 return 0;
25282 default:
25283 gcc_unreachable ();
25284 }
25285}
25286
25287/* This function adjusts the unroll factor based on
25288   the hardware capabilities.  For example, bdver3 has
25289   a loop buffer which makes unrolling of smaller
25290   loops less important.  This function decides the
25291   unroll factor using the number of memory references
25292   (the value 32 is used) as a heuristic.  */
25293
25294static unsigned
25295ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
25296{
25297 basic_block *bbs;
25298 rtx_insn *insn;
25299 unsigned i;
25300 unsigned mem_count = 0;
25301
25302  /* Unroll small loops when the unroll factor is not explicitly
25303     specified.  */
25304 if (ix86_unroll_only_small_loops && !loop->unroll)
25305 {
25306 if (loop->ninsns <= ix86_cost->small_unroll_ninsns)
25307 return MIN (nunroll, ix86_cost->small_unroll_factor);
25308 else
25309 return 1;
25310 }
25311
25312 if (!TARGET_ADJUST_UNROLL)
25313 return nunroll;
25314
25315 /* Count the number of memory references within the loop body.
25316 This value determines the unrolling factor for bdver3 and bdver4
25317 architectures. */
25318 subrtx_iterator::array_type array;
25319 bbs = get_loop_body (loop);
25320 for (i = 0; i < loop->num_nodes; i++)
25321 FOR_BB_INSNS (bbs[i], insn)
25322 if (NONDEBUG_INSN_P (insn))
25323 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
25324 if (const_rtx x = *iter)
25325 if (MEM_P (x))
25326 {
25327 machine_mode mode = GET_MODE (x);
25328 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
25329 if (n_words > 4)
25330 mem_count += 2;
25331 else
25332 mem_count += 1;
25333 }
25334 free (ptr: bbs);
25335
25336  if (mem_count && mem_count <= 32)
25337 return MIN (nunroll, 32 / mem_count);
25338
25339 return nunroll;
25340}
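/* For illustration: on bdver3/bdver4 a loop body with ten counted
   memory references yields MIN (nunroll, 32 / 10), i.e. an unroll
   factor of at most 3, while a body with more than 32 references
   leaves NUNROLL unchanged.  */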
25341
25342
25343/* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
25344
25345static bool
25346ix86_float_exceptions_rounding_supported_p (void)
25347{
25348 /* For x87 floating point with standard excess precision handling,
25349 there is no adddf3 pattern (since x87 floating point only has
25350 XFmode operations) so the default hook implementation gets this
25351 wrong. */
25352 return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
25353}
25354
25355/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
25356
25357static void
25358ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
25359{
25360 if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
25361 return;
25362 tree exceptions_var = create_tmp_var_raw (integer_type_node);
25363 if (TARGET_80387)
25364 {
25365 tree fenv_index_type = build_index_type (size_int (6));
25366 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
25367 tree fenv_var = create_tmp_var_raw (fenv_type);
25368 TREE_ADDRESSABLE (fenv_var) = 1;
25369 tree fenv_ptr = build_pointer_type (fenv_type);
25370 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
25371 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
25372 tree fnstenv = get_ix86_builtin (c: IX86_BUILTIN_FNSTENV);
25373 tree fldenv = get_ix86_builtin (c: IX86_BUILTIN_FLDENV);
25374 tree fnstsw = get_ix86_builtin (c: IX86_BUILTIN_FNSTSW);
25375 tree fnclex = get_ix86_builtin (c: IX86_BUILTIN_FNCLEX);
25376 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
25377 tree hold_fnclex = build_call_expr (fnclex, 0);
25378 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
25379 NULL_TREE, NULL_TREE);
25380 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
25381 hold_fnclex);
25382 *clear = build_call_expr (fnclex, 0);
25383 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
25384 tree fnstsw_call = build_call_expr (fnstsw, 0);
25385 tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
25386 fnstsw_call, NULL_TREE, NULL_TREE);
25387 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
25388 tree update_mod = build4 (TARGET_EXPR, integer_type_node,
25389 exceptions_var, exceptions_x87,
25390 NULL_TREE, NULL_TREE);
25391 *update = build2 (COMPOUND_EXPR, integer_type_node,
25392 sw_mod, update_mod);
25393 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
25394 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
25395 }
25396 if (TARGET_SSE && TARGET_SSE_MATH)
25397 {
25398 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
25399 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
25400 tree stmxcsr = get_ix86_builtin (c: IX86_BUILTIN_STMXCSR);
25401 tree ldmxcsr = get_ix86_builtin (c: IX86_BUILTIN_LDMXCSR);
25402 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
25403 tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
25404 mxcsr_orig_var, stmxcsr_hold_call,
25405 NULL_TREE, NULL_TREE);
25406 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
25407 mxcsr_orig_var,
25408 build_int_cst (unsigned_type_node, 0x1f80));
25409 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
25410 build_int_cst (unsigned_type_node, 0xffffffc0));
25411 tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
25412 mxcsr_mod_var, hold_mod_val,
25413 NULL_TREE, NULL_TREE);
25414 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
25415 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
25416 hold_assign_orig, hold_assign_mod);
25417 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
25418 ldmxcsr_hold_call);
25419 if (*hold)
25420 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
25421 else
25422 *hold = hold_all;
25423 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
25424 if (*clear)
25425 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
25426 ldmxcsr_clear_call);
25427 else
25428 *clear = ldmxcsr_clear_call;
25429 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
25430 tree exceptions_sse = fold_convert (integer_type_node,
25431 stxmcsr_update_call);
25432 if (*update)
25433 {
25434 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
25435 exceptions_var, exceptions_sse);
25436 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
25437 exceptions_var, exceptions_mod);
25438 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
25439 exceptions_assign);
25440 }
25441 else
25442 *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
25443 exceptions_sse, NULL_TREE, NULL_TREE);
25444 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
25445 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
25446 ldmxcsr_update_call);
25447 }
25448 tree atomic_feraiseexcept
25449 = builtin_decl_implicit (fncode: BUILT_IN_ATOMIC_FERAISEEXCEPT);
25450 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
25451 1, exceptions_var);
25452 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
25453 atomic_feraiseexcept_call);
25454}
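/* Rough usage picture, for illustration: the *HOLD/*CLEAR/*UPDATE
   sequences built above surround C11 atomic compound assignments on
   floating-point types, e.g.

     _Atomic double d;
     d *= 2.0;

   *HOLD runs before the compare-and-exchange loop, *CLEAR discards
   exception flags raised by a failed iteration, and *UPDATE re-raises
   the flags of the iteration whose result was finally stored.  */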
25455
25456#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
25457/* For i386, a common symbol is local only for non-PIE binaries.  For
25458   x86-64, a common symbol is local only for non-PIE binaries or when
25459   the linker supports copy relocations in PIE binaries.  */
25460
25461static bool
25462ix86_binds_local_p (const_tree exp)
25463{
25464 bool direct_extern_access
25465 = (ix86_direct_extern_access
25466 && !(VAR_OR_FUNCTION_DECL_P (exp)
25467 && lookup_attribute (attr_name: "nodirect_extern_access",
25468 DECL_ATTRIBUTES (exp))));
25469 if (!direct_extern_access)
25470 ix86_has_no_direct_extern_access = true;
25471 return default_binds_local_p_3 (exp, flag_shlib != 0, true,
25472 direct_extern_access,
25473 (direct_extern_access
25474 && (!flag_pic
25475 || (TARGET_64BIT
25476 && HAVE_LD_PIE_COPYRELOC != 0))));
25477}
25478
25479/* If flag_pic or ix86_direct_extern_access is false, then neither
25480 local nor global relocs should be placed in readonly memory. */
25481
25482static int
25483ix86_reloc_rw_mask (void)
25484{
25485 return (flag_pic || !ix86_direct_extern_access) ? 3 : 0;
25486}
25487#endif
25488
25489/* Return true iff ADDR can be used as a symbolic base address. */
25490
25491static bool
25492symbolic_base_address_p (rtx addr)
25493{
25494 if (GET_CODE (addr) == SYMBOL_REF)
25495 return true;
25496
25497 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF)
25498 return true;
25499
25500 return false;
25501}
25502
25503/* Return true iff ADDR can be used as a base address. */
25504
25505static bool
25506base_address_p (rtx addr)
25507{
25508 if (REG_P (addr))
25509 return true;
25510
25511 if (symbolic_base_address_p (addr))
25512 return true;
25513
25514 return false;
25515}
25516
25517/* If MEM is in the form of [(base+symbase)+offset], extract the three
25518   parts of the address and store them in BASE, SYMBASE and OFFSET;
25519   otherwise return false.  */
25520
25521static bool
25522extract_base_offset_in_addr (rtx mem, rtx *base, rtx *symbase, rtx *offset)
25523{
25524 rtx addr;
25525
25526 gcc_assert (MEM_P (mem));
25527
25528 addr = XEXP (mem, 0);
25529
25530 if (GET_CODE (addr) == CONST)
25531 addr = XEXP (addr, 0);
25532
25533 if (base_address_p (addr))
25534 {
25535 *base = addr;
25536 *symbase = const0_rtx;
25537 *offset = const0_rtx;
25538 return true;
25539 }
25540
25541 if (GET_CODE (addr) == PLUS
25542 && base_address_p (XEXP (addr, 0)))
25543 {
25544 rtx addend = XEXP (addr, 1);
25545
25546 if (GET_CODE (addend) == CONST)
25547 addend = XEXP (addend, 0);
25548
25549 if (CONST_INT_P (addend))
25550 {
25551 *base = XEXP (addr, 0);
25552 *symbase = const0_rtx;
25553 *offset = addend;
25554 return true;
25555 }
25556
25557 /* Also accept REG + symbolic ref, with or without a CONST_INT
25558 offset. */
25559 if (REG_P (XEXP (addr, 0)))
25560 {
25561 if (symbolic_base_address_p (addr: addend))
25562 {
25563 *base = XEXP (addr, 0);
25564 *symbase = addend;
25565 *offset = const0_rtx;
25566 return true;
25567 }
25568
25569 if (GET_CODE (addend) == PLUS
25570 && symbolic_base_address_p (XEXP (addend, 0))
25571 && CONST_INT_P (XEXP (addend, 1)))
25572 {
25573 *base = XEXP (addr, 0);
25574 *symbase = XEXP (addend, 0);
25575 *offset = XEXP (addend, 1);
25576 return true;
25577 }
25578 }
25579 }
25580
25581 return false;
25582}
25583
25584/* Given OPERANDS of consecutive load/store, check if we can merge
25585 them into move multiple. LOAD is true if they are load instructions.
25586 MODE is the mode of memory operands. */
25587
25588bool
25589ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
25590 machine_mode mode)
25591{
25592 HOST_WIDE_INT offval_1, offval_2, msize;
25593 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2,
25594 symbase_1, symbase_2, offset_1, offset_2;
25595
25596 if (load)
25597 {
25598 mem_1 = operands[1];
25599 mem_2 = operands[3];
25600 reg_1 = operands[0];
25601 reg_2 = operands[2];
25602 }
25603 else
25604 {
25605 mem_1 = operands[0];
25606 mem_2 = operands[2];
25607 reg_1 = operands[1];
25608 reg_2 = operands[3];
25609 }
25610
25611 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
25612
25613 if (REGNO (reg_1) != REGNO (reg_2))
25614 return false;
25615
25616 /* Check if the addresses are in the form of [base+offset]. */
25617 if (!extract_base_offset_in_addr (mem: mem_1, base: &base_1, symbase: &symbase_1, offset: &offset_1))
25618 return false;
25619 if (!extract_base_offset_in_addr (mem: mem_2, base: &base_2, symbase: &symbase_2, offset: &offset_2))
25620 return false;
25621
25622 /* Check if the bases are the same. */
25623 if (!rtx_equal_p (base_1, base_2) || !rtx_equal_p (symbase_1, symbase_2))
25624 return false;
25625
25626 offval_1 = INTVAL (offset_1);
25627 offval_2 = INTVAL (offset_2);
25628 msize = GET_MODE_SIZE (mode);
25629 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
25630 if (offval_1 + msize != offval_2)
25631 return false;
25632
25633 return true;
25634}
25635
25636/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
25637
25638static bool
25639ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
25640 optimization_type opt_type)
25641{
25642 switch (op)
25643 {
25644 case asin_optab:
25645 case acos_optab:
25646 case log1p_optab:
25647 case exp_optab:
25648 case exp10_optab:
25649 case exp2_optab:
25650 case expm1_optab:
25651 case ldexp_optab:
25652 case scalb_optab:
25653 case round_optab:
25654 case lround_optab:
25655 return opt_type == OPTIMIZE_FOR_SPEED;
25656
25657 case rint_optab:
25658 if (SSE_FLOAT_MODE_P (mode1)
25659 && TARGET_SSE_MATH
25660 && !flag_trapping_math
25661 && !TARGET_SSE4_1
25662 && mode1 != HFmode)
25663 return opt_type == OPTIMIZE_FOR_SPEED;
25664 return true;
25665
25666 case floor_optab:
25667 case ceil_optab:
25668 case btrunc_optab:
25669 if (((SSE_FLOAT_MODE_P (mode1)
25670 && TARGET_SSE_MATH
25671 && TARGET_SSE4_1)
25672 || mode1 == HFmode)
25673 && !flag_trapping_math)
25674 return true;
25675 return opt_type == OPTIMIZE_FOR_SPEED;
25676
25677 case rsqrt_optab:
25678 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode: mode1);
25679
25680 default:
25681 return true;
25682 }
25683}
25684
25685/* Address space support.
25686
25687 This is not "far pointers" in the 16-bit sense, but an easy way
25688 to use %fs and %gs segment prefixes. Therefore:
25689
25690 (a) All address spaces have the same modes,
25691 (b) All address spaces have the same address forms,
25692 (c) While %fs and %gs are technically subsets of the generic
25693 address space, they are probably not subsets of each other.
25694 (d) Since we have no access to the segment base register values
25695 without resorting to a system call, we cannot convert a
25696 non-default address space to a default address space.
25697 Therefore we do not claim %fs or %gs are subsets of generic.
25698
25699 Therefore we can (mostly) use the default hooks. */
25700
25701/* All use of segmentation is assumed to make address 0 valid. */
25702
25703static bool
25704ix86_addr_space_zero_address_valid (addr_space_t as)
25705{
25706 return as != ADDR_SPACE_GENERIC;
25707}
25708
25709static void
25710ix86_init_libfuncs (void)
25711{
25712 if (TARGET_64BIT)
25713 {
25714 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
25715 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
25716 }
25717 else
25718 {
25719 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
25720 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
25721 }
25722
25723#if TARGET_MACHO
25724 darwin_rename_builtins ();
25725#endif
25726}
25727
25728/* Set the value of FLT_EVAL_METHOD in float.h. When using only the
25729 FPU, assume that the fpcw is set to extended precision; when using
25730 only SSE, rounding is correct; when using both SSE and the FPU,
25731 the rounding precision is indeterminate, since either may be chosen
25732 apparently at random. */
25733
25734static enum flt_eval_method
25735ix86_get_excess_precision (enum excess_precision_type type)
25736{
25737 switch (type)
25738 {
25739 case EXCESS_PRECISION_TYPE_FAST:
25740 /* The fastest type to promote to will always be the native type,
25741 whether that occurs with implicit excess precision or
25742 otherwise. */
25743 return TARGET_AVX512FP16
25744 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
25745 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
25746 case EXCESS_PRECISION_TYPE_STANDARD:
25747 case EXCESS_PRECISION_TYPE_IMPLICIT:
25748 /* Otherwise, the excess precision we want when we are
25749 in a standards compliant mode, and the implicit precision we
25750 provide would be identical were it not for the unpredictable
25751 cases. */
25752 if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
25753 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25754 else if (!TARGET_80387)
25755 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
25756 else if (!TARGET_MIX_SSE_I387)
25757 {
25758 if (!(TARGET_SSE && TARGET_SSE_MATH))
25759 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
25760 else if (TARGET_SSE2)
25761 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
25762 }
25763
25764 /* If we are in standards compliant mode, but we know we will
25765 calculate in unpredictable precision, return
25766 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
25767 excess precision if the target can't guarantee it will honor
25768 it. */
25769 return (type == EXCESS_PRECISION_TYPE_STANDARD
25770 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
25771 : FLT_EVAL_METHOD_UNPREDICTABLE);
25772 case EXCESS_PRECISION_TYPE_FLOAT16:
25773 if (TARGET_80387
25774 && !(TARGET_SSE_MATH && TARGET_SSE))
25775 error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>");
25776 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25777 default:
25778 gcc_unreachable ();
25779 }
25780
25781 return FLT_EVAL_METHOD_UNPREDICTABLE;
25782}
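/* Illustrative example of the effect: with -m32 -mfpmath=387 and
   -fexcess-precision=standard this returns
   FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE, so in

     float a, b, c;
     c = a * b + 1.0f;

   the intermediate result is carried in the 80-bit x87 format and only
   rounded to float at the assignment, whereas with -mfpmath=sse -msse2
   the same expression is evaluated directly in float.  */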
25783
25784/* Return true if _BitInt(N) is supported and fill its details into *INFO. */
25785bool
25786ix86_bitint_type_info (int n, struct bitint_info *info)
25787{
25788 if (n <= 8)
25789 info->limb_mode = QImode;
25790 else if (n <= 16)
25791 info->limb_mode = HImode;
25792 else if (n <= 32 || (!TARGET_64BIT && n > 64))
25793 info->limb_mode = SImode;
25794 else
25795 info->limb_mode = DImode;
25796 info->abi_limb_mode = info->limb_mode;
25797 info->big_endian = false;
25798 info->extended = false;
25799 return true;
25800}
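/* Worked examples of the limb selection above: _BitInt(13) uses a
   single HImode limb; _BitInt(100) uses DImode limbs on x86-64 (two
   limbs, the upper one only partially used) but SImode limbs on 32-bit
   targets, where anything wider than 64 bits is split into 32-bit
   limbs.  */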
25801
25802 /* Implement PUSH_ROUNDING. On 386, we have a pushw instruction that
25803 decrements by exactly 2 no matter what the position was; there is no pushb.
25804
25805 But as the CIE data alignment factor on this arch is -4 for 32-bit targets
25806 and -8 for 64-bit targets, we need to make sure all stack pointer adjustments
25807 are a multiple of 4 for 32-bit targets and 8 for 64-bit targets. */
25808
25809poly_int64
25810ix86_push_rounding (poly_int64 bytes)
25811{
25812 return ROUND_UP (bytes, UNITS_PER_WORD);
25813}
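/* For example: pushing a single char argument still adjusts the stack
   by ix86_push_rounding (1), i.e. by 4 bytes with -m32 and by 8 bytes
   with -m64, which keeps every push a multiple of the CIE data
   alignment factor mentioned above.  */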
25814
25815 /* Use 8 bits of metadata starting from bit 48 for LAM_U48,
25816 6 bits of metadata starting from bit 57 for LAM_U57. */
25817#define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48 \
25818 ? 48 \
25819 : (ix86_lam_type == lam_u57 ? 57 : 0))
25820#define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48 \
25821 ? 8 \
25822 : (ix86_lam_type == lam_u57 ? 6 : 0))
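/* Resulting pointer layout (sketch):

     LAM_U57:  bit 63 preserved | tag in bits 62:57 (6 bits) | bits 56:0 address
     LAM_U48:  tag in bits 55:48 (8 bits)                    | bits 47:0 address

   so under LAM_U57 a tag value T is installed as T << 57 while bit 63
   is left untouched.  */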
25823
25824/* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. */
25825bool
25826ix86_memtag_can_tag_addresses ()
25827{
25828 return ix86_lam_type != lam_none && TARGET_LP64;
25829}
25830
25831/* Implement TARGET_MEMTAG_TAG_SIZE. */
25832unsigned char
25833ix86_memtag_tag_size ()
25834{
25835 return IX86_HWASAN_TAG_SIZE;
25836}
25837
25838/* Implement TARGET_MEMTAG_SET_TAG. */
25839rtx
25840ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target)
25841{
25842 /* default_memtag_insert_random_tag may
25843 generate a tag whose value needs more than 6 bits. */
25844 if (ix86_lam_type == lam_u57)
25845 {
25846 unsigned HOST_WIDE_INT and_imm
25847 = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
25848
25849 emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm)));
25850 }
25851 tag = expand_simple_binop (Pmode, ASHIFT, tag,
25852 GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX,
25853 /* unsignedp = */1, OPTAB_WIDEN);
25854 rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target,
25855 /* unsignedp = */1, OPTAB_DIRECT);
25856 return ret;
25857}
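/* Worked example (LAM_U57, illustrative values): with
   untagged = 0x00007f0000001000 and tag = 0x2a, the AND above keeps the
   six low tag bits, the shift produces 0x2a << 57 = 0x5400000000000000,
   and the IOR yields the tagged pointer 0x54007f0000001000.  */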
25858
25859/* Implement TARGET_MEMTAG_EXTRACT_TAG. */
25860rtx
25861ix86_memtag_extract_tag (rtx tagged_pointer, rtx target)
25862{
25863 rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer,
25864 GEN_INT (IX86_HWASAN_SHIFT), target,
25865 /* unsignedp = */0,
25866 OPTAB_DIRECT);
25867 rtx ret = gen_reg_rtx (QImode);
25868 /* Mask off bit63 when LAM_U57. */
25869 if (ix86_lam_type == lam_u57)
25870 {
25871 unsigned HOST_WIDE_INT and_imm
25872 = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
25873 emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag),
25874 gen_int_mode (and_imm, QImode)));
25875 }
25876 else
25877 emit_move_insn (ret, gen_lowpart (QImode, tag));
25878 return ret;
25879}
25880
25881/* The default implementation of TARGET_MEMTAG_UNTAGGED_POINTER. */
25882rtx
25883ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
25884{
25885 /* Leave bit63 alone. */
25886 rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT)
25887 + (HOST_WIDE_INT_1U << 63) - 1),
25888 Pmode);
25889 rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer,
25890 tag_mask, target, true,
25891 OPTAB_DIRECT);
25892 gcc_assert (untagged_base);
25893 return untagged_base;
25894}
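/* For LAM_U57 the mask computed above is
   (1 << 57) + (1 << 63) - 1 = 0x81ffffffffffffff, which clears the six
   tag bits 62:57 while keeping bit 63 and the low 57 address bits.  */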
25895
25896/* Implement TARGET_MEMTAG_ADD_TAG. */
25897rtx
25898ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset)
25899{
25900 rtx base_tag = gen_reg_rtx (QImode);
25901 rtx base_addr = gen_reg_rtx (Pmode);
25902 rtx tagged_addr = gen_reg_rtx (Pmode);
25903 rtx new_tag = gen_reg_rtx (QImode);
25904 unsigned HOST_WIDE_INT and_imm
25905 = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1;
25906
25907 /* When there's "overflow" in tag adding, we need
25908 to mask the most significant bit off. */
25909 emit_move_insn (base_tag, ix86_memtag_extract_tag (base, NULL_RTX));
25910 emit_move_insn (base_addr,
25911 ix86_memtag_untagged_pointer (base, NULL_RTX));
25912 emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode)));
25913 emit_move_insn (new_tag, base_tag);
25914 emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode)));
25915 emit_move_insn (tagged_addr,
25916 ix86_memtag_set_tag (base_addr, new_tag, NULL_RTX));
25917 return plus_constant (Pmode, tagged_addr, offset);
25918}
25919
25920/* Target-specific selftests. */
25921
25922#if CHECKING_P
25923
25924namespace selftest {
25925
25926/* Verify that hard regs are dumped as expected (in compact mode). */
25927
25928static void
25929ix86_test_dumping_hard_regs ()
25930{
25931 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
25932 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
25933}
25934
25935/* Test dumping an insn with repeated references to the same SCRATCH,
25936 to verify the rtx_reuse code. */
25937
25938static void
25939ix86_test_dumping_memory_blockage ()
25940{
25941 set_new_first_and_last_insn (NULL, NULL);
25942
25943 rtx pat = gen_memory_blockage ();
25944 rtx_reuse_manager r;
25945 r.preprocess (pat);
25946
25947 /* Verify that the repeated references to the SCRATCH show use of
25948 reuse IDs. The first should be prefixed with a reuse ID,
25949 and the second should be dumped as a "reuse_rtx" of that ID.
25950 The expected string assumes Pmode == DImode. */
25951 if (Pmode == DImode)
25952 ASSERT_RTL_DUMP_EQ_WITH_REUSE
25953 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
25954 " (unspec:BLK [\n"
25955 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
25956 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
25957}
25958
25959/* Verify loading an RTL dump; specifically a dump of copying
25960 a param on x86_64 from a hard reg into the frame.
25961 This test is target-specific since the dump contains target-specific
25962 hard reg names. */
25963
25964static void
25965ix86_test_loading_dump_fragment_1 ()
25966{
25967 rtl_dump_test t (SELFTEST_LOCATION,
25968 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
25969
25970 rtx_insn *insn = get_insn_by_uid (1);
25971
25972 /* The block structure and indentation here is purely for
25973 readability; it mirrors the structure of the rtx. */
25974 tree mem_expr;
25975 {
25976 rtx pat = PATTERN (insn);
25977 ASSERT_EQ (SET, GET_CODE (pat));
25978 {
25979 rtx dest = SET_DEST (pat);
25980 ASSERT_EQ (MEM, GET_CODE (dest));
25981 /* Verify the "/c" was parsed. */
25982 ASSERT_TRUE (RTX_FLAG (dest, call));
25983 ASSERT_EQ (SImode, GET_MODE (dest));
25984 {
25985 rtx addr = XEXP (dest, 0);
25986 ASSERT_EQ (PLUS, GET_CODE (addr));
25987 ASSERT_EQ (DImode, GET_MODE (addr));
25988 {
25989 rtx lhs = XEXP (addr, 0);
25990 /* Verify that the "frame" REG was consolidated. */
25991 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
25992 }
25993 {
25994 rtx rhs = XEXP (addr, 1);
25995 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
25996 ASSERT_EQ (-4, INTVAL (rhs));
25997 }
25998 }
25999 /* Verify the "[1 i+0 S4 A32]" was parsed. */
26000 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
26001 /* "i" should have been handled by synthesizing a global int
26002 variable named "i". */
26003 mem_expr = MEM_EXPR (dest);
26004 ASSERT_NE (mem_expr, NULL);
26005 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
26006 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
26007 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
26008 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
26009 /* "+0". */
26010 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
26011 ASSERT_EQ (0, MEM_OFFSET (dest));
26012 /* "S4". */
26013 ASSERT_EQ (4, MEM_SIZE (dest));
26014 /* "A32. */
26015 ASSERT_EQ (32, MEM_ALIGN (dest));
26016 }
26017 {
26018 rtx src = SET_SRC (pat);
26019 ASSERT_EQ (REG, GET_CODE (src));
26020 ASSERT_EQ (SImode, GET_MODE (src));
26021 ASSERT_EQ (5, REGNO (src));
26022 tree reg_expr = REG_EXPR (src);
26023 /* "i" here should point to the same var as for the MEM_EXPR. */
26024 ASSERT_EQ (reg_expr, mem_expr);
26025 }
26026 }
26027}
26028
26029/* Verify that the RTL loader copes with a call_insn dump.
26030 This test is target-specific since the dump contains a target-specific
26031 hard reg name. */
26032
26033static void
26034ix86_test_loading_call_insn ()
26035{
26036 /* The test dump includes register "xmm0", which requires TARGET_SSE
26037 to exist. */
26038 if (!TARGET_SSE)
26039 return;
26040
26041 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
26042
26043 rtx_insn *insn = get_insns ();
26044 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
26045
26046 /* "/j". */
26047 ASSERT_TRUE (RTX_FLAG (insn, jump));
26048
26049 rtx pat = PATTERN (insn);
26050 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
26051
26052 /* Verify REG_NOTES. */
26053 {
26054 /* "(expr_list:REG_CALL_DECL". */
26055 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
26056 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
26057 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
26058
26059 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
26060 rtx_expr_list *note1 = note0->next ();
26061 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
26062
26063 ASSERT_EQ (NULL, note1->next ());
26064 }
26065
26066 /* Verify CALL_INSN_FUNCTION_USAGE. */
26067 {
26068 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
26069 rtx_expr_list *usage
26070 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
26071 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
26072 ASSERT_EQ (DFmode, GET_MODE (usage));
26073 ASSERT_EQ (USE, GET_CODE (usage->element ()));
26074 ASSERT_EQ (NULL, usage->next ());
26075 }
26076}
26077
26078 /* Verify that the RTL loader copes with a dump from print_rtx_function.
26079 This test is target-specific since the dump contains target-specific
26080 hard reg names. */
26081
26082static void
26083ix86_test_loading_full_dump ()
26084{
26085 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
26086
26087 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
26088
26089 rtx_insn *insn_1 = get_insn_by_uid (1);
26090 ASSERT_EQ (NOTE, GET_CODE (insn_1));
26091
26092 rtx_insn *insn_7 = get_insn_by_uid (7);
26093 ASSERT_EQ (INSN, GET_CODE (insn_7));
26094 ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
26095
26096 rtx_insn *insn_15 = get_insn_by_uid (15);
26097 ASSERT_EQ (INSN, GET_CODE (insn_15));
26098 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
26099
26100 /* Verify crtl->return_rtx. */
26101 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
26102 ASSERT_EQ (0, REGNO (crtl->return_rtx));
26103 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
26104}
26105
26106/* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
26107 In particular, verify that it correctly loads the 2nd operand.
26108 This test is target-specific since these are machine-specific
26109 operands (and enums). */
26110
26111static void
26112ix86_test_loading_unspec ()
26113{
26114 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
26115
26116 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
26117
26118 ASSERT_TRUE (cfun);
26119
26120 /* Test of an UNSPEC. */
26121 rtx_insn *insn = get_insns ();
26122 ASSERT_EQ (INSN, GET_CODE (insn));
26123 rtx set = single_set (insn);
26124 ASSERT_NE (NULL, set);
26125 rtx dst = SET_DEST (set);
26126 ASSERT_EQ (MEM, GET_CODE (dst));
26127 rtx src = SET_SRC (set);
26128 ASSERT_EQ (UNSPEC, GET_CODE (src));
26129 ASSERT_EQ (BLKmode, GET_MODE (src));
26130 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
26131
26132 rtx v0 = XVECEXP (src, 0, 0);
26133
26134 /* Verify that the two uses of the first SCRATCH have pointer
26135 equality. */
26136 rtx scratch_a = XEXP (dst, 0);
26137 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
26138
26139 rtx scratch_b = XEXP (v0, 0);
26140 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
26141
26142 ASSERT_EQ (scratch_a, scratch_b);
26143
26144 /* Verify that the two mems are thus treated as equal. */
26145 ASSERT_TRUE (rtx_equal_p (dst, v0));
26146
26147 /* Verify that the insn is recognized. */
26148 ASSERT_NE (-1, recog_memoized (insn));
26149
26150 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
26151 insn = NEXT_INSN (insn);
26152 ASSERT_EQ (INSN, GET_CODE (insn));
26153
26154 set = single_set (insn);
26155 ASSERT_NE (NULL, set);
26156
26157 src = SET_SRC (set);
26158 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
26159 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
26160}
26161
26162/* Run all target-specific selftests. */
26163
26164static void
26165ix86_run_selftests (void)
26166{
26167 ix86_test_dumping_hard_regs ();
26168 ix86_test_dumping_memory_blockage ();
26169
26170 /* Various tests of loading RTL dumps, here because they contain
26171 ix86-isms (e.g. names of hard regs). */
26172 ix86_test_loading_dump_fragment_1 ();
26173 ix86_test_loading_call_insn ();
26174 ix86_test_loading_full_dump ();
26175 ix86_test_loading_unspec ();
26176}
26177
26178} // namespace selftest
26179
26180#endif /* CHECKING_P */
26181
26182static const scoped_attribute_specs *const ix86_attribute_table[] =
26183{
26184 &ix86_gnu_attribute_table
26185};
26186
26187/* Initialize the GCC target structure. */
26188#undef TARGET_RETURN_IN_MEMORY
26189#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
26190
26191#undef TARGET_LEGITIMIZE_ADDRESS
26192#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
26193
26194#undef TARGET_ATTRIBUTE_TABLE
26195#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
26196#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
26197#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
26198#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
26199# undef TARGET_MERGE_DECL_ATTRIBUTES
26200# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
26201#endif
26202
26203#undef TARGET_INVALID_CONVERSION
26204#define TARGET_INVALID_CONVERSION ix86_invalid_conversion
26205
26206#undef TARGET_INVALID_UNARY_OP
26207#define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
26208
26209#undef TARGET_INVALID_BINARY_OP
26210#define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
26211
26212#undef TARGET_COMP_TYPE_ATTRIBUTES
26213#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
26214
26215#undef TARGET_INIT_BUILTINS
26216#define TARGET_INIT_BUILTINS ix86_init_builtins
26217#undef TARGET_BUILTIN_DECL
26218#define TARGET_BUILTIN_DECL ix86_builtin_decl
26219#undef TARGET_EXPAND_BUILTIN
26220#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
26221
26222#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
26223#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
26224 ix86_builtin_vectorized_function
26225
26226#undef TARGET_VECTORIZE_BUILTIN_GATHER
26227#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
26228
26229#undef TARGET_VECTORIZE_BUILTIN_SCATTER
26230#define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
26231
26232#undef TARGET_BUILTIN_RECIPROCAL
26233#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
26234
26235#undef TARGET_ASM_FUNCTION_EPILOGUE
26236#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
26237
26238#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
26239#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
26240 ix86_print_patchable_function_entry
26241
26242#undef TARGET_ENCODE_SECTION_INFO
26243#ifndef SUBTARGET_ENCODE_SECTION_INFO
26244#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
26245#else
26246#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
26247#endif
26248
26249#undef TARGET_ASM_OPEN_PAREN
26250#define TARGET_ASM_OPEN_PAREN ""
26251#undef TARGET_ASM_CLOSE_PAREN
26252#define TARGET_ASM_CLOSE_PAREN ""
26253
26254#undef TARGET_ASM_BYTE_OP
26255#define TARGET_ASM_BYTE_OP ASM_BYTE
26256
26257#undef TARGET_ASM_ALIGNED_HI_OP
26258#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
26259#undef TARGET_ASM_ALIGNED_SI_OP
26260#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
26261#ifdef ASM_QUAD
26262#undef TARGET_ASM_ALIGNED_DI_OP
26263#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
26264#endif
26265
26266#undef TARGET_PROFILE_BEFORE_PROLOGUE
26267#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
26268
26269#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
26270#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
26271
26272#undef TARGET_ASM_UNALIGNED_HI_OP
26273#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
26274#undef TARGET_ASM_UNALIGNED_SI_OP
26275#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
26276#undef TARGET_ASM_UNALIGNED_DI_OP
26277#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
26278
26279#undef TARGET_PRINT_OPERAND
26280#define TARGET_PRINT_OPERAND ix86_print_operand
26281#undef TARGET_PRINT_OPERAND_ADDRESS
26282#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
26283#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
26284#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
26285#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
26286#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
26287
26288#undef TARGET_SCHED_INIT_GLOBAL
26289#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
26290#undef TARGET_SCHED_ADJUST_COST
26291#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
26292#undef TARGET_SCHED_ISSUE_RATE
26293#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
26294#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
26295#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
26296 ia32_multipass_dfa_lookahead
26297#undef TARGET_SCHED_MACRO_FUSION_P
26298#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
26299#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
26300#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
26301
26302#undef TARGET_FUNCTION_OK_FOR_SIBCALL
26303#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
26304
26305#undef TARGET_MEMMODEL_CHECK
26306#define TARGET_MEMMODEL_CHECK ix86_memmodel_check
26307
26308#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
26309#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
26310
26311#ifdef HAVE_AS_TLS
26312#undef TARGET_HAVE_TLS
26313#define TARGET_HAVE_TLS true
26314#endif
26315#undef TARGET_CANNOT_FORCE_CONST_MEM
26316#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
26317#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
26318#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
26319
26320#undef TARGET_DELEGITIMIZE_ADDRESS
26321#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
26322
26323#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
26324#define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
26325
26326#undef TARGET_MS_BITFIELD_LAYOUT_P
26327#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
26328
26329#if TARGET_MACHO
26330#undef TARGET_BINDS_LOCAL_P
26331#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
26332#else
26333#undef TARGET_BINDS_LOCAL_P
26334#define TARGET_BINDS_LOCAL_P ix86_binds_local_p
26335#endif
26336#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
26337#undef TARGET_BINDS_LOCAL_P
26338#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
26339#endif
26340
26341#undef TARGET_ASM_OUTPUT_MI_THUNK
26342#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
26343#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
26344#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
26345
26346#undef TARGET_ASM_FILE_START
26347#define TARGET_ASM_FILE_START x86_file_start
26348
26349#undef TARGET_OPTION_OVERRIDE
26350#define TARGET_OPTION_OVERRIDE ix86_option_override
26351
26352#undef TARGET_REGISTER_MOVE_COST
26353#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
26354#undef TARGET_MEMORY_MOVE_COST
26355#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
26356#undef TARGET_RTX_COSTS
26357#define TARGET_RTX_COSTS ix86_rtx_costs
26358#undef TARGET_ADDRESS_COST
26359#define TARGET_ADDRESS_COST ix86_address_cost
26360
26361#undef TARGET_OVERLAP_OP_BY_PIECES_P
26362#define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
26363
26364#undef TARGET_FLAGS_REGNUM
26365#define TARGET_FLAGS_REGNUM FLAGS_REG
26366#undef TARGET_FIXED_CONDITION_CODE_REGS
26367#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
26368#undef TARGET_CC_MODES_COMPATIBLE
26369#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
26370
26371#undef TARGET_MACHINE_DEPENDENT_REORG
26372#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
26373
26374#undef TARGET_BUILD_BUILTIN_VA_LIST
26375#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
26376
26377#undef TARGET_FOLD_BUILTIN
26378#define TARGET_FOLD_BUILTIN ix86_fold_builtin
26379
26380#undef TARGET_GIMPLE_FOLD_BUILTIN
26381#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
26382
26383#undef TARGET_COMPARE_VERSION_PRIORITY
26384#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
26385
26386#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
26387#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
26388 ix86_generate_version_dispatcher_body
26389
26390#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
26391#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
26392 ix86_get_function_versions_dispatcher
26393
26394#undef TARGET_ENUM_VA_LIST_P
26395#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
26396
26397#undef TARGET_FN_ABI_VA_LIST
26398#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
26399
26400#undef TARGET_CANONICAL_VA_LIST_TYPE
26401#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
26402
26403#undef TARGET_EXPAND_BUILTIN_VA_START
26404#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
26405
26406#undef TARGET_MD_ASM_ADJUST
26407#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
26408
26409#undef TARGET_C_EXCESS_PRECISION
26410#define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
26411#undef TARGET_C_BITINT_TYPE_INFO
26412#define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info
26413#undef TARGET_PROMOTE_PROTOTYPES
26414#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
26415#undef TARGET_PUSH_ARGUMENT
26416#define TARGET_PUSH_ARGUMENT ix86_push_argument
26417#undef TARGET_SETUP_INCOMING_VARARGS
26418#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
26419#undef TARGET_MUST_PASS_IN_STACK
26420#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
26421#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
26422#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
26423#undef TARGET_FUNCTION_ARG_ADVANCE
26424#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
26425#undef TARGET_FUNCTION_ARG
26426#define TARGET_FUNCTION_ARG ix86_function_arg
26427#undef TARGET_INIT_PIC_REG
26428#define TARGET_INIT_PIC_REG ix86_init_pic_reg
26429#undef TARGET_USE_PSEUDO_PIC_REG
26430#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
26431#undef TARGET_FUNCTION_ARG_BOUNDARY
26432#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
26433#undef TARGET_PASS_BY_REFERENCE
26434#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
26435#undef TARGET_INTERNAL_ARG_POINTER
26436#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
26437#undef TARGET_UPDATE_STACK_BOUNDARY
26438#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
26439#undef TARGET_GET_DRAP_RTX
26440#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
26441#undef TARGET_STRICT_ARGUMENT_NAMING
26442#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
26443#undef TARGET_STATIC_CHAIN
26444#define TARGET_STATIC_CHAIN ix86_static_chain
26445#undef TARGET_TRAMPOLINE_INIT
26446#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
26447#undef TARGET_RETURN_POPS_ARGS
26448#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
26449
26450#undef TARGET_WARN_FUNC_RETURN
26451#define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
26452
26453#undef TARGET_LEGITIMATE_COMBINED_INSN
26454#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
26455
26456#undef TARGET_ASAN_SHADOW_OFFSET
26457#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
26458
26459#undef TARGET_GIMPLIFY_VA_ARG_EXPR
26460#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
26461
26462#undef TARGET_SCALAR_MODE_SUPPORTED_P
26463#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
26464
26465#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
26466#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
26467ix86_libgcc_floating_mode_supported_p
26468
26469#undef TARGET_VECTOR_MODE_SUPPORTED_P
26470#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
26471
26472#undef TARGET_C_MODE_FOR_SUFFIX
26473#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
26474
26475#ifdef HAVE_AS_TLS
26476#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
26477#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
26478#endif
26479
26480#ifdef SUBTARGET_INSERT_ATTRIBUTES
26481#undef TARGET_INSERT_ATTRIBUTES
26482#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
26483#endif
26484
26485#undef TARGET_MANGLE_TYPE
26486#define TARGET_MANGLE_TYPE ix86_mangle_type
26487
26488#undef TARGET_EMIT_SUPPORT_TINFOS
26489#define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos
26490
26491#undef TARGET_STACK_PROTECT_GUARD
26492#define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
26493
26494#if !TARGET_MACHO
26495#undef TARGET_STACK_PROTECT_FAIL
26496#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
26497#endif
26498
26499#undef TARGET_FUNCTION_VALUE
26500#define TARGET_FUNCTION_VALUE ix86_function_value
26501
26502#undef TARGET_FUNCTION_VALUE_REGNO_P
26503#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
26504
26505#undef TARGET_ZERO_CALL_USED_REGS
26506#define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
26507
26508#undef TARGET_PROMOTE_FUNCTION_MODE
26509#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
26510
26511#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
26512#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
26513
26514#undef TARGET_MEMBER_TYPE_FORCES_BLK
26515#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
26516
26517#undef TARGET_INSTANTIATE_DECLS
26518#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
26519
26520#undef TARGET_SECONDARY_RELOAD
26521#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
26522#undef TARGET_SECONDARY_MEMORY_NEEDED
26523#define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
26524#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
26525#define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
26526
26527#undef TARGET_CLASS_MAX_NREGS
26528#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
26529
26530#undef TARGET_PREFERRED_RELOAD_CLASS
26531#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
26532#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
26533#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
26534#undef TARGET_CLASS_LIKELY_SPILLED_P
26535#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
26536
26537#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
26538#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
26539 ix86_builtin_vectorization_cost
26540#undef TARGET_VECTORIZE_VEC_PERM_CONST
26541#define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
26542#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
26543#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
26544 ix86_preferred_simd_mode
26545#undef TARGET_VECTORIZE_SPLIT_REDUCTION
26546#define TARGET_VECTORIZE_SPLIT_REDUCTION \
26547 ix86_split_reduction
26548#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
26549#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
26550 ix86_autovectorize_vector_modes
26551#undef TARGET_VECTORIZE_GET_MASK_MODE
26552#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
26553#undef TARGET_VECTORIZE_CREATE_COSTS
26554#define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs
26555
26556#undef TARGET_SET_CURRENT_FUNCTION
26557#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
26558
26559#undef TARGET_OPTION_VALID_ATTRIBUTE_P
26560#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
26561
26562#undef TARGET_OPTION_SAVE
26563#define TARGET_OPTION_SAVE ix86_function_specific_save
26564
26565#undef TARGET_OPTION_RESTORE
26566#define TARGET_OPTION_RESTORE ix86_function_specific_restore
26567
26568#undef TARGET_OPTION_POST_STREAM_IN
26569#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
26570
26571#undef TARGET_OPTION_PRINT
26572#define TARGET_OPTION_PRINT ix86_function_specific_print
26573
26574#undef TARGET_OPTION_FUNCTION_VERSIONS
26575#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
26576
26577#undef TARGET_CAN_INLINE_P
26578#define TARGET_CAN_INLINE_P ix86_can_inline_p
26579
26580#undef TARGET_LEGITIMATE_ADDRESS_P
26581#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
26582
26583#undef TARGET_REGISTER_PRIORITY
26584#define TARGET_REGISTER_PRIORITY ix86_register_priority
26585
26586#undef TARGET_REGISTER_USAGE_LEVELING_P
26587#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
26588
26589#undef TARGET_LEGITIMATE_CONSTANT_P
26590#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
26591
26592#undef TARGET_COMPUTE_FRAME_LAYOUT
26593#define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
26594
26595#undef TARGET_FRAME_POINTER_REQUIRED
26596#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
26597
26598#undef TARGET_CAN_ELIMINATE
26599#define TARGET_CAN_ELIMINATE ix86_can_eliminate
26600
26601#undef TARGET_EXTRA_LIVE_ON_ENTRY
26602#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
26603
26604#undef TARGET_ASM_CODE_END
26605#define TARGET_ASM_CODE_END ix86_code_end
26606
26607#undef TARGET_CONDITIONAL_REGISTER_USAGE
26608#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
26609
26610#undef TARGET_CANONICALIZE_COMPARISON
26611#define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
26612
26613#undef TARGET_LOOP_UNROLL_ADJUST
26614#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
26615
26616/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
26617#undef TARGET_SPILL_CLASS
26618#define TARGET_SPILL_CLASS ix86_spill_class
26619
26620#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
26621#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
26622 ix86_simd_clone_compute_vecsize_and_simdlen
26623
26624#undef TARGET_SIMD_CLONE_ADJUST
26625#define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
26626
26627#undef TARGET_SIMD_CLONE_USABLE
26628#define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
26629
26630#undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
26631#define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
26632
26633#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
26634#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
26635 ix86_float_exceptions_rounding_supported_p
26636
26637#undef TARGET_MODE_EMIT
26638#define TARGET_MODE_EMIT ix86_emit_mode_set
26639
26640#undef TARGET_MODE_NEEDED
26641#define TARGET_MODE_NEEDED ix86_mode_needed
26642
26643#undef TARGET_MODE_AFTER
26644#define TARGET_MODE_AFTER ix86_mode_after
26645
26646#undef TARGET_MODE_ENTRY
26647#define TARGET_MODE_ENTRY ix86_mode_entry
26648
26649#undef TARGET_MODE_EXIT
26650#define TARGET_MODE_EXIT ix86_mode_exit
26651
26652#undef TARGET_MODE_PRIORITY
26653#define TARGET_MODE_PRIORITY ix86_mode_priority
26654
26655#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
26656#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
26657
26658#undef TARGET_OFFLOAD_OPTIONS
26659#define TARGET_OFFLOAD_OPTIONS \
26660 ix86_offload_options
26661
26662#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
26663#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
26664
26665#undef TARGET_OPTAB_SUPPORTED_P
26666#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
26667
26668#undef TARGET_HARD_REGNO_SCRATCH_OK
26669#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
26670
26671#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
26672#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS X86_CUSTOM_FUNCTION_TEST
26673
26674#undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
26675#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
26676
26677#undef TARGET_INIT_LIBFUNCS
26678#define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
26679
26680#undef TARGET_EXPAND_DIVMOD_LIBFUNC
26681#define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
26682
26683#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
26684#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
26685
26686#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
26687#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
26688
26689#undef TARGET_HARD_REGNO_NREGS
26690#define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
26691#undef TARGET_HARD_REGNO_MODE_OK
26692#define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
26693
26694#undef TARGET_MODES_TIEABLE_P
26695#define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
26696
26697#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
26698#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
26699 ix86_hard_regno_call_part_clobbered
26700
26701#undef TARGET_INSN_CALLEE_ABI
26702#define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi
26703
26704#undef TARGET_CAN_CHANGE_MODE_CLASS
26705#define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
26706
26707#undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
26708#define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
26709
26710#undef TARGET_STATIC_RTX_ALIGNMENT
26711#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
26712#undef TARGET_CONSTANT_ALIGNMENT
26713#define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
26714
26715#undef TARGET_EMPTY_RECORD_P
26716#define TARGET_EMPTY_RECORD_P ix86_is_empty_record
26717
26718#undef TARGET_WARN_PARAMETER_PASSING_ABI
26719#define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
26720
26721#undef TARGET_GET_MULTILIB_ABI_NAME
26722#define TARGET_GET_MULTILIB_ABI_NAME \
26723 ix86_get_multilib_abi_name
26724
26725#undef TARGET_IFUNC_REF_LOCAL_OK
26726#define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok
26727
26728#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
26729# undef TARGET_ASM_RELOC_RW_MASK
26730# define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
26731#endif
26732
26733#undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
26734#define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses
26735
26736#undef TARGET_MEMTAG_ADD_TAG
26737#define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag
26738
26739#undef TARGET_MEMTAG_SET_TAG
26740#define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag
26741
26742#undef TARGET_MEMTAG_EXTRACT_TAG
26743#define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag
26744
26745#undef TARGET_MEMTAG_UNTAGGED_POINTER
26746#define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer
26747
26748#undef TARGET_MEMTAG_TAG_SIZE
26749#define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
26750
26751static bool
26752ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
26753{
26754#ifdef OPTION_GLIBC
26755 if (OPTION_GLIBC)
26756 return (built_in_function)fcode == BUILT_IN_MEMPCPY;
26757 else
26758 return false;
26759#else
26760 return false;
26761#endif
26762}
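/* Rough illustration: when compiling against glibc, a call such as
   "end = mempcpy (dst, src, n)" can be kept as a real mempcpy libcall,
   whereas for C libraries without a fast mempcpy GCC prefers to expand
   it via memcpy.  */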
26763
26764#undef TARGET_LIBC_HAS_FAST_FUNCTION
26765#define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
26766
26767static unsigned
26768ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
26769 bool boundary_p)
26770{
26771#ifdef OPTION_GLIBC
26772 bool glibc_p = OPTION_GLIBC;
26773#else
26774 bool glibc_p = false;
26775#endif
26776 if (glibc_p)
26777 {
26778 /* If __FAST_MATH__ is defined, glibc provides libmvec. */
26779 unsigned int libmvec_ret = 0;
26780 if (!flag_trapping_math
26781 && flag_unsafe_math_optimizations
26782 && flag_finite_math_only
26783 && !flag_signed_zeros
26784 && !flag_errno_math)
26785 switch (cfn)
26786 {
26787 CASE_CFN_COS:
26788 CASE_CFN_COS_FN:
26789 CASE_CFN_SIN:
26790 CASE_CFN_SIN_FN:
26791 if (!boundary_p)
26792 {
26793 /* With non-default rounding modes, libmvec provides
26794 complete garbage in results. E.g.
26795 _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
26796 returns 0.00333309174f rather than 1.40129846e-45f. */
26797 if (flag_rounding_math)
26798 return ~0U;
26799 /* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
26800 claims libmvec maximum error is 4ulps.
26801 My own random testing indicates 2ulps for SFmode and
26802 0.5ulps for DFmode, but let's go with the 4ulps. */
26803 libmvec_ret = 4;
26804 }
26805 break;
26806 default:
26807 break;
26808 }
26809 unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
26810 boundary_p);
26811 return MAX (ret, libmvec_ret);
26812 }
26813 return default_libm_function_max_error (cfn, mode, boundary_p);
26814}
26815
26816#undef TARGET_LIBM_FUNCTION_MAX_ERROR
26817#define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error
26818
26819#if CHECKING_P
26820#undef TARGET_RUN_TARGET_SELFTESTS
26821#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
26822#endif /* #if CHECKING_P */
26823
26824struct gcc_target targetm = TARGET_INITIALIZER;
26825
26826#include "gt-i386.h"
26827
