/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "cfgbuild.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "calls.h"
#include "stor-layout.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "cfgrtl.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "sched-int.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"
#include "pass_manager.h"
#include "target-globals.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tree-iterator.h"
#include "tree-chkp.h"
#include "rtl-chkp.h"
#include "dbgcnt.h"
#include "case-cfn-macros.h"
#include "regrename.h"
#include "dojump.h"
#include "fold-const-call.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "selftest.h"
#include "selftest-rtl.h"
#include "print-rtl.h"
#include "intl.h"
#include "ifcvt.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"

/* This file should be included last.  */
#include "target-def.h"

#include "x86-tune-costs.h"

static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static rtx legitimize_pe_coff_symbol (rtx, bool);
static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
static bool ix86_save_reg (unsigned int, bool, bool);
static bool ix86_function_naked (const_tree);
static bool ix86_notrack_prefixed_insn_p (rtx);
static void ix86_emit_restore_reg_using_pop (rtx);


#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
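
/* For instance, MODE_INDEX (SImode) evaluates to 2, selecting the SImode
   entry of a per-mode cost array, while any mode outside QImode..DImode
   falls through to the catch-all index 4.  */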

/* Set by -mtune.  */
const struct processor_costs *ix86_tune_cost = NULL;

/* Set by -mtune or -Os.  */
const struct processor_costs *ix86_cost = NULL;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1U<<PROCESSOR_I386)
#define m_486 (1U<<PROCESSOR_I486)
#define m_PENT (1U<<PROCESSOR_PENTIUM)
#define m_LAKEMONT (1U<<PROCESSOR_LAKEMONT)
#define m_PPRO (1U<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1U<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1U<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2 (1U<<PROCESSOR_CORE2)
#define m_NEHALEM (1U<<PROCESSOR_NEHALEM)
#define m_SANDYBRIDGE (1U<<PROCESSOR_SANDYBRIDGE)
#define m_HASWELL (1U<<PROCESSOR_HASWELL)
#define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
#define m_BONNELL (1U<<PROCESSOR_BONNELL)
#define m_SILVERMONT (1U<<PROCESSOR_SILVERMONT)
#define m_KNL (1U<<PROCESSOR_KNL)
#define m_KNM (1U<<PROCESSOR_KNM)
#define m_SKYLAKE_AVX512 (1U<<PROCESSOR_SKYLAKE_AVX512)
#define m_CANNONLAKE (1U<<PROCESSOR_CANNONLAKE)
#define m_INTEL (1U<<PROCESSOR_INTEL)

#define m_GEODE (1U<<PROCESSOR_GEODE)
#define m_K6 (1U<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1U<<PROCESSOR_K8)
#define m_ATHLON (1U<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1U<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1U<<PROCESSOR_BDVER1)
#define m_BDVER2 (1U<<PROCESSOR_BDVER2)
#define m_BDVER3 (1U<<PROCESSOR_BDVER3)
#define m_BDVER4 (1U<<PROCESSOR_BDVER4)
#define m_ZNVER1 (1U<<PROCESSOR_ZNVER1)
#define m_BTVER1 (1U<<PROCESSOR_BTVER1)
#define m_BTVER2 (1U<<PROCESSOR_BTVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
			| m_ZNVER1)

#define m_GENERIC (1U<<PROCESSOR_GENERIC)

const char *ix86_tune_feature_names[X86_TUNE_LAST] = {
#undef DEF_TUNE
#define DEF_TUNE(tune, name, selector) name,
#include "x86-tune.def"
#undef DEF_TUNE
};
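
/* Each DEF_TUNE line in x86-tune.def is expanded twice: once just above to
   collect the feature names and once just below to collect the selector
   masks.  For an entry along the lines of

     DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", m_PENT | m_PPRO | m_GENERIC)

   (selector abridged here), the first expansion contributes "schedule" to
   ix86_tune_feature_names and the second contributes the processor mask to
   initial_ix86_tune_features at the same index.  */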

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
#undef DEF_TUNE
#define DEF_TUNE(tune, name, selector) selector,
#include "x86-tune.def"
#undef DEF_TUNE
};

/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];

/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOV: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
  ~m_386,
};
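
/* A sketch of how these masks are consumed (the option-override logic
   itself lives outside this excerpt): each entry is ANDed with the mask of
   the CPU selected by -march, roughly

     ix86_arch_features[i] = !!(initial_ix86_arch_features[i]
				& ix86_arch_mask);

   so compiling for an 80386 leaves X86_ARCH_CMPXCHG clear while any later
   CPU sets it.  */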

/* If the average insn count for a single function invocation is
   lower than this constant, emit a fast (but longer) prologue and
   epilogue.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* AVX-512 SSE registers */
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  /* Mask registers.  */
  MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
  MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
  /* MPX bound registers */
  BND_REGS, BND_REGS, BND_REGS, BND_REGS,
};

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 16-23 */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 24-31 */
  93, 94, 95, 96, 97, 98, 99, 100,	/* Mask registers */
  101, 102, 103, 104,			/* bound registers */
};

/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
  67, 68, 69, 70, 71, 72, 73, 74,	/* AVX-512 registers 16-23 */
  75, 76, 77, 78, 79, 80, 81, 82,	/* AVX-512 registers 24-31 */
  118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
  126, 127, 128, 129,			/* bound registers */
};
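
/* In the 64-bit map the first eight entries are the identity: gcc regnos
   0-7 (ax, dx, cx, bx, si, di, bp, sp) carry the same DWARF numbers 0-7,
   and the extended integer registers %r8..%r15 follow as DWARF 8-15,
   whereas the 32-bit maps above reorder the general registers.  */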

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believed these numbers have these meanings.
	8 for %eip    (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 was so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I had does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seemed to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still printed garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB printed for various FP stack regs
   when doing an `x' command were all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 16-23 */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 24-31 */
  93, 94, 95, 96, 97, 98, 99, 100,	/* Mask registers */
  101, 102, 103, 104,			/* bound registers */
};

/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};

/* Additional registers that are clobbered by SYSV calls.  */

#define NUM_X86_64_MS_CLOBBERED_REGS 12
static int const x86_64_ms_sysv_extra_clobbered_registers
		 [NUM_X86_64_MS_CLOBBERED_REGS] =
{
  SI_REG, DI_REG,
  XMM6_REG, XMM7_REG,
  XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
  XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
};

enum xlogue_stub {
  XLOGUE_STUB_SAVE,
  XLOGUE_STUB_RESTORE,
  XLOGUE_STUB_RESTORE_TAIL,
  XLOGUE_STUB_SAVE_HFP,
  XLOGUE_STUB_RESTORE_HFP,
  XLOGUE_STUB_RESTORE_HFP_TAIL,

  XLOGUE_STUB_COUNT
};

enum xlogue_stub_sets {
  XLOGUE_SET_ALIGNED,
  XLOGUE_SET_ALIGNED_PLUS_8,
  XLOGUE_SET_HFP_ALIGNED_OR_REALIGN,
  XLOGUE_SET_HFP_ALIGNED_PLUS_8,

  XLOGUE_SET_COUNT
};

/* Register save/restore layout used by out-of-line stubs.  */
class xlogue_layout {
public:
  struct reginfo
  {
    unsigned regno;
    HOST_WIDE_INT offset;	/* Offset from the stub base pointer (rax or
				   rsi) to where each register is stored.  */
  };

  unsigned get_nregs () const {return m_nregs;}
  HOST_WIDE_INT get_stack_align_off_in () const {return m_stack_align_off_in;}

  const reginfo &get_reginfo (unsigned reg) const
  {
    gcc_assert (reg < m_nregs);
    return m_regs[reg];
  }

  static const char *get_stub_name (enum xlogue_stub stub,
				    unsigned n_extra_regs);

  /* Returns an rtx for the stub's symbol based upon
       1.) the specified stub (save, restore or restore_ret) and
       2.) the value of cfun->machine->call_ms2sysv_extra_regs and
       3.) whether or not stack alignment is being performed.  */
  static rtx get_stub_rtx (enum xlogue_stub stub);

  /* Returns the amount of stack space (including padding) that the stub
     needs to store registers based upon data in the machine_function.  */
  HOST_WIDE_INT get_stack_space_used () const
  {
    const struct machine_function *m = cfun->machine;
    unsigned last_reg = m->call_ms2sysv_extra_regs + MIN_REGS - 1;

    gcc_assert (m->call_ms2sysv_extra_regs <= MAX_EXTRA_REGS);
    return m_regs[last_reg].offset + STUB_INDEX_OFFSET;
  }

  /* Returns the offset for the base pointer used by the stub.  */
  HOST_WIDE_INT get_stub_ptr_offset () const
  {
    return STUB_INDEX_OFFSET + m_stack_align_off_in;
  }

  static const struct xlogue_layout &get_instance ();
  static unsigned count_stub_managed_regs ();
  static bool is_stub_managed_reg (unsigned regno, unsigned count);

  static const HOST_WIDE_INT STUB_INDEX_OFFSET = 0x70;
  static const unsigned MIN_REGS = NUM_X86_64_MS_CLOBBERED_REGS;
  static const unsigned MAX_REGS = 18;
  static const unsigned MAX_EXTRA_REGS = MAX_REGS - MIN_REGS;
  static const unsigned VARIANT_COUNT = MAX_EXTRA_REGS + 1;
  static const unsigned STUB_NAME_MAX_LEN = 20;
  static const char * const STUB_BASE_NAMES[XLOGUE_STUB_COUNT];
  static const unsigned REG_ORDER[MAX_REGS];
  static const unsigned REG_ORDER_REALIGN[MAX_REGS];

private:
  xlogue_layout ();
  xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp);
  xlogue_layout (const xlogue_layout &);

  /* True if hard frame pointer is used.  */
  bool m_hfp;

  /* Max number of registers this layout manages.  */
  unsigned m_nregs;

  /* Incoming offset from 16-byte alignment.  */
  HOST_WIDE_INT m_stack_align_off_in;

  /* Register order and offsets.  */
  struct reginfo m_regs[MAX_REGS];

  /* Lazy-inited cache of symbol names for stubs.  */
  static char s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
			  [STUB_NAME_MAX_LEN];

  static const xlogue_layout s_instances[XLOGUE_SET_COUNT];
};

const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
  "savms64",
  "resms64",
  "resms64x",
  "savms64f",
  "resms64f",
  "resms64fx"
};

const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
/* The below offset values are where each register is stored for the layout
   relative to incoming stack pointer.  The value of each m_regs[].offset will
   be relative to the incoming base pointer (rax or rsi) used by the stub.

    s_instances:  0            1              2               3
    Offset:                                   realigned or    aligned + 8
    Register      aligned      aligned + 8    aligned w/HFP   w/HFP */
  XMM15_REG,   /* 0x10         0x18           0x10            0x18 */
  XMM14_REG,   /* 0x20         0x28           0x20            0x28 */
  XMM13_REG,   /* 0x30         0x38           0x30            0x38 */
  XMM12_REG,   /* 0x40         0x48           0x40            0x48 */
  XMM11_REG,   /* 0x50         0x58           0x50            0x58 */
  XMM10_REG,   /* 0x60         0x68           0x60            0x68 */
  XMM9_REG,    /* 0x70         0x78           0x70            0x78 */
  XMM8_REG,    /* 0x80         0x88           0x80            0x88 */
  XMM7_REG,    /* 0x90         0x98           0x90            0x98 */
  XMM6_REG,    /* 0xa0         0xa8           0xa0            0xa8 */
  SI_REG,      /* 0xa8         0xb0           0xa8            0xb0 */
  DI_REG,      /* 0xb0         0xb8           0xb0            0xb8 */
  BX_REG,      /* 0xb8         0xc0           0xb8            0xc0 */
  BP_REG,      /* 0xc0         0xc8           N/A             N/A */
  R12_REG,     /* 0xc8         0xd0           0xc0            0xc8 */
  R13_REG,     /* 0xd0         0xd8           0xc8            0xd0 */
  R14_REG,     /* 0xd8         0xe0           0xd0            0xd8 */
  R15_REG,     /* 0xe0         0xe8           0xd8            0xe0 */
};

/* Instantiate static const values.  */
const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
const unsigned xlogue_layout::MIN_REGS;
const unsigned xlogue_layout::MAX_REGS;
const unsigned xlogue_layout::MAX_EXTRA_REGS;
const unsigned xlogue_layout::VARIANT_COUNT;
const unsigned xlogue_layout::STUB_NAME_MAX_LEN;

/* Initialize xlogue_layout::s_stub_names to zero.  */
char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
				[STUB_NAME_MAX_LEN];

/* Instantiates all xlogue_layout instances.  */
const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
  xlogue_layout (0, false),
  xlogue_layout (8, false),
  xlogue_layout (0, true),
  xlogue_layout (8, true)
};

/* Return an appropriate const instance of xlogue_layout based upon values
   in cfun->machine and crtl.  */
const struct xlogue_layout &
xlogue_layout::get_instance ()
{
  enum xlogue_stub_sets stub_set;
  bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;

  if (stack_realign_fp)
    stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
  else if (frame_pointer_needed)
    stub_set = aligned_plus_8
	       ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
	       : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
  else
    stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;

  return s_instances[stub_set];
}

/* Determine how many clobbered registers can be saved by the stub.
   Returns the count of registers the stub will save and restore.  */
unsigned
xlogue_layout::count_stub_managed_regs ()
{
  bool hfp = frame_pointer_needed || stack_realign_fp;
  unsigned i, count;
  unsigned regno;

  for (count = i = MIN_REGS; i < MAX_REGS; ++i)
    {
      regno = REG_ORDER[i];
      if (regno == BP_REG && hfp)
	continue;
      if (!ix86_save_reg (regno, false, false))
	break;
      ++count;
    }
  return count;
}

/* Determine if register REGNO is a stub managed register given the
   total COUNT of stub managed registers.  */
bool
xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
{
  bool hfp = frame_pointer_needed || stack_realign_fp;
  unsigned i;

  for (i = 0; i < count; ++i)
    {
      gcc_assert (i < MAX_REGS);
      if (REG_ORDER[i] == BP_REG && hfp)
	++count;
      else if (REG_ORDER[i] == regno)
	return true;
    }
  return false;
}

/* Constructor for xlogue_layout.  */
xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
  : m_hfp (hfp), m_nregs (hfp ? 17 : 18),
    m_stack_align_off_in (stack_align_off_in)
{
  HOST_WIDE_INT offset = stack_align_off_in;
  unsigned i, j;

  for (i = j = 0; i < MAX_REGS; ++i)
    {
      unsigned regno = REG_ORDER[i];

      if (regno == BP_REG && hfp)
	continue;
      if (SSE_REGNO_P (regno))
	{
	  offset += 16;
	  /* Verify that SSE regs are always aligned.  */
	  gcc_assert (!((stack_align_off_in + offset) & 15));
	}
      else
	offset += 8;

      m_regs[j].regno = regno;
      m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
    }
  gcc_assert (j == m_nregs);
}

const char *
xlogue_layout::get_stub_name (enum xlogue_stub stub,
			      unsigned n_extra_regs)
{
  const int have_avx = TARGET_AVX;
  char *name = s_stub_names[!!have_avx][stub][n_extra_regs];

  /* Lazy init.  */
  if (!*name)
    {
      int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
			  (have_avx ? "avx" : "sse"),
			  STUB_BASE_NAMES[stub],
			  MIN_REGS + n_extra_regs);
      gcc_checking_assert (res < (int) STUB_NAME_MAX_LEN);
    }

  return name;
}
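
/* For example, with AVX enabled and no extra registers beyond the
   minimum, get_stub_name (XLOGUE_STUB_SAVE, 0) lazily formats and caches
   "__avx_savms64_12": "savms64" is STUB_BASE_NAMES[XLOGUE_STUB_SAVE] and
   MIN_REGS is NUM_X86_64_MS_CLOBBERED_REGS, i.e. 12.  */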

/* Return rtx of a symbol ref for the entry point (based upon
   cfun->machine->call_ms2sysv_extra_regs) of the specified stub.  */
rtx
xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
{
  const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
  gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
  gcc_assert (stub < XLOGUE_STUB_COUNT);
  gcc_assert (crtl->stack_realign_finalized);

  return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
}

/* Define the structure for the machine field in struct function.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char x86_prefetch_sse;

/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";

static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
static rtx (*ix86_gen_clzero) (rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling ABI specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt.  */
tree x86_mfence;

/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class;
   gcc just uses SFmode or DFmode moves instead of DImode to avoid
   reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

#define MAX_CLASSES 8

/* Table of constants used by fldpi, fldln2, etc.  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init;


static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (void);
static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
						 rtx, rtx, int);
static void ix86_add_new_builtins (HOST_WIDE_INT, HOST_WIDE_INT);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};

static char *ix86_target_string (HOST_WIDE_INT, HOST_WIDE_INT, int, int,
				 const char *, const char *, enum fpmath_unit,
				 bool);
static void ix86_function_specific_save (struct cl_target_option *,
					 struct gcc_options *opts);
static void ix86_function_specific_restore (struct gcc_options *opts,
					    struct cl_target_option *);
static void ix86_function_specific_post_stream_in (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
						 struct gcc_options *,
						 struct gcc_options *,
						 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);


#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* Whether -mtune= or -march= were specified.  */
static int ix86_tune_defaulted;
static int ix86_arch_specified;

/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (combined_fn, tree, tree);

static tree ix86_veclibabi_svml (combined_fn, tree, tree);
static tree ix86_veclibabi_acml (combined_fn, tree, tree);

/* Processor target table, indexed by processor number.  */
struct ptt
{
  const char *const name;		/* processor name */
  const struct processor_costs *cost;	/* Processor costs */
  const int align_loop;			/* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};

/* This table must be in sync with enum processor_type in i386.h.  */
static const struct ptt processor_target_table[PROCESSOR_max] =
{
  {"generic", &generic_cost, 16, 10, 16, 10, 16},
  {"i386", &i386_cost, 4, 3, 4, 3, 4},
  {"i486", &i486_cost, 16, 15, 16, 15, 16},
  {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
  {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
  {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
  {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
  {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
  {"core2", &core_cost, 16, 10, 16, 10, 16},
  {"nehalem", &core_cost, 16, 10, 16, 10, 16},
  {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
  {"haswell", &core_cost, 16, 10, 16, 10, 16},
  {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
  {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
  {"knl", &slm_cost, 16, 15, 16, 7, 16},
  {"knm", &slm_cost, 16, 15, 16, 7, 16},
  {"skylake-avx512", &skylake_cost, 16, 10, 16, 10, 16},
  {"cannonlake", &core_cost, 16, 10, 16, 10, 16},
  {"intel", &intel_cost, 16, 15, 16, 7, 16},
  {"geode", &geode_cost, 0, 0, 0, 0, 0},
  {"k6", &k6_cost, 32, 7, 32, 7, 32},
  {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
  {"k8", &k8_cost, 16, 7, 16, 7, 16},
  {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
  {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
  {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
  {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
  {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
  {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
  {"btver2", &btver2_cost, 16, 10, 16, 7, 11},
  {"znver1", &znver1_cost, 16, 15, 16, 15, 16}
};

static unsigned int
rest_of_handle_insert_vzeroupper (void)
{
  int i;

  /* vzeroupper instructions are inserted immediately after reload to
     account for possible spills from 256-bit or 512-bit registers.  The
     pass reuses the mode switching infrastructure by re-running the mode
     insertion pass, so disable entities that have already been
     processed.  */
  for (i = 0; i < MAX_386_ENTITIES; i++)
    ix86_optimize_mode_switching[i] = 0;

  ix86_optimize_mode_switching[AVX_U128] = 1;

  /* Call optimize_mode_switching.  */
  g->get_passes ()->execute_pass_mode_switching ();
  return 0;
}

/* Return true if INSN uses or defines a hard register.
   Hard register uses in a memory address are ignored.
   Clobbers and flags definitions are ignored.  */

static bool
has_non_address_hard_reg (rtx_insn *insn)
{
  df_ref ref;
  FOR_EACH_INSN_DEF (ref, insn)
    if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
	&& !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
	&& DF_REF_REGNO (ref) != FLAGS_REG)
      return true;

  FOR_EACH_INSN_USE (ref, insn)
    if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
      return true;

  return false;
}

/* Check if comparison INSN may be transformed into a vector comparison.
   Currently we transform only zero checks, which look like:

     (set (reg:CCZ 17 flags)
	  (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
			       (subreg:SI (reg:DI x) 0))
		       (const_int 0 [0])))  */

static bool
convertible_comparison_p (rtx_insn *insn)
{
  if (!TARGET_SSE4_1)
    return false;

  rtx def_set = single_set (insn);

  gcc_assert (def_set);

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  gcc_assert (GET_CODE (src) == COMPARE);

  if (GET_CODE (dst) != REG
      || REGNO (dst) != FLAGS_REG
      || GET_MODE (dst) != CCZmode)
    return false;

  rtx op1 = XEXP (src, 0);
  rtx op2 = XEXP (src, 1);

  if (op2 != CONST0_RTX (GET_MODE (op2)))
    return false;

  if (GET_CODE (op1) != IOR)
    return false;

  op2 = XEXP (op1, 1);
  op1 = XEXP (op1, 0);

  if (!SUBREG_P (op1)
      || !SUBREG_P (op2)
      || GET_MODE (op1) != SImode
      || GET_MODE (op2) != SImode
      || ((SUBREG_BYTE (op1) != 0
	   || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
	  && (SUBREG_BYTE (op2) != 0
	      || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
    return false;

  op1 = SUBREG_REG (op1);
  op2 = SUBREG_REG (op2);

  if (op1 != op2
      || !REG_P (op1)
      || GET_MODE (op1) != DImode)
    return false;

  return true;
}
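
/* Such a pattern is typically produced when 32-bit code compares a 64-bit
   value against zero, e.g. "if (x == 0)" with X an unsigned long long:
   the two SImode halves of X are IORed together and the result is
   compared with zero.  */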

/* The DImode version of scalar_to_vector_candidate_p.  */

static bool
dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
{
  rtx def_set = single_set (insn);

  if (!def_set)
    return false;

  if (has_non_address_hard_reg (insn))
    return false;

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  if (GET_CODE (src) == COMPARE)
    return convertible_comparison_p (insn);

  /* We are interested in DImode promotion only.  */
  if ((GET_MODE (src) != DImode
       && !CONST_INT_P (src))
      || GET_MODE (dst) != DImode)
    return false;

  if (!REG_P (dst) && !MEM_P (dst))
    return false;

  switch (GET_CODE (src))
    {
    case ASHIFTRT:
      if (!TARGET_AVX512VL)
	return false;
      /* FALLTHRU */

    case ASHIFT:
    case LSHIFTRT:
      if (!REG_P (XEXP (src, 1))
	  && (!SUBREG_P (XEXP (src, 1))
	      || SUBREG_BYTE (XEXP (src, 1)) != 0
	      || !REG_P (SUBREG_REG (XEXP (src, 1))))
	  && (!CONST_INT_P (XEXP (src, 1))
	      || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63)))
	return false;

      if (GET_MODE (XEXP (src, 1)) != QImode
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;
      break;

    case PLUS:
    case MINUS:
    case IOR:
    case XOR:
    case AND:
      if (!REG_P (XEXP (src, 1))
	  && !MEM_P (XEXP (src, 1))
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;

      if (GET_MODE (XEXP (src, 1)) != DImode
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;
      break;

    case NEG:
    case NOT:
      break;

    case REG:
      return true;

    case MEM:
    case CONST_INT:
      return REG_P (dst);

    default:
      return false;
    }

  if (!REG_P (XEXP (src, 0))
      && !MEM_P (XEXP (src, 0))
      && !CONST_INT_P (XEXP (src, 0))
      /* Check for andnot case.  */
      && (GET_CODE (src) != AND
	  || GET_CODE (XEXP (src, 0)) != NOT
	  || !REG_P (XEXP (XEXP (src, 0), 0))))
    return false;

  if (GET_MODE (XEXP (src, 0)) != DImode
      && !CONST_INT_P (XEXP (src, 0)))
    return false;

  return true;
}
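
/* As an illustration, in 32-bit code an insn like

     (set (reg:DI 90) (plus:DI (reg:DI 91) (mem:DI ...)))

   is accepted: PLUS is handled above and both operands are DImode
   registers or memory.  A shift whose count is neither a QImode register
   nor a constant in [0, 63] is rejected by the shift cases above.  */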

/* The TImode version of scalar_to_vector_candidate_p.  */

static bool
timode_scalar_to_vector_candidate_p (rtx_insn *insn)
{
  rtx def_set = single_set (insn);

  if (!def_set)
    return false;

  if (has_non_address_hard_reg (insn))
    return false;

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  /* Only TImode loads and stores are allowed.  */
  if (GET_MODE (dst) != TImode)
    return false;

  if (MEM_P (dst))
    {
      /* Check for a store.  The memory must be aligned, or unaligned
	 stores must be optimal on the target.  Only support stores from a
	 register, a standard SSE constant, or a CONST_WIDE_INT generated
	 from a piecewise store.

	 ??? Verify performance impact before enabling CONST_INT for
	 __int128 store.  */
      if (misaligned_operand (dst, TImode)
	  && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
	return false;

      switch (GET_CODE (src))
	{
	default:
	  return false;

	case REG:
	case CONST_WIDE_INT:
	  return true;

	case CONST_INT:
	  return standard_sse_constant_p (src, TImode);
	}
    }
  else if (MEM_P (src))
    {
      /* Check for a load.  The memory must be aligned, or unaligned loads
	 must be optimal on the target.  */
      return (REG_P (dst)
	      && (!misaligned_operand (src, TImode)
		  || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
    }

  return false;
}

/* Return true if INSN may be converted into a vector instruction.  */

static bool
scalar_to_vector_candidate_p (rtx_insn *insn)
{
  if (TARGET_64BIT)
    return timode_scalar_to_vector_candidate_p (insn);
  else
    return dimode_scalar_to_vector_candidate_p (insn);
}

/* The DImode version of remove_non_convertible_regs.  */

static void
dimode_remove_non_convertible_regs (bitmap candidates)
{
  bitmap_iterator bi;
  unsigned id;
  bitmap regs = BITMAP_ALLOC (NULL);

  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
    {
      rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
      rtx reg = SET_DEST (def_set);

      if (!REG_P (reg)
	  || bitmap_bit_p (regs, REGNO (reg))
	  || HARD_REGISTER_P (reg))
	continue;

      for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
	   def;
	   def = DF_REF_NEXT_REG (def))
	{
	  if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	    {
	      if (dump_file)
		fprintf (dump_file,
			 "r%d has non convertible definition in insn %d\n",
			 REGNO (reg), DF_REF_INSN_UID (def));

	      bitmap_set_bit (regs, REGNO (reg));
	      break;
	    }
	}
    }

  EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
    {
      for (df_ref def = DF_REG_DEF_CHAIN (id);
	   def;
	   def = DF_REF_NEXT_REG (def))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (def));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
	  }
    }

  BITMAP_FREE (regs);
}

/* For a register REGNO, scan instructions for its defs and uses.
   Put REGNO in REGS if a def or use isn't in CANDIDATES.  */

static void
timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
				   unsigned int regno)
{
  for (df_ref def = DF_REG_DEF_CHAIN (regno);
       def;
       def = DF_REF_NEXT_REG (def))
    {
      if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "r%d has non convertible def in insn %d\n",
		     regno, DF_REF_INSN_UID (def));

	  bitmap_set_bit (regs, regno);
	  break;
	}
    }

  for (df_ref ref = DF_REG_USE_CHAIN (regno);
       ref;
       ref = DF_REF_NEXT_REG (ref))
    {
      /* Debug instructions are skipped.  */
      if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
	  && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "r%d has non convertible use in insn %d\n",
		     regno, DF_REF_INSN_UID (ref));

	  bitmap_set_bit (regs, regno);
	  break;
	}
    }
}

/* The TImode version of remove_non_convertible_regs.  */

static void
timode_remove_non_convertible_regs (bitmap candidates)
{
  bitmap_iterator bi;
  unsigned id;
  bitmap regs = BITMAP_ALLOC (NULL);

  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
    {
      rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
      rtx dest = SET_DEST (def_set);
      rtx src = SET_SRC (def_set);

      if ((!REG_P (dest)
	   || bitmap_bit_p (regs, REGNO (dest))
	   || HARD_REGISTER_P (dest))
	  && (!REG_P (src)
	      || bitmap_bit_p (regs, REGNO (src))
	      || HARD_REGISTER_P (src)))
	continue;

      if (REG_P (dest))
	timode_check_non_convertible_regs (candidates, regs,
					   REGNO (dest));

      if (REG_P (src))
	timode_check_non_convertible_regs (candidates, regs,
					   REGNO (src));
    }

  EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
    {
      for (df_ref def = DF_REG_DEF_CHAIN (id);
	   def;
	   def = DF_REF_NEXT_REG (def))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (def));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
	  }

      for (df_ref ref = DF_REG_USE_CHAIN (id);
	   ref;
	   ref = DF_REF_NEXT_REG (ref))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (ref));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
	  }
    }

  BITMAP_FREE (regs);
}

/* For a given bitmap of insn UIDs, scan all instructions and remove an
   insn from CANDIDATES if it has both convertible and non-convertible
   definitions.

   All insns in the bitmap are conversion candidates according to
   scalar_to_vector_candidate_p.  Currently this implies that all insns
   are single_set.  */

static void
remove_non_convertible_regs (bitmap candidates)
{
  if (TARGET_64BIT)
    timode_remove_non_convertible_regs (candidates);
  else
    dimode_remove_non_convertible_regs (candidates);
}

class scalar_chain
{
 public:
  scalar_chain ();
  virtual ~scalar_chain ();

  static unsigned max_id;

  /* ID of a chain.  */
  unsigned int chain_id;
  /* A queue of instructions to be included into a chain.  */
  bitmap queue;
  /* Instructions included into a chain.  */
  bitmap insns;
  /* All registers defined by a chain.  */
  bitmap defs;
  /* Registers used in both vector and scalar modes.  */
  bitmap defs_conv;

  void build (bitmap candidates, unsigned insn_uid);
  virtual int compute_convert_gain () = 0;
  int convert ();

 protected:
  void add_to_queue (unsigned insn_uid);
  void emit_conversion_insns (rtx insns, rtx_insn *pos);

 private:
  void add_insn (bitmap candidates, unsigned insn_uid);
  void analyze_register_chain (bitmap candidates, df_ref ref);
  virtual void mark_dual_mode_def (df_ref def) = 0;
  virtual void convert_insn (rtx_insn *insn) = 0;
  virtual void convert_registers () = 0;
};

class dimode_scalar_chain : public scalar_chain
{
 public:
  int compute_convert_gain ();
 private:
  void mark_dual_mode_def (df_ref def);
  rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
  void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
  void convert_insn (rtx_insn *insn);
  void convert_op (rtx *op, rtx_insn *insn);
  void convert_reg (unsigned regno);
  void make_vector_copies (unsigned regno);
  void convert_registers ();
  int vector_const_cost (rtx exp);
};

class timode_scalar_chain : public scalar_chain
{
 public:
  /* Converting from TImode to V1TImode is always faster.  */
  int compute_convert_gain () { return 1; }

 private:
  void mark_dual_mode_def (df_ref def);
  void fix_debug_reg_uses (rtx reg);
  void convert_insn (rtx_insn *insn);
  /* We don't convert registers to a different size.  */
  void convert_registers () {}
};

unsigned scalar_chain::max_id = 0;

/* Initialize new chain.  */

scalar_chain::scalar_chain ()
{
  chain_id = ++max_id;

  if (dump_file)
    fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);

  bitmap_obstack_initialize (NULL);
  insns = BITMAP_ALLOC (NULL);
  defs = BITMAP_ALLOC (NULL);
  defs_conv = BITMAP_ALLOC (NULL);
  queue = NULL;
}

/* Free chain's data.  */

scalar_chain::~scalar_chain ()
{
  BITMAP_FREE (insns);
  BITMAP_FREE (defs);
  BITMAP_FREE (defs_conv);
  bitmap_obstack_release (NULL);
}

/* Add an instruction into the chain's queue.  */

void
scalar_chain::add_to_queue (unsigned insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid)
      || bitmap_bit_p (queue, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, "  Adding insn %d into chain's #%d queue\n",
	     insn_uid, chain_id);
  bitmap_set_bit (queue, insn_uid);
}

/* For DImode conversion, mark register defined by DEF as requiring
   conversion.  */

void
dimode_scalar_chain::mark_dual_mode_def (df_ref def)
{
  gcc_assert (DF_REF_REG_DEF_P (def));

  if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
    return;

  if (dump_file)
    fprintf (dump_file,
	     "  Mark r%d def in insn %d as requiring both modes in chain #%d\n",
	     DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);

  bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
}

/* For TImode conversion, it is unused.  */

void
timode_scalar_chain::mark_dual_mode_def (df_ref)
{
  gcc_unreachable ();
}

/* Check REF's chain to add new insns into a queue
   and find registers requiring conversion.  */

void
scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
{
  df_link *chain;

  gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
	      || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
  add_to_queue (DF_REF_INSN_UID (ref));

  for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
    {
      unsigned uid = DF_REF_INSN_UID (chain->ref);

      if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
	continue;

      if (!DF_REF_REG_MEM_P (chain->ref))
	{
	  if (bitmap_bit_p (insns, uid))
	    continue;

	  if (bitmap_bit_p (candidates, uid))
	    {
	      add_to_queue (uid);
	      continue;
	    }
	}

      if (DF_REF_REG_DEF_P (chain->ref))
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d def in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (chain->ref);
	}
      else
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d use in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (ref);
	}
    }
}

/* Add instruction into a chain.  */

void
scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, chain_id);

  bitmap_set_bit (insns, insn_uid);

  rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
  rtx def_set = single_set (insn);
  if (def_set && REG_P (SET_DEST (def_set))
      && !HARD_REGISTER_P (SET_DEST (def_set)))
    bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));

  df_ref ref;
  df_ref def;
  for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!HARD_REGISTER_P (DF_REF_REG (ref)))
      for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
	   def;
	   def = DF_REF_NEXT_REG (def))
	analyze_register_chain (candidates, def);
  for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!DF_REF_REG_MEM_P (ref))
      analyze_register_chain (candidates, ref);
}

/* Build new chain starting from insn INSN_UID recursively
   adding all dependent uses and definitions.  */

void
scalar_chain::build (bitmap candidates, unsigned insn_uid)
{
  queue = BITMAP_ALLOC (NULL);
  bitmap_set_bit (queue, insn_uid);

  if (dump_file)
    fprintf (dump_file, "Building chain #%d...\n", chain_id);

  while (!bitmap_empty_p (queue))
    {
      insn_uid = bitmap_first_set_bit (queue);
      bitmap_clear_bit (queue, insn_uid);
      bitmap_clear_bit (candidates, insn_uid);
      add_insn (candidates, insn_uid);
    }

  if (dump_file)
    {
      fprintf (dump_file, "Collected chain #%d...\n", chain_id);
      fprintf (dump_file, "  insns: ");
      dump_bitmap (dump_file, insns);
      if (!bitmap_empty_p (defs_conv))
	{
	  bitmap_iterator bi;
	  unsigned id;
	  const char *comma = "";
	  fprintf (dump_file, "  defs to convert: ");
	  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
	    {
	      fprintf (dump_file, "%sr%d", comma, id);
	      comma = ", ";
	    }
	  fprintf (dump_file, "\n");
	}
    }

  BITMAP_FREE (queue);
}

/* Return the cost of building a vector constant
   instead of using a scalar one.  */

int
dimode_scalar_chain::vector_const_cost (rtx exp)
{
  gcc_assert (CONST_INT_P (exp));

  if (standard_sse_constant_p (exp, V2DImode))
    return COSTS_N_INSNS (1);
  return ix86_cost->sse_load[1];
}

/* Compute a gain for chain conversion.  */

int
dimode_scalar_chain::compute_convert_gain ()
{
  bitmap_iterator bi;
  unsigned insn_uid;
  int gain = 0;
  int cost = 0;

  if (dump_file)
    fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);

  EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
    {
      rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
      rtx def_set = single_set (insn);
      rtx src = SET_SRC (def_set);
      rtx dst = SET_DEST (def_set);

      if (REG_P (src) && REG_P (dst))
	gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move;
      else if (REG_P (src) && MEM_P (dst))
	gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
      else if (MEM_P (src) && REG_P (dst))
	gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
      else if (GET_CODE (src) == ASHIFT
	       || GET_CODE (src) == ASHIFTRT
	       || GET_CODE (src) == LSHIFTRT)
	{
	  if (CONST_INT_P (XEXP (src, 0)))
	    gain -= vector_const_cost (XEXP (src, 0));
	  if (CONST_INT_P (XEXP (src, 1)))
	    {
	      gain += ix86_cost->shift_const;
	      if (INTVAL (XEXP (src, 1)) >= 32)
		gain -= COSTS_N_INSNS (1);
	    }
	  else
	    /* Additional gain for omitting two CMOVs.  */
	    gain += ix86_cost->shift_var + COSTS_N_INSNS (2);
	}
      else if (GET_CODE (src) == PLUS
	       || GET_CODE (src) == MINUS
	       || GET_CODE (src) == IOR
	       || GET_CODE (src) == XOR
	       || GET_CODE (src) == AND)
	{
	  gain += ix86_cost->add;
	  /* Additional gain for andnot for targets without BMI.  */
	  if (GET_CODE (XEXP (src, 0)) == NOT
	      && !TARGET_BMI)
	    gain += 2 * ix86_cost->add;

	  if (CONST_INT_P (XEXP (src, 0)))
	    gain -= vector_const_cost (XEXP (src, 0));
	  if (CONST_INT_P (XEXP (src, 1)))
	    gain -= vector_const_cost (XEXP (src, 1));
	}
      else if (GET_CODE (src) == NEG
	       || GET_CODE (src) == NOT)
	gain += ix86_cost->add - COSTS_N_INSNS (1);
      else if (GET_CODE (src) == COMPARE)
	{
	  /* Assume comparison cost is the same.  */
	}
      else if (CONST_INT_P (src))
	{
	  if (REG_P (dst))
	    gain += COSTS_N_INSNS (2);
	  else if (MEM_P (dst))
	    gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
	  gain -= vector_const_cost (src);
	}
      else
	gcc_unreachable ();
    }

  if (dump_file)
    fprintf (dump_file, "  Instruction conversion gain: %d\n", gain);

  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
    cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;

  if (dump_file)
    fprintf (dump_file, "  Registers conversion cost: %d\n", cost);

  gain -= cost;

  if (dump_file)
    fprintf (dump_file, "  Total gain: %d\n", gain);

  return gain;
}
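
/* As a worked example of the costing above, a chain holding a single
   register-to-register DImode move gains COSTS_N_INSNS (2) - xmm_move:
   the scalar form needs two SImode moves while the vector form is one
   xmm move.  Each register live in both modes then charges
   mmxsse_to_integer per definition, and the caller only converts chains
   whose total gain remains positive.  */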

/* Replace REG in X with a V2DI subreg of NEW_REG.  */

rtx
dimode_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
{
  if (x == reg)
    return gen_rtx_SUBREG (V2DImode, new_reg, 0);

  const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
  int i, j;
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
						   reg, new_reg);
    }

  return x;
}
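
/* For instance, replace_with_subreg (x, reg, reg) with REG being
   (reg:DI 90) rewrites each occurrence of the register inside X into
   (subreg:V2DI (reg:DI 90) 0), recursing through the rtx format string
   while leaving all other operands untouched; the insn's modes are fixed
   up later by convert_insn.  */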

/* Replace REG in INSN with a V2DI subreg of NEW_REG.  */

void
dimode_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn,
						  rtx reg, rtx new_reg)
{
  replace_with_subreg (single_set (insn), reg, new_reg);
}

/* Insert generated conversion instruction sequence INSNS
   after instruction AFTER.  A new BB may be required when
   the instruction has an EH region attached.  */

void
scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
{
  if (!control_flow_insn_p (after))
    {
      emit_insn_after (insns, after);
      return;
    }

  basic_block bb = BLOCK_FOR_INSN (after);
  edge e = find_fallthru_edge (bb->succs);
  gcc_assert (e);

  basic_block new_bb = split_edge (e);
  emit_insn_after (insns, BB_HEAD (new_bb));
}

/* Make vector copies for all definitions of register REGNO
   and replace their uses in the chain.  */

void
dimode_scalar_chain::make_vector_copies (unsigned regno)
{
  rtx reg = regno_reg_rtx[regno];
  rtx vreg = gen_reg_rtx (DImode);
  bool count_reg = false;
  df_ref ref;

  for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
      {
	df_ref use;

	/* Detect the count register of a shift instruction.  */
	for (use = DF_REG_USE_CHAIN (regno); use; use = DF_REF_NEXT_REG (use))
	  if (bitmap_bit_p (insns, DF_REF_INSN_UID (use)))
	    {
	      rtx_insn *insn = DF_REF_INSN (use);
	      rtx def_set = single_set (insn);

	      gcc_assert (def_set);

	      rtx src = SET_SRC (def_set);

	      if ((GET_CODE (src) == ASHIFT
		   || GET_CODE (src) == ASHIFTRT
		   || GET_CODE (src) == LSHIFTRT)
		  && !CONST_INT_P (XEXP (src, 1))
		  && reg_or_subregno (XEXP (src, 1)) == regno)
		count_reg = true;
	    }

	start_sequence ();
	if (count_reg)
	  {
	    rtx qreg = gen_lowpart (QImode, reg);
	    rtx tmp = gen_reg_rtx (SImode);

	    if (TARGET_ZERO_EXTEND_WITH_AND
		&& optimize_function_for_speed_p (cfun))
	      {
		emit_move_insn (tmp, const0_rtx);
		emit_insn (gen_movstrictqi
			   (gen_lowpart (QImode, tmp), qreg));
	      }
	    else
	      emit_insn (gen_rtx_SET
			 (tmp, gen_rtx_ZERO_EXTEND (SImode, qreg)));

	    if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
	      {
		rtx slot = assign_386_stack_local (SImode, SLOT_STV_TEMP);
		emit_move_insn (slot, tmp);
		tmp = copy_rtx (slot);
	      }

	    emit_insn (gen_zero_extendsidi2 (vreg, tmp));
	  }
	else if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
	  {
	    rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
	    emit_move_insn (adjust_address (tmp, SImode, 0),
			    gen_rtx_SUBREG (SImode, reg, 0));
	    emit_move_insn (adjust_address (tmp, SImode, 4),
			    gen_rtx_SUBREG (SImode, reg, 4));
	    emit_move_insn (vreg, tmp);
	  }
	else if (TARGET_SSE4_1)
	  {
	    emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
					CONST0_RTX (V4SImode),
					gen_rtx_SUBREG (SImode, reg, 0)));
	    emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
					  gen_rtx_SUBREG (V4SImode, vreg, 0),
					  gen_rtx_SUBREG (SImode, reg, 4),
					  GEN_INT (2)));
	  }
	else
	  {
	    rtx tmp = gen_reg_rtx (DImode);
	    emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
					CONST0_RTX (V4SImode),
					gen_rtx_SUBREG (SImode, reg, 0)));
	    emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
					CONST0_RTX (V4SImode),
					gen_rtx_SUBREG (SImode, reg, 4)));
	    emit_insn (gen_vec_interleave_lowv4si
		       (gen_rtx_SUBREG (V4SImode, vreg, 0),
			gen_rtx_SUBREG (V4SImode, vreg, 0),
			gen_rtx_SUBREG (V4SImode, tmp, 0)));
	  }
	rtx_insn *seq = get_insns ();
	end_sequence ();
	rtx_insn *insn = DF_REF_INSN (ref);
	emit_conversion_insns (seq, insn);

	if (dump_file)
	  fprintf (dump_file,
		   "  Copied r%d to a vector register r%d for insn %d\n",
		   regno, REGNO (vreg), INSN_UID (insn));
      }

  for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
      {
	rtx_insn *insn = DF_REF_INSN (ref);
	if (count_reg)
	  {
	    rtx def_set = single_set (insn);
	    gcc_assert (def_set);

	    rtx src = SET_SRC (def_set);

	    if ((GET_CODE (src) == ASHIFT
		 || GET_CODE (src) == ASHIFTRT
		 || GET_CODE (src) == LSHIFTRT)
		&& !CONST_INT_P (XEXP (src, 1))
		&& reg_or_subregno (XEXP (src, 1)) == regno)
	      XEXP (src, 1) = vreg;
	  }
	else
	  replace_with_subreg_in_insn (insn, reg, vreg);

	if (dump_file)
	  fprintf (dump_file, "  Replaced r%d with r%d in insn %d\n",
		   regno, REGNO (vreg), INSN_UID (insn));
      }
}

/* Convert all definitions of register REGNO
   and fix its uses.  Scalar copies may be created
   if the register is used in a non-convertible insn.  */
1870
1871void
1872dimode_scalar_chain::convert_reg (unsigned regno)
1873{
1874 bool scalar_copy = bitmap_bit_p (defs_conv, regno);
1875 rtx reg = regno_reg_rtx[regno];
1876 rtx scopy = NULL_RTX;
1877 df_ref ref;
1878 bitmap conv;
1879
1880 conv = BITMAP_ALLOC (NULL);
1881 bitmap_copy (conv, insns);
1882
1883 if (scalar_copy)
1884 scopy = gen_reg_rtx (DImode);
1885
1886 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1887 {
1888 rtx_insn *insn = DF_REF_INSN (ref);
1889 rtx def_set = single_set (insn);
1890 rtx src = SET_SRC (def_set);
1891 rtx reg = DF_REF_REG (ref);
1892
1893 if (!MEM_P (src))
1894 {
1895 replace_with_subreg_in_insn (insn, reg, reg);
1896 bitmap_clear_bit (conv, INSN_UID (insn));
1897 }
1898
1899 if (scalar_copy)
1900 {
1901 start_sequence ();
1902 if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
1903 {
1904 rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
1905 emit_move_insn (tmp, reg);
1906 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
1907 adjust_address (tmp, SImode, 0));
1908 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
1909 adjust_address (tmp, SImode, 4));
1910 }
1911 else if (TARGET_SSE4_1)
1912 {
1913 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
1914 emit_insn
1915 (gen_rtx_SET
1916 (gen_rtx_SUBREG (SImode, scopy, 0),
1917 gen_rtx_VEC_SELECT (SImode,
1918 gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));
1919
1920 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
1921 emit_insn
1922 (gen_rtx_SET
1923 (gen_rtx_SUBREG (SImode, scopy, 4),
1924 gen_rtx_VEC_SELECT (SImode,
1925 gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));
1926 }
1927 else
1928 {
1929 rtx vcopy = gen_reg_rtx (V2DImode);
1930 emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
1931 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
1932 gen_rtx_SUBREG (SImode, vcopy, 0));
1933 emit_move_insn (vcopy,
1934 gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
1935 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
1936 gen_rtx_SUBREG (SImode, vcopy, 0));
1937 }
1938 rtx_insn *seq = get_insns ();
1939 end_sequence ();
1940 emit_conversion_insns (seq, insn);
1941
1942 if (dump_file)
1943 fprintf (dump_file,
1944 " Copied r%d to a scalar register r%d for insn %d\n",
1945 regno, REGNO (scopy), INSN_UID (insn));
1946 }
1947 }
1948
1949 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1950 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1951 {
1952 if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
1953 {
1954 rtx_insn *insn = DF_REF_INSN (ref);
1955
1956 rtx def_set = single_set (insn);
1957 gcc_assert (def_set);
1958
1959 rtx src = SET_SRC (def_set);
1960 rtx dst = SET_DEST (def_set);
1961
1962 if ((GET_CODE (src) == ASHIFT
1963 || GET_CODE (src) == ASHIFTRT
1964 || GET_CODE (src) == LSHIFTRT)
1965 && !CONST_INT_P (XEXP (src, 1))
1966 && reg_or_subregno (XEXP (src, 1)) == regno)
1967 {
1968 rtx tmp2 = gen_reg_rtx (V2DImode);
1969
1970 start_sequence ();
1971
1972 if (TARGET_SSE4_1)
1973 emit_insn (gen_sse4_1_zero_extendv2qiv2di2
1974 (tmp2, gen_rtx_SUBREG (V16QImode, reg, 0)));
1975 else
1976 {
1977 rtx vec_cst
1978 = gen_rtx_CONST_VECTOR (V2DImode,
1979 gen_rtvec (2, GEN_INT (0xff),
1980 const0_rtx));
1981 vec_cst
1982 = validize_mem (force_const_mem (V2DImode, vec_cst));
1983
1984 emit_insn (gen_rtx_SET
1985 (tmp2,
1986 gen_rtx_AND (V2DImode,
1987 gen_rtx_SUBREG (V2DImode, reg, 0),
1988 vec_cst)));
1989 }
1990 rtx_insn *seq = get_insns ();
1991 end_sequence ();
1992
1993 emit_insn_before (seq, insn);
1994
1995 XEXP (src, 1) = gen_rtx_SUBREG (DImode, tmp2, 0);
1996 }
1997 else if (!MEM_P (dst) || !REG_P (src))
1998 replace_with_subreg_in_insn (insn, reg, reg);
1999
2000 bitmap_clear_bit (conv, INSN_UID (insn));
2001 }
2002 }
2003 /* Skip debug insns and uninitialized uses. */
2004 else if (DF_REF_CHAIN (ref)
2005 && NONDEBUG_INSN_P (DF_REF_INSN (ref)))
2006 {
2007 gcc_assert (scopy);
2008 replace_rtx (DF_REF_INSN (ref), reg, scopy);
2009 df_insn_rescan (DF_REF_INSN (ref));
2010 }
2011
2012 BITMAP_FREE (conv);
2013}
2014
/* Convert operand OP in INSN.  We should handle
   memory operands and uninitialized registers.
   All other register uses are converted during
   register conversion.  */
2019
2020void
2021dimode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
2022{
2023 *op = copy_rtx_if_shared (*op);
2024
2025 if (GET_CODE (*op) == NOT)
2026 {
2027 convert_op (&XEXP (*op, 0), insn);
2028 PUT_MODE (*op, V2DImode);
2029 }
2030 else if (MEM_P (*op))
2031 {
2032 rtx tmp = gen_reg_rtx (DImode);
2033
2034 emit_insn_before (gen_move_insn (tmp, *op), insn);
2035 *op = gen_rtx_SUBREG (V2DImode, tmp, 0);
2036
2037 if (dump_file)
2038 fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
2039 INSN_UID (insn), REGNO (tmp));
2040 }
2041 else if (REG_P (*op))
2042 {
      /* We may not have converted this register use if the
	 register has no definition.  Otherwise it should have
	 been converted in convert_reg.  */
2046 df_ref ref;
2047 FOR_EACH_INSN_USE (ref, insn)
2048 if (DF_REF_REGNO (ref) == REGNO (*op))
2049 {
2050 gcc_assert (!DF_REF_CHAIN (ref));
2051 break;
2052 }
2053 *op = gen_rtx_SUBREG (V2DImode, *op, 0);
2054 }
2055 else if (CONST_INT_P (*op))
2056 {
2057 rtx vec_cst;
2058 rtx tmp = gen_rtx_SUBREG (V2DImode, gen_reg_rtx (DImode), 0);
2059
2060 /* Prefer all ones vector in case of -1. */
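      /* E.g. (illustrative) a DImode constant 5 becomes the V2DImode
	 constant {5, 0}, while -1 becomes the all-ones vector, which
	 is a standard SSE constant and needs no constant-pool load.  */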
2061 if (constm1_operand (*op, GET_MODE (*op)))
2062 vec_cst = CONSTM1_RTX (V2DImode);
2063 else
2064 vec_cst = gen_rtx_CONST_VECTOR (V2DImode,
2065 gen_rtvec (2, *op, const0_rtx));
2066
2067 if (!standard_sse_constant_p (vec_cst, V2DImode))
2068 {
2069 start_sequence ();
2070 vec_cst = validize_mem (force_const_mem (V2DImode, vec_cst));
2071 rtx_insn *seq = get_insns ();
2072 end_sequence ();
2073 emit_insn_before (seq, insn);
2074 }
2075
2076 emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
2077 *op = tmp;
2078 }
2079 else
2080 {
2081 gcc_assert (SUBREG_P (*op));
2082 gcc_assert (GET_MODE (*op) == V2DImode);
2083 }
2084}
2085
2086/* Convert INSN to vector mode. */
2087
2088void
2089dimode_scalar_chain::convert_insn (rtx_insn *insn)
2090{
2091 rtx def_set = single_set (insn);
2092 rtx src = SET_SRC (def_set);
2093 rtx dst = SET_DEST (def_set);
2094 rtx subreg;
2095
2096 if (MEM_P (dst) && !REG_P (src))
2097 {
      /* The converted insn computes a vector value, and there is no
	 scalar instruction to store it to memory directly, so a
	 temporary register is required.  */
2100 rtx tmp = gen_reg_rtx (DImode);
2101 emit_conversion_insns (gen_move_insn (dst, tmp), insn);
2102 dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
2103 }
2104
2105 switch (GET_CODE (src))
2106 {
2107 case ASHIFT:
2108 case ASHIFTRT:
2109 case LSHIFTRT:
2110 convert_op (&XEXP (src, 0), insn);
2111 PUT_MODE (src, V2DImode);
2112 break;
2113
2114 case PLUS:
2115 case MINUS:
2116 case IOR:
2117 case XOR:
2118 case AND:
2119 convert_op (&XEXP (src, 0), insn);
2120 convert_op (&XEXP (src, 1), insn);
2121 PUT_MODE (src, V2DImode);
2122 break;
2123
2124 case NEG:
2125 src = XEXP (src, 0);
2126 convert_op (&src, insn);
2127 subreg = gen_reg_rtx (V2DImode);
2128 emit_insn_before (gen_move_insn (subreg, CONST0_RTX (V2DImode)), insn);
2129 src = gen_rtx_MINUS (V2DImode, subreg, src);
2130 break;
2131
2132 case NOT:
2133 src = XEXP (src, 0);
2134 convert_op (&src, insn);
2135 subreg = gen_reg_rtx (V2DImode);
2136 emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (V2DImode)), insn);
2137 src = gen_rtx_XOR (V2DImode, src, subreg);
2138 break;
2139
2140 case MEM:
2141 if (!REG_P (dst))
2142 convert_op (&src, insn);
2143 break;
2144
2145 case REG:
2146 if (!MEM_P (dst))
2147 convert_op (&src, insn);
2148 break;
2149
2150 case SUBREG:
2151 gcc_assert (GET_MODE (src) == V2DImode);
2152 break;
2153
2154 case COMPARE:
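      /* The compared value is interleaved with itself so that the
	 whole 128-bit register is well defined, and the flags are
	 then set by a ptest (UNSPEC_PTEST).  */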
2155 src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
2156
2157 gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
2158 || (SUBREG_P (src) && GET_MODE (src) == V2DImode));
2159
2160 if (REG_P (src))
2161 subreg = gen_rtx_SUBREG (V2DImode, src, 0);
2162 else
2163 subreg = copy_rtx_if_shared (src);
2164 emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
2165 copy_rtx_if_shared (subreg),
2166 copy_rtx_if_shared (subreg)),
2167 insn);
2168 dst = gen_rtx_REG (CCmode, FLAGS_REG);
2169 src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
2170 copy_rtx_if_shared (src)),
2171 UNSPEC_PTEST);
2172 break;
2173
2174 case CONST_INT:
2175 convert_op (&src, insn);
2176 break;
2177
2178 default:
2179 gcc_unreachable ();
2180 }
2181
2182 SET_SRC (def_set) = src;
2183 SET_DEST (def_set) = dst;
2184
2185 /* Drop possible dead definitions. */
2186 PATTERN (insn) = def_set;
2187
2188 INSN_CODE (insn) = -1;
2189 recog_memoized (insn);
2190 df_insn_rescan (insn);
2191}
2192
2193/* Fix uses of converted REG in debug insns. */
2194
2195void
2196timode_scalar_chain::fix_debug_reg_uses (rtx reg)
2197{
2198 if (!flag_var_tracking)
2199 return;
2200
2201 df_ref ref, next;
2202 for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
2203 {
2204 rtx_insn *insn = DF_REF_INSN (ref);
2205 /* Make sure the next ref is for a different instruction,
2206 so that we're not affected by the rescan. */
2207 next = DF_REF_NEXT_REG (ref);
2208 while (next && DF_REF_INSN (next) == insn)
2209 next = DF_REF_NEXT_REG (next);
2210
2211 if (DEBUG_INSN_P (insn))
2212 {
	  /* It may be a debug insn with a TImode variable in
	     a register.  */
2215 bool changed = false;
2216 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
2217 {
2218 rtx *loc = DF_REF_LOC (ref);
2219 if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
2220 {
2221 *loc = gen_rtx_SUBREG (TImode, *loc, 0);
2222 changed = true;
2223 }
2224 }
2225 if (changed)
2226 df_insn_rescan (insn);
2227 }
2228 }
2229}
2230
/* Convert INSN from TImode to V1TImode.  */
2232
2233void
2234timode_scalar_chain::convert_insn (rtx_insn *insn)
2235{
2236 rtx def_set = single_set (insn);
2237 rtx src = SET_SRC (def_set);
2238 rtx dst = SET_DEST (def_set);
2239
2240 switch (GET_CODE (dst))
2241 {
2242 case REG:
2243 {
2244 rtx tmp = find_reg_equal_equiv_note (insn);
2245 if (tmp)
2246 PUT_MODE (XEXP (tmp, 0), V1TImode);
2247 PUT_MODE (dst, V1TImode);
2248 fix_debug_reg_uses (dst);
2249 }
2250 break;
2251 case MEM:
2252 PUT_MODE (dst, V1TImode);
2253 break;
2254
2255 default:
2256 gcc_unreachable ();
2257 }
2258
2259 switch (GET_CODE (src))
2260 {
2261 case REG:
2262 PUT_MODE (src, V1TImode);
2263 /* Call fix_debug_reg_uses only if SRC is never defined. */
2264 if (!DF_REG_DEF_CHAIN (REGNO (src)))
2265 fix_debug_reg_uses (src);
2266 break;
2267
2268 case MEM:
2269 PUT_MODE (src, V1TImode);
2270 break;
2271
2272 case CONST_WIDE_INT:
2273 if (NONDEBUG_INSN_P (insn))
2274 {
	  /* Since there are no instructions to store a 128-bit
	     constant, temporary register usage is required.  */
2277 rtx tmp = gen_reg_rtx (V1TImode);
2278 start_sequence ();
2279 src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
2280 src = validize_mem (force_const_mem (V1TImode, src));
2281 rtx_insn *seq = get_insns ();
2282 end_sequence ();
2283 if (seq)
2284 emit_insn_before (seq, insn);
2285 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
2286 dst = tmp;
2287 }
2288 break;
2289
2290 case CONST_INT:
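      /* standard_sse_constant_p returns 1 for an all-zeros constant
	 and 2 for an all-ones constant.  */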
2291 switch (standard_sse_constant_p (src, TImode))
2292 {
2293 case 1:
2294 src = CONST0_RTX (GET_MODE (dst));
2295 break;
2296 case 2:
2297 src = CONSTM1_RTX (GET_MODE (dst));
2298 break;
2299 default:
2300 gcc_unreachable ();
2301 }
2302 if (NONDEBUG_INSN_P (insn))
2303 {
2304 rtx tmp = gen_reg_rtx (V1TImode);
	  /* Since there are no instructions to store a standard SSE
	     constant, temporary register usage is required.  */
2307 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
2308 dst = tmp;
2309 }
2310 break;
2311
2312 default:
2313 gcc_unreachable ();
2314 }
2315
2316 SET_SRC (def_set) = src;
2317 SET_DEST (def_set) = dst;
2318
2319 /* Drop possible dead definitions. */
2320 PATTERN (insn) = def_set;
2321
2322 INSN_CODE (insn) = -1;
2323 recog_memoized (insn);
2324 df_insn_rescan (insn);
2325}
2326
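/* Convert registers in the chain: registers defined within the chain
   are converted in place, while registers that need conversion but
   have no definition in the chain get vector copies.  */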
2327void
2328dimode_scalar_chain::convert_registers ()
2329{
2330 bitmap_iterator bi;
2331 unsigned id;
2332
2333 EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
2334 convert_reg (id);
2335
2336 EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
2337 make_vector_copies (id);
2338}
2339
/* Convert the whole chain, creating the required register
   conversions and copies.  Return the number of converted insns.  */
2342
2343int
2344scalar_chain::convert ()
2345{
2346 bitmap_iterator bi;
2347 unsigned id;
2348 int converted_insns = 0;
2349
2350 if (!dbg_cnt (stv_conversion))
2351 return 0;
2352
2353 if (dump_file)
2354 fprintf (dump_file, "Converting chain #%d...\n", chain_id);
2355
2356 convert_registers ();
2357
2358 EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
2359 {
2360 convert_insn (DF_INSN_UID_GET (id)->insn);
2361 converted_insns++;
2362 }
2363
2364 return converted_insns;
2365}
2366
2367/* Main STV pass function. Find and convert scalar
2368 instructions into vector mode when profitable. */
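/* For example (illustrative RTL): on a 32-bit target

     (set (reg:DI 100) (and:DI (reg:DI 101) (mem:DI ...)))

   would normally be split into two SImode ALU instructions, but it
   can be performed by a single pand once r100 and r101 are assigned
   to SSE registers.  */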
2369
2370static unsigned int
2371convert_scalars_to_vector ()
2372{
2373 basic_block bb;
2374 bitmap candidates;
2375 int converted_insns = 0;
2376
2377 bitmap_obstack_initialize (NULL);
2378 candidates = BITMAP_ALLOC (NULL);
2379
2380 calculate_dominance_info (CDI_DOMINATORS);
2381 df_set_flags (DF_DEFER_INSN_RESCAN);
2382 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2383 df_md_add_problem ();
2384 df_analyze ();
2385
2386 /* Find all instructions we want to convert into vector mode. */
2387 if (dump_file)
2388 fprintf (dump_file, "Searching for mode conversion candidates...\n");
2389
2390 FOR_EACH_BB_FN (bb, cfun)
2391 {
2392 rtx_insn *insn;
2393 FOR_BB_INSNS (bb, insn)
2394 if (scalar_to_vector_candidate_p (insn))
2395 {
2396 if (dump_file)
2397 fprintf (dump_file, " insn %d is marked as a candidate\n",
2398 INSN_UID (insn));
2399
2400 bitmap_set_bit (candidates, INSN_UID (insn));
2401 }
2402 }
2403
2404 remove_non_convertible_regs (candidates);
2405
  if (dump_file && bitmap_empty_p (candidates))
    fprintf (dump_file, "There are no candidates for optimization.\n");
2409
2410 while (!bitmap_empty_p (candidates))
2411 {
2412 unsigned uid = bitmap_first_set_bit (candidates);
2413 scalar_chain *chain;
2414
2415 if (TARGET_64BIT)
2416 chain = new timode_scalar_chain;
2417 else
2418 chain = new dimode_scalar_chain;
2419
      /* Find the instruction chain we want to convert to vector mode.
	 Check all uses and definitions to estimate all required
	 conversions.  */
2423 chain->build (candidates, uid);
2424
2425 if (chain->compute_convert_gain () > 0)
2426 converted_insns += chain->convert ();
      else if (dump_file)
	fprintf (dump_file, "Chain #%d conversion is not profitable\n",
		 chain->chain_id);
2431
2432 delete chain;
2433 }
2434
2435 if (dump_file)
2436 fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
2437
2438 BITMAP_FREE (candidates);
2439 bitmap_obstack_release (NULL);
2440 df_process_deferred_rescans ();
2441
  /* Conversion means we may have 128-bit register spills/fills
     which require an aligned stack.  */
2444 if (converted_insns)
2445 {
2446 if (crtl->stack_alignment_needed < 128)
2447 crtl->stack_alignment_needed = 128;
2448 if (crtl->stack_alignment_estimated < 128)
2449 crtl->stack_alignment_estimated = 128;
2450 /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments. */
2451 if (TARGET_64BIT)
2452 for (tree parm = DECL_ARGUMENTS (current_function_decl);
2453 parm; parm = DECL_CHAIN (parm))
2454 {
2455 if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
2456 continue;
2457 if (DECL_RTL_SET_P (parm)
2458 && GET_MODE (DECL_RTL (parm)) == V1TImode)
2459 {
2460 rtx r = DECL_RTL (parm);
2461 if (REG_P (r))
2462 SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
2463 }
2464 if (DECL_INCOMING_RTL (parm)
2465 && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
2466 {
2467 rtx r = DECL_INCOMING_RTL (parm);
2468 if (REG_P (r))
2469 DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
2470 }
2471 }
2472 }
2473
2474 return 0;
2475}
2476
2477namespace {
2478
2479const pass_data pass_data_insert_vzeroupper =
2480{
2481 RTL_PASS, /* type */
2482 "vzeroupper", /* name */
2483 OPTGROUP_NONE, /* optinfo_flags */
2484 TV_MACH_DEP, /* tv_id */
2485 0, /* properties_required */
2486 0, /* properties_provided */
2487 0, /* properties_destroyed */
2488 0, /* todo_flags_start */
2489 TODO_df_finish, /* todo_flags_finish */
2490};
2491
2492class pass_insert_vzeroupper : public rtl_opt_pass
2493{
2494public:
  pass_insert_vzeroupper (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_insert_vzeroupper, ctxt)
2497 {}
2498
2499 /* opt_pass methods: */
2500 virtual bool gate (function *)
2501 {
2502 return TARGET_AVX
2503 && TARGET_VZEROUPPER && flag_expensive_optimizations
2504 && !optimize_size;
2505 }
2506
2507 virtual unsigned int execute (function *)
2508 {
2509 return rest_of_handle_insert_vzeroupper ();
2510 }
2511
2512}; // class pass_insert_vzeroupper
2513
2514const pass_data pass_data_stv =
2515{
2516 RTL_PASS, /* type */
2517 "stv", /* name */
2518 OPTGROUP_NONE, /* optinfo_flags */
2519 TV_MACH_DEP, /* tv_id */
2520 0, /* properties_required */
2521 0, /* properties_provided */
2522 0, /* properties_destroyed */
2523 0, /* todo_flags_start */
2524 TODO_df_finish, /* todo_flags_finish */
2525};
2526
2527class pass_stv : public rtl_opt_pass
2528{
2529public:
2530 pass_stv (gcc::context *ctxt)
2531 : rtl_opt_pass (pass_data_stv, ctxt),
2532 timode_p (false)
2533 {}
2534
2535 /* opt_pass methods: */
2536 virtual bool gate (function *)
2537 {
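    /* The pass is instantiated twice: convert_scalars_to_vector uses
       TImode chains on 64-bit targets and DImode chains on 32-bit
       targets, so the TImode instance must run only for 64-bit code
       and vice versa.  */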
2538 return (timode_p == !!TARGET_64BIT
2539 && TARGET_STV && TARGET_SSE2 && optimize > 1);
2540 }
2541
2542 virtual unsigned int execute (function *)
2543 {
2544 return convert_scalars_to_vector ();
2545 }
2546
2547 opt_pass *clone ()
2548 {
2549 return new pass_stv (m_ctxt);
2550 }
2551
2552 void set_pass_param (unsigned int n, bool param)
2553 {
2554 gcc_assert (n == 0);
2555 timode_p = param;
2556 }
2557
2558private:
2559 bool timode_p;
2560}; // class pass_stv
2561
2562} // anon namespace
2563
2564rtl_opt_pass *
2565make_pass_insert_vzeroupper (gcc::context *ctxt)
2566{
2567 return new pass_insert_vzeroupper (ctxt);
2568}
2569
2570rtl_opt_pass *
2571make_pass_stv (gcc::context *ctxt)
2572{
2573 return new pass_stv (ctxt);
2574}
2575
/* Insert ENDBRANCH instructions for Intel CET indirect branch
   tracking.  */
2577
2578static unsigned int
2579rest_of_insert_endbranch (void)
2580{
2581 timevar_push (TV_MACH_DEP);
2582
2583 rtx cet_eb;
2584 rtx_insn *insn;
2585 basic_block bb;
2586
  /* Currently emit an ENDBRANCH if the function is a tracking
     function, i.e. 'nocf_check' is absent from the function
     attributes.  Later an optimization will be introduced to
     analyze whether the address of a static function is taken;
     a static function whose address is never taken will get the
     nocf_check attribute, reducing the number of ENDBRANCH
     instructions.  */
2592
2593 if (!lookup_attribute ("nocf_check",
2594 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
2595 && !cgraph_node::get (cfun->decl)->only_called_directly_p ())
2596 {
2597 cet_eb = gen_nop_endbr ();
2598
2599 bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
2600 insn = BB_HEAD (bb);
2601 emit_insn_before (cet_eb, insn);
2602 }
2603
2604 bb = 0;
2605 FOR_EACH_BB_FN (bb, cfun)
2606 {
2607 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
2608 insn = NEXT_INSN (insn))
2609 {
2610 if (INSN_P (insn) && GET_CODE (insn) == CALL_INSN)
2611 {
2612 rtx_insn *next_insn = insn;
2613
2614 while ((next_insn != BB_END (bb))
2615 && (DEBUG_INSN_P (NEXT_INSN (next_insn))
2616 || NOTE_P (NEXT_INSN (next_insn))
2617 || BARRIER_P (NEXT_INSN (next_insn))))
2618 next_insn = NEXT_INSN (next_insn);
2619
	      /* Generate an ENDBRANCH after a CALL that can return
		 more than once, i.e. setjmp-like functions.  */
2622 if (find_reg_note (insn, REG_SETJMP, NULL) != NULL)
2623 {
2624 cet_eb = gen_nop_endbr ();
2625 emit_insn_after (cet_eb, next_insn);
2626 }
2627 continue;
2628 }
2629
2630 if (INSN_P (insn) && JUMP_P (insn) && flag_cet_switch)
2631 {
2632 rtx target = JUMP_LABEL (insn);
2633 if (target == NULL_RTX || ANY_RETURN_P (target))
2634 continue;
2635
	  /* Check that the jump targets a switch table.  */
2637 rtx_insn *label = as_a<rtx_insn *> (target);
2638 rtx_insn *table = next_insn (label);
2639 if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
2640 continue;
2641
	  /* For the indirect jump, find all the places it can jump to
	     and insert an ENDBRANCH at each.  This is done under the
	     flag_cet_switch flag, which controls ENDBRANCH generation
	     for switch statements.  */
2645 edge_iterator ei;
2646 edge e;
2647 basic_block dest_blk;
2648
2649 FOR_EACH_EDGE (e, ei, bb->succs)
2650 {
2651 rtx_insn *insn;
2652
2653 dest_blk = e->dest;
2654 insn = BB_HEAD (dest_blk);
2655 gcc_assert (LABEL_P (insn));
2656 cet_eb = gen_nop_endbr ();
2657 emit_insn_after (cet_eb, insn);
2658 }
2659 continue;
2660 }
2661
	  /* TODO: Check the /s bit also.  */
	  if ((LABEL_P (insn) && LABEL_PRESERVE_P (insn))
	      || (NOTE_P (insn)
		  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
	    {
2667 cet_eb = gen_nop_endbr ();
2668 emit_insn_after (cet_eb, insn);
2669 continue;
2670 }
2671 }
2672 }
2673
2674 timevar_pop (TV_MACH_DEP);
2675 return 0;
2676}
2677
2678namespace {
2679
2680const pass_data pass_data_insert_endbranch =
2681{
2682 RTL_PASS, /* type. */
2683 "cet", /* name. */
2684 OPTGROUP_NONE, /* optinfo_flags. */
2685 TV_MACH_DEP, /* tv_id. */
2686 0, /* properties_required. */
2687 0, /* properties_provided. */
2688 0, /* properties_destroyed. */
2689 0, /* todo_flags_start. */
2690 0, /* todo_flags_finish. */
2691};
2692
2693class pass_insert_endbranch : public rtl_opt_pass
2694{
2695public:
2696 pass_insert_endbranch (gcc::context *ctxt)
2697 : rtl_opt_pass (pass_data_insert_endbranch, ctxt)
2698 {}
2699
2700 /* opt_pass methods: */
2701 virtual bool gate (function *)
2702 {
2703 return ((flag_cf_protection & CF_BRANCH) && TARGET_IBT);
2704 }
2705
2706 virtual unsigned int execute (function *)
2707 {
2708 return rest_of_insert_endbranch ();
2709 }
2710
2711}; // class pass_insert_endbranch
2712
2713} // anon namespace
2714
2715rtl_opt_pass *
2716make_pass_insert_endbranch (gcc::context *ctxt)
2717{
2718 return new pass_insert_endbranch (ctxt);
2719}
2720
2721/* Return true if a red-zone is in use. */
2722
2723bool
2724ix86_using_red_zone (void)
2725{
2726 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2727}
2728
2729/* Return a string that documents the current -m options. The caller is
2730 responsible for freeing the string. */
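/* For example (output purely illustrative), for -march=haswell on a
   64-bit target this may produce something like
   "-march=haswell -mtune=haswell -m64 -mavx2 -mfma ...".  */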
2731
2732static char *
2733ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
2734 int flags, int flags2,
2735 const char *arch, const char *tune,
2736 enum fpmath_unit fpmath, bool add_nl_p)
2737{
2738 struct ix86_target_opts
2739 {
2740 const char *option; /* option string */
2741 HOST_WIDE_INT mask; /* isa mask options */
2742 };
2743
  /* This table is ordered so that options like -msse4.2 that imply other
     ISAs come first.  The target string will be displayed in the same
     order.  */
2746 static struct ix86_target_opts isa2_opts[] =
2747 {
2748 { "-mmpx", OPTION_MASK_ISA_MPX },
2749 { "-mavx512vbmi2", OPTION_MASK_ISA_AVX512VBMI2 },
2750 { "-mavx512vnni", OPTION_MASK_ISA_AVX512VNNI },
2751 { "-mvaes", OPTION_MASK_ISA_VAES },
2752 { "-mrdpid", OPTION_MASK_ISA_RDPID },
2753 { "-msgx", OPTION_MASK_ISA_SGX },
2754 { "-mavx5124vnniw", OPTION_MASK_ISA_AVX5124VNNIW },
2755 { "-mavx5124fmaps", OPTION_MASK_ISA_AVX5124FMAPS },
2756 { "-mavx512vpopcntdq", OPTION_MASK_ISA_AVX512VPOPCNTDQ },
2757 { "-mibt", OPTION_MASK_ISA_IBT },
2758 { "-mshstk", OPTION_MASK_ISA_SHSTK }
2759 };
2760 static struct ix86_target_opts isa_opts[] =
2761 {
2762 { "-mgfni", OPTION_MASK_ISA_GFNI },
2763 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2764 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2765 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2766 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2767 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2768 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2769 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2770 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2771 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2772 { "-mavx2", OPTION_MASK_ISA_AVX2 },
2773 { "-mfma", OPTION_MASK_ISA_FMA },
2774 { "-mxop", OPTION_MASK_ISA_XOP },
2775 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2776 { "-mf16c", OPTION_MASK_ISA_F16C },
2777 { "-mavx", OPTION_MASK_ISA_AVX },
2778/* { "-msse4" OPTION_MASK_ISA_SSE4 }, */
2779 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2780 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2781 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2782 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2783 { "-msse3", OPTION_MASK_ISA_SSE3 },
2784 { "-maes", OPTION_MASK_ISA_AES },
2785 { "-msha", OPTION_MASK_ISA_SHA },
2786 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2787 { "-msse2", OPTION_MASK_ISA_SSE2 },
2788 { "-msse", OPTION_MASK_ISA_SSE },
2789 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2790 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2791 { "-mmmx", OPTION_MASK_ISA_MMX },
2792 { "-mrtm", OPTION_MASK_ISA_RTM },
2793 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2794 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2795 { "-madx", OPTION_MASK_ISA_ADX },
2796 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2797 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2798 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2799 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2800 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2801 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2802 { "-mabm", OPTION_MASK_ISA_ABM },
2803 { "-mbmi", OPTION_MASK_ISA_BMI },
2804 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2805 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2806 { "-mtbm", OPTION_MASK_ISA_TBM },
2807 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2808 { "-mcx16", OPTION_MASK_ISA_CX16 },
2809 { "-msahf", OPTION_MASK_ISA_SAHF },
2810 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2811 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2812 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2813 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2814 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
2815 { "-mclzero", OPTION_MASK_ISA_CLZERO },
2816 { "-mpku", OPTION_MASK_ISA_PKU },
2817 { "-mlwp", OPTION_MASK_ISA_LWP },
2818 { "-mhle", OPTION_MASK_ISA_HLE },
2819 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2820 { "-mclwb", OPTION_MASK_ISA_CLWB }
2821 };
2822
2823 /* Flag options. */
2824 static struct ix86_target_opts flag_opts[] =
2825 {
2826 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2827 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2828 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2829 { "-m80387", MASK_80387 },
2830 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2831 { "-malign-double", MASK_ALIGN_DOUBLE },
2832 { "-mcld", MASK_CLD },
2833 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2834 { "-mieee-fp", MASK_IEEE_FP },
2835 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2836 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2837 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2838 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2839 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2840 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2841 { "-mno-red-zone", MASK_NO_RED_ZONE },
2842 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2843 { "-mrecip", MASK_RECIP },
2844 { "-mrtd", MASK_RTD },
2845 { "-msseregparm", MASK_SSEREGPARM },
2846 { "-mstack-arg-probe", MASK_STACK_PROBE },
2847 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2848 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2849 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2850 { "-mvzeroupper", MASK_VZEROUPPER },
2851 { "-mstv", MASK_STV },
2852 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD },
2853 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE },
2854 { "-mcall-ms2sysv-xlogues", MASK_CALL_MS2SYSV_XLOGUES }
2855 };
2856
2857 /* Additional flag options. */
2858 static struct ix86_target_opts flag2_opts[] =
2859 {
2860 { "-mgeneral-regs-only", OPTION_MASK_GENERAL_REGS_ONLY }
2861 };
2862
2863 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (isa2_opts)
2864 + ARRAY_SIZE (flag_opts) + ARRAY_SIZE (flag2_opts) + 6][2];
2865
2866 char isa_other[40];
2867 char isa2_other[40];
2868 char flags_other[40];
2869 char flags2_other[40];
2870 unsigned num = 0;
2871 unsigned i, j;
2872 char *ret;
2873 char *ptr;
2874 size_t len;
2875 size_t line_len;
2876 size_t sep_len;
2877 const char *abi;
2878
2879 memset (opts, '\0', sizeof (opts));
2880
2881 /* Add -march= option. */
2882 if (arch)
2883 {
2884 opts[num][0] = "-march=";
2885 opts[num++][1] = arch;
2886 }
2887
2888 /* Add -mtune= option. */
2889 if (tune)
2890 {
2891 opts[num][0] = "-mtune=";
2892 opts[num++][1] = tune;
2893 }
2894
2895 /* Add -m32/-m64/-mx32. */
2896 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2897 {
2898 if ((isa & OPTION_MASK_ABI_64) != 0)
2899 abi = "-m64";
2900 else
2901 abi = "-mx32";
2902 isa &= ~ (OPTION_MASK_ISA_64BIT
2903 | OPTION_MASK_ABI_64
2904 | OPTION_MASK_ABI_X32);
2905 }
2906 else
2907 abi = "-m32";
2908 opts[num++][0] = abi;
2909
2910 /* Pick out the options in isa2 options. */
2911 for (i = 0; i < ARRAY_SIZE (isa2_opts); i++)
2912 {
2913 if ((isa2 & isa2_opts[i].mask) != 0)
2914 {
2915 opts[num++][0] = isa2_opts[i].option;
2916 isa2 &= ~ isa2_opts[i].mask;
2917 }
2918 }
2919
2920 if (isa2 && add_nl_p)
2921 {
2922 opts[num++][0] = isa2_other;
2923 sprintf (isa2_other, "(other isa2: %#" HOST_WIDE_INT_PRINT "x)", isa2);
2924 }
2925
2926 /* Pick out the options in isa options. */
2927 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2928 {
2929 if ((isa & isa_opts[i].mask) != 0)
2930 {
2931 opts[num++][0] = isa_opts[i].option;
2932 isa &= ~ isa_opts[i].mask;
2933 }
2934 }
2935
2936 if (isa && add_nl_p)
2937 {
2938 opts[num++][0] = isa_other;
2939 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)", isa);
2940 }
2941
2942 /* Add flag options. */
2943 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2944 {
2945 if ((flags & flag_opts[i].mask) != 0)
2946 {
2947 opts[num++][0] = flag_opts[i].option;
2948 flags &= ~ flag_opts[i].mask;
2949 }
2950 }
2951
2952 if (flags && add_nl_p)
2953 {
2954 opts[num++][0] = flags_other;
2955 sprintf (flags_other, "(other flags: %#x)", flags);
2956 }
2957
2958 /* Add additional flag options. */
2959 for (i = 0; i < ARRAY_SIZE (flag2_opts); i++)
2960 {
2961 if ((flags2 & flag2_opts[i].mask) != 0)
2962 {
2963 opts[num++][0] = flag2_opts[i].option;
2964 flags2 &= ~ flag2_opts[i].mask;
2965 }
2966 }
2967
2968 if (flags2 && add_nl_p)
2969 {
2970 opts[num++][0] = flags2_other;
2971 sprintf (flags2_other, "(other flags2: %#x)", flags2);
2972 }
2973
2974 /* Add -fpmath= option. */
2975 if (fpmath)
2976 {
2977 opts[num][0] = "-mfpmath=";
2978 switch ((int) fpmath)
2979 {
2980 case FPMATH_387:
2981 opts[num++][1] = "387";
2982 break;
2983
2984 case FPMATH_SSE:
2985 opts[num++][1] = "sse";
2986 break;
2987
2988 case FPMATH_387 | FPMATH_SSE:
2989 opts[num++][1] = "sse+387";
2990 break;
2991
2992 default:
2993 gcc_unreachable ();
2994 }
2995 }
2996
2997 /* Any options? */
2998 if (num == 0)
2999 return NULL;
3000
3001 gcc_assert (num < ARRAY_SIZE (opts));
3002
3003 /* Size the string. */
3004 len = 0;
3005 sep_len = (add_nl_p) ? 3 : 1;
3006 for (i = 0; i < num; i++)
3007 {
3008 len += sep_len;
3009 for (j = 0; j < 2; j++)
3010 if (opts[i][j])
3011 len += strlen (opts[i][j]);
3012 }
3013
3014 /* Build the string. */
3015 ret = ptr = (char *) xmalloc (len);
3016 line_len = 0;
3017
3018 for (i = 0; i < num; i++)
3019 {
3020 size_t len2[2];
3021
3022 for (j = 0; j < 2; j++)
3023 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3024
3025 if (i != 0)
3026 {
3027 *ptr++ = ' ';
3028 line_len++;
3029
3030 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3031 {
3032 *ptr++ = '\\';
3033 *ptr++ = '\n';
3034 line_len = 0;
3035 }
3036 }
3037
3038 for (j = 0; j < 2; j++)
3039 if (opts[i][j])
3040 {
3041 memcpy (ptr, opts[i][j], len2[j]);
3042 ptr += len2[j];
3043 line_len += len2[j];
3044 }
3045 }
3046
3047 *ptr = '\0';
3048 gcc_assert (ret + len >= ptr);
3049
3050 return ret;
3051}
3052
/* Return true if profiling code should be emitted before the
   prologue, and false otherwise.
   Note: For x86 the "hotfix" case is rejected with sorry ().  */
3056static bool
3057ix86_profile_before_prologue (void)
3058{
3059 return flag_fentry != 0;
3060}
3061
3062/* Function that is callable from the debugger to print the current
3063 options. */
3064void ATTRIBUTE_UNUSED
3065ix86_debug_options (void)
3066{
3067 char *opts = ix86_target_string (ix86_isa_flags, ix86_isa_flags2,
3068 target_flags, ix86_target_flags,
				   ix86_arch_string, ix86_tune_string,
3070 ix86_fpmath, true);
3071
3072 if (opts)
3073 {
3074 fprintf (stderr, "%s\n\n", opts);
3075 free (opts);
3076 }
3077 else
3078 fputs ("<no options>\n\n", stderr);
3079
3080 return;
3081}
3082
/* Return true if T is one of the bytes we should avoid with
   -fmitigate-rop: the near/far RET opcodes 0xc2, 0xc3, 0xca and 0xcb.  */
3085
3086static bool
3087ix86_rop_should_change_byte_p (int t)
3088{
3089 return t == 0xc2 || t == 0xc3 || t == 0xca || t == 0xcb;
3090}
3091
3092static const char *stringop_alg_names[] = {
3093#define DEF_ENUM
3094#define DEF_ALG(alg, name) #name,
3095#include "stringop.def"
3096#undef DEF_ENUM
3097#undef DEF_ALG
3098};
3099
/* Parse the parameter string passed to -mmemcpy-strategy= or
   -mmemset-strategy=.  The string is of the following form (or a
   comma-separated list of such entries):
3102
3103 strategy_alg:max_size:[align|noalign]
3104
3105 where the full size range for the strategy is either [0, max_size] or
3106 [min_size, max_size], in which min_size is the max_size + 1 of the
3107 preceding range. The last size range must have max_size == -1.
3108
3109 Examples:
3110
3111 1.
3112 -mmemcpy-strategy=libcall:-1:noalign
3113
3114 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
3115
3116
3117 2.
3118 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
3119
3120 This is to tell the compiler to use the following strategy for memset
3121 1) when the expected size is between [1, 16], use rep_8byte strategy;
3122 2) when the size is between [17, 2048], use vector_loop;
3123 3) when the size is > 2048, use libcall. */
3124
3125struct stringop_size_range
3126{
3127 int max;
3128 stringop_alg alg;
3129 bool noalign;
3130};
3131
3132static void
3133ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
3134{
3135 const struct stringop_algs *default_algs;
3136 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
3137 char *curr_range_str, *next_range_str;
  const char *opt = is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=";
3139 int i = 0, n = 0;
3140
3141 if (is_memset)
3142 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
3143 else
3144 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
3145
3146 curr_range_str = strategy_str;
3147
3148 do
3149 {
3150 int maxs;
3151 char alg_name[128];
3152 char align[16];
3153 next_range_str = strchr (curr_range_str, ',');
3154 if (next_range_str)
3155 *next_range_str++ = '\0';
3156
3157 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
3158 alg_name, &maxs, align))
3159 {
3160 error ("wrong argument %qs to option %qs", curr_range_str, opt);
3161 return;
3162 }
3163
3164 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
3165 {
3166 error ("size ranges of option %qs should be increasing", opt);
3167 return;
3168 }
3169
3170 for (i = 0; i < last_alg; i++)
3171 if (!strcmp (alg_name, stringop_alg_names[i]))
3172 break;
3173
3174 if (i == last_alg)
3175 {
3176 error ("wrong strategy name %qs specified for option %qs",
3177 alg_name, opt);
3178
3179 auto_vec <const char *> candidates;
3180 for (i = 0; i < last_alg; i++)
3181 if ((stringop_alg) i != rep_prefix_8_byte || TARGET_64BIT)
3182 candidates.safe_push (stringop_alg_names[i]);
3183
3184 char *s;
3185 const char *hint
3186 = candidates_list_and_hint (alg_name, s, candidates);
3187 if (hint)
3188 inform (input_location,
3189 "valid arguments to %qs are: %s; did you mean %qs?",
3190 opt, s, hint);
3191 else
3192 inform (input_location, "valid arguments to %qs are: %s",
3193 opt, s);
3194 XDELETEVEC (s);
3195 return;
3196 }
3197
3198 if ((stringop_alg) i == rep_prefix_8_byte
3199 && !TARGET_64BIT)
3200 {
3201 /* rep; movq isn't available in 32-bit code. */
3202 error ("strategy name %qs specified for option %qs "
3203 "not supported for 32-bit code", alg_name, opt);
3204 return;
3205 }
3206
      if (n >= MAX_STRINGOP_ALGS)
	{
	  error ("too many size ranges specified in option %qs", opt);
	  return;
	}

      input_ranges[n].max = maxs;
3208 input_ranges[n].alg = (stringop_alg) i;
3209 if (!strcmp (align, "align"))
3210 input_ranges[n].noalign = false;
3211 else if (!strcmp (align, "noalign"))
3212 input_ranges[n].noalign = true;
3213 else
3214 {
3215 error ("unknown alignment %qs specified for option %qs", align, opt);
3216 return;
3217 }
3218 n++;
3219 curr_range_str = next_range_str;
3220 }
3221 while (curr_range_str);
3222
3223 if (input_ranges[n - 1].max != -1)
3224 {
3225 error ("the max value for the last size range should be -1"
3226 " for option %qs", opt);
3227 return;
3228 }
3235
3236 /* Now override the default algs array. */
3237 for (i = 0; i < n; i++)
3238 {
3239 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3240 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3241 = input_ranges[i].alg;
3242 *const_cast<int *>(&default_algs->size[i].noalign)
3243 = input_ranges[i].noalign;
3244 }
3245}
3246
3247
/* Parse the -mtune-ctrl= option.  When DUMP is true,
   print the features that are explicitly set.  */
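/* For example (feature names illustrative):
     -mtune-ctrl=use_gather,^partial_reg_stall
   sets the first feature and clears the second; a leading '^'
   negates a feature.  */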
3250
3251static void
3252parse_mtune_ctrl_str (bool dump)
3253{
3254 if (!ix86_tune_ctrl_string)
3255 return;
3256
3257 char *next_feature_string = NULL;
3258 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3259 char *orig = curr_feature_string;
3260 int i;
3261 do
3262 {
3263 bool clear = false;
3264
3265 next_feature_string = strchr (curr_feature_string, ',');
3266 if (next_feature_string)
3267 *next_feature_string++ = '\0';
3268 if (*curr_feature_string == '^')
3269 {
3270 curr_feature_string++;
3271 clear = true;
3272 }
3273 for (i = 0; i < X86_TUNE_LAST; i++)
3274 {
3275 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3276 {
3277 ix86_tune_features[i] = !clear;
3278 if (dump)
3279 fprintf (stderr, "Explicitly %s feature %s\n",
3280 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3281 break;
3282 }
3283 }
3284 if (i == X86_TUNE_LAST)
3285 error ("unknown parameter to option -mtune-ctrl: %s",
3286 clear ? curr_feature_string - 1 : curr_feature_string);
3287 curr_feature_string = next_feature_string;
3288 }
3289 while (curr_feature_string);
3290 free (orig);
3291}
3292
3293/* Helper function to set ix86_tune_features. IX86_TUNE is the
3294 processor type. */
3295
3296static void
3297set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3298{
3299 unsigned int ix86_tune_mask = 1u << ix86_tune;
3300 int i;
3301
3302 for (i = 0; i < X86_TUNE_LAST; ++i)
3303 {
3304 if (ix86_tune_no_default)
3305 ix86_tune_features[i] = 0;
3306 else
3307 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3308 }
3309
3310 if (dump)
3311 {
3312 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3313 for (i = 0; i < X86_TUNE_LAST; i++)
3314 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3315 ix86_tune_features[i] ? "on" : "off");
3316 }
3317
3318 parse_mtune_ctrl_str (dump);
3319}
3320
3321
3322/* Default align_* from the processor table. */
3323
3324static void
3325ix86_default_align (struct gcc_options *opts)
3326{
3327 if (opts->x_align_loops == 0)
3328 {
3329 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3330 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3331 }
3332 if (opts->x_align_jumps == 0)
3333 {
3334 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3335 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3336 }
3337 if (opts->x_align_functions == 0)
3338 {
3339 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3340 }
3341}
3342
3343/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
3344
3345static void
3346ix86_override_options_after_change (void)
3347{
3348 ix86_default_align (&global_options);
3349}
3350
/* Override various settings based on options.  If MAIN_ARGS_P, the
   options are from the command line, otherwise they are from
   attributes.  Return true if there's an error related to the -march
   option.  */
3355
3356static bool
3357ix86_option_override_internal (bool main_args_p,
3358 struct gcc_options *opts,
3359 struct gcc_options *opts_set)
3360{
3361 int i;
3362 unsigned int ix86_arch_mask;
3363 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3364
3365#define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3366#define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3367#define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3368#define PTA_ABM (HOST_WIDE_INT_1 << 3)
3369#define PTA_AES (HOST_WIDE_INT_1 << 4)
3370#define PTA_AVX (HOST_WIDE_INT_1 << 5)
3371#define PTA_BMI (HOST_WIDE_INT_1 << 6)
3372#define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3373#define PTA_F16C (HOST_WIDE_INT_1 << 8)
3374#define PTA_FMA (HOST_WIDE_INT_1 << 9)
3375#define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3376#define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3377#define PTA_LWP (HOST_WIDE_INT_1 << 12)
3378#define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3379#define PTA_MMX (HOST_WIDE_INT_1 << 14)
3380#define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3381#define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3382#define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3383#define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3384#define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3385#define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3386#define PTA_SSE (HOST_WIDE_INT_1 << 21)
3387#define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3388#define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3389#define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3390#define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3391#define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3392#define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3393#define PTA_TBM (HOST_WIDE_INT_1 << 28)
3394#define PTA_XOP (HOST_WIDE_INT_1 << 29)
3395#define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3396#define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3397#define PTA_RTM (HOST_WIDE_INT_1 << 32)
3398#define PTA_HLE (HOST_WIDE_INT_1 << 33)
3399#define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3400#define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3401#define PTA_ADX (HOST_WIDE_INT_1 << 36)
3402#define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3403#define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3404#define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3405#define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3406#define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3407#define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3408#define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3409#define PTA_MPX (HOST_WIDE_INT_1 << 44)
3410#define PTA_SHA (HOST_WIDE_INT_1 << 45)
3411#define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3412#define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3413#define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3414#define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3415#define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3416#define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3417#define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3418#define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3419#define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3420#define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3421#define PTA_MWAITX (HOST_WIDE_INT_1 << 56)
3422#define PTA_CLZERO (HOST_WIDE_INT_1 << 57)
3423#define PTA_NO_80387 (HOST_WIDE_INT_1 << 58)
3424#define PTA_PKU (HOST_WIDE_INT_1 << 59)
3425#define PTA_AVX5124VNNIW (HOST_WIDE_INT_1 << 60)
3426#define PTA_AVX5124FMAPS (HOST_WIDE_INT_1 << 61)
3427#define PTA_AVX512VPOPCNTDQ (HOST_WIDE_INT_1 << 62)
3428#define PTA_SGX (HOST_WIDE_INT_1 << 63)
3429
3430#define PTA_CORE2 \
3431 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3432 | PTA_CX16 | PTA_FXSR)
3433#define PTA_NEHALEM \
3434 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3435#define PTA_WESTMERE \
3436 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3437#define PTA_SANDYBRIDGE \
3438 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3439#define PTA_IVYBRIDGE \
3440 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3441#define PTA_HASWELL \
3442 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3443 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3444#define PTA_BROADWELL \
3445 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3446#define PTA_SKYLAKE \
3447 (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES)
3448#define PTA_SKYLAKE_AVX512 \
3449 (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \
3450 | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU | PTA_CLWB)
3451#define PTA_CANNONLAKE \
3452 (PTA_SKYLAKE_AVX512 | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA)
3453#define PTA_KNL \
3454 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3455#define PTA_BONNELL \
3456 (PTA_CORE2 | PTA_MOVBE)
3457#define PTA_SILVERMONT \
3458 (PTA_WESTMERE | PTA_MOVBE)
3459#define PTA_KNM \
3460 (PTA_KNL | PTA_AVX5124VNNIW | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ)
3461
/* If this reaches 64, we need to widen the struct pta flags below.  */
3463
3464 static struct pta
3465 {
3466 const char *const name; /* processor name or nickname. */
3467 const enum processor_type processor;
3468 const enum attr_cpu schedule;
3469 const unsigned HOST_WIDE_INT flags;
3470 }
3471 const processor_alias_table[] =
3472 {
3473 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3474 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3475 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3476 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3477 {"lakemont", PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387},
3478 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3479 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3480 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3481 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3482 {"samuel-2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3483 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3484 PTA_MMX | PTA_SSE | PTA_FXSR},
3485 {"nehemiah", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3486 PTA_MMX | PTA_SSE | PTA_FXSR},
3487 {"c7", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3488 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3489 {"esther", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3490 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3491 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3492 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3493 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3494 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3495 PTA_MMX | PTA_SSE | PTA_FXSR},
3496 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3497 PTA_MMX | PTA_SSE | PTA_FXSR},
3498 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3499 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3500 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
      PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3502 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3503 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3504 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3505 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3506 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3507 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3508 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3509 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3510 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3511 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3512 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3513 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3514 PTA_SANDYBRIDGE},
3515 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3516 PTA_SANDYBRIDGE},
3517 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3518 PTA_IVYBRIDGE},
3519 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3520 PTA_IVYBRIDGE},
3521 {"haswell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
3522 {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
3523 {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
3524 {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE},
3525 {"skylake-avx512", PROCESSOR_SKYLAKE_AVX512, CPU_HASWELL,
3526 PTA_SKYLAKE_AVX512},
3527 {"cannonlake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_CANNONLAKE},
3528 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3529 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3530 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3531 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3532 {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
3533 {"knm", PROCESSOR_KNM, CPU_SLM, PTA_KNM},
3534 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3535 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3536 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3537 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3538 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3539 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3540 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3541 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3542 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3543 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3544 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3545 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3546 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3547 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3548 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3549 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3550 {"x86-64", PROCESSOR_K8, CPU_K8,
3551 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3552 {"eden-x2", PROCESSOR_K8, CPU_K8,
3553 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3554 {"nano", PROCESSOR_K8, CPU_K8,
3555 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3556 | PTA_SSSE3 | PTA_FXSR},
3557 {"nano-1000", PROCESSOR_K8, CPU_K8,
3558 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3559 | PTA_SSSE3 | PTA_FXSR},
3560 {"nano-2000", PROCESSOR_K8, CPU_K8,
3561 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3562 | PTA_SSSE3 | PTA_FXSR},
3563 {"nano-3000", PROCESSOR_K8, CPU_K8,
3564 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3565 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3566 {"nano-x2", PROCESSOR_K8, CPU_K8,
3567 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3568 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3569 {"eden-x4", PROCESSOR_K8, CPU_K8,
3570 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3571 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3572 {"nano-x4", PROCESSOR_K8, CPU_K8,
3573 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3574 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3575 {"k8", PROCESSOR_K8, CPU_K8,
3576 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3577 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3578 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3579 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3580 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3581 {"opteron", PROCESSOR_K8, CPU_K8,
3582 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3583 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3584 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3585 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3586 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3587 {"athlon64", PROCESSOR_K8, CPU_K8,
3588 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3589 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3590 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3591 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3592 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3593 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3594 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3595 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3596 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3597 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3598 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3599 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3600 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3601 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3602 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3603 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3604 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3605 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3606 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3607 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3608 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3609 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3610 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3611 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3612 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3613 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3614 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3615 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3616 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3617 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3618 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3619 | PTA_XSAVEOPT | PTA_FSGSBASE},
3620 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3621 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3622 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3623 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3624 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3625 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3626 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3627 | PTA_MOVBE | PTA_MWAITX},
3628 {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
3629 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3630 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3631 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3632 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
3633 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
3634 | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
3635 | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
3636 | PTA_SHA | PTA_LZCNT | PTA_POPCNT},
3637 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3638 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
      | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3640 | PTA_FXSR | PTA_XSAVE},
3641 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3642 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
      | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3644 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3645 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3646 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3647
3648 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3649 PTA_64BIT
3650 | PTA_HLE /* flags are only used for -march switch. */ },
3651 };
3652
3653 /* -mrecip options. */
3654 static struct
3655 {
3656 const char *string; /* option name */
3657 unsigned int mask; /* mask bits to set */
3658 }
3659 const recip_options[] =
3660 {
3661 { "all", RECIP_MASK_ALL },
3662 { "none", RECIP_MASK_NONE },
3663 { "div", RECIP_MASK_DIV },
3664 { "sqrt", RECIP_MASK_SQRT },
3665 { "vec-div", RECIP_MASK_VEC_DIV },
3666 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3667 };
3668
3669 int const pta_size = ARRAY_SIZE (processor_alias_table);
3670
3671 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3672 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3673 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3674 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
#ifdef TARGET_BI_ARCH
  else
    {
#if TARGET_BI_ARCH == 1
      /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
	 is on and OPTION_MASK_ABI_X32 is off.  We turn off
	 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
	 -mx32.  */
      if (TARGET_X32_P (opts->x_ix86_isa_flags))
	opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
#else
      /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
	 on and OPTION_MASK_ABI_64 is off.  We turn off
	 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
	 -m64 or OPTION_MASK_CODE16 is turned on by -m16.  */
      if (TARGET_LP64_P (opts->x_ix86_isa_flags)
	  || TARGET_16BIT_P (opts->x_ix86_isa_flags))
	opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
#endif
      if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
	  && TARGET_IAMCU_P (opts->x_target_flags))
	sorry ("Intel MCU psABI isn%'t supported in %s mode",
	       TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
    }
#endif

  if (TARGET_X32_P (opts->x_ix86_isa_flags))
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
	 OPTION_MASK_ABI_64 for TARGET_X32.  */
      opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
    }
  else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
    opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
				| OPTION_MASK_ABI_X32
				| OPTION_MASK_ABI_64);
  else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
	 OPTION_MASK_ABI_X32 for TARGET_LP64.  */
      opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
    }
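
  /* At this point the ISA/ABI masks are mutually consistent: x32 and
     LP64 both imply the 64-bit ISA but exclude each other's ABI
     mask, while 16-bit code runs with all three masks cleared.  */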

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* -fPIC is the default for 64-bit Mach-O (Darwin).  */
  if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
    opts->x_flag_pic = 2;

  /* Need to check -mtune=generic first.  */
  if (opts->x_ix86_tune_string)
    {
      /* As special support for cross compilers we read -mtune=native
	 as -mtune=generic.  With native compilers we won't see
	 -mtune=native, as it will have been rewritten by the
	 driver.  */
      if (!strcmp (opts->x_ix86_tune_string, "native"))
	opts->x_ix86_tune_string = "generic";
      else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
	warning (OPT_Wdeprecated,
		 main_args_p
		 ? G_("%<-mtune=x86-64%> is deprecated; use %<-mtune=k8%> "
		      "or %<-mtune=generic%> instead as appropriate")
		 : G_("%<target(\"tune=x86-64\")%> is deprecated; use "
		      "%<target(\"tune=k8\")%> or %<target(\"tune=generic\")%>"
		      " instead as appropriate"));
    }
  else
    {
      if (opts->x_ix86_arch_string)
	opts->x_ix86_tune_string = opts->x_ix86_arch_string;
      if (!opts->x_ix86_tune_string)
	{
	  opts->x_ix86_tune_string
	    = processor_target_table[TARGET_CPU_DEFAULT].name;
	  ix86_tune_defaulted = 1;
	}

      /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
	 or defaulted.  We need to use a sensible tune option.  */
      if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
	opts->x_ix86_tune_string = "generic";
    }

  if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
      && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
    {
      /* rep; movq isn't available in 32-bit code.  */
      error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
      opts->x_ix86_stringop_alg = no_stringop;
    }

  if (!opts->x_ix86_arch_string)
    opts->x_ix86_arch_string
      = TARGET_64BIT_P (opts->x_ix86_isa_flags)
	? "x86-64" : SUBTARGET32_DEFAULT_CPU;
  else
    ix86_arch_specified = 1;

  if (opts_set->x_ix86_pmode)
    {
      if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
	   && opts->x_ix86_pmode == PMODE_SI)
	  || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
	      && opts->x_ix86_pmode == PMODE_DI))
	error ("address mode %qs not supported in the %s bit mode",
	       TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
	       TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
    }
  else
    opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
			 ? PMODE_DI : PMODE_SI;
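
  /* Note that x32 therefore defaults to PMODE_SI: it runs the 64-bit
     ISA with 32-bit pointers, so only an explicit -maddress-mode=long
     gives it DImode addresses.  */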

  if (!opts_set->x_ix86_abi)
    opts->x_ix86_abi = DEFAULT_ABI;

  if (opts->x_ix86_abi == MS_ABI && TARGET_X32_P (opts->x_ix86_isa_flags))
    error ("-mabi=ms not supported with X32 ABI");
  gcc_assert (opts->x_ix86_abi == SYSV_ABI || opts->x_ix86_abi == MS_ABI);

  /* For targets using the MS ABI enable ms-extensions, if not
     explicitly turned off.  For non-MS ABIs we turn this option
     off.  */
  if (!opts_set->x_flag_ms_extensions)
    opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);

  if (opts_set->x_ix86_cmodel)
    {
      switch (opts->x_ix86_cmodel)
	{
	case CM_SMALL:
	case CM_SMALL_PIC:
	  if (opts->x_flag_pic)
	    opts->x_ix86_cmodel = CM_SMALL_PIC;
	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "small", "32");
	  break;

	case CM_MEDIUM:
	case CM_MEDIUM_PIC:
	  if (opts->x_flag_pic)
	    opts->x_ix86_cmodel = CM_MEDIUM_PIC;
	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "medium", "32");
	  else if (TARGET_X32_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in x32 mode",
		   "medium");
	  break;

	case CM_LARGE:
	case CM_LARGE_PIC:
	  if (opts->x_flag_pic)
	    opts->x_ix86_cmodel = CM_LARGE_PIC;
	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "large", "32");
	  else if (TARGET_X32_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in x32 mode",
		   "large");
	  break;

	case CM_32:
	  if (opts->x_flag_pic)
	    error ("code model %s does not support PIC mode", "32");
	  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "32", "64");
	  break;

	case CM_KERNEL:
	  if (opts->x_flag_pic)
	    {
	      error ("code model %s does not support PIC mode", "kernel");
	      opts->x_ix86_cmodel = CM_32;
	    }
	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "kernel", "32");
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
	 use of rip-relative addressing.  This eliminates fixups that
	 would otherwise be needed if this object is to be placed in a
	 DLL, and is essentially just as efficient as direct addressing.  */
      if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
	  && (TARGET_RDOS || TARGET_PECOFF))
	{
	  opts->x_ix86_cmodel = CM_MEDIUM_PIC;
	  opts->x_flag_pic = 1;
	}
      else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
	opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else
	opts->x_ix86_cmodel = CM_32;
    }
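
  /* In short: plain 64-bit code defaults to the small model (its PIC
     variant under -fpic), 64-bit PE-COFF/RDOS targets force medium
     PIC for rip-relative addressing, and all 32-bit code uses
     CM_32.  */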

  if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
    {
      error ("-masm=intel not supported in this configuration");
      opts->x_ix86_asm_dialect = ASM_ATT;
    }
  if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
      != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
      {
	if (!strcmp (opts->x_ix86_arch_string, "generic"))
	  {
	    error (main_args_p
		   ? G_("%<generic%> CPU can be used only for %<-mtune=%> "
			"switch")
		   : G_("%<generic%> CPU can be used only for "
			"%<target(\"tune=\")%> attribute"));
	    return false;
	  }
	else if (!strcmp (opts->x_ix86_arch_string, "intel"))
	  {
	    error (main_args_p
		   ? G_("%<intel%> CPU can be used only for %<-mtune=%> "
			"switch")
		   : G_("%<intel%> CPU can be used only for "
			"%<target(\"tune=\")%> attribute"));
	    return false;
	  }

	if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
	    && !(processor_alias_table[i].flags & PTA_64BIT))
	  {
	    error ("CPU you selected does not support x86-64 "
		   "instruction set");
	    return false;
	  }

	ix86_schedule = processor_alias_table[i].schedule;
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;

	if (processor_alias_table[i].flags & PTA_MMX
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
	if (processor_alias_table[i].flags & PTA_SSSE3
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
	if (processor_alias_table[i].flags & PTA_SSE4_1
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
	if (processor_alias_table[i].flags & PTA_SSE4_2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
	if (processor_alias_table[i].flags & PTA_AVX
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
	if (processor_alias_table[i].flags & PTA_AVX2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
	if (processor_alias_table[i].flags & PTA_FMA
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
	if (processor_alias_table[i].flags & PTA_SSE4A
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
	if (processor_alias_table[i].flags & PTA_FMA4
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
	if (processor_alias_table[i].flags & PTA_XOP
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
	if (processor_alias_table[i].flags & PTA_LWP
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
	if (processor_alias_table[i].flags & PTA_ABM
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
	if (processor_alias_table[i].flags & PTA_BMI
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
	if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
	if (processor_alias_table[i].flags & PTA_TBM
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
	if (processor_alias_table[i].flags & PTA_BMI2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
	if (processor_alias_table[i].flags & PTA_CX16
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
	if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
	      && (processor_alias_table[i].flags & PTA_NO_SAHF))
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
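
	/* That is, SAHF is assumed available except on 64-bit chips
	   whose table entry carries PTA_NO_SAHF (early CPUs lacking
	   LAHF/SAHF in long mode); in 32-bit mode it always
	   exists.  */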
	if (processor_alias_table[i].flags & PTA_MOVBE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
	if (processor_alias_table[i].flags & PTA_AES
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
	if (processor_alias_table[i].flags & PTA_SHA
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
	if (processor_alias_table[i].flags & PTA_PCLMUL
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
	if (processor_alias_table[i].flags & PTA_FSGSBASE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
	if (processor_alias_table[i].flags & PTA_RDRND
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
	if (processor_alias_table[i].flags & PTA_F16C
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
	if (processor_alias_table[i].flags & PTA_RTM
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
	if (processor_alias_table[i].flags & PTA_HLE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
	if (processor_alias_table[i].flags & PTA_PRFCHW
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
	if (processor_alias_table[i].flags & PTA_RDSEED
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
	if (processor_alias_table[i].flags & PTA_ADX
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
	if (processor_alias_table[i].flags & PTA_FXSR
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
	if (processor_alias_table[i].flags & PTA_XSAVE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
	if (processor_alias_table[i].flags & PTA_XSAVEOPT
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
	if (processor_alias_table[i].flags & PTA_AVX512F
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
	if (processor_alias_table[i].flags & PTA_AVX512ER
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
	if (processor_alias_table[i].flags & PTA_AVX512PF
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
	if (processor_alias_table[i].flags & PTA_AVX512CD
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
	if (processor_alias_table[i].flags & PTA_PREFETCHWT1
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
	if (processor_alias_table[i].flags & PTA_CLWB
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
	if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
	if (processor_alias_table[i].flags & PTA_CLZERO
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLZERO))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLZERO;
	if (processor_alias_table[i].flags & PTA_XSAVEC
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
	if (processor_alias_table[i].flags & PTA_XSAVES
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
	if (processor_alias_table[i].flags & PTA_AVX512DQ
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
	if (processor_alias_table[i].flags & PTA_AVX512BW
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
	if (processor_alias_table[i].flags & PTA_AVX512VL
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
	if (processor_alias_table[i].flags & PTA_MPX
	    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MPX))
	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MPX;
	if (processor_alias_table[i].flags & PTA_AVX512VBMI
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
	if (processor_alias_table[i].flags & PTA_AVX512IFMA
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;

	if (processor_alias_table[i].flags & PTA_AVX5124VNNIW
	    && !(opts->x_ix86_isa_flags2_explicit
		 & OPTION_MASK_ISA_AVX5124VNNIW))
	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124VNNIW;
	if (processor_alias_table[i].flags & PTA_AVX5124FMAPS
	    && !(opts->x_ix86_isa_flags2_explicit
		 & OPTION_MASK_ISA_AVX5124FMAPS))
	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124FMAPS;
	if (processor_alias_table[i].flags & PTA_AVX512VPOPCNTDQ
	    && !(opts->x_ix86_isa_flags2_explicit
		 & OPTION_MASK_ISA_AVX512VPOPCNTDQ))
	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX512VPOPCNTDQ;
	if (processor_alias_table[i].flags & PTA_SGX
	    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_SGX))
	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_SGX;

	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;
	if (processor_alias_table[i].flags & PTA_MWAITX
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
	if (processor_alias_table[i].flags & PTA_PKU
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU;

	/* Don't enable x87 instructions if only
	   general registers are allowed.  */
	if (!(opts_set->x_ix86_target_flags & OPTION_MASK_GENERAL_REGS_ONLY)
	    && !(opts_set->x_target_flags & MASK_80387))
	  {
	    if (processor_alias_table[i].flags & PTA_NO_80387)
	      opts->x_target_flags &= ~MASK_80387;
	    else
	      opts->x_target_flags |= MASK_80387;
	  }
	break;
      }
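
  /* Falling out of the loop with i == pta_size means no table entry
     matched opts->x_ix86_arch_string; that case is diagnosed below,
     after the MPX/x32 compatibility check.  */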

  if (TARGET_X32 && (opts->x_ix86_isa_flags2 & OPTION_MASK_ISA_MPX))
    error ("Intel MPX does not support x32");
  if (i == pta_size)
    {
      error (main_args_p
	     ? G_("bad value (%qs) for %<-march=%> switch")
	     : G_("bad value (%qs) for %<target(\"arch=\")%> attribute"),
	     opts->x_ix86_arch_string);

      auto_vec <const char *> candidates;
      for (i = 0; i < pta_size; i++)
	if (strcmp (processor_alias_table[i].name, "generic")
	    && strcmp (processor_alias_table[i].name, "intel")
	    && (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
		|| (processor_alias_table[i].flags & PTA_64BIT)))
	  candidates.safe_push (processor_alias_table[i].name);

      char *s;
      const char *hint
	= candidates_list_and_hint (opts->x_ix86_arch_string, s, candidates);
      if (hint)
	inform (input_location,
		main_args_p
		? G_("valid arguments to %<-march=%> switch are: "
		     "%s; did you mean %qs?")
		: G_("valid arguments to %<target(\"arch=\")%> attribute are: "
		     "%s; did you mean %qs?"), s, hint);
      else
	inform (input_location,
		main_args_p
		? G_("valid arguments to %<-march=%> switch are: %s")
		: G_("valid arguments to %<target(\"arch=\")%> attribute "
		     "are: %s"), s);
      XDELETEVEC (s);
    }

  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
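
  /* E.g. with ix86_arch == PROCESSOR_K8 only the PROCESSOR_K8 bit is
     set in ix86_arch_mask, so each X86_ARCH_* feature comes out
     enabled exactly when initial_ix86_arch_features includes that
     processor's bit for it.  */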

  for (i = 0; i < pta_size; i++)
    if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
	  {
	    if (!(processor_alias_table[i].flags & PTA_64BIT))
	      {
		if (ix86_tune_defaulted)
		  {
		    opts->x_ix86_tune_string = "x86-64";
		    for (i = 0; i < pta_size; i++)
		      if (! strcmp (opts->x_ix86_tune_string,
				    processor_alias_table[i].name))
			break;
		    ix86_schedule = processor_alias_table[i].schedule;
		    ix86_tune = processor_alias_table[i].processor;
		  }
		else
		  error ("CPU you selected does not support x86-64 "
			 "instruction set");
	      }
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOV
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;
	break;
      }

  if (ix86_tune_specified && i == pta_size)
    {
      error (main_args_p
	     ? G_("bad value (%qs) for %<-mtune=%> switch")
	     : G_("bad value (%qs) for %<target(\"tune=\")%> attribute"),
	     opts->x_ix86_tune_string);

      auto_vec <const char *> candidates;
      for (i = 0; i < pta_size; i++)
	if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
	    || (processor_alias_table[i].flags & PTA_64BIT))
	  candidates.safe_push (processor_alias_table[i].name);

      char *s;
      const char *hint
	= candidates_list_and_hint (opts->x_ix86_tune_string, s, candidates);
      if (hint)
	inform (input_location,
		main_args_p
		? G_("valid arguments to %<-mtune=%> switch are: "
		     "%s; did you mean %qs?")
		: G_("valid arguments to %<target(\"tune=\")%> attribute are: "
		     "%s; did you mean %qs?"), s, hint);
      else
	inform (input_location,
		main_args_p
		? G_("valid arguments to %<-mtune=%> switch are: %s")
		: G_("valid arguments to %<target(\"tune=\")%> attribute "
		     "are: %s"), s);
      XDELETEVEC (s);
    }

  set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);

#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
    {
      if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
	opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
      if (opts->x_flag_asynchronous_unwind_tables
	  && !opts_set->x_flag_unwind_tables
	  && TARGET_64BIT_MS_ABI)
	opts->x_flag_unwind_tables = 1;
      if (opts->x_flag_asynchronous_unwind_tables == 2)
	opts->x_flag_unwind_tables
	  = opts->x_flag_asynchronous_unwind_tables = 1;
      if (opts->x_flag_pcc_struct_return == 2)
	opts->x_flag_pcc_struct_return = 0;
    }
  else
    {
      if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
	opts->x_flag_omit_frame_pointer
	  = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
      if (opts->x_flag_asynchronous_unwind_tables == 2)
	opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (opts->x_flag_pcc_struct_return == 2)
	{
	  /* Intel MCU psABI specifies that -freg-struct-return should
	     be on.  Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
	     we check -miamcu so that -freg-struct-return is always
	     turned on if -miamcu is used.  */
	  if (TARGET_IAMCU_P (opts->x_target_flags))
	    opts->x_flag_pcc_struct_return = 0;
	  else
	    opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
	}
    }
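
  /* With both USE_* macros defaulting to 0 above, the upshot is that
     optimized code omits the frame pointer by default in either mode,
     except that 32-bit code keeps it when optimizing for size.  */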

  ix86_tune_cost = processor_target_table[ix86_tune].cost;
  /* TODO: ix86_cost should be chosen at instruction or function granularity
     so for cold code we use size_cost even in !optimize_size compilation.  */
  if (opts->x_optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = ix86_tune_cost;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (opts_set->x_ix86_regparm)
    {
      if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
	warning (0, "-mregparm is ignored in 64-bit mode");
      else if (TARGET_IAMCU_P (opts->x_target_flags))
	warning (0, "-mregparm is ignored for Intel MCU psABI");
      if (opts->x_ix86_regparm > REGPARM_MAX)
	{
	  error ("-mregparm=%d is not between 0 and %d",
		 opts->x_ix86_regparm, REGPARM_MAX);
	  opts->x_ix86_regparm = 0;
	}
    }
  if (TARGET_IAMCU_P (opts->x_target_flags)
      || TARGET_64BIT_P (opts->x_ix86_isa_flags))
    opts->x_ix86_regparm = REGPARM_MAX;
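
  /* Both of those ABIs pass arguments in registers unconditionally,
     which is why the regparm count is pinned at REGPARM_MAX there
     rather than left at the user's (ignored) setting.  */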

  /* Default align_* from the processor table.  */
  ix86_default_align (opts);

  /* Provide default for -mbranch-cost= value.  */
  if (!opts_set->x_ix86_branch_cost)
    opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;

  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
    {
      opts->x_target_flags
	|= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	opts->x_ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT)
	      & ~opts->x_ix86_isa_flags_explicit);

      if (TARGET_RTD_P (opts->x_target_flags))
	warning (0,
		 main_args_p
		 ? G_("%<-mrtd%> is ignored in 64-bit mode")
		 : G_("%<target(\"rtd\")%> is ignored in 64-bit mode"));
    }
  else
    {
      opts->x_target_flags
	|= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;

      if (!ix86_arch_specified)
	opts->x_ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;

      /* The i386 ABI does not specify a red zone.  It still makes
	 sense to use one when the programmer takes care to keep the
	 stack from being clobbered, but we disable it by default.  */
      if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
	opts->x_target_flags |= MASK_NO_RED_ZONE;
    }

  /* Keep nonleaf frame pointers.  */
  if (opts->x_flag_omit_frame_pointer)
    opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
    opts->x_flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (opts->x_flag_finite_math_only)
    opts->x_target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
    opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387_P (opts->x_target_flags))
    opts->x_target_flags |= MASK_NO_FANCY_MATH_387;