1 | /* Subroutines used for code generation on IA-32. |
2 | Copyright (C) 1988-2017 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify |
7 | it under the terms of the GNU General Public License as published by |
8 | the Free Software Foundation; either version 3, or (at your option) |
9 | any later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | GNU General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #include "config.h" |
21 | #include "system.h" |
22 | #include "coretypes.h" |
23 | #include "backend.h" |
24 | #include "rtl.h" |
25 | #include "tree.h" |
26 | #include "memmodel.h" |
27 | #include "gimple.h" |
28 | #include "cfghooks.h" |
29 | #include "cfgloop.h" |
30 | #include "df.h" |
31 | #include "tm_p.h" |
32 | #include "stringpool.h" |
33 | #include "expmed.h" |
34 | #include "optabs.h" |
35 | #include "regs.h" |
36 | #include "emit-rtl.h" |
37 | #include "recog.h" |
38 | #include "cgraph.h" |
39 | #include "diagnostic.h" |
40 | #include "cfgbuild.h" |
41 | #include "alias.h" |
42 | #include "fold-const.h" |
43 | #include "attribs.h" |
44 | #include "calls.h" |
45 | #include "stor-layout.h" |
46 | #include "varasm.h" |
47 | #include "output.h" |
48 | #include "insn-attr.h" |
49 | #include "flags.h" |
50 | #include "except.h" |
51 | #include "explow.h" |
52 | #include "expr.h" |
53 | #include "cfgrtl.h" |
54 | #include "common/common-target.h" |
55 | #include "langhooks.h" |
56 | #include "reload.h" |
57 | #include "gimplify.h" |
58 | #include "dwarf2.h" |
59 | #include "tm-constrs.h" |
60 | #include "params.h" |
61 | #include "cselib.h" |
62 | #include "sched-int.h" |
63 | #include "opts.h" |
64 | #include "tree-pass.h" |
65 | #include "context.h" |
66 | #include "pass_manager.h" |
67 | #include "target-globals.h" |
68 | #include "gimple-iterator.h" |
69 | #include "tree-vectorizer.h" |
70 | #include "shrink-wrap.h" |
71 | #include "builtins.h" |
72 | #include "rtl-iter.h" |
73 | #include "tree-iterator.h" |
74 | #include "tree-chkp.h" |
75 | #include "rtl-chkp.h" |
76 | #include "dbgcnt.h" |
77 | #include "case-cfn-macros.h" |
78 | #include "regrename.h" |
79 | #include "dojump.h" |
80 | #include "fold-const-call.h" |
81 | #include "tree-vrp.h" |
82 | #include "tree-ssanames.h" |
83 | #include "selftest.h" |
84 | #include "selftest-rtl.h" |
85 | #include "print-rtl.h" |
86 | #include "intl.h" |
87 | #include "ifcvt.h" |
88 | #include "symbol-summary.h" |
89 | #include "ipa-prop.h" |
90 | #include "ipa-fnsummary.h" |
91 | |
92 | /* This file should be included last. */ |
93 | #include "target-def.h" |
94 | |
95 | #include "x86-tune-costs.h" |
96 | |
/* Forward declarations of static functions defined later in this file.  */
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static rtx legitimize_pe_coff_symbol (rtx, bool);
static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
static bool ix86_save_reg (unsigned int, bool, bool);
static bool ix86_function_naked (const_tree);
static bool ix86_notrack_prefixed_insn_p (rtx);
static void ix86_emit_restore_reg_using_pop (rtx);


/* Fallback definition; subtargets may provide their own value.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
118 | |
119 | |
/* Set by -mtune.  */
const struct processor_costs *ix86_tune_cost = NULL;

/* Set by -mtune or -Os.  */
const struct processor_costs *ix86_cost = NULL;

/* Processor feature/optimization bitmasks.  Each m_* mask has one bit per
   PROCESSOR_* enumerator; composite masks below OR together related CPUs
   so tuning selectors in x86-tune.def can name whole families.  */
#define m_386 (1U<<PROCESSOR_I386)
#define m_486 (1U<<PROCESSOR_I486)
#define m_PENT (1U<<PROCESSOR_PENTIUM)
#define m_LAKEMONT (1U<<PROCESSOR_LAKEMONT)
#define m_PPRO (1U<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1U<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1U<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2 (1U<<PROCESSOR_CORE2)
#define m_NEHALEM (1U<<PROCESSOR_NEHALEM)
#define m_SANDYBRIDGE (1U<<PROCESSOR_SANDYBRIDGE)
#define m_HASWELL (1U<<PROCESSOR_HASWELL)
#define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
#define m_BONNELL (1U<<PROCESSOR_BONNELL)
#define m_SILVERMONT (1U<<PROCESSOR_SILVERMONT)
#define m_KNL (1U<<PROCESSOR_KNL)
#define m_KNM (1U<<PROCESSOR_KNM)
#define m_SKYLAKE_AVX512 (1U<<PROCESSOR_SKYLAKE_AVX512)
#define m_CANNONLAKE (1U<<PROCESSOR_CANNONLAKE)
#define m_INTEL (1U<<PROCESSOR_INTEL)

/* AMD processors.  */
#define m_GEODE (1U<<PROCESSOR_GEODE)
#define m_K6 (1U<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1U<<PROCESSOR_K8)
#define m_ATHLON (1U<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1U<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1U<<PROCESSOR_BDVER1)
#define m_BDVER2 (1U<<PROCESSOR_BDVER2)
#define m_BDVER3 (1U<<PROCESSOR_BDVER3)
#define m_BDVER4 (1U<<PROCESSOR_BDVER4)
#define m_ZNVER1 (1U<<PROCESSOR_ZNVER1)
#define m_BTVER1 (1U<<PROCESSOR_BTVER1)
#define m_BTVER2 (1U<<PROCESSOR_BTVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
			| m_ZNVER1)

#define m_GENERIC (1U<<PROCESSOR_GENERIC)
168 | |
/* Human-readable names of the tuning knobs, indexed by X86_TUNE_*.
   Generated by expanding DEF_TUNE over x86-tune.def.  */
const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
#undef DEF_TUNE
#define DEF_TUNE(tune, name, selector) name,
#include "x86-tune.def"
#undef DEF_TUNE
};

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  Each entry is a bitmask over the m_* CPU
   masks defined above; generated by expanding DEF_TUNE over x86-tune.def.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
#undef DEF_TUNE
#define DEF_TUNE(tune, name, selector) selector,
#include "x86-tune.def"
#undef DEF_TUNE
};

/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];

/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOV: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
  ~m_386,
};

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
219 | |
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  The entry order must match
   the hard-register numbering (see the other FIRST_PSEUDO_REGISTER-sized
   tables below).  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* AVX-512 SSE registers */
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  /* Mask registers.  */
  MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
  MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
  /* MPX bound registers */
  BND_REGS, BND_REGS, BND_REGS, BND_REGS,
};
259 | |
/* The "default" register map used in 32bit mode.  Maps GCC hard register
   numbers to debug-format register numbers; -1 means the register has no
   debug encoding.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 16-23*/
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 24-31*/
  93, 94, 95, 96, 97, 98, 99, 100,	/* Mask registers */
  101, 102, 103, 104,			/* bound registers */
};

/* The "default" register map used in 64bit mode.  Same layout as above,
   but using the x86-64 DWARF numbering.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8,9,10,11,12,13,14,15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
  67, 68, 69, 70, 71, 72, 73, 74,	/* AVX-512 registers 16-23 */
  75, 76, 77, 78, 79, 80, 81, 82,	/* AVX-512 registers 24-31 */
  118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
  126, 127, 128, 129,			/* bound registers */
};
293 | |
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believed these numbers have these meanings.
	8 for %eip    (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 was so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I had does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seemed to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still printed garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB printed for various FP stack regs
   when doing an `x' command were all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 16-23*/
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 24-31*/
  93, 94, 95, 96, 97, 98, 99, 100,	/* Mask registers */
  101, 102, 103, 104,			/* bound registers */
};
362 | |
/* Define parameter passing and return registers.  */

/* Integer argument registers in the order the SysV x86-64 ABI assigns
   them (rdi, rsi, rdx, rcx, r8, r9).  */
static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

/* Integer argument registers for the Microsoft x64 ABI
   (rcx, rdx, r8, r9).  */
static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

/* Integer value-return registers.  */
static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
379 | |
380 | /* Additional registers that are clobbered by SYSV calls. */ |
381 | |
382 | #define NUM_X86_64_MS_CLOBBERED_REGS 12 |
383 | static int const |
384 | [NUM_X86_64_MS_CLOBBERED_REGS] = |
385 | { |
386 | SI_REG, DI_REG, |
387 | XMM6_REG, XMM7_REG, |
388 | XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG, |
389 | XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG |
390 | }; |
391 | |
/* Variants of the out-of-line prologue/epilogue ("xlogue") save/restore
   stubs.  The *_HFP variants are for frames using a hard frame pointer;
   the *_TAIL variants also perform the function return.  */
enum xlogue_stub {
  XLOGUE_STUB_SAVE,
  XLOGUE_STUB_RESTORE,
  XLOGUE_STUB_RESTORE_TAIL,
  XLOGUE_STUB_SAVE_HFP,
  XLOGUE_STUB_RESTORE_HFP,
  XLOGUE_STUB_RESTORE_HFP_TAIL,

  XLOGUE_STUB_COUNT
};

/* Selectors for the four stub layouts; one xlogue_layout instance exists
   per selector (see xlogue_layout::s_instances).  */
enum xlogue_stub_sets {
  XLOGUE_SET_ALIGNED,
  XLOGUE_SET_ALIGNED_PLUS_8,
  XLOGUE_SET_HFP_ALIGNED_OR_REALIGN,
  XLOGUE_SET_HFP_ALIGNED_PLUS_8,

  XLOGUE_SET_COUNT
};
411 | |
412 | /* Register save/restore layout used by out-of-line stubs. */ |
413 | class xlogue_layout { |
414 | public: |
415 | struct reginfo |
416 | { |
417 | unsigned regno; |
418 | HOST_WIDE_INT offset; /* Offset used by stub base pointer (rax or |
419 | rsi) to where each register is stored. */ |
420 | }; |
421 | |
422 | unsigned get_nregs () const {return m_nregs;} |
423 | HOST_WIDE_INT get_stack_align_off_in () const {return m_stack_align_off_in;} |
424 | |
425 | const reginfo &get_reginfo (unsigned reg) const |
426 | { |
427 | gcc_assert (reg < m_nregs); |
428 | return m_regs[reg]; |
429 | } |
430 | |
431 | static const char *get_stub_name (enum xlogue_stub stub, |
432 | unsigned ); |
433 | |
434 | /* Returns an rtx for the stub's symbol based upon |
435 | 1.) the specified stub (save, restore or restore_ret) and |
436 | 2.) the value of cfun->machine->call_ms2sysv_extra_regs and |
437 | 3.) rather or not stack alignment is being performed. */ |
438 | static rtx get_stub_rtx (enum xlogue_stub stub); |
439 | |
440 | /* Returns the amount of stack space (including padding) that the stub |
441 | needs to store registers based upon data in the machine_function. */ |
442 | HOST_WIDE_INT get_stack_space_used () const |
443 | { |
444 | const struct machine_function *m = cfun->machine; |
445 | unsigned last_reg = m->call_ms2sysv_extra_regs + MIN_REGS - 1; |
446 | |
447 | gcc_assert (m->call_ms2sysv_extra_regs <= MAX_EXTRA_REGS); |
448 | return m_regs[last_reg].offset + STUB_INDEX_OFFSET; |
449 | } |
450 | |
451 | /* Returns the offset for the base pointer used by the stub. */ |
452 | HOST_WIDE_INT get_stub_ptr_offset () const |
453 | { |
454 | return STUB_INDEX_OFFSET + m_stack_align_off_in; |
455 | } |
456 | |
457 | static const struct xlogue_layout &get_instance (); |
458 | static unsigned count_stub_managed_regs (); |
459 | static bool is_stub_managed_reg (unsigned regno, unsigned count); |
460 | |
461 | static const HOST_WIDE_INT STUB_INDEX_OFFSET = 0x70; |
462 | static const unsigned MIN_REGS = NUM_X86_64_MS_CLOBBERED_REGS; |
463 | static const unsigned MAX_REGS = 18; |
464 | static const unsigned = MAX_REGS - MIN_REGS; |
465 | static const unsigned VARIANT_COUNT = MAX_EXTRA_REGS + 1; |
466 | static const unsigned STUB_NAME_MAX_LEN = 20; |
467 | static const char * const STUB_BASE_NAMES[XLOGUE_STUB_COUNT]; |
468 | static const unsigned REG_ORDER[MAX_REGS]; |
469 | static const unsigned REG_ORDER_REALIGN[MAX_REGS]; |
470 | |
471 | private: |
472 | xlogue_layout (); |
473 | xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp); |
474 | xlogue_layout (const xlogue_layout &); |
475 | |
476 | /* True if hard frame pointer is used. */ |
477 | bool m_hfp; |
478 | |
479 | /* Max number of register this layout manages. */ |
480 | unsigned m_nregs; |
481 | |
482 | /* Incoming offset from 16-byte alignment. */ |
483 | HOST_WIDE_INT m_stack_align_off_in; |
484 | |
485 | /* Register order and offsets. */ |
486 | struct reginfo m_regs[MAX_REGS]; |
487 | |
488 | /* Lazy-inited cache of symbol names for stubs. */ |
489 | static char s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT] |
490 | [STUB_NAME_MAX_LEN]; |
491 | |
492 | static const xlogue_layout s_instances[XLOGUE_SET_COUNT]; |
493 | }; |
494 | |
/* Base names for the xlogue stubs, indexed by enum xlogue_stub; the full
   symbol name also encodes sse/avx and the register count (see
   get_stub_name).  */
const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
  "savms64",
  "resms64",
  "resms64x",
  "savms64f",
  "resms64f",
  "resms64fx"
};

const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
/* The below offset values are where each register is stored for the layout
   relative to incoming stack pointer.  The value of each m_regs[].offset will
   be relative to the incoming base pointer (rax or rsi) used by the stub.

    s_instances:   0		1		2		3
    Offset:					realigned or	aligned + 8
    Register	   aligned	aligned + 8	aligned w/HFP	w/HFP
    XMM15 0x10	0x18		0x10		0x18 */
    XMM15_REG,	/* 0x10	 0x18		0x10		0x18 */
    XMM14_REG,	/* 0x20	 0x28		0x20		0x28 */
    XMM13_REG,	/* 0x30	 0x38		0x30		0x38 */
    XMM12_REG,	/* 0x40	 0x48		0x40		0x48 */
    XMM11_REG,	/* 0x50	 0x58		0x50		0x58 */
    XMM10_REG,	/* 0x60	 0x68		0x60		0x68 */
    XMM9_REG,	/* 0x70	 0x78		0x70		0x78 */
    XMM8_REG,	/* 0x80	 0x88		0x80		0x88 */
    XMM7_REG,	/* 0x90	 0x98		0x90		0x98 */
    XMM6_REG,	/* 0xa0	 0xa8		0xa0		0xa8 */
    SI_REG,	/* 0xa8	 0xb0		0xa8		0xb0 */
    DI_REG,	/* 0xb0	 0xb8		0xb0		0xb8 */
    BX_REG,	/* 0xb8	 0xc0		0xb8		0xc0 */
    BP_REG,	/* 0xc0	 0xc8		N/A		N/A  */
    R12_REG,	/* 0xc8	 0xd0		0xc0		0xc8 */
    R13_REG,	/* 0xd0	 0xd8		0xc8		0xd0 */
    R14_REG,	/* 0xd8	 0xe0		0xd0		0xd8 */
    R15_REG,	/* 0xe0	 0xe8		0xd8		0xe0 */
};
531 | |
532 | /* Instantiate static const values. */ |
533 | const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET; |
534 | const unsigned xlogue_layout::MIN_REGS; |
535 | const unsigned xlogue_layout::MAX_REGS; |
536 | const unsigned xlogue_layout::; |
537 | const unsigned xlogue_layout::VARIANT_COUNT; |
538 | const unsigned xlogue_layout::STUB_NAME_MAX_LEN; |
539 | |
540 | /* Initialize xlogue_layout::s_stub_names to zero. */ |
541 | char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT] |
542 | [STUB_NAME_MAX_LEN]; |
543 | |
544 | /* Instantiates all xlogue_layout instances. */ |
545 | const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = { |
546 | xlogue_layout (0, false), |
547 | xlogue_layout (8, false), |
548 | xlogue_layout (0, true), |
549 | xlogue_layout (8, true) |
550 | }; |
551 | |
552 | /* Return an appropriate const instance of xlogue_layout based upon values |
553 | in cfun->machine and crtl. */ |
554 | const struct xlogue_layout & |
555 | xlogue_layout::get_instance () |
556 | { |
557 | enum xlogue_stub_sets stub_set; |
558 | bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in; |
559 | |
560 | if (stack_realign_fp) |
561 | stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN; |
562 | else if (frame_pointer_needed) |
563 | stub_set = aligned_plus_8 |
564 | ? XLOGUE_SET_HFP_ALIGNED_PLUS_8 |
565 | : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN; |
566 | else |
567 | stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED; |
568 | |
569 | return s_instances[stub_set]; |
570 | } |
571 | |
572 | /* Determine how many clobbered registers can be saved by the stub. |
573 | Returns the count of registers the stub will save and restore. */ |
574 | unsigned |
575 | xlogue_layout::count_stub_managed_regs () |
576 | { |
577 | bool hfp = frame_pointer_needed || stack_realign_fp; |
578 | unsigned i, count; |
579 | unsigned regno; |
580 | |
581 | for (count = i = MIN_REGS; i < MAX_REGS; ++i) |
582 | { |
583 | regno = REG_ORDER[i]; |
584 | if (regno == BP_REG && hfp) |
585 | continue; |
586 | if (!ix86_save_reg (regno, false, false)) |
587 | break; |
588 | ++count; |
589 | } |
590 | return count; |
591 | } |
592 | |
/* Determine if register REGNO is a stub managed register given the
   total COUNT of stub managed registers (as returned by
   count_stub_managed_regs).  */
bool
xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
{
  bool hfp = frame_pointer_needed || stack_realign_fp;
  unsigned i;

  /* Scan the first COUNT managed entries of REG_ORDER.  When a hard frame
     pointer is in use, BP_REG is not stub-managed, so COUNT is bumped to
     skip its REG_ORDER slot without consuming a managed position.  */
  for (i = 0; i < count; ++i)
    {
      gcc_assert (i < MAX_REGS);
      if (REG_ORDER[i] == BP_REG && hfp)
	++count;
      else if (REG_ORDER[i] == regno)
	return true;
    }
  return false;
}
611 | |
/* Constructor for xlogue_layout.  STACK_ALIGN_OFF_IN is the incoming
   offset from 16-byte stack alignment; HFP indicates whether a hard frame
   pointer is in use, in which case BP_REG is excluded from the layout
   (hence 17 rather than 18 managed registers).  */
xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
  : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
    m_stack_align_off_in (stack_align_off_in)
{
  HOST_WIDE_INT offset = stack_align_off_in;
  unsigned i, j;

  for (i = j = 0; i < MAX_REGS; ++i)
    {
      unsigned regno = REG_ORDER[i];

      /* With a hard frame pointer, BP is saved by the frame setup rather
	 than by the stub.  */
      if (regno == BP_REG && hfp)
	continue;
      if (SSE_REGNO_P (regno))
	{
	  offset += 16;
	  /* Verify that SSE regs are always aligned.  */
	  gcc_assert (!((stack_align_off_in + offset) & 15));
	}
      else
	offset += 8;

      /* Stored offsets are relative to the stub's base pointer, which
	 points STUB_INDEX_OFFSET bytes into the save area.  */
      m_regs[j].regno = regno;
      m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
    }
  gcc_assert (j == m_nregs);
}
640 | |
641 | const char * |
642 | xlogue_layout::get_stub_name (enum xlogue_stub stub, |
643 | unsigned ) |
644 | { |
645 | const int have_avx = TARGET_AVX; |
646 | char *name = s_stub_names[!!have_avx][stub][n_extra_regs]; |
647 | |
648 | /* Lazy init */ |
649 | if (!*name) |
650 | { |
651 | int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u" , |
652 | (have_avx ? "avx" : "sse" ), |
653 | STUB_BASE_NAMES[stub], |
654 | MIN_REGS + n_extra_regs); |
655 | gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN); |
656 | } |
657 | |
658 | return name; |
659 | } |
660 | |
661 | /* Return rtx of a symbol ref for the entry point (based upon |
662 | cfun->machine->call_ms2sysv_extra_regs) of the specified stub. */ |
663 | rtx |
664 | xlogue_layout::get_stub_rtx (enum xlogue_stub stub) |
665 | { |
666 | const unsigned = cfun->machine->call_ms2sysv_extra_regs; |
667 | gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS); |
668 | gcc_assert (stub < XLOGUE_STUB_COUNT); |
669 | gcc_assert (crtl->stack_realign_finalized); |
670 | |
671 | return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs)); |
672 | } |
673 | |
/* Define the structure for the machine field in struct function.  */

/* One entry per (mode, n) slot of per-function stack-local scratch
   storage, chained through NEXT.  GTY(()) marks it for garbage-collection
   root scanning since it holds an rtx.  */
struct GTY(()) stack_local_entry {
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char x86_prefetch_sse;

/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
698 | |
/* Pointers to insn generator functions whose concrete variant depends on
   the target word size; NOTE(review): presumably initialized during option
   override to the SImode or DImode generators — initialization site is not
   visible in this chunk.  */
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
static rtx (*ix86_gen_clzero) (rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt.  */
tree x86_mfence;
737 | |
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

/* Maximum number of eightbyte classes an argument can be split into.  */
#define MAX_CLASSES 8

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Set once ext_80387_constants_table has been filled in.  */
static bool ext_80387_constants_init;
765 | |

/* More forward declarations of static helpers defined later in this
   file.  */
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (void);
static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
						 rtx, rtx, int);
static void ix86_add_new_builtins (HOST_WIDE_INT, HOST_WIDE_INT);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

/* Indices into the string array produced when saving/printing
   function-specific target options.  */
enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};

/* Declarations for the target-option ("target" attribute / pragma)
   machinery defined later in this file.  */
static char *ix86_target_string (HOST_WIDE_INT, HOST_WIDE_INT, int, int,
				 const char *, const char *, enum fpmath_unit,
				 bool);
static void ix86_function_specific_save (struct cl_target_option *,
					 struct gcc_options *opts);
static void ix86_function_specific_restore (struct gcc_options *opts,
					    struct cl_target_option *);
static void ix86_function_specific_post_stream_in (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
						 struct gcc_options *,
						 struct gcc_options *,
						 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);
810 | |
811 | |
/* Fallback default CPU name for 32-bit subtargets that do not
   override it.  */
#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;

/* Vectorization library interface and handlers.  The handler pointer
   is set to one of the ABI-specific implementations below.  */
static tree (*ix86_veclib_handler) (combined_fn, tree, tree);

static tree ix86_veclibabi_svml (combined_fn, tree, tree);
static tree ix86_veclibabi_acml (combined_fn, tree, tree);
825 | |
/* Processor target table, indexed by processor number */
struct ptt
{
  const char *const name;		/* processor name  */
  const struct processor_costs *cost;	/* Processor costs */
  const int align_loop;			/* Default alignments.  */
  const int align_loop_max_skip;	/* Max bytes skipped to align a loop.  */
  const int align_jump;			/* Jump-target alignment.  */
  const int align_jump_max_skip;	/* Max bytes skipped to align a jump.  */
  const int align_func;			/* Function-start alignment.  */
};
837 | |
/* This table must be in sync with enum processor_type in i386.h.
   Each row gives a processor's name, its cost table, and its default
   alignment parameters (see struct ptt above).  */
static const struct ptt processor_target_table[PROCESSOR_max] =
{
  {"generic", &generic_cost, 16, 10, 16, 10, 16},
  {"i386", &i386_cost, 4, 3, 4, 3, 4},
  {"i486", &i486_cost, 16, 15, 16, 15, 16},
  {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
  {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
  {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
  {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
  {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
  {"core2", &core_cost, 16, 10, 16, 10, 16},
  {"nehalem", &core_cost, 16, 10, 16, 10, 16},
  {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
  {"haswell", &core_cost, 16, 10, 16, 10, 16},
  {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
  {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
  {"knl", &slm_cost, 16, 15, 16, 7, 16},
  {"knm", &slm_cost, 16, 15, 16, 7, 16},
  {"skylake-avx512", &skylake_cost, 16, 10, 16, 10, 16},
  {"cannonlake", &core_cost, 16, 10, 16, 10, 16},
  {"intel", &intel_cost, 16, 15, 16, 7, 16},
  {"geode", &geode_cost, 0, 0, 0, 0, 0},
  {"k6", &k6_cost, 32, 7, 32, 7, 32},
  {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
  {"k8", &k8_cost, 16, 7, 16, 7, 16},
  {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
  {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
  {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
  {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
  {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
  {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
  {"btver2", &btver2_cost, 16, 10, 16, 7, 11},
  {"znver1", &znver1_cost, 16, 15, 16, 15, 16}
};
873 | |
874 | static unsigned int |
875 | rest_of_handle_insert_vzeroupper (void) |
876 | { |
877 | int i; |
878 | |
879 | /* vzeroupper instructions are inserted immediately after reload to |
880 | account for possible spills from 256bit or 512bit registers. The pass |
881 | reuses mode switching infrastructure by re-running mode insertion |
882 | pass, so disable entities that have already been processed. */ |
883 | for (i = 0; i < MAX_386_ENTITIES; i++) |
884 | ix86_optimize_mode_switching[i] = 0; |
885 | |
886 | ix86_optimize_mode_switching[AVX_U128] = 1; |
887 | |
888 | /* Call optimize_mode_switching. */ |
889 | g->get_passes ()->execute_pass_mode_switching (); |
890 | return 0; |
891 | } |
892 | |
893 | /* Return 1 if INSN uses or defines a hard register. |
894 | Hard register uses in a memory address are ignored. |
895 | Clobbers and flags definitions are ignored. */ |
896 | |
897 | static bool |
898 | has_non_address_hard_reg (rtx_insn *insn) |
899 | { |
900 | df_ref ref; |
901 | FOR_EACH_INSN_DEF (ref, insn) |
902 | if (HARD_REGISTER_P (DF_REF_REAL_REG (ref)) |
903 | && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER) |
904 | && DF_REF_REGNO (ref) != FLAGS_REG) |
905 | return true; |
906 | |
907 | FOR_EACH_INSN_USE (ref, insn) |
908 | if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref))) |
909 | return true; |
910 | |
911 | return false; |
912 | } |
913 | |
914 | /* Check if comparison INSN may be transformed |
915 | into vector comparison. Currently we transform |
916 | zero checks only which look like: |
917 | |
918 | (set (reg:CCZ 17 flags) |
919 | (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4) |
920 | (subreg:SI (reg:DI x) 0)) |
921 | (const_int 0 [0]))) */ |
922 | |
923 | static bool |
924 | convertible_comparison_p (rtx_insn *insn) |
925 | { |
926 | if (!TARGET_SSE4_1) |
927 | return false; |
928 | |
929 | rtx def_set = single_set (insn); |
930 | |
931 | gcc_assert (def_set); |
932 | |
933 | rtx src = SET_SRC (def_set); |
934 | rtx dst = SET_DEST (def_set); |
935 | |
936 | gcc_assert (GET_CODE (src) == COMPARE); |
937 | |
938 | if (GET_CODE (dst) != REG |
939 | || REGNO (dst) != FLAGS_REG |
940 | || GET_MODE (dst) != CCZmode) |
941 | return false; |
942 | |
943 | rtx op1 = XEXP (src, 0); |
944 | rtx op2 = XEXP (src, 1); |
945 | |
946 | if (op2 != CONST0_RTX (GET_MODE (op2))) |
947 | return false; |
948 | |
949 | if (GET_CODE (op1) != IOR) |
950 | return false; |
951 | |
952 | op2 = XEXP (op1, 1); |
953 | op1 = XEXP (op1, 0); |
954 | |
955 | if (!SUBREG_P (op1) |
956 | || !SUBREG_P (op2) |
957 | || GET_MODE (op1) != SImode |
958 | || GET_MODE (op2) != SImode |
959 | || ((SUBREG_BYTE (op1) != 0 |
960 | || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode)) |
961 | && (SUBREG_BYTE (op2) != 0 |
962 | || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode)))) |
963 | return false; |
964 | |
965 | op1 = SUBREG_REG (op1); |
966 | op2 = SUBREG_REG (op2); |
967 | |
968 | if (op1 != op2 |
969 | || !REG_P (op1) |
970 | || GET_MODE (op1) != DImode) |
971 | return false; |
972 | |
973 | return true; |
974 | } |
975 | |
/* The DImode version of scalar_to_vector_candidate_p.  Return true if
   single_set insn INSN is a candidate for conversion from DImode
   scalar operation to a vector one.  */

static bool
dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
{
  rtx def_set = single_set (insn);

  if (!def_set)
    return false;

  /* Insns touching hard registers outside of memory addresses cannot
     be converted; the hard register pins the value to scalar form.  */
  if (has_non_address_hard_reg (insn))
    return false;

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  /* Flag-setting compares are handled separately.  */
  if (GET_CODE (src) == COMPARE)
    return convertible_comparison_p (insn);

  /* We are interested in DImode promotion only.  */
  if ((GET_MODE (src) != DImode
       && !CONST_INT_P (src))
      || GET_MODE (dst) != DImode)
    return false;

  if (!REG_P (dst) && !MEM_P (dst))
    return false;

  switch (GET_CODE (src))
    {
    case ASHIFTRT:
      /* DImode arithmetic right shift in vector form is only
	 available with AVX512VL.  */
      if (!TARGET_AVX512VL)
	return false;
      /* FALLTHRU */

    case ASHIFT:
    case LSHIFTRT:
      /* The shift count must be a register, a zero-offset subreg of a
	 register, or a constant in the 0..63 range.  */
      if (!REG_P (XEXP (src, 1))
	  && (!SUBREG_P (XEXP (src, 1))
	      || SUBREG_BYTE (XEXP (src, 1)) != 0
	      || !REG_P (SUBREG_REG (XEXP (src, 1))))
	  && (!CONST_INT_P (XEXP (src, 1))
	      || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63)))
	return false;

      /* Non-constant counts must be QImode.  */
      if (GET_MODE (XEXP (src, 1)) != QImode
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;
      break;

    case PLUS:
    case MINUS:
    case IOR:
    case XOR:
    case AND:
      /* Second operand: register, memory, or constant.  */
      if (!REG_P (XEXP (src, 1))
	  && !MEM_P (XEXP (src, 1))
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;

      if (GET_MODE (XEXP (src, 1)) != DImode
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;
      break;

    case NEG:
    case NOT:
      break;

    case REG:
      return true;

    case MEM:
    case CONST_INT:
      /* Loads and constant materializations must target a register.  */
      return REG_P (dst);

    default:
      return false;
    }

  /* First operand of the unary/binary operation: register, memory,
     constant, or the NOT half of an andnot pattern.  */
  if (!REG_P (XEXP (src, 0))
      && !MEM_P (XEXP (src, 0))
      && !CONST_INT_P (XEXP (src, 0))
      /* Check for andnot case.  */
      && (GET_CODE (src) != AND
	  || GET_CODE (XEXP (src, 0)) != NOT
	  || !REG_P (XEXP (XEXP (src, 0), 0))))
    return false;

  if (GET_MODE (XEXP (src, 0)) != DImode
      && !CONST_INT_P (XEXP (src, 0)))
    return false;

  return true;
}
1071 | |
/* The TImode version of scalar_to_vector_candidate_p.  Return true if
   single_set insn INSN is a TImode load or store that may be
   converted to a V1TImode vector access.  */

static bool
timode_scalar_to_vector_candidate_p (rtx_insn *insn)
{
  rtx def_set = single_set (insn);

  if (!def_set)
    return false;

  /* Insns touching hard registers outside of memory addresses cannot
     be converted.  */
  if (has_non_address_hard_reg (insn))
    return false;

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  /* Only TImode load and store are allowed.  */
  if (GET_MODE (dst) != TImode)
    return false;

  if (MEM_P (dst))
    {
      /* Check for store.  Memory must be aligned or unaligned store
	 is optimal.  Only support store from register, standard SSE
	 constant or CONST_WIDE_INT generated from piecewise store.

	 ??? Verify performance impact before enabling CONST_INT for
	 __int128 store.  */
      if (misaligned_operand (dst, TImode)
	  && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
	return false;

      switch (GET_CODE (src))
	{
	default:
	  return false;

	case REG:
	case CONST_WIDE_INT:
	  return true;

	case CONST_INT:
	  /* Only constants representable as standard SSE constants
	     (e.g. zero) are supported.  */
	  return standard_sse_constant_p (src, TImode);
	}
    }
  else if (MEM_P (src))
    {
      /* Check for load.  Memory must be aligned or unaligned load is
	 optimal.  */
      return (REG_P (dst)
	      && (!misaligned_operand (src, TImode)
		  || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
    }

  return false;
}
1128 | |
1129 | /* Return 1 if INSN may be converted into vector |
1130 | instruction. */ |
1131 | |
1132 | static bool |
1133 | scalar_to_vector_candidate_p (rtx_insn *insn) |
1134 | { |
1135 | if (TARGET_64BIT) |
1136 | return timode_scalar_to_vector_candidate_p (insn); |
1137 | else |
1138 | return dimode_scalar_to_vector_candidate_p (insn); |
1139 | } |
1140 | |
/* The DImode version of remove_non_convertible_regs.  Collect pseudos
   that have at least one definition outside CANDIDATES, then drop
   from CANDIDATES every insn defining such a pseudo.  */

static void
dimode_remove_non_convertible_regs (bitmap candidates)
{
  bitmap_iterator bi;
  unsigned id;
  bitmap regs = BITMAP_ALLOC (NULL);

  /* Pass 1: find registers with a non-convertible definition.  */
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
    {
      rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
      rtx reg = SET_DEST (def_set);

      /* Skip non-register destinations, registers already flagged,
	 and hard registers.  */
      if (!REG_P (reg)
	  || bitmap_bit_p (regs, REGNO (reg))
	  || HARD_REGISTER_P (reg))
	continue;

      for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
	   def;
	   def = DF_REF_NEXT_REG (def))
	{
	  if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	    {
	      if (dump_file)
		fprintf (dump_file,
			 "r%d has non convertible definition in insn %d\n",
			 REGNO (reg), DF_REF_INSN_UID (def));

	      bitmap_set_bit (regs, REGNO (reg));
	      break;
	    }
	}
    }

  /* Pass 2: remove every candidate insn defining a flagged register.  */
  EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
    {
      for (df_ref def = DF_REG_DEF_CHAIN (id);
	   def;
	   def = DF_REF_NEXT_REG (def))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (def));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
	  }
    }

  BITMAP_FREE (regs);
}
1194 | |
/* For a register REGNO, scan instructions for its defs and uses.
   Put REGNO in REGS if a def or use isn't in CANDIDATES.  */

static void
timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
				   unsigned int regno)
{
  /* Walk all definitions of REGNO; one outside CANDIDATES flags it.  */
  for (df_ref def = DF_REG_DEF_CHAIN (regno);
       def;
       def = DF_REF_NEXT_REG (def))
    {
      if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "r%d has non convertible def in insn %d\n",
		     regno, DF_REF_INSN_UID (def));

	  bitmap_set_bit (regs, regno);
	  break;
	}
    }

  /* Likewise for uses.  */
  for (df_ref ref = DF_REG_USE_CHAIN (regno);
       ref;
       ref = DF_REF_NEXT_REG (ref))
    {
      /* Debug instructions are skipped.  */
      if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
	  && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "r%d has non convertible use in insn %d\n",
		     regno, DF_REF_INSN_UID (ref));

	  bitmap_set_bit (regs, regno);
	  break;
	}
    }
}
1236 | |
/* The TImode version of remove_non_convertible_regs.  Unlike the
   DImode variant, both the destination and the source register of
   each candidate are checked, and candidates are removed for both
   defs and uses of flagged registers.  */

static void
timode_remove_non_convertible_regs (bitmap candidates)
{
  bitmap_iterator bi;
  unsigned id;
  bitmap regs = BITMAP_ALLOC (NULL);

  /* Pass 1: flag registers with any def or use outside CANDIDATES.  */
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
    {
      rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
      rtx dest = SET_DEST (def_set);
      rtx src = SET_SRC (def_set);

      /* Skip the insn when neither side is a checkable pseudo.  */
      if ((!REG_P (dest)
	   || bitmap_bit_p (regs, REGNO (dest))
	   || HARD_REGISTER_P (dest))
	  && (!REG_P (src)
	      || bitmap_bit_p (regs, REGNO (src))
	      || HARD_REGISTER_P (src)))
	continue;

      if (REG_P (dest))
	timode_check_non_convertible_regs (candidates, regs,
					   REGNO (dest));

      if (REG_P (src))
	timode_check_non_convertible_regs (candidates, regs,
					   REGNO (src));
    }

  /* Pass 2: remove every candidate that defines or uses a flagged
     register.  */
  EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
    {
      for (df_ref def = DF_REG_DEF_CHAIN (id);
	   def;
	   def = DF_REF_NEXT_REG (def))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (def));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
	  }

      for (df_ref ref = DF_REG_USE_CHAIN (id);
	   ref;
	   ref = DF_REF_NEXT_REG (ref))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (ref));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
	  }
    }

  BITMAP_FREE (regs);
}
1298 | |
1299 | /* For a given bitmap of insn UIDs scans all instruction and |
1300 | remove insn from CANDIDATES in case it has both convertible |
1301 | and not convertible definitions. |
1302 | |
1303 | All insns in a bitmap are conversion candidates according to |
1304 | scalar_to_vector_candidate_p. Currently it implies all insns |
1305 | are single_set. */ |
1306 | |
1307 | static void |
1308 | remove_non_convertible_regs (bitmap candidates) |
1309 | { |
1310 | if (TARGET_64BIT) |
1311 | timode_remove_non_convertible_regs (candidates); |
1312 | else |
1313 | dimode_remove_non_convertible_regs (candidates); |
1314 | } |
1315 | |
/* A chain of scalar instructions considered for conversion to vector
   form as a unit.  Concrete subclasses provide the DImode and TImode
   behavior.  */
class scalar_chain
{
 public:
  scalar_chain ();
  virtual ~scalar_chain ();

  /* Running counter used to assign unique chain ids.  */
  static unsigned max_id;

  /* ID of a chain.  */
  unsigned int chain_id;
  /* A queue of instructions to be included into a chain.  */
  bitmap queue;
  /* Instructions included into a chain.  */
  bitmap insns;
  /* All registers defined by a chain.  */
  bitmap defs;
  /* Registers used in both vector and scalar modes.  */
  bitmap defs_conv;

  void build (bitmap candidates, unsigned insn_uid);
  virtual int compute_convert_gain () = 0;
  int convert ();

 protected:
  void add_to_queue (unsigned insn_uid);
  void emit_conversion_insns (rtx insns, rtx_insn *pos);

 private:
  void add_insn (bitmap candidates, unsigned insn_uid);
  void analyze_register_chain (bitmap candidates, df_ref ref);
  virtual void mark_dual_mode_def (df_ref def) = 0;
  virtual void convert_insn (rtx_insn *insn) = 0;
  virtual void convert_registers () = 0;
};
1350 | |
/* DImode specialization: converts DImode scalar chains to V2DImode
   vector operations, rewriting registers via subregs.  */
class dimode_scalar_chain : public scalar_chain
{
 public:
  int compute_convert_gain ();
 private:
  void mark_dual_mode_def (df_ref def);
  rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
  void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
  void convert_insn (rtx_insn *insn);
  void convert_op (rtx *op, rtx_insn *insn);
  void convert_reg (unsigned regno);
  void make_vector_copies (unsigned regno);
  void convert_registers ();
  int vector_const_cost (rtx exp);
};
1366 | |
/* TImode specialization: converts TImode loads/stores to V1TImode.  */
class timode_scalar_chain : public scalar_chain
{
 public:
  /* Convert from TImode to V1TImode is always faster.  */
  int compute_convert_gain () { return 1; }

 private:
  void mark_dual_mode_def (df_ref def);
  void fix_debug_reg_uses (rtx reg);
  void convert_insn (rtx_insn *insn);
  /* We don't convert registers to a different size.  */
  void convert_registers () {}
};
1380 | |
/* Counter incremented by the constructor to give each chain a unique
   id.  */
unsigned scalar_chain::max_id = 0;
1382 | |
/* Initialize new chain.  Assigns the next unique chain id and
   allocates the bitmaps; QUEUE is allocated lazily in build ().  */

scalar_chain::scalar_chain ()
{
  chain_id = ++max_id;

  if (dump_file)
    fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);

  /* Matching bitmap_obstack_release is in the destructor.  */
  bitmap_obstack_initialize (NULL);
  insns = BITMAP_ALLOC (NULL);
  defs = BITMAP_ALLOC (NULL);
  defs_conv = BITMAP_ALLOC (NULL);
  queue = NULL;
}
1398 | |
/* Free chain's data.  Releases the bitmaps and the obstack
   initialized by the constructor.  */

scalar_chain::~scalar_chain ()
{
  BITMAP_FREE (insns);
  BITMAP_FREE (defs);
  BITMAP_FREE (defs_conv);
  bitmap_obstack_release (NULL);
}
1408 | |
1409 | /* Add instruction into chains' queue. */ |
1410 | |
1411 | void |
1412 | scalar_chain::add_to_queue (unsigned insn_uid) |
1413 | { |
1414 | if (bitmap_bit_p (insns, insn_uid) |
1415 | || bitmap_bit_p (queue, insn_uid)) |
1416 | return; |
1417 | |
1418 | if (dump_file) |
1419 | fprintf (dump_file, " Adding insn %d into chain's #%d queue\n" , |
1420 | insn_uid, chain_id); |
1421 | bitmap_set_bit (queue, insn_uid); |
1422 | } |
1423 | |
1424 | /* For DImode conversion, mark register defined by DEF as requiring |
1425 | conversion. */ |
1426 | |
1427 | void |
1428 | dimode_scalar_chain::mark_dual_mode_def (df_ref def) |
1429 | { |
1430 | gcc_assert (DF_REF_REG_DEF_P (def)); |
1431 | |
1432 | if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def))) |
1433 | return; |
1434 | |
1435 | if (dump_file) |
1436 | fprintf (dump_file, |
1437 | " Mark r%d def in insn %d as requiring both modes in chain #%d\n" , |
1438 | DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id); |
1439 | |
1440 | bitmap_set_bit (defs_conv, DF_REF_REGNO (def)); |
1441 | } |
1442 | |
/* For TImode conversion, it is unused: convert_registers () is a
   no-op for TImode chains, so this must never be reached.  */

void
timode_scalar_chain::mark_dual_mode_def (df_ref)
{
  gcc_unreachable ();
}
1450 | |
/* Check REF's chain to add new insns into a queue
   and find registers requiring conversion.  Insns linked to REF that
   are candidates are queued; defs/uses that cannot be converted are
   marked as needing both scalar and vector modes.  */

void
scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
{
  df_link *chain;

  /* REF's own insn must already be in the chain or a candidate.  */
  gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
	      || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
  add_to_queue (DF_REF_INSN_UID (ref));

  for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
    {
      unsigned uid = DF_REF_INSN_UID (chain->ref);

      /* Debug insns don't constrain conversion.  */
      if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
	continue;

      if (!DF_REF_REG_MEM_P (chain->ref))
	{
	  if (bitmap_bit_p (insns, uid))
	    continue;

	  if (bitmap_bit_p (candidates, uid))
	    {
	      add_to_queue (uid);
	      continue;
	    }
	}

      /* The linked ref is not convertible: record which register
	 needs to live in both modes.  */
      if (DF_REF_REG_DEF_P (chain->ref))
	{
	  if (dump_file)
	    fprintf (dump_file, " r%d def in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (chain->ref);
	}
      else
	{
	  if (dump_file)
	    fprintf (dump_file, " r%d use in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (ref);
	}
    }
}
1498 | |
/* Add instruction into a chain.  Records the insn, its defined pseudo
   (if any), and analyzes the def-use chains of all its registers to
   pull related insns into the queue.  */

void
scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);

  bitmap_set_bit (insns, insn_uid);

  rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
  rtx def_set = single_set (insn);
  if (def_set && REG_P (SET_DEST (def_set))
      && !HARD_REGISTER_P (SET_DEST (def_set)))
    bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));

  /* Follow all other definitions of each pseudo this insn defines...  */
  df_ref ref;
  df_ref def;
  for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!HARD_REGISTER_P (DF_REF_REG (ref)))
      for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
	   def;
	   def = DF_REF_NEXT_REG (def))
	analyze_register_chain (candidates, def);
  /* ...and every non-memory use in this insn.  */
  for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!DF_REF_REG_MEM_P (ref))
      analyze_register_chain (candidates, ref);
}
1530 | |
/* Build new chain starting from insn INSN_UID recursively
   adding all dependent uses and definitions.  Insns are removed from
   CANDIDATES as they become part of the chain.  */

void
scalar_chain::build (bitmap candidates, unsigned insn_uid)
{
  queue = BITMAP_ALLOC (NULL);
  bitmap_set_bit (queue, insn_uid);

  if (dump_file)
    fprintf (dump_file, "Building chain #%d...\n", chain_id);

  /* Worklist loop: add_insn may enqueue further related insns via
     analyze_register_chain.  */
  while (!bitmap_empty_p (queue))
    {
      insn_uid = bitmap_first_set_bit (queue);
      bitmap_clear_bit (queue, insn_uid);
      bitmap_clear_bit (candidates, insn_uid);
      add_insn (candidates, insn_uid);
    }

  if (dump_file)
    {
      fprintf (dump_file, "Collected chain #%d...\n", chain_id);
      fprintf (dump_file, " insns: ");
      dump_bitmap (dump_file, insns);
      if (!bitmap_empty_p (defs_conv))
	{
	  bitmap_iterator bi;
	  unsigned id;
	  const char *comma = "";
	  fprintf (dump_file, " defs to convert: ");
	  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
	    {
	      fprintf (dump_file, "%sr%d", comma, id);
	      comma = ", ";
	    }
	  fprintf (dump_file, "\n");
	}
    }

  BITMAP_FREE (queue);
}
1573 | |
1574 | /* Return a cost of building a vector costant |
1575 | instead of using a scalar one. */ |
1576 | |
1577 | int |
1578 | dimode_scalar_chain::vector_const_cost (rtx exp) |
1579 | { |
1580 | gcc_assert (CONST_INT_P (exp)); |
1581 | |
1582 | if (standard_sse_constant_p (exp, V2DImode)) |
1583 | return COSTS_N_INSNS (1); |
1584 | return ix86_cost->sse_load[1]; |
1585 | } |
1586 | |
/* Compute a gain for chain conversion.  The result is the estimated
   instruction-cost savings of the vector form minus the cost of
   moving dual-mode registers between units; a positive value means
   conversion is profitable.  */

int
dimode_scalar_chain::compute_convert_gain ()
{
  bitmap_iterator bi;
  unsigned insn_uid;
  int gain = 0;
  int cost = 0;

  if (dump_file)
    fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);

  EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
    {
      rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
      rtx def_set = single_set (insn);
      rtx src = SET_SRC (def_set);
      rtx dst = SET_DEST (def_set);

      /* A DImode scalar op is counted as two SImode insns (COSTS_N_INSNS
	 (2), int_store[2]/int_load[2] doubled) against one vector op.  */
      if (REG_P (src) && REG_P (dst))
	gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move;
      else if (REG_P (src) && MEM_P (dst))
	gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
      else if (MEM_P (src) && REG_P (dst))
	gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
      else if (GET_CODE (src) == ASHIFT
	       || GET_CODE (src) == ASHIFTRT
	       || GET_CODE (src) == LSHIFTRT)
	{
	  if (CONST_INT_P (XEXP (src, 0)))
	    gain -= vector_const_cost (XEXP (src, 0));
	  if (CONST_INT_P (XEXP (src, 1)))
	    {
	      gain += ix86_cost->shift_const;
	      /* Shifts of 32 or more lose one insn of gain.  */
	      if (INTVAL (XEXP (src, 1)) >= 32)
		gain -= COSTS_N_INSNS (1);
	    }
	  else
	    /* Additional gain for omitting two CMOVs.  */
	    gain += ix86_cost->shift_var + COSTS_N_INSNS (2);
	}
      else if (GET_CODE (src) == PLUS
	       || GET_CODE (src) == MINUS
	       || GET_CODE (src) == IOR
	       || GET_CODE (src) == XOR
	       || GET_CODE (src) == AND)
	{
	  gain += ix86_cost->add;
	  /* Additional gain for andnot for targets without BMI.  */
	  if (GET_CODE (XEXP (src, 0)) == NOT
	      && !TARGET_BMI)
	    gain += 2 * ix86_cost->add;

	  /* Constant operands must be materialized as vectors.  */
	  if (CONST_INT_P (XEXP (src, 0)))
	    gain -= vector_const_cost (XEXP (src, 0));
	  if (CONST_INT_P (XEXP (src, 1)))
	    gain -= vector_const_cost (XEXP (src, 1));
	}
      else if (GET_CODE (src) == NEG
	       || GET_CODE (src) == NOT)
	gain += ix86_cost->add - COSTS_N_INSNS (1);
      else if (GET_CODE (src) == COMPARE)
	{
	  /* Assume comparison cost is the same.  */
	}
      else if (CONST_INT_P (src))
	{
	  if (REG_P (dst))
	    gain += COSTS_N_INSNS (2);
	  else if (MEM_P (dst))
	    gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
	  gain -= vector_const_cost (src);
	}
      else
	gcc_unreachable ();
    }

  if (dump_file)
    fprintf (dump_file, " Instruction conversion gain: %d\n", gain);

  /* Each def of a dual-mode register costs a vector-to-integer move.  */
  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
    cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;

  if (dump_file)
    fprintf (dump_file, " Registers conversion cost: %d\n", cost);

  gain -= cost;

  if (dump_file)
    fprintf (dump_file, " Total gain: %d\n", gain);

  return gain;
}
1681 | |
/* Replace REG in X with a V2DI subreg of NEW_REG.  X is modified in
   place by a recursive walk over its operands and returned.  */

rtx
dimode_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
{
  if (x == reg)
    return gen_rtx_SUBREG (V2DImode, new_reg, 0);

  /* Recurse into rtx operands ('e') and rtx vectors ('E').  */
  const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
  int i, j;
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
						   reg, new_reg);
    }

  return x;
}
1704 | |
1705 | /* Replace REG in INSN with a V2DI subreg of NEW_REG. */ |
1706 | |
1707 | void |
1708 | dimode_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn, |
1709 | rtx reg, rtx new_reg) |
1710 | { |
1711 | replace_with_subreg (single_set (insn), reg, new_reg); |
1712 | } |
1713 | |
1714 | /* Insert generated conversion instruction sequence INSNS |
1715 | after instruction AFTER. New BB may be required in case |
1716 | instruction has EH region attached. */ |
1717 | |
1718 | void |
1719 | scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after) |
1720 | { |
1721 | if (!control_flow_insn_p (after)) |
1722 | { |
1723 | emit_insn_after (insns, after); |
1724 | return; |
1725 | } |
1726 | |
1727 | basic_block bb = BLOCK_FOR_INSN (after); |
1728 | edge e = find_fallthru_edge (bb->succs); |
1729 | gcc_assert (e); |
1730 | |
1731 | basic_block new_bb = split_edge (e); |
1732 | emit_insn_after (insns, BB_HEAD (new_bb)); |
1733 | } |
1734 | |
/* Make vector copies for all register REGNO definitions
   and replace its uses in a chain.  For each definition outside the
   chain a copy into vector register VREG is emitted; chain uses of
   REGNO are then rewritten to use VREG.  */

void
dimode_scalar_chain::make_vector_copies (unsigned regno)
{
  rtx reg = regno_reg_rtx[regno];
  rtx vreg = gen_reg_rtx (DImode);
  bool count_reg = false;
  df_ref ref;

  for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
      {
	df_ref use;

	/* Detect the count register of a shift instruction.  */
	for (use = DF_REG_USE_CHAIN (regno); use; use = DF_REF_NEXT_REG (use))
	  if (bitmap_bit_p (insns, DF_REF_INSN_UID (use)))
	    {
	      rtx_insn *insn = DF_REF_INSN (use);
	      rtx def_set = single_set (insn);

	      gcc_assert (def_set);

	      rtx src = SET_SRC (def_set);

	      if ((GET_CODE (src) == ASHIFT
		   || GET_CODE (src) == ASHIFTRT
		   || GET_CODE (src) == LSHIFTRT)
		  && !CONST_INT_P (XEXP (src, 1))
		  && reg_or_subregno (XEXP (src, 1)) == regno)
		count_reg = true;
	    }

	start_sequence ();
	if (count_reg)
	  {
	    /* Shift counts only need the low QImode part; zero-extend
	       it into VREG.  */
	    rtx qreg = gen_lowpart (QImode, reg);
	    rtx tmp = gen_reg_rtx (SImode);

	    if (TARGET_ZERO_EXTEND_WITH_AND
		&& optimize_function_for_speed_p (cfun))
	      {
		emit_move_insn (tmp, const0_rtx);
		emit_insn (gen_movstrictqi
			   (gen_lowpart (QImode, tmp), qreg));
	      }
	    else
	      emit_insn (gen_rtx_SET
			 (tmp, gen_rtx_ZERO_EXTEND (SImode, qreg)));

	    /* Without direct GPR-to-vector moves, bounce through a
	       stack slot.  */
	    if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
	      {
		rtx slot = assign_386_stack_local (SImode, SLOT_STV_TEMP);
		emit_move_insn (slot, tmp);
		tmp = copy_rtx (slot);
	      }

	    emit_insn (gen_zero_extendsidi2 (vreg, tmp));
	  }
	else if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
	  {
	    /* Store both SImode halves to a stack slot and load the
	       whole DImode value into VREG from there.  */
	    rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
	    emit_move_insn (adjust_address (tmp, SImode, 0),
			    gen_rtx_SUBREG (SImode, reg, 0));
	    emit_move_insn (adjust_address (tmp, SImode, 4),
			    gen_rtx_SUBREG (SImode, reg, 4));
	    emit_move_insn (vreg, tmp);
	  }
	else if (TARGET_SSE4_1)
	  {
	    /* Load the low half and insert the high half with pinsrd.  */
	    emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
					CONST0_RTX (V4SImode),
					gen_rtx_SUBREG (SImode, reg, 0)));
	    emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
					  gen_rtx_SUBREG (V4SImode, vreg, 0),
					  gen_rtx_SUBREG (SImode, reg, 4),
					  GEN_INT (2)));
	  }
	else
	  {
	    /* Pre-SSE4.1: load each half separately and interleave.  */
	    rtx tmp = gen_reg_rtx (DImode);
	    emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
					CONST0_RTX (V4SImode),
					gen_rtx_SUBREG (SImode, reg, 0)));
	    emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
					CONST0_RTX (V4SImode),
					gen_rtx_SUBREG (SImode, reg, 4)));
	    emit_insn (gen_vec_interleave_lowv4si
		       (gen_rtx_SUBREG (V4SImode, vreg, 0),
			gen_rtx_SUBREG (V4SImode, vreg, 0),
			gen_rtx_SUBREG (V4SImode, tmp, 0)));
	  }
	rtx_insn *seq = get_insns ();
	end_sequence ();
	rtx_insn *insn = DF_REF_INSN (ref);
	emit_conversion_insns (seq, insn);

	if (dump_file)
	  fprintf (dump_file,
		   " Copied r%d to a vector register r%d for insn %d\n",
		   regno, REGNO (vreg), INSN_UID (insn));
      }

  /* Rewrite the chain's uses of REGNO to use VREG.  */
  for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
      {
	rtx_insn *insn = DF_REF_INSN (ref);
	if (count_reg)
	  {
	    /* Shift counts are replaced directly, not via subreg.  */
	    rtx def_set = single_set (insn);
	    gcc_assert (def_set);

	    rtx src = SET_SRC (def_set);

	    if ((GET_CODE (src) == ASHIFT
		 || GET_CODE (src) == ASHIFTRT
		 || GET_CODE (src) == LSHIFTRT)
		&& !CONST_INT_P (XEXP (src, 1))
		&& reg_or_subregno (XEXP (src, 1)) == regno)
	      XEXP (src, 1) = vreg;
	  }
	else
	  replace_with_subreg_in_insn (insn, reg, vreg);

	if (dump_file)
	  fprintf (dump_file, " Replaced r%d with r%d in insn %d\n",
		   regno, REGNO (vreg), INSN_UID (insn));
      }
}
1866 | |
/* Convert all definitions of register REGNO
   and fix its uses.  Scalar copies may be created
   in case register is used in not convertible insn.  */

void
dimode_scalar_chain::convert_reg (unsigned regno)
{
  bool scalar_copy = bitmap_bit_p (defs_conv, regno);
  rtx reg = regno_reg_rtx[regno];
  rtx scopy = NULL_RTX;
  df_ref ref;
  bitmap conv;

  /* CONV tracks the chain insns whose references to REGNO still need
     rewriting; bits are cleared below as insns are handled.  */
  conv = BITMAP_ALLOC (NULL);
  bitmap_copy (conv, insns);

  /* SCOPY is a DImode copy of the value for uses that remain in
     scalar insns outside of the chain.  */
  if (scalar_copy)
    scopy = gen_reg_rtx (DImode);

  for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    {
      rtx_insn *insn = DF_REF_INSN (ref);
      rtx def_set = single_set (insn);
      rtx src = SET_SRC (def_set);
      /* NB: shadows the function-level REG; both denote pseudo REGNO.  */
      rtx reg = DF_REF_REG (ref);

      if (!MEM_P (src))
	{
	  /* Rewrite a non-load definition in place: REG becomes
	     (subreg:V2DI REG 0) throughout the insn.  */
	  replace_with_subreg_in_insn (insn, reg, reg);
	  bitmap_clear_bit (conv, INSN_UID (insn));
	}

      if (scalar_copy)
	{
	  /* After each definition, materialize the scalar copy.  */
	  start_sequence ();
	  if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
	    {
	      /* No direct SSE-to-integer register moves: bounce the
		 value through a stack slot, one 32-bit half at a time.  */
	      rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
	      emit_move_insn (tmp, reg);
	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
			      adjust_address (tmp, SImode, 0));
	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
			      adjust_address (tmp, SImode, 4));
	    }
	  else if (TARGET_SSE4_1)
	    {
	      /* Extract both 32-bit halves with vec_select of elements
		 0 and 1 of the V4SImode view of REG.  */
	      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
	      emit_insn
		(gen_rtx_SET
		 (gen_rtx_SUBREG (SImode, scopy, 0),
		  gen_rtx_VEC_SELECT (SImode,
				      gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));

	      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
	      emit_insn
		(gen_rtx_SET
		 (gen_rtx_SUBREG (SImode, scopy, 4),
		  gen_rtx_VEC_SELECT (SImode,
				      gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));
	    }
	  else
	    {
	      /* Pre-SSE4.1: copy the vector, move out the low 32 bits,
		 shift the copy right by 32, and move out the next 32.  */
	      rtx vcopy = gen_reg_rtx (V2DImode);
	      emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
			      gen_rtx_SUBREG (SImode, vcopy, 0));
	      emit_move_insn (vcopy,
			      gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
			      gen_rtx_SUBREG (SImode, vcopy, 0));
	    }
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_conversion_insns (seq, insn);

	  if (dump_file)
	    fprintf (dump_file,
		     " Copied r%d to a scalar register r%d for insn %d\n" ,
		     regno, REGNO (scopy), INSN_UID (insn));
	}
    }

  for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
      {
	if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
	  {
	    rtx_insn *insn = DF_REF_INSN (ref);

	    rtx def_set = single_set (insn);
	    gcc_assert (def_set);

	    rtx src = SET_SRC (def_set);
	    rtx dst = SET_DEST (def_set);

	    if ((GET_CODE (src) == ASHIFT
		 || GET_CODE (src) == ASHIFTRT
		 || GET_CODE (src) == LSHIFTRT)
		&& !CONST_INT_P (XEXP (src, 1))
		&& reg_or_subregno (XEXP (src, 1)) == regno)
	      {
		/* REGNO is a variable shift count.  Zero-extend its low
		   byte into a fresh vector register: with SSE4.1 use
		   pmovzxbq-style zero extension, otherwise AND with the
		   memory constant { 0xff, 0 }.  */
		rtx tmp2 = gen_reg_rtx (V2DImode);

		start_sequence ();

		if (TARGET_SSE4_1)
		  emit_insn (gen_sse4_1_zero_extendv2qiv2di2
			     (tmp2, gen_rtx_SUBREG (V16QImode, reg, 0)));
		else
		  {
		    rtx vec_cst
		      = gen_rtx_CONST_VECTOR (V2DImode,
					      gen_rtvec (2, GEN_INT (0xff),
							 const0_rtx));
		    vec_cst
		      = validize_mem (force_const_mem (V2DImode, vec_cst));

		    emit_insn (gen_rtx_SET
			       (tmp2,
				gen_rtx_AND (V2DImode,
					     gen_rtx_SUBREG (V2DImode, reg, 0),
					     vec_cst)));
		  }
		rtx_insn *seq = get_insns ();
		end_sequence ();

		emit_insn_before (seq, insn);

		XEXP (src, 1) = gen_rtx_SUBREG (DImode, tmp2, 0);
	      }
	    else if (!MEM_P (dst) || !REG_P (src))
	      replace_with_subreg_in_insn (insn, reg, reg);

	    bitmap_clear_bit (conv, INSN_UID (insn));
	  }
      }
    /* Skip debug insns and uninitialized uses.  */
    else if (DF_REF_CHAIN (ref)
	     && NONDEBUG_INSN_P (DF_REF_INSN (ref)))
      {
	/* A use outside of the converted chain: redirect it to the
	   scalar copy created above.  */
	gcc_assert (scopy);
	replace_rtx (DF_REF_INSN (ref), reg, scopy);
	df_insn_rescan (DF_REF_INSN (ref));
      }

  BITMAP_FREE (conv);
}
2014 | |
/* Convert operand OP in INSN.  We should handle
   memory operands and uninitialized registers.
   All other register uses are converted during
   registers conversion.  */

void
dimode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
{
  /* Unshare first: we mutate modes and operands below.  */
  *op = copy_rtx_if_shared (*op);

  if (GET_CODE (*op) == NOT)
    {
      /* Convert the NOT's operand, then retag the NOT itself as a
	 V2DImode operation.  */
      convert_op (&XEXP (*op, 0), insn);
      PUT_MODE (*op, V2DImode);
    }
  else if (MEM_P (*op))
    {
      /* Preload the memory operand into a fresh DImode pseudo before
	 INSN and refer to it as a V2DImode subreg.  */
      rtx tmp = gen_reg_rtx (DImode);

      emit_insn_before (gen_move_insn (tmp, *op), insn);
      *op = gen_rtx_SUBREG (V2DImode, tmp, 0);

      if (dump_file)
	fprintf (dump_file, " Preloading operand for insn %d into r%d\n" ,
		 INSN_UID (insn), REGNO (tmp));
    }
  else if (REG_P (*op))
    {
      /* We may have not converted register usage in case
	 this register has no definition.  Otherwise it
	 should be converted in convert_reg.  */
      df_ref ref;
      FOR_EACH_INSN_USE (ref, insn)
	if (DF_REF_REGNO (ref) == REGNO (*op))
	  {
	    /* An empty use-def chain means the use is uninitialized.  */
	    gcc_assert (!DF_REF_CHAIN (ref));
	    break;
	  }
      *op = gen_rtx_SUBREG (V2DImode, *op, 0);
    }
  else if (CONST_INT_P (*op))
    {
      rtx vec_cst;
      rtx tmp = gen_rtx_SUBREG (V2DImode, gen_reg_rtx (DImode), 0);

      /* Prefer all ones vector in case of -1.  */
      if (constm1_operand (*op, GET_MODE (*op)))
	vec_cst = CONSTM1_RTX (V2DImode);
      else
	vec_cst = gen_rtx_CONST_VECTOR (V2DImode,
					gen_rtvec (2, *op, const0_rtx));

      if (!standard_sse_constant_p (vec_cst, V2DImode))
	{
	  /* Non-trivial constants have to come from the constant pool.  */
	  start_sequence ();
	  vec_cst = validize_mem (force_const_mem (V2DImode, vec_cst));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);
	}

      emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
      *op = tmp;
    }
  else
    {
      /* Anything else must already be a V2DImode subreg produced by an
	 earlier conversion step.  */
      gcc_assert (SUBREG_P (*op));
      gcc_assert (GET_MODE (*op) == V2DImode);
    }
}
2085 | |
/* Convert INSN to vector mode.  */

void
dimode_scalar_chain::convert_insn (rtx_insn *insn)
{
  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);
  rtx subreg;

  if (MEM_P (dst) && !REG_P (src))
    {
      /* There are no scalar integer instructions and therefore
	 temporary register usage is required.  */
      rtx tmp = gen_reg_rtx (DImode);
      emit_conversion_insns (gen_move_insn (dst, tmp), insn);
      dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
    }

  switch (GET_CODE (src))
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      /* Only the shifted operand is converted here; a register shift
	 count was already rewritten in convert_reg.  */
      convert_op (&XEXP (src, 0), insn);
      PUT_MODE (src, V2DImode);
      break;

    case PLUS:
    case MINUS:
    case IOR:
    case XOR:
    case AND:
      convert_op (&XEXP (src, 0), insn);
      convert_op (&XEXP (src, 1), insn);
      PUT_MODE (src, V2DImode);
      break;

    case NEG:
      /* Rewrite negation as subtraction from a zero vector.  */
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (V2DImode);
      emit_insn_before (gen_move_insn (subreg, CONST0_RTX (V2DImode)), insn);
      src = gen_rtx_MINUS (V2DImode, subreg, src);
      break;

    case NOT:
      /* Rewrite one's complement as XOR with an all-ones vector.  */
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (V2DImode);
      emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (V2DImode)), insn);
      src = gen_rtx_XOR (V2DImode, src, subreg);
      break;

    case MEM:
      if (!REG_P (dst))
	convert_op (&src, insn);
      break;

    case REG:
      if (!MEM_P (dst))
	convert_op (&src, insn);
      break;

    case SUBREG:
      gcc_assert (GET_MODE (src) == V2DImode);
      break;

    case COMPARE:
      /* NOTE(review): the operand shape here (subreg under the first
	 operand of the first operand) is presumably guaranteed by the
	 candidate selection — confirm against
	 scalar_to_vector_candidate_p.  */
      src = SUBREG_REG (XEXP (XEXP (src, 0), 0));

      gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
		  || (SUBREG_P (src) && GET_MODE (src) == V2DImode));

      if (REG_P (src))
	subreg = gen_rtx_SUBREG (V2DImode, src, 0);
      else
	subreg = copy_rtx_if_shared (src);
      /* Duplicate the low element via punpcklqdq, then express the
	 comparison as a ptest of the value against itself.  */
      emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
						    copy_rtx_if_shared (subreg),
						    copy_rtx_if_shared (subreg)),
			insn);
      dst = gen_rtx_REG (CCmode, FLAGS_REG);
      src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
					       copy_rtx_if_shared (src)),
			    UNSPEC_PTEST);
      break;

    case CONST_INT:
      convert_op (&src, insn);
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  /* Force re-recognition with the rewritten pattern.  */
  INSN_CODE (insn) = -1;
  recog_memoized (insn);
  df_insn_rescan (insn);
}
2192 | |
/* Fix uses of converted REG in debug insns.  */

void
timode_scalar_chain::fix_debug_reg_uses (rtx reg)
{
  /* Debug insns only exist when var-tracking is enabled.  */
  if (!flag_var_tracking)
    return;

  df_ref ref, next;
  for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
    {
      rtx_insn *insn = DF_REF_INSN (ref);
      /* Make sure the next ref is for a different instruction,
         so that we're not affected by the rescan.  */
      next = DF_REF_NEXT_REG (ref);
      while (next && DF_REF_INSN (next) == insn)
	next = DF_REF_NEXT_REG (next);

      if (DEBUG_INSN_P (insn))
	{
	  /* It may be a debug insn with a TImode variable in
	     register.  */
	  bool changed = false;
	  for (; ref != next; ref = DF_REF_NEXT_REG (ref))
	    {
	      rtx *loc = DF_REF_LOC (ref);
	      /* Re-express the converted V1TImode register as a TImode
		 subreg so the debug expression keeps scalar mode.  */
	      if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
		{
		  *loc = gen_rtx_SUBREG (TImode, *loc, 0);
		  changed = true;
		}
	    }
	  if (changed)
	    df_insn_rescan (insn);
	}
    }
}
2230 | |
/* Convert INSN from TImode to V1TImode.  */

void
timode_scalar_chain::convert_insn (rtx_insn *insn)
{
  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  switch (GET_CODE (dst))
    {
    case REG:
      {
	/* Retag any REG_EQUAL/REG_EQUIV note along with the
	   destination register itself.  */
	rtx tmp = find_reg_equal_equiv_note (insn);
	if (tmp)
	  PUT_MODE (XEXP (tmp, 0), V1TImode);
	PUT_MODE (dst, V1TImode);
	fix_debug_reg_uses (dst);
      }
      break;
    case MEM:
      PUT_MODE (dst, V1TImode);
      break;

    default:
      gcc_unreachable ();
    }

  switch (GET_CODE (src))
    {
    case REG:
      PUT_MODE (src, V1TImode);
      /* Call fix_debug_reg_uses only if SRC is never defined.  */
      if (!DF_REG_DEF_CHAIN (REGNO (src)))
	fix_debug_reg_uses (src);
      break;

    case MEM:
      PUT_MODE (src, V1TImode);
      break;

    case CONST_WIDE_INT:
      if (NONDEBUG_INSN_P (insn))
	{
	  /* Since there are no instructions to store 128-bit constant,
	     temporary register usage is required.  The constant is
	     loaded from the pool into TMP; the original destination is
	     then filled from TMP after INSN.  */
	  rtx tmp = gen_reg_rtx (V1TImode);
	  start_sequence ();
	  src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
	  src = validize_mem (force_const_mem (V1TImode, src));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  if (seq)
	    emit_insn_before (seq, insn);
	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
	  dst = tmp;
	}
      break;

    case CONST_INT:
      /* Only standard SSE constants are expected: 1 means all-zeros,
	 2 means all-ones; map them to the vector equivalents.  */
      switch (standard_sse_constant_p (src, TImode))
	{
	case 1:
	  src = CONST0_RTX (GET_MODE (dst));
	  break;
	case 2:
	  src = CONSTM1_RTX (GET_MODE (dst));
	  break;
	default:
	  gcc_unreachable ();
	}
      if (NONDEBUG_INSN_P (insn))
	{
	  rtx tmp = gen_reg_rtx (V1TImode);
	  /* Since there are no instructions to store standard SSE
	     constant, temporary register usage is required.  */
	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
	  dst = tmp;
	}
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  /* Force re-recognition with the rewritten pattern.  */
  INSN_CODE (insn) = -1;
  recog_memoized (insn);
  df_insn_rescan (insn);
}
2326 | |
/* Convert all registers the chain touches: registers defined inside
   the chain are converted (and given scalar copies as needed) by
   convert_reg; registers only needing conversion at their uses get
   vector copies via make_vector_copies.  */

void
dimode_scalar_chain::convert_registers ()
{
  bitmap_iterator bi;
  unsigned id;

  /* Registers with a definition in the chain.  */
  EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
    convert_reg (id);

  /* Registers in defs_conv but not in defs: used by the chain but
     never defined inside it.  */
  EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
    make_vector_copies (id);
}
2339 | |
2340 | /* Convert whole chain creating required register |
2341 | conversions and copies. */ |
2342 | |
2343 | int |
2344 | scalar_chain::convert () |
2345 | { |
2346 | bitmap_iterator bi; |
2347 | unsigned id; |
2348 | int converted_insns = 0; |
2349 | |
2350 | if (!dbg_cnt (stv_conversion)) |
2351 | return 0; |
2352 | |
2353 | if (dump_file) |
2354 | fprintf (dump_file, "Converting chain #%d...\n" , chain_id); |
2355 | |
2356 | convert_registers (); |
2357 | |
2358 | EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi) |
2359 | { |
2360 | convert_insn (DF_INSN_UID_GET (id)->insn); |
2361 | converted_insns++; |
2362 | } |
2363 | |
2364 | return converted_insns; |
2365 | } |
2366 | |
/* Main STV pass function.  Find and convert scalar
   instructions into vector mode when profitable.  */

static unsigned int
convert_scalars_to_vector ()
{
  basic_block bb;
  bitmap candidates;
  int converted_insns = 0;

  bitmap_obstack_initialize (NULL);
  candidates = BITMAP_ALLOC (NULL);

  /* Set up the dataflow problems the chain analysis depends on.  */
  calculate_dominance_info (CDI_DOMINATORS);
  df_set_flags (DF_DEFER_INSN_RESCAN);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_md_add_problem ();
  df_analyze ();

  /* Find all instructions we want to convert into vector mode.  */
  if (dump_file)
    fprintf (dump_file, "Searching for mode conversion candidates...\n" );

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
	if (scalar_to_vector_candidate_p (insn))
	  {
	    if (dump_file)
	      fprintf (dump_file, " insn %d is marked as a candidate\n" ,
		       INSN_UID (insn));

	    bitmap_set_bit (candidates, INSN_UID (insn));
	  }
    }

  remove_non_convertible_regs (candidates);

  if (bitmap_empty_p (candidates))
    if (dump_file)
      fprintf (dump_file, "There are no candidates for optimization.\n" );

  /* Build a chain starting from each remaining candidate, convert it
     when profitable; chain->build removes its insns from CANDIDATES,
     so this loop terminates.  */
  while (!bitmap_empty_p (candidates))
    {
      unsigned uid = bitmap_first_set_bit (candidates);
      scalar_chain *chain;

      /* 64-bit targets convert TImode chains, 32-bit targets DImode
	 chains.  */
      if (TARGET_64BIT)
	chain = new timode_scalar_chain;
      else
	chain = new dimode_scalar_chain;

      /* Find instructions chain we want to convert to vector mode.
	 Check all uses and definitions to estimate all required
	 conversions.  */
      chain->build (candidates, uid);

      if (chain->compute_convert_gain () > 0)
	converted_insns += chain->convert ();
      else
	if (dump_file)
	  fprintf (dump_file, "Chain #%d conversion is not profitable\n" ,
		   chain->chain_id);

      delete chain;
    }

  if (dump_file)
    fprintf (dump_file, "Total insns converted: %d\n" , converted_insns);

  BITMAP_FREE (candidates);
  bitmap_obstack_release (NULL);
  df_process_deferred_rescans ();

  /* Conversion means we may have 128bit register spills/fills
     which require aligned stack.  */
  if (converted_insns)
    {
      if (crtl->stack_alignment_needed < 128)
	crtl->stack_alignment_needed = 128;
      if (crtl->stack_alignment_estimated < 128)
	crtl->stack_alignment_estimated = 128;
      /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments.  */
      if (TARGET_64BIT)
	for (tree parm = DECL_ARGUMENTS (current_function_decl);
	     parm; parm = DECL_CHAIN (parm))
	  {
	    if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
	      continue;
	    /* Arguments whose RTL was retagged to V1TImode by the
	       conversion are re-wrapped as TImode subregs so their
	       declared scalar mode is preserved.  */
	    if (DECL_RTL_SET_P (parm)
		&& GET_MODE (DECL_RTL (parm)) == V1TImode)
	      {
		rtx r = DECL_RTL (parm);
		if (REG_P (r))
		  SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
	      }
	    if (DECL_INCOMING_RTL (parm)
		&& GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
	      {
		rtx r = DECL_INCOMING_RTL (parm);
		if (REG_P (r))
		  DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
	      }
	  }
    }

  return 0;
}
2476 | |
2477 | namespace { |
2478 | |
/* Pass descriptor for the vzeroupper insertion RTL pass.  */

const pass_data pass_data_insert_vzeroupper =
{
  RTL_PASS, /* type */
  "vzeroupper" , /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
2491 | |
/* RTL pass wrapper that runs rest_of_handle_insert_vzeroupper.  */

class pass_insert_vzeroupper : public rtl_opt_pass
{
public:
  pass_insert_vzeroupper(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only for AVX targets with vzeroupper generation enabled, when
     expensive optimizations are on and not optimizing for size.  */
  virtual bool gate (function *)
    {
      return TARGET_AVX
	     && TARGET_VZEROUPPER && flag_expensive_optimizations
	     && !optimize_size;
    }

  virtual unsigned int execute (function *)
    {
      return rest_of_handle_insert_vzeroupper ();
    }

}; // class pass_insert_vzeroupper
2513 | |
/* Pass descriptor for the scalar-to-vector (STV) RTL pass.  */

const pass_data pass_data_stv =
{
  RTL_PASS, /* type */
  "stv" , /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
2526 | |
/* RTL pass wrapper that runs convert_scalars_to_vector.  The
   TImode/DImode flavor is selected through set_pass_param.  */

class pass_stv : public rtl_opt_pass
{
public:
  pass_stv (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_stv, ctxt),
      timode_p (false)
  {}

  /* opt_pass methods: */
  /* Run only when the flavor matches the target word size (TImode on
     64-bit, DImode on 32-bit), STV and SSE2 are enabled and we are
     optimizing.  */
  virtual bool gate (function *)
    {
      return (timode_p == !!TARGET_64BIT
	      && TARGET_STV && TARGET_SSE2 && optimize > 1);
    }

  virtual unsigned int execute (function *)
    {
      return convert_scalars_to_vector ();
    }

  opt_pass *clone ()
    {
      return new pass_stv (m_ctxt);
    }

  /* Single boolean parameter: true selects the TImode flavor.  */
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      timode_p = param;
    }

private:
  bool timode_p;
}; // class pass_stv
2561 | |
2562 | } // anon namespace |
2563 | |
/* Create an instance of the vzeroupper insertion pass.  */

rtl_opt_pass *
make_pass_insert_vzeroupper (gcc::context *ctxt)
{
  return new pass_insert_vzeroupper (ctxt);
}
2569 | |
/* Create an instance of the STV pass.  */

rtl_opt_pass *
make_pass_stv (gcc::context *ctxt)
{
  return new pass_stv (ctxt);
}
2575 | |
/* Inserting ENDBRANCH instructions.  Emits an ENDBR at the function
   entry, after setjmp-like calls, at switch-table targets (under
   -mcet-switch), and after preserved/deleted labels.  */

static unsigned int
rest_of_insert_endbranch (void)
{
  timevar_push (TV_MACH_DEP);

  rtx cet_eb;
  rtx_insn *insn;
  basic_block bb;

  /* Currently emit EB if it's a tracking function, i.e. 'nocf_check' is
     absent among function attributes.  Later an optimization will be
     introduced to make analysis if an address of a static function is
     taken.  A static function whose address is not taken will get a
     nocf_check attribute.  This will allow to reduce the number of EB.  */

  if (!lookup_attribute ("nocf_check" ,
			 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
      && !cgraph_node::get (cfun->decl)->only_called_directly_p ())
    {
      /* Place the ENDBR before the first insn of the first real block.  */
      cet_eb = gen_nop_endbr ();

      bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
      insn = BB_HEAD (bb);
      emit_insn_before (cet_eb, insn);
    }

  bb = 0;
  FOR_EACH_BB_FN (bb, cfun)
    {
      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
	   insn = NEXT_INSN (insn))
	{
	  if (INSN_P (insn) && GET_CODE (insn) == CALL_INSN)
	    {
	      /* Skip over debug insns, notes and barriers following the
		 call so the ENDBR is emitted after them.  */
	      rtx_insn *next_insn = insn;

	      while ((next_insn != BB_END (bb))
		     && (DEBUG_INSN_P (NEXT_INSN (next_insn))
			 || NOTE_P (NEXT_INSN (next_insn))
			 || BARRIER_P (NEXT_INSN (next_insn))))
		next_insn = NEXT_INSN (next_insn);

	      /* Generate ENDBRANCH after CALL, which can return more than
		 twice, setjmp-like functions.  */
	      if (find_reg_note (insn, REG_SETJMP, NULL) != NULL)
		{
		  cet_eb = gen_nop_endbr ();
		  emit_insn_after (cet_eb, next_insn);
		}
	      continue;
	    }

	  if (INSN_P (insn) && JUMP_P (insn) && flag_cet_switch)
	    {
	      rtx target = JUMP_LABEL (insn);
	      if (target == NULL_RTX || ANY_RETURN_P (target))
		continue;

	      /* Check the jump is a switch table.  */
	      rtx_insn *label = as_a<rtx_insn *> (target);
	      rtx_insn *table = next_insn (label);
	      if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
		continue;

	      /* For the indirect jump find out all places it jumps and insert
		 ENDBRANCH there.  It should be done under a special flag to
		 control ENDBRANCH generation for switch stmts.  */
	      edge_iterator ei;
	      edge e;
	      basic_block dest_blk;

	      FOR_EACH_EDGE (e, ei, bb->succs)
		{
		  rtx_insn *insn;

		  dest_blk = e->dest;
		  insn = BB_HEAD (dest_blk);
		  gcc_assert (LABEL_P (insn));
		  cet_eb = gen_nop_endbr ();
		  emit_insn_after (cet_eb, insn);
		}
	      continue;
	    }

	  /* NOTE(review): preserved and deleted labels are presumably
	     potential indirect-branch targets, hence the ENDBR —
	     confirm against LABEL_PRESERVE_P semantics.  */
	  if ((LABEL_P (insn) && LABEL_PRESERVE_P (insn))
	      || (NOTE_P (insn)
		  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
	    /* TODO.  Check /s bit also.  */
	    {
	      cet_eb = gen_nop_endbr ();
	      emit_insn_after (cet_eb, insn);
	      continue;
	    }
	}
    }

  timevar_pop (TV_MACH_DEP);
  return 0;
}
2677 | |
2678 | namespace { |
2679 | |
/* Pass descriptor for the CET ENDBRANCH insertion RTL pass.  */

const pass_data pass_data_insert_endbranch =
{
  RTL_PASS, /* type.  */
  "cet" , /* name.  */
  OPTGROUP_NONE, /* optinfo_flags.  */
  TV_MACH_DEP, /* tv_id.  */
  0, /* properties_required.  */
  0, /* properties_provided.  */
  0, /* properties_destroyed.  */
  0, /* todo_flags_start.  */
  0, /* todo_flags_finish.  */
};
2692 | |
/* RTL pass wrapper that runs rest_of_insert_endbranch.  */

class pass_insert_endbranch : public rtl_opt_pass
{
public:
  pass_insert_endbranch (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_insert_endbranch, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only with -fcf-protection=branch on IBT-enabled targets.  */
  virtual bool gate (function *)
    {
      return ((flag_cf_protection & CF_BRANCH) && TARGET_IBT);
    }

  virtual unsigned int execute (function *)
    {
      return rest_of_insert_endbranch ();
    }

}; // class pass_insert_endbranch
2712 | |
2713 | } // anon namespace |
2714 | |
/* Create an instance of the ENDBRANCH insertion pass.  */

rtl_opt_pass *
make_pass_insert_endbranch (gcc::context *ctxt)
{
  return new pass_insert_endbranch (ctxt);
}
2720 | |
2721 | /* Return true if a red-zone is in use. */ |
2722 | |
2723 | bool |
2724 | ix86_using_red_zone (void) |
2725 | { |
2726 | return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI; |
2727 | } |
2728 | |
2729 | /* Return a string that documents the current -m options. The caller is |
2730 | responsible for freeing the string. */ |
2731 | |
2732 | static char * |
2733 | ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, |
2734 | int flags, int flags2, |
2735 | const char *arch, const char *tune, |
2736 | enum fpmath_unit fpmath, bool add_nl_p) |
2737 | { |
2738 | struct ix86_target_opts |
2739 | { |
2740 | const char *option; /* option string */ |
2741 | HOST_WIDE_INT mask; /* isa mask options */ |
2742 | }; |
2743 | |
2744 | /* This table is ordered so that options like -msse4.2 that imply other |
2745 | ISAs come first. Target string will be displayed in the same order. */ |
2746 | static struct ix86_target_opts isa2_opts[] = |
2747 | { |
2748 | { "-mmpx" , OPTION_MASK_ISA_MPX }, |
2749 | { "-mavx512vbmi2" , OPTION_MASK_ISA_AVX512VBMI2 }, |
2750 | { "-mavx512vnni" , OPTION_MASK_ISA_AVX512VNNI }, |
2751 | { "-mvaes" , OPTION_MASK_ISA_VAES }, |
2752 | { "-mrdpid" , OPTION_MASK_ISA_RDPID }, |
2753 | { "-msgx" , OPTION_MASK_ISA_SGX }, |
2754 | { "-mavx5124vnniw" , OPTION_MASK_ISA_AVX5124VNNIW }, |
2755 | { "-mavx5124fmaps" , OPTION_MASK_ISA_AVX5124FMAPS }, |
2756 | { "-mavx512vpopcntdq" , OPTION_MASK_ISA_AVX512VPOPCNTDQ }, |
2757 | { "-mibt" , OPTION_MASK_ISA_IBT }, |
2758 | { "-mshstk" , OPTION_MASK_ISA_SHSTK } |
2759 | }; |
2760 | static struct ix86_target_opts isa_opts[] = |
2761 | { |
2762 | { "-mgfni" , OPTION_MASK_ISA_GFNI }, |
2763 | { "-mavx512vbmi" , OPTION_MASK_ISA_AVX512VBMI }, |
2764 | { "-mavx512ifma" , OPTION_MASK_ISA_AVX512IFMA }, |
2765 | { "-mavx512vl" , OPTION_MASK_ISA_AVX512VL }, |
2766 | { "-mavx512bw" , OPTION_MASK_ISA_AVX512BW }, |
2767 | { "-mavx512dq" , OPTION_MASK_ISA_AVX512DQ }, |
2768 | { "-mavx512er" , OPTION_MASK_ISA_AVX512ER }, |
2769 | { "-mavx512pf" , OPTION_MASK_ISA_AVX512PF }, |
2770 | { "-mavx512cd" , OPTION_MASK_ISA_AVX512CD }, |
2771 | { "-mavx512f" , OPTION_MASK_ISA_AVX512F }, |
2772 | { "-mavx2" , OPTION_MASK_ISA_AVX2 }, |
2773 | { "-mfma" , OPTION_MASK_ISA_FMA }, |
2774 | { "-mxop" , OPTION_MASK_ISA_XOP }, |
2775 | { "-mfma4" , OPTION_MASK_ISA_FMA4 }, |
2776 | { "-mf16c" , OPTION_MASK_ISA_F16C }, |
2777 | { "-mavx" , OPTION_MASK_ISA_AVX }, |
2778 | /* { "-msse4" OPTION_MASK_ISA_SSE4 }, */ |
2779 | { "-msse4.2" , OPTION_MASK_ISA_SSE4_2 }, |
2780 | { "-msse4.1" , OPTION_MASK_ISA_SSE4_1 }, |
2781 | { "-msse4a" , OPTION_MASK_ISA_SSE4A }, |
2782 | { "-mssse3" , OPTION_MASK_ISA_SSSE3 }, |
2783 | { "-msse3" , OPTION_MASK_ISA_SSE3 }, |
2784 | { "-maes" , OPTION_MASK_ISA_AES }, |
2785 | { "-msha" , OPTION_MASK_ISA_SHA }, |
2786 | { "-mpclmul" , OPTION_MASK_ISA_PCLMUL }, |
2787 | { "-msse2" , OPTION_MASK_ISA_SSE2 }, |
2788 | { "-msse" , OPTION_MASK_ISA_SSE }, |
2789 | { "-m3dnowa" , OPTION_MASK_ISA_3DNOW_A }, |
2790 | { "-m3dnow" , OPTION_MASK_ISA_3DNOW }, |
2791 | { "-mmmx" , OPTION_MASK_ISA_MMX }, |
2792 | { "-mrtm" , OPTION_MASK_ISA_RTM }, |
2793 | { "-mprfchw" , OPTION_MASK_ISA_PRFCHW }, |
2794 | { "-mrdseed" , OPTION_MASK_ISA_RDSEED }, |
2795 | { "-madx" , OPTION_MASK_ISA_ADX }, |
2796 | { "-mprefetchwt1" , OPTION_MASK_ISA_PREFETCHWT1 }, |
2797 | { "-mclflushopt" , OPTION_MASK_ISA_CLFLUSHOPT }, |
2798 | { "-mxsaves" , OPTION_MASK_ISA_XSAVES }, |
2799 | { "-mxsavec" , OPTION_MASK_ISA_XSAVEC }, |
2800 | { "-mxsaveopt" , OPTION_MASK_ISA_XSAVEOPT }, |
2801 | { "-mxsave" , OPTION_MASK_ISA_XSAVE }, |
2802 | { "-mabm" , OPTION_MASK_ISA_ABM }, |
2803 | { "-mbmi" , OPTION_MASK_ISA_BMI }, |
2804 | { "-mbmi2" , OPTION_MASK_ISA_BMI2 }, |
2805 | { "-mlzcnt" , OPTION_MASK_ISA_LZCNT }, |
2806 | { "-mtbm" , OPTION_MASK_ISA_TBM }, |
2807 | { "-mpopcnt" , OPTION_MASK_ISA_POPCNT }, |
2808 | { "-mcx16" , OPTION_MASK_ISA_CX16 }, |
2809 | { "-msahf" , OPTION_MASK_ISA_SAHF }, |
2810 | { "-mmovbe" , OPTION_MASK_ISA_MOVBE }, |
2811 | { "-mcrc32" , OPTION_MASK_ISA_CRC32 }, |
2812 | { "-mfsgsbase" , OPTION_MASK_ISA_FSGSBASE }, |
2813 | { "-mrdrnd" , OPTION_MASK_ISA_RDRND }, |
2814 | { "-mmwaitx" , OPTION_MASK_ISA_MWAITX }, |
2815 | { "-mclzero" , OPTION_MASK_ISA_CLZERO }, |
2816 | { "-mpku" , OPTION_MASK_ISA_PKU }, |
2817 | { "-mlwp" , OPTION_MASK_ISA_LWP }, |
2818 | { "-mhle" , OPTION_MASK_ISA_HLE }, |
2819 | { "-mfxsr" , OPTION_MASK_ISA_FXSR }, |
2820 | { "-mclwb" , OPTION_MASK_ISA_CLWB } |
2821 | }; |
2822 | |
  /* Flag options: -m<flag> switches that live in target_flags.  The
     table is scanned in order when reconstructing the option string,
     so entry order determines output order.  */
  static struct ix86_target_opts flag_opts[] =
  {
    { "-m128bit-long-double" , MASK_128BIT_LONG_DOUBLE },
    { "-mlong-double-128" , MASK_LONG_DOUBLE_128 },
    { "-mlong-double-64" , MASK_LONG_DOUBLE_64 },
    { "-m80387" , MASK_80387 },
    { "-maccumulate-outgoing-args" , MASK_ACCUMULATE_OUTGOING_ARGS },
    { "-malign-double" , MASK_ALIGN_DOUBLE },
    { "-mcld" , MASK_CLD },
    { "-mfp-ret-in-387" , MASK_FLOAT_RETURNS },
    { "-mieee-fp" , MASK_IEEE_FP },
    { "-minline-all-stringops" , MASK_INLINE_ALL_STRINGOPS },
    { "-minline-stringops-dynamically" , MASK_INLINE_STRINGOPS_DYNAMICALLY },
    { "-mms-bitfields" , MASK_MS_BITFIELD_LAYOUT },
    { "-mno-align-stringops" , MASK_NO_ALIGN_STRINGOPS },
    { "-mno-fancy-math-387" , MASK_NO_FANCY_MATH_387 },
    { "-mno-push-args" , MASK_NO_PUSH_ARGS },
    { "-mno-red-zone" , MASK_NO_RED_ZONE },
    { "-momit-leaf-frame-pointer" , MASK_OMIT_LEAF_FRAME_POINTER },
    { "-mrecip" , MASK_RECIP },
    { "-mrtd" , MASK_RTD },
    { "-msseregparm" , MASK_SSEREGPARM },
    { "-mstack-arg-probe" , MASK_STACK_PROBE },
    { "-mtls-direct-seg-refs" , MASK_TLS_DIRECT_SEG_REFS },
    { "-mvect8-ret-in-mem" , MASK_VECT8_RETURNS },
    { "-m8bit-idiv" , MASK_USE_8BIT_IDIV },
    { "-mvzeroupper" , MASK_VZEROUPPER },
    { "-mstv" , MASK_STV },
    { "-mavx256-split-unaligned-load" , MASK_AVX256_SPLIT_UNALIGNED_LOAD },
    { "-mavx256-split-unaligned-store" , MASK_AVX256_SPLIT_UNALIGNED_STORE },
    { "-mcall-ms2sysv-xlogues" , MASK_CALL_MS2SYSV_XLOGUES }
  };
2856 | |
  /* Additional flag options that live in ix86_target_flags rather than
     target_flags.  */
  static struct ix86_target_opts flag2_opts[] =
  {
    { "-mgeneral-regs-only" , OPTION_MASK_GENERAL_REGS_ONLY }
  };
2862 | |
2863 | const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (isa2_opts) |
2864 | + ARRAY_SIZE (flag_opts) + ARRAY_SIZE (flag2_opts) + 6][2]; |
2865 | |
2866 | char isa_other[40]; |
2867 | char isa2_other[40]; |
2868 | char flags_other[40]; |
2869 | char flags2_other[40]; |
2870 | unsigned num = 0; |
2871 | unsigned i, j; |
2872 | char *ret; |
2873 | char *ptr; |
2874 | size_t len; |
2875 | size_t line_len; |
2876 | size_t sep_len; |
2877 | const char *abi; |
2878 | |
2879 | memset (opts, '\0', sizeof (opts)); |
2880 | |
2881 | /* Add -march= option. */ |
2882 | if (arch) |
2883 | { |
2884 | opts[num][0] = "-march=" ; |
2885 | opts[num++][1] = arch; |
2886 | } |
2887 | |
2888 | /* Add -mtune= option. */ |
2889 | if (tune) |
2890 | { |
2891 | opts[num][0] = "-mtune=" ; |
2892 | opts[num++][1] = tune; |
2893 | } |
2894 | |
2895 | /* Add -m32/-m64/-mx32. */ |
2896 | if ((isa & OPTION_MASK_ISA_64BIT) != 0) |
2897 | { |
2898 | if ((isa & OPTION_MASK_ABI_64) != 0) |
2899 | abi = "-m64" ; |
2900 | else |
2901 | abi = "-mx32" ; |
2902 | isa &= ~ (OPTION_MASK_ISA_64BIT |
2903 | | OPTION_MASK_ABI_64 |
2904 | | OPTION_MASK_ABI_X32); |
2905 | } |
2906 | else |
2907 | abi = "-m32" ; |
2908 | opts[num++][0] = abi; |
2909 | |
2910 | /* Pick out the options in isa2 options. */ |
2911 | for (i = 0; i < ARRAY_SIZE (isa2_opts); i++) |
2912 | { |
2913 | if ((isa2 & isa2_opts[i].mask) != 0) |
2914 | { |
2915 | opts[num++][0] = isa2_opts[i].option; |
2916 | isa2 &= ~ isa2_opts[i].mask; |
2917 | } |
2918 | } |
2919 | |
2920 | if (isa2 && add_nl_p) |
2921 | { |
2922 | opts[num++][0] = isa2_other; |
2923 | sprintf (isa2_other, "(other isa2: %#" HOST_WIDE_INT_PRINT "x)" , isa2); |
2924 | } |
2925 | |
2926 | /* Pick out the options in isa options. */ |
2927 | for (i = 0; i < ARRAY_SIZE (isa_opts); i++) |
2928 | { |
2929 | if ((isa & isa_opts[i].mask) != 0) |
2930 | { |
2931 | opts[num++][0] = isa_opts[i].option; |
2932 | isa &= ~ isa_opts[i].mask; |
2933 | } |
2934 | } |
2935 | |
2936 | if (isa && add_nl_p) |
2937 | { |
2938 | opts[num++][0] = isa_other; |
2939 | sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)" , isa); |
2940 | } |
2941 | |
2942 | /* Add flag options. */ |
2943 | for (i = 0; i < ARRAY_SIZE (flag_opts); i++) |
2944 | { |
2945 | if ((flags & flag_opts[i].mask) != 0) |
2946 | { |
2947 | opts[num++][0] = flag_opts[i].option; |
2948 | flags &= ~ flag_opts[i].mask; |
2949 | } |
2950 | } |
2951 | |
2952 | if (flags && add_nl_p) |
2953 | { |
2954 | opts[num++][0] = flags_other; |
2955 | sprintf (flags_other, "(other flags: %#x)" , flags); |
2956 | } |
2957 | |
2958 | /* Add additional flag options. */ |
2959 | for (i = 0; i < ARRAY_SIZE (flag2_opts); i++) |
2960 | { |
2961 | if ((flags2 & flag2_opts[i].mask) != 0) |
2962 | { |
2963 | opts[num++][0] = flag2_opts[i].option; |
2964 | flags2 &= ~ flag2_opts[i].mask; |
2965 | } |
2966 | } |
2967 | |
2968 | if (flags2 && add_nl_p) |
2969 | { |
2970 | opts[num++][0] = flags2_other; |
2971 | sprintf (flags2_other, "(other flags2: %#x)" , flags2); |
2972 | } |
2973 | |
2974 | /* Add -fpmath= option. */ |
2975 | if (fpmath) |
2976 | { |
2977 | opts[num][0] = "-mfpmath=" ; |
2978 | switch ((int) fpmath) |
2979 | { |
2980 | case FPMATH_387: |
2981 | opts[num++][1] = "387" ; |
2982 | break; |
2983 | |
2984 | case FPMATH_SSE: |
2985 | opts[num++][1] = "sse" ; |
2986 | break; |
2987 | |
2988 | case FPMATH_387 | FPMATH_SSE: |
2989 | opts[num++][1] = "sse+387" ; |
2990 | break; |
2991 | |
2992 | default: |
2993 | gcc_unreachable (); |
2994 | } |
2995 | } |
2996 | |
2997 | /* Any options? */ |
2998 | if (num == 0) |
2999 | return NULL; |
3000 | |
3001 | gcc_assert (num < ARRAY_SIZE (opts)); |
3002 | |
3003 | /* Size the string. */ |
3004 | len = 0; |
3005 | sep_len = (add_nl_p) ? 3 : 1; |
3006 | for (i = 0; i < num; i++) |
3007 | { |
3008 | len += sep_len; |
3009 | for (j = 0; j < 2; j++) |
3010 | if (opts[i][j]) |
3011 | len += strlen (opts[i][j]); |
3012 | } |
3013 | |
3014 | /* Build the string. */ |
3015 | ret = ptr = (char *) xmalloc (len); |
3016 | line_len = 0; |
3017 | |
3018 | for (i = 0; i < num; i++) |
3019 | { |
3020 | size_t len2[2]; |
3021 | |
3022 | for (j = 0; j < 2; j++) |
3023 | len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0; |
3024 | |
3025 | if (i != 0) |
3026 | { |
3027 | *ptr++ = ' '; |
3028 | line_len++; |
3029 | |
3030 | if (add_nl_p && line_len + len2[0] + len2[1] > 70) |
3031 | { |
3032 | *ptr++ = '\\'; |
3033 | *ptr++ = '\n'; |
3034 | line_len = 0; |
3035 | } |
3036 | } |
3037 | |
3038 | for (j = 0; j < 2; j++) |
3039 | if (opts[i][j]) |
3040 | { |
3041 | memcpy (ptr, opts[i][j], len2[j]); |
3042 | ptr += len2[j]; |
3043 | line_len += len2[j]; |
3044 | } |
3045 | } |
3046 | |
3047 | *ptr = '\0'; |
3048 | gcc_assert (ret + len >= ptr); |
3049 | |
3050 | return ret; |
3051 | } |
3052 | |
3053 | /* Return true, if profiling code should be emitted before |
3054 | prologue. Otherwise it returns false. |
3055 | Note: For x86 with "hotfix" it is sorried. */ |
3056 | static bool |
3057 | ix86_profile_before_prologue (void) |
3058 | { |
3059 | return flag_fentry != 0; |
3060 | } |
3061 | |
3062 | /* Function that is callable from the debugger to print the current |
3063 | options. */ |
3064 | void ATTRIBUTE_UNUSED |
3065 | ix86_debug_options (void) |
3066 | { |
3067 | char *opts = ix86_target_string (ix86_isa_flags, ix86_isa_flags2, |
3068 | target_flags, ix86_target_flags, |
3069 | ix86_arch_string,ix86_tune_string, |
3070 | ix86_fpmath, true); |
3071 | |
3072 | if (opts) |
3073 | { |
3074 | fprintf (stderr, "%s\n\n" , opts); |
3075 | free (opts); |
3076 | } |
3077 | else |
3078 | fputs ("<no options>\n\n" , stderr); |
3079 | |
3080 | return; |
3081 | } |
3082 | |
/* Return true if T is one of the bytes we should avoid emitting with
   -fmitigate-rop.  These are the x86 return opcodes (0xc3 ret,
   0xc2 ret imm16, 0xcb retf, 0xca retf imm16), the classic ROP
   gadget terminators.  */

static bool
ix86_rop_should_change_byte_p (int t)
{
  switch (t)
    {
    case 0xc2:
    case 0xc3:
    case 0xca:
    case 0xcb:
      return true;
    default:
      return false;
    }
}
3091 | |
/* User-visible names of the string-operation algorithms, indexed by
   enum stringop_alg.  Generated from stringop.def so the table stays
   in sync with the enum definition.  */
static const char *stringop_alg_names[] = {
#define DEF_ENUM
#define DEF_ALG(alg, name) #name,
#include "stringop.def"
#undef DEF_ENUM
#undef DEF_ALG
};
3099 | |
3100 | /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=. |
3101 | The string is of the following form (or comma separated list of it): |
3102 | |
3103 | strategy_alg:max_size:[align|noalign] |
3104 | |
3105 | where the full size range for the strategy is either [0, max_size] or |
3106 | [min_size, max_size], in which min_size is the max_size + 1 of the |
3107 | preceding range. The last size range must have max_size == -1. |
3108 | |
3109 | Examples: |
3110 | |
3111 | 1. |
3112 | -mmemcpy-strategy=libcall:-1:noalign |
3113 | |
3114 | this is equivalent to (for known size memcpy) -mstringop-strategy=libcall |
3115 | |
3116 | |
3117 | 2. |
3118 | -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign |
3119 | |
3120 | This is to tell the compiler to use the following strategy for memset |
3121 | 1) when the expected size is between [1, 16], use rep_8byte strategy; |
3122 | 2) when the size is between [17, 2048], use vector_loop; |
3123 | 3) when the size is > 2048, use libcall. */ |
3124 | |
/* One size range parsed from a -mmemcpy-strategy= / -mmemset-strategy=
   argument.  */
struct stringop_size_range
{
  int max;		/* Upper size bound of this range; -1 = unbounded.  */
  stringop_alg alg;	/* Algorithm to use for sizes in this range.  */
  bool noalign;		/* True if the "noalign" variant was requested.  */
};
3131 | |
3132 | static void |
3133 | ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset) |
3134 | { |
3135 | const struct stringop_algs *default_algs; |
3136 | stringop_size_range input_ranges[MAX_STRINGOP_ALGS]; |
3137 | char *curr_range_str, *next_range_str; |
3138 | const char *opt = is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=" ; |
3139 | int i = 0, n = 0; |
3140 | |
3141 | if (is_memset) |
3142 | default_algs = &ix86_cost->memset[TARGET_64BIT != 0]; |
3143 | else |
3144 | default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0]; |
3145 | |
3146 | curr_range_str = strategy_str; |
3147 | |
3148 | do |
3149 | { |
3150 | int maxs; |
3151 | char alg_name[128]; |
3152 | char align[16]; |
3153 | next_range_str = strchr (curr_range_str, ','); |
3154 | if (next_range_str) |
3155 | *next_range_str++ = '\0'; |
3156 | |
3157 | if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s" , |
3158 | alg_name, &maxs, align)) |
3159 | { |
3160 | error ("wrong argument %qs to option %qs" , curr_range_str, opt); |
3161 | return; |
3162 | } |
3163 | |
3164 | if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1)) |
3165 | { |
3166 | error ("size ranges of option %qs should be increasing" , opt); |
3167 | return; |
3168 | } |
3169 | |
3170 | for (i = 0; i < last_alg; i++) |
3171 | if (!strcmp (alg_name, stringop_alg_names[i])) |
3172 | break; |
3173 | |
3174 | if (i == last_alg) |
3175 | { |
3176 | error ("wrong strategy name %qs specified for option %qs" , |
3177 | alg_name, opt); |
3178 | |
3179 | auto_vec <const char *> candidates; |
3180 | for (i = 0; i < last_alg; i++) |
3181 | if ((stringop_alg) i != rep_prefix_8_byte || TARGET_64BIT) |
3182 | candidates.safe_push (stringop_alg_names[i]); |
3183 | |
3184 | char *s; |
3185 | const char *hint |
3186 | = candidates_list_and_hint (alg_name, s, candidates); |
3187 | if (hint) |
3188 | inform (input_location, |
3189 | "valid arguments to %qs are: %s; did you mean %qs?" , |
3190 | opt, s, hint); |
3191 | else |
3192 | inform (input_location, "valid arguments to %qs are: %s" , |
3193 | opt, s); |
3194 | XDELETEVEC (s); |
3195 | return; |
3196 | } |
3197 | |
3198 | if ((stringop_alg) i == rep_prefix_8_byte |
3199 | && !TARGET_64BIT) |
3200 | { |
3201 | /* rep; movq isn't available in 32-bit code. */ |
3202 | error ("strategy name %qs specified for option %qs " |
3203 | "not supported for 32-bit code" , alg_name, opt); |
3204 | return; |
3205 | } |
3206 | |
3207 | input_ranges[n].max = maxs; |
3208 | input_ranges[n].alg = (stringop_alg) i; |
3209 | if (!strcmp (align, "align" )) |
3210 | input_ranges[n].noalign = false; |
3211 | else if (!strcmp (align, "noalign" )) |
3212 | input_ranges[n].noalign = true; |
3213 | else |
3214 | { |
3215 | error ("unknown alignment %qs specified for option %qs" , align, opt); |
3216 | return; |
3217 | } |
3218 | n++; |
3219 | curr_range_str = next_range_str; |
3220 | } |
3221 | while (curr_range_str); |
3222 | |
3223 | if (input_ranges[n - 1].max != -1) |
3224 | { |
3225 | error ("the max value for the last size range should be -1" |
3226 | " for option %qs" , opt); |
3227 | return; |
3228 | } |
3229 | |
3230 | if (n > MAX_STRINGOP_ALGS) |
3231 | { |
3232 | error ("too many size ranges specified in option %qs" , opt); |
3233 | return; |
3234 | } |
3235 | |
3236 | /* Now override the default algs array. */ |
3237 | for (i = 0; i < n; i++) |
3238 | { |
3239 | *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max; |
3240 | *const_cast<stringop_alg *>(&default_algs->size[i].alg) |
3241 | = input_ranges[i].alg; |
3242 | *const_cast<int *>(&default_algs->size[i].noalign) |
3243 | = input_ranges[i].noalign; |
3244 | } |
3245 | } |
3246 | |
3247 | |
/* Parse the -mtune-ctrl= option: a comma-separated list of tuning
   feature names, each optionally prefixed with '^' to clear the
   feature instead of setting it.  When DUMP is true, print the
   features that are explicitly set or cleared.  Unknown names are
   diagnosed but do not stop parsing of later entries.  */

static void
parse_mtune_ctrl_str (bool dump)
{
  if (!ix86_tune_ctrl_string)
    return;

  char *next_feature_string = NULL;
  /* Work on a private copy; the parse replaces ',' with NULs.  */
  char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
  char *orig = curr_feature_string;  /* Head of the copy, kept for free.  */
  int i;
  do
    {
      bool clear = false;

      next_feature_string = strchr (curr_feature_string, ',');
      if (next_feature_string)
        *next_feature_string++ = '\0';
      /* A leading '^' means clear the feature rather than set it.  */
      if (*curr_feature_string == '^')
        {
          curr_feature_string++;
          clear = true;
        }
      for (i = 0; i < X86_TUNE_LAST; i++)
        {
          if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
            {
              ix86_tune_features[i] = !clear;
              if (dump)
                fprintf (stderr, "Explicitly %s feature %s\n" ,
                         clear ? "clear" : "set" , ix86_tune_feature_names[i]);
              break;
            }
        }
      if (i == X86_TUNE_LAST)
	/* Step back over the '^' (if any) so the diagnostic shows the
	   token exactly as the user wrote it.  */
        error ("unknown parameter to option -mtune-ctrl: %s" ,
               clear ? curr_feature_string - 1 : curr_feature_string);
      curr_feature_string = next_feature_string;
    }
  while (curr_feature_string);
  free (orig);
}
3292 | |
3293 | /* Helper function to set ix86_tune_features. IX86_TUNE is the |
3294 | processor type. */ |
3295 | |
3296 | static void |
3297 | set_ix86_tune_features (enum processor_type ix86_tune, bool dump) |
3298 | { |
3299 | unsigned int ix86_tune_mask = 1u << ix86_tune; |
3300 | int i; |
3301 | |
3302 | for (i = 0; i < X86_TUNE_LAST; ++i) |
3303 | { |
3304 | if (ix86_tune_no_default) |
3305 | ix86_tune_features[i] = 0; |
3306 | else |
3307 | ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask); |
3308 | } |
3309 | |
3310 | if (dump) |
3311 | { |
3312 | fprintf (stderr, "List of x86 specific tuning parameter names:\n" ); |
3313 | for (i = 0; i < X86_TUNE_LAST; i++) |
3314 | fprintf (stderr, "%s : %s\n" , ix86_tune_feature_names[i], |
3315 | ix86_tune_features[i] ? "on" : "off" ); |
3316 | } |
3317 | |
3318 | parse_mtune_ctrl_str (dump); |
3319 | } |
3320 | |
3321 | |
3322 | /* Default align_* from the processor table. */ |
3323 | |
3324 | static void |
3325 | ix86_default_align (struct gcc_options *opts) |
3326 | { |
3327 | if (opts->x_align_loops == 0) |
3328 | { |
3329 | opts->x_align_loops = processor_target_table[ix86_tune].align_loop; |
3330 | align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip; |
3331 | } |
3332 | if (opts->x_align_jumps == 0) |
3333 | { |
3334 | opts->x_align_jumps = processor_target_table[ix86_tune].align_jump; |
3335 | align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip; |
3336 | } |
3337 | if (opts->x_align_functions == 0) |
3338 | { |
3339 | opts->x_align_functions = processor_target_table[ix86_tune].align_func; |
3340 | } |
3341 | } |
3342 | |
/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook.  Called after
   target options change; re-derives the default alignment settings
   for the global option set.  */

static void
ix86_override_options_after_change (void)
{
  ix86_default_align (&global_options);
}
3350 | |
3351 | /* Override various settings based on options. If MAIN_ARGS_P, the |
3352 | options are from the command line, otherwise they are from |
3353 | attributes. Return true if there's an error related to march |
3354 | option. */ |
3355 | |
3356 | static bool |
3357 | ix86_option_override_internal (bool main_args_p, |
3358 | struct gcc_options *opts, |
3359 | struct gcc_options *opts_set) |
3360 | { |
3361 | int i; |
3362 | unsigned int ix86_arch_mask; |
3363 | const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL); |
3364 | |
3365 | #define PTA_3DNOW (HOST_WIDE_INT_1 << 0) |
3366 | #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1) |
3367 | #define PTA_64BIT (HOST_WIDE_INT_1 << 2) |
3368 | #define PTA_ABM (HOST_WIDE_INT_1 << 3) |
3369 | #define PTA_AES (HOST_WIDE_INT_1 << 4) |
3370 | #define PTA_AVX (HOST_WIDE_INT_1 << 5) |
3371 | #define PTA_BMI (HOST_WIDE_INT_1 << 6) |
3372 | #define PTA_CX16 (HOST_WIDE_INT_1 << 7) |
3373 | #define PTA_F16C (HOST_WIDE_INT_1 << 8) |
3374 | #define PTA_FMA (HOST_WIDE_INT_1 << 9) |
3375 | #define PTA_FMA4 (HOST_WIDE_INT_1 << 10) |
3376 | #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11) |
3377 | #define PTA_LWP (HOST_WIDE_INT_1 << 12) |
3378 | #define PTA_LZCNT (HOST_WIDE_INT_1 << 13) |
3379 | #define PTA_MMX (HOST_WIDE_INT_1 << 14) |
3380 | #define PTA_MOVBE (HOST_WIDE_INT_1 << 15) |
3381 | #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16) |
3382 | #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17) |
3383 | #define PTA_POPCNT (HOST_WIDE_INT_1 << 18) |
3384 | #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19) |
3385 | #define PTA_RDRND (HOST_WIDE_INT_1 << 20) |
3386 | #define PTA_SSE (HOST_WIDE_INT_1 << 21) |
3387 | #define PTA_SSE2 (HOST_WIDE_INT_1 << 22) |
3388 | #define PTA_SSE3 (HOST_WIDE_INT_1 << 23) |
3389 | #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24) |
3390 | #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25) |
3391 | #define PTA_SSE4A (HOST_WIDE_INT_1 << 26) |
3392 | #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27) |
3393 | #define PTA_TBM (HOST_WIDE_INT_1 << 28) |
3394 | #define PTA_XOP (HOST_WIDE_INT_1 << 29) |
3395 | #define PTA_AVX2 (HOST_WIDE_INT_1 << 30) |
3396 | #define PTA_BMI2 (HOST_WIDE_INT_1 << 31) |
3397 | #define PTA_RTM (HOST_WIDE_INT_1 << 32) |
3398 | #define PTA_HLE (HOST_WIDE_INT_1 << 33) |
3399 | #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34) |
3400 | #define PTA_RDSEED (HOST_WIDE_INT_1 << 35) |
3401 | #define PTA_ADX (HOST_WIDE_INT_1 << 36) |
3402 | #define PTA_FXSR (HOST_WIDE_INT_1 << 37) |
3403 | #define PTA_XSAVE (HOST_WIDE_INT_1 << 38) |
3404 | #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39) |
3405 | #define PTA_AVX512F (HOST_WIDE_INT_1 << 40) |
3406 | #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41) |
3407 | #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42) |
3408 | #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43) |
3409 | #define PTA_MPX (HOST_WIDE_INT_1 << 44) |
3410 | #define PTA_SHA (HOST_WIDE_INT_1 << 45) |
3411 | #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46) |
3412 | #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47) |
3413 | #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48) |
3414 | #define PTA_XSAVES (HOST_WIDE_INT_1 << 49) |
3415 | #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50) |
3416 | #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51) |
3417 | #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52) |
3418 | #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53) |
3419 | #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54) |
3420 | #define PTA_CLWB (HOST_WIDE_INT_1 << 55) |
3421 | #define PTA_MWAITX (HOST_WIDE_INT_1 << 56) |
3422 | #define PTA_CLZERO (HOST_WIDE_INT_1 << 57) |
3423 | #define PTA_NO_80387 (HOST_WIDE_INT_1 << 58) |
3424 | #define PTA_PKU (HOST_WIDE_INT_1 << 59) |
3425 | #define PTA_AVX5124VNNIW (HOST_WIDE_INT_1 << 60) |
3426 | #define PTA_AVX5124FMAPS (HOST_WIDE_INT_1 << 61) |
3427 | #define PTA_AVX512VPOPCNTDQ (HOST_WIDE_INT_1 << 62) |
3428 | #define PTA_SGX (HOST_WIDE_INT_1 << 63) |
3429 | |
3430 | #define PTA_CORE2 \ |
3431 | (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \ |
3432 | | PTA_CX16 | PTA_FXSR) |
3433 | #define PTA_NEHALEM \ |
3434 | (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT) |
3435 | #define PTA_WESTMERE \ |
3436 | (PTA_NEHALEM | PTA_AES | PTA_PCLMUL) |
3437 | #define PTA_SANDYBRIDGE \ |
3438 | (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT) |
3439 | #define PTA_IVYBRIDGE \ |
3440 | (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C) |
3441 | #define PTA_HASWELL \ |
3442 | (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \ |
3443 | | PTA_FMA | PTA_MOVBE | PTA_HLE) |
3444 | #define PTA_BROADWELL \ |
3445 | (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED) |
3446 | #define PTA_SKYLAKE \ |
3447 | (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES) |
3448 | #define PTA_SKYLAKE_AVX512 \ |
3449 | (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \ |
3450 | | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU | PTA_CLWB) |
3451 | #define PTA_CANNONLAKE \ |
3452 | (PTA_SKYLAKE_AVX512 | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA) |
3453 | #define PTA_KNL \ |
3454 | (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD) |
3455 | #define PTA_BONNELL \ |
3456 | (PTA_CORE2 | PTA_MOVBE) |
3457 | #define PTA_SILVERMONT \ |
3458 | (PTA_WESTMERE | PTA_MOVBE) |
3459 | #define PTA_KNM \ |
3460 | (PTA_KNL | PTA_AVX5124VNNIW | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ) |
3461 | |
3462 | /* if this reaches 64, need to widen struct pta flags below */ |
3463 | |
3464 | static struct pta |
3465 | { |
3466 | const char *const name; /* processor name or nickname. */ |
3467 | const enum processor_type processor; |
3468 | const enum attr_cpu schedule; |
3469 | const unsigned HOST_WIDE_INT flags; |
3470 | } |
3471 | const processor_alias_table[] = |
3472 | { |
3473 | {"i386" , PROCESSOR_I386, CPU_NONE, 0}, |
3474 | {"i486" , PROCESSOR_I486, CPU_NONE, 0}, |
3475 | {"i586" , PROCESSOR_PENTIUM, CPU_PENTIUM, 0}, |
3476 | {"pentium" , PROCESSOR_PENTIUM, CPU_PENTIUM, 0}, |
3477 | {"lakemont" , PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387}, |
3478 | {"pentium-mmx" , PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX}, |
3479 | {"winchip-c6" , PROCESSOR_I486, CPU_NONE, PTA_MMX}, |
3480 | {"winchip2" , PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW}, |
3481 | {"c3" , PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW}, |
3482 | {"samuel-2" , PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW}, |
3483 | {"c3-2" , PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, |
3484 | PTA_MMX | PTA_SSE | PTA_FXSR}, |
3485 | {"nehemiah" , PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, |
3486 | PTA_MMX | PTA_SSE | PTA_FXSR}, |
3487 | {"c7" , PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, |
3488 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR}, |
3489 | {"esther" , PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, |
3490 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR}, |
3491 | {"i686" , PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0}, |
3492 | {"pentiumpro" , PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0}, |
3493 | {"pentium2" , PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR}, |
3494 | {"pentium3" , PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, |
3495 | PTA_MMX | PTA_SSE | PTA_FXSR}, |
3496 | {"pentium3m" , PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, |
3497 | PTA_MMX | PTA_SSE | PTA_FXSR}, |
3498 | {"pentium-m" , PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, |
3499 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR}, |
3500 | {"pentium4" , PROCESSOR_PENTIUM4, CPU_NONE, |
3501 | PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR}, |
3502 | {"pentium4m" , PROCESSOR_PENTIUM4, CPU_NONE, |
3503 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR}, |
3504 | {"prescott" , PROCESSOR_NOCONA, CPU_NONE, |
3505 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR}, |
3506 | {"nocona" , PROCESSOR_NOCONA, CPU_NONE, |
3507 | PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
3508 | | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR}, |
3509 | {"core2" , PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2}, |
3510 | {"nehalem" , PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM}, |
3511 | {"corei7" , PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM}, |
3512 | {"westmere" , PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE}, |
3513 | {"sandybridge" , PROCESSOR_SANDYBRIDGE, CPU_NEHALEM, |
3514 | PTA_SANDYBRIDGE}, |
3515 | {"corei7-avx" , PROCESSOR_SANDYBRIDGE, CPU_NEHALEM, |
3516 | PTA_SANDYBRIDGE}, |
3517 | {"ivybridge" , PROCESSOR_SANDYBRIDGE, CPU_NEHALEM, |
3518 | PTA_IVYBRIDGE}, |
3519 | {"core-avx-i" , PROCESSOR_SANDYBRIDGE, CPU_NEHALEM, |
3520 | PTA_IVYBRIDGE}, |
3521 | {"haswell" , PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL}, |
3522 | {"core-avx2" , PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL}, |
3523 | {"broadwell" , PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL}, |
3524 | {"skylake" , PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE}, |
3525 | {"skylake-avx512" , PROCESSOR_SKYLAKE_AVX512, CPU_HASWELL, |
3526 | PTA_SKYLAKE_AVX512}, |
3527 | {"cannonlake" , PROCESSOR_HASWELL, CPU_HASWELL, PTA_CANNONLAKE}, |
3528 | {"bonnell" , PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL}, |
3529 | {"atom" , PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL}, |
3530 | {"silvermont" , PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT}, |
3531 | {"slm" , PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT}, |
3532 | {"knl" , PROCESSOR_KNL, CPU_SLM, PTA_KNL}, |
3533 | {"knm" , PROCESSOR_KNM, CPU_SLM, PTA_KNM}, |
3534 | {"intel" , PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM}, |
3535 | {"geode" , PROCESSOR_GEODE, CPU_GEODE, |
3536 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE}, |
3537 | {"k6" , PROCESSOR_K6, CPU_K6, PTA_MMX}, |
3538 | {"k6-2" , PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW}, |
3539 | {"k6-3" , PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW}, |
3540 | {"athlon" , PROCESSOR_ATHLON, CPU_ATHLON, |
3541 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE}, |
3542 | {"athlon-tbird" , PROCESSOR_ATHLON, CPU_ATHLON, |
3543 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE}, |
3544 | {"athlon-4" , PROCESSOR_ATHLON, CPU_ATHLON, |
3545 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR}, |
3546 | {"athlon-xp" , PROCESSOR_ATHLON, CPU_ATHLON, |
3547 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR}, |
3548 | {"athlon-mp" , PROCESSOR_ATHLON, CPU_ATHLON, |
3549 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR}, |
3550 | {"x86-64" , PROCESSOR_K8, CPU_K8, |
3551 | PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR}, |
3552 | {"eden-x2" , PROCESSOR_K8, CPU_K8, |
3553 | PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR}, |
3554 | {"nano" , PROCESSOR_K8, CPU_K8, |
3555 | PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
3556 | | PTA_SSSE3 | PTA_FXSR}, |
3557 | {"nano-1000" , PROCESSOR_K8, CPU_K8, |
3558 | PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
3559 | | PTA_SSSE3 | PTA_FXSR}, |
3560 | {"nano-2000" , PROCESSOR_K8, CPU_K8, |
3561 | PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
3562 | | PTA_SSSE3 | PTA_FXSR}, |
3563 | {"nano-3000" , PROCESSOR_K8, CPU_K8, |
3564 | PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
3565 | | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR}, |
3566 | {"nano-x2" , PROCESSOR_K8, CPU_K8, |
3567 | PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
3568 | | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR}, |
3569 | {"eden-x4" , PROCESSOR_K8, CPU_K8, |
3570 | PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
3571 | | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR}, |
3572 | {"nano-x4" , PROCESSOR_K8, CPU_K8, |
3573 | PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
3574 | | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR}, |
3575 | {"k8" , PROCESSOR_K8, CPU_K8, |
3576 | PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE |
3577 | | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR}, |
3578 | {"k8-sse3" , PROCESSOR_K8, CPU_K8, |
3579 | PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE |
3580 | | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR}, |
3581 | {"opteron" , PROCESSOR_K8, CPU_K8, |
3582 | PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE |
3583 | | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR}, |
3584 | {"opteron-sse3" , PROCESSOR_K8, CPU_K8, |
3585 | PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE |
3586 | | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR}, |
3587 | {"athlon64" , PROCESSOR_K8, CPU_K8, |
3588 | PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE |
3589 | | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR}, |
3590 | {"athlon64-sse3" , PROCESSOR_K8, CPU_K8, |
3591 | PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE |
3592 | | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR}, |
3593 | {"athlon-fx" , PROCESSOR_K8, CPU_K8, |
3594 | PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE |
3595 | | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR}, |
3596 | {"amdfam10" , PROCESSOR_AMDFAM10, CPU_AMDFAM10, |
3597 | PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 |
3598 | | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR}, |
3599 | {"barcelona" , PROCESSOR_AMDFAM10, CPU_AMDFAM10, |
3600 | PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 |
3601 | | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR}, |
3602 | {"bdver1" , PROCESSOR_BDVER1, CPU_BDVER1, |
3603 | PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
3604 | | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 |
3605 | | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4 |
3606 | | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE}, |
3607 | {"bdver2" , PROCESSOR_BDVER2, CPU_BDVER2, |
3608 | PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
3609 | | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 |
3610 | | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4 |
3611 | | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C |
3612 | | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE}, |
3613 | {"bdver3" , PROCESSOR_BDVER3, CPU_BDVER3, |
3614 | PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
3615 | | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 |
3616 | | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4 |
3617 | | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C |
3618 | | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE |
3619 | | PTA_XSAVEOPT | PTA_FSGSBASE}, |
3620 | {"bdver4" , PROCESSOR_BDVER4, CPU_BDVER4, |
3621 | PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
3622 | | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 |
3623 | | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 |
3624 | | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2 |
3625 | | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR |
3626 | | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND |
3627 | | PTA_MOVBE | PTA_MWAITX}, |
3628 | {"znver1" , PROCESSOR_ZNVER1, CPU_ZNVER1, |
3629 | PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
3630 | | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 |
3631 | | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 |
3632 | | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW |
3633 | | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE |
3634 | | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED |
3635 | | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES |
3636 | | PTA_SHA | PTA_LZCNT | PTA_POPCNT}, |
3637 | {"btver1" , PROCESSOR_BTVER1, CPU_GENERIC, |
3638 | PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
3639 | | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW |
3640 | | PTA_FXSR | PTA_XSAVE}, |
3641 | {"btver2" , PROCESSOR_BTVER2, CPU_BTVER2, |
3642 | PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
3643 | | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1 |
3644 | | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX |
3645 | | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW |
3646 | | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT}, |
3647 | |
3648 | {"generic" , PROCESSOR_GENERIC, CPU_GENERIC, |
3649 | PTA_64BIT |
3650 | | PTA_HLE /* flags are only used for -march switch. */ }, |
3651 | }; |
3652 | |
  /* -mrecip options.  */
  /* Maps each -mrecip=<keyword> argument to the RECIP_MASK_* bits it
     selects; "all"/"none" enable or clear every reciprocal-approximation
     kind at once.  */
  static struct
    {
      const char *string;           /* option name */
      unsigned int mask;            /* mask bits to set */
    }
  const recip_options[] =
    {
      { "all",       RECIP_MASK_ALL },
      { "none",      RECIP_MASK_NONE },
      { "div",       RECIP_MASK_DIV },
      { "sqrt",      RECIP_MASK_SQRT },
      { "vec-div",   RECIP_MASK_VEC_DIV },
      { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
    };
3668 | |
  /* Number of entries in processor_alias_table; used by the -march=/-mtune=
     lookup loops below.  */
  int const pta_size = ARRAY_SIZE (processor_alias_table);

  /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
     TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false.  */
  if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
    opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
#ifdef TARGET_BI_ARCH
  else
    {
#if TARGET_BI_ARCH == 1
      /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
	 is on and OPTION_MASK_ABI_X32 is off.  We turn off
	 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
	 -mx32.  */
      if (TARGET_X32_P (opts->x_ix86_isa_flags))
	opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
#else
      /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
	 on and OPTION_MASK_ABI_64 is off.  We turn off
	 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
	 -m64 or OPTION_MASK_CODE16 is turned on by -m16.  */
      if (TARGET_LP64_P (opts->x_ix86_isa_flags)
	  || TARGET_16BIT_P (opts->x_ix86_isa_flags))
	opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
#endif
      /* The Intel MCU psABI is a 32-bit-only ABI; reject any 64-bit or
	 x32 combination with -miamcu.  */
      if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
	  && TARGET_IAMCU_P (opts->x_target_flags))
	sorry ("Intel MCU psABI isn%'t supported in %s mode",
	       TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
    }
#endif

  if (TARGET_X32_P (opts->x_ix86_isa_flags))
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
	 OPTION_MASK_ABI_64 for TARGET_X32.  */
      opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
    }
  else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
    /* -m16 code is neither 64-bit nor governed by either 64-bit ABI.  */
    opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
				| OPTION_MASK_ABI_X32
				| OPTION_MASK_ABI_64);
  else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
	 OPTION_MASK_ABI_X32 for TARGET_LP64.  */
      opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
3727 | |
  /* -fPIC is the default for x86_64.  */
  if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
    opts->x_flag_pic = 2;

  /* Need to check -mtune=generic first.  */
  if (opts->x_ix86_tune_string)
    {
      /* As special support for cross compilers we read -mtune=native
	 as -mtune=generic.  With native compilers we won't see the
	 -mtune=native, as it was changed by the driver.  */
      if (!strcmp (opts->x_ix86_tune_string, "native"))
	{
	  opts->x_ix86_tune_string = "generic";
	}
      else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
	warning (OPT_Wdeprecated,
		 main_args_p
		 ? G_("%<-mtune=x86-64%> is deprecated; use %<-mtune=k8%> "
		      "or %<-mtune=generic%> instead as appropriate")
		 : G_("%<target(\"tune=x86-64\")%> is deprecated; use "
		      "%<target(\"tune=k8\")%> or %<target(\"tune=generic\")%>"
		      " instead as appropriate"));
    }
  else
    {
      /* No explicit -mtune=: inherit from -march= if given, otherwise
	 fall back to the configured default CPU.  */
      if (opts->x_ix86_arch_string)
	opts->x_ix86_tune_string = opts->x_ix86_arch_string;
      if (!opts->x_ix86_tune_string)
	{
	  opts->x_ix86_tune_string
	    = processor_target_table[TARGET_CPU_DEFAULT].name;
	  ix86_tune_defaulted = 1;
	}

      /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
	 or defaulted.  We need to use a sensible tune option.  */
      if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
	{
	  opts->x_ix86_tune_string = "generic";
	}
    }

  if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
      && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
    {
      /* rep; movq isn't available in 32-bit code.  */
      error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
      opts->x_ix86_stringop_alg = no_stringop;
    }

  /* Default -march= by word size when the user gave none; otherwise
     remember that an explicit architecture was requested.  */
  if (!opts->x_ix86_arch_string)
    opts->x_ix86_arch_string
      = TARGET_64BIT_P (opts->x_ix86_isa_flags)
	? "x86-64" : SUBTARGET32_DEFAULT_CPU;
  else
    ix86_arch_specified = 1;
3784 | |
  /* Sanity-check an explicit -maddress-mode= against the selected word
     size; otherwise derive the pointer mode from the ABI (DImode for
     LP64, SImode for 32-bit and x32).  */
  if (opts_set->x_ix86_pmode)
    {
      if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
	   && opts->x_ix86_pmode == PMODE_SI)
	  || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
	      && opts->x_ix86_pmode == PMODE_DI))
	error ("address mode %qs not supported in the %s bit mode",
	       TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
	       TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
    }
  else
    opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
			 ? PMODE_DI : PMODE_SI;

  if (!opts_set->x_ix86_abi)
    opts->x_ix86_abi = DEFAULT_ABI;

  /* -mabi=ms requires the full 64-bit ABI; it is incompatible with x32.  */
  if (opts->x_ix86_abi == MS_ABI && TARGET_X32_P (opts->x_ix86_isa_flags))
    error ("-mabi=ms not supported with X32 ABI");
  gcc_assert (opts->x_ix86_abi == SYSV_ABI || opts->x_ix86_abi == MS_ABI);

  /* For targets using ms ABI enable ms-extensions, if not
     explicit turned off.  For non-ms ABI we turn off this
     option.  */
  if (!opts_set->x_flag_ms_extensions)
    opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3811 | |
  /* Validate or default the code model.  Each model is valid only for a
     subset of {32-bit, x32, 64-bit}, and the PIC variant is substituted
     automatically when -fpic/-fPIC is in effect.  */
  if (opts_set->x_ix86_cmodel)
    {
      switch (opts->x_ix86_cmodel)
	{
	case CM_SMALL:
	case CM_SMALL_PIC:
	  if (opts->x_flag_pic)
	    opts->x_ix86_cmodel = CM_SMALL_PIC;
	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "small", "32");
	  break;

	case CM_MEDIUM:
	case CM_MEDIUM_PIC:
	  if (opts->x_flag_pic)
	    opts->x_ix86_cmodel = CM_MEDIUM_PIC;
	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "medium", "32");
	  else if (TARGET_X32_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in x32 mode",
		   "medium");
	  break;

	case CM_LARGE:
	case CM_LARGE_PIC:
	  if (opts->x_flag_pic)
	    opts->x_ix86_cmodel = CM_LARGE_PIC;
	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "large", "32");
	  else if (TARGET_X32_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in x32 mode",
		   "large");
	  break;

	case CM_32:
	  if (opts->x_flag_pic)
	    error ("code model %s does not support PIC mode", "32");
	  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "32", "64");
	  break;

	case CM_KERNEL:
	  if (opts->x_flag_pic)
	    {
	      error ("code model %s does not support PIC mode", "kernel");
	      opts->x_ix86_cmodel = CM_32;
	    }
	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "kernel", "32");
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
	 use of rip-relative addressing.  This eliminates fixups that
	 would otherwise be needed if this object is to be placed in a
	 DLL, and is essentially just as efficient as direct addressing.  */
      if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
	  && (TARGET_RDOS || TARGET_PECOFF))
	opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
      else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
	opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else
	opts->x_ix86_cmodel = CM_32;
    }
  if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
    {
      error ("-masm=intel not supported in this configuration");
      opts->x_ix86_asm_dialect = ASM_ATT;
    }
  /* The requested word size must agree with what this compiler build
     supports (a 32-bit-only build cannot generate 64-bit code).  */
  if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
      != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3895 | |
  /* Look up -march= in the alias table.  On a match, record the schedule
     model and architecture, then turn on every ISA bit implied by the
     chosen CPU -- but never override an ISA flag the user set
     explicitly (tracked in x_ix86_isa_flags_explicit).  */
  for (i = 0; i < pta_size; i++)
    if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
      {
	/* "generic" and "intel" are tuning-only identifiers; reject
	   them for -march=.  */
	if (!strcmp (opts->x_ix86_arch_string, "generic"))
	  {
	    error (main_args_p
		   ? G_("%<generic%> CPU can be used only for %<-mtune=%> "
			"switch")
		   : G_("%<generic%> CPU can be used only for "
			"%<target(\"tune=\")%> attribute"));
	    return false;
	  }
	else if (!strcmp (opts->x_ix86_arch_string, "intel"))
	  {
	    error (main_args_p
		   ? G_("%<intel%> CPU can be used only for %<-mtune=%> "
			"switch")
		   : G_("%<intel%> CPU can be used only for "
			"%<target(\"tune=\")%> attribute"));
	    return false;
	  }

	if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
	    && !(processor_alias_table[i].flags & PTA_64BIT))
	  {
	    error ("CPU you selected does not support x86-64 "
		   "instruction set");
	    return false;
	  }

	ix86_schedule = processor_alias_table[i].schedule;
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;

	if (processor_alias_table[i].flags & PTA_MMX
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
	if (processor_alias_table[i].flags & PTA_SSSE3
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
	if (processor_alias_table[i].flags & PTA_SSE4_1
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
	if (processor_alias_table[i].flags & PTA_SSE4_2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
	if (processor_alias_table[i].flags & PTA_AVX
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
	if (processor_alias_table[i].flags & PTA_AVX2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
	if (processor_alias_table[i].flags & PTA_FMA
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
	if (processor_alias_table[i].flags & PTA_SSE4A
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
	if (processor_alias_table[i].flags & PTA_FMA4
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
	if (processor_alias_table[i].flags & PTA_XOP
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
	if (processor_alias_table[i].flags & PTA_LWP
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
	if (processor_alias_table[i].flags & PTA_ABM
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
	if (processor_alias_table[i].flags & PTA_BMI
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
	/* ABM implies both LZCNT and POPCNT, hence the combined masks
	   here and below.  */
	if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
	if (processor_alias_table[i].flags & PTA_TBM
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
	if (processor_alias_table[i].flags & PTA_BMI2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
	if (processor_alias_table[i].flags & PTA_CX16
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
	/* SAHF is available in 32-bit mode everywhere, and in 64-bit
	   mode unless the CPU is marked PTA_NO_SAHF.  */
	if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
	      && (processor_alias_table[i].flags & PTA_NO_SAHF))
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
	if (processor_alias_table[i].flags & PTA_MOVBE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
4007 | if (processor_alias_table[i].flags & PTA_AES |
4008 | && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES)) |
4009 | ix86_isa_flags |= OPTION_MASK_ISA_AES; |
4010 | if (processor_alias_table[i].flags & PTA_SHA |
4011 | && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA)) |
4012 | ix86_isa_flags |= OPTION_MASK_ISA_SHA; |
	if (processor_alias_table[i].flags & PTA_PCLMUL
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
	if (processor_alias_table[i].flags & PTA_FSGSBASE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
	if (processor_alias_table[i].flags & PTA_RDRND
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
	if (processor_alias_table[i].flags & PTA_F16C
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
	if (processor_alias_table[i].flags & PTA_RTM
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
	if (processor_alias_table[i].flags & PTA_HLE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
	if (processor_alias_table[i].flags & PTA_PRFCHW
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
	if (processor_alias_table[i].flags & PTA_RDSEED
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
	if (processor_alias_table[i].flags & PTA_ADX
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
	if (processor_alias_table[i].flags & PTA_FXSR
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
	if (processor_alias_table[i].flags & PTA_XSAVE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
	if (processor_alias_table[i].flags & PTA_XSAVEOPT
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
	if (processor_alias_table[i].flags & PTA_AVX512F
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
	if (processor_alias_table[i].flags & PTA_AVX512ER
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
	if (processor_alias_table[i].flags & PTA_AVX512PF
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
	if (processor_alias_table[i].flags & PTA_AVX512CD
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
	if (processor_alias_table[i].flags & PTA_PREFETCHWT1
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
	if (processor_alias_table[i].flags & PTA_CLWB
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
	if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
	if (processor_alias_table[i].flags & PTA_CLZERO
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLZERO))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLZERO;
	if (processor_alias_table[i].flags & PTA_XSAVEC
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
	if (processor_alias_table[i].flags & PTA_XSAVES
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
	if (processor_alias_table[i].flags & PTA_AVX512DQ
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
	if (processor_alias_table[i].flags & PTA_AVX512BW
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
	if (processor_alias_table[i].flags & PTA_AVX512VL
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
	/* MPX and the ISAs below live in the secondary flag word
	   (x_ix86_isa_flags2), not x_ix86_isa_flags.  */
	if (processor_alias_table[i].flags & PTA_MPX
	    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MPX))
	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MPX;
	if (processor_alias_table[i].flags & PTA_AVX512VBMI
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
	if (processor_alias_table[i].flags & PTA_AVX512IFMA
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;

	if (processor_alias_table[i].flags & PTA_AVX5124VNNIW
	    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_AVX5124VNNIW))
	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124VNNIW;
	if (processor_alias_table[i].flags & PTA_AVX5124FMAPS
	    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_AVX5124FMAPS))
	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124FMAPS;
	if (processor_alias_table[i].flags & PTA_AVX512VPOPCNTDQ
	    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_AVX512VPOPCNTDQ))
	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX512VPOPCNTDQ;
	if (processor_alias_table[i].flags & PTA_SGX
	    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_SGX))
	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_SGX;

	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;
	if (processor_alias_table[i].flags & PTA_MWAITX
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
	if (processor_alias_table[i].flags & PTA_PKU
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU;

	/* Don't enable x87 instructions if only
	   general registers are allowed.  */
	if (!(opts_set->x_ix86_target_flags & OPTION_MASK_GENERAL_REGS_ONLY)
	    && !(opts_set->x_target_flags & MASK_80387))
	  {
	    if (processor_alias_table[i].flags & PTA_NO_80387)
	      opts->x_target_flags &= ~MASK_80387;
	    else
	      opts->x_target_flags |= MASK_80387;
	  }
	break;
      }
4132 | |
4133 | if (TARGET_X32 && (opts->x_ix86_isa_flags2 & OPTION_MASK_ISA_MPX)) |
4134 | error ("Intel MPX does not support x32" ); |
4135 | |
4136 | if (TARGET_X32 && (ix86_isa_flags2 & OPTION_MASK_ISA_MPX)) |
4137 | error ("Intel MPX does not support x32" ); |
4138 | |
  /* -march= did not match any table entry: report the bad value and
     suggest the closest valid CPU name (excluding the tune-only
     "generic"/"intel" entries and, in 64-bit mode, 32-bit-only CPUs).  */
  if (i == pta_size)
    {
      error (main_args_p
	     ? G_("bad value (%qs) for %<-march=%> switch")
	     : G_("bad value (%qs) for %<target(\"arch=\")%> attribute"),
	     opts->x_ix86_arch_string);

      auto_vec <const char *> candidates;
      for (i = 0; i < pta_size; i++)
	if (strcmp (processor_alias_table[i].name, "generic")
	    && strcmp (processor_alias_table[i].name, "intel")
	    && (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
		|| (processor_alias_table[i].flags & PTA_64BIT)))
	  candidates.safe_push (processor_alias_table[i].name);

      /* S is allocated by candidates_list_and_hint; freed below.  */
      char *s;
      const char *hint
	= candidates_list_and_hint (opts->x_ix86_arch_string, s, candidates);
      if (hint)
	inform (input_location,
		main_args_p
		? G_("valid arguments to %<-march=%> switch are: "
		     "%s; did you mean %qs?")
		: G_("valid arguments to %<target(\"arch=\")%> attribute are: "
		     "%s; did you mean %qs?"), s, hint);
      else
	inform (input_location,
		main_args_p
		? G_("valid arguments to %<-march=%> switch are: %s")
		: G_("valid arguments to %<target(\"arch=\")%> attribute "
		     "are: %s"), s);
      XDELETEVEC (s);
    }

  /* Precompute the per-architecture feature bits for the chosen arch.  */
  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4176 | |
  /* Look up -mtune= in the alias table and record the tuning target.  */
  for (i = 0; i < pta_size; i++)
    if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
	  {
	    if (!(processor_alias_table[i].flags & PTA_64BIT))
	      {
		/* A defaulted 32-bit-only tune CPU is silently replaced
		   by x86-64 in 64-bit mode; an explicit one is an
		   error.  */
		if (ix86_tune_defaulted)
		  {
		    opts->x_ix86_tune_string = "x86-64";
		    for (i = 0; i < pta_size; i++)
		      if (! strcmp (opts->x_ix86_tune_string,
				    processor_alias_table[i].name))
			break;
		    ix86_schedule = processor_alias_table[i].schedule;
		    ix86_tune = processor_alias_table[i].processor;
		  }
		else
		  error ("CPU you selected does not support x86-64 "
			 "instruction set");
	      }
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOV
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;
	break;
      }

  /* An explicitly-given -mtune= that matched nothing: report it and
     suggest the closest valid name.  */
  if (ix86_tune_specified && i == pta_size)
    {
      error (main_args_p
	     ? G_("bad value (%qs) for %<-mtune=%> switch")
	     : G_("bad value (%qs) for %<target(\"tune=\")%> attribute"),
	     opts->x_ix86_tune_string);

      auto_vec <const char *> candidates;
      for (i = 0; i < pta_size; i++)
	if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
	    || (processor_alias_table[i].flags & PTA_64BIT))
	  candidates.safe_push (processor_alias_table[i].name);

      /* S is allocated by candidates_list_and_hint; freed below.  */
      char *s;
      const char *hint
	= candidates_list_and_hint (opts->x_ix86_tune_string, s, candidates);
      if (hint)
	inform (input_location,
		main_args_p
		? G_("valid arguments to %<-mtune=%> switch are: "
		     "%s; did you mean %qs?")
		: G_("valid arguments to %<target(\"tune=\")%> attribute are: "
		     "%s; did you mean %qs?"), s, hint);
      else
	inform (input_location,
		main_args_p
		? G_("valid arguments to %<-mtune=%> switch are: %s")
		: G_("valid arguments to %<target(\"tune=\")%> attribute "
		     "are: %s"), s);
      XDELETEVEC (s);
    }
4243 | |
  set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);

#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
    {
      if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
	opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
      /* The 64-bit MS ABI requires unwind tables whenever asynchronous
	 ones are in use.  */
      if (opts->x_flag_asynchronous_unwind_tables
	  && !opts_set->x_flag_unwind_tables
	  && TARGET_64BIT_MS_ABI)
	opts->x_flag_unwind_tables = 1;
      if (opts->x_flag_asynchronous_unwind_tables == 2)
	opts->x_flag_unwind_tables
	  = opts->x_flag_asynchronous_unwind_tables = 1;
      if (opts->x_flag_pcc_struct_return == 2)
	opts->x_flag_pcc_struct_return = 0;
    }
  else
    {
      if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
	opts->x_flag_omit_frame_pointer
	  = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
      if (opts->x_flag_asynchronous_unwind_tables == 2)
	opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (opts->x_flag_pcc_struct_return == 2)
	{
	  /* Intel MCU psABI specifies that -freg-struct-return should
	     be on.  Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
	     we check -miamcu so that -freg-struct-return is always
	     turned on if -miamcu is used.  */
	  if (TARGET_IAMCU_P (opts->x_target_flags))
	    opts->x_flag_pcc_struct_return = 0;
	  else
	    opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
	}
    }

  ix86_tune_cost = processor_target_table[ix86_tune].cost;
  /* TODO: ix86_cost should be chosen at instruction or function granuality
     so for cold code we use size_cost even in !optimize_size compilation.  */
  if (opts->x_optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = ix86_tune_cost;
4297 | |
  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (opts_set->x_ix86_regparm)
    {
      if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
	warning (0, "-mregparm is ignored in 64-bit mode");
      else if (TARGET_IAMCU_P (opts->x_target_flags))
	warning (0, "-mregparm is ignored for Intel MCU psABI");
      if (opts->x_ix86_regparm > REGPARM_MAX)
	{
	  error ("-mregparm=%d is not between 0 and %d",
		 opts->x_ix86_regparm, REGPARM_MAX);
	  opts->x_ix86_regparm = 0;
	}
    }
  /* 64-bit and IAMCU calling conventions always use the maximum number
     of register parameters.  */
  if (TARGET_IAMCU_P (opts->x_target_flags)
      || TARGET_64BIT_P (opts->x_ix86_isa_flags))
    opts->x_ix86_regparm = REGPARM_MAX;

  /* Default align_* from the processor table.  */
  ix86_default_align (opts);

  /* Provide default for -mbranch-cost= value.  */
  if (!opts_set->x_ix86_branch_cost)
    opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;

  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
    {
      opts->x_target_flags
	|= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	opts->x_ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT)
	      & ~opts->x_ix86_isa_flags_explicit);

      if (TARGET_RTD_P (opts->x_target_flags))
	warning (0,
		 main_args_p
		 ? G_("%<-mrtd%> is ignored in 64bit mode")
		 : G_("%<target(\"rtd\")%> is ignored in 64bit mode"));
    }
  else
    {
      opts->x_target_flags
	|= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;

      if (!ix86_arch_specified)
	opts->x_ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;

      /* i386 ABI does not specify red zone.  It still makes sense to use it
	 when programmer takes care to stack from being destroyed.  */
      if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
	opts->x_target_flags |= MASK_NO_RED_ZONE;
    }

  /* Keep nonleaf frame pointers.  */
  if (opts->x_flag_omit_frame_pointer)
    opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
    opts->x_flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (opts->x_flag_finite_math_only)
    opts->x_target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
    opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
4376 | |
4377 | /* Likewise, if the target doesn't have a 387, or we've specified |
4378 | software floating point, don't use 387 inline intrinsics. */ |
4379 | if (!TARGET_80387_P (opts->x_target_flags)) |
4380 | opts-> |
---|