1 | /* Subroutines for the gcc driver. |
2 | Copyright (C) 2006-2017 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify |
7 | it under the terms of the GNU General Public License as published by |
8 | the Free Software Foundation; either version 3, or (at your option) |
9 | any later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | GNU General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #include "config.h" |
21 | #include "system.h" |
22 | #include "coretypes.h" |
23 | #include "tm.h" |
24 | |
25 | const char *host_detect_local_cpu (int argc, const char **argv); |
26 | |
27 | #if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__)) |
28 | #include "cpuid.h" |
29 | |
/* Parameters of one processor cache level as filled in by the
   CPUID-based detection routines in this file.  */
struct cache_desc
{
  unsigned int sizekb;  /* Cache size, in kilobytes.  */
  unsigned int assoc;   /* Associativity as reported/decoded.  */
  unsigned int line;    /* Cache line size.  */
};
36 | |
37 | /* Returns command line parameters that describe size and |
38 | cache line size of the processor caches. */ |
39 | |
40 | static char * |
41 | describe_cache (struct cache_desc level1, struct cache_desc level2) |
42 | { |
43 | char size[100], line[100], size2[100]; |
44 | |
45 | /* At the moment, gcc does not use the information |
46 | about the associativity of the cache. */ |
47 | |
48 | snprintf (size, sizeof (size), |
49 | "--param l1-cache-size=%u " , level1.sizekb); |
50 | snprintf (line, sizeof (line), |
51 | "--param l1-cache-line-size=%u " , level1.line); |
52 | |
53 | snprintf (size2, sizeof (size2), |
54 | "--param l2-cache-size=%u " , level2.sizekb); |
55 | |
56 | return concat (size, line, size2, NULL); |
57 | } |
58 | |
59 | /* Detect L2 cache parameters using CPUID extended function 0x80000006. */ |
60 | |
61 | static void |
62 | detect_l2_cache (struct cache_desc *level2) |
63 | { |
64 | unsigned eax, ebx, ecx, edx; |
65 | unsigned assoc; |
66 | |
67 | __cpuid (0x80000006, eax, ebx, ecx, edx); |
68 | |
69 | level2->sizekb = (ecx >> 16) & 0xffff; |
70 | level2->line = ecx & 0xff; |
71 | |
72 | assoc = (ecx >> 12) & 0xf; |
73 | if (assoc == 6) |
74 | assoc = 8; |
75 | else if (assoc == 8) |
76 | assoc = 16; |
77 | else if (assoc >= 0xa && assoc <= 0xc) |
78 | assoc = 32 + (assoc - 0xa) * 16; |
79 | else if (assoc >= 0xd && assoc <= 0xe) |
80 | assoc = 96 + (assoc - 0xd) * 32; |
81 | |
82 | level2->assoc = assoc; |
83 | } |
84 | |
85 | /* Returns the description of caches for an AMD processor. */ |
86 | |
87 | static const char * |
88 | detect_caches_amd (unsigned max_ext_level) |
89 | { |
90 | unsigned eax, ebx, ecx, edx; |
91 | |
92 | struct cache_desc level1, level2 = {0, 0, 0}; |
93 | |
94 | if (max_ext_level < 0x80000005) |
95 | return "" ; |
96 | |
97 | __cpuid (0x80000005, eax, ebx, ecx, edx); |
98 | |
99 | level1.sizekb = (ecx >> 24) & 0xff; |
100 | level1.assoc = (ecx >> 16) & 0xff; |
101 | level1.line = ecx & 0xff; |
102 | |
103 | if (max_ext_level >= 0x80000006) |
104 | detect_l2_cache (&level2); |
105 | |
106 | return describe_cache (level1, level2); |
107 | } |
108 | |
109 | /* Decodes the size, the associativity and the cache line size of |
110 | L1/L2 caches of an Intel processor. Values are based on |
111 | "Intel Processor Identification and the CPUID Instruction" |
112 | [Application Note 485], revision -032, December 2007. */ |
113 | |
114 | static void |
115 | decode_caches_intel (unsigned reg, bool xeon_mp, |
116 | struct cache_desc *level1, struct cache_desc *level2) |
117 | { |
118 | int i; |
119 | |
120 | for (i = 24; i >= 0; i -= 8) |
121 | switch ((reg >> i) & 0xff) |
122 | { |
123 | case 0x0a: |
124 | level1->sizekb = 8; level1->assoc = 2; level1->line = 32; |
125 | break; |
126 | case 0x0c: |
127 | level1->sizekb = 16; level1->assoc = 4; level1->line = 32; |
128 | break; |
129 | case 0x0d: |
130 | level1->sizekb = 16; level1->assoc = 4; level1->line = 64; |
131 | break; |
132 | case 0x0e: |
133 | level1->sizekb = 24; level1->assoc = 6; level1->line = 64; |
134 | break; |
135 | case 0x21: |
136 | level2->sizekb = 256; level2->assoc = 8; level2->line = 64; |
137 | break; |
138 | case 0x24: |
139 | level2->sizekb = 1024; level2->assoc = 16; level2->line = 64; |
140 | break; |
141 | case 0x2c: |
142 | level1->sizekb = 32; level1->assoc = 8; level1->line = 64; |
143 | break; |
144 | case 0x39: |
145 | level2->sizekb = 128; level2->assoc = 4; level2->line = 64; |
146 | break; |
147 | case 0x3a: |
148 | level2->sizekb = 192; level2->assoc = 6; level2->line = 64; |
149 | break; |
150 | case 0x3b: |
151 | level2->sizekb = 128; level2->assoc = 2; level2->line = 64; |
152 | break; |
153 | case 0x3c: |
154 | level2->sizekb = 256; level2->assoc = 4; level2->line = 64; |
155 | break; |
156 | case 0x3d: |
157 | level2->sizekb = 384; level2->assoc = 6; level2->line = 64; |
158 | break; |
159 | case 0x3e: |
160 | level2->sizekb = 512; level2->assoc = 4; level2->line = 64; |
161 | break; |
162 | case 0x41: |
163 | level2->sizekb = 128; level2->assoc = 4; level2->line = 32; |
164 | break; |
165 | case 0x42: |
166 | level2->sizekb = 256; level2->assoc = 4; level2->line = 32; |
167 | break; |
168 | case 0x43: |
169 | level2->sizekb = 512; level2->assoc = 4; level2->line = 32; |
170 | break; |
171 | case 0x44: |
172 | level2->sizekb = 1024; level2->assoc = 4; level2->line = 32; |
173 | break; |
174 | case 0x45: |
175 | level2->sizekb = 2048; level2->assoc = 4; level2->line = 32; |
176 | break; |
177 | case 0x48: |
178 | level2->sizekb = 3072; level2->assoc = 12; level2->line = 64; |
179 | break; |
180 | case 0x49: |
181 | if (xeon_mp) |
182 | break; |
183 | level2->sizekb = 4096; level2->assoc = 16; level2->line = 64; |
184 | break; |
185 | case 0x4e: |
186 | level2->sizekb = 6144; level2->assoc = 24; level2->line = 64; |
187 | break; |
188 | case 0x60: |
189 | level1->sizekb = 16; level1->assoc = 8; level1->line = 64; |
190 | break; |
191 | case 0x66: |
192 | level1->sizekb = 8; level1->assoc = 4; level1->line = 64; |
193 | break; |
194 | case 0x67: |
195 | level1->sizekb = 16; level1->assoc = 4; level1->line = 64; |
196 | break; |
197 | case 0x68: |
198 | level1->sizekb = 32; level1->assoc = 4; level1->line = 64; |
199 | break; |
200 | case 0x78: |
201 | level2->sizekb = 1024; level2->assoc = 4; level2->line = 64; |
202 | break; |
203 | case 0x79: |
204 | level2->sizekb = 128; level2->assoc = 8; level2->line = 64; |
205 | break; |
206 | case 0x7a: |
207 | level2->sizekb = 256; level2->assoc = 8; level2->line = 64; |
208 | break; |
209 | case 0x7b: |
210 | level2->sizekb = 512; level2->assoc = 8; level2->line = 64; |
211 | break; |
212 | case 0x7c: |
213 | level2->sizekb = 1024; level2->assoc = 8; level2->line = 64; |
214 | break; |
215 | case 0x7d: |
216 | level2->sizekb = 2048; level2->assoc = 8; level2->line = 64; |
217 | break; |
218 | case 0x7f: |
219 | level2->sizekb = 512; level2->assoc = 2; level2->line = 64; |
220 | break; |
221 | case 0x80: |
222 | level2->sizekb = 512; level2->assoc = 8; level2->line = 64; |
223 | break; |
224 | case 0x82: |
225 | level2->sizekb = 256; level2->assoc = 8; level2->line = 32; |
226 | break; |
227 | case 0x83: |
228 | level2->sizekb = 512; level2->assoc = 8; level2->line = 32; |
229 | break; |
230 | case 0x84: |
231 | level2->sizekb = 1024; level2->assoc = 8; level2->line = 32; |
232 | break; |
233 | case 0x85: |
234 | level2->sizekb = 2048; level2->assoc = 8; level2->line = 32; |
235 | break; |
236 | case 0x86: |
237 | level2->sizekb = 512; level2->assoc = 4; level2->line = 64; |
238 | break; |
239 | case 0x87: |
240 | level2->sizekb = 1024; level2->assoc = 8; level2->line = 64; |
241 | |
242 | default: |
243 | break; |
244 | } |
245 | } |
246 | |
/* Detect cache parameters using CPUID function 2.  Every register
   returned is handed to decode_caches_intel, which records what it
   recognizes in *LEVEL1 and *LEVEL2.  XEON_MP is passed through
   unchanged.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
                      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned regs[4];
  int remaining, r;

  __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

  /* The low four bits of EAX are used as the number of rounds to
     process; clear them so they are not decoded as a descriptor.  */
  remaining = regs[0] & 0x0f;
  regs[0] &= ~0x0f;

  for (; remaining > 0; remaining--)
    {
      for (r = 0; r < 4; r++)
        {
          /* Skip registers that are zero or have bit 31 set.  */
          if (regs[r] == 0 || ((regs[r] >> 31) & 1) != 0)
            continue;

          decode_caches_intel (regs[r], xeon_mp, level1, level2);
        }

      /* Fetch the next set of registers unless this was the last
         round.  */
      if (remaining > 1)
        __cpuid (2, regs[0], regs[1], regs[2], regs[3]);
    }
}
271 | |
/* Detect cache parameters using CPUID function 4.  This
   method doesn't require hardcoded tables.  */

/* Values of the cache type field (EAX bits 4:0) that are compared
   against when enumerating caches with CPUID function 4.  The
   enumerators are numbered 0 to 3 in declaration order.  */
enum cache_type
{
  CACHE_END,      /* 0: end of the enumeration.  */
  CACHE_DATA,     /* 1 */
  CACHE_INST,     /* 2 */
  CACHE_UNIFIED   /* 3 */
};
282 | |
283 | static void |
284 | detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2, |
285 | struct cache_desc *level3) |
286 | { |
287 | struct cache_desc *cache; |
288 | |
289 | unsigned eax, ebx, ecx, edx; |
290 | int count; |
291 | |
292 | for (count = 0;; count++) |
293 | { |
294 | __cpuid_count(4, count, eax, ebx, ecx, edx); |
295 | switch (eax & 0x1f) |
296 | { |
297 | case CACHE_END: |
298 | return; |
299 | case CACHE_DATA: |
300 | case CACHE_UNIFIED: |
301 | { |
302 | switch ((eax >> 5) & 0x07) |
303 | { |
304 | case 1: |
305 | cache = level1; |
306 | break; |
307 | case 2: |
308 | cache = level2; |
309 | break; |
310 | case 3: |
311 | cache = level3; |
312 | break; |
313 | default: |
314 | cache = NULL; |
315 | } |
316 | |
317 | if (cache) |
318 | { |
319 | unsigned sets = ecx + 1; |
320 | unsigned part = ((ebx >> 12) & 0x03ff) + 1; |
321 | |
322 | cache->assoc = ((ebx >> 22) & 0x03ff) + 1; |
323 | cache->line = (ebx & 0x0fff) + 1; |
324 | |
325 | cache->sizekb = (cache->assoc * part |
326 | * cache->line * sets) / 1024; |
327 | } |
328 | } |
329 | default: |
330 | break; |
331 | } |
332 | } |
333 | } |
334 | |
335 | /* Returns the description of caches for an Intel processor. */ |
336 | |
337 | static const char * |
338 | detect_caches_intel (bool xeon_mp, unsigned max_level, |
339 | unsigned max_ext_level, unsigned *l2sizekb) |
340 | { |
341 | struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0}; |
342 | |
343 | if (max_level >= 4) |
344 | detect_caches_cpuid4 (&level1, &level2, &level3); |
345 | else if (max_level >= 2) |
346 | detect_caches_cpuid2 (xeon_mp, &level1, &level2); |
347 | else |
348 | return "" ; |
349 | |
350 | if (level1.sizekb == 0) |
351 | return "" ; |
352 | |
353 | /* Let the L3 replace the L2. This assumes inclusive caches |
354 | and single threaded program for now. */ |
355 | if (level3.sizekb) |
356 | level2 = level3; |
357 | |
358 | /* Intel CPUs are equipped with AMD style L2 cache info. Try this |
359 | method if other methods fail to provide L2 cache parameters. */ |
360 | if (level2.sizekb == 0 && max_ext_level >= 0x80000006) |
361 | detect_l2_cache (&level2); |
362 | |
363 | *l2sizekb = level2.sizekb; |
364 | |
365 | return describe_cache (level1, level2); |
366 | } |
367 | |
368 | /* This will be called by the spec parser in gcc.c when it sees |
369 | a %:local_cpu_detect(args) construct. Currently it will be called |
370 | with either "arch" or "tune" as argument depending on if -march=native |
371 | or -mtune=native is to be substituted. |
372 | |
373 | It returns a string containing new command line parameters to be |
374 | put at the place of the above two options, depending on what CPU |
375 | this is executed. E.g. "-march=k8" on an AMD64 machine |
376 | for -march=native. |
377 | |
378 | ARGC and ARGV are set depending on the actual arguments given |
379 | in the spec. */ |
380 | |
381 | const char *host_detect_local_cpu (int argc, const char **argv) |
382 | { |
383 | enum processor_type processor = PROCESSOR_I386; |
384 | const char *cpu = "i386" ; |
385 | |
386 | const char *cache = "" ; |
387 | const char *options = "" ; |
388 | |
389 | unsigned int eax, ebx, ecx, edx; |
390 | |
391 | unsigned int max_level, ext_level; |
392 | |
393 | unsigned int vendor; |
394 | unsigned int model, family; |
395 | |
396 | unsigned int has_sse3, has_ssse3, has_cmpxchg16b; |
397 | unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2; |
398 | |
399 | /* Extended features */ |
400 | unsigned int has_lahf_lm = 0, has_sse4a = 0; |
401 | unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0; |
402 | unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0; |
403 | unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0; |
404 | unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0; |
405 | unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0; |
406 | unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0; |
407 | unsigned int has_hle = 0, has_rtm = 0, has_sgx = 0; |
408 | unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0; |
409 | unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0; |
410 | unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0; |
411 | unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0; |
412 | unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0; |
413 | unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0; |
414 | unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0; |
415 | unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0; |
416 | unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0, has_rdpid = 0; |
417 | unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0; |
418 | unsigned int has_gfni = 0, has_avx512vbmi2 = 0; |
419 | unsigned int has_ibt = 0, has_shstk = 0; |
420 | unsigned int has_avx512vnni = 0, has_vaes = 0; |
421 | |
422 | bool arch; |
423 | |
424 | unsigned int l2sizekb = 0; |
425 | |
426 | if (argc < 1) |
427 | return NULL; |
428 | |
429 | arch = !strcmp (argv[0], "arch" ); |
430 | |
431 | if (!arch && strcmp (argv[0], "tune" )) |
432 | return NULL; |
433 | |
434 | max_level = __get_cpuid_max (0, &vendor); |
435 | if (max_level < 1) |
436 | goto done; |
437 | |
438 | __cpuid (1, eax, ebx, ecx, edx); |
439 | |
440 | model = (eax >> 4) & 0x0f; |
441 | family = (eax >> 8) & 0x0f; |
442 | if (vendor == signature_INTEL_ebx |
443 | || vendor == signature_AMD_ebx) |
444 | { |
445 | unsigned int extended_model, extended_family; |
446 | |
447 | extended_model = (eax >> 12) & 0xf0; |
448 | extended_family = (eax >> 20) & 0xff; |
449 | if (family == 0x0f) |
450 | { |
451 | family += extended_family; |
452 | model += extended_model; |
453 | } |
454 | else if (family == 0x06) |
455 | model += extended_model; |
456 | } |
457 | |
458 | has_sse3 = ecx & bit_SSE3; |
459 | has_ssse3 = ecx & bit_SSSE3; |
460 | has_sse4_1 = ecx & bit_SSE4_1; |
461 | has_sse4_2 = ecx & bit_SSE4_2; |
462 | has_avx = ecx & bit_AVX; |
463 | has_osxsave = ecx & bit_OSXSAVE; |
464 | has_cmpxchg16b = ecx & bit_CMPXCHG16B; |
465 | has_movbe = ecx & bit_MOVBE; |
466 | has_popcnt = ecx & bit_POPCNT; |
467 | has_aes = ecx & bit_AES; |
468 | has_pclmul = ecx & bit_PCLMUL; |
469 | has_fma = ecx & bit_FMA; |
470 | has_f16c = ecx & bit_F16C; |
471 | has_rdrnd = ecx & bit_RDRND; |
472 | has_xsave = ecx & bit_XSAVE; |
473 | |
474 | has_cmpxchg8b = edx & bit_CMPXCHG8B; |
475 | has_cmov = edx & bit_CMOV; |
476 | has_mmx = edx & bit_MMX; |
477 | has_fxsr = edx & bit_FXSAVE; |
478 | has_sse = edx & bit_SSE; |
479 | has_sse2 = edx & bit_SSE2; |
480 | |
481 | if (max_level >= 7) |
482 | { |
483 | __cpuid_count (7, 0, eax, ebx, ecx, edx); |
484 | |
485 | has_bmi = ebx & bit_BMI; |
486 | has_sgx = ebx & bit_SGX; |
487 | has_hle = ebx & bit_HLE; |
488 | has_rtm = ebx & bit_RTM; |
489 | has_avx2 = ebx & bit_AVX2; |
490 | has_bmi2 = ebx & bit_BMI2; |
491 | has_fsgsbase = ebx & bit_FSGSBASE; |
492 | has_rdseed = ebx & bit_RDSEED; |
493 | has_adx = ebx & bit_ADX; |
494 | has_avx512f = ebx & bit_AVX512F; |
495 | has_avx512er = ebx & bit_AVX512ER; |
496 | has_avx512pf = ebx & bit_AVX512PF; |
497 | has_avx512cd = ebx & bit_AVX512CD; |
498 | has_sha = ebx & bit_SHA; |
499 | has_clflushopt = ebx & bit_CLFLUSHOPT; |
500 | has_clwb = ebx & bit_CLWB; |
501 | has_avx512dq = ebx & bit_AVX512DQ; |
502 | has_avx512bw = ebx & bit_AVX512BW; |
503 | has_avx512vl = ebx & bit_AVX512VL; |
504 | has_avx512ifma = ebx & bit_AVX512IFMA; |
505 | |
506 | has_prefetchwt1 = ecx & bit_PREFETCHWT1; |
507 | has_avx512vbmi = ecx & bit_AVX512VBMI; |
508 | has_pku = ecx & bit_OSPKE; |
509 | has_avx512vbmi2 = ecx & bit_AVX512VBMI2; |
510 | has_avx512vnni = ecx & bit_AVX512VNNI; |
511 | has_rdpid = ecx & bit_RDPID; |
512 | has_gfni = ecx & bit_GFNI; |
513 | has_vaes = ecx & bit_VAES; |
514 | |
515 | has_avx5124vnniw = edx & bit_AVX5124VNNIW; |
516 | has_avx5124fmaps = edx & bit_AVX5124FMAPS; |
517 | |
518 | has_shstk = ecx & bit_SHSTK; |
519 | has_ibt = edx & bit_IBT; |
520 | } |
521 | |
522 | if (max_level >= 13) |
523 | { |
524 | __cpuid_count (13, 1, eax, ebx, ecx, edx); |
525 | |
526 | has_xsaveopt = eax & bit_XSAVEOPT; |
527 | has_xsavec = eax & bit_XSAVEC; |
528 | has_xsaves = eax & bit_XSAVES; |
529 | } |
530 | |
531 | /* Check cpuid level of extended features. */ |
532 | __cpuid (0x80000000, ext_level, ebx, ecx, edx); |
533 | |
534 | if (ext_level >= 0x80000001) |
535 | { |
536 | __cpuid (0x80000001, eax, ebx, ecx, edx); |
537 | |
538 | has_lahf_lm = ecx & bit_LAHF_LM; |
539 | has_sse4a = ecx & bit_SSE4a; |
540 | has_abm = ecx & bit_ABM; |
541 | has_lwp = ecx & bit_LWP; |
542 | has_fma4 = ecx & bit_FMA4; |
543 | has_xop = ecx & bit_XOP; |
544 | has_tbm = ecx & bit_TBM; |
545 | has_lzcnt = ecx & bit_LZCNT; |
546 | has_prfchw = ecx & bit_PRFCHW; |
547 | |
548 | has_longmode = edx & bit_LM; |
549 | has_3dnowp = edx & bit_3DNOWP; |
550 | has_3dnow = edx & bit_3DNOW; |
551 | has_mwaitx = ecx & bit_MWAITX; |
552 | } |
553 | |
554 | if (ext_level >= 0x80000008) |
555 | { |
556 | __cpuid (0x80000008, eax, ebx, ecx, edx); |
557 | has_clzero = ebx & bit_CLZERO; |
558 | } |
559 | |
560 | /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */ |
561 | #define XCR_XFEATURE_ENABLED_MASK 0x0 |
562 | #define XSTATE_FP 0x1 |
563 | #define XSTATE_SSE 0x2 |
564 | #define XSTATE_YMM 0x4 |
565 | #define XSTATE_OPMASK 0x20 |
566 | #define XSTATE_ZMM 0x40 |
567 | #define XSTATE_HI_ZMM 0x80 |
568 | |
569 | #define XCR_AVX_ENABLED_MASK \ |
570 | (XSTATE_SSE | XSTATE_YMM) |
571 | #define XCR_AVX512F_ENABLED_MASK \ |
572 | (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM) |
573 | |
574 | if (has_osxsave) |
575 | asm (".byte 0x0f; .byte 0x01; .byte 0xd0" |
576 | : "=a" (eax), "=d" (edx) |
577 | : "c" (XCR_XFEATURE_ENABLED_MASK)); |
578 | else |
579 | eax = 0; |
580 | |
581 | /* Check if AVX registers are supported. */ |
582 | if ((eax & XCR_AVX_ENABLED_MASK) != XCR_AVX_ENABLED_MASK) |
583 | { |
584 | has_avx = 0; |
585 | has_avx2 = 0; |
586 | has_fma = 0; |
587 | has_fma4 = 0; |
588 | has_f16c = 0; |
589 | has_xop = 0; |
590 | has_xsave = 0; |
591 | has_xsaveopt = 0; |
592 | has_xsaves = 0; |
593 | has_xsavec = 0; |
594 | } |
595 | |
596 | /* Check if AVX512F registers are supported. */ |
597 | if ((eax & XCR_AVX512F_ENABLED_MASK) != XCR_AVX512F_ENABLED_MASK) |
598 | { |
599 | has_avx512f = 0; |
600 | has_avx512er = 0; |
601 | has_avx512pf = 0; |
602 | has_avx512cd = 0; |
603 | has_avx512dq = 0; |
604 | has_avx512bw = 0; |
605 | has_avx512vl = 0; |
606 | } |
607 | |
608 | if (!arch) |
609 | { |
610 | if (vendor == signature_AMD_ebx |
611 | || vendor == signature_CENTAUR_ebx |
612 | || vendor == signature_CYRIX_ebx |
613 | || vendor == signature_NSC_ebx) |
614 | cache = detect_caches_amd (ext_level); |
615 | else if (vendor == signature_INTEL_ebx) |
616 | { |
617 | bool xeon_mp = (family == 15 && model == 6); |
618 | cache = detect_caches_intel (xeon_mp, max_level, |
619 | ext_level, &l2sizekb); |
620 | } |
621 | } |
622 | |
623 | if (vendor == signature_AMD_ebx) |
624 | { |
625 | unsigned int name; |
626 | |
627 | /* Detect geode processor by its processor signature. */ |
628 | if (ext_level >= 0x80000002) |
629 | __cpuid (0x80000002, name, ebx, ecx, edx); |
630 | else |
631 | name = 0; |
632 | |
633 | if (name == signature_NSC_ebx) |
634 | processor = PROCESSOR_GEODE; |
635 | else if (has_movbe && family == 22) |
636 | processor = PROCESSOR_BTVER2; |
637 | else if (has_clzero) |
638 | processor = PROCESSOR_ZNVER1; |
639 | else if (has_avx2) |
640 | processor = PROCESSOR_BDVER4; |
641 | else if (has_xsaveopt) |
642 | processor = PROCESSOR_BDVER3; |
643 | else if (has_bmi) |
644 | processor = PROCESSOR_BDVER2; |
645 | else if (has_xop) |
646 | processor = PROCESSOR_BDVER1; |
647 | else if (has_sse4a && has_ssse3) |
648 | processor = PROCESSOR_BTVER1; |
649 | else if (has_sse4a) |
650 | processor = PROCESSOR_AMDFAM10; |
651 | else if (has_sse2 || has_longmode) |
652 | processor = PROCESSOR_K8; |
653 | else if (has_3dnowp && family == 6) |
654 | processor = PROCESSOR_ATHLON; |
655 | else if (has_mmx) |
656 | processor = PROCESSOR_K6; |
657 | else |
658 | processor = PROCESSOR_PENTIUM; |
659 | } |
660 | else if (vendor == signature_CENTAUR_ebx) |
661 | { |
662 | processor = PROCESSOR_GENERIC; |
663 | |
664 | switch (family) |
665 | { |
666 | default: |
667 | /* We have no idea. */ |
668 | break; |
669 | |
670 | case 5: |
671 | if (has_3dnow || has_mmx) |
672 | processor = PROCESSOR_I486; |
673 | break; |
674 | |
675 | case 6: |
676 | if (has_longmode) |
677 | processor = PROCESSOR_K8; |
678 | else if (model >= 9) |
679 | processor = PROCESSOR_PENTIUMPRO; |
680 | else if (model >= 6) |
681 | processor = PROCESSOR_I486; |
682 | } |
683 | } |
684 | else |
685 | { |
686 | switch (family) |
687 | { |
688 | case 4: |
689 | processor = PROCESSOR_I486; |
690 | break; |
691 | case 5: |
692 | processor = PROCESSOR_PENTIUM; |
693 | break; |
694 | case 6: |
695 | processor = PROCESSOR_PENTIUMPRO; |
696 | break; |
697 | case 15: |
698 | processor = PROCESSOR_PENTIUM4; |
699 | break; |
700 | default: |
701 | /* We have no idea. */ |
702 | processor = PROCESSOR_GENERIC; |
703 | } |
704 | } |
705 | |
706 | switch (processor) |
707 | { |
708 | case PROCESSOR_I386: |
709 | /* Default. */ |
710 | break; |
711 | case PROCESSOR_I486: |
712 | if (arch && vendor == signature_CENTAUR_ebx) |
713 | { |
714 | if (model >= 6) |
715 | cpu = "c3" ; |
716 | else if (has_3dnow) |
717 | cpu = "winchip2" ; |
718 | else |
719 | /* Assume WinChip C6. */ |
720 | cpu = "winchip-c6" ; |
721 | } |
722 | else |
723 | cpu = "i486" ; |
724 | break; |
725 | case PROCESSOR_PENTIUM: |
726 | if (arch && has_mmx) |
727 | cpu = "pentium-mmx" ; |
728 | else |
729 | cpu = "pentium" ; |
730 | break; |
731 | case PROCESSOR_PENTIUMPRO: |
732 | switch (model) |
733 | { |
734 | case 0x1c: |
735 | case 0x26: |
736 | /* Bonnell. */ |
737 | cpu = "bonnell" ; |
738 | break; |
739 | case 0x37: |
740 | case 0x4a: |
741 | case 0x4d: |
742 | case 0x5a: |
743 | case 0x5d: |
744 | /* Silvermont. */ |
745 | cpu = "silvermont" ; |
746 | break; |
747 | case 0x0f: |
748 | /* Merom. */ |
749 | case 0x17: |
750 | case 0x1d: |
751 | /* Penryn. */ |
752 | cpu = "core2" ; |
753 | break; |
754 | case 0x1a: |
755 | case 0x1e: |
756 | case 0x1f: |
757 | case 0x2e: |
758 | /* Nehalem. */ |
759 | cpu = "nehalem" ; |
760 | break; |
761 | case 0x25: |
762 | case 0x2c: |
763 | case 0x2f: |
764 | /* Westmere. */ |
765 | cpu = "westmere" ; |
766 | break; |
767 | case 0x2a: |
768 | case 0x2d: |
769 | /* Sandy Bridge. */ |
770 | cpu = "sandybridge" ; |
771 | break; |
772 | case 0x3a: |
773 | case 0x3e: |
774 | /* Ivy Bridge. */ |
775 | cpu = "ivybridge" ; |
776 | break; |
777 | case 0x3c: |
778 | case 0x3f: |
779 | case 0x45: |
780 | case 0x46: |
781 | /* Haswell. */ |
782 | cpu = "haswell" ; |
783 | break; |
784 | case 0x3d: |
785 | case 0x47: |
786 | case 0x4f: |
787 | case 0x56: |
788 | /* Broadwell. */ |
789 | cpu = "broadwell" ; |
790 | break; |
791 | case 0x4e: |
792 | case 0x5e: |
793 | /* Skylake. */ |
794 | case 0x8e: |
795 | case 0x9e: |
796 | /* Kaby Lake. */ |
797 | cpu = "skylake" ; |
798 | break; |
799 | case 0x55: |
800 | /* Skylake with AVX-512. */ |
801 | cpu = "skylake-avx512" ; |
802 | break; |
803 | case 0x57: |
804 | /* Knights Landing. */ |
805 | cpu = "knl" ; |
806 | break; |
807 | case 0x66: |
808 | /* Cannon Lake. */ |
809 | cpu = "cannonlake" ; |
810 | break; |
811 | case 0x85: |
812 | /* Knights Mill. */ |
813 | cpu = "knm" ; |
814 | break; |
815 | default: |
816 | if (arch) |
817 | { |
818 | /* This is unknown family 0x6 CPU. */ |
819 | /* Assume Cannon Lake. */ |
820 | if (has_avx512vbmi) |
821 | cpu = "cannonlake" ; |
822 | /* Assume Knights Mill. */ |
823 | else if (has_avx5124vnniw) |
824 | cpu = "knm" ; |
825 | /* Assume Knights Landing. */ |
826 | else if (has_avx512er) |
827 | cpu = "knl" ; |
828 | /* Assume Skylake with AVX-512. */ |
829 | else if (has_avx512f) |
830 | cpu = "skylake-avx512" ; |
831 | /* Assume Skylake. */ |
832 | else if (has_clflushopt) |
833 | cpu = "skylake" ; |
834 | /* Assume Broadwell. */ |
835 | else if (has_adx) |
836 | cpu = "broadwell" ; |
837 | else if (has_avx2) |
838 | /* Assume Haswell. */ |
839 | cpu = "haswell" ; |
840 | else if (has_avx) |
841 | /* Assume Sandy Bridge. */ |
842 | cpu = "sandybridge" ; |
843 | else if (has_sse4_2) |
844 | { |
845 | if (has_movbe) |
846 | /* Assume Silvermont. */ |
847 | cpu = "silvermont" ; |
848 | else |
849 | /* Assume Nehalem. */ |
850 | cpu = "nehalem" ; |
851 | } |
852 | else if (has_ssse3) |
853 | { |
854 | if (has_movbe) |
855 | /* Assume Bonnell. */ |
856 | cpu = "bonnell" ; |
857 | else |
858 | /* Assume Core 2. */ |
859 | cpu = "core2" ; |
860 | } |
861 | else if (has_longmode) |
862 | /* Perhaps some emulator? Assume x86-64, otherwise gcc |
863 | -march=native would be unusable for 64-bit compilations, |
864 | as all the CPUs below are 32-bit only. */ |
865 | cpu = "x86-64" ; |
866 | else if (has_sse3) |
867 | { |
868 | if (vendor == signature_CENTAUR_ebx) |
869 | /* C7 / Eden "Esther" */ |
870 | cpu = "c7" ; |
871 | else |
872 | /* It is Core Duo. */ |
873 | cpu = "pentium-m" ; |
874 | } |
875 | else if (has_sse2) |
876 | /* It is Pentium M. */ |
877 | cpu = "pentium-m" ; |
878 | else if (has_sse) |
879 | { |
880 | if (vendor == signature_CENTAUR_ebx) |
881 | { |
882 | if (model >= 9) |
883 | /* Eden "Nehemiah" */ |
884 | cpu = "nehemiah" ; |
885 | else |
886 | cpu = "c3-2" ; |
887 | } |
888 | else |
889 | /* It is Pentium III. */ |
890 | cpu = "pentium3" ; |
891 | } |
892 | else if (has_mmx) |
893 | /* It is Pentium II. */ |
894 | cpu = "pentium2" ; |
895 | else |
896 | /* Default to Pentium Pro. */ |
897 | cpu = "pentiumpro" ; |
898 | } |
899 | else |
900 | /* For -mtune, we default to -mtune=generic. */ |
901 | cpu = "generic" ; |
902 | break; |
903 | } |
904 | break; |
905 | case PROCESSOR_PENTIUM4: |
906 | if (has_sse3) |
907 | { |
908 | if (has_longmode) |
909 | cpu = "nocona" ; |
910 | else |
911 | cpu = "prescott" ; |
912 | } |
913 | else |
914 | cpu = "pentium4" ; |
915 | break; |
916 | case PROCESSOR_GEODE: |
917 | cpu = "geode" ; |
918 | break; |
919 | case PROCESSOR_K6: |
920 | if (arch && has_3dnow) |
921 | cpu = "k6-3" ; |
922 | else |
923 | cpu = "k6" ; |
924 | break; |
925 | case PROCESSOR_ATHLON: |
926 | if (arch && has_sse) |
927 | cpu = "athlon-4" ; |
928 | else |
929 | cpu = "athlon" ; |
930 | break; |
931 | case PROCESSOR_K8: |
932 | if (arch) |
933 | { |
934 | if (vendor == signature_CENTAUR_ebx) |
935 | { |
936 | if (has_sse4_1) |
937 | /* Nano 3000 | Nano dual / quad core | Eden X4 */ |
938 | cpu = "nano-3000" ; |
939 | else if (has_ssse3) |
940 | /* Nano 1000 | Nano 2000 */ |
941 | cpu = "nano" ; |
942 | else if (has_sse3) |
943 | /* Eden X2 */ |
944 | cpu = "eden-x2" ; |
945 | else |
946 | /* Default to k8 */ |
947 | cpu = "k8" ; |
948 | } |
949 | else if (has_sse3) |
950 | cpu = "k8-sse3" ; |
951 | else |
952 | cpu = "k8" ; |
953 | } |
954 | else |
955 | /* For -mtune, we default to -mtune=k8 */ |
956 | cpu = "k8" ; |
957 | break; |
958 | case PROCESSOR_AMDFAM10: |
959 | cpu = "amdfam10" ; |
960 | break; |
961 | case PROCESSOR_BDVER1: |
962 | cpu = "bdver1" ; |
963 | break; |
964 | case PROCESSOR_BDVER2: |
965 | cpu = "bdver2" ; |
966 | break; |
967 | case PROCESSOR_BDVER3: |
968 | cpu = "bdver3" ; |
969 | break; |
970 | case PROCESSOR_BDVER4: |
971 | cpu = "bdver4" ; |
972 | break; |
973 | case PROCESSOR_ZNVER1: |
974 | cpu = "znver1" ; |
975 | break; |
976 | case PROCESSOR_BTVER1: |
977 | cpu = "btver1" ; |
978 | break; |
979 | case PROCESSOR_BTVER2: |
980 | cpu = "btver2" ; |
981 | break; |
982 | |
983 | default: |
984 | /* Use something reasonable. */ |
985 | if (arch) |
986 | { |
987 | if (has_ssse3) |
988 | cpu = "core2" ; |
989 | else if (has_sse3) |
990 | { |
991 | if (has_longmode) |
992 | cpu = "nocona" ; |
993 | else |
994 | cpu = "prescott" ; |
995 | } |
996 | else if (has_longmode) |
997 | /* Perhaps some emulator? Assume x86-64, otherwise gcc |
998 | -march=native would be unusable for 64-bit compilations, |
999 | as all the CPUs below are 32-bit only. */ |
1000 | cpu = "x86-64" ; |
1001 | else if (has_sse2) |
1002 | cpu = "pentium4" ; |
1003 | else if (has_cmov) |
1004 | cpu = "pentiumpro" ; |
1005 | else if (has_mmx) |
1006 | cpu = "pentium-mmx" ; |
1007 | else if (has_cmpxchg8b) |
1008 | cpu = "pentium" ; |
1009 | } |
1010 | else |
1011 | cpu = "generic" ; |
1012 | } |
1013 | |
1014 | if (arch) |
1015 | { |
1016 | const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx" ; |
1017 | const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow" ; |
1018 | const char *sse = has_sse ? " -msse" : " -mno-sse" ; |
1019 | const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2" ; |
1020 | const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3" ; |
1021 | const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3" ; |
1022 | const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a" ; |
1023 | const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16" ; |
1024 | const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf" ; |
1025 | const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe" ; |
1026 | const char *aes = has_aes ? " -maes" : " -mno-aes" ; |
1027 | const char *sha = has_sha ? " -msha" : " -mno-sha" ; |
1028 | const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul" ; |
1029 | const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt" ; |
1030 | const char *abm = has_abm ? " -mabm" : " -mno-abm" ; |
1031 | const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp" ; |
1032 | const char *fma = has_fma ? " -mfma" : " -mno-fma" ; |
1033 | const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4" ; |
1034 | const char *xop = has_xop ? " -mxop" : " -mno-xop" ; |
1035 | const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi" ; |
1036 | const char *sgx = has_sgx ? " -msgx" : " -mno-sgx" ; |
1037 | const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2" ; |
1038 | const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm" ; |
1039 | const char *avx = has_avx ? " -mavx" : " -mno-avx" ; |
1040 | const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2" ; |
1041 | const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2" ; |
1042 | const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1" ; |
1043 | const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt" ; |
1044 | const char *hle = has_hle ? " -mhle" : " -mno-hle" ; |
1045 | const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm" ; |
1046 | const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd" ; |
1047 | const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c" ; |
1048 | const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase" ; |
1049 | const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed" ; |
1050 | const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw" ; |
1051 | const char *adx = has_adx ? " -madx" : " -mno-adx" ; |
1052 | const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr" ; |
1053 | const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave" ; |
1054 | const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt" ; |
1055 | const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f" ; |
1056 | const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er" ; |
1057 | const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd" ; |
1058 | const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf" ; |
1059 | const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1" ; |
1060 | const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt" ; |
1061 | const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec" ; |
1062 | const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves" ; |
1063 | const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq" ; |
1064 | const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw" ; |
1065 | const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl" ; |
1066 | const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma" ; |
1067 | const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi" ; |
1068 | const char *avx5124vnniw = has_avx5124vnniw ? " -mavx5124vnniw" : " -mno-avx5124vnniw" ; |
1069 | const char *avx512vbmi2 = has_avx512vbmi2 ? " -mavx512vbmi2" : " -mno-avx512vbmi2" ; |
1070 | const char *avx512vnni = has_avx512vnni ? " -mavx512vnni" : " -mno-avx512vnni" ; |
1071 | const char *avx5124fmaps = has_avx5124fmaps ? " -mavx5124fmaps" : " -mno-avx5124fmaps" ; |
1072 | const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb" ; |
1073 | const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx" ; |
1074 | const char *clzero = has_clzero ? " -mclzero" : " -mno-clzero" ; |
1075 | const char *pku = has_pku ? " -mpku" : " -mno-pku" ; |
1076 | const char *rdpid = has_rdpid ? " -mrdpid" : " -mno-rdpid" ; |
1077 | const char *gfni = has_gfni ? " -mgfni" : " -mno-gfni" ; |
1078 | const char *ibt = has_ibt ? " -mibt" : " -mno-ibt" ; |
1079 | const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk" ; |
1080 | const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes" ; |
1081 | options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3, |
1082 | sse4a, cx16, sahf, movbe, aes, sha, pclmul, |
1083 | popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2, |
1084 | tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm, |
1085 | hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx, |
1086 | fxsr, xsave, xsaveopt, avx512f, avx512er, |
1087 | avx512cd, avx512pf, prefetchwt1, clflushopt, |
1088 | xsavec, xsaves, avx512dq, avx512bw, avx512vl, |
1089 | avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw, |
1090 | clwb, mwaitx, clzero, pku, rdpid, gfni, ibt, shstk, |
1091 | avx512vbmi2, avx512vnni, vaes, NULL); |
1092 | } |
1093 | |
1094 | done: |
1095 | return concat (cache, "-m" , argv[0], "=" , cpu, options, NULL); |
1096 | } |
1097 | #else |
1098 | |
/* Fallback used when the host compiler is not GCC, or is a GCC older
   than version 5 generating PIC code, where the %EBX register is
   fixed.  No detection is attempted: NULL is returned, so the driver
   will just ignore the -march and -mtune "native" target and leave it
   to the newly built compiler to generate code for its default
   target.  */

const char *
host_detect_local_cpu (int, const char **)
{
  return NULL;
}
1107 | #endif /* __GNUC__ */ |
1108 | |