1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Copyright (C) 2018 Intel Corporation.
5** Contact: https://www.qt.io/licensing/
6**
7** This file is part of the QtCore module of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial License Usage
11** Licensees holding valid commercial Qt licenses may use this file in
12** accordance with the commercial license agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and The Qt Company. For licensing terms
15** and conditions see https://www.qt.io/terms-conditions. For further
16** information use the contact form at https://www.qt.io/contact-us.
17**
18** GNU Lesser General Public License Usage
19** Alternatively, this file may be used under the terms of the GNU Lesser
20** General Public License version 3 as published by the Free Software
21** Foundation and appearing in the file LICENSE.LGPL3 included in the
22** packaging of this file. Please review the following information to
23** ensure the GNU Lesser General Public License version 3 requirements
24** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
25**
26** GNU General Public License Usage
27** Alternatively, this file may be used under the terms of the GNU
28** General Public License version 2.0 or (at your option) the GNU General
29** Public license version 3 or any later version approved by the KDE Free
30** Qt Foundation. The licenses are as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
32** included in the packaging of this file. Please review the following
33** information to ensure the GNU General Public License requirements will
34** be met: https://www.gnu.org/licenses/gpl-2.0.html and
35** https://www.gnu.org/licenses/gpl-3.0.html.
36**
37** $QT_END_LICENSE$
38**
39****************************************************************************/
40
41#include "qsimd_p.h"
42#include "qalgorithms.h"
43#include <QByteArray>
44#include <stdio.h>
45
46#ifdef Q_OS_LINUX
47# include "../testlib/3rdparty/valgrind_p.h"
48#endif
49
50#if defined(Q_OS_WIN)
51# if !defined(Q_CC_GNU)
52# include <intrin.h>
53# endif
54#elif defined(Q_OS_LINUX) && (defined(Q_PROCESSOR_ARM) || defined(Q_PROCESSOR_MIPS_32))
55#include "private/qcore_unix_p.h"
56
57// the kernel header definitions for HWCAP_*
58// (the ones we need/may need anyway)
59
60// copied from <asm/hwcap.h> (ARM)
61#define HWCAP_CRUNCH 1024
62#define HWCAP_THUMBEE 2048
63#define HWCAP_NEON 4096
64#define HWCAP_VFPv3 8192
65#define HWCAP_VFPv3D16 16384
66
67// copied from <asm/hwcap.h> (ARM):
68#define HWCAP2_CRC32 (1 << 4)
69
70// copied from <asm/hwcap.h> (Aarch64)
71#define HWCAP_CRC32 (1 << 7)
72
73// copied from <linux/auxvec.h>
74#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */
75#define AT_HWCAP2 26 /* extension of AT_HWCAP */
76
77#elif defined(Q_CC_GHS)
78#include <INTEGRITY_types.h>
79#endif
80
81QT_BEGIN_NAMESPACE
82
83/*
84 * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note
85 * we remove the terminating -1 that the script adds.
86 */
87
88// begin generated
#if defined(Q_PROCESSOR_ARM)
/* Data:
 neon
 crc32
 */
// Feature names packed into a single buffer. Every name starts with a space
// and is NUL-terminated; an empty string closes the table.
static const char features_string[] =
    " neon\0"
    " crc32\0"
    "\0";
// Byte offset of each name inside features_string.
static const int features_indices[] = { 0, 6 };
#elif defined(Q_PROCESSOR_MIPS)
/* Data:
 dsp
 dspr2
*/
// Feature names packed into a single buffer. Every name starts with a space
// and is NUL-terminated; an empty string closes the table.
static const char features_string[] =
    " dsp\0"
    " dspr2\0"
    "\0";

// Byte offset of each name inside features_string.
static const int features_indices[] = {
    0, 5
};
#elif defined(Q_PROCESSOR_X86)
// The x86 tables come from a generated file that is included textually.
# include "qsimd_x86.cpp" // generated by util/x86simdgen
#else
// No known features on this architecture: both tables are empty.
static const char features_string[] = "";
static const int features_indices[] = { };
#endif
118// end generated
119
120#if defined (Q_OS_NACL)
121static inline uint detectProcessorFeatures()
122{
123 return 0;
124}
125#elif defined(Q_PROCESSOR_ARM)
126static inline quint64 detectProcessorFeatures()
127{
128 quint64 features = 0;
129
130#if defined(Q_OS_LINUX)
131# if defined(Q_PROCESSOR_ARM_V8) && defined(Q_PROCESSOR_ARM_64)
132 features |= Q_UINT64_C(1) << CpuFeatureNEON; // NEON is always available on ARMv8 64bit.
133# endif
134 int auxv = qt_safe_open("/proc/self/auxv", O_RDONLY);
135 if (auxv != -1) {
136 unsigned long vector[64];
137 int nread;
138 while (features == 0) {
139 nread = qt_safe_read(auxv, (char *)vector, sizeof vector);
140 if (nread <= 0) {
141 // EOF or error
142 break;
143 }
144
145 int max = nread / (sizeof vector[0]);
146 for (int i = 0; i < max; i += 2) {
147 if (vector[i] == AT_HWCAP) {
148# if defined(Q_PROCESSOR_ARM_V8) && defined(Q_PROCESSOR_ARM_64)
149 // For Aarch64:
150 if (vector[i+1] & HWCAP_CRC32)
151 features |= Q_UINT64_C(1) << CpuFeatureCRC32;
152# endif
153 // Aarch32, or ARMv7 or before:
154 if (vector[i+1] & HWCAP_NEON)
155 features |= Q_UINT64_C(1) << CpuFeatureNEON;
156 }
157# if defined(Q_PROCESSOR_ARM_32)
158 // For Aarch32:
159 if (vector[i] == AT_HWCAP2) {
160 if (vector[i+1] & HWCAP2_CRC32)
161 features |= Q_UINT64_C(1) << CpuFeatureCRC32;
162 }
163# endif
164 }
165 }
166
167 qt_safe_close(auxv);
168 return features;
169 }
170 // fall back if /proc/self/auxv wasn't found
171#endif
172
173#if defined(__ARM_NEON__)
174 features |= Q_UINT64_C(1) << CpuFeatureNEON;
175#endif
176#if defined(__ARM_FEATURE_CRC32)
177 features |= Q_UINT64_C(1) << CpuFeatureCRC32;
178#endif
179
180 return features;
181}
182
183#elif defined(Q_PROCESSOR_X86)
184
// Name of the EBX/RBX register, spelled with a doubled '%' so it can be
// pasted into an extended-asm template. The CPUID wrappers in this file swap
// this register with a scratch register around the instruction instead of
// listing it as clobbered; judging by the macro name, that is because it
// may serve as the PIC base register.
#ifdef Q_PROCESSOR_X86_32
# define PICreg "%%ebx"
#else
# define PICreg "%%rbx"
#endif
190
/*
 * Returns the value CPUID leaf 0 reports in EAX (the highest basic leaf that
 * may be queried). Returns 0 when the instruction is found to be unavailable
 * or when no implementation exists for the compiler in use.
 */
static int maxBasicCpuidSupported()
{
#if defined(Q_CC_EMSCRIPTEN)
    return 6; // All features supported by Emscripten
#elif defined(Q_CC_GNU)
    qregisterint scratch;

#  if Q_PROCESSOR_X86 < 5
    // check if the CPUID instruction is supported: try to flip bit 21 of
    // the flags register and see whether the change sticks
    long flagsDelta;
    asm ("pushf\n"
         "pop %0\n"
         "mov %0, %1\n"
         "xor $0x00200000, %0\n"
         "push %0\n"
         "popf\n"
         "pushf\n"
         "pop %0\n"
         "xor %1, %0\n" // %eax is now 0 if CPUID is not supported
        : "=a" (flagsDelta), "=r" (scratch)
        );
    if (flagsDelta == 0)
        return 0;
#  endif

    // Leaf 0; the PIC register is parked in the scratch register while
    // CPUID runs and restored afterwards.
    int maxLeaf;
    asm ("xchg " PICreg", %1\n"
         "cpuid\n"
         "xchg " PICreg", %1\n"
        : "=&a" (maxLeaf), "=&r" (scratch)
        : "0" (0)
        : "ecx", "edx");
    return maxLeaf;
#elif defined(Q_OS_WIN)
    // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
    int regs[4];
    __cpuid(regs, 0);
    return regs[0];
#elif defined(Q_CC_GHS)
    unsigned int regs[4];
    __CPUID(0, regs);
    return regs[0];
#else
    return 0;
#endif
}
237
238static void cpuidFeatures01(uint &ecx, uint &edx)
239{
240#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
241 qregisterint tmp1;
242 asm ("xchg " PICreg", %2\n"
243 "cpuid\n"
244 "xchg " PICreg", %2\n"
245 : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1)
246 : "a" (1));
247#elif defined(Q_OS_WIN)
248 int info[4];
249 __cpuid(info, 1);
250 ecx = info[2];
251 edx = info[3];
252#elif defined(Q_CC_GHS)
253 unsigned int info[4];
254 __CPUID(1, info);
255 ecx = info[2];
256 edx = info[3];
257#else
258 Q_UNUSED(ecx);
259 Q_UNUSED(edx);
260#endif
261}
262
#ifdef Q_OS_WIN
// Overload of __cpuidex() taking a wider third argument; it only zero-fills
// the four result registers. Presumably a fallback for toolchains that do
// not provide the real intrinsic (TODO confirm).
inline void __cpuidex(int info[4], int, __int64)
{
    for (int reg = 0; reg < 4; ++reg)
        info[reg] = 0;
}
#endif
266
267static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx)
268{
269#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
270 qregisteruint rbx; // in case it's 64-bit
271 qregisteruint rcx = 0;
272 qregisteruint rdx = 0;
273 asm ("xchg " PICreg", %0\n"
274 "cpuid\n"
275 "xchg " PICreg", %0\n"
276 : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx)
277 : "a" (7));
278 ebx = rbx;
279 ecx = rcx;
280 edx = rdx;
281#elif defined(Q_OS_WIN)
282 int info[4];
283 __cpuidex(info, 7, 0);
284 ebx = info[1];
285 ecx = info[2];
286 edx = info[3];
287#elif defined(Q_CC_GHS)
288 unsigned int info[4];
289 __CPUIDEX(7, 0, info);
290 ebx = info[1];
291 ecx = info[2];
292 edx = info[3];
293#else
294 Q_UNUSED(ebx);
295 Q_UNUSED(ecx);
296 Q_UNUSED(edx);
297#endif
298}
299
#if defined(Q_OS_WIN) && !defined(Q_CC_GNU) && !defined(Q_CC_GHS)
// Fallback overload used when the compiler does not supply the intrinsic
//     unsigned __int64 _xgetbv(unsigned int);
// It reports an all-zero register.
inline quint64 _xgetbv(__int64)
{
    return 0;
}
#endif
304static void xgetbv(uint in, uint &eax, uint &edx)
305{
306#if (defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)) || defined(Q_CC_GHS)
307 asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction
308 : "=a" (eax), "=d" (edx)
309 : "c" (in));
310#elif defined(Q_OS_WIN)
311 quint64 result = _xgetbv(in);
312 eax = result;
313 edx = result >> 32;
314#else
315 Q_UNUSED(in);
316 Q_UNUSED(eax);
317 Q_UNUSED(edx);
318#endif
319}
320
321static quint64 detectProcessorFeatures()
322{
323 // Flags from the CR0 / XCR0 state register
324 enum XCR0Flags {
325 X87 = 1 << 0,
326 XMM0_15 = 1 << 1,
327 YMM0_15Hi128 = 1 << 2,
328 BNDRegs = 1 << 3,
329 BNDCSR = 1 << 4,
330 OpMask = 1 << 5,
331 ZMM0_15Hi256 = 1 << 6,
332 ZMM16_31 = 1 << 7,
333
334 SSEState = XMM0_15,
335 AVXState = XMM0_15 | YMM0_15Hi128,
336 AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
337 };
338 static const quint64 AllAVX2 = CpuFeatureAVX2 | AllAVX512;
339 static const quint64 AllAVX = CpuFeatureAVX | AllAVX2;
340
341 quint64 features = 0;
342 int cpuidLevel = maxBasicCpuidSupported();
343#if Q_PROCESSOR_X86 < 5
344 if (cpuidLevel < 1)
345 return 0;
346#else
347 Q_ASSERT(cpuidLevel >= 1);
348#endif
349
350 uint results[X86CpuidMaxLeaf] = {};
351 cpuidFeatures01(results[Leaf1ECX], results[Leaf1EDX]);
352 if (cpuidLevel >= 7)
353 cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]);
354
355 // populate our feature list
356 for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) {
357 uint word = x86_locators[i] / 32;
358 uint bit = 1U << (x86_locators[i] % 32);
359 quint64 feature = Q_UINT64_C(1) << (i + 1);
360 if (results[word] & bit)
361 features |= feature;
362 }
363
364 // now check the AVX state
365 uint xgetbvA = 0, xgetbvD = 0;
366 if (results[Leaf1ECX] & (1u << 27)) {
367 // XGETBV enabled
368 xgetbv(0, xgetbvA, xgetbvD);
369 }
370
371 if ((xgetbvA & AVXState) != AVXState) {
372 // support for YMM registers is disabled, disable all AVX
373 features &= ~AllAVX;
374 } else if ((xgetbvA & AVX512State) != AVX512State) {
375 // support for ZMM registers or mask registers is disabled, disable all AVX512
376 features &= ~AllAVX512;
377 }
378
379 return features;
380}
381
382#elif defined(Q_PROCESSOR_MIPS_32)
383
384#if defined(Q_OS_LINUX)
//
// Do not use QByteArray: it could use SIMD instructions itself at
// some point, thus creating a recursive dependency. Instead, use a
// QSimpleBuffer, which has the bare minimum needed to use memory
// dynamically and read lines from /proc/cpuinfo of arbitrary sizes.
//
// "data" points to "alloc" bytes of heap memory, of which the first "size"
// are in use. Running out of memory aborts the process.
//
struct QSimpleBuffer {
    static const int chunk_size = 256;
    char *data;
    unsigned alloc;
    unsigned size;

    QSimpleBuffer(): data(0), alloc(0), size(0) {}
    ~QSimpleBuffer() { ::free(data); }

    // Sets the number of bytes in use, growing the allocation in multiples
    // of chunk_size when needed. Bytes gained by growing are uninitialized.
    void resize(unsigned newsize) {
        if (newsize > alloc) {
            unsigned newalloc = chunk_size * ((newsize / chunk_size) + 1);
            if (newalloc < newsize) newalloc = newsize; // the rounding wrapped around
            // keep the old pointer until realloc() is known to have worked
            char *grown = static_cast<char*>(::realloc(data, newalloc));
            if (!grown)
                ::abort();
            data = grown;
            alloc = newalloc;
        }
        size = newsize;
    }
    // Appends the first appendsize bytes of other to this buffer.
    void append(const QSimpleBuffer &other, unsigned appendsize) {
        unsigned oldsize = size;
        resize(oldsize + appendsize);
        if (appendsize) // other.data may be null when there is nothing to copy
            ::memcpy(data + oldsize, other.data, appendsize);
    }
    // Drops the first amount bytes, moving the remainder to the front.
    void popleft(unsigned amount) {
        if (amount >= size) {
            resize(0);
            return;
        }
        size -= amount;
        ::memmove(data, data + amount, size);
    }
    // Returns the contents as a NUL-terminated string. The terminator is
    // stored just past the used bytes and is not counted in size.
    char* cString() {
        if (size >= alloc) {
            // make room for the terminator without changing the logical size
            const unsigned used = size;
            resize(used + 1);
            size = used;
        }
        data[size] = '\0';
        return data;
    }
};
426
427//
428// Uses a scratch "buffer" (which must be used for all reads done in the
429// same file descriptor) to read chunks of data from a file, to read
430// one line at a time. Lines include the trailing newline character ('\n').
431// On EOF, line.size is zero.
432//
433static void bufReadLine(int fd, QSimpleBuffer &line, QSimpleBuffer &buffer)
434{
435 for (;;) {
436 char *newline = static_cast<char*>(::memchr(buffer.data, '\n', buffer.size));
437 if (newline) {
438 unsigned piece_size = newline - buffer.data + 1;
439 line.append(buffer, piece_size);
440 buffer.popleft(piece_size);
441 line.resize(line.size - 1);
442 return;
443 }
444 if (buffer.size + QSimpleBuffer::chunk_size > buffer.alloc) {
445 int oldsize = buffer.size;
446 buffer.resize(buffer.size + QSimpleBuffer::chunk_size);
447 buffer.size = oldsize;
448 }
449 ssize_t read_bytes = ::qt_safe_read(fd, buffer.data + buffer.size, QSimpleBuffer::chunk_size);
450 if (read_bytes > 0) buffer.size += read_bytes;
451 else return;
452 }
453}
454
455//
456// Checks if any line with a given prefix from /proc/cpuinfo contains
457// a certain string, surrounded by spaces.
458//
459static bool procCpuinfoContains(const char *prefix, const char *string)
460{
461 int cpuinfo_fd = ::qt_safe_open("/proc/cpuinfo", O_RDONLY);
462 if (cpuinfo_fd == -1)
463 return false;
464
465 unsigned string_len = ::strlen(string);
466 unsigned prefix_len = ::strlen(prefix);
467 QSimpleBuffer line, buffer;
468 bool present = false;
469 do {
470 line.resize(0);
471 bufReadLine(cpuinfo_fd, line, buffer);
472 char *colon = static_cast<char*>(::memchr(line.data, ':', line.size));
473 if (colon && line.size > prefix_len + string_len) {
474 if (!::strncmp(prefix, line.data, prefix_len)) {
475 // prefix matches, next character must be ':' or space
476 if (line.data[prefix_len] == ':' || ::isspace(line.data[prefix_len])) {
477 // Does it contain the string?
478 char *found = ::strstr(line.cString(), string);
479 if (found && ::isspace(found[-1]) &&
480 (::isspace(found[string_len]) || found[string_len] == '\0')) {
481 present = true;
482 break;
483 }
484 }
485 }
486 }
487 } while (line.size);
488
489 ::qt_safe_close(cpuinfo_fd);
490 return present;
491}
492#endif
493
494static inline quint64 detectProcessorFeatures()
495{
496 // NOTE: MIPS 74K cores are the only ones supporting DSPr2.
497 quint64 flags = 0;
498
499#if defined __mips_dsp
500 flags |= Q_UINT64_C(1) << CpuFeatureDSP;
501# if defined __mips_dsp_rev && __mips_dsp_rev >= 2
502 flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
503# elif defined(Q_OS_LINUX)
504 if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
505 flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
506# endif
507#elif defined(Q_OS_LINUX)
508 if (procCpuinfoContains("ASEs implemented", "dsp")) {
509 flags |= Q_UINT64_C(1) << CpuFeatureDSP;
510 if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
511 flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
512 }
513#endif
514
515 return flags;
516}
517
518#else
519static inline uint detectProcessorFeatures()
520{
521 return 0;
522}
523#endif
524
// Number of entries in features_indices, i.e. how many feature names the
// table for this architecture holds.
static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]);

// record what CPU features were enabled by default in this Qt build
static const quint64 minFeature = qCompilerCpuFeatures;

// Storage for the detected feature mask, filled in by qDetectCpuFeatures().
// With 64-bit atomics it is a single word; otherwise the mask is split into
// two 32-bit words, low half first.
#ifdef Q_ATOMIC_INT64_IS_SUPPORTED
Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1] = { Q_BASIC_ATOMIC_INITIALIZER(0) };
#else
Q_CORE_EXPORT QBasicAtomicInteger<unsigned> qt_cpu_features[2] = { Q_BASIC_ATOMIC_INITIALIZER(0), Q_BASIC_ATOMIC_INITIALIZER(0) };
#endif
535
536quint64 qDetectCpuFeatures()
537{
538 quint64 f = detectProcessorFeatures();
539 QByteArray disable = qgetenv("QT_NO_CPU_FEATURE");
540 if (!disable.isEmpty()) {
541 disable.prepend(' ');
542 for (int i = 0; i < features_count; ++i) {
543 if (disable.contains(features_string + features_indices[i]))
544 f &= ~(Q_UINT64_C(1) << i);
545 }
546 }
547
548#ifdef RUNNING_ON_VALGRIND
549 bool runningOnValgrind = RUNNING_ON_VALGRIND;
550#else
551 bool runningOnValgrind = false;
552#endif
553 if (Q_UNLIKELY(!runningOnValgrind && minFeature != 0 && (f & minFeature) != minFeature)) {
554 quint64 missing = minFeature & ~f;
555 fprintf(stderr, "Incompatible processor. This Qt build requires the following features:\n ");
556 for (int i = 0; i < features_count; ++i) {
557 if (missing & (Q_UINT64_C(1) << i))
558 fprintf(stderr, "%s", features_string + features_indices[i]);
559 }
560 fprintf(stderr, "\n");
561 fflush(stderr);
562 qFatal("Aborted. Incompatible processor: missing feature 0x%llx -%s.", missing,
563 features_string + features_indices[qCountTrailingZeroBits(missing)]);
564 }
565
566 qt_cpu_features[0].storeRelaxed(f | quint32(QSimdInitialized));
567#ifndef Q_ATOMIC_INT64_IS_SUPPORTED
568 qt_cpu_features[1].storeRelaxed(f >> 32);
569#endif
570 return f;
571}
572
573void qDumpCPUFeatures()
574{
575 quint64 features = qCpuFeatures() & ~quint64(QSimdInitialized);
576 printf("Processor features: ");
577 for (int i = 0; i < features_count; ++i) {
578 if (features & (Q_UINT64_C(1) << i))
579 printf("%s%s", features_string + features_indices[i],
580 minFeature & (Q_UINT64_C(1) << i) ? "[required]" : "");
581 }
582 if ((features = (qCompilerCpuFeatures & ~features))) {
583 printf("\n!!!!!!!!!!!!!!!!!!!!\n!!! Missing required features:");
584 for (int i = 0; i < features_count; ++i) {
585 if (features & (Q_UINT64_C(1) << i))
586 printf("%s", features_string + features_indices[i]);
587 }
588 printf("\n!!! Applications will likely crash with \"Invalid Instruction\"\n!!!!!!!!!!!!!!!!!!!!");
589 }
590 puts("");
591}
592
593QT_END_NAMESPACE
594