1// Copyright (C) 2021 The Qt Company Ltd.
2// Copyright (C) 2022 Intel Corporation.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
4
5// we need ICC to define the prototype for _rdseed64_step
6#define __INTEL_COMPILER_USE_INTRINSIC_PROTOTYPES
7#undef _FORTIFY_SOURCE // otherwise, the always_inline from stdio.h fail to inline
8
9#include "qsimd_p.h"
10#include "qalgorithms.h"
11
12#include <stdio.h>
13#include <string.h>
14
15#if defined(QT_NO_DEBUG) && !defined(NDEBUG)
16# define NDEBUG
17#endif
18#include <assert.h>
19
20#ifdef Q_OS_LINUX
21# include "../testlib/3rdparty/valgrind_p.h"
22#endif
23
24#define QT_FUNCTION_TARGET_BASELINE
25
26#if defined(Q_OS_WIN)
27# if !defined(Q_CC_GNU)
28# include <intrin.h>
29# endif
30# if defined(Q_PROCESSOR_ARM_64)
31# include <qt_windows.h>
32# include <processthreadsapi.h>
33# endif
34#elif defined(Q_OS_LINUX) && defined(Q_PROCESSOR_MIPS_32)
35# include "private/qcore_unix_p.h"
36#elif QT_CONFIG(getauxval) && defined(Q_PROCESSOR_ARM)
37# include <sys/auxv.h>
38
39// the kernel header definitions for HWCAP_*
40// (the ones we need/may need anyway)
41
42// copied from <asm/hwcap.h> (ARM)
43#define HWCAP_NEON 4096
44
45// copied from <asm/hwcap.h> (ARM):
46#define HWCAP2_AES (1 << 0)
47#define HWCAP2_CRC32 (1 << 4)
48
49// copied from <asm/hwcap.h> (Aarch64)
50#define HWCAP_AES (1 << 3)
51#define HWCAP_CRC32 (1 << 7)
52
53// copied from <linux/auxvec.h>
54#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */
55#define AT_HWCAP2 26 /* extension of AT_HWCAP */
56
57#elif defined(Q_CC_GHS)
58# include <INTEGRITY_types.h>
59#elif defined(Q_OS_DARWIN) && defined(Q_PROCESSOR_ARM)
60# include <sys/sysctl.h>
61#endif
62
63QT_BEGIN_NAMESPACE
64
65template <typename T, uint N> QT_FUNCTION_TARGET_BASELINE
66uint arraysize(T (&)[N])
67{
68 // Same as std::size, but with QT_FUNCTION_TARGET_BASELIE,
69 // otherwise some versions of GCC fail to compile.
70 return N;
71}
72
// Feature-name tables. features_string packs the NUL-terminated names (each
// stored with a leading space) and features_indices[i] is the offset of the
// name that belongs to feature bit i. Entry 0 is the empty string.
#if defined(Q_PROCESSOR_ARM)
/* Data:
    neon
    crc32
    aes
 */
static const char features_string[] =
    "\0"
    " neon\0"
    " crc32\0"
    " aes\0";

static const int features_indices[] = {
    0, 1, 7, 14
};
#elif defined(Q_PROCESSOR_MIPS)
/* Data:
    dsp
    dspr2
 */
static const char features_string[] =
    "\0"
    " dsp\0"
    " dspr2\0";

static const int features_indices[] = {
    0, 1, 6
};
#elif defined(Q_PROCESSOR_X86)
#  include "qsimd_x86.cpp"    // generated by util/x86simdgen
#else
// Unknown architecture: a single, empty feature name.
static const char features_string[] = "";
static const int features_indices[] = {
    0
};
#endif
// end generated
105
106#if defined (Q_OS_NACL)
// Stub used when Q_OS_NACL is defined: reports that no CPU feature was detected.
static inline uint detectProcessorFeatures()
{
    const uint noFeatures = 0;
    return noFeatures;
}
111#elif defined(Q_PROCESSOR_ARM)
112static inline quint64 detectProcessorFeatures()
113{
114 quint64 features = 0;
115
116#if QT_CONFIG(getauxval)
117 unsigned long auxvHwCap = getauxval(AT_HWCAP);
118 if (auxvHwCap != 0) {
119# if defined(Q_PROCESSOR_ARM_64)
120 // For Aarch64:
121 features |= CpuFeatureNEON; // NEON is always available
122 if (auxvHwCap & HWCAP_CRC32)
123 features |= CpuFeatureCRC32;
124 if (auxvHwCap & HWCAP_AES)
125 features |= CpuFeatureAES;
126# else
127 // For ARM32:
128 if (auxvHwCap & HWCAP_NEON)
129 features |= CpuFeatureNEON;
130 auxvHwCap = getauxval(AT_HWCAP2);
131 if (auxvHwCap & HWCAP2_CRC32)
132 features |= CpuFeatureCRC32;
133 if (auxvHwCap & HWCAP2_AES)
134 features |= CpuFeatureAES;
135# endif
136 return features;
137 }
138 // fall back to compile-time flags if getauxval failed
139#elif defined(Q_OS_DARWIN) && defined(Q_PROCESSOR_ARM)
140 unsigned feature;
141 size_t len = sizeof(feature);
142 if (sysctlbyname("hw.optional.neon", &feature, &len, nullptr, 0) == 0)
143 features |= feature ? CpuFeatureNEON : 0;
144 if (sysctlbyname("hw.optional.armv8_crc32", &feature, &len, nullptr, 0) == 0)
145 features |= feature ? CpuFeatureCRC32 : 0;
146 // There is currently no optional value for crypto/AES.
147#if defined(__ARM_FEATURE_CRYPTO)
148 features |= CpuFeatureAES;
149#endif
150 return features;
151#elif defined(Q_OS_WIN) && defined(Q_PROCESSOR_ARM_64)
152 features |= CpuFeatureNEON;
153 if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0)
154 features |= CpuFeatureCRC32;
155 if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0)
156 features |= CpuFeatureAES;
157 return features;
158#endif
159#if defined(__ARM_NEON__) || defined(__ARM_NEON)
160 features |= CpuFeatureNEON;
161#endif
162#if defined(__ARM_FEATURE_CRC32)
163 features |= CpuFeatureCRC32;
164#endif
165#if defined(__ARM_FEATURE_CRYPTO)
166 features |= CpuFeatureAES;
167#endif
168
169 return features;
170}
171
172#elif defined(Q_PROCESSOR_X86)
173
// Register (as spelled inside an asm template) that the CPUID helpers in this
// file swap out around the cpuid instruction.
#if defined(Q_PROCESSOR_X86_32)
#  define PICreg "%%ebx"
#else
#  define PICreg "%%rbx"
#endif

// Target string used for the functions of this file
#if defined(__SSE2_MATH__)
#  define X86_BASELINE "no-sse3"
#else
#  define X86_BASELINE "no-sse"
#endif

#if defined(Q_CC_GNU)
// lower the target for functions in this file
#  undef QT_FUNCTION_TARGET_BASELINE
#  define QT_FUNCTION_TARGET_BASELINE __attribute__((target(X86_BASELINE)))
#  define QT_FUNCTION_TARGET_STRING_BASELINE_RDRND \
    X86_BASELINE "," QT_FUNCTION_TARGET_STRING_RDRND
#endif
192
193static bool checkRdrndWorks() noexcept;
194
195QT_FUNCTION_TARGET_BASELINE
196static int maxBasicCpuidSupported()
197{
198#if defined(Q_CC_EMSCRIPTEN)
199 return 6; // All features supported by Emscripten
200#elif defined(Q_CC_GNU)
201 qregisterint tmp1;
202
203# if Q_PROCESSOR_X86 < 5
204 // check if the CPUID instruction is supported
205 long cpuid_supported;
206 asm ("pushf\n"
207 "pop %0\n"
208 "mov %0, %1\n"
209 "xor $0x00200000, %0\n"
210 "push %0\n"
211 "popf\n"
212 "pushf\n"
213 "pop %0\n"
214 "xor %1, %0\n" // %eax is now 0 if CPUID is not supported
215 : "=a" (cpuid_supported), "=r" (tmp1)
216 );
217 if (!cpuid_supported)
218 return 0;
219# endif
220
221 int result;
222 asm ("xchg " PICreg", %1\n"
223 "cpuid\n"
224 "xchg " PICreg", %1\n"
225 : "=&a" (result), "=&r" (tmp1)
226 : "0" (0)
227 : "ecx", "edx");
228 return result;
229#elif defined(Q_OS_WIN)
230 // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
231 int info[4];
232 __cpuid(info, 0);
233 return info[0];
234#elif defined(Q_CC_GHS)
235 unsigned int info[4];
236 __CPUID(0, info);
237 return info[0];
238#else
239 return 0;
240#endif
241}
242
243QT_FUNCTION_TARGET_BASELINE
244static void cpuidFeatures01(uint &ecx, uint &edx)
245{
246#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
247 qregisterint tmp1;
248 asm ("xchg " PICreg", %2\n"
249 "cpuid\n"
250 "xchg " PICreg", %2\n"
251 : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1)
252 : "a" (1));
253#elif defined(Q_OS_WIN)
254 int info[4];
255 __cpuid(info, 1);
256 ecx = info[2];
257 edx = info[3];
258#elif defined(Q_CC_GHS)
259 unsigned int info[4];
260 __CPUID(1, info);
261 ecx = info[2];
262 edx = info[3];
263#else
264 Q_UNUSED(ecx);
265 Q_UNUSED(edx);
266#endif
267}
268
#ifdef Q_OS_WIN
// Overload of __cpuidex() that only zeroes the four output registers.
// NOTE(review): presumably a fallback for toolchains lacking the real
// intrinsic, like the _xgetbv() overload in this file -- confirm.
inline void __cpuidex(int info[4], int, __int64)
{
    memset(info, 0, sizeof(int) * 4);
}
#endif
272
273QT_FUNCTION_TARGET_BASELINE
274static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx)
275{
276#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
277 qregisteruint rbx; // in case it's 64-bit
278 qregisteruint rcx = 0;
279 qregisteruint rdx = 0;
280 asm ("xchg " PICreg", %0\n"
281 "cpuid\n"
282 "xchg " PICreg", %0\n"
283 : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx)
284 : "a" (7));
285 ebx = rbx;
286 ecx = rcx;
287 edx = rdx;
288#elif defined(Q_OS_WIN)
289 int info[4];
290 __cpuidex(info, 7, 0);
291 ebx = info[1];
292 ecx = info[2];
293 edx = info[3];
294#elif defined(Q_CC_GHS)
295 unsigned int info[4];
296 __CPUIDEX(7, 0, info);
297 ebx = info[1];
298 ecx = info[2];
299 edx = info[3];
300#else
301 Q_UNUSED(ebx);
302 Q_UNUSED(ecx);
303 Q_UNUSED(edx);
304#endif
305}
306
307QT_FUNCTION_TARGET_BASELINE
308#if defined(Q_OS_WIN) && !(defined(Q_CC_GNU) || defined(Q_CC_GHS))
309// fallback overload in case this intrinsic does not exist: unsigned __int64 _xgetbv(unsigned int);
310inline quint64 _xgetbv(__int64) { return 0; }
311#endif
312static void xgetbv(uint in, uint &eax, uint &edx)
313{
314#if (defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)) || defined(Q_CC_GHS)
315 asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction
316 : "=a" (eax), "=d" (edx)
317 : "c" (in));
318#elif defined(Q_OS_WIN)
319 quint64 result = _xgetbv(in);
320 eax = result;
321 edx = result >> 32;
322#else
323 Q_UNUSED(in);
324 Q_UNUSED(eax);
325 Q_UNUSED(edx);
326#endif
327}
328
329QT_FUNCTION_TARGET_BASELINE
330static quint64 adjustedXcr0(quint64 xcr0)
331{
332 /*
333 * Some OSes hide their capability of context-switching the AVX512 state in
334 * the XCR0 register. They do that so the first time we execute an
335 * instruction that may access the AVX512 state (requiring the EVEX prefix)
336 * they allocate the necessary context switch space.
337 *
338 * This behavior is deprecated with the XFD (Extended Feature Disable)
339 * register, but we can't change existing OSes.
340 */
341#ifdef Q_OS_DARWIN
342 // from <machine/cpu_capabilities.h> in xnu
343 // <https://github.com/apple/darwin-xnu/blob/xnu-4903.221.2/osfmk/i386/cpu_capabilities.h>
344 constexpr quint64 kHasAVX512F = Q_UINT64_C(0x0000004000000000);
345 constexpr quintptr commpage = sizeof(void *) > 4 ? Q_UINT64_C(0x00007fffffe00000) : 0xffff0000;
346 constexpr quintptr cpu_capabilities64 = commpage + 0x10;
347 quint64 capab = *reinterpret_cast<quint64 *>(cpu_capabilities64);
348 if (capab & kHasAVX512F)
349 xcr0 |= XSave_Avx512State;
350#endif
351
352 return xcr0;
353}
354
355QT_FUNCTION_TARGET_BASELINE
356static quint64 detectProcessorFeatures()
357{
358 quint64 features = 0;
359 int cpuidLevel = maxBasicCpuidSupported();
360#if Q_PROCESSOR_X86 < 5
361 if (cpuidLevel < 1)
362 return 0;
363#else
364 assert(cpuidLevel >= 1);
365#endif
366
367 uint results[X86CpuidMaxLeaf] = {};
368 cpuidFeatures01(ecx&: results[Leaf01ECX], edx&: results[Leaf01EDX]);
369 if (cpuidLevel >= 7)
370 cpuidFeatures07_00(ebx&: results[Leaf07_00EBX], ecx&: results[Leaf07_00ECX], edx&: results[Leaf07_00EDX]);
371
372 // populate our feature list
373 for (uint i = 0; i < arraysize(x86_locators); ++i) {
374 uint word = x86_locators[i] / 32;
375 uint bit = 1U << (x86_locators[i] % 32);
376 quint64 feature = Q_UINT64_C(1) << i;
377 if (results[word] & bit)
378 features |= feature;
379 }
380
381 // now check the AVX state
382 quint64 xcr0 = 0;
383 if (results[Leaf01ECX] & (1u << 27)) {
384 // XGETBV enabled
385 uint xgetbvA = 0, xgetbvD = 0;
386 xgetbv(in: 0, eax&: xgetbvA, edx&: xgetbvD);
387
388 xcr0 = xgetbvA;
389 if (sizeof(XSaveBits) > sizeof(xgetbvA))
390 xcr0 |= quint64(xgetbvD) << 32;
391 xcr0 = adjustedXcr0(xcr0);
392 }
393
394 for (auto req : xsave_requirements) {
395 if ((xcr0 & req.xsave_state) != req.xsave_state)
396 features &= ~req.cpu_features;
397 }
398
399 if (features & CpuFeatureRDRND && !checkRdrndWorks())
400 features &= ~(CpuFeatureRDRND | CpuFeatureRDSEED);
401
402 return features;
403}
404
405#elif defined(Q_PROCESSOR_MIPS_32)
406
407#if defined(Q_OS_LINUX)
//
// Do not use QByteArray: it could use SIMD instructions itself at
// some point, thus creating a recursive dependency. Instead, use a
// QSimpleBuffer, which has the bare minimum needed to use memory
// dynamically and read lines from /proc/cpuinfo of arbitrary sizes.
//
// "size" is the number of bytes in use and "alloc" the number of bytes
// allocated; resize() always leaves at least one spare byte past "size".
//
struct QSimpleBuffer
{
    static const int chunk_size = 256;
    char *data;
    unsigned alloc;
    unsigned size;

    QSimpleBuffer() : data(nullptr), alloc(0), size(0) { }
    ~QSimpleBuffer() { ::free(data); }

    // The buffer owns "data": a copy would free the same block twice.
    QSimpleBuffer(const QSimpleBuffer &) = delete;
    QSimpleBuffer &operator=(const QSimpleBuffer &) = delete;

    // Sets the used size, growing the allocation in multiples of chunk_size.
    // The allocation never shrinks.
    // NOTE(review): the result of realloc() is not checked.
    void resize(unsigned newsize)
    {
        if (newsize > alloc) {
            unsigned newalloc = chunk_size * ((newsize / chunk_size) + 1);
            if (newalloc < newsize)
                newalloc = newsize;
            if (newalloc != alloc) {
                data = static_cast<char *>(::realloc(data, newalloc));
                alloc = newalloc;
            }
        }
        size = newsize;
    }

    // Appends the first "appendsize" bytes of "other".
    void append(const QSimpleBuffer &other, unsigned appendsize)
    {
        if (!appendsize)
            return;     // nothing to copy; avoids memcpy() on null pointers
        unsigned oldsize = size;
        resize(oldsize + appendsize);
        ::memcpy(data + oldsize, other.data, appendsize);
    }

    // Removes the first "amount" bytes, moving the remainder to the front.
    void popleft(unsigned amount)
    {
        if (amount >= size) {
            resize(0);
            return;
        }
        size -= amount;
        ::memmove(data, data + amount, size);
    }

    // NUL-terminates the contents in place (the terminator is not counted in
    // "size") and returns the data pointer.
    char *cString()
    {
        if (!alloc) {
            // Get storage for the terminator, but keep the buffer empty:
            // resize(1) alone would claim one uninitialized byte as content.
            resize(1);
            size = 0;
        }
        data[size] = '\0';
        return data;
    }
};
457
458//
459// Uses a scratch "buffer" (which must be used for all reads done in the
460// same file descriptor) to read chunks of data from a file, to read
461// one line at a time. Lines include the trailing newline character ('\n').
462// On EOF, line.size is zero.
463//
464static void bufReadLine(int fd, QSimpleBuffer &line, QSimpleBuffer &buffer)
465{
466 for (;;) {
467 char *newline = static_cast<char *>(::memchr(buffer.data, '\n', buffer.size));
468 if (newline) {
469 unsigned piece_size = newline - buffer.data + 1;
470 line.append(buffer, piece_size);
471 buffer.popleft(piece_size);
472 line.resize(line.size - 1);
473 return;
474 }
475 if (buffer.size + QSimpleBuffer::chunk_size > buffer.alloc) {
476 int oldsize = buffer.size;
477 buffer.resize(buffer.size + QSimpleBuffer::chunk_size);
478 buffer.size = oldsize;
479 }
480 ssize_t read_bytes =
481 ::qt_safe_read(fd, buffer.data + buffer.size, QSimpleBuffer::chunk_size);
482 if (read_bytes > 0)
483 buffer.size += read_bytes;
484 else
485 return;
486 }
487}
488
489//
490// Checks if any line with a given prefix from /proc/cpuinfo contains
491// a certain string, surrounded by spaces.
492//
493static bool procCpuinfoContains(const char *prefix, const char *string)
494{
495 int cpuinfo_fd = ::qt_safe_open("/proc/cpuinfo", O_RDONLY);
496 if (cpuinfo_fd == -1)
497 return false;
498
499 unsigned string_len = ::strlen(string);
500 unsigned prefix_len = ::strlen(prefix);
501 QSimpleBuffer line, buffer;
502 bool present = false;
503 do {
504 line.resize(0);
505 bufReadLine(cpuinfo_fd, line, buffer);
506 char *colon = static_cast<char *>(::memchr(line.data, ':', line.size));
507 if (colon && line.size > prefix_len + string_len) {
508 if (!::strncmp(prefix, line.data, prefix_len)) {
509 // prefix matches, next character must be ':' or space
510 if (line.data[prefix_len] == ':' || ::isspace(line.data[prefix_len])) {
511 // Does it contain the string?
512 char *found = ::strstr(line.cString(), string);
513 if (found && ::isspace(found[-1]) &&
514 (::isspace(found[string_len]) || found[string_len] == '\0')) {
515 present = true;
516 break;
517 }
518 }
519 }
520 }
521 } while (line.size);
522
523 ::qt_safe_close(cpuinfo_fd);
524 return present;
525}
526#endif
527
528static inline quint64 detectProcessorFeatures()
529{
530 // NOTE: MIPS 74K cores are the only ones supporting DSPr2.
531 quint64 flags = 0;
532
533#if defined __mips_dsp
534 flags |= CpuFeatureDSP;
535# if defined __mips_dsp_rev && __mips_dsp_rev >= 2
536 flags |= CpuFeatureDSPR2;
537# elif defined(Q_OS_LINUX)
538 if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
539 flags |= CpuFeatureDSPR2;
540# endif
541#elif defined(Q_OS_LINUX)
542 if (procCpuinfoContains("ASEs implemented", "dsp")) {
543 flags |= CpuFeatureDSP;
544 if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
545 flags |= CpuFeatureDSPR2;
546 }
547#endif
548
549 return flags;
550}
551
552#else
// Fallback for processors without a detection routine in this file: reports
// that no CPU feature was detected.
static inline uint detectProcessorFeatures()
{
    const uint noFeatures = 0;
    return noFeatures;
}
557#endif
558
559// record what CPU features were enabled by default in this Qt build
560static const quint64 minFeature = qCompilerCpuFeatures;
561
562static constexpr auto SimdInitialized = QCpuFeatureType(1) << (sizeof(QCpuFeatureType) * 8 - 1);
563Q_ATOMIC(QCpuFeatureType) QT_MANGLE_NAMESPACE(qt_cpu_features)[1] = { 0 };
564
565QT_FUNCTION_TARGET_BASELINE
566uint64_t QT_MANGLE_NAMESPACE(qDetectCpuFeatures)()
567{
568 auto minFeatureTest = minFeature;
569#if defined(Q_OS_LINUX) && defined(Q_PROCESSOR_ARM_64)
570 // Yocto hard-codes CRC32+AES on. Since they are unlikely to be used
571 // automatically by compilers, we can just add runtime check.
572 minFeatureTest &= ~(CpuFeatureAES|CpuFeatureCRC32);
573#endif
574#if defined(Q_PROCESSOR_X86_64) && defined(cpu_feature_shstk)
575 // Controlflow Enforcement Technology (CET) is an OS-assisted
576 // hardware-feature, meaning the CPUID bit may be disabled if the OS
577 // doesn't support it, but that's ok.
578 minFeatureTest &= ~CpuFeatureSHSTK;
579#endif
580 QCpuFeatureType f = detectProcessorFeatures();
581
582 // Intentionally NOT qgetenv (this code runs too early)
583 if (char *disable = getenv(name: "QT_NO_CPU_FEATURE"); disable && *disable) {
584#if _POSIX_C_SOURCE >= 200112L
585 char *saveptr = nullptr;
586 auto strtok = [&saveptr](char *str, const char *delim) {
587 return ::strtok_r(s: str, delim: delim, save_ptr: &saveptr);
588 };
589#endif
590 while (char *token = strtok(disable, " ")) {
591 disable = nullptr;
592 for (uint i = 0; i < arraysize(features_indices); ++i) {
593 if (strcmp(s1: token, s2: features_string + features_indices[i]) == 0)
594 f &= ~(Q_UINT64_C(1) << i);
595 }
596 }
597 }
598
599#ifdef RUNNING_ON_VALGRIND
600 bool runningOnValgrind = RUNNING_ON_VALGRIND;
601#else
602 bool runningOnValgrind = false;
603#endif
604 if (Q_UNLIKELY(!runningOnValgrind && minFeatureTest != 0 && (f & minFeatureTest) != minFeatureTest)) {
605 quint64 missing = minFeatureTest & ~quint64(f);
606 fprintf(stderr, format: "Incompatible processor. This Qt build requires the following features:\n ");
607 for (uint i = 0; i < arraysize(features_indices); ++i) {
608 if (missing & (Q_UINT64_C(1) << i))
609 fprintf(stderr, format: "%s", features_string + features_indices[i]);
610 }
611 fprintf(stderr, format: "\n");
612 fflush(stderr);
613 qAbort();
614 }
615
616 assert((f & SimdInitialized) == 0);
617 f |= SimdInitialized;
618 std::atomic_store_explicit(QT_MANGLE_NAMESPACE(qt_cpu_features), i: f, m: std::memory_order_relaxed);
619 return f;
620}
621
622QT_FUNCTION_TARGET_BASELINE
623void qDumpCPUFeatures()
624{
625 quint64 features = detectProcessorFeatures() & ~SimdInitialized;
626 printf(format: "Processor features: ");
627 for (uint i = 0; i < arraysize(features_indices); ++i) {
628 if (features & (Q_UINT64_C(1) << i))
629 printf(format: "%s%s", features_string + features_indices[i],
630 minFeature & (Q_UINT64_C(1) << i) ? "[required]" : "");
631 }
632 if ((features = (qCompilerCpuFeatures & ~features))) {
633 printf(format: "\n!!!!!!!!!!!!!!!!!!!!\n!!! Missing required features:");
634 for (uint i = 0; i < arraysize(features_indices); ++i) {
635 if (features & (Q_UINT64_C(1) << i))
636 printf(format: "%s", features_string + features_indices[i]);
637 }
638 printf(format: "\n!!! Applications will likely crash with \"Invalid Instruction\"\n!!!!!!!!!!!!!!!!!!!!");
639 }
640 puts(s: "");
641}
642
643#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND)
644
645# ifdef Q_PROCESSOR_X86_64
646# define _rdrandXX_step _rdrand64_step
647# define _rdseedXX_step _rdseed64_step
648# else
649# define _rdrandXX_step _rdrand32_step
650# define _rdseedXX_step _rdseed32_step
651# endif
652
653// The parameter to _rdrand64_step & _rdseed64_step is unsigned long long for
654// Clang and GCC but unsigned __int64 for MSVC and ICC, which is unsigned long
655// long on Windows, but unsigned long on Linux.
656namespace {
657template <typename F> struct ExtractParameter;
658template <typename T> struct ExtractParameter<int (T *)> { using Type = T; };
659using randuint = ExtractParameter<decltype(_rdrandXX_step)>::Type;
660}
661
662# if QT_COMPILER_SUPPORTS_HERE(RDSEED)
663static QT_FUNCTION_TARGET(RDSEED) unsigned *qt_random_rdseed(unsigned *ptr, unsigned *end) noexcept
664{
665 // Unlike for the RDRAND code below, the Intel whitepaper describing the
666 // use of the RDSEED instruction indicates we should not retry in a loop.
667 // If the independent bit generator used by RDSEED is out of entropy, it
668 // may take time to replenish.
669 // https://software.intel.com/en-us/articles/intel-digital-random-number-generator-drng-software-implementation-guide
670 while (ptr + sizeof(randuint) / sizeof(*ptr) <= end) {
671 if (_rdseedXX_step(p: reinterpret_cast<randuint *>(ptr)) == 0)
672 goto out;
673 ptr += sizeof(randuint) / sizeof(*ptr);
674 }
675
676 if (sizeof(*ptr) != sizeof(randuint) && ptr != end) {
677 if (_rdseed32_step(p: ptr) == 0)
678 goto out;
679 ++ptr;
680 }
681
682out:
683 return ptr;
684}
685# else
686static unsigned *qt_random_rdseed(unsigned *ptr, unsigned *)
687{
688 return ptr;
689}
690# endif
691
692static QT_FUNCTION_TARGET(RDRND) unsigned *qt_random_rdrnd(unsigned *ptr, unsigned *end) noexcept
693{
694 int retries = 10;
695 while (ptr + sizeof(randuint)/sizeof(*ptr) <= end) {
696 if (_rdrandXX_step(p: reinterpret_cast<randuint *>(ptr)))
697 ptr += sizeof(randuint)/sizeof(*ptr);
698 else if (--retries == 0)
699 goto out;
700 }
701
702 while (sizeof(*ptr) != sizeof(randuint) && ptr != end) {
703 bool ok = _rdrand32_step(p: ptr);
704 if (!ok && --retries)
705 continue;
706 if (ok)
707 ++ptr;
708 break;
709 }
710
711out:
712 return ptr;
713}
714
715QT_FUNCTION_TARGET(BASELINE_RDRND) Q_DECL_COLD_FUNCTION
716static bool checkRdrndWorks() noexcept
717{
718 /*
719 * Some AMD CPUs (e.g. AMD A4-6250J and AMD Ryzen 3000-series) have a
720 * failing random generation instruction, which always returns
721 * 0xffffffff, even when generation was "successful".
722 *
723 * This code checks if hardware random generator generates four consecutive
724 * equal numbers. If it does, then we probably have a failing one and
725 * should disable it completely.
726 *
727 * https://bugreports.qt.io/browse/QTBUG-69423
728 */
729 constexpr qsizetype TestBufferSize = 4;
730 unsigned testBuffer[TestBufferSize] = {};
731
732 unsigned *end = qt_random_rdrnd(ptr: testBuffer, end: testBuffer + TestBufferSize);
733 if (end < testBuffer + 3) {
734 // Random generation didn't produce enough data for us to make a
735 // determination whether it's working or not. Assume it isn't, but
736 // don't print a warning.
737 return false;
738 }
739
740 // Check the results for equality
741 if (testBuffer[0] == testBuffer[1]
742 && testBuffer[0] == testBuffer[2]
743 && (end < testBuffer + TestBufferSize || testBuffer[0] == testBuffer[3])) {
744 fprintf(stderr, format: "WARNING: CPU random generator seem to be failing, "
745 "disabling hardware random number generation\n"
746 "WARNING: RDRND generated:");
747 for (unsigned *ptr = testBuffer; ptr < end; ++ptr)
748 fprintf(stderr, format: " 0x%x", *ptr);
749 fprintf(stderr, format: "\n");
750 return false;
751 }
752
753 // We're good
754 return true;
755}
756
757QT_FUNCTION_TARGET(RDRND) qsizetype qRandomCpu(void *buffer, qsizetype count) noexcept
758{
759 unsigned *ptr = reinterpret_cast<unsigned *>(buffer);
760 unsigned *end = ptr + count;
761
762 if (qCpuHasFeature(RDSEED))
763 ptr = qt_random_rdseed(ptr, end);
764
765 // fill the buffer with RDRND if RDSEED didn't
766 ptr = qt_random_rdrnd(ptr, end);
767 return ptr - reinterpret_cast<unsigned *>(buffer);
768}
769#elif defined(Q_PROCESSOR_X86) && !defined(Q_OS_NACL) && !defined(Q_PROCESSOR_ARM)
// Variant for builds without the RDRND code above: always reports the
// generator as not working.
static bool checkRdrndWorks() noexcept
{
    return false;
}
771#endif // Q_PROCESSOR_X86 && RDRND
772
#if QT_SUPPORTS_INIT_PRIORITY
namespace {
// Constructing an object of this type runs the CPU feature detection.
struct QSimdInitializer
{
    QSimdInitializer()
    {
        QT_MANGLE_NAMESPACE(qDetectCpuFeatures)();
    }
};
} // unnamed namespace

// This is intentionally a dynamic initialization of the variable
Q_DECL_INIT_PRIORITY(01) static QSimdInitializer initializer;
#endif
784
785QT_END_NAMESPACE
786

source code of qtbase/src/corelib/global/qsimd.cpp