1/****************************************************************************
2**
3** Copyright (C) 2017 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the QtCore module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40#include "private/qsimd_p.h"
41
42// The x86 F16C instructions operate on AVX registers, so AVX support is
43// required.
44#if QT_COMPILER_SUPPORTS_HERE(AVX)
45
46#ifdef __cplusplus
47QT_BEGIN_NAMESPACE
48extern "C" {
49#endif
50
51QT_FUNCTION_TARGET(F16C)
52void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) Q_DECL_NOEXCEPT
53{
54 qsizetype i = 0;
55 int epilog_i;
56 for (; i < len - 7; i += 8)
57 _mm_storeu_si128(p: (__m128i *)(out + i), _mm256_cvtps_ph(_mm256_loadu_ps(in + i), 0));
58 if (i < len - 3) {
59 _mm_storel_epi64(p: (__m128i *)(out + i), _mm_cvtps_ph(_mm_loadu_ps(in + i), 0));
60 i += 4;
61 }
62 // Inlining "qfloat16::qfloat16(float f)":
63 for (epilog_i = 0; i < len && epilog_i < 3; ++i, ++epilog_i)
64 out[i] = _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(in[i]), 0), 0);
65}
66
67QT_FUNCTION_TARGET(F16C)
68void qFloatFromFloat16_fast(float *out, const quint16 *in, qsizetype len) Q_DECL_NOEXCEPT
69{
70 qsizetype i = 0;
71 int epilog_i;
72 for (; i < len - 7; i += 8)
73 _mm256_storeu_ps(p: out + i, a: _mm256_cvtph_ps(a: _mm_loadu_si128(p: (const __m128i *)(in + i))));
74 if (i < len - 3) {
75 _mm_storeu_ps(p: out + i, a: _mm_cvtph_ps(a: _mm_loadl_epi64(p: (const __m128i *)(in + i))));
76 i += 4;
77 }
78 // Inlining "qfloat16::operator float()":
79 for (epilog_i = 0; i < len && epilog_i < 3; ++i, ++epilog_i)
80 out[i] = _mm_cvtss_f32(a: _mm_cvtph_ps(a: _mm_cvtsi32_si128(a: in[i])));
81}
82
83#ifdef __cplusplus
84} // extern "C"
85QT_END_NAMESPACE
86#endif
87
88#endif // QT_COMPILER_SUPPORTS_HERE(AVX)
89

// Source: qtbase/src/corelib/global/qfloat16_f16c.c