Warning: That file was not part of the compilation database. It may have many parsing errors.

1/* Copyright (C) 2013-2017 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512FINTRIN_H_INCLUDED
29#define _AVX512FINTRIN_H_INCLUDED
30
31#ifndef __AVX512F__
32#pragma GCC push_options
33#pragma GCC target("avx512f")
34#define __DISABLE_AVX512F__
35#endif /* __AVX512F__ */
36
/* Internal vector types used to implement the intrinsics.  Every type is
   64 bytes wide (__vector_size__ (64)) and differs only in element type.  */
typedef double             __v8df  __attribute__ ((__vector_size__ (64)));
typedef float              __v16sf __attribute__ ((__vector_size__ (64)));
typedef long long          __v8di  __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du  __attribute__ ((__vector_size__ (64)));
typedef int                __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int       __v16su __attribute__ ((__vector_size__ (64)));
typedef short              __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short     __v32hu __attribute__ ((__vector_size__ (64)));
typedef char               __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char      __v64qu __attribute__ ((__vector_size__ (64)));
48
/* User-visible 64-byte vector types.  The Intel API is flexible enough
   that these must be allowed to alias other vector types and their scalar
   components, which is why each carries __may_alias__.  */
typedef float     __m512  __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double    __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
54
/* Unaligned counterparts of the user-visible vector types: identical
   except for the additional __aligned__ (1) attribute.  */
typedef float     __m512_u  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double    __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
59
/* Mask types: __mmask8 is an unsigned char and __mmask16 an unsigned
   short.  They are used as the mask operand of the masked intrinsics.  */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
62
63extern __inline __mmask16
64__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
65_mm512_int2mask (int __M)
66{
67 return (__mmask16) __M;
68}
69
70extern __inline int
71__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
72_mm512_mask2int (__mmask16 __M)
73{
74 return (int) __M;
75}
76
77extern __inline __m512i
78__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79_mm512_set_epi64 (long long __A, long long __B, long long __C,
80 long long __D, long long __E, long long __F,
81 long long __G, long long __H)
82{
83 return __extension__ (__m512i) (__v8di)
84 { __H, __G, __F, __E, __D, __C, __B, __A };
85}
86
87/* Create the vector [A B C D E F G H I J K L M N O P]. */
88extern __inline __m512i
89__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
90_mm512_set_epi32 (int __A, int __B, int __C, int __D,
91 int __E, int __F, int __G, int __H,
92 int __I, int __J, int __K, int __L,
93 int __M, int __N, int __O, int __P)
94{
95 return __extension__ (__m512i)(__v16si)
96 { __P, __O, __N, __M, __L, __K, __J, __I,
97 __H, __G, __F, __E, __D, __C, __B, __A };
98}
99
100extern __inline __m512d
101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
102_mm512_set_pd (double __A, double __B, double __C, double __D,
103 double __E, double __F, double __G, double __H)
104{
105 return __extension__ (__m512d)
106 { __H, __G, __F, __E, __D, __C, __B, __A };
107}
108
109extern __inline __m512
110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
111_mm512_set_ps (float __A, float __B, float __C, float __D,
112 float __E, float __F, float __G, float __H,
113 float __I, float __J, float __K, float __L,
114 float __M, float __N, float __O, float __P)
115{
116 return __extension__ (__m512)
117 { __P, __O, __N, __M, __L, __K, __J, __I,
118 __H, __G, __F, __E, __D, __C, __B, __A };
119}
120
/* "Reversed" setters: each macro passes its arguments, in the opposite
   order, to the matching _mm512_set_* function.  */
#define _mm512_setr_epi64(e0, e1, e2, e3, e4, e5, e6, e7) \
  _mm512_set_epi64 (e7, e6, e5, e4, e3, e2, e1, e0)

#define _mm512_setr_epi32(e0, e1, e2, e3, e4, e5, e6, e7, \
                          e8, e9, e10, e11, e12, e13, e14, e15) \
  _mm512_set_epi32 (e15, e14, e13, e12, e11, e10, e9, e8, \
                    e7, e6, e5, e4, e3, e2, e1, e0)

#define _mm512_setr_pd(e0, e1, e2, e3, e4, e5, e6, e7) \
  _mm512_set_pd (e7, e6, e5, e4, e3, e2, e1, e0)

#define _mm512_setr_ps(e0, e1, e2, e3, e4, e5, e6, e7, \
                       e8, e9, e10, e11, e12, e13, e14, e15) \
  _mm512_set_ps (e15, e14, e13, e12, e11, e10, e9, e8, \
                 e7, e6, e5, e4, e3, e2, e1, e0)
133
134extern __inline __m512
135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136_mm512_undefined_ps (void)
137{
138 __m512 __Y = __Y;
139 return __Y;
140}
141
142#define _mm512_undefined _mm512_undefined_ps
143
144extern __inline __m512d
145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
146_mm512_undefined_pd (void)
147{
148 __m512d __Y = __Y;
149 return __Y;
150}
151
152extern __inline __m512i
153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
154_mm512_undefined_epi32 (void)
155{
156 __m512i __Y = __Y;
157 return __Y;
158}
159
160#define _mm512_undefined_si512 _mm512_undefined_epi32
161
162extern __inline __m512i
163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164_mm512_set1_epi8 (char __A)
165{
166 return __extension__ (__m512i)(__v64qi)
167 { __A, __A, __A, __A, __A, __A, __A, __A,
168 __A, __A, __A, __A, __A, __A, __A, __A,
169 __A, __A, __A, __A, __A, __A, __A, __A,
170 __A, __A, __A, __A, __A, __A, __A, __A,
171 __A, __A, __A, __A, __A, __A, __A, __A,
172 __A, __A, __A, __A, __A, __A, __A, __A,
173 __A, __A, __A, __A, __A, __A, __A, __A,
174 __A, __A, __A, __A, __A, __A, __A, __A };
175}
176
177extern __inline __m512i
178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
179_mm512_set1_epi16 (short __A)
180{
181 return __extension__ (__m512i)(__v32hi)
182 { __A, __A, __A, __A, __A, __A, __A, __A,
183 __A, __A, __A, __A, __A, __A, __A, __A,
184 __A, __A, __A, __A, __A, __A, __A, __A,
185 __A, __A, __A, __A, __A, __A, __A, __A };
186}
187
188extern __inline __m512d
189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
190_mm512_set1_pd (double __A)
191{
192 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
193 (__v2df) { __A, },
194 (__v8df)
195 _mm512_undefined_pd (),
196 (__mmask8) -1);
197}
198
199extern __inline __m512
200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
201_mm512_set1_ps (float __A)
202{
203 return (__m512) __builtin_ia32_broadcastss512 (__extension__
204 (__v4sf) { __A, },
205 (__v16sf)
206 _mm512_undefined_ps (),
207 (__mmask16) -1);
208}
209
210/* Create the vector [A B C D A B C D A B C D A B C D]. */
211extern __inline __m512i
212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
213_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
214{
215 return __extension__ (__m512i)(__v16si)
216 { __D, __C, __B, __A, __D, __C, __B, __A,
217 __D, __C, __B, __A, __D, __C, __B, __A };
218}
219
220extern __inline __m512i
221__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
222_mm512_set4_epi64 (long long __A, long long __B, long long __C,
223 long long __D)
224{
225 return __extension__ (__m512i) (__v8di)
226 { __D, __C, __B, __A, __D, __C, __B, __A };
227}
228
229extern __inline __m512d
230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
231_mm512_set4_pd (double __A, double __B, double __C, double __D)
232{
233 return __extension__ (__m512d)
234 { __D, __C, __B, __A, __D, __C, __B, __A };
235}
236
237extern __inline __m512
238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239_mm512_set4_ps (float __A, float __B, float __C, float __D)
240{
241 return __extension__ (__m512)
242 { __D, __C, __B, __A, __D, __C, __B, __A,
243 __D, __C, __B, __A, __D, __C, __B, __A };
244}
245
/* "Reversed" four-element setters: each macro passes its arguments, in
   the opposite order, to the matching _mm512_set4_* function.  */
#define _mm512_setr4_epi64(e0, e1, e2, e3) \
  _mm512_set4_epi64 (e3, e2, e1, e0)

#define _mm512_setr4_epi32(e0, e1, e2, e3) \
  _mm512_set4_epi32 (e3, e2, e1, e0)

#define _mm512_setr4_pd(e0, e1, e2, e3) \
  _mm512_set4_pd (e3, e2, e1, e0)

#define _mm512_setr4_ps(e0, e1, e2, e3) \
  _mm512_set4_ps (e3, e2, e1, e0)
257
258extern __inline __m512
259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
260_mm512_setzero_ps (void)
261{
262 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
263 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
264}
265
266extern __inline __m512d
267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
268_mm512_setzero_pd (void)
269{
270 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
271}
272
273extern __inline __m512i
274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
275_mm512_setzero_epi32 (void)
276{
277 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
278}
279
280extern __inline __m512i
281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
282_mm512_setzero_si512 (void)
283{
284 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
285}
286
287extern __inline __m512d
288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
289_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
290{
291 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
292 (__v8df) __W,
293 (__mmask8) __U);
294}
295
296extern __inline __m512d
297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
298_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
299{
300 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
301 (__v8df)
302 _mm512_setzero_pd (),
303 (__mmask8) __U);
304}
305
306extern __inline __m512
307__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
308_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
309{
310 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
311 (__v16sf) __W,
312 (__mmask16) __U);
313}
314
315extern __inline __m512
316__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
317_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
318{
319 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
320 (__v16sf)
321 _mm512_setzero_ps (),
322 (__mmask16) __U);
323}
324
325extern __inline __m512d
326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327_mm512_load_pd (void const *__P)
328{
329 return *(__m512d *) __P;
330}
331
332extern __inline __m512d
333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
334_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
335{
336 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
337 (__v8df) __W,
338 (__mmask8) __U);
339}
340
341extern __inline __m512d
342__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
343_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
344{
345 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
346 (__v8df)
347 _mm512_setzero_pd (),
348 (__mmask8) __U);
349}
350
351extern __inline void
352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
353_mm512_store_pd (void *__P, __m512d __A)
354{
355 *(__m512d *) __P = __A;
356}
357
358extern __inline void
359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
360_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
361{
362 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
363 (__mmask8) __U);
364}
365
366extern __inline __m512
367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
368_mm512_load_ps (void const *__P)
369{
370 return *(__m512 *) __P;
371}
372
373extern __inline __m512
374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
375_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
376{
377 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
378 (__v16sf) __W,
379 (__mmask16) __U);
380}
381
382extern __inline __m512
383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
384_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
385{
386 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
387 (__v16sf)
388 _mm512_setzero_ps (),
389 (__mmask16) __U);
390}
391
392extern __inline void
393__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
394_mm512_store_ps (void *__P, __m512 __A)
395{
396 *(__m512 *) __P = __A;
397}
398
399extern __inline void
400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
401_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
402{
403 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
404 (__mmask16) __U);
405}
406
407extern __inline __m512i
408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
409_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
410{
411 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
412 (__v8di) __W,
413 (__mmask8) __U);
414}
415
416extern __inline __m512i
417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
418_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
419{
420 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
421 (__v8di)
422 _mm512_setzero_si512 (),
423 (__mmask8) __U);
424}
425
426extern __inline __m512i
427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
428_mm512_load_epi64 (void const *__P)
429{
430 return *(__m512i *) __P;
431}
432
433extern __inline __m512i
434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
435_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
436{
437 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
438 (__v8di) __W,
439 (__mmask8) __U);
440}
441
442extern __inline __m512i
443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
444_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
445{
446 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
447 (__v8di)
448 _mm512_setzero_si512 (),
449 (__mmask8) __U);
450}
451
452extern __inline void
453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
454_mm512_store_epi64 (void *__P, __m512i __A)
455{
456 *(__m512i *) __P = __A;
457}
458
459extern __inline void
460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
461_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
462{
463 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
464 (__mmask8) __U);
465}
466
467extern __inline __m512i
468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
469_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
470{
471 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
472 (__v16si) __W,
473 (__mmask16) __U);
474}
475
476extern __inline __m512i
477__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
478_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
479{
480 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
481 (__v16si)
482 _mm512_setzero_si512 (),
483 (__mmask16) __U);
484}
485
486extern __inline __m512i
487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
488_mm512_load_si512 (void const *__P)
489{
490 return *(__m512i *) __P;
491}
492
493extern __inline __m512i
494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
495_mm512_load_epi32 (void const *__P)
496{
497 return *(__m512i *) __P;
498}
499
500extern __inline __m512i
501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
502_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
503{
504 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
505 (__v16si) __W,
506 (__mmask16) __U);
507}
508
509extern __inline __m512i
510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
512{
513 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
514 (__v16si)
515 _mm512_setzero_si512 (),
516 (__mmask16) __U);
517}
518
519extern __inline void
520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521_mm512_store_si512 (void *__P, __m512i __A)
522{
523 *(__m512i *) __P = __A;
524}
525
526extern __inline void
527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
528_mm512_store_epi32 (void *__P, __m512i __A)
529{
530 *(__m512i *) __P = __A;
531}
532
533extern __inline void
534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
535_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
536{
537 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
538 (__mmask16) __U);
539}
540
541extern __inline __m512i
542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
543_mm512_mullo_epi32 (__m512i __A, __m512i __B)
544{
545 return (__m512i) ((__v16su) __A * (__v16su) __B);
546}
547
548extern __inline __m512i
549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
550_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
551{
552 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
553 (__v16si) __B,
554 (__v16si)
555 _mm512_setzero_si512 (),
556 __M);
557}
558
559extern __inline __m512i
560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
561_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
562{
563 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
564 (__v16si) __B,
565 (__v16si) __W, __M);
566}
567
568extern __inline __m512i
569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
570_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
571{
572 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
573 (__v16si) __Y,
574 (__v16si)
575 _mm512_undefined_epi32 (),
576 (__mmask16) -1);
577}
578
579extern __inline __m512i
580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
581_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
582{
583 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
584 (__v16si) __Y,
585 (__v16si) __W,
586 (__mmask16) __U);
587}
588
589extern __inline __m512i
590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
591_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
592{
593 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
594 (__v16si) __Y,
595 (__v16si)
596 _mm512_setzero_si512 (),
597 (__mmask16) __U);
598}
599
600extern __inline __m512i
601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602_mm512_srav_epi32 (__m512i __X, __m512i __Y)
603{
604 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
605 (__v16si) __Y,
606 (__v16si)
607 _mm512_undefined_epi32 (),
608 (__mmask16) -1);
609}
610
611extern __inline __m512i
612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
614{
615 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
616 (__v16si) __Y,
617 (__v16si) __W,
618 (__mmask16) __U);
619}
620
621extern __inline __m512i
622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
623_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
624{
625 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
626 (__v16si) __Y,
627 (__v16si)
628 _mm512_setzero_si512 (),
629 (__mmask16) __U);
630}
631
632extern __inline __m512i
633__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
634_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
635{
636 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
637 (__v16si) __Y,
638 (__v16si)
639 _mm512_undefined_epi32 (),
640 (__mmask16) -1);
641}
642
643extern __inline __m512i
644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
645_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
646{
647 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
648 (__v16si) __Y,
649 (__v16si) __W,
650 (__mmask16) __U);
651}
652
653extern __inline __m512i
654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
655_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
656{
657 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
658 (__v16si) __Y,
659 (__v16si)
660 _mm512_setzero_si512 (),
661 (__mmask16) __U);
662}
663
664extern __inline __m512i
665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
666_mm512_add_epi64 (__m512i __A, __m512i __B)
667{
668 return (__m512i) ((__v8du) __A + (__v8du) __B);
669}
670
671extern __inline __m512i
672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
673_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
674{
675 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
676 (__v8di) __B,
677 (__v8di) __W,
678 (__mmask8) __U);
679}
680
681extern __inline __m512i
682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
683_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
684{
685 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
686 (__v8di) __B,
687 (__v8di)
688 _mm512_setzero_si512 (),
689 (__mmask8) __U);
690}
691
692extern __inline __m512i
693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694_mm512_sub_epi64 (__m512i __A, __m512i __B)
695{
696 return (__m512i) ((__v8du) __A - (__v8du) __B);
697}
698
699extern __inline __m512i
700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
701_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
702{
703 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
704 (__v8di) __B,
705 (__v8di) __W,
706 (__mmask8) __U);
707}
708
709extern __inline __m512i
710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
712{
713 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
714 (__v8di) __B,
715 (__v8di)
716 _mm512_setzero_si512 (),
717 (__mmask8) __U);
718}
719
720extern __inline __m512i
721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
723{
724 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
725 (__v8di) __Y,
726 (__v8di)
727 _mm512_undefined_pd (),
728 (__mmask8) -1);
729}
730
731extern __inline __m512i
732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
733_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
734{
735 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
736 (__v8di) __Y,
737 (__v8di) __W,
738 (__mmask8) __U);
739}
740
741extern __inline __m512i
742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
743_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
744{
745 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
746 (__v8di) __Y,
747 (__v8di)
748 _mm512_setzero_si512 (),
749 (__mmask8) __U);
750}
751
752extern __inline __m512i
753__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
754_mm512_srav_epi64 (__m512i __X, __m512i __Y)
755{
756 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
757 (__v8di) __Y,
758 (__v8di)
759 _mm512_undefined_epi32 (),
760 (__mmask8) -1);
761}
762
763extern __inline __m512i
764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
765_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
766{
767 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
768 (__v8di) __Y,
769 (__v8di) __W,
770 (__mmask8) __U);
771}
772
773extern __inline __m512i
774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
775_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
776{
777 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
778 (__v8di) __Y,
779 (__v8di)
780 _mm512_setzero_si512 (),
781 (__mmask8) __U);
782}
783
784extern __inline __m512i
785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
787{
788 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
789 (__v8di) __Y,
790 (__v8di)
791 _mm512_undefined_epi32 (),
792 (__mmask8) -1);
793}
794
795extern __inline __m512i
796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
798{
799 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
800 (__v8di) __Y,
801 (__v8di) __W,
802 (__mmask8) __U);
803}
804
805extern __inline __m512i
806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
807_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
808{
809 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
810 (__v8di) __Y,
811 (__v8di)
812 _mm512_setzero_si512 (),
813 (__mmask8) __U);
814}
815
816extern __inline __m512i
817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
818_mm512_add_epi32 (__m512i __A, __m512i __B)
819{
820 return (__m512i) ((__v16su) __A + (__v16su) __B);
821}
822
823extern __inline __m512i
824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
825_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
826{
827 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
828 (__v16si) __B,
829 (__v16si) __W,
830 (__mmask16) __U);
831}
832
833extern __inline __m512i
834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
835_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
836{
837 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
838 (__v16si) __B,
839 (__v16si)
840 _mm512_setzero_si512 (),
841 (__mmask16) __U);
842}
843
844extern __inline __m512i
845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
846_mm512_mul_epi32 (__m512i __X, __m512i __Y)
847{
848 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
849 (__v16si) __Y,
850 (__v8di)
851 _mm512_undefined_epi32 (),
852 (__mmask8) -1);
853}
854
855extern __inline __m512i
856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
857_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
858{
859 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
860 (__v16si) __Y,
861 (__v8di) __W, __M);
862}
863
864extern __inline __m512i
865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
866_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
867{
868 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
869 (__v16si) __Y,
870 (__v8di)
871 _mm512_setzero_si512 (),
872 __M);
873}
874
875extern __inline __m512i
876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
877_mm512_sub_epi32 (__m512i __A, __m512i __B)
878{
879 return (__m512i) ((__v16su) __A - (__v16su) __B);
880}
881
882extern __inline __m512i
883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
884_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
885{
886 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
887 (__v16si) __B,
888 (__v16si) __W,
889 (__mmask16) __U);
890}
891
892extern __inline __m512i
893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
894_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
895{
896 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
897 (__v16si) __B,
898 (__v16si)
899 _mm512_setzero_si512 (),
900 (__mmask16) __U);
901}
902
903extern __inline __m512i
904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
905_mm512_mul_epu32 (__m512i __X, __m512i __Y)
906{
907 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
908 (__v16si) __Y,
909 (__v8di)
910 _mm512_undefined_epi32 (),
911 (__mmask8) -1);
912}
913
914extern __inline __m512i
915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
916_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
917{
918 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
919 (__v16si) __Y,
920 (__v8di) __W, __M);
921}
922
923extern __inline __m512i
924__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
925_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
926{
927 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
928 (__v16si) __Y,
929 (__v8di)
930 _mm512_setzero_si512 (),
931 __M);
932}
933
/* _mm512_slli_epi64 and its masked variants, all built on
   __builtin_ia32_psllqi512_mask.  When __OPTIMIZE__ is defined they are
   inline functions; otherwise they are macros that paste the count
   argument straight into the builtin call.
   NOTE(review): the macro fallback presumably exists so the count is
   still seen as a constant by the builtin without inlining -- confirm.  */
#ifdef __OPTIMIZE__
/* Unmasked: undefined third operand, all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
  const __v8di __val = (__v8di) __A;
  const __v8di __undef = (__v8di) _mm512_undefined_epi32 ();

  /* __B is passed through untouched so it reaches the builtin directly.  */
  return (__m512i) __builtin_ia32_psllqi512_mask (__val, __B, __undef,
                                                  (__mmask8) -1);
}

/* Masked: __W is the third operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  const __v8di __val = (__v8di) __A;
  const __v8di __src = (__v8di) __W;

  return (__m512i) __builtin_ia32_psllqi512_mask (__val, __B, __src,
                                                  (__mmask8) __U);
}

/* Zero-masked: all-zero third operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  const __v8di __val = (__v8di) __A;
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psllqi512_mask (__val, __B, __zero,
                                                  (__mmask8) __U);
}
#else
#define _mm512_slli_epi64(X, C)                                           \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X),         \
                                            (int)(C),                     \
                                            (__v8di)(__m512i)             \
                                            _mm512_undefined_epi32 (),    \
                                            (__mmask8)-1))

#define _mm512_mask_slli_epi64(W, U, X, C)                                \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X),         \
                                            (int)(C),                     \
                                            (__v8di)(__m512i)(W),         \
                                            (__mmask8)(U)))

#define _mm512_maskz_slli_epi64(U, X, C)                                  \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X),         \
                                            (int)(C),                     \
                                            (__v8di)(__m512i)             \
                                            _mm512_setzero_si512 (),      \
                                            (__mmask8)(U)))
#endif
980
981extern __inline __m512i
982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
983_mm512_sll_epi64 (__m512i __A, __m128i __B)
984{
985 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
986 (__v2di) __B,
987 (__v8di)
988 _mm512_undefined_epi32 (),
989 (__mmask8) -1);
990}
991
992extern __inline __m512i
993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
994_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
995{
996 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
997 (__v2di) __B,
998 (__v8di) __W,
999 (__mmask8) __U);
1000}
1001
1002extern __inline __m512i
1003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1004_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1005{
1006 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1007 (__v2di) __B,
1008 (__v8di)
1009 _mm512_setzero_si512 (),
1010 (__mmask8) __U);
1011}
1012
1013#ifdef __OPTIMIZE__
1014extern __inline __m512i
1015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1016_mm512_srli_epi64 (__m512i __A, unsigned int __B)
1017{
1018 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1019 (__v8di)
1020 _mm512_undefined_epi32 (),
1021 (__mmask8) -1);
1022}
1023
1024extern __inline __m512i
1025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1026_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
1027 __m512i __A, unsigned int __B)
1028{
1029 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1030 (__v8di) __W,
1031 (__mmask8) __U);
1032}
1033
1034extern __inline __m512i
1035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1036_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1037{
1038 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1039 (__v8di)
1040 _mm512_setzero_si512 (),
1041 (__mmask8) __U);
1042}
1043#else
/* Macro forms of the three srli_epi64 intrinsics; each one expands to a
   single __builtin_ia32_psrlqi512_mask call.  */
#define _mm512_srli_epi64(A, N) \
  ((__m512i) __builtin_ia32_psrlqi512_mask \
     ((__v8di)(__m512i)(A), (int)(N), \
      (__v8di)(__m512i)_mm512_undefined_epi32 (), (__mmask8)-1))

#define _mm512_mask_srli_epi64(S, K, A, N) \
  ((__m512i) __builtin_ia32_psrlqi512_mask \
     ((__v8di)(__m512i)(A), (int)(N), \
      (__v8di)(__m512i)(S), (__mmask8)(K)))

#define _mm512_maskz_srli_epi64(K, A, N) \
  ((__m512i) __builtin_ia32_psrlqi512_mask \
     ((__v8di)(__m512i)(A), (int)(N), \
      (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(K)))
1058#endif
1059
1060extern __inline __m512i
1061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1062_mm512_srl_epi64 (__m512i __A, __m128i __B)
1063{
1064 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1065 (__v2di) __B,
1066 (__v8di)
1067 _mm512_undefined_epi32 (),
1068 (__mmask8) -1);
1069}
1070
1071extern __inline __m512i
1072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1073_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1074{
1075 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1076 (__v2di) __B,
1077 (__v8di) __W,
1078 (__mmask8) __U);
1079}
1080
1081extern __inline __m512i
1082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1083_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1084{
1085 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1086 (__v2di) __B,
1087 (__v8di)
1088 _mm512_setzero_si512 (),
1089 (__mmask8) __U);
1090}
1091
1092#ifdef __OPTIMIZE__
1093extern __inline __m512i
1094__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1095_mm512_srai_epi64 (__m512i __A, unsigned int __B)
1096{
1097 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1098 (__v8di)
1099 _mm512_undefined_epi32 (),
1100 (__mmask8) -1);
1101}
1102
1103extern __inline __m512i
1104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1105_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
1106 unsigned int __B)
1107{
1108 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1109 (__v8di) __W,
1110 (__mmask8) __U);
1111}
1112
1113extern __inline __m512i
1114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1115_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1116{
1117 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1118 (__v8di)
1119 _mm512_setzero_si512 (),
1120 (__mmask8) __U);
1121}
1122#else
/* Macro forms of the three srai_epi64 intrinsics; each one expands to a
   single __builtin_ia32_psraqi512_mask call.  */
#define _mm512_srai_epi64(A, N) \
  ((__m512i) __builtin_ia32_psraqi512_mask \
     ((__v8di)(__m512i)(A), (int)(N), \
      (__v8di)(__m512i)_mm512_undefined_epi32 (), (__mmask8)-1))

#define _mm512_mask_srai_epi64(S, K, A, N) \
  ((__m512i) __builtin_ia32_psraqi512_mask \
     ((__v8di)(__m512i)(A), (int)(N), \
      (__v8di)(__m512i)(S), (__mmask8)(K)))

#define _mm512_maskz_srai_epi64(K, A, N) \
  ((__m512i) __builtin_ia32_psraqi512_mask \
     ((__v8di)(__m512i)(A), (int)(N), \
      (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(K)))
1137#endif
1138
1139extern __inline __m512i
1140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1141_mm512_sra_epi64 (__m512i __A, __m128i __B)
1142{
1143 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1144 (__v2di) __B,
1145 (__v8di)
1146 _mm512_undefined_epi32 (),
1147 (__mmask8) -1);
1148}
1149
1150extern __inline __m512i
1151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1152_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1153{
1154 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1155 (__v2di) __B,
1156 (__v8di) __W,
1157 (__mmask8) __U);
1158}
1159
1160extern __inline __m512i
1161__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1162_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1163{
1164 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1165 (__v2di) __B,
1166 (__v8di)
1167 _mm512_setzero_si512 (),
1168 (__mmask8) __U);
1169}
1170
1171#ifdef __OPTIMIZE__
1172extern __inline __m512i
1173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1174_mm512_slli_epi32 (__m512i __A, unsigned int __B)
1175{
1176 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1177 (__v16si)
1178 _mm512_undefined_epi32 (),
1179 (__mmask16) -1);
1180}
1181
1182extern __inline __m512i
1183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1184_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1185 unsigned int __B)
1186{
1187 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1188 (__v16si) __W,
1189 (__mmask16) __U);
1190}
1191
1192extern __inline __m512i
1193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1194_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1195{
1196 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1197 (__v16si)
1198 _mm512_setzero_si512 (),
1199 (__mmask16) __U);
1200}
1201#else
/* Macro forms of the three slli_epi32 intrinsics; each one expands to a
   single __builtin_ia32_pslldi512_mask call.  */
#define _mm512_slli_epi32(A, N) \
  ((__m512i) __builtin_ia32_pslldi512_mask \
     ((__v16si)(__m512i)(A), (int)(N), \
      (__v16si)(__m512i)_mm512_undefined_epi32 (), (__mmask16)-1))

#define _mm512_mask_slli_epi32(S, K, A, N) \
  ((__m512i) __builtin_ia32_pslldi512_mask \
     ((__v16si)(__m512i)(A), (int)(N), \
      (__v16si)(__m512i)(S), (__mmask16)(K)))

#define _mm512_maskz_slli_epi32(K, A, N) \
  ((__m512i) __builtin_ia32_pslldi512_mask \
     ((__v16si)(__m512i)(A), (int)(N), \
      (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(K)))
1216#endif
1217
1218extern __inline __m512i
1219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1220_mm512_sll_epi32 (__m512i __A, __m128i __B)
1221{
1222 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1223 (__v4si) __B,
1224 (__v16si)
1225 _mm512_undefined_epi32 (),
1226 (__mmask16) -1);
1227}
1228
1229extern __inline __m512i
1230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1231_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1232{
1233 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1234 (__v4si) __B,
1235 (__v16si) __W,
1236 (__mmask16) __U);
1237}
1238
1239extern __inline __m512i
1240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1241_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1242{
1243 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1244 (__v4si) __B,
1245 (__v16si)
1246 _mm512_setzero_si512 (),
1247 (__mmask16) __U);
1248}
1249
1250#ifdef __OPTIMIZE__
1251extern __inline __m512i
1252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1253_mm512_srli_epi32 (__m512i __A, unsigned int __B)
1254{
1255 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1256 (__v16si)
1257 _mm512_undefined_epi32 (),
1258 (__mmask16) -1);
1259}
1260
1261extern __inline __m512i
1262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1263_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
1264 __m512i __A, unsigned int __B)
1265{
1266 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1267 (__v16si) __W,
1268 (__mmask16) __U);
1269}
1270
1271extern __inline __m512i
1272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1273_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1274{
1275 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1276 (__v16si)
1277 _mm512_setzero_si512 (),
1278 (__mmask16) __U);
1279}
1280#else
/* Macro forms of the three srli_epi32 intrinsics; each one expands to a
   single __builtin_ia32_psrldi512_mask call.  */
#define _mm512_srli_epi32(A, N) \
  ((__m512i) __builtin_ia32_psrldi512_mask \
     ((__v16si)(__m512i)(A), (int)(N), \
      (__v16si)(__m512i)_mm512_undefined_epi32 (), (__mmask16)-1))

#define _mm512_mask_srli_epi32(S, K, A, N) \
  ((__m512i) __builtin_ia32_psrldi512_mask \
     ((__v16si)(__m512i)(A), (int)(N), \
      (__v16si)(__m512i)(S), (__mmask16)(K)))

#define _mm512_maskz_srli_epi32(K, A, N) \
  ((__m512i) __builtin_ia32_psrldi512_mask \
     ((__v16si)(__m512i)(A), (int)(N), \
      (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(K)))
1295#endif
1296
1297extern __inline __m512i
1298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1299_mm512_srl_epi32 (__m512i __A, __m128i __B)
1300{
1301 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1302 (__v4si) __B,
1303 (__v16si)
1304 _mm512_undefined_epi32 (),
1305 (__mmask16) -1);
1306}
1307
1308extern __inline __m512i
1309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1310_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1311{
1312 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1313 (__v4si) __B,
1314 (__v16si) __W,
1315 (__mmask16) __U);
1316}
1317
1318extern __inline __m512i
1319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1320_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1321{
1322 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1323 (__v4si) __B,
1324 (__v16si)
1325 _mm512_setzero_si512 (),
1326 (__mmask16) __U);
1327}
1328
1329#ifdef __OPTIMIZE__
1330extern __inline __m512i
1331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1332_mm512_srai_epi32 (__m512i __A, unsigned int __B)
1333{
1334 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1335 (__v16si)
1336 _mm512_undefined_epi32 (),
1337 (__mmask16) -1);
1338}
1339
1340extern __inline __m512i
1341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1342_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1343 unsigned int __B)
1344{
1345 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1346 (__v16si) __W,
1347 (__mmask16) __U);
1348}
1349
1350extern __inline __m512i
1351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1352_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1353{
1354 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1355 (__v16si)
1356 _mm512_setzero_si512 (),
1357 (__mmask16) __U);
1358}
1359#else
/* Macro forms of the three srai_epi32 intrinsics; each one expands to a
   single __builtin_ia32_psradi512_mask call.  */
#define _mm512_srai_epi32(A, N) \
  ((__m512i) __builtin_ia32_psradi512_mask \
     ((__v16si)(__m512i)(A), (int)(N), \
      (__v16si)(__m512i)_mm512_undefined_epi32 (), (__mmask16)-1))

#define _mm512_mask_srai_epi32(S, K, A, N) \
  ((__m512i) __builtin_ia32_psradi512_mask \
     ((__v16si)(__m512i)(A), (int)(N), \
      (__v16si)(__m512i)(S), (__mmask16)(K)))

#define _mm512_maskz_srai_epi32(K, A, N) \
  ((__m512i) __builtin_ia32_psradi512_mask \
     ((__v16si)(__m512i)(A), (int)(N), \
      (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(K)))
1374#endif
1375
1376extern __inline __m512i
1377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1378_mm512_sra_epi32 (__m512i __A, __m128i __B)
1379{
1380 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1381 (__v4si) __B,
1382 (__v16si)
1383 _mm512_undefined_epi32 (),
1384 (__mmask16) -1);
1385}
1386
1387extern __inline __m512i
1388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1389_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1390{
1391 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1392 (__v4si) __B,
1393 (__v16si) __W,
1394 (__mmask16) __U);
1395}
1396
1397extern __inline __m512i
1398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1399_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1400{
1401 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1402 (__v4si) __B,
1403 (__v16si)
1404 _mm512_setzero_si512 (),
1405 (__mmask16) __U);
1406}
1407
1408#ifdef __OPTIMIZE__
1409extern __inline __m128d
1410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1411_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1412{
1413 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1414 (__v2df) __B,
1415 __R);
1416}
1417
1418extern __inline __m128d
1419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1420_mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1421 __m128d __B, const int __R)
1422{
1423 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
1424 (__v2df) __B,
1425 (__v2df) __W,
1426 (__mmask8) __U, __R);
1427}
1428
1429extern __inline __m128d
1430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1431_mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1432 const int __R)
1433{
1434 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
1435 (__v2df) __B,
1436 (__v2df)
1437 _mm_setzero_pd (),
1438 (__mmask8) __U, __R);
1439}
1440
1441extern __inline __m128
1442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1443_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1444{
1445 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1446 (__v4sf) __B,
1447 __R);
1448}
1449
1450extern __inline __m128
1451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1452_mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1453 __m128 __B, const int __R)
1454{
1455 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
1456 (__v4sf) __B,
1457 (__v4sf) __W,
1458 (__mmask8) __U, __R);
1459}
1460
1461extern __inline __m128
1462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1463_mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1464 const int __R)
1465{
1466 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
1467 (__v4sf) __B,
1468 (__v4sf)
1469 _mm_setzero_ps (),
1470 (__mmask8) __U, __R);
1471}
1472
1473extern __inline __m128d
1474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1475_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1476{
1477 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1478 (__v2df) __B,
1479 __R);
1480}
1481
1482extern __inline __m128d
1483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1484_mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1485 __m128d __B, const int __R)
1486{
1487 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
1488 (__v2df) __B,
1489 (__v2df) __W,
1490 (__mmask8) __U, __R);
1491}
1492
1493extern __inline __m128d
1494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1495_mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1496 const int __R)
1497{
1498 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
1499 (__v2df) __B,
1500 (__v2df)
1501 _mm_setzero_pd (),
1502 (__mmask8) __U, __R);
1503}
1504
1505extern __inline __m128
1506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1507_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1508{
1509 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1510 (__v4sf) __B,
1511 __R);
1512}
1513
1514extern __inline __m128
1515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1516_mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1517 __m128 __B, const int __R)
1518{
1519 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
1520 (__v4sf) __B,
1521 (__v4sf) __W,
1522 (__mmask8) __U, __R);
1523}
1524
1525extern __inline __m128
1526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1527_mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1528 const int __R)
1529{
1530 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
1531 (__v4sf) __B,
1532 (__v4sf)
1533 _mm_setzero_ps (),
1534 (__mmask8) __U, __R);
1535}
1536
1537#else
/* Macro forms of the add_round_sd intrinsics.  Every expansion is wrapped
   in parentheses so that a following postfix or binary operator applies to
   the whole result and not to the builtin call underneath the cast, and
   the operands carry the same casts as the inline definitions.  */
#define _mm_add_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_addsd_round \
     ((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C)))

#define _mm_mask_add_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_addsd_mask_round \
     ((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(C)))

#define _mm_maskz_add_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_addsd_mask_round \
     ((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd (), (__mmask8)(U), (int)(C)))
1546
/* Macro forms of the add_round_ss intrinsics.  Every expansion is wrapped
   in parentheses so that a following postfix or binary operator applies to
   the whole result and not to the builtin call underneath the cast, and
   the operands carry the same casts as the inline definitions.  */
#define _mm_add_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_addss_round \
     ((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C)))

#define _mm_mask_add_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_addss_mask_round \
     ((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(C)))

#define _mm_maskz_add_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_addss_mask_round \
     ((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps (), (__mmask8)(U), (int)(C)))
1555
/* Macro forms of the sub_round_sd intrinsics.  Every expansion is wrapped
   in parentheses so that a following postfix or binary operator applies to
   the whole result and not to the builtin call underneath the cast, and
   the operands carry the same casts as the inline definitions.  */
#define _mm_sub_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_subsd_round \
     ((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C)))

#define _mm_mask_sub_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_subsd_mask_round \
     ((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(C)))

#define _mm_maskz_sub_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_subsd_mask_round \
     ((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd (), (__mmask8)(U), (int)(C)))
1564
/* Macro forms of the sub_round_ss intrinsics.  Every expansion is wrapped
   in parentheses so that a following postfix or binary operator applies to
   the whole result and not to the builtin call underneath the cast, and
   the operands carry the same casts as the inline definitions.  */
#define _mm_sub_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_subss_round \
     ((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C)))

#define _mm_mask_sub_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_subss_mask_round \
     ((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(C)))

#define _mm_maskz_sub_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_subss_mask_round \
     ((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps (), (__mmask8)(U), (int)(C)))
1573
1574#endif
1575
1576#ifdef __OPTIMIZE__
1577extern __inline __m512i
1578__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1579_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
1580 const int __imm)
1581{
1582 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1583 (__v8di) __B,
1584 (__v8di) __C, __imm,
1585 (__mmask8) -1);
1586}
1587
1588extern __inline __m512i
1589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1590_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
1591 __m512i __C, const int __imm)
1592{
1593 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1594 (__v8di) __B,
1595 (__v8di) __C, __imm,
1596 (__mmask8) __U);
1597}
1598
1599extern __inline __m512i
1600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1601_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
1602 __m512i __C, const int __imm)
1603{
1604 return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1605 (__v8di) __B,
1606 (__v8di) __C,
1607 __imm, (__mmask8) __U);
1608}
1609
1610extern __inline __m512i
1611__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1612_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
1613 const int __imm)
1614{
1615 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1616 (__v16si) __B,
1617 (__v16si) __C,
1618 __imm, (__mmask16) -1);
1619}
1620
1621extern __inline __m512i
1622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1623_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
1624 __m512i __C, const int __imm)
1625{
1626 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1627 (__v16si) __B,
1628 (__v16si) __C,
1629 __imm, (__mmask16) __U);
1630}
1631
1632extern __inline __m512i
1633__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1634_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
1635 __m512i __C, const int __imm)
1636{
1637 return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1638 (__v16si) __B,
1639 (__v16si) __C,
1640 __imm, (__mmask16) __U);
1641}
1642#else
/* Macro forms of the ternarylogic_epi64 intrinsics; the maskz variant
   expands to the _maskz builtin, the other two to the _mask builtin.  */
#define _mm512_ternarylogic_epi64(X, Y, Z, IMM) \
  ((__m512i) __builtin_ia32_pternlogq512_mask \
     ((__v8di)(__m512i)(X), (__v8di)(__m512i)(Y), \
      (__v8di)(__m512i)(Z), (int)(IMM), (__mmask8)-1))
#define _mm512_mask_ternarylogic_epi64(X, K, Y, Z, IMM) \
  ((__m512i) __builtin_ia32_pternlogq512_mask \
     ((__v8di)(__m512i)(X), (__v8di)(__m512i)(Y), \
      (__v8di)(__m512i)(Z), (int)(IMM), (__mmask8)(K)))
#define _mm512_maskz_ternarylogic_epi64(K, X, Y, Z, IMM) \
  ((__m512i) __builtin_ia32_pternlogq512_maskz \
     ((__v8di)(__m512i)(X), (__v8di)(__m512i)(Y), \
      (__v8di)(__m512i)(Z), (int)(IMM), (__mmask8)(K)))
/* Macro forms of the ternarylogic_epi32 intrinsics; the maskz variant
   expands to the _maskz builtin, the other two to the _mask builtin.  */
#define _mm512_ternarylogic_epi32(X, Y, Z, IMM) \
  ((__m512i) __builtin_ia32_pternlogd512_mask \
     ((__v16si)(__m512i)(X), (__v16si)(__m512i)(Y), \
      (__v16si)(__m512i)(Z), (int)(IMM), (__mmask16)-1))
#define _mm512_mask_ternarylogic_epi32(X, K, Y, Z, IMM) \
  ((__m512i) __builtin_ia32_pternlogd512_mask \
     ((__v16si)(__m512i)(X), (__v16si)(__m512i)(Y), \
      (__v16si)(__m512i)(Z), (int)(IMM), (__mmask16)(K)))
#define _mm512_maskz_ternarylogic_epi32(K, X, Y, Z, IMM) \
  ((__m512i) __builtin_ia32_pternlogd512_maskz \
     ((__v16si)(__m512i)(X), (__v16si)(__m512i)(Y), \
      (__v16si)(__m512i)(Z), (int)(IMM), (__mmask16)(K)))
1664#endif
1665
1666extern __inline __m512d
1667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1668_mm512_rcp14_pd (__m512d __A)
1669{
1670 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1671 (__v8df)
1672 _mm512_undefined_pd (),
1673 (__mmask8) -1);
1674}
1675
1676extern __inline __m512d
1677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1678_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1679{
1680 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1681 (__v8df) __W,
1682 (__mmask8) __U);
1683}
1684
1685extern __inline __m512d
1686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1687_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1688{
1689 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1690 (__v8df)
1691 _mm512_setzero_pd (),
1692 (__mmask8) __U);
1693}
1694
1695extern __inline __m512
1696__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1697_mm512_rcp14_ps (__m512 __A)
1698{
1699 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1700 (__v16sf)
1701 _mm512_undefined_ps (),
1702 (__mmask16) -1);
1703}
1704
1705extern __inline __m512
1706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1707_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1708{
1709 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1710 (__v16sf) __W,
1711 (__mmask16) __U);
1712}
1713
1714extern __inline __m512
1715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1716_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1717{
1718 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1719 (__v16sf)
1720 _mm512_setzero_ps (),
1721 (__mmask16) __U);
1722}
1723
1724extern __inline __m128d
1725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1726_mm_rcp14_sd (__m128d __A, __m128d __B)
1727{
1728 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1729 (__v2df) __A);
1730}
1731
1732extern __inline __m128d
1733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1734_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1735{
1736 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1737 (__v2df) __A,
1738 (__v2df) __W,
1739 (__mmask8) __U);
1740}
1741
1742extern __inline __m128d
1743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1744_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1745{
1746 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1747 (__v2df) __A,
1748 (__v2df) _mm_setzero_ps (),
1749 (__mmask8) __U);
1750}
1751
1752extern __inline __m128
1753__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1754_mm_rcp14_ss (__m128 __A, __m128 __B)
1755{
1756 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1757 (__v4sf) __A);
1758}
1759
1760extern __inline __m128
1761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1762_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1763{
1764 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1765 (__v4sf) __A,
1766 (__v4sf) __W,
1767 (__mmask8) __U);
1768}
1769
1770extern __inline __m128
1771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1772_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1773{
1774 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1775 (__v4sf) __A,
1776 (__v4sf) _mm_setzero_ps (),
1777 (__mmask8) __U);
1778}
1779
1780extern __inline __m512d
1781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1782_mm512_rsqrt14_pd (__m512d __A)
1783{
1784 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1785 (__v8df)
1786 _mm512_undefined_pd (),
1787 (__mmask8) -1);
1788}
1789
1790extern __inline __m512d
1791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1792_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1793{
1794 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1795 (__v8df) __W,
1796 (__mmask8) __U);
1797}
1798
1799extern __inline __m512d
1800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1801_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1802{
1803 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1804 (__v8df)
1805 _mm512_setzero_pd (),
1806 (__mmask8) __U);
1807}
1808
1809extern __inline __m512
1810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1811_mm512_rsqrt14_ps (__m512 __A)
1812{
1813 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1814 (__v16sf)
1815 _mm512_undefined_ps (),
1816 (__mmask16) -1);
1817}
1818
1819extern __inline __m512
1820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1821_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1822{
1823 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1824 (__v16sf) __W,
1825 (__mmask16) __U);
1826}
1827
1828extern __inline __m512
1829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1830_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1831{
1832 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1833 (__v16sf)
1834 _mm512_setzero_ps (),
1835 (__mmask16) __U);
1836}
1837
1838extern __inline __m128d
1839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1840_mm_rsqrt14_sd (__m128d __A, __m128d __B)
1841{
1842 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1843 (__v2df) __A);
1844}
1845
1846extern __inline __m128d
1847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1848_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1849{
1850 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1851 (__v2df) __A,
1852 (__v2df) __W,
1853 (__mmask8) __U);
1854}
1855
1856extern __inline __m128d
1857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1858_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1859{
1860 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1861 (__v2df) __A,
1862 (__v2df) _mm_setzero_pd (),
1863 (__mmask8) __U);
1864}
1865
1866extern __inline __m128
1867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1868_mm_rsqrt14_ss (__m128 __A, __m128 __B)
1869{
1870 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1871 (__v4sf) __A);
1872}
1873
1874extern __inline __m128
1875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1876_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1877{
1878 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1879 (__v4sf) __A,
1880 (__v4sf) __W,
1881 (__mmask8) __U);
1882}
1883
1884extern __inline __m128
1885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1886_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1887{
1888 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1889 (__v4sf) __A,
1890 (__v4sf) _mm_setzero_ps (),
1891 (__mmask8) __U);
1892}
1893
1894#ifdef __OPTIMIZE__
1895extern __inline __m512d
1896__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1897_mm512_sqrt_round_pd (__m512d __A, const int __R)
1898{
1899 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1900 (__v8df)
1901 _mm512_undefined_pd (),
1902 (__mmask8) -1, __R);
1903}
1904
1905extern __inline __m512d
1906__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1907_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1908 const int __R)
1909{
1910 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1911 (__v8df) __W,
1912 (__mmask8) __U, __R);
1913}
1914
1915extern __inline __m512d
1916__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1917_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1918{
1919 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1920 (__v8df)
1921 _mm512_setzero_pd (),
1922 (__mmask8) __U, __R);
1923}
1924
1925extern __inline __m512
1926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1927_mm512_sqrt_round_ps (__m512 __A, const int __R)
1928{
1929 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1930 (__v16sf)
1931 _mm512_undefined_ps (),
1932 (__mmask16) -1, __R);
1933}
1934
1935extern __inline __m512
1936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1937_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
1938{
1939 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1940 (__v16sf) __W,
1941 (__mmask16) __U, __R);
1942}
1943
1944extern __inline __m512
1945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1946_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
1947{
1948 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1949 (__v16sf)
1950 _mm512_setzero_ps (),
1951 (__mmask16) __U, __R);
1952}
1953
1954extern __inline __m128d
1955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1956_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
1957{
1958 return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
1959 (__v2df) __A,
1960 __R);
1961}
1962
1963extern __inline __m128
1964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1965_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
1966{
1967 return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
1968 (__v4sf) __A,
1969 __R);
1970}
1971#else
/* Macro forms of the sqrt_round_pd intrinsics.  Every expansion is wrapped
   in parentheses so that a following postfix or binary operator applies to
   the whole result and not to the builtin call underneath the cast, and
   the operands carry the same casts as the inline definitions.  */
#define _mm512_sqrt_round_pd(A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask \
     ((__v8df)(__m512d)(A), (__v8df)_mm512_undefined_pd (), \
      (__mmask8)-1, (int)(C)))

#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask \
     ((__v8df)(__m512d)(A), (__v8df)(__m512d)(W), \
      (__mmask8)(U), (int)(C)))

#define _mm512_maskz_sqrt_round_pd(U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask \
     ((__v8df)(__m512d)(A), (__v8df)_mm512_setzero_pd (), \
      (__mmask8)(U), (int)(C)))
1980
/* Macro forms of the sqrt_round_ps intrinsics.  Every expansion is wrapped
   in parentheses so that a following postfix or binary operator applies to
   the whole result and not to the builtin call underneath the cast, and
   the operands carry the same casts as the inline definitions.  */
#define _mm512_sqrt_round_ps(A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask \
     ((__v16sf)(__m512)(A), (__v16sf)_mm512_undefined_ps (), \
      (__mmask16)-1, (int)(C)))

#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask \
     ((__v16sf)(__m512)(A), (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(C)))

#define _mm512_maskz_sqrt_round_ps(U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask \
     ((__v16sf)(__m512)(A), (__v16sf)_mm512_setzero_ps (), \
      (__mmask16)(U), (int)(C)))
1989
1990#define _mm_sqrt_round_sd(A, B, C) \
1991 (__m128d)__builtin_ia32_sqrtsd_round(A, B, C)
1992
/* Macro form of _mm_sqrt_round_ss.  The builtin takes B first and A second,
   matching the inline-function definition of this intrinsic; passing
   (A, B) here would make the two build modes disagree.  The expansion is
   parenthesized so trailing operators apply to the cast result.  */
#define _mm_sqrt_round_ss(A, B, C)            \
    ((__m128)__builtin_ia32_sqrtss_round(B, A, C))
1995#endif
1996
1997extern __inline __m512i
1998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1999_mm512_cvtepi8_epi32 (__m128i __A)
2000{
2001 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2002 (__v16si)
2003 _mm512_undefined_epi32 (),
2004 (__mmask16) -1);
2005}
2006
2007extern __inline __m512i
2008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2009_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2010{
2011 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2012 (__v16si) __W,
2013 (__mmask16) __U);
2014}
2015
2016extern __inline __m512i
2017__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2018_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
2019{
2020 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2021 (__v16si)
2022 _mm512_setzero_si512 (),
2023 (__mmask16) __U);
2024}
2025
2026extern __inline __m512i
2027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028_mm512_cvtepi8_epi64 (__m128i __A)
2029{
2030 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2031 (__v8di)
2032 _mm512_undefined_epi32 (),
2033 (__mmask8) -1);
2034}
2035
2036extern __inline __m512i
2037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2038_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2039{
2040 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2041 (__v8di) __W,
2042 (__mmask8) __U);
2043}
2044
2045extern __inline __m512i
2046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2047_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2048{
2049 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2050 (__v8di)
2051 _mm512_setzero_si512 (),
2052 (__mmask8) __U);
2053}
2054
2055extern __inline __m512i
2056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057_mm512_cvtepi16_epi32 (__m256i __A)
2058{
2059 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2060 (__v16si)
2061 _mm512_undefined_epi32 (),
2062 (__mmask16) -1);
2063}
2064
2065extern __inline __m512i
2066__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2067_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2068{
2069 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2070 (__v16si) __W,
2071 (__mmask16) __U);
2072}
2073
2074extern __inline __m512i
2075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2076_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
2077{
2078 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2079 (__v16si)
2080 _mm512_setzero_si512 (),
2081 (__mmask16) __U);
2082}
2083
2084extern __inline __m512i
2085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2086_mm512_cvtepi16_epi64 (__m128i __A)
2087{
2088 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2089 (__v8di)
2090 _mm512_undefined_epi32 (),
2091 (__mmask8) -1);
2092}
2093
2094extern __inline __m512i
2095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2096_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2097{
2098 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2099 (__v8di) __W,
2100 (__mmask8) __U);
2101}
2102
2103extern __inline __m512i
2104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2105_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2106{
2107 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2108 (__v8di)
2109 _mm512_setzero_si512 (),
2110 (__mmask8) __U);
2111}
2112
2113extern __inline __m512i
2114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2115_mm512_cvtepi32_epi64 (__m256i __X)
2116{
2117 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2118 (__v8di)
2119 _mm512_undefined_epi32 (),
2120 (__mmask8) -1);
2121}
2122
2123extern __inline __m512i
2124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2125_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2126{
2127 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2128 (__v8di) __W,
2129 (__mmask8) __U);
2130}
2131
2132extern __inline __m512i
2133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2134_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
2135{
2136 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2137 (__v8di)
2138 _mm512_setzero_si512 (),
2139 (__mmask8) __U);
2140}
2141
2142extern __inline __m512i
2143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2144_mm512_cvtepu8_epi32 (__m128i __A)
2145{
2146 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2147 (__v16si)
2148 _mm512_undefined_epi32 (),
2149 (__mmask16) -1);
2150}
2151
2152extern __inline __m512i
2153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2154_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2155{
2156 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2157 (__v16si) __W,
2158 (__mmask16) __U);
2159}
2160
2161extern __inline __m512i
2162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2163_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
2164{
2165 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2166 (__v16si)
2167 _mm512_setzero_si512 (),
2168 (__mmask16) __U);
2169}
2170
2171extern __inline __m512i
2172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2173_mm512_cvtepu8_epi64 (__m128i __A)
2174{
2175 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2176 (__v8di)
2177 _mm512_undefined_epi32 (),
2178 (__mmask8) -1);
2179}
2180
2181extern __inline __m512i
2182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2183_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2184{
2185 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2186 (__v8di) __W,
2187 (__mmask8) __U);
2188}
2189
2190extern __inline __m512i
2191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2192_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
2193{
2194 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2195 (__v8di)
2196 _mm512_setzero_si512 (),
2197 (__mmask8) __U);
2198}
2199
2200extern __inline __m512i
2201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2202_mm512_cvtepu16_epi32 (__m256i __A)
2203{
2204 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2205 (__v16si)
2206 _mm512_undefined_epi32 (),
2207 (__mmask16) -1);
2208}
2209
2210extern __inline __m512i
2211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2212_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2213{
2214 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2215 (__v16si) __W,
2216 (__mmask16) __U);
2217}
2218
2219extern __inline __m512i
2220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2221_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2222{
2223 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2224 (__v16si)
2225 _mm512_setzero_si512 (),
2226 (__mmask16) __U);
2227}
2228
2229extern __inline __m512i
2230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2231_mm512_cvtepu16_epi64 (__m128i __A)
2232{
2233 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2234 (__v8di)
2235 _mm512_undefined_epi32 (),
2236 (__mmask8) -1);
2237}
2238
2239extern __inline __m512i
2240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2241_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2242{
2243 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2244 (__v8di) __W,
2245 (__mmask8) __U);
2246}
2247
2248extern __inline __m512i
2249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2250_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2251{
2252 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2253 (__v8di)
2254 _mm512_setzero_si512 (),
2255 (__mmask8) __U);
2256}
2257
2258extern __inline __m512i
2259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2260_mm512_cvtepu32_epi64 (__m256i __X)
2261{
2262 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2263 (__v8di)
2264 _mm512_undefined_epi32 (),
2265 (__mmask8) -1);
2266}
2267
2268extern __inline __m512i
2269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2270_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2271{
2272 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2273 (__v8di) __W,
2274 (__mmask8) __U);
2275}
2276
2277extern __inline __m512i
2278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2279_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2280{
2281 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2282 (__v8di)
2283 _mm512_setzero_si512 (),
2284 (__mmask8) __U);
2285}
2286
2287#ifdef __OPTIMIZE__
2288extern __inline __m512d
2289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2290_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2291{
2292 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2293 (__v8df) __B,
2294 (__v8df)
2295 _mm512_undefined_pd (),
2296 (__mmask8) -1, __R);
2297}
2298
2299extern __inline __m512d
2300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2301_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2302 __m512d __B, const int __R)
2303{
2304 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2305 (__v8df) __B,
2306 (__v8df) __W,
2307 (__mmask8) __U, __R);
2308}
2309
2310extern __inline __m512d
2311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2312_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2313 const int __R)
2314{
2315 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2316 (__v8df) __B,
2317 (__v8df)
2318 _mm512_setzero_pd (),
2319 (__mmask8) __U, __R);
2320}
2321
2322extern __inline __m512
2323__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2324_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2325{
2326 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2327 (__v16sf) __B,
2328 (__v16sf)
2329 _mm512_undefined_ps (),
2330 (__mmask16) -1, __R);
2331}
2332
2333extern __inline __m512
2334__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2335_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2336 __m512 __B, const int __R)
2337{
2338 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2339 (__v16sf) __B,
2340 (__v16sf) __W,
2341 (__mmask16) __U, __R);
2342}
2343
2344extern __inline __m512
2345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2346_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2347{
2348 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2349 (__v16sf) __B,
2350 (__v16sf)
2351 _mm512_setzero_ps (),
2352 (__mmask16) __U, __R);
2353}
2354
2355extern __inline __m512d
2356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2357_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2358{
2359 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2360 (__v8df) __B,
2361 (__v8df)
2362 _mm512_undefined_pd (),
2363 (__mmask8) -1, __R);
2364}
2365
2366extern __inline __m512d
2367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2368_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2369 __m512d __B, const int __R)
2370{
2371 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2372 (__v8df) __B,
2373 (__v8df) __W,
2374 (__mmask8) __U, __R);
2375}
2376
2377extern __inline __m512d
2378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2379_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2380 const int __R)
2381{
2382 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2383 (__v8df) __B,
2384 (__v8df)
2385 _mm512_setzero_pd (),
2386 (__mmask8) __U, __R);
2387}
2388
2389extern __inline __m512
2390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2391_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2392{
2393 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2394 (__v16sf) __B,
2395 (__v16sf)
2396 _mm512_undefined_ps (),
2397 (__mmask16) -1, __R);
2398}
2399
2400extern __inline __m512
2401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2402_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2403 __m512 __B, const int __R)
2404{
2405 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2406 (__v16sf) __B,
2407 (__v16sf) __W,
2408 (__mmask16) __U, __R);
2409}
2410
2411extern __inline __m512
2412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2413_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2414{
2415 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2416 (__v16sf) __B,
2417 (__v16sf)
2418 _mm512_setzero_ps (),
2419 (__mmask16) __U, __R);
2420}
2421#else
/* Macro forms of the addpd512 rounding intrinsics, used when __OPTIMIZE__
   is not defined.  Each expansion is wrapped in parentheses so that an
   operator written after the macro call applies to the cast result rather
   than to the builtin call.  */
#define _mm512_add_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_add_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C))

#define _mm512_maskz_add_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2430
/* Macro forms of the addps512 rounding intrinsics, used when __OPTIMIZE__
   is not defined.  Each expansion is wrapped in parentheses so that an
   operator written after the macro call applies to the cast result rather
   than to the builtin call.  */
#define _mm512_add_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_add_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_addps512_mask(A, B, W, U, C))

#define _mm512_maskz_add_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2439
/* Macro forms of the subpd512 rounding intrinsics, used when __OPTIMIZE__
   is not defined.  Each expansion is wrapped in parentheses so that an
   operator written after the macro call applies to the cast result rather
   than to the builtin call.  */
#define _mm512_sub_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C))

#define _mm512_maskz_sub_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2448
/* Macro forms of the subps512 rounding intrinsics, used when __OPTIMIZE__
   is not defined.  Each expansion is wrapped in parentheses so that an
   operator written after the macro call applies to the cast result rather
   than to the builtin call.  */
#define _mm512_sub_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_subps512_mask(A, B, W, U, C))

#define _mm512_maskz_sub_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2457#endif
2458
2459#ifdef __OPTIMIZE__
2460extern __inline __m512d
2461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2462_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2463{
2464 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2465 (__v8df) __B,
2466 (__v8df)
2467 _mm512_undefined_pd (),
2468 (__mmask8) -1, __R);
2469}
2470
2471extern __inline __m512d
2472__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2473_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2474 __m512d __B, const int __R)
2475{
2476 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2477 (__v8df) __B,
2478 (__v8df) __W,
2479 (__mmask8) __U, __R);
2480}
2481
2482extern __inline __m512d
2483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2484_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2485 const int __R)
2486{
2487 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2488 (__v8df) __B,
2489 (__v8df)
2490 _mm512_setzero_pd (),
2491 (__mmask8) __U, __R);
2492}
2493
2494extern __inline __m512
2495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2496_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2497{
2498 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2499 (__v16sf) __B,
2500 (__v16sf)
2501 _mm512_undefined_ps (),
2502 (__mmask16) -1, __R);
2503}
2504
2505extern __inline __m512
2506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2507_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2508 __m512 __B, const int __R)
2509{
2510 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2511 (__v16sf) __B,
2512 (__v16sf) __W,
2513 (__mmask16) __U, __R);
2514}
2515
2516extern __inline __m512
2517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2518_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2519{
2520 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2521 (__v16sf) __B,
2522 (__v16sf)
2523 _mm512_setzero_ps (),
2524 (__mmask16) __U, __R);
2525}
2526
2527extern __inline __m512d
2528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2529_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2530{
2531 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2532 (__v8df) __V,
2533 (__v8df)
2534 _mm512_undefined_pd (),
2535 (__mmask8) -1, __R);
2536}
2537
2538extern __inline __m512d
2539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2540_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2541 __m512d __V, const int __R)
2542{
2543 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2544 (__v8df) __V,
2545 (__v8df) __W,
2546 (__mmask8) __U, __R);
2547}
2548
2549extern __inline __m512d
2550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2551_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2552 const int __R)
2553{
2554 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2555 (__v8df) __V,
2556 (__v8df)
2557 _mm512_setzero_pd (),
2558 (__mmask8) __U, __R);
2559}
2560
2561extern __inline __m512
2562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2563_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2564{
2565 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2566 (__v16sf) __B,
2567 (__v16sf)
2568 _mm512_undefined_ps (),
2569 (__mmask16) -1, __R);
2570}
2571
2572extern __inline __m512
2573__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2574_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2575 __m512 __B, const int __R)
2576{
2577 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2578 (__v16sf) __B,
2579 (__v16sf) __W,
2580 (__mmask16) __U, __R);
2581}
2582
2583extern __inline __m512
2584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2585_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2586{
2587 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2588 (__v16sf) __B,
2589 (__v16sf)
2590 _mm512_setzero_ps (),
2591 (__mmask16) __U, __R);
2592}
2593
2594extern __inline __m128d
2595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2596_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2597{
2598 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2599 (__v2df) __B,
2600 __R);
2601}
2602
2603extern __inline __m128d
2604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2605_mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2606 __m128d __B, const int __R)
2607{
2608 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2609 (__v2df) __B,
2610 (__v2df) __W,
2611 (__mmask8) __U, __R);
2612}
2613
2614extern __inline __m128d
2615__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2616_mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2617 const int __R)
2618{
2619 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2620 (__v2df) __B,
2621 (__v2df)
2622 _mm_setzero_pd (),
2623 (__mmask8) __U, __R);
2624}
2625
2626extern __inline __m128
2627__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2628_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2629{
2630 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2631 (__v4sf) __B,
2632 __R);
2633}
2634
2635extern __inline __m128
2636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2637_mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2638 __m128 __B, const int __R)
2639{
2640 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2641 (__v4sf) __B,
2642 (__v4sf) __W,
2643 (__mmask8) __U, __R);
2644}
2645
2646extern __inline __m128
2647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2648_mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2649 const int __R)
2650{
2651 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2652 (__v4sf) __B,
2653 (__v4sf)
2654 _mm_setzero_ps (),
2655 (__mmask8) __U, __R);
2656}
2657
2658extern __inline __m128d
2659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2660_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2661{
2662 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2663 (__v2df) __B,
2664 __R);
2665}
2666
2667extern __inline __m128d
2668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2669_mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2670 __m128d __B, const int __R)
2671{
2672 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2673 (__v2df) __B,
2674 (__v2df) __W,
2675 (__mmask8) __U, __R);
2676}
2677
2678extern __inline __m128d
2679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2680_mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2681 const int __R)
2682{
2683 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2684 (__v2df) __B,
2685 (__v2df)
2686 _mm_setzero_pd (),
2687 (__mmask8) __U, __R);
2688}
2689
2690extern __inline __m128
2691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2692_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2693{
2694 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2695 (__v4sf) __B,
2696 __R);
2697}
2698
2699extern __inline __m128
2700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2701_mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2702 __m128 __B, const int __R)
2703{
2704 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2705 (__v4sf) __B,
2706 (__v4sf) __W,
2707 (__mmask8) __U, __R);
2708}
2709
2710extern __inline __m128
2711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2712_mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2713 const int __R)
2714{
2715 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2716 (__v4sf) __B,
2717 (__v4sf)
2718 _mm_setzero_ps (),
2719 (__mmask8) __U, __R);
2720}
2721
2722#else
/* Macro forms of the mulpd512 rounding intrinsics, used when __OPTIMIZE__
   is not defined.  Each expansion is wrapped in parentheses so that an
   operator written after the macro call applies to the cast result rather
   than to the builtin call.  */
#define _mm512_mul_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C))

#define _mm512_maskz_mul_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2731
/* Macro forms of the mulps512 rounding intrinsics, used when __OPTIMIZE__
   is not defined.  Each expansion is wrapped in parentheses so that an
   operator written after the macro call applies to the cast result rather
   than to the builtin call.  */
#define _mm512_mul_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C))

#define _mm512_maskz_mul_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2740
/* Macro forms of the divpd512 rounding intrinsics, used when __OPTIMIZE__
   is not defined.  Each expansion is wrapped in parentheses so that an
   operator written after the macro call applies to the cast result rather
   than to the builtin call.  */
#define _mm512_div_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_div_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C))

#define _mm512_maskz_div_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2749
/* Macro forms of the divps512 rounding intrinsics, used when __OPTIMIZE__
   is not defined.  Each expansion is wrapped in parentheses so that an
   operator written after the macro call applies to the cast result rather
   than to the builtin call.  */
#define _mm512_div_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_div_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_divps512_mask(A, B, W, U, C))

#define _mm512_maskz_div_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2758
/* Macro forms of the scalar mulsd rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in parentheses
   so that an operator written after the macro call applies to the cast
   result rather than to the builtin call.  */
#define _mm_mul_round_sd(A, B, C)            \
    ((__m128d)__builtin_ia32_mulsd_round(A, B, C))

#define _mm_mask_mul_round_sd(W, U, A, B, C) \
    ((__m128d)__builtin_ia32_mulsd_mask_round(A, B, W, U, C))

#define _mm_maskz_mul_round_sd(U, A, B, C)   \
    ((__m128d)__builtin_ia32_mulsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C))
2767
/* Macro forms of the scalar mulss rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in parentheses
   so that an operator written after the macro call applies to the cast
   result rather than to the builtin call.  */
#define _mm_mul_round_ss(A, B, C)            \
    ((__m128)__builtin_ia32_mulss_round(A, B, C))

#define _mm_mask_mul_round_ss(W, U, A, B, C) \
    ((__m128)__builtin_ia32_mulss_mask_round(A, B, W, U, C))

#define _mm_maskz_mul_round_ss(U, A, B, C)   \
    ((__m128)__builtin_ia32_mulss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C))
2776
/* Macro forms of the scalar divsd rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in parentheses
   so that an operator written after the macro call applies to the cast
   result rather than to the builtin call.  */
#define _mm_div_round_sd(A, B, C)            \
    ((__m128d)__builtin_ia32_divsd_round(A, B, C))

#define _mm_mask_div_round_sd(W, U, A, B, C) \
    ((__m128d)__builtin_ia32_divsd_mask_round(A, B, W, U, C))

#define _mm_maskz_div_round_sd(U, A, B, C)   \
    ((__m128d)__builtin_ia32_divsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C))
2785
/* Macro forms of the scalar divss rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in parentheses
   so that an operator written after the macro call applies to the cast
   result rather than to the builtin call.  */
#define _mm_div_round_ss(A, B, C)            \
    ((__m128)__builtin_ia32_divss_round(A, B, C))

#define _mm_mask_div_round_ss(W, U, A, B, C) \
    ((__m128)__builtin_ia32_divss_mask_round(A, B, W, U, C))

#define _mm_maskz_div_round_ss(U, A, B, C)   \
    ((__m128)__builtin_ia32_divss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C))
2794
2795#endif
2796
2797#ifdef __OPTIMIZE__
2798extern __inline __m512d
2799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2800_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2801{
2802 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2803 (__v8df) __B,
2804 (__v8df)
2805 _mm512_undefined_pd (),
2806 (__mmask8) -1, __R);
2807}
2808
2809extern __inline __m512d
2810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2811_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2812 __m512d __B, const int __R)
2813{
2814 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2815 (__v8df) __B,
2816 (__v8df) __W,
2817 (__mmask8) __U, __R);
2818}
2819
2820extern __inline __m512d
2821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2822_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2823 const int __R)
2824{
2825 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2826 (__v8df) __B,
2827 (__v8df)
2828 _mm512_setzero_pd (),
2829 (__mmask8) __U, __R);
2830}
2831
2832extern __inline __m512
2833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2834_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2835{
2836 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2837 (__v16sf) __B,
2838 (__v16sf)
2839 _mm512_undefined_ps (),
2840 (__mmask16) -1, __R);
2841}
2842
2843extern __inline __m512
2844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2845_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2846 __m512 __B, const int __R)
2847{
2848 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2849 (__v16sf) __B,
2850 (__v16sf) __W,
2851 (__mmask16) __U, __R);
2852}
2853
2854extern __inline __m512
2855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2856_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2857{
2858 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2859 (__v16sf) __B,
2860 (__v16sf)
2861 _mm512_setzero_ps (),
2862 (__mmask16) __U, __R);
2863}
2864
2865extern __inline __m512d
2866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2867_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2868{
2869 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2870 (__v8df) __B,
2871 (__v8df)
2872 _mm512_undefined_pd (),
2873 (__mmask8) -1, __R);
2874}
2875
2876extern __inline __m512d
2877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2878_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2879 __m512d __B, const int __R)
2880{
2881 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2882 (__v8df) __B,
2883 (__v8df) __W,
2884 (__mmask8) __U, __R);
2885}
2886
2887extern __inline __m512d
2888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2889_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2890 const int __R)
2891{
2892 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2893 (__v8df) __B,
2894 (__v8df)
2895 _mm512_setzero_pd (),
2896 (__mmask8) __U, __R);
2897}
2898
2899extern __inline __m512
2900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2901_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2902{
2903 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2904 (__v16sf) __B,
2905 (__v16sf)
2906 _mm512_undefined_ps (),
2907 (__mmask16) -1, __R);
2908}
2909
2910extern __inline __m512
2911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2912_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2913 __m512 __B, const int __R)
2914{
2915 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2916 (__v16sf) __B,
2917 (__v16sf) __W,
2918 (__mmask16) __U, __R);
2919}
2920
2921extern __inline __m512
2922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2923_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2924{
2925 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2926 (__v16sf) __B,
2927 (__v16sf)
2928 _mm512_setzero_ps (),
2929 (__mmask16) __U, __R);
2930}
2931#else
/* Macro forms of the maxpd512 rounding intrinsics, used when __OPTIMIZE__
   is not defined.  Each expansion is wrapped in parentheses so that an
   operator written after the macro call applies to the cast result rather
   than to the builtin call.  */
#define _mm512_max_round_pd(A, B, R)            \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))

#define _mm512_mask_max_round_pd(W, U, A, B, R) \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R))

#define _mm512_maskz_max_round_pd(U, A, B, R)   \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))
2940
/* Macro forms of the maxps512 rounding intrinsics, used when __OPTIMIZE__
   is not defined.  The unmasked form takes its third operand from
   _mm512_undefined_ps (the single-precision helper), the same helper the
   inline-function definition of this intrinsic uses.  Each expansion is
   wrapped in parentheses so that an operator written after the macro call
   applies to the cast result rather than to the builtin call.  */
#define _mm512_max_round_ps(A, B, R)            \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))

#define _mm512_mask_max_round_ps(W, U, A, B, R) \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R))

#define _mm512_maskz_max_round_ps(U, A, B, R)   \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
2949
/* Macro forms of the minpd512 rounding intrinsics, used when __OPTIMIZE__
   is not defined.  Each expansion is wrapped in parentheses so that an
   operator written after the macro call applies to the cast result rather
   than to the builtin call.  */
#define _mm512_min_round_pd(A, B, R)            \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))

#define _mm512_mask_min_round_pd(W, U, A, B, R) \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R))

#define _mm512_maskz_min_round_pd(U, A, B, R)   \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))
2958
/* Macro forms of the minps512 rounding intrinsics, used when __OPTIMIZE__
   is not defined.  Each expansion is wrapped in parentheses so that an
   operator written after the macro call applies to the cast result rather
   than to the builtin call.  */
#define _mm512_min_round_ps(A, B, R)            \
    ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))

#define _mm512_mask_min_round_ps(W, U, A, B, R) \
    ((__m512)__builtin_ia32_minps512_mask(A, B, W, U, R))

#define _mm512_maskz_min_round_ps(U, A, B, R)   \
    ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
2967#endif
2968
2969#ifdef __OPTIMIZE__
2970extern __inline __m512d
2971__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2972_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2973{
2974 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2975 (__v8df) __B,
2976 (__v8df)
2977 _mm512_undefined_pd (),
2978 (__mmask8) -1, __R);
2979}
2980
2981extern __inline __m512d
2982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2983_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2984 __m512d __B, const int __R)
2985{
2986 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2987 (__v8df) __B,
2988 (__v8df) __W,
2989 (__mmask8) __U, __R);
2990}
2991
2992extern __inline __m512d
2993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2994_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2995 const int __R)
2996{
2997 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2998 (__v8df) __B,
2999 (__v8df)
3000 _mm512_setzero_pd (),
3001 (__mmask8) __U, __R);
3002}
3003
3004extern __inline __m512
3005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3006_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
3007{
3008 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3009 (__v16sf) __B,
3010 (__v16sf)
3011 _mm512_undefined_ps (),
3012 (__mmask16) -1, __R);
3013}
3014
3015extern __inline __m512
3016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3017_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
3018 __m512 __B, const int __R)
3019{
3020 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3021 (__v16sf) __B,
3022 (__v16sf) __W,
3023 (__mmask16) __U, __R);
3024}
3025
3026extern __inline __m512
3027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3028_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3029 const int __R)
3030{
3031 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3032 (__v16sf) __B,
3033 (__v16sf)
3034 _mm512_setzero_ps (),
3035 (__mmask16) __U, __R);
3036}
3037
3038extern __inline __m128d
3039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3040_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
3041{
3042 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
3043 (__v2df) __B,
3044 __R);
3045}
3046
3047extern __inline __m128
3048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3049_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
3050{
3051 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
3052 (__v4sf) __B,
3053 __R);
3054}
3055#else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_scalefpd512_mask with an undefined pass-through vector
   and a mask argument of -1.  */
#define _mm512_scalef_round_pd(A, B, C) \
  (__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), \
    (__v8df) _mm512_undefined_pd (), -1, (C))
3058
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_scalefpd512_mask with W as the third operand and U as
   the mask.  */
#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
  (__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), (W), (U), (C))
3061
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_scalefpd512_mask with a zero vector as the third
   operand and U as the mask.  */
#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
  (__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), \
    (__v8df) _mm512_setzero_pd (), (U), (C))
3064
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_scalefps512_mask with an undefined pass-through vector
   and a mask argument of -1.  */
#define _mm512_scalef_round_ps(A, B, C) \
  (__m512) __builtin_ia32_scalefps512_mask ((A), (B), \
    (__v16sf) _mm512_undefined_ps (), -1, (C))
3067
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_scalefps512_mask with W as the third operand and U as
   the mask.  */
#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
  (__m512) __builtin_ia32_scalefps512_mask ((A), (B), (W), (U), (C))
3070
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_scalefps512_mask with a zero vector as the third
   operand and U as the mask.  */
#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
  (__m512) __builtin_ia32_scalefps512_mask ((A), (B), \
    (__v16sf) _mm512_setzero_ps (), (U), (C))
3073
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_scalefsd_round (A, B, C).  */
#define _mm_scalef_round_sd(A, B, C) \
  (__m128d) __builtin_ia32_scalefsd_round ((A), (B), (C))
3076
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_scalefss_round (A, B, C).  */
#define _mm_scalef_round_ss(A, B, C) \
  (__m128) __builtin_ia32_scalefss_round ((A), (B), (C))
3079#endif
3080
3081#ifdef __OPTIMIZE__
3082extern __inline __m512d
3083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3084_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3085{
3086 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3087 (__v8df) __B,
3088 (__v8df) __C,
3089 (__mmask8) -1, __R);
3090}
3091
3092extern __inline __m512d
3093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3094_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3095 __m512d __C, const int __R)
3096{
3097 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3098 (__v8df) __B,
3099 (__v8df) __C,
3100 (__mmask8) __U, __R);
3101}
3102
3103extern __inline __m512d
3104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3105_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3106 __mmask8 __U, const int __R)
3107{
3108 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
3109 (__v8df) __B,
3110 (__v8df) __C,
3111 (__mmask8) __U, __R);
3112}
3113
3114extern __inline __m512d
3115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3116_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3117 __m512d __C, const int __R)
3118{
3119 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
3120 (__v8df) __B,
3121 (__v8df) __C,
3122 (__mmask8) __U, __R);
3123}
3124
3125extern __inline __m512
3126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3127_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3128{
3129 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3130 (__v16sf) __B,
3131 (__v16sf) __C,
3132 (__mmask16) -1, __R);
3133}
3134
3135extern __inline __m512
3136__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3137_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3138 __m512 __C, const int __R)
3139{
3140 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3141 (__v16sf) __B,
3142 (__v16sf) __C,
3143 (__mmask16) __U, __R);
3144}
3145
3146extern __inline __m512
3147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3148_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3149 __mmask16 __U, const int __R)
3150{
3151 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
3152 (__v16sf) __B,
3153 (__v16sf) __C,
3154 (__mmask16) __U, __R);
3155}
3156
3157extern __inline __m512
3158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3159_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3160 __m512 __C, const int __R)
3161{
3162 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
3163 (__v16sf) __B,
3164 (__v16sf) __C,
3165 (__mmask16) __U, __R);
3166}
3167
3168extern __inline __m512d
3169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3170_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3171{
3172 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3173 (__v8df) __B,
3174 -(__v8df) __C,
3175 (__mmask8) -1, __R);
3176}
3177
3178extern __inline __m512d
3179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3180_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3181 __m512d __C, const int __R)
3182{
3183 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3184 (__v8df) __B,
3185 -(__v8df) __C,
3186 (__mmask8) __U, __R);
3187}
3188
3189extern __inline __m512d
3190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3191_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3192 __mmask8 __U, const int __R)
3193{
3194 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3195 (__v8df) __B,
3196 (__v8df) __C,
3197 (__mmask8) __U, __R);
3198}
3199
3200extern __inline __m512d
3201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3202_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3203 __m512d __C, const int __R)
3204{
3205 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
3206 (__v8df) __B,
3207 -(__v8df) __C,
3208 (__mmask8) __U, __R);
3209}
3210
3211extern __inline __m512
3212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3213_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3214{
3215 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3216 (__v16sf) __B,
3217 -(__v16sf) __C,
3218 (__mmask16) -1, __R);
3219}
3220
3221extern __inline __m512
3222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3223_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3224 __m512 __C, const int __R)
3225{
3226 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3227 (__v16sf) __B,
3228 -(__v16sf) __C,
3229 (__mmask16) __U, __R);
3230}
3231
3232extern __inline __m512
3233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3234_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3235 __mmask16 __U, const int __R)
3236{
3237 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3238 (__v16sf) __B,
3239 (__v16sf) __C,
3240 (__mmask16) __U, __R);
3241}
3242
3243extern __inline __m512
3244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3245_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3246 __m512 __C, const int __R)
3247{
3248 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
3249 (__v16sf) __B,
3250 -(__v16sf) __C,
3251 (__mmask16) __U, __R);
3252}
3253
3254extern __inline __m512d
3255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3256_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3257{
3258 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3259 (__v8df) __B,
3260 (__v8df) __C,
3261 (__mmask8) -1, __R);
3262}
3263
3264extern __inline __m512d
3265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3266_mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3267 __m512d __C, const int __R)
3268{
3269 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3270 (__v8df) __B,
3271 (__v8df) __C,
3272 (__mmask8) __U, __R);
3273}
3274
3275extern __inline __m512d
3276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3277_mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3278 __mmask8 __U, const int __R)
3279{
3280 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3281 (__v8df) __B,
3282 (__v8df) __C,
3283 (__mmask8) __U, __R);
3284}
3285
3286extern __inline __m512d
3287__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3288_mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3289 __m512d __C, const int __R)
3290{
3291 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3292 (__v8df) __B,
3293 (__v8df) __C,
3294 (__mmask8) __U, __R);
3295}
3296
3297extern __inline __m512
3298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3299_mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3300{
3301 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3302 (__v16sf) __B,
3303 (__v16sf) __C,
3304 (__mmask16) -1, __R);
3305}
3306
3307extern __inline __m512
3308__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3309_mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3310 __m512 __C, const int __R)
3311{
3312 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3313 (__v16sf) __B,
3314 (__v16sf) __C,
3315 (__mmask16) __U, __R);
3316}
3317
3318extern __inline __m512
3319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3320_mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3321 __mmask16 __U, const int __R)
3322{
3323 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3324 (__v16sf) __B,
3325 (__v16sf) __C,
3326 (__mmask16) __U, __R);
3327}
3328
3329extern __inline __m512
3330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3331_mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3332 __m512 __C, const int __R)
3333{
3334 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3335 (__v16sf) __B,
3336 (__v16sf) __C,
3337 (__mmask16) __U, __R);
3338}
3339
3340extern __inline __m512d
3341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3342_mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3343{
3344 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3345 (__v8df) __B,
3346 -(__v8df) __C,
3347 (__mmask8) -1, __R);
3348}
3349
3350extern __inline __m512d
3351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3352_mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3353 __m512d __C, const int __R)
3354{
3355 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3356 (__v8df) __B,
3357 -(__v8df) __C,
3358 (__mmask8) __U, __R);
3359}
3360
3361extern __inline __m512d
3362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3363_mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3364 __mmask8 __U, const int __R)
3365{
3366 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3367 (__v8df) __B,
3368 (__v8df) __C,
3369 (__mmask8) __U, __R);
3370}
3371
3372extern __inline __m512d
3373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3374_mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3375 __m512d __C, const int __R)
3376{
3377 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3378 (__v8df) __B,
3379 -(__v8df) __C,
3380 (__mmask8) __U, __R);
3381}
3382
3383extern __inline __m512
3384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3385_mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3386{
3387 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3388 (__v16sf) __B,
3389 -(__v16sf) __C,
3390 (__mmask16) -1, __R);
3391}
3392
3393extern __inline __m512
3394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3395_mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3396 __m512 __C, const int __R)
3397{
3398 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3399 (__v16sf) __B,
3400 -(__v16sf) __C,
3401 (__mmask16) __U, __R);
3402}
3403
3404extern __inline __m512
3405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3406_mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3407 __mmask16 __U, const int __R)
3408{
3409 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3410 (__v16sf) __B,
3411 (__v16sf) __C,
3412 (__mmask16) __U, __R);
3413}
3414
3415extern __inline __m512
3416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3417_mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3418 __m512 __C, const int __R)
3419{
3420 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3421 (__v16sf) __B,
3422 -(__v16sf) __C,
3423 (__mmask16) __U, __R);
3424}
3425
3426extern __inline __m512d
3427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3428_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3429{
3430 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3431 (__v8df) __B,
3432 (__v8df) __C,
3433 (__mmask8) -1, __R);
3434}
3435
3436extern __inline __m512d
3437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3438_mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3439 __m512d __C, const int __R)
3440{
3441 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3442 (__v8df) __B,
3443 (__v8df) __C,
3444 (__mmask8) __U, __R);
3445}
3446
3447extern __inline __m512d
3448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3449_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3450 __mmask8 __U, const int __R)
3451{
3452 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3453 (__v8df) __B,
3454 (__v8df) __C,
3455 (__mmask8) __U, __R);
3456}
3457
3458extern __inline __m512d
3459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3460_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3461 __m512d __C, const int __R)
3462{
3463 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3464 (__v8df) __B,
3465 (__v8df) __C,
3466 (__mmask8) __U, __R);
3467}
3468
3469extern __inline __m512
3470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3471_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3472{
3473 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3474 (__v16sf) __B,
3475 (__v16sf) __C,
3476 (__mmask16) -1, __R);
3477}
3478
3479extern __inline __m512
3480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3481_mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3482 __m512 __C, const int __R)
3483{
3484 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3485 (__v16sf) __B,
3486 (__v16sf) __C,
3487 (__mmask16) __U, __R);
3488}
3489
3490extern __inline __m512
3491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3492_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3493 __mmask16 __U, const int __R)
3494{
3495 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3496 (__v16sf) __B,
3497 (__v16sf) __C,
3498 (__mmask16) __U, __R);
3499}
3500
3501extern __inline __m512
3502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3503_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3504 __m512 __C, const int __R)
3505{
3506 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3507 (__v16sf) __B,
3508 (__v16sf) __C,
3509 (__mmask16) __U, __R);
3510}
3511
3512extern __inline __m512d
3513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3514_mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3515{
3516 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3517 (__v8df) __B,
3518 -(__v8df) __C,
3519 (__mmask8) -1, __R);
3520}
3521
3522extern __inline __m512d
3523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3524_mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3525 __m512d __C, const int __R)
3526{
3527 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3528 (__v8df) __B,
3529 (__v8df) __C,
3530 (__mmask8) __U, __R);
3531}
3532
3533extern __inline __m512d
3534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3535_mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3536 __mmask8 __U, const int __R)
3537{
3538 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3539 (__v8df) __B,
3540 (__v8df) __C,
3541 (__mmask8) __U, __R);
3542}
3543
3544extern __inline __m512d
3545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3546_mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3547 __m512d __C, const int __R)
3548{
3549 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3550 (__v8df) __B,
3551 -(__v8df) __C,
3552 (__mmask8) __U, __R);
3553}
3554
3555extern __inline __m512
3556__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3557_mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3558{
3559 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3560 (__v16sf) __B,
3561 -(__v16sf) __C,
3562 (__mmask16) -1, __R);
3563}
3564
3565extern __inline __m512
3566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3567_mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3568 __m512 __C, const int __R)
3569{
3570 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3571 (__v16sf) __B,
3572 (__v16sf) __C,
3573 (__mmask16) __U, __R);
3574}
3575
3576extern __inline __m512
3577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3578_mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3579 __mmask16 __U, const int __R)
3580{
3581 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3582 (__v16sf) __B,
3583 (__v16sf) __C,
3584 (__mmask16) __U, __R);
3585}
3586
3587extern __inline __m512
3588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3589_mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3590 __m512 __C, const int __R)
3591{
3592 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3593 (__v16sf) __B,
3594 -(__v16sf) __C,
3595 (__mmask16) __U, __R);
3596}
3597#else
/* Macro form: expands to __builtin_ia32_vfmaddpd512_mask with a mask
   argument of -1.  */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((A), (B), (C), -1, (R))
3600
/* Macro form: expands to __builtin_ia32_vfmaddpd512_mask with U as the
   mask.  */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((A), (B), (C), (U), (R))
3603
/* Macro form: expands to __builtin_ia32_vfmaddpd512_mask3 with U as the
   mask.  */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((A), (B), (C), (U), (R))
3606
/* Macro form: expands to __builtin_ia32_vfmaddpd512_maskz with U as the
   mask.  */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((A), (B), (C), (U), (R))
3609
/* Macro form: expands to __builtin_ia32_vfmaddps512_mask with a mask
   argument of -1.  */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask ((A), (B), (C), -1, (R))
3612
/* Macro form: expands to __builtin_ia32_vfmaddps512_mask with U as the
   mask.  */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask ((A), (B), (C), (U), (R))
3615
/* Macro form: expands to __builtin_ia32_vfmaddps512_mask3 with U as the
   mask.  */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask3 ((A), (B), (C), (U), (R))
3618
/* Macro form: expands to __builtin_ia32_vfmaddps512_maskz with U as the
   mask.  */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz ((A), (B), (C), (U), (R))
3621
/* Macro form: expands to __builtin_ia32_vfmaddpd512_mask with C negated
   and a mask argument of -1.  */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((A), (B), -(C), -1, (R))
3624
/* Macro form: expands to __builtin_ia32_vfmaddpd512_mask with C negated
   and U as the mask.  */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((A), (B), -(C), (U), (R))
3627
/* Macro form: expands to __builtin_ia32_vfmsubpd512_mask3 with U as the
   mask; C is passed unnegated.  */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((A), (B), (C), (U), (R))
3630
/* Macro form: expands to __builtin_ia32_vfmaddpd512_maskz with C
   negated and U as the mask.  */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((A), (B), -(C), (U), (R))
3633
/* Macro form: expands to __builtin_ia32_vfmaddps512_mask with C negated
   and a mask argument of -1.  */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask ((A), (B), -(C), -1, (R))
3636
/* Macro form: expands to __builtin_ia32_vfmaddps512_mask with C negated
   and U as the mask.  */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask ((A), (B), -(C), (U), (R))
3639
/* Macro form: expands to __builtin_ia32_vfmsubps512_mask3 with U as the
   mask; C is passed unnegated.  */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmsubps512_mask3 ((A), (B), (C), (U), (R))
3642
/* Macro form: expands to __builtin_ia32_vfmaddps512_maskz with C
   negated and U as the mask.  */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz ((A), (B), -(C), (U), (R))
3645
/* Macro form: expands to __builtin_ia32_vfmaddsubpd512_mask with a mask
   argument of -1.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((A), (B), (C), -1, (R))
3648
/* Macro form: expands to __builtin_ia32_vfmaddsubpd512_mask with U as
   the mask.  This is the same builtin the inline definition of
   _mm512_mask_fmaddsub_round_pd calls; the macro previously expanded to
   __builtin_ia32_vfmaddpd512_mask, so builds without __OPTIMIZE__ got a
   different operation than optimized builds.  */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((A), (B), (C), (U), (R))
3651
/* Macro form: expands to __builtin_ia32_vfmaddsubpd512_mask3 with U as
   the mask.  */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((A), (B), (C), (U), (R))
3654
/* Macro form: expands to __builtin_ia32_vfmaddsubpd512_maskz with U as
   the mask.  */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((A), (B), (C), (U), (R))
3657
/* Macro form: expands to __builtin_ia32_vfmaddsubps512_mask with a mask
   argument of -1.  */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((A), (B), (C), -1, (R))
3660
/* Macro form: expands to __builtin_ia32_vfmaddsubps512_mask with U as
   the mask.  */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((A), (B), (C), (U), (R))
3663
/* Macro form: expands to __builtin_ia32_vfmaddsubps512_mask3 with U as
   the mask.  */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((A), (B), (C), (U), (R))
3666
/* Macro form: expands to __builtin_ia32_vfmaddsubps512_maskz with U as
   the mask.  */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((A), (B), (C), (U), (R))
3669
/* Macro form: expands to __builtin_ia32_vfmaddsubpd512_mask with C
   negated and a mask argument of -1.  */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((A), (B), -(C), -1, (R))
3672
/* Macro form: expands to __builtin_ia32_vfmaddsubpd512_mask with C
   negated and U as the mask.  */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((A), (B), -(C), (U), (R))
3675
/* Macro form: expands to __builtin_ia32_vfmsubaddpd512_mask3 with U as
   the mask; C is passed unnegated.  */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((A), (B), (C), (U), (R))
3678
/* Macro form: expands to __builtin_ia32_vfmaddsubpd512_maskz with C
   negated and U as the mask.  */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((A), (B), -(C), (U), (R))
3681
/* Macro form: expands to __builtin_ia32_vfmaddsubps512_mask with C
   negated and a mask argument of -1.  */
#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((A), (B), -(C), -1, (R))
3684
/* Macro form: expands to __builtin_ia32_vfmaddsubps512_mask with C
   negated and U as the mask.  */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((A), (B), -(C), (U), (R))
3687
/* Macro form: expands to __builtin_ia32_vfmsubaddps512_mask3 with U as
   the mask; C is passed unnegated.  */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((A), (B), (C), (U), (R))
3690
/* Macro form: expands to __builtin_ia32_vfmaddsubps512_maskz with C
   negated and U as the mask.  */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((A), (B), -(C), (U), (R))
3693
/* Macro form: expands to __builtin_ia32_vfmaddpd512_mask with A negated
   and a mask argument of -1.  */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), (B), (C), -1, (R))
3696
/* Macro form: expands to __builtin_ia32_vfnmaddpd512_mask with U as the
   mask.  A is passed unnegated, exactly as the inline definition of
   _mm512_mask_fnmadd_round_pd passes __A to the same builtin; the macro
   previously passed -(A), which disagreed with the inline form.  */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfnmaddpd512_mask ((A), (B), (C), (U), (R))
3699
/* Macro form: expands to __builtin_ia32_vfmaddpd512_mask3 with A
   negated and U as the mask.  */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(A), (B), (C), (U), (R))
3702
/* Macro form: expands to __builtin_ia32_vfmaddpd512_maskz with A
   negated and U as the mask.  */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), (B), (C), (U), (R))
3705
/* Macro form: expands to __builtin_ia32_vfmaddps512_mask with A negated
   and a mask argument of -1.  */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(A), (B), (C), -1, (R))
3708
/* Macro form: expands to __builtin_ia32_vfnmaddps512_mask with U as the
   mask.  A is passed unnegated, exactly as the inline definition of
   _mm512_mask_fnmadd_round_ps passes __A to the same builtin; the macro
   previously passed -(A), which disagreed with the inline form.  */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfnmaddps512_mask ((A), (B), (C), (U), (R))
3711
/* Macro form: expands to __builtin_ia32_vfmaddps512_mask3 with A
   negated and U as the mask.  */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask3 (-(A), (B), (C), (U), (R))
3714
/* Macro form: expands to __builtin_ia32_vfmaddps512_maskz with A
   negated and U as the mask.  */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(A), (B), (C), (U), (R))
3717
/* Macro form: expands to __builtin_ia32_vfmaddpd512_mask with both A
   and C negated and a mask argument of -1.  */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), (B), -(C), -1, (R))
3720
/* Macro form: expands to __builtin_ia32_vfnmsubpd512_mask with U as the
   mask; no operand is negated here.  */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfnmsubpd512_mask ((A), (B), (C), (U), (R))
3723
/* Macro form: expands to __builtin_ia32_vfnmsubpd512_mask3 with U as
   the mask; no operand is negated here.  */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((A), (B), (C), (U), (R))
3726
/* Macro form: expands to __builtin_ia32_vfmaddpd512_maskz with both A
   and C negated and U as the mask.  */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), (B), -(C), (U), (R))
3729
/* Macro form: expands to __builtin_ia32_vfmaddps512_mask with both A
   and C negated and a mask argument of -1.  */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(A), (B), -(C), -1, (R))
3732
/* Macro form: expands to __builtin_ia32_vfnmsubps512_mask with U as the
   mask; no operand is negated here.  */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfnmsubps512_mask ((A), (B), (C), (U), (R))
3735
/* Macro form: expands to __builtin_ia32_vfnmsubps512_mask3 with U as
   the mask; no operand is negated here.  */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfnmsubps512_mask3 ((A), (B), (C), (U), (R))
3738
/* Macro form: expands to __builtin_ia32_vfmaddps512_maskz with both A
   and C negated and U as the mask.  */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(A), (B), -(C), (U), (R))
3741#endif
3742
3743extern __inline __m512i
3744__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3745_mm512_abs_epi64 (__m512i __A)
3746{
3747 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3748 (__v8di)
3749 _mm512_undefined_epi32 (),
3750 (__mmask8) -1);
3751}
3752
3753extern __inline __m512i
3754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3755_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3756{
3757 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3758 (__v8di) __W,
3759 (__mmask8) __U);
3760}
3761
3762extern __inline __m512i
3763__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3764_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3765{
3766 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3767 (__v8di)
3768 _mm512_setzero_si512 (),
3769 (__mmask8) __U);
3770}
3771
3772extern __inline __m512i
3773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3774_mm512_abs_epi32 (__m512i __A)
3775{
3776 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3777 (__v16si)
3778 _mm512_undefined_epi32 (),
3779 (__mmask16) -1);
3780}
3781
3782extern __inline __m512i
3783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3784_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3785{
3786 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3787 (__v16si) __W,
3788 (__mmask16) __U);
3789}
3790
3791extern __inline __m512i
3792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3793_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3794{
3795 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3796 (__v16si)
3797 _mm512_setzero_si512 (),
3798 (__mmask16) __U);
3799}
3800
3801extern __inline __m512
3802__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3803_mm512_broadcastss_ps (__m128 __A)
3804{
3805 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3806 (__v16sf)
3807 _mm512_undefined_ps (),
3808 (__mmask16) -1);
3809}
3810
3811extern __inline __m512
3812__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3813_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3814{
3815 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3816 (__v16sf) __O, __M);
3817}
3818
3819extern __inline __m512
3820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3821_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3822{
3823 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3824 (__v16sf)
3825 _mm512_setzero_ps (),
3826 __M);
3827}
3828
3829extern __inline __m512d
3830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3831_mm512_broadcastsd_pd (__m128d __A)
3832{
3833 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3834 (__v8df)
3835 _mm512_undefined_pd (),
3836 (__mmask8) -1);
3837}
3838
3839extern __inline __m512d
3840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3841_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3842{
3843 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3844 (__v8df) __O, __M);
3845}
3846
3847extern __inline __m512d
3848__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3849_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3850{
3851 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3852 (__v8df)
3853 _mm512_setzero_pd (),
3854 __M);
3855}
3856
3857extern __inline __m512i
3858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859_mm512_broadcastd_epi32 (__m128i __A)
3860{
3861 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3862 (__v16si)
3863 _mm512_undefined_epi32 (),
3864 (__mmask16) -1);
3865}
3866
3867extern __inline __m512i
3868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3869_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3870{
3871 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3872 (__v16si) __O, __M);
3873}
3874
3875extern __inline __m512i
3876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3877_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3878{
3879 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3880 (__v16si)
3881 _mm512_setzero_si512 (),
3882 __M);
3883}
3884
3885extern __inline __m512i
3886__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3887_mm512_set1_epi32 (int __A)
3888{
3889 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3890 (__v16si)
3891 _mm512_undefined_epi32 (),
3892 (__mmask16)(-1));
3893}
3894
3895extern __inline __m512i
3896__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3897_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3898{
3899 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3900 __M);
3901}
3902
3903extern __inline __m512i
3904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3905_mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3906{
3907 return (__m512i)
3908 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3909 (__v16si) _mm512_setzero_si512 (),
3910 __M);
3911}
3912
3913extern __inline __m512i
3914__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3915_mm512_broadcastq_epi64 (__m128i __A)
3916{
3917 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3918 (__v8di)
3919 _mm512_undefined_epi32 (),
3920 (__mmask8) -1);
3921}
3922
3923extern __inline __m512i
3924__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3925_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3926{
3927 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3928 (__v8di) __O, __M);
3929}
3930
3931extern __inline __m512i
3932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3933_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3934{
3935 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3936 (__v8di)
3937 _mm512_setzero_si512 (),
3938 __M);
3939}
3940
3941extern __inline __m512i
3942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3943_mm512_set1_epi64 (long long __A)
3944{
3945 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3946 (__v8di)
3947 _mm512_undefined_epi32 (),
3948 (__mmask8)(-1));
3949}
3950
3951extern __inline __m512i
3952__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3953_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3954{
3955 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3956 __M);
3957}
3958
3959extern __inline __m512i
3960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3961_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3962{
3963 return (__m512i)
3964 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3965 (__v8di) _mm512_setzero_si512 (),
3966 __M);
3967}
3968
3969extern __inline __m512
3970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3971_mm512_broadcast_f32x4 (__m128 __A)
3972{
3973 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3974 (__v16sf)
3975 _mm512_undefined_ps (),
3976 (__mmask16) -1);
3977}
3978
3979extern __inline __m512
3980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3981_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3982{
3983 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3984 (__v16sf) __O,
3985 __M);
3986}
3987
3988extern __inline __m512
3989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3990_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3991{
3992 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3993 (__v16sf)
3994 _mm512_setzero_ps (),
3995 __M);
3996}
3997
3998extern __inline __m512i
3999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4000_mm512_broadcast_i32x4 (__m128i __A)
4001{
4002 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4003 (__v16si)
4004 _mm512_undefined_epi32 (),
4005 (__mmask16) -1);
4006}
4007
4008extern __inline __m512i
4009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4010_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
4011{
4012 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4013 (__v16si) __O,
4014 __M);
4015}
4016
4017extern __inline __m512i
4018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4019_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
4020{
4021 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4022 (__v16si)
4023 _mm512_setzero_si512 (),
4024 __M);
4025}
4026
4027extern __inline __m512d
4028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4029_mm512_broadcast_f64x4 (__m256d __A)
4030{
4031 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4032 (__v8df)
4033 _mm512_undefined_pd (),
4034 (__mmask8) -1);
4035}
4036
4037extern __inline __m512d
4038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4039_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
4040{
4041 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4042 (__v8df) __O,
4043 __M);
4044}
4045
4046extern __inline __m512d
4047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4048_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
4049{
4050 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4051 (__v8df)
4052 _mm512_setzero_pd (),
4053 __M);
4054}
4055
4056extern __inline __m512i
4057__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4058_mm512_broadcast_i64x4 (__m256i __A)
4059{
4060 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4061 (__v8di)
4062 _mm512_undefined_epi32 (),
4063 (__mmask8) -1);
4064}
4065
4066extern __inline __m512i
4067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4068_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
4069{
4070 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4071 (__v8di) __O,
4072 __M);
4073}
4074
4075extern __inline __m512i
4076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4077_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
4078{
4079 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4080 (__v8di)
4081 _mm512_setzero_si512 (),
4082 __M);
4083}
4084
/* Four-letter selector constants.  As the values below show, the letters
   A, B, C and D stand for 0, 1, 2 and 3, and each letter occupies two bits:
   the first letter lands in bits 7:6 and the last letter in bits 1:0.
   Each row lists the four constants sharing the same first three letters.  */
typedef enum
{
  /* First letter A: 0x00 - 0x3F.  */
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,

  /* First letter B: 0x40 - 0x7F.  */
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,

  /* First letter C: 0x80 - 0xBF.  */
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,

  /* First letter D: 0xC0 - 0xFF.  */
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
4174
4175#ifdef __OPTIMIZE__
4176extern __inline __m512i
4177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4178_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
4179{
4180 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4181 __mask,
4182 (__v16si)
4183 _mm512_undefined_epi32 (),
4184 (__mmask16) -1);
4185}
4186
4187extern __inline __m512i
4188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4189_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
4190 _MM_PERM_ENUM __mask)
4191{
4192 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4193 __mask,
4194 (__v16si) __W,
4195 (__mmask16) __U);
4196}
4197
4198extern __inline __m512i
4199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4200_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
4201{
4202 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4203 __mask,
4204 (__v16si)
4205 _mm512_setzero_si512 (),
4206 (__mmask16) __U);
4207}
4208
4209extern __inline __m512i
4210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4211_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
4212{
4213 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4214 (__v8di) __B, __imm,
4215 (__v8di)
4216 _mm512_undefined_epi32 (),
4217 (__mmask8) -1);
4218}
4219
4220extern __inline __m512i
4221__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4222_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
4223 __m512i __B, const int __imm)
4224{
4225 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4226 (__v8di) __B, __imm,
4227 (__v8di) __W,
4228 (__mmask8) __U);
4229}
4230
4231extern __inline __m512i
4232__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4233_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
4234 const int __imm)
4235{
4236 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4237 (__v8di) __B, __imm,
4238 (__v8di)
4239 _mm512_setzero_si512 (),
4240 (__mmask8) __U);
4241}
4242
4243extern __inline __m512i
4244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4245_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
4246{
4247 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4248 (__v16si) __B,
4249 __imm,
4250 (__v16si)
4251 _mm512_undefined_epi32 (),
4252 (__mmask16) -1);
4253}
4254
4255extern __inline __m512i
4256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4257_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
4258 __m512i __B, const int __imm)
4259{
4260 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4261 (__v16si) __B,
4262 __imm,
4263 (__v16si) __W,
4264 (__mmask16) __U);
4265}
4266
4267extern __inline __m512i
4268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4269_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
4270 const int __imm)
4271{
4272 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4273 (__v16si) __B,
4274 __imm,
4275 (__v16si)
4276 _mm512_setzero_si512 (),
4277 (__mmask16) __U);
4278}
4279
4280extern __inline __m512d
4281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4282_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
4283{
4284 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4285 (__v8df) __B, __imm,
4286 (__v8df)
4287 _mm512_undefined_pd (),
4288 (__mmask8) -1);
4289}
4290
4291extern __inline __m512d
4292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4293_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
4294 __m512d __B, const int __imm)
4295{
4296 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4297 (__v8df) __B, __imm,
4298 (__v8df) __W,
4299 (__mmask8) __U);
4300}
4301
4302extern __inline __m512d
4303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4304_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
4305 const int __imm)
4306{
4307 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4308 (__v8df) __B, __imm,
4309 (__v8df)
4310 _mm512_setzero_pd (),
4311 (__mmask8) __U);
4312}
4313
4314extern __inline __m512
4315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4316_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
4317{
4318 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4319 (__v16sf) __B, __imm,
4320 (__v16sf)
4321 _mm512_undefined_ps (),
4322 (__mmask16) -1);
4323}
4324
4325extern __inline __m512
4326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4327_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
4328 __m512 __B, const int __imm)
4329{
4330 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4331 (__v16sf) __B, __imm,
4332 (__v16sf) __W,
4333 (__mmask16) __U);
4334}
4335
4336extern __inline __m512
4337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4338_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4339 const int __imm)
4340{
4341 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4342 (__v16sf) __B, __imm,
4343 (__v16sf)
4344 _mm512_setzero_ps (),
4345 (__mmask16) __U);
4346}
4347
4348#else
/* Macro forms of the pshufd512_mask wrappers, used when __OPTIMIZE__ is not
   defined.  X is the input vector, C the selector, W the source vector and
   U the mask; each is cast before reaching the builtin.  */
#define _mm512_shuffle_epi32(X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si) (__m512i) (X), \
     (int) (C), \
     (__v16si) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask16) -1))

#define _mm512_mask_shuffle_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si) (__m512i) (X), \
     (int) (C), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))

#define _mm512_maskz_shuffle_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si) (__m512i) (X), \
     (int) (C), \
     (__v16si) (__m512i) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
4363
/* Macro forms of the shuf_i64x2_mask wrappers, used when __OPTIMIZE__ is
   not defined.  X and Y are the input vectors, C the selector, W the source
   vector and U the mask.  */
#define _mm512_shuffle_i64x2(X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask8) -1))

#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) (__m512i) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
4381
/* Macro forms of the shuf_i32x4_mask wrappers, used when __OPTIMIZE__ is
   not defined.  X and Y are the input vectors, C the selector, W the source
   vector and U the mask.  */
#define _mm512_shuffle_i32x4(X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask16) -1))

#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))

#define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) (__m512i) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
4399
/* Macro forms of the shuf_f64x2_mask wrappers, used when __OPTIMIZE__ is
   not defined.  X and Y are the input vectors, C the selector, W the source
   vector and U the mask.  */
#define _mm512_shuffle_f64x2(X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (C), \
     (__v8df) (__m512d) _mm512_undefined_pd (), \
     (__mmask8) -1))

#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (C), \
     (__v8df) (__m512d) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (C), \
     (__v8df) (__m512d) _mm512_setzero_pd (), \
     (__mmask8) (U)))
4417
/* Macro forms of the shuf_f32x4_mask wrappers, used when __OPTIMIZE__ is
   not defined.  X and Y are the input vectors, C the selector, W the source
   vector and U the mask.  */
#define _mm512_shuffle_f32x4(X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (C), \
     (__v16sf) (__m512) _mm512_undefined_ps (), \
     (__mmask16) -1))

#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (C), \
     (__v16sf) (__m512) (W), \
     (__mmask16) (U)))

#define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (C), \
     (__v16sf) (__m512) _mm512_setzero_ps (), \
     (__mmask16) (U)))
4435#endif
4436
4437extern __inline __m512i
4438__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4439_mm512_rolv_epi32 (__m512i __A, __m512i __B)
4440{
4441 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4442 (__v16si) __B,
4443 (__v16si)
4444 _mm512_undefined_epi32 (),
4445 (__mmask16) -1);
4446}
4447
4448extern __inline __m512i
4449__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4450_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4451{
4452 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4453 (__v16si) __B,
4454 (__v16si) __W,
4455 (__mmask16) __U);
4456}
4457
4458extern __inline __m512i
4459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4460_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4461{
4462 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4463 (__v16si) __B,
4464 (__v16si)
4465 _mm512_setzero_si512 (),
4466 (__mmask16) __U);
4467}
4468
4469extern __inline __m512i
4470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4471_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4472{
4473 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4474 (__v16si) __B,
4475 (__v16si)
4476 _mm512_undefined_epi32 (),
4477 (__mmask16) -1);
4478}
4479
4480extern __inline __m512i
4481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4482_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4483{
4484 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4485 (__v16si) __B,
4486 (__v16si) __W,
4487 (__mmask16) __U);
4488}
4489
4490extern __inline __m512i
4491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4492_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4493{
4494 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4495 (__v16si) __B,
4496 (__v16si)
4497 _mm512_setzero_si512 (),
4498 (__mmask16) __U);
4499}
4500
4501extern __inline __m512i
4502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4503_mm512_rolv_epi64 (__m512i __A, __m512i __B)
4504{
4505 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4506 (__v8di) __B,
4507 (__v8di)
4508 _mm512_undefined_epi32 (),
4509 (__mmask8) -1);
4510}
4511
4512extern __inline __m512i
4513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4514_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4515{
4516 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4517 (__v8di) __B,
4518 (__v8di) __W,
4519 (__mmask8) __U);
4520}
4521
4522extern __inline __m512i
4523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4524_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4525{
4526 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4527 (__v8di) __B,
4528 (__v8di)
4529 _mm512_setzero_si512 (),
4530 (__mmask8) __U);
4531}
4532
4533extern __inline __m512i
4534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4535_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4536{
4537 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4538 (__v8di) __B,
4539 (__v8di)
4540 _mm512_undefined_epi32 (),
4541 (__mmask8) -1);
4542}
4543
4544extern __inline __m512i
4545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4546_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4547{
4548 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4549 (__v8di) __B,
4550 (__v8di) __W,
4551 (__mmask8) __U);
4552}
4553
4554extern __inline __m512i
4555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4556_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4557{
4558 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4559 (__v8di) __B,
4560 (__v8di)
4561 _mm512_setzero_si512 (),
4562 (__mmask8) __U);
4563}
4564
4565#ifdef __OPTIMIZE__
4566extern __inline __m256i
4567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4568_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4569{
4570 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4571 (__v8si)
4572 _mm256_undefined_si256 (),
4573 (__mmask8) -1, __R);
4574}
4575
4576extern __inline __m256i
4577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4578_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4579 const int __R)
4580{
4581 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4582 (__v8si) __W,
4583 (__mmask8) __U, __R);
4584}
4585
4586extern __inline __m256i
4587__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4588_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4589{
4590 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4591 (__v8si)
4592 _mm256_setzero_si256 (),
4593 (__mmask8) __U, __R);
4594}
4595
4596extern __inline __m256i
4597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4598_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4599{
4600 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4601 (__v8si)
4602 _mm256_undefined_si256 (),
4603 (__mmask8) -1, __R);
4604}
4605
4606extern __inline __m256i
4607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4608_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4609 const int __R)
4610{
4611 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4612 (__v8si) __W,
4613 (__mmask8) __U, __R);
4614}
4615
4616extern __inline __m256i
4617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4618_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4619{
4620 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4621 (__v8si)
4622 _mm256_setzero_si256 (),
4623 (__mmask8) __U, __R);
4624}
4625#else
/* Macro forms of the cvttpd2dq512_mask wrappers, used when __OPTIMIZE__ is
   not defined.  A is the input vector, B the last builtin argument, W the
   source vector and U the mask.  */
#define _mm512_cvtt_roundpd_epi32(A, B) \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask ( \
     (A), (__v8si) _mm256_undefined_si256 (), -1, (B)))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask ( \
     (A), (__v8si) (W), (U), (B)))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask ( \
     (A), (__v8si) _mm256_setzero_si256 (), (U), (B)))
4634
/* Macro forms of the cvttpd2udq512_mask wrappers, used when __OPTIMIZE__ is
   not defined.  A is the input vector, B the last builtin argument, W the
   source vector and U the mask.  */
#define _mm512_cvtt_roundpd_epu32(A, B) \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask ( \
     (A), (__v8si) _mm256_undefined_si256 (), -1, (B)))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask ( \
     (A), (__v8si) (W), (U), (B)))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask ( \
     (A), (__v8si) _mm256_setzero_si256 (), (U), (B)))
4643#endif
4644
4645#ifdef __OPTIMIZE__
4646extern __inline __m256i
4647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4648_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4649{
4650 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4651 (__v8si)
4652 _mm256_undefined_si256 (),
4653 (__mmask8) -1, __R);
4654}
4655
4656extern __inline __m256i
4657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4658_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4659 const int __R)
4660{
4661 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4662 (__v8si) __W,
4663 (__mmask8) __U, __R);
4664}
4665
4666extern __inline __m256i
4667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4668_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4669{
4670 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4671 (__v8si)
4672 _mm256_setzero_si256 (),
4673 (__mmask8) __U, __R);
4674}
4675
4676extern __inline __m256i
4677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4678_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4679{
4680 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4681 (__v8si)
4682 _mm256_undefined_si256 (),
4683 (__mmask8) -1, __R);
4684}
4685
4686extern __inline __m256i
4687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4688_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4689 const int __R)
4690{
4691 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4692 (__v8si) __W,
4693 (__mmask8) __U, __R);
4694}
4695
4696extern __inline __m256i
4697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4698_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4699{
4700 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4701 (__v8si)
4702 _mm256_setzero_si256 (),
4703 (__mmask8) __U, __R);
4704}
4705#else
/* Macro forms of the cvtpd2dq512_mask wrappers, used when __OPTIMIZE__ is
   not defined.  A is the input vector, B the last builtin argument, W the
   source vector and U the mask.  */
#define _mm512_cvt_roundpd_epi32(A, B) \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ( \
     (A), (__v8si) _mm256_undefined_si256 (), -1, (B)))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ( \
     (A), (__v8si) (W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ( \
     (A), (__v8si) _mm256_setzero_si256 (), (U), (B)))
4714
/* Macro forms of the cvtpd2udq512_mask wrappers, used when __OPTIMIZE__ is
   not defined.  A is the input vector, B the last builtin argument, W the
   source vector and U the mask.  */
#define _mm512_cvt_roundpd_epu32(A, B) \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ( \
     (A), (__v8si) _mm256_undefined_si256 (), -1, (B)))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ( \
     (A), (__v8si) (W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ( \
     (A), (__v8si) _mm256_setzero_si256 (), (U), (B)))
4723#endif
4724
4725#ifdef __OPTIMIZE__
4726extern __inline __m512i
4727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4728_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4729{
4730 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4731 (__v16si)
4732 _mm512_undefined_epi32 (),
4733 (__mmask16) -1, __R);
4734}
4735
4736extern __inline __m512i
4737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4738_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4739 const int __R)
4740{
4741 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4742 (__v16si) __W,
4743 (__mmask16) __U, __R);
4744}
4745
4746extern __inline __m512i
4747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4748_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4749{
4750 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4751 (__v16si)
4752 _mm512_setzero_si512 (),
4753 (__mmask16) __U, __R);
4754}
4755
4756extern __inline __m512i
4757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4758_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4759{
4760 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4761 (__v16si)
4762 _mm512_undefined_epi32 (),
4763 (__mmask16) -1, __R);
4764}
4765
4766extern __inline __m512i
4767__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4768_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4769 const int __R)
4770{
4771 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4772 (__v16si) __W,
4773 (__mmask16) __U, __R);
4774}
4775
4776extern __inline __m512i
4777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4778_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4779{
4780 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4781 (__v16si)
4782 _mm512_setzero_si512 (),
4783 (__mmask16) __U, __R);
4784}
4785#else
/* Macro forms of the cvttps2dq512_mask wrappers, used when __OPTIMIZE__ is
   not defined.  A is the input vector, B the last builtin argument, W the
   source vector and U the mask.  */
#define _mm512_cvtt_roundps_epi32(A, B) \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ( \
     (A), (__v16si) _mm512_undefined_epi32 (), -1, (B)))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ( \
     (A), (__v16si) (W), (U), (B)))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ( \
     (A), (__v16si) _mm512_setzero_si512 (), (U), (B)))
4794
4795#define _mm512_cvtt_roundps_epu32(A, B) \
4796 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
4797
4798#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
4799 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))
4800
4801#define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
4802 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4803#endif
4804
4805#ifdef __OPTIMIZE__
4806extern __inline __m512i
4807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4808_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4809{
4810 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4811 (__v16si)
4812 _mm512_undefined_epi32 (),
4813 (__mmask16) -1, __R);
4814}
4815
4816extern __inline __m512i
4817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4818_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4819 const int __R)
4820{
4821 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4822 (__v16si) __W,
4823 (__mmask16) __U, __R);
4824}
4825
4826extern __inline __m512i
4827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4828_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4829{
4830 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4831 (__v16si)
4832 _mm512_setzero_si512 (),
4833 (__mmask16) __U, __R);
4834}
4835
4836extern __inline __m512i
4837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4838_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4839{
4840 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4841 (__v16si)
4842 _mm512_undefined_epi32 (),
4843 (__mmask16) -1, __R);
4844}
4845
4846extern __inline __m512i
4847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4848_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4849 const int __R)
4850{
4851 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4852 (__v16si) __W,
4853 (__mmask16) __U, __R);
4854}
4855
4856extern __inline __m512i
4857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4858_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4859{
4860 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4861 (__v16si)
4862 _mm512_setzero_si512 (),
4863 (__mmask16) __U, __R);
4864}
4865#else
/* Macro forms of the cvt_roundps conversions.  Each one expands to a
   single call of __builtin_ia32_cvtps2dq512_mask (epi32) or
   __builtin_ia32_cvtps2udq512_mask (epu32).  The unmasked forms pass -1
   as the mask and an undefined vector as the second operand, the mask
   forms pass W, and the maskz forms pass a zero vector.  The last macro
   argument B is forwarded as the final builtin argument.  */
#define _mm512_cvt_roundps_epi32(A, B)                                  \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ((A),                      \
     (__v16si) _mm512_undefined_epi32 (), -1, (B)))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, B)                       \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ((A),                      \
     (__v16si) (W), (U), (B)))

#define _mm512_maskz_cvt_roundps_epi32(U, A, B)                         \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ((A),                      \
     (__v16si) _mm512_setzero_si512 (), (U), (B)))

#define _mm512_cvt_roundps_epu32(A, B)                                  \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ((A),                     \
     (__v16si) _mm512_undefined_epi32 (), -1, (B)))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, B)                       \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ((A),                     \
     (__v16si) (W), (U), (B)))

#define _mm512_maskz_cvt_roundps_epu32(U, A, B)                         \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ((A),                     \
     (__v16si) _mm512_setzero_si512 (), (U), (B)))
4883#endif
4884
4885extern __inline __m128d
4886__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4887_mm_cvtu32_sd (__m128d __A, unsigned __B)
4888{
4889 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
4890}
4891
4892#ifdef __x86_64__
4893#ifdef __OPTIMIZE__
4894extern __inline __m128d
4895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4896_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4897{
4898 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4899}
4900
4901extern __inline __m128d
4902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4903_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4904{
4905 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4906}
4907
4908extern __inline __m128d
4909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4910_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4911{
4912 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4913}
4914#else
/* Macro forms of the 64-bit integer to scalar double conversions.  Each
   expands to one call of __builtin_ia32_cvtusi2sd64 (unsigned) or
   __builtin_ia32_cvtsi2sd64 (signed), forwarding A, B and C in order.

   The whole expansion is wrapped in parentheses so that the (__m128d)
   cast always applies to the call result.  Without them, a postfix
   operator written after the macro, such as a vector subscript, would
   bind to the builtin call before the cast.  */
#define _mm_cvt_roundu64_sd(A, B, C)                                    \
  ((__m128d) __builtin_ia32_cvtusi2sd64 ((A), (B), (C)))

#define _mm_cvt_roundi64_sd(A, B, C)                                    \
  ((__m128d) __builtin_ia32_cvtsi2sd64 ((A), (B), (C)))

#define _mm_cvt_roundsi64_sd(A, B, C)                                   \
  ((__m128d) __builtin_ia32_cvtsi2sd64 ((A), (B), (C)))
4923#endif
4924
4925#endif
4926
4927#ifdef __OPTIMIZE__
4928extern __inline __m128
4929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4930_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4931{
4932 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4933}
4934
4935extern __inline __m128
4936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4937_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4938{
4939 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4940}
4941
4942extern __inline __m128
4943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4944_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4945{
4946 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4947}
4948#else
/* Macro forms of the 32-bit integer to scalar float conversions.  Each
   expands to one call of __builtin_ia32_cvtusi2ss32 (unsigned) or
   __builtin_ia32_cvtsi2ss32 (signed), forwarding A, B and C in order.

   The whole expansion is wrapped in parentheses so that the (__m128)
   cast always applies to the call result.  Without them, a postfix
   operator written after the macro, such as a vector subscript, would
   bind to the builtin call before the cast.  */
#define _mm_cvt_roundu32_ss(A, B, C)                                    \
  ((__m128) __builtin_ia32_cvtusi2ss32 ((A), (B), (C)))

#define _mm_cvt_roundi32_ss(A, B, C)                                    \
  ((__m128) __builtin_ia32_cvtsi2ss32 ((A), (B), (C)))

#define _mm_cvt_roundsi32_ss(A, B, C)                                   \
  ((__m128) __builtin_ia32_cvtsi2ss32 ((A), (B), (C)))
4957#endif
4958
4959#ifdef __x86_64__
4960#ifdef __OPTIMIZE__
4961extern __inline __m128
4962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4963_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
4964{
4965 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
4966}
4967
4968extern __inline __m128
4969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4970_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
4971{
4972 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4973}
4974
4975extern __inline __m128
4976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4977_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
4978{
4979 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4980}
4981#else
/* Macro forms of the 64-bit integer to scalar float conversions.  Each
   expands to one call of __builtin_ia32_cvtusi2ss64 (unsigned) or
   __builtin_ia32_cvtsi2ss64 (signed), forwarding A, B and C in order.

   The whole expansion is wrapped in parentheses so that the (__m128)
   cast always applies to the call result.  Without them, a postfix
   operator written after the macro, such as a vector subscript, would
   bind to the builtin call before the cast.  */
#define _mm_cvt_roundu64_ss(A, B, C)                                    \
  ((__m128) __builtin_ia32_cvtusi2ss64 ((A), (B), (C)))

#define _mm_cvt_roundi64_ss(A, B, C)                                    \
  ((__m128) __builtin_ia32_cvtsi2ss64 ((A), (B), (C)))

#define _mm_cvt_roundsi64_ss(A, B, C)                                   \
  ((__m128) __builtin_ia32_cvtsi2ss64 ((A), (B), (C)))
4990#endif
4991
4992#endif
4993
4994extern __inline __m128i
4995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4996_mm512_cvtepi32_epi8 (__m512i __A)
4997{
4998 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4999 (__v16qi)
5000 _mm_undefined_si128 (),
5001 (__mmask16) -1);
5002}
5003
5004extern __inline void
5005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5006_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5007{
5008 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5009}
5010
5011extern __inline __m128i
5012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5013_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5014{
5015 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5016 (__v16qi) __O, __M);
5017}
5018
5019extern __inline __m128i
5020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5021_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
5022{
5023 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5024 (__v16qi)
5025 _mm_setzero_si128 (),
5026 __M);
5027}
5028
5029extern __inline __m128i
5030__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5031_mm512_cvtsepi32_epi8 (__m512i __A)
5032{
5033 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5034 (__v16qi)
5035 _mm_undefined_si128 (),
5036 (__mmask16) -1);
5037}
5038
5039extern __inline void
5040__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5041_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5042{
5043 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5044}
5045
5046extern __inline __m128i
5047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5048_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5049{
5050 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5051 (__v16qi) __O, __M);
5052}
5053
5054extern __inline __m128i
5055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5056_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
5057{
5058 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5059 (__v16qi)
5060 _mm_setzero_si128 (),
5061 __M);
5062}
5063
5064extern __inline __m128i
5065__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5066_mm512_cvtusepi32_epi8 (__m512i __A)
5067{
5068 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5069 (__v16qi)
5070 _mm_undefined_si128 (),
5071 (__mmask16) -1);
5072}
5073
5074extern __inline void
5075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5076_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5077{
5078 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5079}
5080
5081extern __inline __m128i
5082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5083_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5084{
5085 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5086 (__v16qi) __O,
5087 __M);
5088}
5089
5090extern __inline __m128i
5091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5092_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
5093{
5094 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5095 (__v16qi)
5096 _mm_setzero_si128 (),
5097 __M);
5098}
5099
5100extern __inline __m256i
5101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5102_mm512_cvtepi32_epi16 (__m512i __A)
5103{
5104 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5105 (__v16hi)
5106 _mm256_undefined_si256 (),
5107 (__mmask16) -1);
5108}
5109
5110extern __inline void
5111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5112_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
5113{
5114 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
5115}
5116
5117extern __inline __m256i
5118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5119_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5120{
5121 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5122 (__v16hi) __O, __M);
5123}
5124
5125extern __inline __m256i
5126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5127_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
5128{
5129 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5130 (__v16hi)
5131 _mm256_setzero_si256 (),
5132 __M);
5133}
5134
5135extern __inline __m256i
5136__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5137_mm512_cvtsepi32_epi16 (__m512i __A)
5138{
5139 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5140 (__v16hi)
5141 _mm256_undefined_si256 (),
5142 (__mmask16) -1);
5143}
5144
5145extern __inline void
5146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5147_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5148{
5149 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5150}
5151
5152extern __inline __m256i
5153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5154_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5155{
5156 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5157 (