STLdoc
STLdocumentation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
zmmintrin.h
Go to the documentation of this file.
1 /***
2 * zmmintrin.h - Meta Header file for Intel(R) Architecture intrinsic functions.
3 *
4 * Copyright (C) 2007-2016 Intel Corporation. All rights reserved.
5 *
6 * The information and source code contained herein is the exclusive
7 * property of Intel Corporation and may not be disclosed, examined
8 * or reproduced in whole or in part without explicit written authorization
9 * from the company.
10 *
11 *
12 *******************************************************************************/
13 
14 #ifndef _ZMMINTRIN_H_INCLUDED
15 #define _ZMMINTRIN_H_INCLUDED
16 
17 #ifndef _INCLUDED_IMM
18 #error "Header should only be included from <immintrin.h>."
19 #endif
20 
21 //
22 // Definitions and declarations for use with 512-bit compiler intrinsics.
23 //
24 
25 //
26 // A word about intrinsic function naming conventions. Most 512-bit
27 // vector instructions have names such as v<operation><type>. For
28 // example "vaddps" is an addition operation (add) on packed single-
29 // precision (ps) values. The corresponding intrinsic is usually
30 // (not always) named _mm512_<operation>_<type>. For example, the
31 // "_mm512_add_ps" function generates VADDPS. The corresponding
32 // masked flavor adds "_mask" to the name, e.g. "_mm512_mask_add_ps".
33 //
34 // The types include:
35 //
36 // ps -- packed single precision
37 // pd -- packed double precision
38 // epi32 -- packed 32-bit integers
39 // epu32 -- packed 32-bit unsigned integers
40 // epi64 -- packed 64-bit integers
41 //
42 
43 typedef unsigned char __mmask8;
44 typedef unsigned short __mmask16;
45 typedef unsigned int __mmask32;
46 typedef unsigned __int64 __mmask64;
47 
48 typedef union __declspec(intrin_type) /* __declspec(align(64)) */ __m512 {
49  float m512_f32[16];
51 
52 typedef struct __declspec(intrin_type) /* __declspec(align(64)) */ __m512d {
53  double m512d_f64[8];
55 
56 typedef union __declspec(intrin_type) /* __declspec(align(64)) */ __m512i {
57  __int8 m512i_i8[64];
58  __int16 m512i_i16[32];
59  __int32 m512i_i32[16];
60  __int64 m512i_i64[8];
61  unsigned __int8 m512i_u8[64];
62  unsigned __int16 m512i_u16[32];
63  unsigned __int32 m512i_u32[16];
64  unsigned __int64 m512i_u64[8];
66 
67 
68 #ifdef __cplusplus
69 extern "C" {
70 // Intrinsic functions use C name-mangling.
71 #endif /* __cplusplus */
72 
73 /* Conversion from one type to another, no change in value. */
74 extern __m256 __cdecl _mm512_castps512_ps256(__m512);
75 extern __m512 __cdecl _mm512_castpd_ps(__m512d);
76 extern __m512 __cdecl _mm512_castps256_ps512(__m256);
77 extern __m512 __cdecl _mm512_castsi512_ps(__m512i);
78 extern __m512 __cdecl _mm512_castps128_ps512(__m128);
79 
80 extern __m256d __cdecl _mm512_castpd512_pd256(__m512d);
81 extern __m512d __cdecl _mm512_castpd256_pd512(__m256d);
82 extern __m512d __cdecl _mm512_castps_pd(__m512);
83 extern __m512d __cdecl _mm512_castsi512_pd(__m512i);
84 extern __m512d __cdecl _mm512_castpd128_pd512(__m128d);
85 
86 extern __m256i __cdecl _mm512_castsi512_si256(__m512i);
87 extern __m512i __cdecl _mm512_castpd_si512(__m512d);
88 extern __m512i __cdecl _mm512_castps_si512(__m512);
89 extern __m512i __cdecl _mm512_castsi256_si512(__m256i);
90 
91 /* Constant for special read-only mask register 'k0'. */
92 #define _MM_K0_REG (0xffff)
93 
/*
 * Constants for rounding mode.
 * These names beginning with "_MM_ROUND" are deprecated.
 * Use the names beginning with "_MM_FROUND" going forward.
 */
typedef enum {
    _MM_ROUND_MODE_NEAREST,     /* round to nearest (even) */
    _MM_ROUND_MODE_DOWN,        /* round toward negative infinity */
    _MM_ROUND_MODE_UP,          /* round toward positive infinity */
    _MM_ROUND_MODE_TOWARD_ZERO, /* round toward zero */
    _MM_ROUND_MODE_DEFAULT      /* round mode from MXCSR */
} _MM_ROUND_MODE_ENUM;          /* NOTE(review): typedef name restored --
                                 * confirm against the vendor header. */
107 
108 /*
109 * Intel(R) AVX-512 intrinsic functions
110 */
111 extern __m512 __cdecl _mm512_setzero_ps(void);
112 extern __m512d __cdecl _mm512_setzero_pd(void);
113 
114 extern __m512 __cdecl _mm512_set_ps(float /* e15 */, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float /* e0 */);
115 extern __m512d __cdecl _mm512_set_pd(double /* e7 */, double, double, double, double, double, double, double /* e0 */);
116 
117 extern __m512 __cdecl _mm512_set1_ps(float);
118 extern __m512d __cdecl _mm512_set1_pd(double);
119 
120 extern __m512 __cdecl _mm512_maskz_load_ps(__mmask16, void const*);
121 extern __m512d __cdecl _mm512_maskz_load_pd(__mmask8, void const*);
122 extern __m512 __cdecl _mm512_mask_load_ps(__m512, __mmask16, void const*);
123 extern __m512d __cdecl _mm512_mask_load_pd(__m512d, __mmask8, void const*);
124 extern __m512 __cdecl _mm512_maskz_loadu_ps(__mmask16, void const*);
125 extern __m512d __cdecl _mm512_maskz_loadu_pd(__mmask8, void const*);
126 extern __m512 __cdecl _mm512_mask_loadu_ps(__m512, __mmask16, void const*);
127 extern __m512d __cdecl _mm512_mask_loadu_pd(__m512d, __mmask8, void const*);
128 
129 extern void __cdecl _mm512_storeu_ps(void*, __m512);
130 extern void __cdecl _mm512_storeu_pd(void*, __m512d);
131 extern void __cdecl _mm512_mask_store_ps(void*, __mmask16, __m512);
132 extern void __cdecl _mm512_mask_store_pd(void*, __mmask8, __m512d);
133 extern void __cdecl _mm512_mask_storeu_ps(void*, __mmask16, __m512);
134 extern void __cdecl _mm512_mask_storeu_pd(void*, __mmask8, __m512d);
135 
136 extern __m512 __cdecl _mm512_maskz_add_round_ps(__mmask16, __m512, __m512, const int /* rounding */);
137 extern __m512d __cdecl _mm512_maskz_add_round_pd(__mmask8, __m512d, __m512d, const int /* rounding */);
138 extern __m512 __cdecl _mm512_mask_add_round_ps(__m512, __mmask16, __m512, __m512, const int /* rounding */);
139 extern __m512d __cdecl _mm512_mask_add_round_pd(__m512d, __mmask8, __m512d, __m512d, const int /* rounding */);
140 
141 extern __m512 __cdecl _mm512_maskz_sub_round_ps(__mmask16, __m512, __m512, const int /* rounding */);
142 extern __m512d __cdecl _mm512_maskz_sub_round_pd(__mmask8, __m512d, __m512d, const int /* rounding */);
143 extern __m512 __cdecl _mm512_mask_sub_round_ps(__m512, __mmask16, __m512, __m512, const int /* rounding */);
144 extern __m512d __cdecl _mm512_mask_sub_round_pd(__m512d, __mmask8, __m512d, __m512d, const int /* rounding */);
145 
146 extern __m512 __cdecl _mm512_maskz_mul_round_ps(__mmask16, __m512, __m512, const int /* rounding */);
147 extern __m512d __cdecl _mm512_maskz_mul_round_pd(__mmask8, __m512d, __m512d, const int /* rounding */);
148 extern __m512 __cdecl _mm512_mask_mul_round_ps(__m512, __mmask16, __m512, __m512, const int /* rounding */);
149 extern __m512d __cdecl _mm512_mask_mul_round_pd(__m512d, __mmask8, __m512d, __m512d, const int /* rounding */);
150 
151 extern __m512d __cdecl _mm512_maskz_div_round_pd(__mmask8, __m512d, __m512d, const int /* rounding */);
152 extern __m512 __cdecl _mm512_maskz_div_round_ps(__mmask16, __m512, __m512, const int /* rounding */);
153 extern __m512 __cdecl _mm512_mask_div_round_ps(__m512, __mmask16, __m512, __m512, const int /* rounding */);
154 extern __m512d __cdecl _mm512_mask_div_round_pd(__m512d, __mmask8, __m512d, __m512d, const int /* rounding */);
155 
156 extern __m512 __cdecl _mm512_mask_fmadd_round_ps(__m512, __mmask16, __m512, __m512, const int /* rounding */);
157 extern __m512 __cdecl _mm512_mask3_fmadd_round_ps(__m512, __m512, __m512, __mmask16, const int /* rounding */);
158 extern __m512 __cdecl _mm512_maskz_fmadd_round_ps(__mmask16, __m512, __m512, __m512, const int);
159 extern __m512d __cdecl _mm512_mask_fmadd_round_pd(__m512d, __mmask8, __m512d, __m512d, const int /* rounding */);
160 extern __m512d __cdecl _mm512_mask3_fmadd_round_pd(__m512d, __m512d, __m512d, __mmask8, const int /* rounding */);
161 extern __m512d __cdecl _mm512_maskz_fmadd_round_pd(__mmask8, __m512d, __m512d, __m512d, const int);
162 
163 extern __m512 __cdecl _mm512_mask_fmsub_round_ps(__m512, __mmask16, __m512, __m512, const int /* rounding */);
164 extern __m512 __cdecl _mm512_mask3_fmsub_round_ps(__m512, __m512, __m512, __mmask16, const int /* rounding */);
165 extern __m512 __cdecl _mm512_maskz_fmsub_round_ps(__mmask16, __m512, __m512, __m512, const int /* rounding */);
166 extern __m512d __cdecl _mm512_mask_fmsub_round_pd(__m512d, __mmask8, __m512d, __m512d, const int /* rounding */);
167 extern __m512d __cdecl _mm512_mask3_fmsub_round_pd(__m512d, __m512d, __m512d, __mmask8, const int /* rounding */);
168 extern __m512d __cdecl _mm512_maskz_fmsub_round_pd(__mmask8, __m512d, __m512d, __m512d, const int /* rounding */);
169 
170 extern __m512 __cdecl _mm512_mask_fmaddsub_round_ps(__m512, __mmask16, __m512, __m512, const int /* rounding */);
171 extern __m512 __cdecl _mm512_mask3_fmaddsub_round_ps(__m512, __m512, __m512, __mmask16, const int /* rounding */);
172 extern __m512 __cdecl _mm512_maskz_fmaddsub_round_ps(__mmask16, __m512, __m512, __m512, const int /* rounding */);
173 extern __m512d __cdecl _mm512_mask_fmaddsub_round_pd(__m512d, __mmask8, __m512d, __m512d, const int /* rounding */);
174 extern __m512d __cdecl _mm512_mask3_fmaddsub_round_pd(__m512d, __m512d, __m512d, __mmask8, const int /* rounding */);
175 extern __m512d __cdecl _mm512_maskz_fmaddsub_round_pd(__mmask8, __m512d, __m512d, __m512d, const int /* rounding */);
176 
177 extern __m512 __cdecl _mm512_mask_fmsubadd_round_ps(__m512, __mmask16, __m512, __m512, const int /* rounding */);
178 extern __m512 __cdecl _mm512_mask3_fmsubadd_round_ps(__m512, __m512, __m512, __mmask16, const int /* rounding */);
179 extern __m512 __cdecl _mm512_maskz_fmsubadd_round_ps(__mmask16, __m512, __m512, __m512, const int /* rounding */);
180 extern __m512d __cdecl _mm512_mask_fmsubadd_round_pd(__m512d, __mmask8, __m512d, __m512d, const int /* rounding */);
181 extern __m512d __cdecl _mm512_mask3_fmsubadd_round_pd(__m512d, __m512d, __m512d, __mmask8, const int /* rounding */);
182 extern __m512d __cdecl _mm512_maskz_fmsubadd_round_pd(__mmask8, __m512d, __m512d, __m512d, const int /* rounding */);
183 
184 extern __m512 __cdecl _mm512_mask_fnmadd_round_ps(__m512, __mmask16, __m512, __m512, const int /* rounding */);
185 extern __m512 __cdecl _mm512_mask3_fnmadd_round_ps(__m512, __m512, __m512, __mmask16, const int /* rounding */);
186 extern __m512 __cdecl _mm512_maskz_fnmadd_round_ps(__mmask16, __m512, __m512, __m512, const int /* rounding */);
187 extern __m512d __cdecl _mm512_mask_fnmadd_round_pd(__m512d, __mmask8, __m512d, __m512d, const int /* rounding */);
188 extern __m512d __cdecl _mm512_mask3_fnmadd_round_pd(__m512d, __m512d, __m512d, __mmask8, const int /* rounding */);
189 extern __m512d __cdecl _mm512_maskz_fnmadd_round_pd(__mmask8, __m512d, __m512d, __m512d, const int /* rounding */);
190 
191 extern __m512 __cdecl _mm512_mask_fnmsub_round_ps(__m512, __mmask16, __m512, __m512, const int /* rounding */);
192 extern __m512 __cdecl _mm512_mask3_fnmsub_round_ps(__m512, __m512, __m512, __mmask16, const int /* rounding */);
193 extern __m512 __cdecl _mm512_maskz_fnmsub_round_ps(__mmask16, __m512, __m512, __m512, const int /* rounding */);
194 extern __m512d __cdecl _mm512_mask_fnmsub_round_pd(__m512d, __mmask8, __m512d, __m512d, const int /* rounding */);
195 extern __m512d __cdecl _mm512_mask3_fnmsub_round_pd(__m512d, __m512d, __m512d, __mmask8, const int /* rounding */);
196 extern __m512d __cdecl _mm512_maskz_fnmsub_round_pd(__mmask8, __m512d, __m512d, __m512d, const int /* rounding */);
197 
198 extern __m512 __cdecl _mm512_maskz_sqrt_round_ps(__mmask16, __m512, const int /* rounding */);
199 extern __m512d __cdecl _mm512_maskz_sqrt_round_pd(__mmask8, __m512d, const int /* rounding */);
200 extern __m512 __cdecl _mm512_mask_sqrt_round_ps(__m512, __mmask16, __m512, const int /* rounding */);
201 extern __m512d __cdecl _mm512_mask_sqrt_round_pd(__m512d, __mmask8, __m512d, const int /* rounding */);
202 
203 extern __m512 __cdecl _mm512_maskz_abs_ps(__mmask16, __m512);
207 
208 extern __m512 __cdecl _mm512_maskz_max_round_ps(__mmask16, __m512, __m512, const int /* rounding */);
209 extern __m512d __cdecl _mm512_maskz_max_round_pd(__mmask8, __m512d, __m512d, const int /* rounding */);
210 extern __m512 __cdecl _mm512_mask_max_round_ps(__m512, __mmask16, __m512, __m512, const int /* rounding */);
211 extern __m512d __cdecl _mm512_mask_max_round_pd(__m512d, __mmask8, __m512d, __m512d, const int /* rounding */);
212 
213 extern __m512 __cdecl _mm512_maskz_min_round_ps(__mmask16, __m512, __m512, const int /* rounding */);
214 extern __m512d __cdecl _mm512_maskz_min_round_pd(__mmask8, __m512d, __m512d, const int /* rounding */);
215 extern __m512 __cdecl _mm512_mask_min_round_ps(__m512, __mmask16, __m512, __m512, const int /* rounding */);
216 extern __m512d __cdecl _mm512_mask_min_round_pd(__m512d, __mmask8, __m512d, __m512d, const int /* rounding */);
217 
222 
227 
228 extern __m512 __cdecl _mm512_mask_rcp28_round_ps(__m512, __mmask16, __m512, const int);
229 extern __m512 __cdecl _mm512_maskz_rcp28_round_ps(__mmask16, __m512, const int);
230 extern __m512d __cdecl _mm512_mask_rcp28_round_pd(__m512d, __mmask8, __m512d, const int);
231 extern __m512d __cdecl _mm512_maskz_rcp28_round_pd(__mmask8, __m512d, const int);
232 extern __m512 __cdecl _mm512_mask_rsqrt28_round_ps(__m512, __mmask16, __m512, const int);
233 extern __m512 __cdecl _mm512_maskz_rsqrt28_round_ps(__mmask16, __m512, const int);
234 extern __m512d __cdecl _mm512_mask_rsqrt28_round_pd(__m512d, __mmask8, __m512d, const int);
235 extern __m512d __cdecl _mm512_maskz_rsqrt28_round_pd(__mmask8, __m512d, const int);
236 
237 extern __m512d __cdecl _mm512_maskz_cvt_roundps_pd(__mmask8, __m256, const int /* rounding */);
238 extern __m256 __cdecl _mm512_maskz_cvt_roundpd_ps(__mmask8, __m512d, const int /* rounding */);
239 extern __m512d __cdecl _mm512_mask_cvt_roundps_pd(__m512d, __mmask8, __m256, const int /* rounding */);
240 extern __m256 __cdecl _mm512_mask_cvt_roundpd_ps(__m256, __mmask8, __m512d, const int /* rounding */);
241 
242 extern __mmask16 __cdecl _mm512_mask_cmp_round_ps_mask(__mmask16, __m512, __m512, const int, const int /* rounding */);
243 extern __mmask8 __cdecl _mm512_mask_cmp_round_pd_mask(__mmask8, __m512d, __m512d, const int, const int /* rounding */);
244 
259 
260 extern __m128 __cdecl _mm512_mask_extractf32x4_ps(__m128, __mmask8, __m512, const int);
261 extern __m128 __cdecl _mm512_maskz_extractf32x4_ps(__mmask8, __m512, int);
262 extern __m256 __cdecl _mm512_mask_extractf32x8_ps(__m256, __mmask8, __m512, const int);
263 extern __m256 __cdecl _mm512_maskz_extractf32x8_ps(__mmask8, __m512, int);
264 extern __m128d __cdecl _mm512_mask_extractf64x2_pd(__m128d, __mmask8, __m512d, const int);
266 extern __m256d __cdecl _mm512_mask_extractf64x4_pd(__m256d, __mmask8, __m512d, const int);
268 
269 extern __m512 __cdecl _mm512_mask_insertf32x4(__m512, __mmask16, __m512, __m128, const int);
270 extern __m512 __cdecl _mm512_maskz_insertf32x4(__mmask16, __m512, __m128, int);
271 extern __m512 __cdecl _mm512_mask_insertf32x8(__m512, __mmask16, __m512, __m256, const int);
272 extern __m512 __cdecl _mm512_maskz_insertf32x8(__mmask16, __m512, __m256, int);
273 extern __m512d __cdecl _mm512_mask_insertf64x2(__m512d, __mmask8, __m512d, __m128d, const int);
275 extern __m512d __cdecl _mm512_mask_insertf64x4(__m512d, __mmask8, __m512d, __m256d, const int);
277 
278 extern __m512 __cdecl _mm512_mask_shuffle_f32x4(__m512, __mmask16, __m512, __m512, const int);
279 extern __m512 __cdecl _mm512_maskz_shuffle_f32x4(__mmask16, __m512, __m512, const int);
280 extern __m512d __cdecl _mm512_mask_shuffle_f64x2(__m512d, __mmask8, __m512d, __m512d, const int);
281 extern __m512d __cdecl _mm512_maskz_shuffle_f64x2(__mmask8, __m512d, __m512d, const int);
282 extern __m512d __cdecl _mm512_mask_shuffle_pd(__m512d, __mmask8, __m512d, __m512d, const int);
283 extern __m512d __cdecl _mm512_maskz_shuffle_pd(__mmask8, __m512d, __m512d, const int);
284 extern __m512 __cdecl _mm512_mask_shuffle_ps(__m512, __mmask16, __m512, __m512, const int);
285 extern __m512 __cdecl _mm512_maskz_shuffle_ps(__mmask16, __m512, __m512, const int);
286 
287 
/* "undefined" vectors are implemented as zero vectors in this header. */
#define _mm512_setzero() _mm512_setzero_ps()
#define _mm512_undefined() _mm512_setzero()
#define _mm512_undefined_pd() _mm512_setzero_pd()
#define _mm512_undefined_ps() _mm512_undefined()
292 
/* set4: repeat a 4-element pattern across the vector (arguments in
 * high-to-low element order, matching _mm512_set_*). */
#define _mm512_set4_ps(a,b,c,d) \
    _mm512_set_ps((a), (b), (c), (d), (a), (b), (c), (d), \
                  (a), (b), (c), (d), (a), (b), (c), (d))

#define _mm512_set4_pd(a,b,c,d) \
    _mm512_set_pd((a), (b), (c), (d), (a), (b), (c), (d))

/* setr: arguments in low-to-high (reversed) element order. */
#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8, \
                       e9, e10, e11, e12, e13, e14, e15) \
    _mm512_set_ps((e15), (e14), (e13), (e12), (e11), (e10), \
                  (e9), (e8), (e7), (e6), (e5), (e4), (e3), (e2), (e1), (e0))

/* Legacy "set_NtoM" aliases for the setr/set4/set1 forms. */
#define _mm512_set_16to16_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8, \
                             e9, e10, e11, e12, e13, e14, e15) \
    _mm512_set_ps((e0), (e1), (e2), (e3), (e4), (e5), (e6), (e7), \
                  (e8), (e9), (e10), (e11), (e12), (e13), (e14), (e15))

#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
    _mm512_set_pd((e7), (e6), (e5), (e4), (e3), (e2), (e1), (e0))

#define _mm512_set_8to8_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
    _mm512_set_pd((e0), (e1), (e2), (e3), (e4), (e5), (e6), (e7))

#define _mm512_setr4_ps(a,b,c,d) \
    _mm512_set4_ps((d), (c), (b), (a))

#define _mm512_set_4to16_ps(a,b,c,d) \
    _mm512_set4_ps((d), (c), (b), (a))

#define _mm512_setr4_pd(a,b,c,d) \
    _mm512_set4_pd((d), (c), (b), (a))

#define _mm512_set_4to8_pd(a,b,c,d) \
    _mm512_set4_pd((d), (c), (b), (a))

#define _mm512_set_1to16_ps(x) _mm512_set1_ps((x))
#define _mm512_set_1to8_pd(x) _mm512_set1_pd((x))
330 
/* Unmasked load/store wrappers: select every element via mask k0. */
#define _mm512_load_ps(x) _mm512_maskz_load_ps(_MM_K0_REG, (x))
#define _mm512_load_pd(x) _mm512_maskz_load_pd((__mmask8)_MM_K0_REG, (x))
#define _mm512_loadu_ps(x) _mm512_maskz_loadu_ps(_MM_K0_REG, (x))
#define _mm512_loadu_pd(x) _mm512_maskz_loadu_pd((__mmask8)_MM_K0_REG, (x))

#define _mm512_store_ps(x, v) _mm512_mask_store_ps((x), _MM_K0_REG, (v))
#define _mm512_store_pd(x, v) _mm512_mask_store_pd((x), (__mmask8)_MM_K0_REG, (v))
338 
/* Add wrappers: unmasked forms use k0 (all elements); non-"round" forms
 * use the current MXCSR rounding mode. */
#define _mm512_add_ps(v1, v2) \
    _mm512_maskz_add_round_ps(_MM_K0_REG, (v1), (v2), _MM_FROUND_CUR_DIRECTION)
#define _mm512_add_round_ps(v1, v2, e3) \
    _mm512_maskz_add_round_ps(_MM_K0_REG, (v1), (v2), (e3))
#define _mm512_maskz_add_ps(k1, v2, v3) \
    _mm512_maskz_add_round_ps((k1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_add_ps(v1, k2, v3, v4) \
    _mm512_mask_add_round_ps((v1), (k2), (v3), (v4), _MM_FROUND_CUR_DIRECTION)
#define _mm512_add_pd(v1, v2) \
    _mm512_maskz_add_round_pd((__mmask8)_MM_K0_REG, (v1), (v2), _MM_FROUND_CUR_DIRECTION)
#define _mm512_add_round_pd(v1, v2, e3) \
    _mm512_maskz_add_round_pd((__mmask8)_MM_K0_REG, (v1), (v2), (e3))
#define _mm512_maskz_add_pd(k1, v2, v3) \
    _mm512_maskz_add_round_pd((k1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_add_pd(v1, k2, v3, v4) \
    _mm512_mask_add_round_pd((v1), (k2), (v3), (v4), _MM_FROUND_CUR_DIRECTION)
355 
/* Subtract wrappers (see add wrappers for the masking/rounding scheme). */
#define _mm512_sub_ps(v1, v2) \
    _mm512_maskz_sub_round_ps(_MM_K0_REG, (v1), (v2), _MM_FROUND_CUR_DIRECTION)
#define _mm512_sub_round_ps(v1, v2, e3) \
    _mm512_maskz_sub_round_ps(_MM_K0_REG, (v1), (v2), (e3))
#define _mm512_maskz_sub_ps(k1, v2, v3) \
    _mm512_maskz_sub_round_ps((k1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_sub_ps(v1, k2, v3, v4) \
    _mm512_mask_sub_round_ps((v1), (k2), (v3), (v4), _MM_FROUND_CUR_DIRECTION)
#define _mm512_sub_pd(v1, v2) \
    _mm512_maskz_sub_round_pd((__mmask8)_MM_K0_REG, (v1), (v2), _MM_FROUND_CUR_DIRECTION)
#define _mm512_sub_round_pd(v1, v2, e3) \
    _mm512_maskz_sub_round_pd((__mmask8)_MM_K0_REG, (v1), (v2), (e3))
#define _mm512_maskz_sub_pd(k1, v2, v3) \
    _mm512_maskz_sub_round_pd((k1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_sub_pd(v1, k2, v3, v4) \
    _mm512_mask_sub_round_pd((v1), (k2), (v3), (v4), _MM_FROUND_CUR_DIRECTION)
372 
/* Multiply wrappers (see add wrappers for the masking/rounding scheme). */
#define _mm512_mul_ps(v1, v2) \
    _mm512_maskz_mul_round_ps(_MM_K0_REG, (v1), (v2), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mul_round_ps(v1, v2, e3) \
    _mm512_maskz_mul_round_ps(_MM_K0_REG, (v1), (v2), (e3))
#define _mm512_maskz_mul_ps(k1, v2, v3) \
    _mm512_maskz_mul_round_ps((k1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_mul_ps(v1, k2, v3, v4) \
    _mm512_mask_mul_round_ps((v1), (k2), (v3), (v4), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mul_pd(v1, v2) \
    _mm512_maskz_mul_round_pd((__mmask8)_MM_K0_REG, (v1), (v2), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mul_round_pd(v1, v2, e3) \
    _mm512_maskz_mul_round_pd((__mmask8)_MM_K0_REG, (v1), (v2), (e3))
#define _mm512_maskz_mul_pd(k1, v2, v3) \
    _mm512_maskz_mul_round_pd((k1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_mul_pd(v1, k2, v3, v4) \
    _mm512_mask_mul_round_pd((v1), (k2), (v3), (v4), _MM_FROUND_CUR_DIRECTION)
389 
/* Divide wrappers (see add wrappers for the masking/rounding scheme). */
#define _mm512_div_ps(v1, v2) \
    _mm512_maskz_div_round_ps(_MM_K0_REG, (v1), (v2), _MM_FROUND_CUR_DIRECTION)
#define _mm512_div_round_ps(v1, v2, e3) \
    _mm512_maskz_div_round_ps(_MM_K0_REG, (v1), (v2), (e3))
#define _mm512_maskz_div_ps(k1, v2, v3) \
    _mm512_maskz_div_round_ps((k1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_div_ps(v1, k2, v3, v4) \
    _mm512_mask_div_round_ps((v1), (k2), (v3), (v4), _MM_FROUND_CUR_DIRECTION)
#define _mm512_div_pd(v1, v2) \
    _mm512_maskz_div_round_pd((__mmask8)_MM_K0_REG, (v1), (v2), _MM_FROUND_CUR_DIRECTION)
#define _mm512_div_round_pd(v1, v2, e3) \
    _mm512_maskz_div_round_pd((__mmask8)_MM_K0_REG, (v1), (v2), (e3))
#define _mm512_maskz_div_pd(k1, v2, v3) \
    _mm512_maskz_div_round_pd((k1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_div_pd(v1, k2, v3, v4) \
    _mm512_mask_div_round_pd((v1), (k2), (v3), (v4), _MM_FROUND_CUR_DIRECTION)
406 
/* fmadd wrappers over the extern round intrinsics. */
#define _mm512_fmadd_round_ps(v1, v2, v3, e4) \
    _mm512_maskz_fmadd_round_ps(_MM_K0_REG, (v1), (v2), (v3), (e4))
#define _mm512_fmadd_ps(v1, v2, v3) \
    _mm512_fmadd_round_ps((v1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_fmadd_ps(v1, k1, v2, v3) \
    _mm512_mask_fmadd_round_ps((v1), (k1), (v2), (v3), \
                               _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_fmadd_ps(k1, v1, v2, v3) \
    _mm512_maskz_fmadd_round_ps((k1), (v1), (v2), (v3), \
                                _MM_FROUND_CUR_DIRECTION)
#define _mm512_fmadd_round_pd(v1, v2, v3, e4) \
    _mm512_maskz_fmadd_round_pd((__mmask8)_MM_K0_REG, (v1), (v2), (v3), (e4))
#define _mm512_fmadd_pd(v1, v2, v3) \
    _mm512_fmadd_round_pd((v1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_fmadd_pd(v1, k1, v2, v3) \
    _mm512_mask_fmadd_round_pd((v1), (k1), (v2), (v3), \
                               _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_fmadd_pd(k1, v1, v2, v3) \
    _mm512_maskz_fmadd_round_pd((k1), (v1), (v2), (v3), \
                                _MM_FROUND_CUR_DIRECTION)
427 
/* fmsub wrappers over the extern round intrinsics. */
#define _mm512_fmsub_round_ps(v1, v2, v3, e4) \
    _mm512_maskz_fmsub_round_ps(_MM_K0_REG, (v1), (v2), (v3), (e4))
#define _mm512_fmsub_ps(v1, v2, v3) \
    _mm512_fmsub_round_ps((v1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_fmsub_ps(v1, k1, v2, v3) \
    _mm512_mask_fmsub_round_ps((v1), (k1), (v2), (v3), \
                               _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_fmsub_ps(k1, v1, v2, v3) \
    _mm512_maskz_fmsub_round_ps((k1), (v1), (v2), (v3), \
                                _MM_FROUND_CUR_DIRECTION)
#define _mm512_fmsub_round_pd(v1, v2, v3, e4) \
    _mm512_maskz_fmsub_round_pd((__mmask8)_MM_K0_REG, (v1), (v2), (v3), (e4))
#define _mm512_fmsub_pd(v1, v2, v3) \
    _mm512_fmsub_round_pd((v1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_fmsub_pd(v1, k1, v2, v3) \
    _mm512_mask_fmsub_round_pd((v1), (k1), (v2), (v3), \
                               _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_fmsub_pd(k1, v1, v2, v3) \
    _mm512_maskz_fmsub_round_pd((k1), (v1), (v2), (v3), \
                                _MM_FROUND_CUR_DIRECTION)
448 
/* fnmadd wrappers over the extern round intrinsics. */
#define _mm512_fnmadd_round_ps(v1, v2, v3, e4) \
    _mm512_maskz_fnmadd_round_ps(_MM_K0_REG, (v1), (v2), (v3), (e4))
#define _mm512_fnmadd_ps(v1, v2, v3) \
    _mm512_fnmadd_round_ps((v1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_fnmadd_ps(v1, k1, v2, v3) \
    _mm512_mask_fnmadd_round_ps((v1), (k1), (v2), (v3), \
                                _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_fnmadd_ps(k1, v1, v2, v3) \
    _mm512_maskz_fnmadd_round_ps((k1), (v1), (v2), (v3), \
                                 _MM_FROUND_CUR_DIRECTION)
#define _mm512_fnmadd_round_pd(v1, v2, v3, e4) \
    _mm512_maskz_fnmadd_round_pd((__mmask8)_MM_K0_REG, (v1), (v2), (v3), (e4))
#define _mm512_fnmadd_pd(v1, v2, v3) \
    _mm512_fnmadd_round_pd((v1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_fnmadd_pd(v1, k1, v2, v3) \
    _mm512_mask_fnmadd_round_pd((v1), (k1), (v2), (v3), \
                                _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_fnmadd_pd(k1, v1, v2, v3) \
    _mm512_maskz_fnmadd_round_pd((k1), (v1), (v2), (v3), \
                                 _MM_FROUND_CUR_DIRECTION)
469 
/* fnmsub wrappers over the extern round intrinsics. */
#define _mm512_fnmsub_round_ps(v1, v2, v3, e4) \
    _mm512_maskz_fnmsub_round_ps(_MM_K0_REG, (v1), (v2), (v3), (e4))
#define _mm512_fnmsub_ps(v1, v2, v3) \
    _mm512_fnmsub_round_ps((v1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_fnmsub_ps(v1, k1, v2, v3) \
    _mm512_mask_fnmsub_round_ps((v1), (k1), (v2), (v3), \
                                _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_fnmsub_ps(k1, v1, v2, v3) \
    _mm512_maskz_fnmsub_round_ps((k1), (v1), (v2), (v3), \
                                 _MM_FROUND_CUR_DIRECTION)
#define _mm512_fnmsub_round_pd(v1, v2, v3, e4) \
    _mm512_maskz_fnmsub_round_pd((__mmask8)_MM_K0_REG, (v1), (v2), (v3), (e4))
#define _mm512_fnmsub_pd(v1, v2, v3) \
    _mm512_fnmsub_round_pd((v1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_fnmsub_pd(v1, k1, v2, v3) \
    _mm512_mask_fnmsub_round_pd((v1), (k1), (v2), (v3), \
                                _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_fnmsub_pd(k1, v1, v2, v3) \
    _mm512_maskz_fnmsub_round_pd((k1), (v1), (v2), (v3), \
                                 _MM_FROUND_CUR_DIRECTION)
490 
/* sqrt wrappers.  BUGFIX: the _mm512_mask_sqrt_ps/_mm512_mask_sqrt_pd
 * bodies originally ended with a stray ';' and used unparenthesized
 * arguments; both are fixed so these macros work inside expressions. */
#define _mm512_sqrt_ps(v1) \
    _mm512_maskz_sqrt_round_ps(_MM_K0_REG, (v1), _MM_FROUND_CUR_DIRECTION)
#define _mm512_sqrt_round_ps(v1, e2) \
    _mm512_maskz_sqrt_round_ps(_MM_K0_REG, (v1), (e2))
#define _mm512_mask_sqrt_ps(v1, k2, v2) \
    _mm512_mask_sqrt_round_ps((v1), (k2), (v2), _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_sqrt_ps(k1, v1) \
    _mm512_maskz_sqrt_round_ps((k1), (v1), _MM_FROUND_CUR_DIRECTION)
#define _mm512_sqrt_pd(v1) \
    _mm512_maskz_sqrt_round_pd((__mmask8)_MM_K0_REG, (v1), _MM_FROUND_CUR_DIRECTION)
#define _mm512_sqrt_round_pd(v1, e2) \
    _mm512_maskz_sqrt_round_pd((__mmask8)_MM_K0_REG, (v1), (e2))
#define _mm512_mask_sqrt_pd(v1, k2, v2) \
    _mm512_mask_sqrt_round_pd((v1), (k2), (v2), _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_sqrt_pd(k1, v1) \
    _mm512_maskz_sqrt_round_pd((k1), (v1), _MM_FROUND_CUR_DIRECTION)
507 
/* Absolute value (all elements selected via k0). */
#define _mm512_abs_ps(v1) \
    _mm512_maskz_abs_ps(_MM_K0_REG, (v1))
#define _mm512_abs_pd(v1) \
    _mm512_maskz_abs_pd((__mmask8)_MM_K0_REG, (v1))
512 
/* max wrappers (see add wrappers for the masking/rounding scheme). */
#define _mm512_max_ps(v1, v2) \
    _mm512_maskz_max_round_ps(_MM_K0_REG, (v1), (v2), _MM_FROUND_CUR_DIRECTION)
#define _mm512_max_round_ps(v1, v2, e3) \
    _mm512_maskz_max_round_ps(_MM_K0_REG, (v1), (v2), (e3))
#define _mm512_mask_max_ps(v1, k2, v3, v4) \
    _mm512_mask_max_round_ps((v1), (k2), (v3), (v4), _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_max_ps(k1, v2, v3) \
    _mm512_maskz_max_round_ps((k1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_max_pd(v1, v2) \
    _mm512_maskz_max_round_pd((__mmask8)_MM_K0_REG, (v1), (v2), _MM_FROUND_CUR_DIRECTION)
#define _mm512_max_round_pd(v1, v2, e3) \
    _mm512_maskz_max_round_pd((__mmask8)_MM_K0_REG, (v1), (v2), (e3))
#define _mm512_mask_max_pd(v1, k2, v3, v4) \
    _mm512_mask_max_round_pd((v1), (k2), (v3), (v4), _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_max_pd(k1, v2, v3) \
    _mm512_maskz_max_round_pd((k1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
529 
/* min wrappers (see add wrappers for the masking/rounding scheme). */
#define _mm512_min_ps(v1, v2) \
    _mm512_maskz_min_round_ps(_MM_K0_REG, (v1), (v2), _MM_FROUND_CUR_DIRECTION)
#define _mm512_min_round_ps(v1, v2, e3) \
    _mm512_maskz_min_round_ps(_MM_K0_REG, (v1), (v2), (e3))
#define _mm512_mask_min_ps(v1, k2, v3, v4) \
    _mm512_mask_min_round_ps((v1), (k2), (v3), (v4), _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_min_ps(k1, v2, v3) \
    _mm512_maskz_min_round_ps((k1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_min_pd(v1, v2) \
    _mm512_maskz_min_round_pd((__mmask8)_MM_K0_REG, (v1), (v2), _MM_FROUND_CUR_DIRECTION)
#define _mm512_min_round_pd(v1, v2, e3) \
    _mm512_maskz_min_round_pd((__mmask8)_MM_K0_REG, (v1), (v2), (e3))
#define _mm512_mask_min_pd(v1, k2, v3, v4) \
    _mm512_mask_min_round_pd((v1), (k2), (v3), (v4), _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_min_pd(k1, v2, v3) \
    _mm512_maskz_min_round_pd((k1), (v2), (v3), _MM_FROUND_CUR_DIRECTION)
546 
/* Approximate reciprocal / reciprocal-sqrt ("14" refers to the ~2^-14
 * max relative error documented by Intel).  BUGFIX: the original macro
 * bodies ended with a stray ';', which broke use inside expressions;
 * arguments are now also parenthesized. */
#define _mm512_rcp14_ps(v1) \
    _mm512_maskz_rcp14_ps(_MM_K0_REG, (v1))
#define _mm512_rcp14_pd(v1) \
    _mm512_maskz_rcp14_pd((__mmask8)_MM_K0_REG, (v1))

#define _mm512_rsqrt14_ps(v1) \
    _mm512_maskz_rsqrt14_ps(_MM_K0_REG, (v1))
#define _mm512_rsqrt14_pd(v1) \
    _mm512_maskz_rsqrt14_pd((__mmask8)_MM_K0_REG, (v1))
556 
/* float<->double conversion wrappers. */
#define _mm512_cvt_roundps_pd(v1, e2) \
    _mm512_maskz_cvt_roundps_pd((__mmask8)_MM_K0_REG, (v1), (e2))
#define _mm512_cvtps_pd(v1) \
    _mm512_cvt_roundps_pd((v1), _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_cvtps_pd(k1, v2) \
    _mm512_maskz_cvt_roundps_pd((k1), (v2), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cvtps_pd(v1, k2, v3) \
    _mm512_mask_cvt_roundps_pd((v1), (k2), (v3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_cvt_roundpd_ps(v1, e2) \
    _mm512_maskz_cvt_roundpd_ps((__mmask8)_MM_K0_REG, (v1), (e2))
#define _mm512_cvtpd_ps(v1) \
    _mm512_cvt_roundpd_ps((v1), _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_cvtpd_ps(k1, v2) \
    _mm512_maskz_cvt_roundpd_ps((k1), (v2), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cvtpd_ps(v1_old, k1, v2) \
    _mm512_mask_cvt_roundpd_ps((v1_old), (k1), (v2), _MM_FROUND_CUR_DIRECTION)
573 
/* Compare wrappers; i3/i4 is a _CMP_* predicate.  Macro arguments are
 * parenthesized (the originals were not) for expansion safety. */
#define _mm512_cmp_ps_mask(v1, v2, i3) \
    _mm512_mask_cmp_round_ps_mask(_MM_K0_REG, (v1), (v2), (i3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_cmp_round_ps_mask(v1, v2, i3, e4) \
    _mm512_mask_cmp_round_ps_mask(_MM_K0_REG, (v1), (v2), (i3), (e4))
#define _mm512_mask_cmp_ps_mask(k1, v2, v3, i4) \
    _mm512_mask_cmp_round_ps_mask((k1), (v2), (v3), (i4), _MM_FROUND_CUR_DIRECTION)
#define _mm512_cmp_pd_mask(v1, v2, i3) \
    _mm512_mask_cmp_round_pd_mask((__mmask8)_MM_K0_REG, (v1), (v2), (i3), _MM_FROUND_CUR_DIRECTION)
#define _mm512_cmp_round_pd_mask(v1, v2, i3, e4) \
    _mm512_mask_cmp_round_pd_mask((__mmask8)_MM_K0_REG, (v1), (v2), (i3), (e4))
#define _mm512_mask_cmp_pd_mask(k1, v2, v3, i4) \
    _mm512_mask_cmp_round_pd_mask((k1), (v2), (v3), (i4), _MM_FROUND_CUR_DIRECTION)
586 
/* Named single-precision compare shorthands.  Suffixes: O=ordered,
 * U=unordered, Q=non-signaling, S=signaling (per the _CMP_* constants). */
#define _mm512_cmpeq_ps_mask(v1, v2) _mm512_cmp_ps_mask((v1), (v2), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k1, v1, v2) \
    _mm512_mask_cmp_ps_mask((k1), (v1), (v2), _CMP_EQ_OQ)
#define _mm512_cmplt_ps_mask(v1, v2) _mm512_cmp_ps_mask((v1), (v2), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k1, v1, v2) \
    _mm512_mask_cmp_ps_mask((k1), (v1), (v2), _CMP_LT_OS)
#define _mm512_cmple_ps_mask(v1, v2) _mm512_cmp_ps_mask((v1), (v2), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k1, v1, v2) \
    _mm512_mask_cmp_ps_mask((k1), (v1), (v2), _CMP_LE_OS)
#define _mm512_cmpunord_ps_mask(v1, v2) \
    _mm512_cmp_ps_mask((v1), (v2), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k1, v1, v2) \
    _mm512_mask_cmp_ps_mask((k1), (v1), (v2), _CMP_UNORD_Q)
#define _mm512_cmpneq_ps_mask(v1, v2) \
    _mm512_cmp_ps_mask((v1), (v2), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k1, v1, v2) \
    _mm512_mask_cmp_ps_mask((k1), (v1), (v2), _CMP_NEQ_UQ)
#define _mm512_cmpnlt_ps_mask(v1, v2) \
    _mm512_cmp_ps_mask((v1), (v2), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k1, v1, v2) \
    _mm512_mask_cmp_ps_mask((k1), (v1), (v2), _CMP_NLT_US)
#define _mm512_cmpnle_ps_mask(v1, v2) \
    _mm512_cmp_ps_mask((v1), (v2), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k1, v1, v2) \
    _mm512_mask_cmp_ps_mask((k1), (v1), (v2), _CMP_NLE_US)
#define _mm512_cmpord_ps_mask(v1, v2) \
    _mm512_cmp_ps_mask((v1), (v2), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k1, v1, v2) \
    _mm512_mask_cmp_ps_mask((k1), (v1), (v2), _CMP_ORD_Q)
616 
/*
 * Named double-precision comparison predicates; the _CMP_* encodings
 * mirror the single-precision set above (O = ordered, U = unordered,
 * Q = quiet, S = signaling).
 */
#define _mm512_cmpeq_pd_mask(v1, v2) _mm512_cmp_pd_mask((v1), (v2), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k1, v1, v2) \
    _mm512_mask_cmp_pd_mask((k1), (v1), (v2), _CMP_EQ_OQ)
#define _mm512_cmplt_pd_mask(v1, v2) _mm512_cmp_pd_mask((v1), (v2), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k1, v1, v2) \
    _mm512_mask_cmp_pd_mask((k1), (v1), (v2), _CMP_LT_OS)
#define _mm512_cmple_pd_mask(v1, v2) _mm512_cmp_pd_mask((v1), (v2), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k1, v1, v2) \
    _mm512_mask_cmp_pd_mask((k1), (v1), (v2), _CMP_LE_OS)
#define _mm512_cmpunord_pd_mask(v1, v2) \
    _mm512_cmp_pd_mask((v1), (v2), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k1, v1, v2) \
    _mm512_mask_cmp_pd_mask((k1), (v1), (v2), _CMP_UNORD_Q)
#define _mm512_cmpneq_pd_mask(v1, v2) \
    _mm512_cmp_pd_mask((v1), (v2), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k1, v1, v2) \
    _mm512_mask_cmp_pd_mask((k1), (v1), (v2), _CMP_NEQ_UQ)
#define _mm512_cmpnlt_pd_mask(v1, v2) \
    _mm512_cmp_pd_mask((v1), (v2), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k1, v1, v2) \
    _mm512_mask_cmp_pd_mask((k1), (v1), (v2), _CMP_NLT_US)
#define _mm512_cmpnle_pd_mask(v1, v2) \
    _mm512_cmp_pd_mask((v1), (v2), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k1, v1, v2) \
    _mm512_mask_cmp_pd_mask((k1), (v1), (v2), _CMP_NLE_US)
#define _mm512_cmpord_pd_mask(v1, v2) \
    _mm512_cmp_pd_mask((v1), (v2), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k1, v1, v2) \
    _mm512_mask_cmp_pd_mask((k1), (v1), (v2), _CMP_ORD_Q)
646 
/*
 * Unmasked broadcast operations, expressed as the zero-masking flavor
 * with an all-ones mask (_MM_K0_REG).  Double-precision variants cast
 * the mask to __mmask8 since a __m512d holds only 8 elements.
 */
#define _mm512_broadcast_f32x2(v1) \
    _mm512_maskz_broadcast_f32x2(_MM_K0_REG, (v1))
#define _mm512_broadcast_f32x4(v1) \
    _mm512_maskz_broadcast_f32x4(_MM_K0_REG, (v1))
#define _mm512_broadcast_f32x8(v1) \
    _mm512_maskz_broadcast_f32x8(_MM_K0_REG, (v1))
#define _mm512_broadcast_f64x2(v1) \
    _mm512_maskz_broadcast_f64x2((__mmask8)_MM_K0_REG, (v1))
#define _mm512_broadcast_f64x4(v1) \
    _mm512_maskz_broadcast_f64x4((__mmask8)_MM_K0_REG, (v1))
#define _mm512_broadcastsd_pd(v1) \
    _mm512_maskz_broadcastsd_pd((__mmask8)_MM_K0_REG, (v1))
#define _mm512_broadcastss_ps(v1) \
    _mm512_maskz_broadcastss_ps(_MM_K0_REG, (v1))
661 
/*
 * Unmasked 128/256-bit lane extraction, expressed as the zero-masking
 * flavor with an all-ones mask.  The result is at most 8 elements wide
 * in every case, so the mask is always cast to __mmask8.
 * e2 selects which lane to extract.
 */
#define _mm512_extractf32x4_ps(v1, e2) \
    _mm512_maskz_extractf32x4_ps((__mmask8)_MM_K0_REG, (v1), (e2))
#define _mm512_extractf32x8_ps(v1, e2) \
    _mm512_maskz_extractf32x8_ps((__mmask8)_MM_K0_REG, (v1), (e2))
#define _mm512_extractf64x2_pd(v1, e2) \
    _mm512_maskz_extractf64x2_pd((__mmask8)_MM_K0_REG, (v1), (e2))
#define _mm512_extractf64x4_pd(v1, e2) \
    _mm512_maskz_extractf64x4_pd((__mmask8)_MM_K0_REG, (v1), (e2))
670 
/*
 * Unmasked 128/256-bit lane insertion, expressed as the zero-masking
 * flavor with an all-ones mask (cast to __mmask8 for the 8-element
 * double-precision forms).  e3 selects the destination lane.
 */
#define _mm512_insertf32x4(v1, v2, e3) \
    _mm512_maskz_insertf32x4(_MM_K0_REG, (v1), (v2), (e3))
#define _mm512_insertf32x8(v1, v2, e3) \
    _mm512_maskz_insertf32x8(_MM_K0_REG, (v1), (v2), (e3))
#define _mm512_insertf64x2(v1, v2, e3) \
    _mm512_maskz_insertf64x2((__mmask8)_MM_K0_REG, (v1), (v2), (e3))
#define _mm512_insertf64x4(v1, v2, e3) \
    _mm512_maskz_insertf64x4((__mmask8)_MM_K0_REG, (v1), (v2), (e3))
679 
/*
 * Unmasked shuffles, expressed as the zero-masking flavor with an
 * all-ones mask (cast to __mmask8 for the 8-element double-precision
 * forms).  e3 is the shuffle-control immediate.
 */
#define _mm512_shuffle_f32x4(v1, v2, e3) \
    _mm512_maskz_shuffle_f32x4(_MM_K0_REG, (v1), (v2), (e3))
#define _mm512_shuffle_f64x2(v1, v2, e3) \
    _mm512_maskz_shuffle_f64x2((__mmask8)_MM_K0_REG, (v1), (v2), (e3))
#define _mm512_shuffle_pd(v1, v2, e3) \
    _mm512_maskz_shuffle_pd((__mmask8)_MM_K0_REG, (v1), (v2), (e3))
#define _mm512_shuffle_ps(v1, v2, e3) \
    _mm512_maskz_shuffle_ps(_MM_K0_REG, (v1), (v2), (e3))
688 
/*
 * Bitwise operations on 16-bit opmask (__mmask16) values, implemented
 * as plain integer expressions.  Each result is cast back to
 * __mmask16 to truncate the int-promoted intermediate to 16 bits.
 */
#define _mm512_kand(k1, k2) \
    ((__mmask16) ((k1) & (k2)))
/* AND-NOT: complements the FIRST operand, like the KANDNW encoding. */
#define _mm512_kandn(k1, k2) \
    ((__mmask16) (~(k1) & (k2)))
#define _mm512_kor(k1, k2) \
    ((__mmask16) ((k1) | (k2)))
#define _mm512_kxor(k1, k2) \
    ((__mmask16) ((k1) ^ (k2)))
#define _mm512_kxnor(k1, k2) \
    ((__mmask16) (~((k1) ^ (k2))))
#define _mm512_knot(k1) \
    ((__mmask16) (~(k1)))
701 
702 #ifdef __cplusplus
703 };
704 #endif /* __cplusplus */
705 
706 
707 #endif /* _ZMMINTRIN_H_INCLUDED */
__m512 __cdecl _mm512_mask_broadcast_f32x8(__m512, __mmask16, __m256)
__m512d __cdecl _mm512_mask_add_round_pd(__m512d, __mmask8, __m512d, __m512d, const int)
__m512d __cdecl _mm512_maskz_cvt_roundps_pd(__mmask8, __m256, const int)
__m512 __cdecl _mm512_maskz_sub_round_ps(__mmask16, __m512, __m512, const int)
__m512d __cdecl _mm512_maskz_broadcast_f64x2(__mmask8, __m128d)
__m512 __cdecl _mm512_mask3_fmadd_round_ps(__m512, __m512, __m512, __mmask16, const int)
__m512 __cdecl _mm512_mask_shuffle_ps(__m512, __mmask16, __m512, __m512, const int)
__m512 __cdecl _mm512_set_ps(float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float)
__m512d __cdecl _mm512_maskz_rsqrt14_pd(__mmask8, __m512d)
__m512 __cdecl _mm512_mask_fmsub_round_ps(__m512, __mmask16, __m512, __m512, const int)
__m256 __cdecl _mm512_mask_cvt_roundpd_ps(__m256, __mmask8, __m512d, const int)
__m512 __cdecl _mm512_setzero_ps(void)
__m512 __cdecl _mm512_mask_fnmadd_round_ps(__m512, __mmask16, __m512, __m512, const int)
__m512 __cdecl _mm512_maskz_load_ps(__mmask16, void const *)
__m512 __cdecl _mm512_maskz_max_round_ps(__mmask16, __m512, __m512, const int)
void __cdecl _mm512_storeu_pd(void *, __m512d)
__m512d __cdecl _mm512_castpd128_pd512(__m128d)
__m512d __cdecl _mm512_mask_abs_pd(__m512d, __mmask8, __m512d)
__m512 __cdecl _mm512_maskz_rsqrt28_round_ps(__mmask16, __m512, const int)
__m512 __cdecl _mm512_maskz_insertf32x8(__mmask16, __m512, __m256, int)
void __cdecl _mm512_mask_storeu_pd(void *, __mmask8, __m512d)
__m512d __cdecl _mm512_mask_fnmadd_round_pd(__m512d, __mmask8, __m512d, __m512d, const int)
__m256 __cdecl _mm512_maskz_extractf32x8_ps(__mmask8, __m512, int)
unsigned char __mmask8
Definition: zmmintrin.h:43
__m512 __cdecl _mm512_maskz_min_round_ps(__mmask16, __m512, __m512, const int)
__m256 __cdecl _mm512_maskz_cvt_roundpd_ps(__mmask8, __m512d, const int)
__m256i __cdecl _mm512_castsi512_si256(__m512i)
__m512 __cdecl _mm512_mask_rcp14_ps(__m512, __mmask16, __m512)
__m512d __cdecl _mm512_maskz_div_round_pd(__mmask8, __m512d, __m512d, const int)
__m512d __cdecl _mm512_maskz_insertf64x4(__mmask8, __m512d, __m256d, int)
__m512 __cdecl _mm512_mask_broadcastss_ps(__m512, __mmask16, __m128)
__m512d
Definition: zmmintrin.h:54
__m512d __cdecl _mm512_mask_shuffle_pd(__m512d, __mmask8, __m512d, __m512d, const int)
__m512d __cdecl _mm512_maskz_mul_round_pd(__mmask8, __m512d, __m512d, const int)
__m512 __cdecl _mm512_mask_rsqrt14_ps(__m512, __mmask16, __m512)
__m512 __cdecl _mm512_mask_shuffle_f32x4(__m512, __mmask16, __m512, __m512, const int)
unsigned __int64 __mmask64
Definition: zmmintrin.h:46
__m512 __cdecl _mm512_mask_insertf32x8(__m512, __mmask16, __m512, __m256, const int)
__m512d __cdecl _mm512_maskz_rcp28_round_pd(__mmask8, __m512d, const int)
void __cdecl _mm512_mask_store_pd(void *, __mmask8, __m512d)
__m512d __cdecl _mm512_maskz_load_pd(__mmask8, void const *)
__m512 __cdecl _mm512_mask_rsqrt28_round_ps(__m512, __mmask16, __m512, const int)
__m512d __cdecl _mm512_mask3_fnmadd_round_pd(__m512d, __m512d, __m512d, __mmask8, const int)
__m512d __cdecl _mm512_maskz_fmadd_round_pd(__mmask8, __m512d, __m512d, __m512d, const int)
__m512d __cdecl _mm512_mask3_fmsub_round_pd(__m512d, __m512d, __m512d, __mmask8, const int)
__m512d __cdecl _mm512_maskz_max_round_pd(__mmask8, __m512d, __m512d, const int)
__m512d __cdecl _mm512_mask_sub_round_pd(__m512d, __mmask8, __m512d, __m512d, const int)
__m512 __cdecl _mm512_mask_loadu_ps(__m512, __mmask16, void const *)
__m128d
Definition: emmintrin.h:57
__m512d __cdecl _mm512_mask_max_round_pd(__m512d, __mmask8, __m512d, __m512d, const int)
__m256d
Definition: immintrin.h:43
__m512d __cdecl _mm512_mask_loadu_pd(__m512d, __mmask8, void const *)
__m512d __cdecl _mm512_mask_fmsubadd_round_pd(__m512d, __mmask8, __m512d, __m512d, const int)
__m512d __cdecl _mm512_maskz_abs_pd(__mmask8, __m512d)
__m512d __cdecl _mm512_maskz_rsqrt28_round_pd(__mmask8, __m512d, const int)
__m512 __cdecl _mm512_mask_fmadd_round_ps(__m512, __mmask16, __m512, __m512, const int)
__m512d __cdecl _mm512_mask_sqrt_round_pd(__m512d, __mmask8, __m512d, const int)
__m512 __cdecl _mm512_maskz_mul_round_ps(__mmask16, __m512, __m512, const int)
__m512 __cdecl _mm512_mask_sqrt_round_ps(__m512, __mmask16, __m512, const int)
Definition: zmmintrin.h:101
__m512d __cdecl _mm512_mask3_fmaddsub_round_pd(__m512d, __m512d, __m512d, __mmask8, const int)
__m512d __cdecl _mm512_mask_rsqrt14_pd(__m512d, __mmask8, __m512d)
Definition: zmmintrin.h:102
__m512 __cdecl _mm512_maskz_broadcast_f32x2(__mmask16, __m128)
__m128 __cdecl _mm512_mask_extractf32x4_ps(__m128, __mmask8, __m512, const int)
__m512d __cdecl _mm512_maskz_broadcastsd_pd(__mmask8, __m128d)
__m512 __cdecl _mm512_mask_fmaddsub_round_ps(__m512, __mmask16, __m512, __m512, const int)
__m512d __cdecl _mm512_mask_min_round_pd(__m512d, __mmask8, __m512d, __m512d, const int)
__m512 __cdecl _mm512_maskz_broadcast_f32x4(__mmask16, __m128)
__m512 __cdecl _mm512_maskz_loadu_ps(__mmask16, void const *)
__m512d __cdecl _mm512_maskz_loadu_pd(__mmask8, void const *)
__m512
Definition: zmmintrin.h:50
__m512d __cdecl _mm512_maskz_shuffle_f64x2(__mmask8, __m512d, __m512d, const int)
void __cdecl _mm512_mask_store_ps(void *, __mmask16, __m512)
__m512 __cdecl _mm512_mask3_fnmsub_round_ps(__m512, __m512, __m512, __mmask16, const int)
__m512d __cdecl _mm512_maskz_shuffle_pd(__mmask8, __m512d, __m512d, const int)
__mmask16 __cdecl _mm512_mask_cmp_round_ps_mask(__mmask16, __m512, __m512, const int, const int)
__m128 __cdecl _mm512_maskz_extractf32x4_ps(__mmask8, __m512, int)
__m512d __cdecl _mm512_set_pd(double, double, double, double, double, double, double, double)
__m512 __cdecl _mm512_mask_add_round_ps(__m512, __mmask16, __m512, __m512, const int)
__m512 __cdecl _mm512_mask3_fnmadd_round_ps(__m512, __m512, __m512, __mmask16, const int)
Definition: zmmintrin.h:103
__m128d __cdecl _mm512_mask_extractf64x2_pd(__m128d, __mmask8, __m512d, const int)
__m512d __cdecl _mm512_mask_insertf64x2(__m512d, __mmask8, __m512d, __m128d, const int)
__m512d __cdecl _mm512_mask3_fmsubadd_round_pd(__m512d, __m512d, __m512d, __mmask8, const int)
__m512d __cdecl _mm512_maskz_insertf64x2(__mmask8, __m512d, __m128d, int)
__m512 __cdecl _mm512_maskz_sqrt_round_ps(__mmask16, __m512, const int)
__m512 __cdecl _mm512_mask_fmsubadd_round_ps(__m512, __mmask16, __m512, __m512, const int)
__m512 __cdecl _mm512_maskz_add_round_ps(__mmask16, __m512, __m512, const int)
__m512 __cdecl _mm512_mask_max_round_ps(__m512, __mmask16, __m512, __m512, const int)
__m512d __cdecl _mm512_castpd256_pd512(__m256d)
__m512d __cdecl _mm512_maskz_fnmsub_round_pd(__mmask8, __m512d, __m512d, __m512d, const int)
__m256d __cdecl _mm512_castpd512_pd256(__m512d)
__m512 __cdecl _mm512_mask_abs_ps(__m512, __mmask16, __m512)
__m512d __cdecl _mm512_maskz_rcp14_pd(__mmask8, __m512d)
__m512 __cdecl _mm512_castsi512_ps(__m512i)
__m512d __cdecl _mm512_mask_cvt_roundps_pd(__m512d, __mmask8, __m256, const int)
Definition: zmmintrin.h:100
__m512i
Definition: zmmintrin.h:65
__m512 __cdecl _mm512_mask_min_round_ps(__m512, __mmask16, __m512, __m512, const int)
__m512d __cdecl _mm512_mask_mul_round_pd(__m512d, __mmask8, __m512d, __m512d, const int)
__m512 __cdecl _mm512_mask_broadcast_f32x4(__m512, __mmask16, __m128)
__m512d __cdecl _mm512_castsi512_pd(__m512i)
__m512 __cdecl _mm512_mask3_fmsubadd_round_ps(__m512, __m512, __m512, __mmask16, const int)
__m512d __cdecl _mm512_mask_rcp28_round_pd(__m512d, __mmask8, __m512d, const int)
__m512d __cdecl _mm512_maskz_sub_round_pd(__mmask8, __m512d, __m512d, const int)
__m128d __cdecl _mm512_maskz_extractf64x2_pd(__mmask8, __m512d, int)
__m512 __cdecl _mm512_mask_div_round_ps(__m512, __mmask16, __m512, __m512, const int)
__m512 __cdecl _mm512_maskz_shuffle_f32x4(__mmask16, __m512, __m512, const int)
__m512 __cdecl _mm512_maskz_rsqrt14_ps(__mmask16, __m512)
__m256d __cdecl _mm512_maskz_extractf64x4_pd(__mmask8, __m512d, int)
__m512 __cdecl _mm512_mask3_fmsub_round_ps(__m512, __m512, __m512, __mmask16, const int)
__m512d __cdecl _mm512_mask_div_round_pd(__m512d, __mmask8, __m512d, __m512d, const int)
__m256
Definition: immintrin.h:39
__m512 __cdecl _mm512_mask_sub_round_ps(__m512, __mmask16, __m512, __m512, const int)
__m512i __cdecl _mm512_castpd_si512(__m512d)
__m512 __cdecl _mm512_maskz_fmsubadd_round_ps(__mmask16, __m512, __m512, __m512, const int)
__m512 __cdecl _mm512_maskz_div_round_ps(__mmask16, __m512, __m512, const int)
__m128
Definition: xmmintrin.h:75
__m512d __cdecl _mm512_mask_fmsub_round_pd(__m512d, __mmask8, __m512d, __m512d, const int)
__m512 __cdecl _mm512_maskz_fnmsub_round_ps(__mmask16, __m512, __m512, __m512, const int)
__m256 __cdecl _mm512_mask_extractf32x8_ps(__m256, __mmask8, __m512, const int)
__m512 __cdecl _mm512_maskz_abs_ps(__mmask16, __m512)
__m512 __cdecl _mm512_maskz_shuffle_ps(__mmask16, __m512, __m512, const int)
__m512d __cdecl _mm512_mask_load_pd(__m512d, __mmask8, void const *)
__m512 __cdecl _mm512_castps128_ps512(__m128)
__m512d __cdecl _mm512_maskz_min_round_pd(__mmask8, __m512d, __m512d, const int)
void __cdecl _mm512_mask_storeu_ps(void *, __mmask16, __m512)
__m512 __cdecl _mm512_maskz_insertf32x4(__mmask16, __m512, __m128, int)
__m512 __cdecl _mm512_mask_broadcast_f32x2(__m512, __mmask16, __m128)
__m512 __cdecl _mm512_set1_ps(float)
__m512d __cdecl _mm512_mask3_fnmsub_round_pd(__m512d, __m512d, __m512d, __mmask8, const int)
__m512d __cdecl _mm512_maskz_fmsub_round_pd(__mmask8, __m512d, __m512d, __m512d, const int)
__m512d __cdecl _mm512_mask_broadcastsd_pd(__m512d, __mmask8, __m128d)
__m512 __cdecl _mm512_maskz_fnmadd_round_ps(__mmask16, __m512, __m512, __m512, const int)
__m512d __cdecl _mm512_mask_fnmsub_round_pd(__m512d, __mmask8, __m512d, __m512d, const int)
__m512 __cdecl _mm512_maskz_broadcastss_ps(__mmask16, __m128)
__m512d __cdecl _mm512_maskz_fmaddsub_round_pd(__mmask8, __m512d, __m512d, __m512d, const int)
__m512i __cdecl _mm512_castps_si512(__m512)
__m512d __cdecl _mm512_mask_insertf64x4(__m512d, __mmask8, __m512d, __m256d, const int)
__m512d __cdecl _mm512_maskz_fnmadd_round_pd(__mmask8, __m512d, __m512d, __m512d, const int)
__m512 __cdecl _mm512_mask_fnmsub_round_ps(__m512, __mmask16, __m512, __m512, const int)
unsigned short __mmask16
Definition: zmmintrin.h:44
__m512d __cdecl _mm512_mask_rcp14_pd(__m512d, __mmask8, __m512d)
__mmask8 __cdecl _mm512_mask_cmp_round_pd_mask(__mmask8, __m512d, __m512d, const int, const int)
__m512d __cdecl _mm512_castps_pd(__m512)
__m512 __cdecl _mm512_castpd_ps(__m512d)
__m512 __cdecl _mm512_mask_insertf32x4(__m512, __mmask16, __m512, __m128, const int)
__m512 __cdecl _mm512_castps256_ps512(__m256)
__m512d __cdecl _mm512_maskz_broadcast_f64x4(__mmask8, __m256d)
__m512d __cdecl _mm512_mask_broadcast_f64x4(__m512d, __mmask8, __m256d)
void __cdecl _mm512_storeu_ps(void *, __m512)
__m256d __cdecl _mm512_mask_extractf64x4_pd(__m256d, __mmask8, __m512d, const int)
__m512d __cdecl _mm512_mask3_fmadd_round_pd(__m512d, __m512d, __m512d, __mmask8, const int)
unsigned int __mmask32
Definition: zmmintrin.h:45
_MM_ROUND_MODE_ENUM
Definition: zmmintrin.h:99
__m512d __cdecl _mm512_maskz_fmsubadd_round_pd(__mmask8, __m512d, __m512d, __m512d, const int)
__m512d __cdecl _mm512_set1_pd(double)
Definition: zmmintrin.h:104
__m512 __cdecl _mm512_maskz_broadcast_f32x8(__mmask16, __m256)
__m512 __cdecl _mm512_maskz_rcp28_round_ps(__mmask16, __m512, const int)
__m512 __cdecl _mm512_maskz_rcp14_ps(__mmask16, __m512)
__m512 __cdecl _mm512_mask_rcp28_round_ps(__m512, __mmask16, __m512, const int)
__m512d __cdecl _mm512_mask_rsqrt28_round_pd(__m512d, __mmask8, __m512d, const int)
__m512 __cdecl _mm512_maskz_fmsub_round_ps(__mmask16, __m512, __m512, __m512, const int)
__m512d __cdecl _mm512_mask_shuffle_f64x2(__m512d, __mmask8, __m512d, __m512d, const int)
__m512 __cdecl _mm512_mask_load_ps(__m512, __mmask16, void const *)
__m512i __cdecl _mm512_castsi256_si512(__m256i)
__m512d __cdecl _mm512_mask_fmaddsub_round_pd(__m512d, __mmask8, __m512d, __m512d, const int)
__m512 __cdecl _mm512_maskz_fmadd_round_ps(__mmask16, __m512, __m512, __m512, const int)
__m512d __cdecl _mm512_maskz_add_round_pd(__mmask8, __m512d, __m512d, const int)
__m256i
Definition: immintrin.h:54
__m512d __cdecl _mm512_mask_fmadd_round_pd(__m512d, __mmask8, __m512d, __m512d, const int)
__m512 __cdecl _mm512_mask_mul_round_ps(__m512, __mmask16, __m512, __m512, const int)
__m512 __cdecl _mm512_mask3_fmaddsub_round_ps(__m512, __m512, __m512, __mmask16, const int)
__m512d __cdecl _mm512_setzero_pd(void)
__m256 __cdecl _mm512_castps512_ps256(__m512)
__m512 __cdecl _mm512_maskz_fmaddsub_round_ps(__mmask16, __m512, __m512, __m512, const int)
__m512d __cdecl _mm512_maskz_sqrt_round_pd(__mmask8, __m512d, const int)
__m512d __cdecl _mm512_mask_broadcast_f64x2(__m512d, __mmask8, __m128d)