27 #ifndef _IMMINTRIN_H_INCLUDED
28 # error "Never use <avxintrin.h> directly; include <immintrin.h> instead."
/* __v4di: vector of long long elements, 32 bytes in total
   (GCC vector_size extension).  */
34 typedef long long __v4di
__attribute__ ((__vector_size__ (32)));
/* __m256i: 32-byte vector of long long, used below as the integer-vector
   parameter/return type.  NOTE(review): the attribute list is cut off
   after the comma in this excerpt -- confirm the remaining attribute(s)
   against the full header.  */
43 typedef long long __m256i
__attribute__ ((__vector_size__ (32),
/* Comparison predicate codes, one per value in 0x00 .. 0x1f.
   Presumably these are the values passed as the P / __P operand of the
   _mm_cmp_* and _mm256_cmp_* intrinsics in this header -- confirm.
   Naming, per Intel's AVX documentation: the relation (EQ, LT, LE, NEQ,
   NLT, NLE, NGE, NGT, GE, GT, ORD, UNORD, TRUE, FALSE) is followed by
   O/U = ordered/unordered and Q/S = quiet (non-signaling)/signaling.
   Codes 0x10 .. 0x1f repeat the relations of 0x00 .. 0x0f with the
   Q/S choice flipped.  */
51 #define _CMP_EQ_OQ 0x00
53 #define _CMP_LT_OS 0x01
55 #define _CMP_LE_OS 0x02
57 #define _CMP_UNORD_Q 0x03
59 #define _CMP_NEQ_UQ 0x04
61 #define _CMP_NLT_US 0x05
63 #define _CMP_NLE_US 0x06
65 #define _CMP_ORD_Q 0x07
67 #define _CMP_EQ_UQ 0x08
69 #define _CMP_NGE_US 0x09
71 #define _CMP_NGT_US 0x0a
73 #define _CMP_FALSE_OQ 0x0b
75 #define _CMP_NEQ_OQ 0x0c
77 #define _CMP_GE_OS 0x0d
79 #define _CMP_GT_OS 0x0e
81 #define _CMP_TRUE_UQ 0x0f
83 #define _CMP_EQ_OS 0x10
85 #define _CMP_LT_OQ 0x11
87 #define _CMP_LE_OQ 0x12
89 #define _CMP_UNORD_S 0x13
91 #define _CMP_NEQ_US 0x14
93 #define _CMP_NLT_UQ 0x15
95 #define _CMP_NLE_UQ 0x16
97 #define _CMP_ORD_S 0x17
99 #define _CMP_EQ_US 0x18
101 #define _CMP_NGE_UQ 0x19
103 #define _CMP_NGT_UQ 0x1a
105 #define _CMP_FALSE_OS 0x1b
107 #define _CMP_NEQ_OS 0x1c
109 #define _CMP_GE_OQ 0x1d
111 #define _CMP_GT_OQ 0x1e
113 #define _CMP_TRUE_US 0x1f
/* Add / add-sub wrappers.  Each is an always-inline function that casts
   its two operands to the element vector type (__v4df or __v8sf),
   forwards them to the named __builtin_ia32_* builtin and casts the
   result back to __m256d / __m256.
   NOTE(review): the numeric line prefixes and missing braces in this
   listing look like extraction residue -- confirm against the header.  */

/* _mm256_add_pd: returns __builtin_ia32_addpd256 (__A, __B).  */
115 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
116 _mm256_add_pd (__m256d
__A, __m256d
__B)
118 return (__m256d) __builtin_ia32_addpd256 ((__v4df)__A, (__v4df)__B);
/* _mm256_add_ps: returns __builtin_ia32_addps256 (__A, __B).  */
121 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
122 _mm256_add_ps (__m256
__A, __m256
__B)
124 return (__m256) __builtin_ia32_addps256 ((__v8sf)__A, (__v8sf)__B);
/* _mm256_addsub_pd: returns __builtin_ia32_addsubpd256 (__A, __B).  */
127 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
128 _mm256_addsub_pd (__m256d __A, __m256d __B)
130 return (__m256d) __builtin_ia32_addsubpd256 ((__v4df)__A, (__v4df)__B);
/* _mm256_addsub_ps: returns __builtin_ia32_addsubps256 (__A, __B).  */
133 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
134 _mm256_addsub_ps (__m256 __A, __m256 __B)
136 return (__m256) __builtin_ia32_addsubps256 ((__v8sf)__A, (__v8sf)__B);
/* AND / AND-NOT wrappers: each forwards both operands, cast to __v4df or
   __v8sf, to the corresponding builtin and casts the result back.  */

/* _mm256_and_pd: returns __builtin_ia32_andpd256 (__A, __B).  */
140 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
141 _mm256_and_pd (__m256d __A, __m256d __B)
143 return (__m256d) __builtin_ia32_andpd256 ((__v4df)__A, (__v4df)__B);
/* _mm256_and_ps: returns __builtin_ia32_andps256 (__A, __B).  */
146 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
147 _mm256_and_ps (__m256 __A, __m256 __B)
149 return (__m256) __builtin_ia32_andps256 ((__v8sf)__A, (__v8sf)__B);
/* _mm256_andnot_pd: returns __builtin_ia32_andnpd256 (__A, __B).
   NOTE(review): presumably computes (~__A) & __B as the ANDN instructions
   do -- confirm operand roles against the ISA reference.  */
152 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
153 _mm256_andnot_pd (__m256d __A, __m256d __B)
155 return (__m256d) __builtin_ia32_andnpd256 ((__v4df)__A, (__v4df)__B);
/* _mm256_andnot_ps: returns __builtin_ia32_andnps256 (__A, __B).  */
158 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
159 _mm256_andnot_ps (__m256 __A, __m256 __B)
161 return (__m256) __builtin_ia32_andnps256 ((__v8sf)__A, (__v8sf)__B);
/* Inline-function fragment returning __builtin_ia32_blendpd256 on __X.
   The function name, parameter list and remaining call arguments are not
   visible in this excerpt.  */
168 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
171 return (__m256d) __builtin_ia32_blendpd256 ((__v4df)__X,
/* Inline-function fragment returning __builtin_ia32_blendps256 on __X;
   signature and remaining arguments are likewise not visible here.  */
176 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
179 return (__m256) __builtin_ia32_blendps256 ((__v8sf)__X,
/* Macro form: casts X and Y through __m256d to __v4df and M to int, then
   calls __builtin_ia32_blendpd256.  */
184 #define _mm256_blend_pd(X, Y, M) \
185 ((__m256d) __builtin_ia32_blendpd256 ((__v4df)(__m256d)(X), \
186 (__v4df)(__m256d)(Y), (int)(M)))
/* Macro form: casts X and Y through __m256 to __v8sf and M to int, then
   calls __builtin_ia32_blendps256.  */
188 #define _mm256_blend_ps(X, Y, M) \
189 ((__m256) __builtin_ia32_blendps256 ((__v8sf)(__m256)(X), \
190 (__v8sf)(__m256)(Y), (int)(M)))
/* _mm256_blendv_pd: takes two __m256d sources (__X, __Y) and a __m256d
   mask __M and returns __builtin_ia32_blendvpd256 (...).  Only the first
   call argument is visible in this excerpt.  */
193 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
194 _mm256_blendv_pd (__m256d __X, __m256d __Y, __m256d __M)
196 return (__m256d) __builtin_ia32_blendvpd256 ((__v4df)__X,
/* _mm256_blendv_ps: single-precision counterpart, forwarding to
   __builtin_ia32_blendvps256; remaining call arguments not visible.  */
201 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
202 _mm256_blendv_ps (__m256 __X, __m256 __Y, __m256 __M)
204 return (__m256) __builtin_ia32_blendvps256 ((__v8sf)__X,
/* _mm256_div_pd: returns __builtin_ia32_divpd256 (__A, __B) with both
   operands cast to __v4df.  */
209 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
210 _mm256_div_pd (__m256d __A, __m256d __B)
212 return (__m256d) __builtin_ia32_divpd256 ((__v4df)__A, (__v4df)__B);
/* _mm256_div_ps: returns __builtin_ia32_divps256 (__A, __B) with both
   operands cast to __v8sf.  */
215 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
216 _mm256_div_ps (__m256 __A, __m256 __B)
218 return (__m256) __builtin_ia32_divps256 ((__v8sf)__A, (__v8sf)__B);
/* Inline-function fragment returning __builtin_ia32_dpps256 on __X; the
   function name, parameters and remaining arguments are not visible in
   this excerpt.  */
225 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
228 return (__m256) __builtin_ia32_dpps256 ((__v8sf)__X,
/* Macro form: casts X and Y through __m256 to __v8sf and M to int, then
   calls __builtin_ia32_dpps256.  */
233 #define _mm256_dp_ps(X, Y, M) \
234 ((__m256) __builtin_ia32_dpps256 ((__v8sf)(__m256)(X), \
235 (__v8sf)(__m256)(Y), (int)(M)))
/* Horizontal add / subtract wrappers: each forwards __X and __Y, cast to
   the element vector type, to the named builtin.  */

/* _mm256_hadd_pd: returns __builtin_ia32_haddpd256 (__X, __Y).  */
238 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
239 _mm256_hadd_pd (__m256d __X, __m256d __Y)
241 return (__m256d) __builtin_ia32_haddpd256 ((__v4df)__X, (__v4df)__Y);
/* _mm256_hadd_ps: returns __builtin_ia32_haddps256 (__X, __Y).  */
244 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
245 _mm256_hadd_ps (__m256 __X, __m256 __Y)
247 return (__m256) __builtin_ia32_haddps256 ((__v8sf)__X, (__v8sf)__Y);
/* _mm256_hsub_pd: returns __builtin_ia32_hsubpd256 (__X, __Y).  */
250 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
251 _mm256_hsub_pd (__m256d __X, __m256d __Y)
253 return (__m256d) __builtin_ia32_hsubpd256 ((__v4df)__X, (__v4df)__Y);
/* _mm256_hsub_ps: returns __builtin_ia32_hsubps256 (__X, __Y).  */
256 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
257 _mm256_hsub_ps (__m256 __X, __m256 __Y)
259 return (__m256) __builtin_ia32_hsubps256 ((__v8sf)__X, (__v8sf)__Y);
/* Max / min wrappers: each forwards __A and __B, cast to the element
   vector type, to the named builtin and casts the result back.  */

/* _mm256_max_pd: returns __builtin_ia32_maxpd256 (__A, __B).  */
262 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
263 _mm256_max_pd (__m256d __A, __m256d __B)
265 return (__m256d) __builtin_ia32_maxpd256 ((__v4df)__A, (__v4df)__B);
/* _mm256_max_ps: returns __builtin_ia32_maxps256 (__A, __B).  */
268 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
269 _mm256_max_ps (__m256 __A, __m256 __B)
271 return (__m256) __builtin_ia32_maxps256 ((__v8sf)__A, (__v8sf)__B);
/* _mm256_min_pd: returns __builtin_ia32_minpd256 (__A, __B).  */
274 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
275 _mm256_min_pd (__m256d __A, __m256d __B)
277 return (__m256d) __builtin_ia32_minpd256 ((__v4df)__A, (__v4df)__B);
/* _mm256_min_ps: returns __builtin_ia32_minps256 (__A, __B).  */
280 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
281 _mm256_min_ps (__m256 __A, __m256 __B)
283 return (__m256) __builtin_ia32_minps256 ((__v8sf)__A, (__v8sf)__B);
/* _mm256_mul_pd: returns __builtin_ia32_mulpd256 (__A, __B) with both
   operands cast to __v4df.  */
286 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
287 _mm256_mul_pd (__m256d __A, __m256d __B)
289 return (__m256d) __builtin_ia32_mulpd256 ((__v4df)__A, (__v4df)__B);
/* _mm256_mul_ps: returns __builtin_ia32_mulps256 (__A, __B) with both
   operands cast to __v8sf.  */
292 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
293 _mm256_mul_ps (__m256 __A, __m256 __B)
295 return (__m256) __builtin_ia32_mulps256 ((__v8sf)__A, (__v8sf)__B);
/* _mm256_or_pd: returns __builtin_ia32_orpd256 (__A, __B) with both
   operands cast to __v4df.  */
298 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
299 _mm256_or_pd (__m256d __A, __m256d __B)
301 return (__m256d) __builtin_ia32_orpd256 ((__v4df)__A, (__v4df)__B);
/* _mm256_or_ps: returns __builtin_ia32_orps256 (__A, __B) with both
   operands cast to __v8sf.  */
304 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
305 _mm256_or_ps (__m256 __A, __m256 __B)
307 return (__m256) __builtin_ia32_orps256 ((__v8sf)__A, (__v8sf)__B);
/* Inline-function fragment returning __builtin_ia32_shufpd256 on __A and
   __B; the function name, parameter list and the final call argument are
   not visible in this excerpt.  */
311 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
314 return (__m256d) __builtin_ia32_shufpd256 ((__v4df)__A, (__v4df)__B,
/* Inline-function fragment returning __builtin_ia32_shufps256 on __A and
   __B; signature and final argument likewise not visible.  */
318 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
321 return (__m256) __builtin_ia32_shufps256 ((__v8sf)__A, (__v8sf)__B,
/* Macro form: casts A and B through __m256d to __v4df and N to int, then
   calls __builtin_ia32_shufpd256.  */
325 #define _mm256_shuffle_pd(A, B, N) \
326 ((__m256d)__builtin_ia32_shufpd256 ((__v4df)(__m256d)(A), \
327 (__v4df)(__m256d)(B), (int)(N)))
/* Macro form: casts A and B through __m256 to __v8sf and N to int, then
   calls __builtin_ia32_shufps256.  */
329 #define _mm256_shuffle_ps(A, B, N) \
330 ((__m256) __builtin_ia32_shufps256 ((__v8sf)(__m256)(A), \
331 (__v8sf)(__m256)(B), (int)(N)))
/* _mm256_sub_pd: returns __builtin_ia32_subpd256 (__A, __B) with both
   operands cast to __v4df.  */
334 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
335 _mm256_sub_pd (__m256d __A, __m256d __B)
337 return (__m256d) __builtin_ia32_subpd256 ((__v4df)__A, (__v4df)__B);
/* _mm256_sub_ps: returns __builtin_ia32_subps256 (__A, __B) with both
   operands cast to __v8sf.  */
340 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
341 _mm256_sub_ps (__m256 __A, __m256 __B)
343 return (__m256) __builtin_ia32_subps256 ((__v8sf)__A, (__v8sf)__B);
/* _mm256_xor_pd: returns __builtin_ia32_xorpd256 (__A, __B) with both
   operands cast to __v4df.  */
346 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
347 _mm256_xor_pd (__m256d __A, __m256d __B)
349 return (__m256d) __builtin_ia32_xorpd256 ((__v4df)__A, (__v4df)__B);
/* _mm256_xor_ps: returns __builtin_ia32_xorps256 (__A, __B) with both
   operands cast to __v8sf.  */
352 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
353 _mm256_xor_ps (__m256 __A, __m256 __B)
355 return (__m256) __builtin_ia32_xorps256 ((__v8sf)__A, (__v8sf)__B);
/* Inline-function forms of the compare intrinsics.  Each passes the two
   vector operands plus an int predicate __P to a __builtin_ia32_cmp*
   builtin.  Several signatures are missing from this excerpt.  */

/* Fragment: returns __builtin_ia32_cmppd (__X, __Y, __P) as __m128d; the
   function name and parameter list are not visible here.  */
359 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
362 return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P);
/* _mm_cmp_ps: returns __builtin_ia32_cmpps (__X, __Y, __P).  */
365 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
366 _mm_cmp_ps (__m128 __X, __m128 __Y, const
int __P)
368 return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P);
/* Fragment: returns __builtin_ia32_cmppd256 on __X and __Y; signature
   and final argument are not visible here.  */
371 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
374 return (__m256d) __builtin_ia32_cmppd256 ((__v4df)__X, (__v4df)__Y,
/* Fragment: returns __builtin_ia32_cmpps256 on __X and __Y; signature
   and final argument are not visible here.  */
378 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
381 return (__m256) __builtin_ia32_cmpps256 ((__v8sf)__X, (__v8sf)__Y,
/* _mm_cmp_sd: returns __builtin_ia32_cmpsd (__X, __Y, __P).  */
385 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
386 _mm_cmp_sd (__m128d __X, __m128d __Y, const
int __P)
388 return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P);
/* _mm_cmp_ss: returns __builtin_ia32_cmpss (__X, __Y, __P).  */
391 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
392 _mm_cmp_ss (__m128 __X, __m128 __Y, const
int __P)
394 return (__m128) __builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P);
/* Macro forms of the compare intrinsics.  Each casts X and Y through the
   public vector type to the builtin's element vector type, casts P to
   int, and calls the same builtin as the inline function of that name.
   NOTE(review): presumably these are selected by a preprocessor
   conditional that is not visible in this excerpt -- confirm.  */

/* 128-bit packed double compare via __builtin_ia32_cmppd.  */
397 #define _mm_cmp_pd(X, Y, P) \
398 ((__m128d) __builtin_ia32_cmppd ((__v2df)(__m128d)(X), \
399 (__v2df)(__m128d)(Y), (int)(P)))
/* 128-bit packed float compare via __builtin_ia32_cmpps.  */
401 #define _mm_cmp_ps(X, Y, P) \
402 ((__m128) __builtin_ia32_cmpps ((__v4sf)(__m128)(X), \
403 (__v4sf)(__m128)(Y), (int)(P)))
/* 256-bit packed double compare via __builtin_ia32_cmppd256.  */
405 #define _mm256_cmp_pd(X, Y, P) \
406 ((__m256d) __builtin_ia32_cmppd256 ((__v4df)(__m256d)(X), \
407 (__v4df)(__m256d)(Y), (int)(P)))
/* 256-bit packed float compare via __builtin_ia32_cmpps256.  */
409 #define _mm256_cmp_ps(X, Y, P) \
410 ((__m256) __builtin_ia32_cmpps256 ((__v8sf)(__m256)(X), \
411 (__v8sf)(__m256)(Y), (int)(P)))
/* Compare via __builtin_ia32_cmpsd on __v2df operands.  */
413 #define _mm_cmp_sd(X, Y, P) \
414 ((__m128d) __builtin_ia32_cmpsd ((__v2df)(__m128d)(X), \
415 (__v2df)(__m128d)(Y), (int)(P)))
/* Compare via __builtin_ia32_cmpss on __v4sf operands.  */
417 #define _mm_cmp_ss(X, Y, P) \
418 ((__m128) __builtin_ia32_cmpss ((__v4sf)(__m128)(X), \
419 (__v4sf)(__m128)(Y), (int)(P)))
/* Conversion wrappers.  Each casts its single operand to the builtin's
   source vector type, calls the named conversion builtin and casts the
   result to the declared return type.  Note that the source and result
   vector widths differ for several of them (see the signatures).  */

/* _mm256_cvtepi32_pd: __m128i (as __v4si) -> __m256d via
   __builtin_ia32_cvtdq2pd256.  */
422 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
423 _mm256_cvtepi32_pd (__m128i __A)
425 return (__m256d)__builtin_ia32_cvtdq2pd256 ((__v4si) __A);
/* _mm256_cvtepi32_ps: __m256i (as __v8si) -> __m256 via
   __builtin_ia32_cvtdq2ps256.  */
428 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
429 _mm256_cvtepi32_ps (__m256i __A)
431 return (__m256)__builtin_ia32_cvtdq2ps256 ((__v8si) __A);
/* _mm256_cvtpd_ps: __m256d (as __v4df) -> __m128 via
   __builtin_ia32_cvtpd2ps256.  */
434 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
435 _mm256_cvtpd_ps (__m256d __A)
437 return (__m128)__builtin_ia32_cvtpd2ps256 ((__v4df) __A);
/* _mm256_cvtps_epi32: __m256 (as __v8sf) -> __m256i via
   __builtin_ia32_cvtps2dq256.  */
440 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
441 _mm256_cvtps_epi32 (__m256 __A)
443 return (__m256i)__builtin_ia32_cvtps2dq256 ((__v8sf) __A);
/* _mm256_cvtps_pd: __m128 (as __v4sf) -> __m256d via
   __builtin_ia32_cvtps2pd256.  */
446 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
447 _mm256_cvtps_pd (__m128 __A)
449 return (__m256d)__builtin_ia32_cvtps2pd256 ((__v4sf) __A);
/* _mm256_cvttpd_epi32: __m256d (as __v4df) -> __m128i via
   __builtin_ia32_cvttpd2dq256.  NOTE(review): the extra "t" presumably
   denotes the truncating variant of the conversion -- confirm.  */
452 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
453 _mm256_cvttpd_epi32 (__m256d __A)
455 return (__m128i)__builtin_ia32_cvttpd2dq256 ((__v4df) __A);
/* _mm256_cvtpd_epi32: __m256d (as __v4df) -> __m128i via
   __builtin_ia32_cvtpd2dq256.  */
458 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
459 _mm256_cvtpd_epi32 (__m256d __A)
461 return (__m128i)__builtin_ia32_cvtpd2dq256 ((__v4df) __A);
/* _mm256_cvttps_epi32: __m256 (as __v8sf) -> __m256i via
   __builtin_ia32_cvttps2dq256.  */
464 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
465 _mm256_cvttps_epi32 (__m256 __A)
467 return (__m256i)__builtin_ia32_cvttps2dq256 ((__v8sf) __A);
/* Inline-function fragments for the 128-bit extract builtins.  The
   function names and parameter lists are not visible in this excerpt;
   each returns a 128-bit vector taken from __X as selected by __N.  */

/* Returns __builtin_ia32_vextractf128_pd256 (__X as __v4df, __N).  */
471 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
474 return (__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)__X, __N);
/* Returns __builtin_ia32_vextractf128_ps256 (__X as __v8sf, __N).  */
477 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
480 return (__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)__X, __N);
/* Returns __builtin_ia32_vextractf128_si256 (__X as __v8si, __N).  */
483 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
486 return (__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)__X, __N);
/* Element-extract inline functions.  In each visible return statement
   the index is reduced modulo the number of elements in a 128-bit vector
   (4, 8, 16 or 2) and applied to __Y.  The definition of __Y is not
   visible in this excerpt; NOTE(review): presumably it is the 128-bit
   half of the source selected from the high bits of __N, as in the macro
   forms of these intrinsics -- confirm.  */

/* Fragment (signature not visible): returns
   _mm_extract_epi32 (__Y, __N % 4).  */
489 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
493 return _mm_extract_epi32 (__Y, __N % 4);
/* Fragment (signature not visible): returns
   _mm_extract_epi16 (__Y, __N % 8).  */
496 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
500 return _mm_extract_epi16 (__Y, __N % 8);
/* Fragment (signature not visible): returns
   _mm_extract_epi8 (__Y, __N % 16).  */
503 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
507 return _mm_extract_epi8 (__Y, __N % 16);
/* _mm256_extract_epi64: takes a __m256i and a constant int index and
   returns _mm_extract_epi64 (__Y, __N % 2) as long long.  */
511 extern __inline
long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
512 _mm256_extract_epi64 (__m256i __X, const
int __N)
515 return _mm_extract_epi64 (__Y, __N % 2);
/* Macro forms of the extract intrinsics.  Several continuation lines are
   missing from this excerpt, so the macro bodies below are incomplete.
   - _mm256_extractf128_{pd,ps,si256}(X, N): call the matching
     __builtin_ia32_vextractf128_* builtin on X cast to the element
     vector type (the N argument line is not visible).
   - _mm256_extract_epi{32,16,8,64}(X, N): first take the 128-bit half
     numbered (N) >> 2 / >> 3 / >> 4 / >> 1 into __Y with
     _mm256_extractf128_si256, then extract element (N) % 4 / % 8 / % 16
     / % 2 from __Y with the corresponding _mm_extract_epi* intrinsic.  */
519 #define _mm256_extractf128_pd(X, N) \
520 ((__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)(__m256d)(X), \
523 #define _mm256_extractf128_ps(X, N) \
524 ((__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)(__m256)(X), \
527 #define _mm256_extractf128_si256(X, N) \
528 ((__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)(__m256i)(X), \
531 #define _mm256_extract_epi32(X, N) \
534 __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 2); \
535 _mm_extract_epi32 (__Y, (N) % 4); \
538 #define _mm256_extract_epi16(X, N) \
541 __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 3); \
542 _mm_extract_epi16 (__Y, (N) % 8); \
545 #define _mm256_extract_epi8(X, N) \
548 __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 4); \
549 _mm_extract_epi8 (__Y, (N) % 16); \
553 #define _mm256_extract_epi64(X, N) \
556 __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 1); \
557 _mm_extract_epi64 (__Y, (N) % 2); \
/* _mm256_zeroall: takes no arguments, returns nothing; calls
   __builtin_ia32_vzeroall ().  */
562 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
563 _mm256_zeroall (
void)
565 __builtin_ia32_vzeroall ();
/* _mm256_zeroupper: takes no arguments, returns nothing; calls
   __builtin_ia32_vzeroupper ().  */
568 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
569 _mm256_zeroupper (
void)
571 __builtin_ia32_vzeroupper ();
/* Variable-control permute wrappers: each takes a data vector __A and an
   integer control vector __C and forwards to a __builtin_ia32_vpermilvar*
   builtin.  Only the first call argument is visible in this excerpt.  */

/* _mm_permutevar_pd: 128-bit double form (__builtin_ia32_vpermilvarpd).  */
574 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
575 _mm_permutevar_pd (__m128d __A, __m128i
__C)
577 return (__m128d) __builtin_ia32_vpermilvarpd ((__v2df)__A,
/* _mm256_permutevar_pd: 256-bit double form
   (__builtin_ia32_vpermilvarpd256).  */
581 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
582 _mm256_permutevar_pd (__m256d __A, __m256i
__C)
584 return (__m256d) __builtin_ia32_vpermilvarpd256 ((__v4df)__A,
/* _mm_permutevar_ps: 128-bit float form (__builtin_ia32_vpermilvarps).  */
588 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
589 _mm_permutevar_ps (__m128 __A, __m128i __C)
591 return (__m128) __builtin_ia32_vpermilvarps ((__v4sf)__A,
/* _mm256_permutevar_ps: 256-bit float form
   (__builtin_ia32_vpermilvarps256).  */
595 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
596 _mm256_permutevar_ps (__m256 __A, __m256i __C)
598 return (__m256) __builtin_ia32_vpermilvarps256 ((__v8sf)__A,
/* Inline-function fragments for the permute builtins that take an
   integer control __C.  Function names and parameter lists are not
   visible in this excerpt.  */

/* Returns __builtin_ia32_vpermilpd (__X as __v2df, __C).  */
603 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
606 return (__m128d) __builtin_ia32_vpermilpd ((__v2df)__X, __C);
/* Returns __builtin_ia32_vpermilpd256 (__X as __v4df, __C).  */
609 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
612 return (__m256d) __builtin_ia32_vpermilpd256 ((__v4df)__X, __C);
/* Returns __builtin_ia32_vpermilps (__X as __v4sf, __C).  */
615 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
618 return (__m128) __builtin_ia32_vpermilps ((__v4sf)__X, __C);
/* Returns __builtin_ia32_vpermilps256 (__X as __v8sf, __C).  */
621 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
624 return (__m256) __builtin_ia32_vpermilps256 ((__v8sf)__X, __C);
/* Macro forms of the permute intrinsics: each casts X through the public
   vector type to the element vector type and C to int, then calls the
   matching __builtin_ia32_vpermil* builtin.  */

/* 128-bit double: __builtin_ia32_vpermilpd.  */
627 #define _mm_permute_pd(X, C) \
628 ((__m128d) __builtin_ia32_vpermilpd ((__v2df)(__m128d)(X), (int)(C)))
/* 256-bit double: __builtin_ia32_vpermilpd256.  */
630 #define _mm256_permute_pd(X, C) \
631 ((__m256d) __builtin_ia32_vpermilpd256 ((__v4df)(__m256d)(X), (int)(C)))
/* 128-bit float: __builtin_ia32_vpermilps.  */
633 #define _mm_permute_ps(X, C) \
634 ((__m128) __builtin_ia32_vpermilps ((__v4sf)(__m128)(X), (int)(C)))
/* 256-bit float: __builtin_ia32_vpermilps256.  */
636 #define _mm256_permute_ps(X, C) \
637 ((__m256) __builtin_ia32_vpermilps256 ((__v8sf)(__m256)(X), (int)(C)))
/* Inline-function fragments for the vperm2f128 builtins.  Function
   names, parameter lists and all call arguments after __X are not
   visible in this excerpt.  */

/* Returns __builtin_ia32_vperm2f128_pd256 (__X as __v4df, ...).  */
641 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
644 return (__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)__X,
/* Returns __builtin_ia32_vperm2f128_ps256 (__X as __v8sf, ...).  */
649 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
652 return (__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)__X,
/* Returns __builtin_ia32_vperm2f128_si256 (__X as __v8si, ...).  */
657 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
660 return (__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)__X,
/* Macro forms of the two-source 128-bit permute intrinsics (pd, ps and
   si256 variants).  Each casts X and Y through the public vector type to
   the element vector type and calls the matching
   __builtin_ia32_vperm2f128_* builtin.  The final continuation line of
   each macro (the C argument) is missing from this excerpt.  */
665 #define _mm256_permute2f128_pd(X, Y, C) \
666 ((__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)(__m256d)(X), \
667 (__v4df)(__m256d)(Y), \
670 #define _mm256_permute2f128_ps(X, Y, C) \
671 ((__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)(__m256)(X), \
672 (__v8sf)(__m256)(Y), \
675 #define _mm256_permute2f128_si256(X, Y, C) \
676 ((__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)(__m256i)(X), \
677 (__v8si)(__m256i)(Y), \
/* Broadcast wrappers: each takes a pointer __X and passes it unchanged
   to a __builtin_ia32_vbroadcast* builtin, returning the resulting
   vector.  */

/* _mm_broadcast_ss: float pointer -> __m128 via
   __builtin_ia32_vbroadcastss.  */
681 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
682 _mm_broadcast_ss (
float const *__X)
684 return (__m128) __builtin_ia32_vbroadcastss (__X);
/* _mm256_broadcast_sd: double pointer -> __m256d via
   __builtin_ia32_vbroadcastsd256.  */
687 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
688 _mm256_broadcast_sd (
double const *__X)
690 return (__m256d) __builtin_ia32_vbroadcastsd256 (__X);
/* _mm256_broadcast_ss: float pointer -> __m256 via
   __builtin_ia32_vbroadcastss256.  */
693 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
694 _mm256_broadcast_ss (
float const *__X)
696 return (__m256) __builtin_ia32_vbroadcastss256 (__X);
/* _mm256_broadcast_pd: __m128d pointer -> __m256d via
   __builtin_ia32_vbroadcastf128_pd256.  */
699 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
700 _mm256_broadcast_pd (__m128d const *__X)
702 return (__m256d) __builtin_ia32_vbroadcastf128_pd256 (__X);
/* _mm256_broadcast_ps: __m128 pointer -> __m256 via
   __builtin_ia32_vbroadcastf128_ps256.  */
705 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
706 _mm256_broadcast_ps (__m128 const *__X)
708 return (__m256) __builtin_ia32_vbroadcastf128_ps256 (__X);
/* Inline-function fragments for the vinsertf128 builtins.  Function
   names, parameter lists and all call arguments after __X are not
   visible in this excerpt.  */

/* Returns __builtin_ia32_vinsertf128_pd256 (__X as __v4df, ...).  */
712 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
715 return (__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)__X,
/* Returns __builtin_ia32_vinsertf128_ps256 (__X as __v8sf, ...).  */
720 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
723 return (__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)__X,
/* Returns __builtin_ia32_vinsertf128_si256 (__X as __v8si, ...).  */
728 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
731 return (__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)__X,
/* Element-insert inline functions.  Each visible statement replaces one
   element of the 128-bit vector __Y with __D, at index __N reduced
   modulo the element count (4, 8, 16 or 2).  How __Y is obtained and how
   the result is returned are not visible in this excerpt;
   NOTE(review): presumably __Y is extracted from and re-inserted into
   the 256-bit source, as in the macro forms -- confirm.  */

/* Fragment (signature not visible): _mm_insert_epi32 at __N % 4.  */
736 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
740 __Y = _mm_insert_epi32 (__Y, __D, __N % 4);
/* Fragment (signature not visible): _mm_insert_epi16 at __N % 8.  */
744 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
748 __Y = _mm_insert_epi16 (__Y, __D, __N % 8);
/* Fragment (signature not visible): _mm_insert_epi8 at __N % 16.  */
752 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
756 __Y = _mm_insert_epi8 (__Y, __D, __N % 16);
/* _mm256_insert_epi64: takes a __m256i, a long long value __D and a
   constant int index __N; the visible statement inserts __D into __Y at
   index __N % 2 with _mm_insert_epi64.  */
761 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
762 _mm256_insert_epi64 (__m256i __X,
long long __D,
int const __N)
765 __Y = _mm_insert_epi64 (__Y, __D, __N % 2);
/* Macro forms of the insert intrinsics.  Several continuation lines are
   missing from this excerpt, so the macro bodies below are incomplete.
   - _mm256_insertf128_{pd,ps,si256}(X, Y, O): call the matching
     __builtin_ia32_vinsertf128_* builtin with X cast to the 256-bit
     element vector type and Y cast to the 128-bit one (the O argument
     line is not visible).
   - _mm256_insert_epi{32,16,8,64}(X, D, N): extract the 128-bit half
     numbered (N) >> 2 / >> 3 / >> 4 / >> 1 into __Y, insert D at element
     (N) % 4 / % 8 / % 16 / % 2 of __Y, then write __Y back into the same
     half of X with _mm256_insertf128_si256.  Note that X and N each
     appear more than once in the expansion.  */
770 #define _mm256_insertf128_pd(X, Y, O) \
771 ((__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)(__m256d)(X), \
772 (__v2df)(__m128d)(Y), \
775 #define _mm256_insertf128_ps(X, Y, O) \
776 ((__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)(__m256)(X), \
777 (__v4sf)(__m128)(Y), \
780 #define _mm256_insertf128_si256(X, Y, O) \
781 ((__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)(__m256i)(X), \
782 (__v4si)(__m128i)(Y), \
785 #define _mm256_insert_epi32(X, D, N) \
788 __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 2); \
789 __Y = _mm_insert_epi32 (__Y, (D), (N) % 4); \
790 _mm256_insertf128_si256 ((X), __Y, (N) >> 2); \
793 #define _mm256_insert_epi16(X, D, N) \
796 __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 3); \
797 __Y = _mm_insert_epi16 (__Y, (D), (N) % 8); \
798 _mm256_insertf128_si256 ((X), __Y, (N) >> 3); \
801 #define _mm256_insert_epi8(X, D, N) \
804 __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 4); \
805 __Y = _mm_insert_epi8 (__Y, (D), (N) % 16); \
806 _mm256_insertf128_si256 ((X), __Y, (N) >> 4); \
810 #define _mm256_insert_epi64(X, D, N) \
813 __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 1); \
814 __Y = _mm_insert_epi64 (__Y, (D), (N) % 2); \
815 _mm256_insertf128_si256 ((X), __Y, (N) >> 1); \
/* Load/store through a plain pointer dereference.  These reinterpret the
   scalar pointer __P as a pointer to the full vector type and read or
   write through it directly; no builtin is involved.
   NOTE(review): a direct vector dereference presumably requires __P to
   be aligned for the vector type (32 bytes) -- confirm; the *u variants
   of these intrinsics use dedicated builtins instead.  */

/* _mm256_load_pd: returns *(__m256d *)__P.  The cast drops the const
   qualifier of the parameter, but the pointee is only read.  */
820 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
821 _mm256_load_pd (
double const *__P)
823 return *(__m256d *)__P;
/* _mm256_store_pd: writes __A to *(__m256d *)__P.  */
826 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
827 _mm256_store_pd (
double *__P, __m256d __A)
829 *(__m256d *)__P = __A;
/* _mm256_load_ps: returns *(__m256 *)__P (const dropped by the cast).  */
832 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
833 _mm256_load_ps (
float const *__P)
835 return *(__m256 *)__P;
/* _mm256_store_ps: writes __A to *(__m256 *)__P.  */
838 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
839 _mm256_store_ps (
float *__P, __m256 __A)
841 *(__m256 *)__P = __A;
/* Load/store through the loadu/storeu builtins.  The scalar pointer __P
   is passed to the builtin as-is.  NOTE(review): presumably these are
   the variants that tolerate unaligned addresses -- confirm.  */

/* _mm256_loadu_pd: returns __builtin_ia32_loadupd256 (__P).  */
844 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
845 _mm256_loadu_pd (
double const *__P)
847 return (__m256d) __builtin_ia32_loadupd256 (__P);
/* _mm256_storeu_pd: calls __builtin_ia32_storeupd256 (__P, __A as
   __v4df).  */
850 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
851 _mm256_storeu_pd (
double *__P, __m256d __A)
853 __builtin_ia32_storeupd256 (__P, (__v4df)__A);
/* _mm256_loadu_ps: returns __builtin_ia32_loadups256 (__P).  */
856 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
857 _mm256_loadu_ps (
float const *__P)
859 return (__m256) __builtin_ia32_loadups256 (__P);
/* _mm256_storeu_ps: calls __builtin_ia32_storeups256 (__P, __A as
   __v8sf).  */
862 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
863 _mm256_storeu_ps (
float *__P, __m256 __A)
865 __builtin_ia32_storeups256 (__P, (__v8sf)__A);
/* _mm256_load_si256: takes a pointer to const __m256i and returns a
   __m256i.  The body is not visible in this excerpt.  */
868 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
869 _mm256_load_si256 (__m256i const *__P)
/* _mm256_store_si256: takes a __m256i pointer and a __m256i value.  The
   body is not visible in this excerpt.  */
874 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
875 _mm256_store_si256 (__m256i *__P, __m256i __A)
/* _mm256_loadu_si256: returns __builtin_ia32_loaddqu256 applied to __P
   reinterpreted as a char const pointer.  */
880 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
881 _mm256_loadu_si256 (__m256i const *__P)
883 return (__m256i) __builtin_ia32_loaddqu256 ((
char const *)__P);
/* _mm256_storeu_si256: calls __builtin_ia32_storedqu256 with __P
   reinterpreted as a char pointer and __A cast to __v32qi.  */
886 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
887 _mm256_storeu_si256 (__m256i *__P, __m256i __A)
889 __builtin_ia32_storedqu256 ((
char *)__P, (__v32qi)__A);
/* Masked load/store wrappers.  Each takes a scalar pointer __P and an
   integer-vector mask __M (the stores also take the value __A), casts
   __P to a pointer to the element vector type and forwards to a
   __builtin_ia32_maskload* / maskstore* builtin.  For the loads, the mask
   argument line of the call is not visible in this excerpt.  */

/* _mm_maskload_pd: __builtin_ia32_maskloadpd on (const __v2df *)__P.  */
892 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
893 _mm_maskload_pd (
double const *__P, __m128i __M)
895 return (__m128d) __builtin_ia32_maskloadpd ((
const __v2df *)__P,
/* _mm_maskstore_pd: __builtin_ia32_maskstorepd with the mask cast to
   __v2di and the value to __v2df.  */
899 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
900 _mm_maskstore_pd (
double *__P, __m128i __M, __m128d __A)
902 __builtin_ia32_maskstorepd ((__v2df *)__P, (__v2di)__M, (__v2df)__A);
/* _mm256_maskload_pd: __builtin_ia32_maskloadpd256 on
   (const __v4df *)__P.  */
905 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
906 _mm256_maskload_pd (
double const *__P, __m256i __M)
908 return (__m256d) __builtin_ia32_maskloadpd256 ((
const __v4df *)__P,
/* _mm256_maskstore_pd: __builtin_ia32_maskstorepd256 with the mask cast
   to __v4di and the value to __v4df.  */
912 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
913 _mm256_maskstore_pd (
double *__P, __m256i __M, __m256d __A)
915 __builtin_ia32_maskstorepd256 ((__v4df *)__P, (__v4di)__M, (__v4df)__A);
/* _mm_maskload_ps: __builtin_ia32_maskloadps on (const __v4sf *)__P.  */
918 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
919 _mm_maskload_ps (
float const *__P, __m128i __M)
921 return (__m128) __builtin_ia32_maskloadps ((
const __v4sf *)__P,
/* _mm_maskstore_ps: __builtin_ia32_maskstoreps with the mask cast to
   __v4si and the value to __v4sf.  */
925 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
926 _mm_maskstore_ps (
float *__P, __m128i __M, __m128 __A)
928 __builtin_ia32_maskstoreps ((__v4sf *)__P, (__v4si)__M, (__v4sf)__A);
/* _mm256_maskload_ps: __builtin_ia32_maskloadps256 on
   (const __v8sf *)__P.  */
931 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
932 _mm256_maskload_ps (
float const *__P, __m256i __M)
934 return (__m256) __builtin_ia32_maskloadps256 ((
const __v8sf *)__P,
/* _mm256_maskstore_ps: __builtin_ia32_maskstoreps256 with the mask cast
   to __v8si and the value to __v8sf.  */
938 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
939 _mm256_maskstore_ps (
float *__P, __m256i __M, __m256 __A)
941 __builtin_ia32_maskstoreps256 ((__v8sf *)__P, (__v8si)__M, (__v8sf)__A);
/* Element-duplicating moves: each forwards its single operand, cast to
   the element vector type, to the named builtin.  */

/* _mm256_movehdup_ps: returns __builtin_ia32_movshdup256 (__X).  */
944 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
945 _mm256_movehdup_ps (__m256 __X)
947 return (__m256) __builtin_ia32_movshdup256 ((__v8sf)__X);
/* _mm256_moveldup_ps: returns __builtin_ia32_movsldup256 (__X).  */
950 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
951 _mm256_moveldup_ps (__m256 __X)
953 return (__m256) __builtin_ia32_movsldup256 ((__v8sf)__X);
/* _mm256_movedup_pd: returns __builtin_ia32_movddup256 (__X).  */
956 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
957 _mm256_movedup_pd (__m256d __X)
959 return (__m256d) __builtin_ia32_movddup256 ((__v4df)__X);
/* _mm256_lddqu_si256: returns __builtin_ia32_lddqu256 applied to __P
   reinterpreted as a char const pointer, cast to __m256i.  */
962 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
963 _mm256_lddqu_si256 (__m256i const *__P)
965 return (__m256i) __builtin_ia32_lddqu256 ((
char const *)__P);
/* Stream-store wrappers built on the movnt* builtins.  NOTE(review):
   "movnt" presumably denotes non-temporal (cache-bypassing) stores --
   confirm against the ISA reference.  Parameter naming differs between
   them: the destination is __A for the first two and __P for the last.  */

/* _mm256_stream_si256: calls __builtin_ia32_movntdq256 with the
   destination __A and value __B both viewed as __v4di.  */
968 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
969 _mm256_stream_si256 (__m256i *__A, __m256i __B)
971 __builtin_ia32_movntdq256 ((__v4di *)__A, (__v4di)__B);
/* _mm256_stream_pd: calls __builtin_ia32_movntpd256 (__A, __B as
   __v4df).  */
974 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
975 _mm256_stream_pd (
double *__A, __m256d __B)
977 __builtin_ia32_movntpd256 (__A, (__v4df)__B);
/* _mm256_stream_ps: calls __builtin_ia32_movntps256 (__P, __A as
   __v8sf).  */
980 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
981 _mm256_stream_ps (
float *__P, __m256 __A)
983 __builtin_ia32_movntps256 (__P, (__v8sf)__A);
/* Unary math wrappers: each forwards __A, cast to the element vector
   type, to the named builtin.  NOTE(review): the rcp/rsqrt instructions
   are presumably approximations rather than exact results -- confirm
   precision in the ISA reference before relying on them.  */

/* _mm256_rcp_ps: returns __builtin_ia32_rcpps256 (__A).  */
986 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
987 _mm256_rcp_ps (__m256 __A)
989 return (__m256) __builtin_ia32_rcpps256 ((__v8sf)__A);
/* _mm256_rsqrt_ps: returns __builtin_ia32_rsqrtps256 (__A).  */
992 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
993 _mm256_rsqrt_ps (__m256 __A)
995 return (__m256) __builtin_ia32_rsqrtps256 ((__v8sf)__A);
/* _mm256_sqrt_pd: returns __builtin_ia32_sqrtpd256 (__A).  */
998 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
999 _mm256_sqrt_pd (__m256d __A)
1001 return (__m256d) __builtin_ia32_sqrtpd256 ((__v4df)__A);
/* _mm256_sqrt_ps: returns __builtin_ia32_sqrtps256 (__A).  */
1004 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1005 _mm256_sqrt_ps (__m256 __A)
1007 return (__m256) __builtin_ia32_sqrtps256 ((__v8sf)__A);
/* Inline-function fragment: returns __builtin_ia32_roundpd256 (__V as
   __v4df, __M).  Function name and parameter list are not visible in
   this excerpt.  */
1011 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1014 return (__m256d) __builtin_ia32_roundpd256 ((__v4df)__V, __M);
/* Inline-function fragment: returns __builtin_ia32_roundps256 (__V as
   __v8sf, __M).  Signature likewise not visible.  */
1017 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1020 return (__m256) __builtin_ia32_roundps256 ((__v8sf)__V, __M);
/* Macro form: casts V through __m256d to __v4df and M to int, then calls
   __builtin_ia32_roundpd256.  */
1023 #define _mm256_round_pd(V, M) \
1024 ((__m256d) __builtin_ia32_roundpd256 ((__v4df)(__m256d)(V), (int)(M)))
/* Macro form: casts V through __m256 to __v8sf and M to int, then calls
   __builtin_ia32_roundps256.  */
1026 #define _mm256_round_ps(V, M) \
1027 ((__m256) __builtin_ia32_roundps256 ((__v8sf)(__m256)(V), (int)(M)))
/* Ceil/floor shorthands: _mm256_round_pd / _mm256_round_ps with the mode
   fixed to _MM_FROUND_CEIL or _MM_FROUND_FLOOR.  Those mode constants
   are not defined in this excerpt.  */
1030 #define _mm256_ceil_pd(V) _mm256_round_pd ((V), _MM_FROUND_CEIL)
1031 #define _mm256_floor_pd(V) _mm256_round_pd ((V), _MM_FROUND_FLOOR)
1032 #define _mm256_ceil_ps(V) _mm256_round_ps ((V), _MM_FROUND_CEIL)
1033 #define _mm256_floor_ps(V) _mm256_round_ps ((V), _MM_FROUND_FLOOR)
/* Unpack wrappers: each forwards __A and __B, cast to the element vector
   type, to the named unpckh* / unpckl* builtin.  */

/* _mm256_unpackhi_pd: returns __builtin_ia32_unpckhpd256 (__A, __B).  */
1035 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1036 _mm256_unpackhi_pd (__m256d __A, __m256d __B)
1038 return (__m256d) __builtin_ia32_unpckhpd256 ((__v4df)__A, (__v4df)__B);
/* _mm256_unpacklo_pd: returns __builtin_ia32_unpcklpd256 (__A, __B).  */
1041 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1042 _mm256_unpacklo_pd (__m256d __A, __m256d __B)
1044 return (__m256d) __builtin_ia32_unpcklpd256 ((__v4df)__A, (__v4df)__B);
/* _mm256_unpackhi_ps: returns __builtin_ia32_unpckhps256 (__A, __B).  */
1047 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1048 _mm256_unpackhi_ps (__m256 __A, __m256 __B)
1050 return (__m256) __builtin_ia32_unpckhps256 ((__v8sf)__A, (__v8sf)__B);
/* _mm256_unpacklo_ps: returns __builtin_ia32_unpcklps256 (__A, __B).  */
1053 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1054 _mm256_unpacklo_ps (__m256 __A, __m256 __B)
1056 return (__m256) __builtin_ia32_unpcklps256 ((__v8sf)__A, (__v8sf)__B);
/* 128-bit vector test wrappers.  Each forwards __M and __V, cast to the
   element vector type, to a __builtin_ia32_vtest{z,c,nzc}{pd,ps} builtin
   and returns its int result unchanged.  NOTE(review): the z / c / nzc
   suffixes presumably name which flag condition of the underlying test
   instruction is reported -- confirm against the ISA reference.  */

/* _mm_testz_pd: returns __builtin_ia32_vtestzpd (__M, __V).  */
1059 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1060 _mm_testz_pd (__m128d __M, __m128d __V)
1062 return __builtin_ia32_vtestzpd ((__v2df)__M, (__v2df)__V);
/* _mm_testc_pd: returns __builtin_ia32_vtestcpd (__M, __V).  */
1065 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1066 _mm_testc_pd (__m128d __M, __m128d __V)
1068 return __builtin_ia32_vtestcpd ((__v2df)__M, (__v2df)__V);
/* _mm_testnzc_pd: returns __builtin_ia32_vtestnzcpd (__M, __V).  */
1071 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1072 _mm_testnzc_pd (__m128d __M, __m128d __V)
1074 return __builtin_ia32_vtestnzcpd ((__v2df)__M, (__v2df)__V);
/* _mm_testz_ps: returns __builtin_ia32_vtestzps (__M, __V).  */
1077 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1078 _mm_testz_ps (__m128 __M, __m128 __V)
1080 return __builtin_ia32_vtestzps ((__v4sf)__M, (__v4sf)__V);
/* _mm_testc_ps: returns __builtin_ia32_vtestcps (__M, __V).  */
1083 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1084 _mm_testc_ps (__m128 __M, __m128 __V)
1086 return __builtin_ia32_vtestcps ((__v4sf)__M, (__v4sf)__V);
/* _mm_testnzc_ps: returns __builtin_ia32_vtestnzcps (__M, __V).  */
1089 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1090 _mm_testnzc_ps (__m128 __M, __m128 __V)
1092 return __builtin_ia32_vtestnzcps ((__v4sf)__M, (__v4sf)__V);
/* 256-bit floating-point vector test wrappers.  Each forwards __M and
   __V, cast to __v4df or __v8sf, to the matching
   __builtin_ia32_vtest{z,c,nzc}{pd,ps}256 builtin and returns its int
   result unchanged.  */

/* _mm256_testz_pd: returns __builtin_ia32_vtestzpd256 (__M, __V).  */
1095 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1096 _mm256_testz_pd (__m256d __M, __m256d __V)
1098 return __builtin_ia32_vtestzpd256 ((__v4df)__M, (__v4df)__V);
/* _mm256_testc_pd: returns __builtin_ia32_vtestcpd256 (__M, __V).  */
1101 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1102 _mm256_testc_pd (__m256d __M, __m256d __V)
1104 return __builtin_ia32_vtestcpd256 ((__v4df)__M, (__v4df)__V);
/* _mm256_testnzc_pd: returns __builtin_ia32_vtestnzcpd256 (__M, __V).  */
1107 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1108 _mm256_testnzc_pd (__m256d __M, __m256d __V)
1110 return __builtin_ia32_vtestnzcpd256 ((__v4df)__M, (__v4df)__V);
/* _mm256_testz_ps: returns __builtin_ia32_vtestzps256 (__M, __V).  */
1113 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1114 _mm256_testz_ps (__m256 __M, __m256 __V)
1116 return __builtin_ia32_vtestzps256 ((__v8sf)__M, (__v8sf)__V);
/* _mm256_testc_ps: returns __builtin_ia32_vtestcps256 (__M, __V).  */
1119 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1120 _mm256_testc_ps (__m256 __M, __m256 __V)
1122 return __builtin_ia32_vtestcps256 ((__v8sf)__M, (__v8sf)__V);
/* _mm256_testnzc_ps: returns __builtin_ia32_vtestnzcps256 (__M, __V).  */
1125 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1126 _mm256_testnzc_ps (__m256 __M, __m256 __V)
1128 return __builtin_ia32_vtestnzcps256 ((__v8sf)__M, (__v8sf)__V);
/* 256-bit integer vector test wrappers.  Each forwards __M and __V, cast
   to __v4di, to a __builtin_ia32_ptest{z,c,nzc}256 builtin and returns
   its int result unchanged.  */

/* _mm256_testz_si256: returns __builtin_ia32_ptestz256 (__M, __V).  */
1131 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1132 _mm256_testz_si256 (__m256i __M, __m256i __V)
1134 return __builtin_ia32_ptestz256 ((__v4di)__M, (__v4di)__V);
/* _mm256_testc_si256: returns __builtin_ia32_ptestc256 (__M, __V).  */
1137 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1138 _mm256_testc_si256 (__m256i __M, __m256i __V)
1140 return __builtin_ia32_ptestc256 ((__v4di)__M, (__v4di)__V);
/* _mm256_testnzc_si256: returns __builtin_ia32_ptestnzc256 (__M, __V).  */
1143 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1144 _mm256_testnzc_si256 (__m256i __M, __m256i __V)
1146 return __builtin_ia32_ptestnzc256 ((__v4di)__M, (__v4di)__V);
/* _mm256_movemask_pd: returns the int result of
   __builtin_ia32_movmskpd256 (__A as __v4df).  */
1149 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1150 _mm256_movemask_pd (__m256d __A)
1152 return __builtin_ia32_movmskpd256 ((__v4df)__A);
/* _mm256_movemask_ps: returns the int result of
   __builtin_ia32_movmskps256 (__A as __v8sf).  */
1155 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1156 _mm256_movemask_ps (__m256 __A)
1158 return __builtin_ia32_movmskps256 ((__v8sf)__A);
1161 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1162 _mm256_setzero_pd (
void)
1164 return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 };
1167 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1168 _mm256_setzero_ps (
void)
1170 return __extension__ (__m256){ 0.0, 0.0, 0.0, 0.0,
1171 0.0, 0.0, 0.0, 0.0 };
1174 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1175 _mm256_setzero_si256 (
void)
1177 return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };
1181 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1182 _mm256_set_pd (
double __A,
double __B,
double __C,
double __D)
1184 return __extension__ (__m256d){
__D,
__C,
__B, __A };
/* _mm256_set_ps: declared to return __m256; only the first four float
   parameters (__A..__D) are present here and the list ends on a comma.
   NOTE(review): the remaining parameters and the whole body are absent
   from this listing (embedded numbers jump from 1189 to 1197) -- restore
   them from the upstream header before compiling.  */
1188 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1189 _mm256_set_ps (
float __A,
float __B,
float __C,
float __D,
/* _mm256_set_epi32: declared to return __m256i, built from a __v8si
   initializer that starts with __H, __G, __F, __E.
   NOTE(review): the parameter list stops after __D and the initializer is
   cut off after __E in this listing, so __E..__H are used but never
   declared here -- restore the missing text from the upstream header.  */
1197 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1198 _mm256_set_epi32 (
int __A,
int __B,
int __C,
int __D,
1201 return __extension__ (__m256i)(__v8si){
__H,
__G,
__F,
__E,
/* Fragment of a function returning __m256i from a __v16hi initializer.
   NOTE(review): the function name, parameter list and initializer elements
   are missing from this listing (embedded numbers jump from 1205 to 1211);
   presumably this is _mm256_set_epi16, which _mm256_set1_epi16 calls --
   confirm against the upstream header.  */
1205 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1211 return __extension__ (__m256i)(__v16hi){
/* Fragment of a function returning __m256i from a __v32qi initializer.
   NOTE(review): the function name, parameter list and initializer elements
   are missing from this listing (embedded numbers jump from 1217 to 1227);
   presumably this is _mm256_set_epi8, which _mm256_set1_epi8 calls --
   confirm against the upstream header.  */
1217 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1227 return __extension__ (__m256i)(__v32qi){
1235 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1236 _mm256_set_epi64x (
long long __A,
long long __B,
long long __C,
1239 return __extension__ (__m256i)(__v4di){
__D,
__C,
__B, __A };
1243 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1244 _mm256_set1_pd (
double __A)
1246 return __extension__ (__m256d){
__A,
__A,
__A, __A };
/* _mm256_set1_ps: takes a single float __A and is declared to return
   __m256.
   NOTE(review): the function body is absent from this listing (embedded
   numbers jump from 1251 to 1258) -- restore it from the upstream header
   before compiling.  */
1250 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1251 _mm256_set1_ps (
float __A)
/* _mm256_set1_epi32: takes a single int __A and returns a __m256i built
   from a __v8si initializer whose visible elements are all __A.
   NOTE(review): the initializer is cut off after four elements and never
   closed in this listing -- restore the rest from the upstream header.  */
1258 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1259 _mm256_set1_epi32 (
int __A)
1261 return __extension__ (__m256i)(__v8si){
__A,
__A,
__A,
__A,
1265 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1266 _mm256_set1_epi16 (
short __A)
1268 return _mm256_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A,
1269 __A, __A, __A, __A, __A, __A, __A, __A);
1272 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1273 _mm256_set1_epi8 (
char __A)
1275 return _mm256_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
1276 __A, __A, __A, __A, __A, __A, __A, __A,
1277 __A, __A, __A, __A, __A, __A, __A, __A,
1278 __A, __A, __A, __A, __A, __A, __A, __A);
1281 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1282 _mm256_set1_epi64x (
long long __A)
1284 return __extension__ (__m256i)(__v4di){
__A,
__A,
__A, __A };
1290 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1291 _mm256_setr_pd (
double __A,
double __B,
double __C,
double __D)
1293 return _mm256_set_pd (__D, __C, __B, __A);
1296 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1297 _mm256_setr_ps (
float __A,
float __B,
float __C,
float __D,
1300 return _mm256_set_ps (__H, __G, __F, __E, __D, __C, __B, __A);
1303 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1304 _mm256_setr_epi32 (
int __A,
int __B,
int __C,
int __D,
1307 return _mm256_set_epi32 (__H, __G, __F, __E, __D, __C, __B, __A);
/* Fragment: returns the __m256i produced by _mm256_set_epi16 when given
   __q00 through __q15 in ascending order.
   NOTE(review): the function name and parameter list are missing from this
   listing (embedded numbers jump from 1310 to 1316); presumably this is
   _mm256_setr_epi16 -- confirm against the upstream header.  */
1310 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1316 return _mm256_set_epi16 (__q00, __q01, __q02, __q03,
1317 __q04, __q05, __q06, __q07,
1318 __q08, __q09, __q10, __q11,
1319 __q12, __q13, __q14, __q15);
/* Fragment: returns the __m256i produced by _mm256_set_epi8 when given
   __q00 through __q31 in ascending order.
   NOTE(review): the function name and parameter list are missing from this
   listing (embedded numbers jump from 1322 to 1332); presumably this is
   _mm256_setr_epi8 -- confirm against the upstream header.  */
1322 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1332 return _mm256_set_epi8 (__q00, __q01, __q02, __q03,
1333 __q04, __q05, __q06, __q07,
1334 __q08, __q09, __q10, __q11,
1335 __q12, __q13, __q14, __q15,
1336 __q16, __q17, __q18, __q19,
1337 __q20, __q21, __q22, __q23,
1338 __q24, __q25, __q26, __q27,
1339 __q28, __q29, __q30, __q31);
1342 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1343 _mm256_setr_epi64x (
long long __A,
long long __B,
long long __C,
1346 return _mm256_set_epi64x (__D, __C, __B, __A);
1351 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1352 _mm256_castpd_ps (__m256d __A)
1354 return (__m256)
__A;
1357 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1358 _mm256_castpd_si256 (__m256d __A)
1360 return (__m256i)
__A;
1363 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1364 _mm256_castps_pd (__m256 __A)
1366 return (__m256d)
__A;
1369 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1370 _mm256_castps_si256(__m256 __A)
1372 return (__m256i)
__A;
1375 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1376 _mm256_castsi256_ps (__m256i __A)
1378 return (__m256)
__A;
1381 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1382 _mm256_castsi256_pd (__m256i __A)
1384 return (__m256d)
__A;
1387 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1388 _mm256_castpd256_pd128 (__m256d __A)
1390 return (__m128d) __builtin_ia32_pd_pd256 ((__v4df)__A);
1393 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1394 _mm256_castps256_ps128 (__m256 __A)
1396 return (__m128) __builtin_ia32_ps_ps256 ((__v8sf)__A);
1399 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1400 _mm256_castsi256_si128 (__m256i __A)
1402 return (__m128i) __builtin_ia32_si_si256 ((__v8si)__A);
1410 extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1411 _mm256_castpd128_pd256 (__m128d __A)
1413 return (__m256d) __builtin_ia32_pd256_pd ((__v2df)__A);
1416 extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1417 _mm256_castps128_ps256 (__m128 __A)
1419 return (__m256) __builtin_ia32_ps256_ps ((__v4sf)__A);
1422 extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1423 _mm256_castsi128_si256 (__m128i __A)
1425 return (__m256i) __builtin_ia32_si256_si ((__v4si)__A);
#define _mm_cmp_pd(X, Y, P)
Definition: avxintrin.h:397
__inline __m256i char char char char char char __q25
Definition: avxintrin.h:1218
__inline __m256i char char char __q28
Definition: avxintrin.h:1218
#define _mm256_extractf128_si256(X, N)
Definition: avxintrin.h:527
#define _mm256_extract_epi8(X, N)
Definition: avxintrin.h:545
#define _mm_cmp_sd(X, Y, P)
Definition: avxintrin.h:413
__inline __m256i short short short short short short short short short short short short short short short __q00
Definition: avxintrin.h:1210
#define _mm256_shuffle_pd(A, B, N)
Definition: avxintrin.h:325
__inline __m128d __m128i __C
Definition: avxintrin.h:576
__inline __m256i short short short short short short __q09
Definition: avxintrin.h:1206
#define _mm256_round_ps(V, M)
Definition: avxintrin.h:1026
#define _mm_cmp_ps(X, Y, P)
Definition: avxintrin.h:401
__inline __m256i short short short short short short short short short short __q05
Definition: avxintrin.h:1206
#define _mm256_extract_epi16(X, N)
Definition: avxintrin.h:538
__inline __m256d __m256d __B
Definition: avxintrin.h:117
#define _mm256_extractf128_pd(X, N)
Definition: avxintrin.h:519
__inline __m256i char char char char char char char char char char char char char __q18
Definition: avxintrin.h:1218
#define _mm256_insert_epi16(X, D, N)
Definition: avxintrin.h:793
#define _mm_permute_pd(X, C)
Definition: avxintrin.h:627
#define _mm256_shuffle_ps(A, B, N)
Definition: avxintrin.h:329
double __v4df __attribute__((__vector_size__(32)))
Definition: avxintrin.h:32
__inline __m256i short __q14
Definition: avxintrin.h:1206
__inline __m256i char char char char char char char char char char char __q20
Definition: avxintrin.h:1218
__inline __m256i short short __q13
Definition: avxintrin.h:1206
__inline unsigned char unsigned int unsigned int unsigned int * __P
Definition: adxintrin.h:35
__inline int __m128d __V
Definition: avxintrin.h:1061
__inline __m256i char __q30
Definition: avxintrin.h:1218
#define _mm256_dp_ps(X, Y, M)
Definition: avxintrin.h:233
#define _mm256_extract_epi32(X, N)
Definition: avxintrin.h:531
__inline void __m256d __A
Definition: avxintrin.h:828
#define _mm256_blend_ps(X, Y, M)
Definition: avxintrin.h:188
__inline __m256i char char char char char char char char char char char char __q19
Definition: avxintrin.h:1218
__inline __m256i short short short short short __q10
Definition: avxintrin.h:1206
__inline __m256d double double double __D
Definition: avxintrin.h:1183
#define _mm256_permute2f128_ps(X, Y, C)
Definition: avxintrin.h:670
__inline unsigned char unsigned int __X
Definition: adxintrin.h:33
#define _mm256_permute_pd(X, C)
Definition: avxintrin.h:630
#define _mm256_insert_epi8(X, D, N)
Definition: avxintrin.h:801
__inline __m256i short short short short short short short short __q07
Definition: avxintrin.h:1206
__inline __m256i char char char char char char char char char char char char char char char char __q15
Definition: avxintrin.h:1218
__inline __m256i char char char char char __q26
Definition: avxintrin.h:1218
__inline __m256 float float float float float __F
Definition: avxintrin.h:1189
__inline __m256i char char char char char char char char char char char char char char __q17
Definition: avxintrin.h:1218
__inline __m256i char char char char char char char char char char char char char char char __q16
Definition: avxintrin.h:1218
__inline __m256i short short short short short short short short short short short __q04
Definition: avxintrin.h:1206
#define _mm256_insertf128_si256(X, Y, O)
Definition: avxintrin.h:780
#define _mm256_insertf128_ps(X, Y, O)
Definition: avxintrin.h:775
#define _mm_cmp_ss(X, Y, P)
Definition: avxintrin.h:417
#define _mm256_permute_ps(X, C)
Definition: avxintrin.h:636
#define _mm256_permute2f128_si256(X, Y, C)
Definition: avxintrin.h:675
__inline __m256i short short short short short short short short short short short short short __q02
Definition: avxintrin.h:1206
#define _mm_permute_ps(X, C)
Definition: avxintrin.h:633
__inline __m256i short short short short short short short short short short short short __q03
Definition: avxintrin.h:1206
__inline __m256d __m256d __Y
Definition: avxintrin.h:194
#define _mm256_extractf128_ps(X, N)
Definition: avxintrin.h:523
#define _mm256_insertf128_pd(X, Y, O)
Definition: avxintrin.h:770
__inline __m256i short short short short short short short __q08
Definition: avxintrin.h:1206
#define _mm256_insert_epi32(X, D, N)
Definition: avxintrin.h:785
#define _mm256_blend_pd(X, Y, M)
Definition: avxintrin.h:184
__inline __m256i short short short short __q11
Definition: avxintrin.h:1206
__inline __m256i char char char char char char char char char __q22
Definition: avxintrin.h:1218
__inline __m256i char char char char __q27
Definition: avxintrin.h:1218
__inline __m256i char char char char char char char char __q23
Definition: avxintrin.h:1218
#define _mm256_cmp_ps(X, Y, P)
Definition: avxintrin.h:409
#define _mm256_permute2f128_pd(X, Y, C)
Definition: avxintrin.h:665
__inline __m256 float float float float __E
Definition: avxintrin.h:1189
__inline __m256d __m256d __m256d __M
Definition: avxintrin.h:195
__inline __m256i short short short short short short short short short __q06
Definition: avxintrin.h:1206
#define _mm256_round_pd(V, M)
Definition: avxintrin.h:1023
__inline __m256i short short short __q12
Definition: avxintrin.h:1206
#define _mm256_cmp_pd(X, Y, P)
Definition: avxintrin.h:405
__inline __m256 float float float float float float __G
Definition: avxintrin.h:1189
__inline __m256i short short short short short short short short short short short short short short __q01
Definition: avxintrin.h:1206
__inline __m256 float float float float float float float __H
Definition: avxintrin.h:1191
__inline __m256i char char char char char char char char char char __q21
Definition: avxintrin.h:1218
__inline __m256i char char char char char char char __q24
Definition: avxintrin.h:1218
__inline __m256i char char __q29
Definition: avxintrin.h:1218