/* Include guard for this header.
   NOTE(review): as listed, the #error below is not wrapped in any visible
   conditional; presumably the line testing whether SSE2 is enabled was
   dropped from this listing -- confirm against the original file.  */
27 #ifndef _EMMINTRIN_H_INCLUDED
28 #define _EMMINTRIN_H_INCLUDED
31 # error "SSE2 instruction set not enabled"
/* 16-byte vector types.  __v2di is the plain long long vector used as a
   cast target when calling builtins; __m128i (long long elements) and
   __m128d (double elements) are the types used in the function signatures
   below and additionally carry __may_alias__.  */
39 typedef long long __v2di
__attribute__ ((__vector_size__ (16)));
46 typedef long long __m128i
__attribute__ ((__vector_size__ (16), __may_alias__));
47 typedef double __m128d
__attribute__ ((__vector_size__ (16), __may_alias__));
/* Build a shuffle selector: fp1 shifted left by one bit, OR-ed with fp0.
   Used below as the last argument of __builtin_ia32_shufpd.  */
50 #define _MM_SHUFFLE2(fp1,fp0) \
51 (((fp1) << 1) | (fp0))
/* Return a __m128d whose element 0 is __F and whose element 1 is 0.0.  */
54 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
55 _mm_set_sd (
double __F)
57 return __extension__ (__m128d){
__F, 0.0 };
/* Return a __m128d with both elements set to __F.  */
61 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
62 _mm_set1_pd (
double __F)
64 return __extension__ (__m128d){
__F, __F };
/* Alternate name for _mm_set1_pd: forwards __F unchanged.  */
67 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
68 _mm_set_pd1 (
double __F)
70 return _mm_set1_pd (__F);
/* Return a __m128d with element 0 = __X and element 1 = __W, i.e. the
   arguments are written with the higher-indexed element first.  */
74 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
75 _mm_set_pd (
double __W,
double __X)
77 return __extension__ (__m128d){
__X, __W };
/* Return a __m128d with element 0 = __W and element 1 = __X, i.e. the
   arguments are written in element order.  */
81 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
82 _mm_setr_pd (
double __W,
double __X)
84 return __extension__ (__m128d){ __W, __X };
/* Returns a __m128d with both elements 0.0.
   NOTE(review): the line carrying the function name and parameter list is
   missing from this listing (presumably _mm_setzero_pd (void)) -- confirm
   against the original file.  */
88 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
91 return __extension__ (__m128d){ 0.0, 0.0 };
/* Pass __A and __B (as __v2df) to __builtin_ia32_movsd and return the
   result as __m128d.  Presumably the low element comes from __B and the
   high element from __A -- verify against the builtin's documentation.  */
95 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
96 _mm_move_sd (__m128d
__A, __m128d
__B)
98 return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
/* Load a __m128d by dereferencing __P as a __m128d pointer.  The cast
   drops the const qualifier of __P.  Presumably __P must be aligned as
   __m128d requires (the type is 16 bytes wide) -- TODO confirm.  */
102 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
103 _mm_load_pd (
double const *
__P)
105 return *(__m128d *)__P;
/* Load a __m128d from __P through __builtin_ia32_loadupd (the unaligned
   load, going by the builtin's name).  */
109 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
110 _mm_loadu_pd (
double const *
__P)
112 return __builtin_ia32_loadupd (__P);
/* Read one double from __P and replicate it into both elements by way of
   _mm_set1_pd.  */
116 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
117 _mm_load1_pd (
double const *
__P)
119 return _mm_set1_pd (*__P);
/* Read one double from __P and build the result with _mm_set_sd, so the
   other element is whatever _mm_set_sd supplies.  */
123 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
124 _mm_load_sd (
double const *
__P)
126 return _mm_set_sd (*__P);
/* Alternate name for _mm_load1_pd: forwards __P unchanged.  */
129 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
130 _mm_load_pd1 (
double const *
__P)
132 return _mm_load1_pd (__P);
/* Load two doubles with _mm_load_pd, then shuffle the loaded vector with
   itself using selector _MM_SHUFFLE2 (0,1), which evaluates to 1.
   Presumably this yields the two elements in reversed order, as the "r"
   in the name suggests -- verify against the shufpd semantics.  */
136 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
137 _mm_loadr_pd (
double const *
__P)
139 __m128d __tmp = _mm_load_pd (__P);
140 return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
/* Store __A to memory by assigning through __P viewed as a __m128d
   pointer.  Presumably __P must be aligned as __m128d requires -- TODO
   confirm.  */
144 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
145 _mm_store_pd (
double *__P, __m128d __A)
147 *(__m128d *)__P = __A;
/* Store __A to __P through __builtin_ia32_storeupd (the unaligned store,
   going by the builtin's name).  */
151 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
152 _mm_storeu_pd (
double *__P, __m128d __A)
154 __builtin_ia32_storeupd (__P, __A);
/* Extract element 0 of __A and store it to *__P.  */
158 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
159 _mm_store_sd (
double *__P, __m128d __A)
161 *__P = __builtin_ia32_vec_ext_v2df (__A, 0);
/* Return element 0 of __A as a double.  */
164 extern __inline
double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
165 _mm_cvtsd_f64 (__m128d __A)
167 return __builtin_ia32_vec_ext_v2df (__A, 0);
/* Alternate name for _mm_store_sd: forwards both arguments unchanged.  */
170 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
171 _mm_storel_pd (
double *__P, __m128d __A)
173 _mm_store_sd (__P, __A);
/* Extract element 1 of __A and store it to *__P.  */
177 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
178 _mm_storeh_pd (
double *__P, __m128d __A)
180 *__P = __builtin_ia32_vec_ext_v2df (__A, 1);
/* Shuffle __A with itself using selector _MM_SHUFFLE2 (0,0) (which is 0)
   and store the result with _mm_store_pd.  Presumably this writes element
   0 of __A to both destination slots -- verify against shufpd semantics.  */
185 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
186 _mm_store1_pd (
double *__P, __m128d __A)
188 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
/* Alternate name for _mm_store1_pd: forwards both arguments unchanged.  */
191 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
192 _mm_store_pd1 (
double *__P, __m128d __A)
194 _mm_store1_pd (__P, __A);
/* Shuffle __A with itself using selector _MM_SHUFFLE2 (0,1) (which is 1)
   and store the result with _mm_store_pd.  Presumably this writes the two
   elements in reversed order -- verify against shufpd semantics.  */
198 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
199 _mm_storer_pd (
double *__P, __m128d __A)
201 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
/* Return element 0 of __A when viewed as a __v4si vector.  */
204 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
205 _mm_cvtsi128_si32 (__m128i __A)
207 return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
/* Return element 0 of __A when viewed as a __v2di vector.  */
212 extern __inline
long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
213 _mm_cvtsi128_si64 (__m128i __A)
215 return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
/* Return element 0 of __A when viewed as a __v2di vector; the body is the
   same extraction that _mm_cvtsi128_si64 performs.  */
219 extern __inline
long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
220 _mm_cvtsi128_si64x (__m128i __A)
222 return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
/* Thin wrapper: __builtin_ia32_addpd on __A and __B (passed as __v2df),
   result returned as __m128d.  */
226 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
227 _mm_add_pd (__m128d __A, __m128d __B)
229 return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
/* Thin wrapper: __builtin_ia32_addsd on __A and __B (passed as __v2df),
   result returned as __m128d.  */
232 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
233 _mm_add_sd (__m128d __A, __m128d __B)
235 return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
/* Thin wrapper: __builtin_ia32_subpd on __A and __B (passed as __v2df),
   result returned as __m128d.  */
238 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
239 _mm_sub_pd (__m128d __A, __m128d __B)
241 return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
/* Thin wrapper: __builtin_ia32_subsd on __A and __B (passed as __v2df),
   result returned as __m128d.  */
244 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
245 _mm_sub_sd (__m128d __A, __m128d __B)
247 return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
/* Thin wrapper: __builtin_ia32_mulpd on __A and __B (passed as __v2df),
   result returned as __m128d.  */
250 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
251 _mm_mul_pd (__m128d __A, __m128d __B)
253 return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
/* Thin wrapper: __builtin_ia32_mulsd on __A and __B (passed as __v2df),
   result returned as __m128d.  */
256 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
257 _mm_mul_sd (__m128d __A, __m128d __B)
259 return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
/* Thin wrapper: __builtin_ia32_divpd on __A and __B (passed as __v2df),
   result returned as __m128d.  */
262 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
263 _mm_div_pd (__m128d __A, __m128d __B)
265 return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B);
/* Thin wrapper: __builtin_ia32_divsd on __A and __B (passed as __v2df),
   result returned as __m128d.  */
268 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
269 _mm_div_sd (__m128d __A, __m128d __B)
271 return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
/* Thin wrapper: __builtin_ia32_sqrtpd on __A (passed as __v2df), result
   returned as __m128d.  */
274 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
275 _mm_sqrt_pd (__m128d __A)
277 return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
/* Two steps: first combine __A and __B with __builtin_ia32_movsd, then
   apply __builtin_ia32_sqrtsd to the combined vector.  Presumably the
   result is the square root of __B's low element with __A's high element
   carried through -- verify against the builtins' documentation.  */
281 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
282 _mm_sqrt_sd (__m128d __A, __m128d __B)
284 __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
285 return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
/* Thin wrapper: __builtin_ia32_minpd on __A and __B (passed as __v2df),
   result returned as __m128d.  */
288 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
289 _mm_min_pd (__m128d __A, __m128d __B)
291 return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
/* Thin wrapper: __builtin_ia32_minsd on __A and __B (passed as __v2df),
   result returned as __m128d.  */
294 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
295 _mm_min_sd (__m128d __A, __m128d __B)
297 return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
/* Thin wrapper: __builtin_ia32_maxpd on __A and __B (passed as __v2df),
   result returned as __m128d.  */
300 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
301 _mm_max_pd (__m128d __A, __m128d __B)
303 return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
/* Thin wrapper: __builtin_ia32_maxsd on __A and __B (passed as __v2df),
   result returned as __m128d.  */
306 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
307 _mm_max_sd (__m128d __A, __m128d __B)
309 return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
/* Thin wrapper: __builtin_ia32_andpd on __A and __B (passed as __v2df),
   result returned as __m128d.  */
312 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
313 _mm_and_pd (__m128d __A, __m128d __B)
315 return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
/* Thin wrapper: __builtin_ia32_andnpd on __A and __B (passed as __v2df),
   result returned as __m128d.  Which operand is complemented is decided
   by the builtin -- presumably __A; verify before relying on it.  */
318 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
319 _mm_andnot_pd (__m128d __A, __m128d __B)
321 return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
/* Thin wrapper: __builtin_ia32_orpd on __A and __B (passed as __v2df),
   result returned as __m128d.  */
324 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
325 _mm_or_pd (__m128d __A, __m128d __B)
327 return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
/* Thin wrapper: __builtin_ia32_xorpd on __A and __B (passed as __v2df),
   result returned as __m128d.  */
330 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
331 _mm_xor_pd (__m128d __A, __m128d __B)
333 return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
/* Comparison wrapper: __builtin_ia32_cmpeqpd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
336 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
337 _mm_cmpeq_pd (__m128d __A, __m128d __B)
339 return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
/* Comparison wrapper: __builtin_ia32_cmpltpd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
342 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
343 _mm_cmplt_pd (__m128d __A, __m128d __B)
345 return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
/* Comparison wrapper: __builtin_ia32_cmplepd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
348 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
349 _mm_cmple_pd (__m128d __A, __m128d __B)
351 return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
/* Comparison wrapper: __builtin_ia32_cmpgtpd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
354 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
355 _mm_cmpgt_pd (__m128d __A, __m128d __B)
357 return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
/* Comparison wrapper: __builtin_ia32_cmpgepd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
360 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
361 _mm_cmpge_pd (__m128d __A, __m128d __B)
363 return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
/* Comparison wrapper: __builtin_ia32_cmpneqpd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
366 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
367 _mm_cmpneq_pd (__m128d __A, __m128d __B)
369 return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
/* Comparison wrapper: __builtin_ia32_cmpnltpd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
372 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
373 _mm_cmpnlt_pd (__m128d __A, __m128d __B)
375 return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
/* Comparison wrapper: __builtin_ia32_cmpnlepd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
378 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
379 _mm_cmpnle_pd (__m128d __A, __m128d __B)
381 return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
/* Comparison wrapper: __builtin_ia32_cmpngtpd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
384 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
385 _mm_cmpngt_pd (__m128d __A, __m128d __B)
387 return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
/* Comparison wrapper: __builtin_ia32_cmpngepd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
390 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
391 _mm_cmpnge_pd (__m128d __A, __m128d __B)
393 return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
/* Comparison wrapper: __builtin_ia32_cmpordpd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
396 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
397 _mm_cmpord_pd (__m128d __A, __m128d __B)
399 return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
/* Comparison wrapper: __builtin_ia32_cmpunordpd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
402 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
403 _mm_cmpunord_pd (__m128d __A, __m128d __B)
405 return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
/* Comparison wrapper: __builtin_ia32_cmpeqsd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
408 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
409 _mm_cmpeq_sd (__m128d __A, __m128d __B)
411 return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
/* Comparison wrapper: __builtin_ia32_cmpltsd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
414 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
415 _mm_cmplt_sd (__m128d __A, __m128d __B)
417 return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
/* Comparison wrapper: __builtin_ia32_cmplesd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
420 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
421 _mm_cmple_sd (__m128d __A, __m128d __B)
423 return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
/* Implemented indirectly: a __builtin_ia32_cmpltsd comparison that takes
   __B as its first operand is combined with __A through
   __builtin_ia32_movsd.
   NOTE(review): the continuation lines of this expression are missing
   from the listing, so the remaining arguments cannot be confirmed here
   (presumably the compare's second operand is __A).  */
426 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
427 _mm_cmpgt_sd (__m128d __A, __m128d __B)
429 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
431 __builtin_ia32_cmpltsd ((__v2df) __B,
/* Implemented indirectly: a __builtin_ia32_cmplesd comparison that takes
   __B as its first operand is combined with __A through
   __builtin_ia32_movsd.
   NOTE(review): the continuation lines of this expression are missing
   from the listing, so the remaining arguments cannot be confirmed here
   (presumably the compare's second operand is __A).  */
436 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
437 _mm_cmpge_sd (__m128d __A, __m128d __B)
439 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
441 __builtin_ia32_cmplesd ((__v2df) __B,
/* Comparison wrapper: __builtin_ia32_cmpneqsd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
446 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
447 _mm_cmpneq_sd (__m128d __A, __m128d __B)
449 return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
/* Comparison wrapper: __builtin_ia32_cmpnltsd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
452 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
453 _mm_cmpnlt_sd (__m128d __A, __m128d __B)
455 return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
/* Comparison wrapper: __builtin_ia32_cmpnlesd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
458 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
459 _mm_cmpnle_sd (__m128d __A, __m128d __B)
461 return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
/* Implemented indirectly: a __builtin_ia32_cmpnltsd comparison that takes
   __B as its first operand is combined with __A through
   __builtin_ia32_movsd.
   NOTE(review): the continuation lines of this expression are missing
   from the listing, so the remaining arguments cannot be confirmed here
   (presumably the compare's second operand is __A).  */
464 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
465 _mm_cmpngt_sd (__m128d __A, __m128d __B)
467 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
469 __builtin_ia32_cmpnltsd ((__v2df) __B,
/* Implemented indirectly: a __builtin_ia32_cmpnlesd comparison that takes
   __B as its first operand is combined with __A through
   __builtin_ia32_movsd.
   NOTE(review): the continuation lines of this expression are missing
   from the listing, so the remaining arguments cannot be confirmed here
   (presumably the compare's second operand is __A).  */
474 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
475 _mm_cmpnge_sd (__m128d __A, __m128d __B)
477 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
479 __builtin_ia32_cmpnlesd ((__v2df) __B,
/* Comparison wrapper: __builtin_ia32_cmpordsd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
484 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
485 _mm_cmpord_sd (__m128d __A, __m128d __B)
487 return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
/* Comparison wrapper: __builtin_ia32_cmpunordsd on __A and __B (passed as
   __v2df); the builtin's result is returned as __m128d.  */
490 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
491 _mm_cmpunord_sd (__m128d __A, __m128d __B)
493 return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
/* Return the int result of __builtin_ia32_comisdeq applied to __A and __B
   (passed as __v2df).  */
496 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
497 _mm_comieq_sd (__m128d __A, __m128d __B)
499 return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
/* Return the int result of __builtin_ia32_comisdlt applied to __A and __B
   (passed as __v2df).  */
502 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
503 _mm_comilt_sd (__m128d __A, __m128d __B)
505 return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
/* Return the int result of __builtin_ia32_comisdle applied to __A and __B
   (passed as __v2df).  */
508 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
509 _mm_comile_sd (__m128d __A, __m128d __B)
511 return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
/* Return the int result of __builtin_ia32_comisdgt applied to __A and __B
   (passed as __v2df).  */
514 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
515 _mm_comigt_sd (__m128d __A, __m128d __B)
517 return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
/* Return the int result of __builtin_ia32_comisdge applied to __A and __B
   (passed as __v2df).  */
520 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
521 _mm_comige_sd (__m128d __A, __m128d __B)
523 return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
/* Return the int result of __builtin_ia32_comisdneq applied to __A and
   __B (passed as __v2df).  */
526 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
527 _mm_comineq_sd (__m128d __A, __m128d __B)
529 return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
/* Return the int result of __builtin_ia32_ucomisdeq applied to __A and
   __B (passed as __v2df).  */
532 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
533 _mm_ucomieq_sd (__m128d __A, __m128d __B)
535 return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
/* Return the int result of __builtin_ia32_ucomisdlt applied to __A and
   __B (passed as __v2df).  */
538 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
539 _mm_ucomilt_sd (__m128d __A, __m128d __B)
541 return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
/* Return the int result of __builtin_ia32_ucomisdle applied to __A and
   __B (passed as __v2df).  */
544 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
545 _mm_ucomile_sd (__m128d __A, __m128d __B)
547 return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
/* Return the int result of __builtin_ia32_ucomisdgt applied to __A and
   __B (passed as __v2df).  */
550 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
551 _mm_ucomigt_sd (__m128d __A, __m128d __B)
553 return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
/* Return the int result of __builtin_ia32_ucomisdge applied to __A and
   __B (passed as __v2df).  */
556 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
557 _mm_ucomige_sd (__m128d __A, __m128d __B)
559 return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
/* Return the int result of __builtin_ia32_ucomisdneq applied to __A and
   __B (passed as __v2df).  */
562 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
563 _mm_ucomineq_sd (__m128d __A, __m128d __B)
565 return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
/* Build a __m128i from two 64-bit values: element 0 = __q0 and element 1
   = __q1 (arguments are written with the higher-indexed element first).  */
570 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
571 _mm_set_epi64x (
long long __q1,
long long __q0)
573 return __extension__ (__m128i)(__v2di){ __q0, __q1 };
/* __m64 flavour of _mm_set_epi64x: both arguments are cast to long long
   and forwarded in the same order.  */
576 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
577 _mm_set_epi64 (__m64 __q1, __m64 __q0)
579 return _mm_set_epi64x ((
long long)__q1, (
long long)__q0);
/* Build a __m128i from four ints: elements 0..3 are __q0..__q3
   (arguments are written with the highest-indexed element first).  */
582 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
583 _mm_set_epi32 (
int __q3,
int __q2,
int __q1,
int __q0)
585 return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
/* Build a __m128i from eight shorts: elements 0..7 are __q0..__q7
   (arguments are written with the highest-indexed element first).  */
588 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
589 _mm_set_epi16 (
short __q7,
short __q6,
short __q5,
short __q4,
590 short __q3,
short __q2,
short __q1,
short __q0)
592 return __extension__ (__m128i)(__v8hi){
593 __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
/* Builds a __m128i from a __v16qi initializer.
   NOTE(review): only a fragment survives in this listing -- the function
   name, its parameter list and the initializer contents are missing
   (presumably this is _mm_set_epi8, which other functions here call with
   sixteen arguments).  Confirm against the original file.  */
596 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
602 return __extension__ (__m128i)(__v16qi){
/* Replicate __A into both 64-bit elements via _mm_set_epi64x.  */
610 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
611 _mm_set1_epi64x (
long long __A)
613 return _mm_set_epi64x (__A, __A);
/* Replicate the __m64 value __A into both 64-bit elements via
   _mm_set_epi64.  */
616 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
617 _mm_set1_epi64 (__m64 __A)
619 return _mm_set_epi64 (__A, __A);
/* Replicate __A into all four int elements via _mm_set_epi32.  */
622 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
623 _mm_set1_epi32 (
int __A)
625 return _mm_set_epi32 (__A, __A, __A, __A);
/* Replicate __A into all eight short elements via _mm_set_epi16.  */
628 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
629 _mm_set1_epi16 (
short __A)
631 return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
/* Replicate __A by passing it sixteen times to _mm_set_epi8.  */
634 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
635 _mm_set1_epi8 (
char __A)
637 return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
638 __A, __A, __A, __A, __A, __A, __A, __A);
/* Reversed-argument form of _mm_set_epi64: takes __q0 first and forwards
   the arguments swapped.  */
644 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
645 _mm_setr_epi64 (__m64 __q0, __m64 __q1)
647 return _mm_set_epi64 (__q1, __q0);
/* Reversed-argument form of _mm_set_epi32: takes __q0 first and forwards
   the four arguments in the opposite order.  */
650 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
651 _mm_setr_epi32 (
int __q0,
int __q1,
int __q2,
int __q3)
653 return _mm_set_epi32 (__q3, __q2, __q1, __q0);
/* Reversed-argument form of _mm_set_epi16: takes __q0 first and forwards
   the eight arguments in the opposite order.  */
656 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
657 _mm_setr_epi16 (
short __q0,
short __q1,
short __q2,
short __q3,
658 short __q4,
short __q5,
short __q6,
short __q7)
660 return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
/* Forwards sixteen values, __q15 down to __q00, to _mm_set_epi8.
   NOTE(review): the function name and parameter list are missing from
   this listing (presumably _mm_setr_epi8 taking __q00 .. __q15 in
   ascending order) -- confirm against the original file.  */
663 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
669 return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
670 __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
/* Takes a pointer to a const __m128i and returns a __m128i.
   NOTE(review): the function body is missing from this listing
   (presumably a plain dereference of __P) -- confirm against the original
   file.  */
675 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
676 _mm_load_si128 (__m128i const *__P)
/* Load a __m128i through __builtin_ia32_loaddqu, passing __P as a
   char const pointer (the unaligned load, going by the builtin's name).  */
681 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
682 _mm_loadu_si128 (__m128i const *__P)
684 return (__m128i) __builtin_ia32_loaddqu ((
char const *)__P);
687 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
688 _mm_loadl_epi64 (__m128i const *__P)
690 return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P);
/* Takes a destination pointer __P and a value __B; returns nothing.
   NOTE(review): the function body is missing from this listing
   (presumably a plain assignment of __B through __P) -- confirm against
   the original file.  */
693 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
694 _mm_store_si128 (__m128i *__P, __m128i __B)
/* Store __B (as __v16qi) through __builtin_ia32_storedqu, passing __P as
   a char pointer (the unaligned store, going by the builtin's name).  */
699 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
700 _mm_storeu_si128 (__m128i *__P, __m128i __B)
702 __builtin_ia32_storedqu ((
char *)__P, (__v16qi)__B);
705 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
706 _mm_storel_epi64 (__m128i *__P, __m128i __B)
708 *(
long long *)__P = __builtin_ia32_vec_ext_v2di ((__v2di)
__B, 0);
/* Return element 0 of __B (viewed as __v2di), cast to __m64.  */
711 extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
712 _mm_movepi64_pi64 (__m128i __B)
714 return (__m64) __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
/* Widen a __m64 to a __m128i via _mm_set_epi64, passing a zero __m64 as
   the first argument and __A as the second.  */
717 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
718 _mm_movpi64_epi64 (__m64 __A)
720 return _mm_set_epi64 ((__m64)0LL, __A);
/* Thin wrapper: __builtin_ia32_movq128 on __A (passed as __v2di), result
   returned as __m128i.  Presumably keeps the low 64 bits and clears the
   rest -- verify against the builtin's documentation.  */
723 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
724 _mm_move_epi64 (__m128i __A)
726 return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
/* Return a __m128i with all four int elements set to 0.  */
730 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
731 _mm_setzero_si128 (
void)
733 return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
/* Conversion wrapper: __builtin_ia32_cvtdq2pd on __A (passed as __v4si),
   result returned as __m128d.  */
736 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
737 _mm_cvtepi32_pd (__m128i __A)
739 return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
/* Conversion wrapper: __builtin_ia32_cvtdq2ps on __A (passed as __v4si),
   result returned as __m128.  */
742 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
743 _mm_cvtepi32_ps (__m128i __A)
745 return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
/* Conversion wrapper: __builtin_ia32_cvtpd2dq on __A (passed as __v2df),
   result returned as __m128i.  */
748 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
749 _mm_cvtpd_epi32 (__m128d __A)
751 return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
/* Conversion wrapper: __builtin_ia32_cvtpd2pi on __A (passed as __v2df),
   result returned as __m64.  */
754 extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
755 _mm_cvtpd_pi32 (__m128d __A)
757 return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
/* Conversion wrapper: __builtin_ia32_cvtpd2ps on __A (passed as __v2df),
   result returned as __m128.  */
760 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
761 _mm_cvtpd_ps (__m128d __A)
763 return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
/* Conversion wrapper: __builtin_ia32_cvttpd2dq on __A (passed as __v2df),
   result returned as __m128i.  The extra "t" presumably selects the
   truncating conversion -- verify against the builtin's documentation.  */
766 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
767 _mm_cvttpd_epi32 (__m128d __A)
769 return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
/* Conversion wrapper: __builtin_ia32_cvttpd2pi on __A (passed as __v2df),
   result returned as __m64.  The extra "t" presumably selects the
   truncating conversion -- verify against the builtin's documentation.  */
772 extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
773 _mm_cvttpd_pi32 (__m128d __A)
775 return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
/* Conversion wrapper: __builtin_ia32_cvtpi2pd on __A (passed as __v2si),
   result returned as __m128d.  */
778 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
779 _mm_cvtpi32_pd (__m64 __A)
781 return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
/* Conversion wrapper: __builtin_ia32_cvtps2dq on __A (passed as __v4sf),
   result returned as __m128i.  */
784 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
785 _mm_cvtps_epi32 (__m128 __A)
787 return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
/* Conversion wrapper: __builtin_ia32_cvttps2dq on __A (passed as __v4sf),
   result returned as __m128i.  The extra "t" presumably selects the
   truncating conversion -- verify against the builtin's documentation.  */
790 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
791 _mm_cvttps_epi32 (__m128 __A)
793 return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
/* Conversion wrapper: __builtin_ia32_cvtps2pd on __A (passed as __v4sf),
   result returned as __m128d.  */
796 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
797 _mm_cvtps_pd (__m128 __A)
799 return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
/* Return the int produced by __builtin_ia32_cvtsd2si on __A (passed as
   __v2df).  */
802 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
803 _mm_cvtsd_si32 (__m128d __A)
805 return __builtin_ia32_cvtsd2si ((__v2df) __A);
/* Return the long long produced by __builtin_ia32_cvtsd2si64 on __A
   (passed as __v2df).  */
810 extern __inline
long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
811 _mm_cvtsd_si64 (__m128d __A)
813 return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
/* Return the long long produced by __builtin_ia32_cvtsd2si64 on __A
   (passed as __v2df); same builtin call as _mm_cvtsd_si64.  */
817 extern __inline
long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
818 _mm_cvtsd_si64x (__m128d __A)
820 return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
/* Return the int produced by __builtin_ia32_cvttsd2si on __A (passed as
   __v2df).  The extra "t" presumably selects the truncating conversion --
   verify against the builtin's documentation.  */
824 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
825 _mm_cvttsd_si32 (__m128d __A)
827 return __builtin_ia32_cvttsd2si ((__v2df) __A);
/* Return the long long produced by __builtin_ia32_cvttsd2si64 on __A
   (passed as __v2df).  The extra "t" presumably selects the truncating
   conversion -- verify against the builtin's documentation.  */
832 extern __inline
long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
833 _mm_cvttsd_si64 (__m128d __A)
835 return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
/* Return the long long produced by __builtin_ia32_cvttsd2si64 on __A
   (passed as __v2df); same builtin call as _mm_cvttsd_si64.  */
839 extern __inline
long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
840 _mm_cvttsd_si64x (__m128d __A)
842 return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
/* Conversion wrapper: __builtin_ia32_cvtsd2ss on __A (passed as __v4sf)
   and __B (passed as __v2df), result returned as __m128.  */
846 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
847 _mm_cvtsd_ss (__m128 __A, __m128d __B)
849 return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
/* Conversion wrapper: __builtin_ia32_cvtsi2sd on __A (passed as __v2df)
   and the int __B, result returned as __m128d.  */
852 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
853 _mm_cvtsi32_sd (__m128d __A,
int __B)
855 return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
/* Conversion wrapper: __builtin_ia32_cvtsi642sd on __A (passed as __v2df)
   and the long long __B, result returned as __m128d.  */
860 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
861 _mm_cvtsi64_sd (__m128d __A,
long long __B)
863 return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
/* Conversion wrapper: __builtin_ia32_cvtsi642sd on __A (passed as __v2df)
   and the long long __B; same builtin call as _mm_cvtsi64_sd.  */
867 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
868 _mm_cvtsi64x_sd (__m128d __A,
long long __B)
870 return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
/* Conversion wrapper: __builtin_ia32_cvtss2sd on __A (passed as __v2df)
   and __B (passed as __v4sf), result returned as __m128d.  */
874 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
875 _mm_cvtss_sd (__m128d __A, __m128 __B)
877 return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
/* Function form of the shuffle: passes __A, __B (as __v2df) and the
   selector __mask to __builtin_ia32_shufpd and returns the result as
   __m128d.  A macro of the same name also appears in this file.  */
881 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
882 _mm_shuffle_pd(__m128d __A, __m128d __B, const
int __mask)
884 return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
/* Macro form of the shuffle: casts A and B to __m128d and then __v2df,
   casts N to int, and calls __builtin_ia32_shufpd.
   NOTE(review): as listed, this macro follows a function of the same name
   with no visible preprocessor conditional between them; presumably an
   #if/#else selecting one form or the other was dropped from this
   listing -- confirm against the original file.  */
887 #define _mm_shuffle_pd(A, B, N) \
888 ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A), \
889 (__v2df)(__m128d)(B), (int)(N)))
/* Thin wrapper: __builtin_ia32_unpckhpd on __A and __B (passed as
   __v2df), result returned as __m128d.  */
892 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
893 _mm_unpackhi_pd (__m128d __A, __m128d __B)
895 return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
/* Thin wrapper: __builtin_ia32_unpcklpd on __A and __B (passed as
   __v2df), result returned as __m128d.  */
898 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
899 _mm_unpacklo_pd (__m128d __A, __m128d __B)
901 return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
/* Thin wrapper: __builtin_ia32_loadhpd on __A (passed as __v2df) and the
   pointer __B, result returned as __m128d.  Presumably replaces the high
   element of __A with *__B -- verify against the builtin's
   documentation.  */
904 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
905 _mm_loadh_pd (__m128d __A,
double const *__B)
907 return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
/* Thin wrapper: __builtin_ia32_loadlpd on __A (passed as __v2df) and the
   pointer __B, result returned as __m128d.  Presumably replaces the low
   element of __A with *__B -- verify against the builtin's
   documentation.  */
910 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
911 _mm_loadl_pd (__m128d __A,
double const *__B)
913 return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
/* Return the int produced by __builtin_ia32_movmskpd on __A (passed as
   __v2df).  */
916 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
917 _mm_movemask_pd (__m128d __A)
919 return __builtin_ia32_movmskpd ((__v2df)__A);
/* Thin wrapper: __builtin_ia32_packsswb128 on __A and __B (passed as
   __v8hi), result returned as __m128i.  */
922 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
923 _mm_packs_epi16 (__m128i __A, __m128i __B)
925 return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
/* Thin wrapper: __builtin_ia32_packssdw128 on __A and __B (passed as
   __v4si), result returned as __m128i.  */
928 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
929 _mm_packs_epi32 (__m128i __A, __m128i __B)
931 return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
/* Thin wrapper: __builtin_ia32_packuswb128 on __A and __B (passed as
   __v8hi), result returned as __m128i.  */
934 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
935 _mm_packus_epi16 (__m128i __A, __m128i __B)
937 return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
/* Thin wrapper: __builtin_ia32_punpckhbw128 on __A and __B (passed as
   __v16qi), result returned as __m128i.  */
940 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
941 _mm_unpackhi_epi8 (__m128i __A, __m128i __B)
943 return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
/* Thin wrapper: __builtin_ia32_punpckhwd128 on __A and __B (passed as
   __v8hi), result returned as __m128i.  */
946 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
947 _mm_unpackhi_epi16 (__m128i __A, __m128i __B)
949 return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
/* Thin wrapper: __builtin_ia32_punpckhdq128 on __A and __B (passed as
   __v4si), result returned as __m128i.  */
952 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
953 _mm_unpackhi_epi32 (__m128i __A, __m128i __B)
955 return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
/* Thin wrapper: __builtin_ia32_punpckhqdq128 on __A and __B (passed as
   __v2di), result returned as __m128i.  */
958 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
959 _mm_unpackhi_epi64 (__m128i __A, __m128i __B)
961 return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
/* Thin wrapper: __builtin_ia32_punpcklbw128 on __A and __B (passed as
   __v16qi), result returned as __m128i.  */
964 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
965 _mm_unpacklo_epi8 (__m128i __A, __m128i __B)
967 return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
/* Thin wrapper: __builtin_ia32_punpcklwd128 on __A and __B (passed as
   __v8hi), result returned as __m128i.  */
970 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
971 _mm_unpacklo_epi16 (__m128i __A, __m128i __B)
973 return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
/* Thin wrapper: __builtin_ia32_punpckldq128 on __A and __B (passed as
   __v4si), result returned as __m128i.  */
976 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
977 _mm_unpacklo_epi32 (__m128i __A, __m128i __B)
979 return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
/* Thin wrapper: __builtin_ia32_punpcklqdq128 on __A and __B (passed as
   __v2di), result returned as __m128i.  */
982 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
983 _mm_unpacklo_epi64 (__m128i __A, __m128i __B)
985 return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
/* Thin wrapper: __builtin_ia32_paddb128 on __A and __B (passed as
   __v16qi), result returned as __m128i.  */
988 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
989 _mm_add_epi8 (__m128i __A, __m128i __B)
991 return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B);
/* Thin wrapper: __builtin_ia32_paddw128 on __A and __B (passed as
   __v8hi), result returned as __m128i.  */
994 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
995 _mm_add_epi16 (__m128i __A, __m128i __B)
997 return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B);
/* Thin wrapper: __builtin_ia32_paddd128 on __A and __B (passed as
   __v4si), result returned as __m128i.  */
1000 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1001 _mm_add_epi32 (__m128i __A, __m128i __B)
1003 return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B);
/* Thin wrapper: __builtin_ia32_paddq128 on __A and __B (passed as
   __v2di), result returned as __m128i.  */
1006 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1007 _mm_add_epi64 (__m128i __A, __m128i __B)
1009 return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
/* Thin wrapper: __builtin_ia32_paddsb128 on __A and __B (passed as
   __v16qi), result returned as __m128i.  */
1012 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1013 _mm_adds_epi8 (__m128i __A, __m128i __B)
1015 return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
/* Thin wrapper: __builtin_ia32_paddsw128 on __A and __B (passed as
   __v8hi), result returned as __m128i.  */
1018 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1019 _mm_adds_epi16 (__m128i __A, __m128i __B)
1021 return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
/* Thin wrapper: __builtin_ia32_paddusb128 on __A and __B (passed as
   __v16qi), result returned as __m128i.  */
1024 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1025 _mm_adds_epu8 (__m128i __A, __m128i __B)
1027 return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
/* Thin wrapper: __builtin_ia32_paddusw128 on __A and __B (passed as
   __v8hi), result returned as __m128i.  */
1030 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1031 _mm_adds_epu16 (__m128i __A, __m128i __B)
1033 return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
/* Thin wrapper: __builtin_ia32_psubb128 on __A and __B (passed as
   __v16qi), result returned as __m128i.  */
1036 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1037 _mm_sub_epi8 (__m128i __A, __m128i __B)
1039 return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B);
/* Thin wrapper: __builtin_ia32_psubw128 on __A and __B (passed as
   __v8hi), result returned as __m128i.  */
1042 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1043 _mm_sub_epi16 (__m128i __A, __m128i __B)
1045 return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B);
/* Thin wrapper: __builtin_ia32_psubd128 on __A and __B (passed as
   __v4si), result returned as __m128i.  */
1048 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1049 _mm_sub_epi32 (__m128i __A, __m128i __B)
1051 return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B);
/* Thin wrapper: __builtin_ia32_psubq128 on __A and __B (passed as
   __v2di), result returned as __m128i.  */
1054 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1055 _mm_sub_epi64 (__m128i __A, __m128i __B)
1057 return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
/* Thin wrapper: __builtin_ia32_psubsb128 on __A and __B (passed as
   __v16qi), result returned as __m128i.  */
1060 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1061 _mm_subs_epi8 (__m128i __A, __m128i __B)
1063 return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
/* Thin wrapper: __builtin_ia32_psubsw128 on __A and __B (passed as
   __v8hi), result returned as __m128i.  */
1066 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1067 _mm_subs_epi16 (__m128i __A, __m128i __B)
1069 return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
/* Thin wrapper: __builtin_ia32_psubusb128 on __A and __B (passed as
   __v16qi), result returned as __m128i.  */
1072 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1073 _mm_subs_epu8 (__m128i __A, __m128i __B)
1075 return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
/* Thin wrapper: __builtin_ia32_psubusw128 on __A and __B (passed as
   __v8hi), result returned as __m128i.  */
1078 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1079 _mm_subs_epu16 (__m128i __A, __m128i __B)
1081 return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
/* Thin wrapper: __builtin_ia32_pmaddwd128 on __A and __B (passed as
   __v8hi), result returned as __m128i.  */
1084 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1085 _mm_madd_epi16 (__m128i __A, __m128i __B)
1087 return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
/* Thin wrapper: __builtin_ia32_pmulhw128 on __A and __B (passed as
   __v8hi), result returned as __m128i.  */
1090 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1091 _mm_mulhi_epi16 (__m128i __A, __m128i __B)
1093 return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
/* Thin wrapper: __builtin_ia32_pmullw128 on __A and __B (passed as
   __v8hi), result returned as __m128i.  */
1096 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1097 _mm_mullo_epi16 (__m128i __A, __m128i __B)
1099 return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B);
/* Thin wrapper on 64-bit operands: __builtin_ia32_pmuludq on __A and __B
   (passed as __v2si), result returned as __m64.  */
1102 extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1103 _mm_mul_su32 (__m64 __A, __m64 __B)
1105 return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
/* Thin wrapper: __builtin_ia32_pmuludq128 on __A and __B (passed as
   __v4si), result returned as __m128i.  */
1108 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1109 _mm_mul_epu32 (__m128i __A, __m128i __B)
1111 return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
/* Shift wrapper: __builtin_ia32_psllwi128 on __A (passed as __v8hi) with
   the count __B, result returned as __m128i.  */
1114 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1115 _mm_slli_epi16 (__m128i __A,
int __B)
1117 return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
1120 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1121 _mm_slli_epi32 (__m128i __A,
int __B)
1123 return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
1126 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1127 _mm_slli_epi64 (__m128i __A,
int __B)
1129 return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
1132 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1133 _mm_srai_epi16 (__m128i __A,
int __B)
1135 return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
1138 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1139 _mm_srai_epi32 (__m128i __A,
int __B)
1141 return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
/* Whole-register shifts by a count __N, which is multiplied by 8 before
   being handed to the psrldqi128 / pslldqi128 builtins.

   Two interchangeable forms are provided and exactly one is selected:
   inline functions when __OPTIMIZE__ is defined, macros otherwise.
   NOTE(review): the macro fallback presumably exists because the builtins
   need the count as a compile-time constant, which the function form can
   only supply after inlining and constant folding -- confirm against the
   compiler documentation.  */
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_bsrli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_bslli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
}

/* Same builtin and scaling as _mm_bsrli_si128.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
}

/* Same builtin and scaling as _mm_bslli_si128.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
}
#else
#define _mm_bsrli_si128(A, N) \
  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
#define _mm_bslli_si128(A, N) \
  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
#define _mm_srli_si128(A, N) \
  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
#define _mm_slli_si128(A, N) \
  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
#endif
1179 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1180 _mm_srli_epi16 (__m128i __A,
int __B)
1182 return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
1185 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1186 _mm_srli_epi32 (__m128i __A,
int __B)
1188 return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
1191 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1192 _mm_srli_epi64 (__m128i __A,
int __B)
1194 return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
1197 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1198 _mm_sll_epi16 (__m128i __A, __m128i __B)
1200 return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
1203 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1204 _mm_sll_epi32 (__m128i __A, __m128i __B)
1206 return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
1209 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1210 _mm_sll_epi64 (__m128i __A, __m128i __B)
1212 return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
1215 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1216 _mm_sra_epi16 (__m128i __A, __m128i __B)
1218 return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
1221 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1222 _mm_sra_epi32 (__m128i __A, __m128i __B)
1224 return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
1227 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1228 _mm_srl_epi16 (__m128i __A, __m128i __B)
1230 return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
1233 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1234 _mm_srl_epi32 (__m128i __A, __m128i __B)
1236 return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
1239 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1240 _mm_srl_epi64 (__m128i __A, __m128i __B)
1242 return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
1245 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1246 _mm_and_si128 (__m128i __A, __m128i __B)
1248 return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
1251 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1252 _mm_andnot_si128 (__m128i __A, __m128i __B)
1254 return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
1257 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1258 _mm_or_si128 (__m128i __A, __m128i __B)
1260 return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
1263 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1264 _mm_xor_si128 (__m128i __A, __m128i __B)
1266 return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
1269 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1270 _mm_cmpeq_epi8 (__m128i __A, __m128i __B)
1272 return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
1275 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1276 _mm_cmpeq_epi16 (__m128i __A, __m128i __B)
1278 return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
1281 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1282 _mm_cmpeq_epi32 (__m128i __A, __m128i __B)
1284 return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
1287 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1288 _mm_cmplt_epi8 (__m128i __A, __m128i __B)
1290 return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A);
1293 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1294 _mm_cmplt_epi16 (__m128i __A, __m128i __B)
1296 return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A);
1299 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1300 _mm_cmplt_epi32 (__m128i __A, __m128i __B)
1302 return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A);
1305 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1306 _mm_cmpgt_epi8 (__m128i __A, __m128i __B)
1308 return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
1311 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1312 _mm_cmpgt_epi16 (__m128i __A, __m128i __B)
1314 return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
1317 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1318 _mm_cmpgt_epi32 (__m128i __A, __m128i __B)
1320 return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
/* Element extract / insert on the __v8hi view of a vector, selected by
   index __N.

   Two interchangeable forms are provided and exactly one is selected:
   inline functions when __OPTIMIZE__ is defined, macros otherwise.
   NOTE(review): the macro fallback presumably exists because the builtins
   need the index as a compile-time constant -- confirm against the
   compiler documentation.  */
#ifdef __OPTIMIZE__
/* The builtin's result is narrowed to unsigned short before being
   returned as int, so the returned value is never negative.  */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi16 (__m128i const __A, int const __N)
{
  return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
}

/* Passes __A (as __v8hi), the value __D and the index __N to
   __builtin_ia32_vec_set_v8hi and returns the resulting vector.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
{
  return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
}
#else
#define _mm_extract_epi16(A, N) \
  ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_insert_epi16(A, D, N)				\
  ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A),	\
					  (int)(D), (int)(N)))
#endif
1343 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1344 _mm_max_epi16 (__m128i __A, __m128i __B)
1346 return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
1349 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1350 _mm_max_epu8 (__m128i __A, __m128i __B)
1352 return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
1355 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1356 _mm_min_epi16 (__m128i __A, __m128i __B)
1358 return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
1361 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1362 _mm_min_epu8 (__m128i __A, __m128i __B)
1364 return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
1367 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1368 _mm_movemask_epi8 (__m128i __A)
1370 return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
1373 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1374 _mm_mulhi_epu16 (__m128i __A, __m128i __B)
1376 return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
/* Shuffles controlled by an integer mask, forwarded to the pshufhw,
   pshuflw and pshufd builtins.

   Two interchangeable forms are provided and exactly one is selected:
   inline functions when __OPTIMIZE__ is defined, macros otherwise.
   NOTE(review): the macro fallback presumably exists because the builtins
   need the mask as a compile-time constant -- confirm against the
   compiler documentation.  */
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflehi_epi16 (__m128i __A, const int __mask)
{
  return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflelo_epi16 (__m128i __A, const int __mask)
{
  return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi32 (__m128i __A, const int __mask)
{
  return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
}
#else
#define _mm_shufflehi_epi16(A, N) \
  ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_shufflelo_epi16(A, N) \
  ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_shuffle_epi32(A, N) \
  ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
#endif
1406 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1407 _mm_maskmoveu_si128 (__m128i __A, __m128i __B,
char *
__C)
1409 __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
1412 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1413 _mm_avg_epu8 (__m128i __A, __m128i __B)
1415 return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
1418 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1419 _mm_avg_epu16 (__m128i __A, __m128i __B)
1421 return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
1424 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1425 _mm_sad_epu8 (__m128i __A, __m128i __B)
1427 return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
/* Store the int __B to the location __A through __builtin_ia32_movnti.
   Returns nothing.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si32 (int *__A, int __B)
{
  __builtin_ia32_movnti (__A, __B);
}
#ifdef __x86_64__
/* Store the 64-bit value __B to the location __A through
   __builtin_ia32_movnti64.  Returns nothing.  Only provided when
   __x86_64__ is defined.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si64 (long long int *__A, long long int __B)
{
  __builtin_ia32_movnti64 (__A, __B);
}
#endif
1444 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1445 _mm_stream_si128 (__m128i *__A, __m128i __B)
1447 __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
1450 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1451 _mm_stream_pd (
double *__A, __m128d __B)
1453 __builtin_ia32_movntpd (__A, (__v2df)__B);
/* Pass the address __A to __builtin_ia32_clflush.  Returns nothing.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clflush (void const *__A)
{
  __builtin_ia32_clflush (__A);
}
/* Invoke __builtin_ia32_lfence.  Takes no arguments, returns nothing.
   NOTE(review): the declarator line was missing from this copy; the name
   _mm_lfence (void) is reconstructed from the builtin name -- confirm
   against the upstream header.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_lfence (void)
{
  __builtin_ia32_lfence ();
}
/* Invoke __builtin_ia32_mfence.  Takes no arguments, returns nothing.
   NOTE(review): the declarator line was missing from this copy; the name
   _mm_mfence (void) is reconstructed from the builtin name -- confirm
   against the upstream header.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mfence (void)
{
  __builtin_ia32_mfence ();
}
1474 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1475 _mm_cvtsi32_si128 (
int __A)
1477 return _mm_set_epi32 (0, 0, 0, __A);
1482 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1483 _mm_cvtsi64_si128 (
long long __A)
1485 return _mm_set_epi64x (0, __A);
1489 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1490 _mm_cvtsi64x_si128 (
long long __A)
1492 return _mm_set_epi64x (0, __A);
1498 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1499 _mm_castpd_ps(__m128d __A)
1501 return (__m128)
__A;
1504 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1505 _mm_castpd_si128(__m128d __A)
1507 return (__m128i)
__A;
1510 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1511 _mm_castps_pd(__m128 __A)
1513 return (__m128d)
__A;
1516 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1517 _mm_castps_si128(__m128 __A)
1519 return (__m128i)
__A;
1522 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1523 _mm_castsi128_ps(__m128i __A)
1525 return (__m128)
__A;
1528 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1529 _mm_castsi128_pd(__m128i __A)
1531 return (__m128d)
__A;
__inline __m256i short short short short short short short short short short short short short short short __q00
Definition: avxintrin.h:1210
__inline __m128d __m128i __C
Definition: avxintrin.h:576
__inline __m256i short short short short short short __q09
Definition: avxintrin.h:1206
__inline __m256i short short short short short short short short short short __q05
Definition: avxintrin.h:1206
__inline __m256i __m256i __B
Definition: avx2intrin.h:69
double __v4df __attribute__((__vector_size__(32)))
Definition: avxintrin.h:32
__inline __m256i short __q14
Definition: avxintrin.h:1206
__inline __m256i short short __q13
Definition: avxintrin.h:1206
__inline unsigned char unsigned int unsigned int unsigned int * __P
Definition: adxintrin.h:35
__inline void __m256d __A
Definition: avxintrin.h:828
__inline __m256i short short short short short __q10
Definition: avxintrin.h:1206
__inline __m256d double double double __D
Definition: avxintrin.h:1183
__inline unsigned char unsigned int __X
Definition: adxintrin.h:33
__inline __m256i short short short short short short short short __q07
Definition: avxintrin.h:1206
__inline __m256i char char char char char char char char char char char char char char char char __q15
Definition: avxintrin.h:1218
__inline __m256 float float float float float __F
Definition: avxintrin.h:1189
__inline __m256i short short short short short short short short short short short __q04
Definition: avxintrin.h:1206
__inline __m256i short short short short short short short short short short short short short __q02
Definition: avxintrin.h:1206
__inline __m256i short short short short short short short short short short short short __q03
Definition: avxintrin.h:1206
__inline __m256i short short short short short short short __q08
Definition: avxintrin.h:1206
__inline __m256i short short short short __q11
Definition: avxintrin.h:1206
__inline __m256i short short short short short short short short short __q06
Definition: avxintrin.h:1206
__inline __m256i short short short __q12
Definition: avxintrin.h:1206
__inline __m256i short short short short short short short short short short short short short short __q01
Definition: avxintrin.h:1206