24 #ifndef _IMMINTRIN_H_INCLUDED
25 # error "Never use <avx2intrin.h> directly; include <immintrin.h> instead."
/* Fragment of an inline wrapper around __builtin_ia32_mpsadbw256 (its
   name/parameter line and the remaining call arguments are not present
   in this listing), followed by the macro _mm256_mpsadbw_epu8, which
   casts X and Y to __v32qi and M to int before calling the same builtin.
   NOTE(review): the number leading each line looks like listing residue
   (original line numbers) and brace lines appear to be missing --
   confirm against the upstream header.  */
32 extern __inline __m256i
33 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
36 return (__m256i) __builtin_ia32_mpsadbw256 ((__v32qi)__X,
40 #define _mm256_mpsadbw_epu8(X, Y, M) \
41 ((__m256i) __builtin_ia32_mpsadbw256 ((__v32qi)(__m256i)(X), \
42 (__v32qi)(__m256i)(Y), (int)(M)))
/* _mm256_abs_epi{8,16,32}: single-operand wrappers.  __A is cast to
   __v32qi / __v16hi / __v8si and passed to
   __builtin_ia32_pabs{b,w,d}256; the result is cast back to __m256i.
   NOTE(review): the __attribute__ line carried by the sibling
   definitions is not present for _mm256_abs_epi8 in this listing.  */
45 extern __inline __m256i
47 _mm256_abs_epi8 (__m256i
__A)
49 return (__m256i)__builtin_ia32_pabsb256 ((__v32qi)__A);
52 extern __inline __m256i
53 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
54 _mm256_abs_epi16 (__m256i
__A)
56 return (__m256i)__builtin_ia32_pabsw256 ((__v16hi)__A);
59 extern __inline __m256i
60 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
61 _mm256_abs_epi32 (__m256i __A)
63 return (__m256i)__builtin_ia32_pabsd256 ((__v8si)__A);
/* Pack wrappers.  Each casts both __m256i operands to the source lane
   type and forwards them to the matching builtin:
     _mm256_packs_epi32  -> __builtin_ia32_packssdw256 (__v8si)
     _mm256_packs_epi16  -> __builtin_ia32_packsswb256 (__v16hi)
     _mm256_packus_epi32 -> __builtin_ia32_packusdw256 (__v8si)
     _mm256_packus_epi16 -> __builtin_ia32_packuswb256 (__v16hi)
   The result is cast back to __m256i.  */
66 extern __inline __m256i
67 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
68 _mm256_packs_epi32 (__m256i __A, __m256i
__B)
70 return (__m256i)__builtin_ia32_packssdw256 ((__v8si)__A, (__v8si)__B);
73 extern __inline __m256i
74 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
75 _mm256_packs_epi16 (__m256i __A, __m256i
__B)
77 return (__m256i)__builtin_ia32_packsswb256 ((__v16hi)__A, (__v16hi)__B);
80 extern __inline __m256i
81 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
82 _mm256_packus_epi32 (__m256i __A, __m256i __B)
84 return (__m256i)__builtin_ia32_packusdw256 ((__v8si)__A, (__v8si)__B);
87 extern __inline __m256i
88 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
89 _mm256_packus_epi16 (__m256i __A, __m256i __B)
91 return (__m256i)__builtin_ia32_packuswb256 ((__v16hi)__A, (__v16hi)__B);
/* _mm256_add_epi{8,16,32,64}: each casts both __m256i operands to the
   lane type (__v32qi, __v16hi, __v8si, __v4di) and forwards them to
   __builtin_ia32_padd{b,w,d,q}256, casting the result back to
   __m256i.  */
94 extern __inline __m256i
95 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
96 _mm256_add_epi8 (__m256i __A, __m256i __B)
98 return (__m256i)__builtin_ia32_paddb256 ((__v32qi)__A, (__v32qi)__B);
101 extern __inline __m256i
102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
103 _mm256_add_epi16 (__m256i __A, __m256i __B)
105 return (__m256i)__builtin_ia32_paddw256 ((__v16hi)__A, (__v16hi)__B);
108 extern __inline __m256i
109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
110 _mm256_add_epi32 (__m256i __A, __m256i __B)
112 return (__m256i)__builtin_ia32_paddd256 ((__v8si)__A, (__v8si)__B);
115 extern __inline __m256i
116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
117 _mm256_add_epi64 (__m256i __A, __m256i __B)
119 return (__m256i)__builtin_ia32_paddq256 ((__v4di)__A, (__v4di)__B);
/* _mm256_adds_ep{i,u}{8,16}: wrappers over
   __builtin_ia32_padds{b,w}256 (epi) and __builtin_ia32_paddus{b,w}256
   (epu).  Note the epu variants cast to the same __v32qi / __v16hi
   vector types as the epi variants; only the builtin differs.
   NOTE(review): presumably these are the saturating adds -- confirm
   against the Intel intrinsics documentation.  */
122 extern __inline __m256i
123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
124 _mm256_adds_epi8 (__m256i __A, __m256i __B)
126 return (__m256i)__builtin_ia32_paddsb256 ((__v32qi)__A, (__v32qi)__B);
129 extern __inline __m256i
130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
131 _mm256_adds_epi16 (__m256i __A, __m256i __B)
133 return (__m256i)__builtin_ia32_paddsw256 ((__v16hi)__A, (__v16hi)__B);
136 extern __inline __m256i
137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
138 _mm256_adds_epu8 (__m256i __A, __m256i __B)
140 return (__m256i)__builtin_ia32_paddusb256 ((__v32qi)__A, (__v32qi)__B);
143 extern __inline __m256i
144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
145 _mm256_adds_epu16 (__m256i __A, __m256i __B)
147 return (__m256i)__builtin_ia32_paddusw256 ((__v16hi)__A, (__v16hi)__B);
/* Fragment of an inline wrapper around __builtin_ia32_palignr256 (name
   and parameter line not present here), followed by the macro
   _mm256_alignr_epi8, which casts A and B to __v4di for the same
   builtin.
   NOTE(review): the macro's final argument line is missing from this
   listing, so its last visible line ends in a continuation backslash;
   how N is passed cannot be determined from here.  */
151 extern __inline __m256i
152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
155 return (__m256i) __builtin_ia32_palignr256 ((__v4di)__A,
162 #define _mm256_alignr_epi8(A, B, N) \
163 ((__m256i) __builtin_ia32_palignr256 ((__v4di)(__m256i)(A), \
164 (__v4di)(__m256i)(B), \
/* _mm256_and_si256 / _mm256_andnot_si256: both operands are cast to
   __v4di and forwarded to __builtin_ia32_andsi256 /
   __builtin_ia32_andnotsi256 respectively.
   NOTE(review): which operand andnot complements is not visible here --
   check the builtin's documentation before relying on argument order.  */
168 extern __inline __m256i
169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
170 _mm256_and_si256 (__m256i __A, __m256i __B)
172 return (__m256i) __builtin_ia32_andsi256 ((__v4di)__A, (__v4di)__B);
175 extern __inline __m256i
176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
177 _mm256_andnot_si256 (__m256i __A, __m256i __B)
179 return (__m256i) __builtin_ia32_andnotsi256 ((__v4di)__A, (__v4di)__B);
/* _mm256_avg_epu{8,16}: operands are cast to __v32qi / __v16hi and
   forwarded to __builtin_ia32_pavg{b,w}256; result cast to __m256i.  */
182 extern __inline __m256i
183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
184 _mm256_avg_epu8 (__m256i __A, __m256i __B)
186 return (__m256i)__builtin_ia32_pavgb256 ((__v32qi)__A, (__v32qi)__B);
189 extern __inline __m256i
190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
191 _mm256_avg_epu16 (__m256i __A, __m256i __B)
193 return (__m256i)__builtin_ia32_pavgw256 ((__v16hi)__A, (__v16hi)__B);
/* Blend wrappers.
   _mm256_blendv_epi8 takes three vectors (__X, __Y, __M) and calls
   __builtin_ia32_pblendvb256 starting with (__v32qi)__X; the remaining
   call arguments are not present in this listing.
   The second definition is a fragment (no name line) calling
   __builtin_ia32_pblendw256; the macro _mm256_blend_epi16 calls the
   same builtin with X and Y cast to __v16hi and M cast to int.  */
196 extern __inline __m256i
197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
198 _mm256_blendv_epi8 (__m256i __X, __m256i __Y, __m256i __M)
200 return (__m256i) __builtin_ia32_pblendvb256 ((__v32qi)__X,
206 extern __inline __m256i
207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
210 return (__m256i) __builtin_ia32_pblendw256 ((__v16hi)__X,
215 #define _mm256_blend_epi16(X, Y, M) \
216 ((__m256i) __builtin_ia32_pblendw256 ((__v16hi)(__m256i)(X), \
217 (__v16hi)(__m256i)(Y), (int)(M)))
/* _mm256_cmpeq_epi{8,16,32,64}: operands are cast to __v32qi, __v16hi,
   __v8si, __v4di and forwarded to __builtin_ia32_pcmpeq{b,w,d,q}256;
   the builtin's result is returned as __m256i.  */
220 extern __inline __m256i
221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
222 _mm256_cmpeq_epi8 (__m256i __A, __m256i __B)
224 return (__m256i)__builtin_ia32_pcmpeqb256 ((__v32qi)__A, (__v32qi)__B);
227 extern __inline __m256i
228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
229 _mm256_cmpeq_epi16 (__m256i __A, __m256i __B)
231 return (__m256i)__builtin_ia32_pcmpeqw256 ((__v16hi)__A, (__v16hi)__B);
234 extern __inline __m256i
235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
236 _mm256_cmpeq_epi32 (__m256i __A, __m256i __B)
238 return (__m256i)__builtin_ia32_pcmpeqd256 ((__v8si)__A, (__v8si)__B);
241 extern __inline __m256i
242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
243 _mm256_cmpeq_epi64 (__m256i __A, __m256i __B)
245 return (__m256i)__builtin_ia32_pcmpeqq256 ((__v4di)__A, (__v4di)__B);
/* _mm256_cmpgt_epi{8,16,32,64}: wrappers over
   __builtin_ia32_pcmpgt{b,w,d,q}256 with __A cast to __v32qi, __v16hi,
   __v8si, __v4di.
   NOTE(review): for the 8-, 16- and 32-bit variants the line carrying
   the second call argument is not present in this listing; only the
   64-bit variant shows the full call.  */
248 extern __inline __m256i
249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
250 _mm256_cmpgt_epi8 (__m256i __A, __m256i __B)
252 return (__m256i)__builtin_ia32_pcmpgtb256 ((__v32qi)__A,
256 extern __inline __m256i
257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
258 _mm256_cmpgt_epi16 (__m256i __A, __m256i __B)
260 return (__m256i)__builtin_ia32_pcmpgtw256 ((__v16hi)__A,
264 extern __inline __m256i
265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
266 _mm256_cmpgt_epi32 (__m256i __A, __m256i __B)
268 return (__m256i)__builtin_ia32_pcmpgtd256 ((__v8si)__A,
272 extern __inline __m256i
273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
274 _mm256_cmpgt_epi64 (__m256i __A, __m256i __B)
276 return (__m256i)__builtin_ia32_pcmpgtq256 ((__v4di)__A, (__v4di)__B);
/* Horizontal add/sub wrappers, each taking (__X, __Y):
     _mm256_hadd_epi16  -> __builtin_ia32_phaddw256  (__v16hi)
     _mm256_hadd_epi32  -> __builtin_ia32_phaddd256  (__v8si)
     _mm256_hadds_epi16 -> __builtin_ia32_phaddsw256 (__v16hi)
     _mm256_hsub_epi16  -> __builtin_ia32_phsubw256  (__v16hi)
     _mm256_hsub_epi32  -> __builtin_ia32_phsubd256  (__v8si)
     _mm256_hsubs_epi16 -> __builtin_ia32_phsubsw256 (__v16hi)
   NOTE(review): the 16-bit variants show only the first call argument;
   their continuation lines are not present in this listing.  */
279 extern __inline __m256i
280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
281 _mm256_hadd_epi16 (__m256i __X, __m256i __Y)
283 return (__m256i) __builtin_ia32_phaddw256 ((__v16hi)__X,
287 extern __inline __m256i
288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
289 _mm256_hadd_epi32 (__m256i __X, __m256i __Y)
291 return (__m256i) __builtin_ia32_phaddd256 ((__v8si)__X, (__v8si)__Y);
294 extern __inline __m256i
295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
296 _mm256_hadds_epi16 (__m256i __X, __m256i __Y)
298 return (__m256i) __builtin_ia32_phaddsw256 ((__v16hi)__X,
302 extern __inline __m256i
303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
304 _mm256_hsub_epi16 (__m256i __X, __m256i __Y)
306 return (__m256i) __builtin_ia32_phsubw256 ((__v16hi)__X,
310 extern __inline __m256i
311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
312 _mm256_hsub_epi32 (__m256i __X, __m256i __Y)
314 return (__m256i) __builtin_ia32_phsubd256 ((__v8si)__X, (__v8si)__Y);
317 extern __inline __m256i
318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
319 _mm256_hsubs_epi16 (__m256i __X, __m256i __Y)
321 return (__m256i) __builtin_ia32_phsubsw256 ((__v16hi)__X,
/* Multiply-add wrappers:
     _mm256_maddubs_epi16 -> __builtin_ia32_pmaddubsw256, __X as __v32qi
     _mm256_madd_epi16    -> __builtin_ia32_pmaddwd256,   __A as __v16hi
   NOTE(review): the second call argument of each is on a continuation
   line that is not present in this listing.  */
325 extern __inline __m256i
326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327 _mm256_maddubs_epi16 (__m256i __X, __m256i __Y)
329 return (__m256i) __builtin_ia32_pmaddubsw256 ((__v32qi)__X,
333 extern __inline __m256i
334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
335 _mm256_madd_epi16 (__m256i __A, __m256i __B)
337 return (__m256i)__builtin_ia32_pmaddwd256 ((__v16hi)__A,
/* _mm256_max_ep{i,u}{8,16,32}: wrappers over
   __builtin_ia32_pmaxs{b,w,d}256 (epi) and
   __builtin_ia32_pmaxu{b,w,d}256 (epu).  The epu variants cast to the
   same __v32qi / __v16hi / __v8si types as the epi variants; the
   signed/unsigned distinction is carried by the builtin name only.  */
341 extern __inline __m256i
342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
343 _mm256_max_epi8 (__m256i __A, __m256i __B)
345 return (__m256i)__builtin_ia32_pmaxsb256 ((__v32qi)__A, (__v32qi)__B);
348 extern __inline __m256i
349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
350 _mm256_max_epi16 (__m256i __A, __m256i __B)
352 return (__m256i)__builtin_ia32_pmaxsw256 ((__v16hi)__A, (__v16hi)__B);
355 extern __inline __m256i
356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
357 _mm256_max_epi32 (__m256i __A, __m256i __B)
359 return (__m256i)__builtin_ia32_pmaxsd256 ((__v8si)__A, (__v8si)__B);
362 extern __inline __m256i
363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
364 _mm256_max_epu8 (__m256i __A, __m256i __B)
366 return (__m256i)__builtin_ia32_pmaxub256 ((__v32qi)__A, (__v32qi)__B);
369 extern __inline __m256i
370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
371 _mm256_max_epu16 (__m256i __A, __m256i __B)
373 return (__m256i)__builtin_ia32_pmaxuw256 ((__v16hi)__A, (__v16hi)__B);
376 extern __inline __m256i
377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
378 _mm256_max_epu32 (__m256i __A, __m256i __B)
380 return (__m256i)__builtin_ia32_pmaxud256 ((__v8si)__A, (__v8si)__B);
/* _mm256_min_ep{i,u}{8,16,32}: wrappers over
   __builtin_ia32_pmins{b,w,d}256 (epi) and
   __builtin_ia32_pminu{b,w,d}256 (epu).  As with the max family, the
   epu variants use the same __v32qi / __v16hi / __v8si casts; only the
   builtin differs.  */
383 extern __inline __m256i
384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
385 _mm256_min_epi8 (__m256i __A, __m256i __B)
387 return (__m256i)__builtin_ia32_pminsb256 ((__v32qi)__A, (__v32qi)__B);
390 extern __inline __m256i
391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
392 _mm256_min_epi16 (__m256i __A, __m256i __B)
394 return (__m256i)__builtin_ia32_pminsw256 ((__v16hi)__A, (__v16hi)__B);
397 extern __inline __m256i
398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
399 _mm256_min_epi32 (__m256i __A, __m256i __B)
401 return (__m256i)__builtin_ia32_pminsd256 ((__v8si)__A, (__v8si)__B);
404 extern __inline __m256i
405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
406 _mm256_min_epu8 (__m256i __A, __m256i __B)
408 return (__m256i)__builtin_ia32_pminub256 ((__v32qi)__A, (__v32qi)__B);
411 extern __inline __m256i
412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
413 _mm256_min_epu16 (__m256i __A, __m256i __B)
415 return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__A, (__v16hi)__B);
418 extern __inline __m256i
419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
420 _mm256_min_epu32 (__m256i __A, __m256i __B)
422 return (__m256i)__builtin_ia32_pminud256 ((__v8si)__A, (__v8si)__B);
/* _mm256_movemask_epi8: passes __A, cast to __v32qi, to
   __builtin_ia32_pmovmskb256 and returns the builtin's value without
   any cast.
   NOTE(review): the line declaring the return type is not present in
   this listing.  */
426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
427 _mm256_movemask_epi8 (__m256i __A)
429 return __builtin_ia32_pmovmskb256 ((__v32qi)__A);
/* _mm256_cvtepi{8,16,32}_epi{16,32,64}: each takes a 128-bit __m128i,
   casts it to the source lane type (__v16qi, __v8hi or __v4si) and
   returns the __m256i produced by the matching
   __builtin_ia32_pmovsx{bw,bd,bq,wd,wq,dq}256 builtin.  */
432 extern __inline __m256i
433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
434 _mm256_cvtepi8_epi16 (__m128i __X)
436 return (__m256i) __builtin_ia32_pmovsxbw256 ((__v16qi)__X);
439 extern __inline __m256i
440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
441 _mm256_cvtepi8_epi32 (__m128i __X)
443 return (__m256i) __builtin_ia32_pmovsxbd256 ((__v16qi)__X);
446 extern __inline __m256i
447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
448 _mm256_cvtepi8_epi64 (__m128i __X)
450 return (__m256i) __builtin_ia32_pmovsxbq256 ((__v16qi)__X);
453 extern __inline __m256i
454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
455 _mm256_cvtepi16_epi32 (__m128i __X)
457 return (__m256i) __builtin_ia32_pmovsxwd256 ((__v8hi)__X);
460 extern __inline __m256i
461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
462 _mm256_cvtepi16_epi64 (__m128i __X)
464 return (__m256i) __builtin_ia32_pmovsxwq256 ((__v8hi)__X);
467 extern __inline __m256i
468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
469 _mm256_cvtepi32_epi64 (__m128i __X)
471 return (__m256i) __builtin_ia32_pmovsxdq256 ((__v4si)__X);
/* _mm256_cvtepu{8,16,32}_epi{16,32,64}: same shape as the cvtepi
   family -- a 128-bit __m128i is cast to __v16qi, __v8hi or __v4si --
   but the call goes to __builtin_ia32_pmovzx{bw,bd,bq,wd,wq,dq}256.  */
474 extern __inline __m256i
475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
476 _mm256_cvtepu8_epi16 (__m128i __X)
478 return (__m256i) __builtin_ia32_pmovzxbw256 ((__v16qi)__X);
481 extern __inline __m256i
482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
483 _mm256_cvtepu8_epi32 (__m128i __X)
485 return (__m256i) __builtin_ia32_pmovzxbd256 ((__v16qi)__X);
488 extern __inline __m256i
489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
490 _mm256_cvtepu8_epi64 (__m128i __X)
492 return (__m256i) __builtin_ia32_pmovzxbq256 ((__v16qi)__X);
495 extern __inline __m256i
496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
497 _mm256_cvtepu16_epi32 (__m128i __X)
499 return (__m256i) __builtin_ia32_pmovzxwd256 ((__v8hi)__X);
502 extern __inline __m256i
503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
504 _mm256_cvtepu16_epi64 (__m128i __X)
506 return (__m256i) __builtin_ia32_pmovzxwq256 ((__v8hi)__X);
509 extern __inline __m256i
510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511 _mm256_cvtepu32_epi64 (__m128i __X)
513 return (__m256i) __builtin_ia32_pmovzxdq256 ((__v4si)__X);
/* Multiply wrappers and the builtin each one forwards to:
     _mm256_mul_epi32    -> __builtin_ia32_pmuldq256   (__v8si)
     _mm256_mulhrs_epi16 -> __builtin_ia32_pmulhrsw256 (__v16hi; second
                            argument line not present in this listing)
     _mm256_mulhi_epu16  -> __builtin_ia32_pmulhuw256  (__v16hi)
     _mm256_mulhi_epi16  -> __builtin_ia32_pmulhw256   (__v16hi)
     _mm256_mullo_epi16  -> __builtin_ia32_pmullw256   (__v16hi)
     _mm256_mullo_epi32  -> __builtin_ia32_pmulld256   (__v8si)
     _mm256_mul_epu32    -> __builtin_ia32_pmuludq256  (__v8si)
   Every result is cast back to __m256i.  */
516 extern __inline __m256i
517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
518 _mm256_mul_epi32 (__m256i __X, __m256i __Y)
520 return (__m256i) __builtin_ia32_pmuldq256 ((__v8si)__X, (__v8si)__Y);
523 extern __inline __m256i
524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
525 _mm256_mulhrs_epi16 (__m256i __X, __m256i __Y)
527 return (__m256i) __builtin_ia32_pmulhrsw256 ((__v16hi)__X,
531 extern __inline __m256i
532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
533 _mm256_mulhi_epu16 (__m256i __A, __m256i __B)
535 return (__m256i)__builtin_ia32_pmulhuw256 ((__v16hi)__A, (__v16hi)__B);
538 extern __inline __m256i
539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
540 _mm256_mulhi_epi16 (__m256i __A, __m256i __B)
542 return (__m256i)__builtin_ia32_pmulhw256 ((__v16hi)__A, (__v16hi)__B);
545 extern __inline __m256i
546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
547 _mm256_mullo_epi16 (__m256i __A, __m256i __B)
549 return (__m256i)__builtin_ia32_pmullw256 ((__v16hi)__A, (__v16hi)__B);
552 extern __inline __m256i
553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
554 _mm256_mullo_epi32 (__m256i __A, __m256i __B)
556 return (__m256i)__builtin_ia32_pmulld256 ((__v8si)__A, (__v8si)__B);
559 extern __inline __m256i
560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
561 _mm256_mul_epu32 (__m256i __A, __m256i __B)
563 return (__m256i)__builtin_ia32_pmuludq256 ((__v8si)__A, (__v8si)__B);
/* _mm256_or_si256 forwards both operands as __v4di to
   __builtin_ia32_por256; _mm256_sad_epu8 forwards both operands as
   __v32qi to __builtin_ia32_psadbw256.  Results are cast to __m256i.  */
566 extern __inline __m256i
567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
568 _mm256_or_si256 (__m256i __A, __m256i __B)
570 return (__m256i)__builtin_ia32_por256 ((__v4di)__A, (__v4di)__B);
573 extern __inline __m256i
574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
575 _mm256_sad_epu8 (__m256i __A, __m256i __B)
577 return (__m256i)__builtin_ia32_psadbw256 ((__v32qi)__A, (__v32qi)__B);
/* Shuffle wrappers.
   _mm256_shuffle_epi8 calls __builtin_ia32_pshufb256 with __X cast to
   __v32qi (second argument line not present in this listing).
   The next three definitions are fragments without their name lines;
   they call __builtin_ia32_pshufd256 (__v8si), __builtin_ia32_pshufhw256
   and __builtin_ia32_pshuflw256 (__v16hi) with a __mask argument.
   The macros _mm256_shuffle_epi32, _mm256_shufflehi_epi16 and
   _mm256_shufflelo_epi16 call those same three builtins, casting A to
   the lane type and N to int.  */
580 extern __inline __m256i
581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
582 _mm256_shuffle_epi8 (__m256i __X, __m256i __Y)
584 return (__m256i) __builtin_ia32_pshufb256 ((__v32qi)__X,
589 extern __inline __m256i
590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
593 return (__m256i)__builtin_ia32_pshufd256 ((__v8si)__A, __mask);
596 extern __inline __m256i
597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
600 return (__m256i)__builtin_ia32_pshufhw256 ((__v16hi)__A, __mask);
603 extern __inline __m256i
604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
607 return (__m256i)__builtin_ia32_pshuflw256 ((__v16hi)__A, __mask);
610 #define _mm256_shuffle_epi32(A, N) \
611 ((__m256i)__builtin_ia32_pshufd256 ((__v8si)(__m256i)(A), (int)(N)))
612 #define _mm256_shufflehi_epi16(A, N) \
613 ((__m256i)__builtin_ia32_pshufhw256 ((__v16hi)(__m256i)(A), (int)(N)))
614 #define _mm256_shufflelo_epi16(A, N) \
615 ((__m256i)__builtin_ia32_pshuflw256 ((__v16hi)(__m256i)(A), (int)(N)))
/* _mm256_sign_epi{8,16,32}: (__X, __Y) are cast to __v32qi, __v16hi,
   __v8si and forwarded to __builtin_ia32_psign{b,w,d}256; the result
   is cast back to __m256i.  */
618 extern __inline __m256i
619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
620 _mm256_sign_epi8 (__m256i __X, __m256i __Y)
622 return (__m256i) __builtin_ia32_psignb256 ((__v32qi)__X, (__v32qi)__Y);
625 extern __inline __m256i
626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
627 _mm256_sign_epi16 (__m256i __X, __m256i __Y)
629 return (__m256i) __builtin_ia32_psignw256 ((__v16hi)__X, (__v16hi)__Y);
632 extern __inline __m256i
633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
634 _mm256_sign_epi32 (__m256i __X, __m256i __Y)
636 return (__m256i) __builtin_ia32_psignd256 ((__v8si)__X, (__v8si)__Y);
/* Two inline fragments (name lines not present in this listing) and two
   macros, _mm256_bslli_epi128 and _mm256_slli_si256.  All four expand
   to the same call: __builtin_ia32_pslldqi256 with the count multiplied
   by 8.
   NOTE(review): the "* 8" presumably converts a byte count into the bit
   count the builtin expects -- confirm against the GCC builtin
   documentation.  */
640 extern __inline __m256i
641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
644 return (__m256i)__builtin_ia32_pslldqi256 (__A, __N * 8);
647 extern __inline __m256i
648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
651 return (__m256i)__builtin_ia32_pslldqi256 (__A, __N * 8);
654 #define _mm256_bslli_epi128(A, N) \
655 ((__m256i)__builtin_ia32_pslldqi256 ((__m256i)(A), (int)(N) * 8))
656 #define _mm256_slli_si256(A, N) \
657 ((__m256i)__builtin_ia32_pslldqi256 ((__m256i)(A), (int)(N) * 8))
/* Left-shift wrappers in two flavours per lane width (16, 32, 64):
     _mm256_slli_epiN (__m256i __A, int __B)
         -> __builtin_ia32_psll{w,d,q}i256, count passed as a plain int
     _mm256_sll_epiN  (__m256i __A, __m128i __B)
         -> __builtin_ia32_psll{w,d,q}256, count passed as a 128-bit
            vector cast to __v8hi / __v4si / __v2di
   __A is cast to __v16hi / __v8si / __v4di in both flavours.  */
660 extern __inline __m256i
661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
662 _mm256_slli_epi16 (__m256i __A,
int __B)
664 return (__m256i)__builtin_ia32_psllwi256 ((__v16hi)__A, __B);
667 extern __inline __m256i
668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
669 _mm256_sll_epi16 (__m256i __A, __m128i __B)
671 return (__m256i)__builtin_ia32_psllw256((__v16hi)__A, (__v8hi)__B);
674 extern __inline __m256i
675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
676 _mm256_slli_epi32 (__m256i __A,
int __B)
678 return (__m256i)__builtin_ia32_pslldi256 ((__v8si)__A, __B);
681 extern __inline __m256i
682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
683 _mm256_sll_epi32 (__m256i __A, __m128i __B)
685 return (__m256i)__builtin_ia32_pslld256((__v8si)__A, (__v4si)__B);
688 extern __inline __m256i
689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
690 _mm256_slli_epi64 (__m256i __A,
int __B)
692 return (__m256i)__builtin_ia32_psllqi256 ((__v4di)__A, __B);
695 extern __inline __m256i
696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
697 _mm256_sll_epi64 (__m256i __A, __m128i __B)
699 return (__m256i)__builtin_ia32_psllq256((__v4di)__A, (__v2di)__B);
/* Right-shift wrappers over the psra builtins, for 16- and 32-bit
   lanes only (no 64-bit variant appears in this listing):
     _mm256_srai_epiN (__m256i __A, int __B)
         -> __builtin_ia32_psra{w,d}i256, count passed as a plain int
     _mm256_sra_epiN  (__m256i __A, __m128i __B)
         -> __builtin_ia32_psra{w,d}256, count cast to __v8hi / __v4si  */
702 extern __inline __m256i
703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
704 _mm256_srai_epi16 (__m256i __A,
int __B)
706 return (__m256i)__builtin_ia32_psrawi256 ((__v16hi)__A, __B);
709 extern __inline __m256i
710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711 _mm256_sra_epi16 (__m256i __A, __m128i __B)
713 return (__m256i)__builtin_ia32_psraw256 ((__v16hi)__A, (__v8hi)__B);
716 extern __inline __m256i
717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
718 _mm256_srai_epi32 (__m256i __A,
int __B)
720 return (__m256i)__builtin_ia32_psradi256 ((__v8si)__A, __B);
723 extern __inline __m256i
724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
725 _mm256_sra_epi32 (__m256i __A, __m128i __B)
727 return (__m256i)__builtin_ia32_psrad256 ((__v8si)__A, (__v4si)__B);
/* Two inline fragments (name lines not present in this listing) and two
   macros, _mm256_bsrli_epi128 and _mm256_srli_si256.  All four expand
   to the same call: __builtin_ia32_psrldqi256 with the count multiplied
   by 8.
   NOTE(review): the "* 8" presumably converts a byte count into the bit
   count the builtin expects -- confirm against the GCC builtin
   documentation.  */
731 extern __inline __m256i
732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
735 return (__m256i)__builtin_ia32_psrldqi256 (__A, __N * 8);
738 extern __inline __m256i
739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
742 return (__m256i)__builtin_ia32_psrldqi256 (__A, __N * 8);
745 #define _mm256_bsrli_epi128(A, N) \
746 ((__m256i)__builtin_ia32_psrldqi256 ((__m256i)(A), (int)(N) * 8))
747 #define _mm256_srli_si256(A, N) \
748 ((__m256i)__builtin_ia32_psrldqi256 ((__m256i)(A), (int)(N) * 8))
/* Right-shift wrappers over the psrl builtins, in two flavours per
   lane width (16, 32, 64):
     _mm256_srli_epiN (__m256i __A, int __B)
         -> __builtin_ia32_psrl{w,d,q}i256, count passed as a plain int
     _mm256_srl_epiN  (__m256i __A, __m128i __B)
         -> __builtin_ia32_psrl{w,d,q}256, count passed as a 128-bit
            vector cast to __v8hi / __v4si / __v2di
   __A is cast to __v16hi / __v8si / __v4di in both flavours.  */
751 extern __inline __m256i
752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
753 _mm256_srli_epi16 (__m256i __A,
int __B)
755 return (__m256i)__builtin_ia32_psrlwi256 ((__v16hi)__A, __B);
758 extern __inline __m256i
759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
760 _mm256_srl_epi16 (__m256i __A, __m128i __B)
762 return (__m256i)__builtin_ia32_psrlw256((__v16hi)__A, (__v8hi)__B);
765 extern __inline __m256i
766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
767 _mm256_srli_epi32 (__m256i __A,
int __B)
769 return (__m256i)__builtin_ia32_psrldi256 ((__v8si)__A, __B);
772 extern __inline __m256i
773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
774 _mm256_srl_epi32 (__m256i __A, __m128i __B)
776 return (__m256i)__builtin_ia32_psrld256((__v8si)__A, (__v4si)__B);
779 extern __inline __m256i
780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
781 _mm256_srli_epi64 (__m256i __A,
int __B)
783 return (__m256i)__builtin_ia32_psrlqi256 ((__v4di)__A, __B);
786 extern __inline __m256i
787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
788 _mm256_srl_epi64 (__m256i __A, __m128i __B)
790 return (__m256i)__builtin_ia32_psrlq256((__v4di)__A, (__v2di)__B);
/* _mm256_sub_epi{8,16,32,64}: each casts both __m256i operands to the
   lane type (__v32qi, __v16hi, __v8si, __v4di) and forwards them, in
   (__A, __B) order, to __builtin_ia32_psub{b,w,d,q}256; the result is
   cast back to __m256i.  */
793 extern __inline __m256i
794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
795 _mm256_sub_epi8 (__m256i __A, __m256i __B)
797 return (__m256i)__builtin_ia32_psubb256 ((__v32qi)__A, (__v32qi)__B);
800 extern __inline __m256i
801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
802 _mm256_sub_epi16 (__m256i __A, __m256i __B)
804 return (__m256i)__builtin_ia32_psubw256 ((__v16hi)__A, (__v16hi)__B);
807 extern __inline __m256i
808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
809 _mm256_sub_epi32 (__m256i __A, __m256i __B)
811 return (__m256i)__builtin_ia32_psubd256 ((__v8si)__A, (__v8si)__B);
814 extern __inline __m256i
815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
816 _mm256_sub_epi64 (__m256i __A, __m256i __B)
818 return (__m256i)__builtin_ia32_psubq256 ((__v4di)__A, (__v4di)__B);
/* _mm256_subs_ep{i,u}{8,16}: wrappers over
   __builtin_ia32_psubs{b,w}256 (epi) and __builtin_ia32_psubus{b,w}256
   (epu).  The epu variants cast to the same __v32qi / __v16hi types as
   the epi variants; only the builtin differs.
   NOTE(review): presumably these are the saturating subtracts --
   confirm against the Intel intrinsics documentation.  */
821 extern __inline __m256i
822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
823 _mm256_subs_epi8 (__m256i __A, __m256i __B)
825 return (__m256i)__builtin_ia32_psubsb256 ((__v32qi)__A, (__v32qi)__B);
828 extern __inline __m256i
829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
830 _mm256_subs_epi16 (__m256i __A, __m256i __B)
832 return (__m256i)__builtin_ia32_psubsw256 ((__v16hi)__A, (__v16hi)__B);
835 extern __inline __m256i
836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
837 _mm256_subs_epu8 (__m256i __A, __m256i __B)
839 return (__m256i)__builtin_ia32_psubusb256 ((__v32qi)__A, (__v32qi)__B);
842 extern __inline __m256i
843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
844 _mm256_subs_epu16 (__m256i __A, __m256i __B)
846 return (__m256i)__builtin_ia32_psubusw256 ((__v16hi)__A, (__v16hi)__B);
/* _mm256_unpackhi_epi{8,16,32,64}: operands are cast to __v32qi,
   __v16hi, __v8si, __v4di and forwarded to
   __builtin_ia32_punpckh{bw,wd,dq,qdq}256; result cast to __m256i.  */
849 extern __inline __m256i
850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
851 _mm256_unpackhi_epi8 (__m256i __A, __m256i __B)
853 return (__m256i)__builtin_ia32_punpckhbw256 ((__v32qi)__A, (__v32qi)__B);
856 extern __inline __m256i
857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
858 _mm256_unpackhi_epi16 (__m256i __A, __m256i __B)
860 return (__m256i)__builtin_ia32_punpckhwd256 ((__v16hi)__A, (__v16hi)__B);
863 extern __inline __m256i
864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
865 _mm256_unpackhi_epi32 (__m256i __A, __m256i __B)
867 return (__m256i)__builtin_ia32_punpckhdq256 ((__v8si)__A, (__v8si)__B);
870 extern __inline __m256i
871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
872 _mm256_unpackhi_epi64 (__m256i __A, __m256i __B)
874 return (__m256i)__builtin_ia32_punpckhqdq256 ((__v4di)__A, (__v4di)__B);
/* _mm256_unpacklo_epi{8,16,32,64}: operands are cast to __v32qi,
   __v16hi, __v8si, __v4di and forwarded to
   __builtin_ia32_punpckl{bw,wd,dq,qdq}256; result cast to __m256i.  */
877 extern __inline __m256i
878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
879 _mm256_unpacklo_epi8 (__m256i __A, __m256i __B)
881 return (__m256i)__builtin_ia32_punpcklbw256 ((__v32qi)__A, (__v32qi)__B);
884 extern __inline __m256i
885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
886 _mm256_unpacklo_epi16 (__m256i __A, __m256i __B)
888 return (__m256i)__builtin_ia32_punpcklwd256 ((__v16hi)__A, (__v16hi)__B);
891 extern __inline __m256i
892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
893 _mm256_unpacklo_epi32 (__m256i __A, __m256i __B)
895 return (__m256i)__builtin_ia32_punpckldq256 ((__v8si)__A, (__v8si)__B);
898 extern __inline __m256i
899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
900 _mm256_unpacklo_epi64 (__m256i __A, __m256i __B)
902 return (__m256i)__builtin_ia32_punpcklqdq256 ((__v4di)__A, (__v4di)__B);
/* _mm256_xor_si256 forwards both operands as __v4di to
   __builtin_ia32_pxor256.
   _mm256_stream_load_si256 takes a pointer to const __m256i and passes
   it to __builtin_ia32_movntdqa256 after a cast to (__v4di *); note
   that this cast drops the const qualifier from the pointee.  */
905 extern __inline __m256i
906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
907 _mm256_xor_si256 (__m256i __A, __m256i __B)
909 return (__m256i)__builtin_ia32_pxor256 ((__v4di)__A, (__v4di)__B);
912 extern __inline __m256i
913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
914 _mm256_stream_load_si256 (__m256i const *__X)
916 return (__m256i) __builtin_ia32_movntdqa256 ((__v4di *) __X);
/* Broadcast wrappers taking a 128-bit source vector:
     _mm_broadcastss_ps          (__m128  -> __m128)
         -> __builtin_ia32_vbroadcastss_ps    (__v4sf)
     _mm256_broadcastss_ps       (__m128  -> __m256)
         -> __builtin_ia32_vbroadcastss_ps256 (__v4sf)
     _mm256_broadcastsd_pd       (__m128d -> __m256d)
         -> __builtin_ia32_vbroadcastsd_pd256 (__v2df)
     _mm256_broadcastsi128_si256 (__m128i -> __m256i)
         -> __builtin_ia32_vbroadcastsi256    (__v2di)  */
919 extern __inline __m128
920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
921 _mm_broadcastss_ps (__m128 __X)
923 return (__m128) __builtin_ia32_vbroadcastss_ps ((__v4sf)__X);
926 extern __inline __m256
927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
928 _mm256_broadcastss_ps (__m128 __X)
930 return (__m256) __builtin_ia32_vbroadcastss_ps256 ((__v4sf)__X);
933 extern __inline __m256d
934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
935 _mm256_broadcastsd_pd (__m128d __X)
937 return (__m256d) __builtin_ia32_vbroadcastsd_pd256 ((__v2df)__X);
940 extern __inline __m256i
941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
942 _mm256_broadcastsi128_si256 (__m128i __X)
944 return (__m256i) __builtin_ia32_vbroadcastsi256 ((__v2di)__X);
/* 32-bit blend, in 128-bit and 256-bit forms.  Each form appears as an
   inline fragment (name line and remaining call arguments not present
   in this listing) followed by a macro:
     _mm_blend_epi32    -> __builtin_ia32_pblendd128, X/Y as __v4si
     _mm256_blend_epi32 -> __builtin_ia32_pblendd256, X/Y as __v8si
   Both macros cast M to int.  */
948 extern __inline __m128i
949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
952 return (__m128i) __builtin_ia32_pblendd128 ((__v4si)__X,
957 #define _mm_blend_epi32(X, Y, M) \
958 ((__m128i) __builtin_ia32_pblendd128 ((__v4si)(__m128i)(X), \
959 (__v4si)(__m128i)(Y), (int)(M)))
963 extern __inline __m256i
964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
967 return (__m256i) __builtin_ia32_pblendd256 ((__v8si)__X,
972 #define _mm256_blend_epi32(X, Y, M) \
973 ((__m256i) __builtin_ia32_pblendd256 ((__v8si)(__m256i)(X), \
974 (__v8si)(__m256i)(Y), (int)(M)))
/* _mm256_broadcast{b,w,d,q}_epi{8,16,32,64}: each takes a 128-bit
   __m128i, casts it to __v16qi, __v8hi, __v4si or __v2di, and returns
   the __m256i produced by __builtin_ia32_pbroadcast{b,w,d,q}256.  */
977 extern __inline __m256i
978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
979 _mm256_broadcastb_epi8 (__m128i __X)
981 return (__m256i) __builtin_ia32_pbroadcastb256 ((__v16qi)__X);
984 extern __inline __m256i
985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
986 _mm256_broadcastw_epi16 (__m128i __X)
988 return (__m256i) __builtin_ia32_pbroadcastw256 ((__v8hi)__X);
991 extern __inline __m256i
992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
993 _mm256_broadcastd_epi32 (__m128i __X)
995 return (__m256i) __builtin_ia32_pbroadcastd256 ((__v4si)__X);
998 extern __inline __m256i
999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1000 _mm256_broadcastq_epi64 (__m128i __X)
1002 return (__m256i) __builtin_ia32_pbroadcastq256 ((__v2di)__X);
/* _mm_broadcast{b,w,d,q}_epi{8,16,32,64}: 128-bit counterparts of the
   256-bit broadcasts.  The __m128i argument is cast to __v16qi, __v8hi,
   __v4si or __v2di and passed to __builtin_ia32_pbroadcast{b,w,d,q}128;
   the result is returned as __m128i.  */
1005 extern __inline __m128i
1006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1007 _mm_broadcastb_epi8 (__m128i __X)
1009 return (__m128i) __builtin_ia32_pbroadcastb128 ((__v16qi)__X);
1012 extern __inline __m128i
1013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1014 _mm_broadcastw_epi16 (__m128i __X)
1016 return (__m128i) __builtin_ia32_pbroadcastw128 ((__v8hi)__X);
1019 extern __inline __m128i
1020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1021 _mm_broadcastd_epi32 (__m128i __X)
1023 return (__m128i) __builtin_ia32_pbroadcastd128 ((__v4si)__X);
1026 extern __inline __m128i
1027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1028 _mm_broadcastq_epi64 (__m128i __X)
1030 return (__m128i) __builtin_ia32_pbroadcastq128 ((__v2di)__X);
/* Permute wrappers.
   _mm256_permutevar8x32_epi32 (__X, __Y) -> __builtin_ia32_permvarsi256
       with both operands cast to __v8si.
   _mm256_permutevar8x32_ps (__m256 __X, __m256i __Y)
       -> __builtin_ia32_permvarsf256 with __X as __v8sf, __Y as __v8si.
   _mm256_permute4x64_pd and _mm256_permute4x64_epi64 appear as macros
   calling __builtin_ia32_permdf256 (__v4df) and
   __builtin_ia32_permdi256 (__v4di) with M cast to int; each is
   preceded by an inline fragment calling the same builtin whose name
   line is not present in this listing.  */
1033 extern __inline __m256i
1034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1035 _mm256_permutevar8x32_epi32 (__m256i __X, __m256i __Y)
1037 return (__m256i) __builtin_ia32_permvarsi256 ((__v8si)__X, (__v8si)__Y);
1041 extern __inline __m256d
1042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1045 return (__m256d) __builtin_ia32_permdf256 ((__v4df)__X, __M);
1048 #define _mm256_permute4x64_pd(X, M) \
1049 ((__m256d) __builtin_ia32_permdf256 ((__v4df)(__m256d)(X), (int)(M)))
1052 extern __inline __m256
1053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1054 _mm256_permutevar8x32_ps (__m256 __X, __m256i __Y)
1056 return (__m256) __builtin_ia32_permvarsf256 ((__v8sf)__X, (__v8si)__Y);
1060 extern __inline __m256i
1061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1064 return (__m256i) __builtin_ia32_permdi256 ((__v4di)__X, __M);
1067 #define _mm256_permute4x64_epi64(X, M) \
1068 ((__m256i) __builtin_ia32_permdi256 ((__v4di)(__m256i)(X), (int)(M)))
/* 128-bit-lane permute / extract / insert.  Each appears as an inline
   fragment (name line not present in this listing) followed by a macro:
     _mm256_permute2x128_si256 -> __builtin_ia32_permti256
         (X and Y as __v4di, M as int)
     _mm256_extracti128_si256  -> __builtin_ia32_extract128i256
         (X as __v4di, M as int; yields __m128i)
     _mm256_inserti128_si256   -> __builtin_ia32_insert128i256
         (X as __v4di, Y as __v2di)
   NOTE(review): the last line of the _mm256_inserti128_si256 macro is
   missing here, so its final visible line ends in a continuation
   backslash.  */
1073 extern __inline __m256i
1074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1077 return (__m256i) __builtin_ia32_permti256 ((__v4di)__X, (__v4di)__Y, __M);
1080 #define _mm256_permute2x128_si256(X, Y, M) \
1081 ((__m256i) __builtin_ia32_permti256 ((__v4di)(__m256i)(X), (__v4di)(__m256i)(Y), (int)(M)))
1085 extern __inline __m128i
1086 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1089 return (__m128i) __builtin_ia32_extract128i256 ((__v4di)__X, __M);
1092 #define _mm256_extracti128_si256(X, M) \
1093 ((__m128i) __builtin_ia32_extract128i256 ((__v4di)(__m256i)(X), (int)(M)))
1097 extern __inline __m256i
1098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1101 return (__m256i) __builtin_ia32_insert128i256 ((__v4di)__X, (__v2di)__Y, __M);
1104 #define _mm256_inserti128_si256(X, Y, M) \
1105 ((__m256i) __builtin_ia32_insert128i256 ((__v4di)(__m256i)(X), \
1106 (__v2di)(__m128i)(Y), \
/* Masked-load wrappers taking (pointer __X, mask __M):
     _mm256_maskload_epi32 (int const *,       __m256i)
         -> __builtin_ia32_maskloadd256, __X as const __v8si *
     _mm256_maskload_epi64 (long long const *, __m256i)
         -> __builtin_ia32_maskloadq256, __X as const __v4di *
     _mm_maskload_epi32    (int const *,       __m128i)
         -> __builtin_ia32_maskloadd,    __X as const __v4si *
     _mm_maskload_epi64    (long long const *, __m128i)
         -> __builtin_ia32_maskloadq,    __X as const __v2di *
   NOTE(review): the line passing the mask argument to each builtin is
   not present in this listing.  */
1110 extern __inline __m256i
1111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1112 _mm256_maskload_epi32 (
int const *__X, __m256i __M )
1114 return (__m256i) __builtin_ia32_maskloadd256 ((
const __v8si *)__X,
1118 extern __inline __m256i
1119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1120 _mm256_maskload_epi64 (
long long const *__X, __m256i __M )
1122 return (__m256i) __builtin_ia32_maskloadq256 ((
const __v4di *)__X,
1126 extern __inline __m128i
1127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1128 _mm_maskload_epi32 (
int const *__X, __m128i __M )
1130 return (__m128i) __builtin_ia32_maskloadd ((
const __v4si *)__X,
1134 extern __inline __m128i
1135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1136 _mm_maskload_epi64 (
long long const *__X, __m128i __M )
1138 return (__m128i) __builtin_ia32_maskloadq ((
const __v2di *)__X,
/* Always-inline wrapper with no return value.  Forwards to
   __builtin_ia32_maskstored256 with __X reinterpreted as a __v8si pointer
   and with __M and __Y each reinterpreted as __v8si.
   NOTE(review): by its name this is presumably a store of __Y to __X gated
   per 32-bit element by __M -- confirm the exact mask semantics.  */
1142 extern __inline
void
1143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1144 _mm256_maskstore_epi32 (
int *__X, __m256i __M, __m256i __Y )
1146 __builtin_ia32_maskstored256 ((__v8si *)__X, (__v8si)__M, (__v8si)__Y);
/* Always-inline wrapper with no return value.  Forwards to
   __builtin_ia32_maskstoreq256 with __X reinterpreted as a __v4di pointer
   and with __M and __Y each reinterpreted as __v4di.
   NOTE(review): by its name this is presumably a store of __Y to __X gated
   per 64-bit element by __M -- confirm the exact mask semantics.  */
1149 extern __inline
void
1150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1151 _mm256_maskstore_epi64 (
long long *__X, __m256i __M, __m256i __Y )
1153 __builtin_ia32_maskstoreq256 ((__v4di *)__X, (__v4di)__M, (__v4di)__Y);
/* Always-inline wrapper with no return value.  Forwards to
   __builtin_ia32_maskstored with __X reinterpreted as a __v4si pointer and
   with __M and __Y each reinterpreted as __v4si (the 128-bit counterpart of
   the __m256i variant, judging by the operand types).
   NOTE(review): presumably a store of __Y to __X gated per 32-bit element
   by __M -- confirm the exact mask semantics.  */
1156 extern __inline
void
1157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1158 _mm_maskstore_epi32 (
int *__X, __m128i __M, __m128i __Y )
1160 __builtin_ia32_maskstored ((__v4si *)__X, (__v4si)__M, (__v4si)__Y);
/* Always-inline wrapper with no return value.  Forwards to
   __builtin_ia32_maskstoreq with __X reinterpreted as a __v2di pointer and
   with __M and __Y each reinterpreted as __v2di.
   NOTE(review): presumably a store of __Y to __X gated per 64-bit element
   by __M -- confirm the exact mask semantics.  */
1163 extern __inline
void
1164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1165 _mm_maskstore_epi64 (
long long *__X, __m128i __M, __m128i __Y )
1167 __builtin_ia32_maskstoreq (( __v2di *)__X, (__v2di)__M, (__v2di)__Y);
/* Always-inline wrapper: reinterprets __X and __Y as __v8si, passes them to
   __builtin_ia32_psllv8si, and returns the result cast to __m256i.
   NOTE(review): the name suggests a left shift of each 32-bit element of
   __X by the matching element of __Y -- confirm operand roles.  */
1170 extern __inline __m256i
1171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1172 _mm256_sllv_epi32 (__m256i __X, __m256i __Y)
1174 return (__m256i) __builtin_ia32_psllv8si ((__v8si)__X, (__v8si)__Y);
/* Always-inline wrapper: reinterprets __X and __Y as __v4si, passes them to
   __builtin_ia32_psllv4si, and returns the result cast to __m128i.
   NOTE(review): the name suggests a left shift of each 32-bit element of
   __X by the matching element of __Y -- confirm operand roles.  */
1177 extern __inline __m128i
1178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1179 _mm_sllv_epi32 (__m128i __X, __m128i __Y)
1181 return (__m128i) __builtin_ia32_psllv4si ((__v4si)__X, (__v4si)__Y);
/* Always-inline wrapper: reinterprets __X and __Y as __v4di, passes them to
   __builtin_ia32_psllv4di, and returns the result cast to __m256i.
   NOTE(review): the name suggests a left shift of each 64-bit element of
   __X by the matching element of __Y -- confirm operand roles.  */
1184 extern __inline __m256i
1185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1186 _mm256_sllv_epi64 (__m256i __X, __m256i __Y)
1188 return (__m256i) __builtin_ia32_psllv4di ((__v4di)__X, (__v4di)__Y);
/* Always-inline wrapper: reinterprets __X and __Y as __v2di, passes them to
   __builtin_ia32_psllv2di, and returns the result cast to __m128i.
   NOTE(review): the name suggests a left shift of each 64-bit element of
   __X by the matching element of __Y -- confirm operand roles.  */
1191 extern __inline __m128i
1192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1193 _mm_sllv_epi64 (__m128i __X, __m128i __Y)
1195 return (__m128i) __builtin_ia32_psllv2di ((__v2di)__X, (__v2di)__Y);
/* Always-inline wrapper: reinterprets __X and __Y as __v8si, passes them to
   __builtin_ia32_psrav8si, and returns the result cast to __m256i.
   NOTE(review): the name suggests an arithmetic (sign-preserving) right
   shift of each 32-bit element of __X by the matching element of __Y --
   confirm against the builtin's documentation.  */
1198 extern __inline __m256i
1199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1200 _mm256_srav_epi32 (__m256i __X, __m256i __Y)
1202 return (__m256i) __builtin_ia32_psrav8si ((__v8si)__X, (__v8si)__Y);
/* Always-inline wrapper: reinterprets __X and __Y as __v4si, passes them to
   __builtin_ia32_psrav4si, and returns the result cast to __m128i.
   NOTE(review): the name suggests an arithmetic (sign-preserving) right
   shift of each 32-bit element of __X by the matching element of __Y --
   confirm against the builtin's documentation.  */
1205 extern __inline __m128i
1206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1207 _mm_srav_epi32 (__m128i __X, __m128i __Y)
1209 return (__m128i) __builtin_ia32_psrav4si ((__v4si)__X, (__v4si)__Y);
/* Always-inline wrapper: reinterprets __X and __Y as __v8si, passes them to
   __builtin_ia32_psrlv8si, and returns the result cast to __m256i.
   NOTE(review): the name suggests a logical (zero-filling) right shift of
   each 32-bit element of __X by the matching element of __Y -- confirm
   against the builtin's documentation.  */
1212 extern __inline __m256i
1213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1214 _mm256_srlv_epi32 (__m256i __X, __m256i __Y)
1216 return (__m256i) __builtin_ia32_psrlv8si ((__v8si)__X, (__v8si)__Y);
/* Always-inline wrapper: reinterprets __X and __Y as __v4si, passes them to
   __builtin_ia32_psrlv4si, and returns the result cast to __m128i.
   NOTE(review): the name suggests a logical (zero-filling) right shift of
   each 32-bit element of __X by the matching element of __Y -- confirm
   against the builtin's documentation.  */
1219 extern __inline __m128i
1220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1221 _mm_srlv_epi32 (__m128i __X, __m128i __Y)
1223 return (__m128i) __builtin_ia32_psrlv4si ((__v4si)__X, (__v4si)__Y);
/* Always-inline wrapper: reinterprets __X and __Y as __v4di, passes them to
   __builtin_ia32_psrlv4di, and returns the result cast to __m256i.
   NOTE(review): the name suggests a logical (zero-filling) right shift of
   each 64-bit element of __X by the matching element of __Y -- confirm
   against the builtin's documentation.  */
1226 extern __inline __m256i
1227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1228 _mm256_srlv_epi64 (__m256i __X, __m256i __Y)
1230 return (__m256i) __builtin_ia32_psrlv4di ((__v4di)__X, (__v4di)__Y);
/* Always-inline wrapper: reinterprets __X and __Y as __v2di, passes them to
   __builtin_ia32_psrlv2di, and returns the result cast to __m128i.
   NOTE(review): the name suggests a logical (zero-filling) right shift of
   each 64-bit element of __X by the matching element of __Y -- confirm
   against the builtin's documentation.  */
1233 extern __inline __m128i
1234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1235 _mm_srlv_epi64 (__m128i __X, __m128i __Y)
1237 return (__m128i) __builtin_ia32_psrlv2di ((__v2di)__X, (__v2di)__Y);
1241 extern __inline __m128d
1242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1245 __v2df src = _mm_setzero_pd ();
1246 __v2df mask = _mm_cmpeq_pd (src, src);
1248 return (__m128d) __builtin_ia32_gathersiv2df (src,
1255 extern __inline __m128d
1256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1258 __m128d mask, const
int scale)
1260 return (__m128d) __builtin_ia32_gathersiv2df ((__v2df)src,
1267 extern __inline __m256d
1268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1271 __v4df src = _mm256_setzero_pd ();
1274 return (__m256d) __builtin_ia32_gathersiv4df (src,
1281 extern __inline __m256d
1282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1284 __m128i index, __m256d mask, const
int scale)
1286 return (__m256d) __builtin_ia32_gathersiv4df ((__v4df)src,
1293 extern __inline __m128d
1294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1297 __v2df src = _mm_setzero_pd ();
1298 __v2df mask = _mm_cmpeq_pd (src, src);
1300 return (__m128d) __builtin_ia32_gatherdiv2df (src,
1307 extern __inline __m128d
1308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1310 __m128d mask, const
int scale)
1312 return (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)src,
1319 extern __inline __m256d
1320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1323 __v4df src = _mm256_setzero_pd ();
1326 return (__m256d) __builtin_ia32_gatherdiv4df (src,
1333 extern __inline __m256d
1334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1336 __m256i index, __m256d mask, const
int scale)
1338 return (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)src,
1345 extern __inline __m128
1346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1349 __v4sf src = _mm_setzero_ps ();
1350 __v4sf mask = _mm_cmpeq_ps (src, src);
1352 return (__m128) __builtin_ia32_gathersiv4sf (src,
1359 extern __inline __m128
1360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1362 __m128 mask, const
int scale)
1364 return (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)src,
1371 extern __inline __m256
1372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1375 __v8sf src = _mm256_setzero_ps ();
1378 return (__m256) __builtin_ia32_gathersiv8sf (src,
1385 extern __inline __m256
1386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1388 __m256i index, __m256 mask, const
int scale)
1390 return (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)src,
1397 extern __inline __m128
1398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1401 __v4sf src = _mm_setzero_ps ();
1402 __v4sf mask = _mm_cmpeq_ps (src, src);
1404 return (__m128) __builtin_ia32_gatherdiv4sf (src,
1411 extern __inline __m128
1412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1414 __m128 mask, const
int scale)
1416 return (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)src,
1423 extern __inline __m128
1424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1427 __v4sf src = _mm_setzero_ps ();
1428 __v4sf mask = _mm_cmpeq_ps (src, src);
1430 return (__m128) __builtin_ia32_gatherdiv4sf256 (src,
1437 extern __inline __m128
1438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1440 __m256i index, __m128 mask, const
int scale)
1442 return (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)src,
1449 extern __inline __m128i
1450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1452 __m128i index, const
int scale)
1454 __v2di src = __extension__ (__v2di){ 0, 0 };
1455 __v2di mask = __extension__ (__v2di){ ~0, ~0 };
1457 return (__m128i) __builtin_ia32_gathersiv2di (src,
1464 extern __inline __m128i
1465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1467 __m128i index, __m128i mask, const
int scale)
1469 return (__m128i) __builtin_ia32_gathersiv2di ((__v2di)src,
1476 extern __inline __m256i
1477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1479 __m128i index, const
int scale)
1481 __v4di src = __extension__ (__v4di){ 0, 0, 0, 0 };
1482 __v4di mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
1484 return (__m256i) __builtin_ia32_gathersiv4di (src,
1491 extern __inline __m256i
1492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1494 __m128i index, __m256i mask, const
int scale)
1496 return (__m256i) __builtin_ia32_gathersiv4di ((__v4di)src,
1503 extern __inline __m128i
1504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1506 __m128i index, const
int scale)
1508 __v2di src = __extension__ (__v2di){ 0, 0 };
1509 __v2di mask = __extension__ (__v2di){ ~0, ~0 };
1511 return (__m128i) __builtin_ia32_gatherdiv2di (src,
1518 extern __inline __m128i
1519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1521 __m128i mask, const
int scale)
1523 return (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)src,
1530 extern __inline __m256i
1531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1533 __m256i index, const
int scale)
1535 __v4di src = __extension__ (__v4di){ 0, 0, 0, 0 };
1536 __v4di mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
1538 return (__m256i) __builtin_ia32_gatherdiv4di (src,
1545 extern __inline __m256i
1546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1548 __m256i index, __m256i mask, const
int scale)
1550 return (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)src,
1557 extern __inline __m128i
1558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1561 __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
1562 __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
1564 return (__m128i) __builtin_ia32_gathersiv4si (src,
1571 extern __inline __m128i
1572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1574 __m128i mask, const
int scale)
1576 return (__m128i) __builtin_ia32_gathersiv4si ((__v4si)src,
1583 extern __inline __m256i
1584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1587 __v8si src = __extension__ (__v8si){ 0, 0, 0, 0, 0, 0, 0, 0 };
1588 __v8si mask = __extension__ (__v8si){ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
1590 return (__m256i) __builtin_ia32_gathersiv8si (src,
1597 extern __inline __m256i
1598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1600 __m256i index, __m256i mask, const
int scale)
1602 return (__m256i) __builtin_ia32_gathersiv8si ((__v8si)src,
1609 extern __inline __m128i
1610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1613 __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
1614 __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
1616 return (__m128i) __builtin_ia32_gatherdiv4si (src,
1623 extern __inline __m128i
1624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1626 __m128i mask, const
int scale)
1628 return (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)src,
1635 extern __inline __m128i
1636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1639 __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
1640 __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
1642 return (__m128i) __builtin_ia32_gatherdiv4si256 (src,
1649 extern __inline __m128i
1650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1652 __m256i index, __m128i mask, const
int scale)
1654 return (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)src,
1661 #define _mm_i32gather_pd(BASE, INDEX, SCALE) \
1662 (__m128d) __builtin_ia32_gathersiv2df ((__v2df) _mm_setzero_pd (), \
1663 (double const *)BASE, \
1664 (__v4si)(__m128i)INDEX, \
1665 (__v2df)_mm_set1_pd( \
1666 (double)(long long int) -1), \
1669 #define _mm_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
1670 (__m128d) __builtin_ia32_gathersiv2df ((__v2df)(__m128d)SRC, \
1671 (double const *)BASE, \
1672 (__v4si)(__m128i)INDEX, \
1673 (__v2df)(__m128d)MASK, \
1676 #define _mm256_i32gather_pd(BASE, INDEX, SCALE) \
1677 (__m256d) __builtin_ia32_gathersiv4df ((__v4df) _mm256_setzero_pd (), \
1678 (double const *)BASE, \
1679 (__v4si)(__m128i)INDEX, \
1680 (__v4df)_mm256_set1_pd( \
1681 (double)(long long int) -1), \
1684 #define _mm256_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
1685 (__m256d) __builtin_ia32_gathersiv4df ((__v4df)(__m256d)SRC, \
1686 (double const *)BASE, \
1687 (__v4si)(__m128i)INDEX, \
1688 (__v4df)(__m256d)MASK, \
1691 #define _mm_i64gather_pd(BASE, INDEX, SCALE) \
1692 (__m128d) __builtin_ia32_gatherdiv2df ((__v2df) _mm_setzero_pd (), \
1693 (double const *)BASE, \
1694 (__v2di)(__m128i)INDEX, \
1695 (__v2df)_mm_set1_pd( \
1696 (double)(long long int) -1), \
1699 #define _mm_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
1700 (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)(__m128d)SRC, \
1701 (double const *)BASE, \
1702 (__v2di)(__m128i)INDEX, \
1703 (__v2df)(__m128d)MASK, \
1706 #define _mm256_i64gather_pd(BASE, INDEX, SCALE) \
1707 (__m256d) __builtin_ia32_gatherdiv4df ((__v4df) _mm256_setzero_pd (), \
1708 (double const *)BASE, \
1709 (__v4di)(__m256i)INDEX, \
1710 (__v4df)_mm256_set1_pd( \
1711 (double)(long long int) -1), \
1714 #define _mm256_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
1715 (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)(__m256d)SRC, \
1716 (double const *)BASE, \
1717 (__v4di)(__m256i)INDEX, \
1718 (__v4df)(__m256d)MASK, \
1721 #define _mm_i32gather_ps(BASE, INDEX, SCALE) \
1722 (__m128) __builtin_ia32_gathersiv4sf ((__v4sf) _mm_setzero_ps (), \
1723 (float const *)BASE, \
1724 (__v4si)(__m128i)INDEX, \
1725 _mm_set1_ps ((float)(int) -1), \
1728 #define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
1729 (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128d)SRC, \
1730 (float const *)BASE, \
1731 (__v4si)(__m128i)INDEX, \
1732 (__v4sf)(__m128d)MASK, \
1735 #define _mm256_i32gather_ps(BASE, INDEX, SCALE) \
1736 (__m256) __builtin_ia32_gathersiv8sf ((__v8sf) _mm256_setzero_ps (), \
1737 (float const *)BASE, \
1738 (__v8si)(__m256i)INDEX, \
1739 (__v8sf)_mm256_set1_ps ( \
1743 #define _mm256_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
1744 (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)(__m256)SRC, \
1745 (float const *)BASE, \
1746 (__v8si)(__m256i)INDEX, \
1747 (__v8sf)(__m256d)MASK, \
1750 #define _mm_i64gather_ps(BASE, INDEX, SCALE) \
1751 (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf) _mm_setzero_pd (), \
1752 (float const *)BASE, \
1753 (__v2di)(__m128i)INDEX, \
1754 (__v4sf)_mm_set1_ps ( \
1758 #define _mm_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
1759 (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)(__m128)SRC, \
1760 (float const *)BASE, \
1761 (__v2di)(__m128i)INDEX, \
1762 (__v4sf)(__m128d)MASK, \
1765 #define _mm256_i64gather_ps(BASE, INDEX, SCALE) \
1766 (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf) _mm_setzero_ps (), \
1767 (float const *)BASE, \
1768 (__v4di)(__m256i)INDEX, \
1769 (__v4sf)_mm_set1_ps( \
1773 #define _mm256_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
1774 (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)(__m128)SRC, \
1775 (float const *)BASE, \
1776 (__v4di)(__m256i)INDEX, \
1777 (__v4sf)(__m128)MASK, \
1780 #define _mm_i32gather_epi64(BASE, INDEX, SCALE) \
1781 (__m128i) __builtin_ia32_gathersiv2di ((__v2di) _mm_setzero_si128 (), \
1782 (long long const *)BASE, \
1783 (__v4si)(__m128i)INDEX, \
1784 (__v2di)_mm_set1_epi64x (-1), \
1787 #define _mm_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
1788 (__m128i) __builtin_ia32_gathersiv2di ((__v2di)(__m128i)SRC, \
1789 (long long const *)BASE, \
1790 (__v4si)(__m128i)INDEX, \
1791 (__v2di)(__m128i)MASK, \
1794 #define _mm256_i32gather_epi64(BASE, INDEX, SCALE) \
1795 (__m256i) __builtin_ia32_gathersiv4di ((__v4di) _mm256_setzero_si256 (), \
1796 (long long const *)BASE, \
1797 (__v4si)(__m128i)INDEX, \
1798 (__v4di)_mm256_set1_epi64x (-1), \
1801 #define _mm256_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
1802 (__m256i) __builtin_ia32_gathersiv4di ((__v4di)(__m256i)SRC, \
1803 (long long const *)BASE, \
1804 (__v4si)(__m128i)INDEX, \
1805 (__v4di)(__m256i)MASK, \
1808 #define _mm_i64gather_epi64(BASE, INDEX, SCALE) \
1809 (__m128i) __builtin_ia32_gatherdiv2di ((__v2di) _mm_setzero_si128 (), \
1810 (long long const *)BASE, \
1811 (__v2di)(__m128i)INDEX, \
1812 (__v2di)_mm_set1_epi64x (-1), \
1815 #define _mm_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
1816 (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)(__m128i)SRC, \
1817 (long long const *)BASE, \
1818 (__v2di)(__m128i)INDEX, \
1819 (__v2di)(__m128i)MASK, \
1822 #define _mm256_i64gather_epi64(BASE, INDEX, SCALE) \
1823 (__m256i) __builtin_ia32_gatherdiv4di ((__v4di) _mm256_setzero_si256 (), \
1824 (long long const *)BASE, \
1825 (__v4di)(__m256i)INDEX, \
1826 (__v4di)_mm256_set1_epi64x (-1), \
1829 #define _mm256_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
1830 (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)(__m256i)SRC, \
1831 (long long const *)BASE, \
1832 (__v4di)(__m256i)INDEX, \
1833 (__v4di)(__m256i)MASK, \
1836 #define _mm_i32gather_epi32(BASE, INDEX, SCALE) \
1837 (__m128i) __builtin_ia32_gathersiv4si ((__v4si) _mm_setzero_si128 (), \
1838 (int const *)BASE, \
1839 (__v4si)(__m128i)INDEX, \
1840 (__v4si)_mm_set1_epi32 (-1), \
1843 #define _mm_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
1844 (__m128i) __builtin_ia32_gathersiv4si ((__v4si)(__m128i)SRC, \
1845 (int const *)BASE, \
1846 (__v4si)(__m128i)INDEX, \
1847 (__v4si)(__m128i)MASK, \
1850 #define _mm256_i32gather_epi32(BASE, INDEX, SCALE) \
1851 (__m256i) __builtin_ia32_gathersiv8si ((__v8si) _mm256_setzero_si256 (), \
1852 (int const *)BASE, \
1853 (__v8si)(__m256i)INDEX, \
1854 (__v8si)_mm256_set1_epi32 (-1), \
1857 #define _mm256_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
1858 (__m256i) __builtin_ia32_gathersiv8si ((__v8si)(__m256i)SRC, \
1859 (int const *)BASE, \
1860 (__v8si)(__m256i)INDEX, \
1861 (__v8si)(__m256i)MASK, \
1864 #define _mm_i64gather_epi32(BASE, INDEX, SCALE) \
1865 (__m128i) __builtin_ia32_gatherdiv4si ((__v4si) _mm_setzero_si128 (), \
1866 (int const *)BASE, \
1867 (__v2di)(__m128i)INDEX, \
1868 (__v4si)_mm_set1_epi32 (-1), \
1871 #define _mm_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
1872 (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)(__m128i)SRC, \
1873 (int const *)BASE, \
1874 (__v2di)(__m128i)INDEX, \
1875 (__v4si)(__m128i)MASK, \
1878 #define _mm256_i64gather_epi32(BASE, INDEX, SCALE) \
1879 (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si) _mm_setzero_si128 (), \
1880 (int const *)BASE, \
1881 (__v4di)(__m256i)INDEX, \
1882 (__v4si)_mm_set1_epi32(-1), \
1885 #define _mm256_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
1886 (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)(__m128i)SRC, \
1887 (int const *)BASE, \
1888 (__v4di)(__m256i)INDEX, \
1889 (__v4si)(__m128i)MASK, \
#define _mm256_inserti128_si256(X, Y, M)
Definition: avx2intrin.h:1104
#define _mm256_alignr_epi8(A, B, N)
Definition: avx2intrin.h:162
#define _mm256_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE)
Definition: avx2intrin.h:1773
#define _mm_i32gather_pd(BASE, INDEX, SCALE)
Definition: avx2intrin.h:1661
#define _mm256_permute4x64_epi64(X, M)
Definition: avx2intrin.h:1067
#define _mm_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE)
Definition: avx2intrin.h:1669
#define _mm_i32gather_epi32(BASE, INDEX, SCALE)
Definition: avx2intrin.h:1836
#define _mm_i32gather_ps(BASE, INDEX, SCALE)
Definition: avx2intrin.h:1721
#define _mm256_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE)
Definition: avx2intrin.h:1743
#define _mm_i64gather_pd(BASE, INDEX, SCALE)
Definition: avx2intrin.h:1691
#define _mm256_extracti128_si256(X, M)
Definition: avx2intrin.h:1092
__inline __m256i __m256i __B
Definition: avx2intrin.h:69
#define _mm256_i32gather_pd(BASE, INDEX, SCALE)
Definition: avx2intrin.h:1676
#define _mm_i64gather_epi32(BASE, INDEX, SCALE)
Definition: avx2intrin.h:1864
#define _mm256_bslli_epi128(A, N)
Definition: avx2intrin.h:654
#define _mm256_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE)
Definition: avx2intrin.h:1857
#define _mm256_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE)
Definition: avx2intrin.h:1829
#define _mm256_i64gather_epi64(BASE, INDEX, SCALE)
Definition: avx2intrin.h:1822
#define _mm_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE)
Definition: avx2intrin.h:1871
#define _mm256_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE)
Definition: avx2intrin.h:1714
#define _mm_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE)
Definition: avx2intrin.h:1758
#define _mm256_blend_epi16(X, Y, M)
Definition: avx2intrin.h:215
__inline void __m256d __A
Definition: avxintrin.h:828
#define _CMP_EQ_OQ
Definition: avxintrin.h:51
#define _mm256_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE)
Definition: avx2intrin.h:1801
#define _mm_i64gather_ps(BASE, INDEX, SCALE)
Definition: avx2intrin.h:1750
#define _mm_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE)
Definition: avx2intrin.h:1843
#define _mm256_shufflelo_epi16(A, N)
Definition: avx2intrin.h:614
#define _mm256_i32gather_ps(BASE, INDEX, SCALE)
Definition: avx2intrin.h:1735
#define _mm256_permute4x64_pd(X, M)
Definition: avx2intrin.h:1048
#define _mm256_permute2x128_si256(X, Y, M)
Definition: avx2intrin.h:1080
__inline unsigned char unsigned int __X
Definition: adxintrin.h:33
#define _mm256_slli_si256(A, N)
Definition: avx2intrin.h:656
#define _mm_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE)
Definition: avx2intrin.h:1699
__inline __m256i __m256i __Y
Definition: avx2intrin.h:198
#define _mm256_i64gather_ps(BASE, INDEX, SCALE)
Definition: avx2intrin.h:1765
#define _mm256_i64gather_epi32(BASE, INDEX, SCALE)
Definition: avx2intrin.h:1878
#define _mm256_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE)
Definition: avx2intrin.h:1684
#define _mm256_blend_epi32(X, Y, M)
Definition: avx2intrin.h:972
#define _mm256_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE)
Definition: avx2intrin.h:1885
#define _mm256_shufflehi_epi16(A, N)
Definition: avx2intrin.h:612
#define _mm256_srli_si256(A, N)
Definition: avx2intrin.h:747
#define _mm_blend_epi32(X, Y, M)
Definition: avx2intrin.h:957
#define _mm_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE)
Definition: avx2intrin.h:1815
#define _mm_i64gather_epi64(BASE, INDEX, SCALE)
Definition: avx2intrin.h:1808
#define _mm256_cmp_ps(X, Y, P)
Definition: avxintrin.h:409
__inline __m256i __m256i __m256i __M
Definition: avx2intrin.h:199
#define _mm256_bsrli_epi128(A, N)
Definition: avx2intrin.h:745
__inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_abs_epi8(__m256i __A)
Definition: avx2intrin.h:46
#define _mm256_i64gather_pd(BASE, INDEX, SCALE)
Definition: avx2intrin.h:1706
#define _mm256_mpsadbw_epu8(X, Y, M)
Definition: avx2intrin.h:40
#define _mm256_cmp_pd(X, Y, P)
Definition: avxintrin.h:405
#define _mm256_i32gather_epi32(BASE, INDEX, SCALE)
Definition: avx2intrin.h:1850
#define _mm_i32gather_epi64(BASE, INDEX, SCALE)
Definition: avx2intrin.h:1780
#define _mm_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE)
Definition: avx2intrin.h:1787
#define _mm256_shuffle_epi32(A, N)
Definition: avx2intrin.h:610
#define _mm256_i32gather_epi64(BASE, INDEX, SCALE)
Definition: avx2intrin.h:1794
#define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE)
Definition: avx2intrin.h:1728