23 #ifndef _DVEC_H_INCLUDED
24 #define _DVEC_H_INCLUDED
27 #if !defined __cplusplus
28 #error ERROR: This file is only supported in C++ compilations!
31 #if defined (_M_CEE_PURE)
32 #error ERROR: This file is not supported in the pure mode!
41 #define _VEC_ASSERT(_Expression) ((void)0)
53 #define _VEC_ASSERT(_Expression) (void)( (!!(_Expression)) || (_wassert(_CRT_WIDE(#_Expression), _CRT_WIDE(__FILE__), __LINE__), 0) )
58 #pragma pack(push,_CRT_PACKING)
62 #if defined (_ENABLE_VEC_DEBUG)
74 #define _f64vec2_abs_mask ((F64vec2)__f64vec2_abs_mask_cheat.m)
/*
 * Element accessors: take the address of `vector`, reinterpret it as a
 * pointer to the named element type, and yield element number `element`
 * as an lvalue.  `vector` must therefore be an lvalue.
 *
 * The arguments are parenthesized and used without `##`: pasting `&` or `+`
 * onto an identifier does not form a valid preprocessing token, so the
 * pasted form is rejected by conforming preprocessors.
 */
#define _MM_16UB(element,vector) (*((unsigned char*)&(vector) + (element)))
#define _MM_16B(element,vector) (*((signed char*)&(vector) + (element)))

#define _MM_8UW(element,vector) (*((unsigned short*)&(vector) + (element)))
#define _MM_8W(element,vector) (*((short*)&(vector) + (element)))

#define _MM_4UDW(element,vector) (*((unsigned int*)&(vector) + (element)))
#define _MM_4DW(element,vector) (*((int*)&(vector) + (element)))

#define _MM_2QW(element,vector) (*((__int64*)&(vector) + (element)))
104 #pragma warning(push)
105 #pragma warning(disable : 4640)
296 #if defined (_ENABLE_VEC_DEBUG)
341 Iu32vec4(
unsigned int ui3,
unsigned int ui2,
unsigned int ui1,
unsigned int ui0)
342 :
I32vec4(ui3, ui2, ui1, ui0) { }
366 #if defined (_ENABLE_VEC_DEBUG)
408 I16vec8(
short s7,
short s6,
short s5,
short s4,
short s3,
short s2,
short s1,
short s0)
451 Is16vec8(
signed short s7,
signed short s6,
signed short s5,
452 signed short s4,
signed short s3,
signed short s2,
453 signed short s1,
signed short s0)
454 :
I16vec8(s7, s6, s5, s4, s3, s2, s1, s0) { }
480 #if defined (_ENABLE_VEC_DEBUG)
484 os <<
"[7]:" <<
_MM_8W(7,a)
491 <<
" [0]:" <<
_MM_8W(0,a);
541 Iu16vec8(
unsigned short s7,
unsigned short s6,
unsigned short s5,
542 unsigned short s4,
unsigned short s3,
unsigned short s2,
543 unsigned short s1,
unsigned short s0)
544 :
I16vec8(s7, s6, s5, s4, s3, s2, s1, s0) { }
568 #if defined (_ENABLE_VEC_DEBUG)
572 os <<
"[7]:" << (
unsigned short)(
_MM_8UW(7,a))
573 <<
" [6]:" << (
unsigned short)(
_MM_8UW(6,a))
574 <<
" [5]:" << (
unsigned short)(
_MM_8UW(5,a))
575 <<
" [4]:" << (
unsigned short)(
_MM_8UW(4,a))
576 <<
" [3]:" << (
unsigned short)(
_MM_8UW(3,a))
577 <<
" [2]:" << (
unsigned short)(
_MM_8UW(2,a))
578 <<
" [1]:" << (
unsigned short)(
_MM_8UW(1,a))
579 <<
" [0]:" << (
unsigned short)(
_MM_8UW(0,a));
622 I8vec16(
char s15,
char s14,
char s13,
char s12,
char s11,
char s10,
623 char s9,
char s8,
char s7,
char s6,
char s5,
char s4,
624 char s3,
char s2,
char s1,
char s0)
626 vec =
_mm_set_epi8(s15, s14, s13, s12, s11, s10, s9, s8, s7, s6, s5, s4, s3, s2, s1, s0);
657 Is8vec16(
char s15,
char s14,
char s13,
char s12,
char s11,
char s10,
658 char s9,
char s8,
char s7,
char s6,
char s5,
char s4,
659 char s3,
char s2,
char s1,
char s0)
660 :
I8vec16(s15, s14, s13, s12, s11, s10, s9, s8,
661 s7, s6, s5, s4, s3, s2, s1, s0) { }
675 #if defined (_ENABLE_VEC_DEBUG)
679 os <<
"[15]:" << short(
_MM_16B(15,a))
680 <<
" [14]:" << short(
_MM_16B(14,a))
681 <<
" [13]:" << short(
_MM_16B(13,a))
682 <<
" [12]:" << short(
_MM_16B(12,a))
683 <<
" [11]:" << short(
_MM_16B(11,a))
684 <<
" [10]:" << short(
_MM_16B(10,a))
685 <<
" [9]:" << short(
_MM_16B(9,a))
686 <<
" [8]:" << short(
_MM_16B(8,a))
687 <<
" [7]:" << short(
_MM_16B(7,a))
688 <<
" [6]:" << short(
_MM_16B(6,a))
689 <<
" [5]:" << short(
_MM_16B(5,a))
690 <<
" [4]:" << short(
_MM_16B(4,a))
691 <<
" [3]:" << short(
_MM_16B(3,a))
692 <<
" [2]:" << short(
_MM_16B(2,a))
693 <<
" [1]:" << short(
_MM_16B(1,a))
694 <<
" [0]:" << short(
_MM_16B(0,a));
734 Iu8vec16(
unsigned char u15,
unsigned char u14,
unsigned char u13,
735 unsigned char u12,
unsigned char u11,
unsigned char u10,
736 unsigned char u9,
unsigned char u8,
unsigned char u7,
737 unsigned char u6,
unsigned char u5,
unsigned char u4,
738 unsigned char u3,
unsigned char u2,
unsigned char u1,
740 :
I8vec16(u15, u14, u13, u12, u11, u10, u9, u8,
741 u7, u6, u5, u4, u3, u2, u1, u0) { }
755 #if defined (_ENABLE_VEC_DEBUG)
759 os <<
"[15]:" << (
unsigned char)(
_MM_16UB(15,a))
760 <<
" [14]:" << (
unsigned char)(
_MM_16UB(14,a))
761 <<
" [13]:" << (
unsigned char)(
_MM_16UB(13,a))
762 <<
" [12]:" << (
unsigned char)(
_MM_16UB(12,a))
763 <<
" [11]:" << (
unsigned char)(
_MM_16UB(11,a))
764 <<
" [10]:" << (
unsigned char)(
_MM_16UB(10,a))
765 <<
" [9]:" << (
unsigned char)(
_MM_16UB(9,a))
766 <<
" [8]:" << (
unsigned char)(
_MM_16UB(8,a))
767 <<
" [7]:" << (
unsigned char)(
_MM_16UB(7,a))
768 <<
" [6]:" << (
unsigned char)(
_MM_16UB(6,a))
769 <<
" [5]:" << (
unsigned char)(
_MM_16UB(5,a))
770 <<
" [4]:" << (
unsigned char)(
_MM_16UB(4,a))
771 <<
" [3]:" << (
unsigned char)(
_MM_16UB(3,a))
772 <<
" [2]:" << (
unsigned char)(
_MM_16UB(2,a))
773 <<
" [1]:" << (
unsigned char)(
_MM_16UB(1,a))
774 <<
" [0]:" << (
unsigned char)(
_MM_16UB(0,a));
/*
 * IVEC128_LOGICALS(vect, element) defines, for the class
 * I<vect>vec<element>, the inline free functions operator&, operator|,
 * operator^ and andnot.  Each takes two const references and returns the
 * result of the corresponding _mm_and_si128 / _mm_or_si128 /
 * _mm_xor_si128 / _mm_andnot_si128 call on (a, b).
 */
#define IVEC128_LOGICALS(vect,element) \
inline I##vect##vec##element \
operator& (const I##vect##vec##element &a, const I##vect##vec##element &b) \
{ \
    return _mm_and_si128( a,b); \
} \
inline I##vect##vec##element \
operator| (const I##vect##vec##element &a, const I##vect##vec##element &b) \
{ \
    return _mm_or_si128( a,b); \
} \
inline I##vect##vec##element \
operator^ (const I##vect##vec##element &a, const I##vect##vec##element &b) \
{ \
    return _mm_xor_si128( a,b); \
} \
inline I##vect##vec##element \
andnot (const I##vect##vec##element &a, const I##vect##vec##element &b) \
{ \
    return _mm_andnot_si128( a,b); \
}
831 IVEC128_LOGICALS(u16,8)
832 IVEC128_LOGICALS(s16,8)
833 IVEC128_LOGICALS(32,4)
834 IVEC128_LOGICALS(u32,4)
835 IVEC128_LOGICALS(s32,4)
836 IVEC128_LOGICALS(64,2)
837 IVEC128_LOGICALS(128,1)
838 #undef IVEC128_LOGICALS
/*
 * IVEC128_ADD_SUB(vect, element, opsize) defines, for the class
 * I<vect>vec<element>, the inline free functions operator+ and operator-.
 * They return _mm_add_<opsize>(a, b) and _mm_sub_<opsize>(a, b)
 * respectively.
 */
#define IVEC128_ADD_SUB(vect,element,opsize) \
inline I##vect##vec##element \
operator+ (const I##vect##vec##element &a, const I##vect##vec##element &b) \
{ \
    return _mm_add_##opsize( a,b); \
} \
inline I##vect##vec##element \
operator- (const I##vect##vec##element &a, const I##vect##vec##element &b) \
{ \
    return _mm_sub_##opsize( a,b); \
}
851 IVEC128_ADD_SUB(u16,8, epi16)
852 IVEC128_ADD_SUB(s16,8, epi16)
853 IVEC128_ADD_SUB(32,4, epi32)
854 IVEC128_ADD_SUB(u32,4, epi32)
855 IVEC128_ADD_SUB(s32,4, epi32)
856 IVEC128_ADD_SUB(64,2, epi64)
857 #undef IVEC128_ADD_SUB
868 #define IVEC128_SELECT(vect12,vect34,element,selop) \
869 inline I##vect34##vec##element select_##selop ( \
870 const I##vect12##vec##element &a, \
871 const I##vect12##vec##element &b, \
872 const I##vect34##vec##element &c, \
873 const I##vect34##vec##element &d) \
875 I##vect12##vec##element mask = cmp##selop(a,b); \
876 return ( I##vect34##vec##element (mask & c ) | \
877 I##vect34##vec##element ((_mm_andnot_si128(mask, d )))); \
882 IVEC128_SELECT(8,8,16,eq)
883 IVEC128_SELECT(8,s8,16,neq)
884 IVEC128_SELECT(8,u8,16,neq)
885 IVEC128_SELECT(8,8,16,neq)
887 IVEC128_SELECT(16,s16,8,eq)
888 IVEC128_SELECT(16,u16,8,eq)
889 IVEC128_SELECT(16,16,8,eq)
890 IVEC128_SELECT(16,s16,8,neq)
891 IVEC128_SELECT(16,u16,8,neq)
892 IVEC128_SELECT(16,16,8,neq)
894 IVEC128_SELECT(32,s32,4,eq)
895 IVEC128_SELECT(32,u32,4,eq)
896 IVEC128_SELECT(32,32,4,eq)
897 IVEC128_SELECT(32,s32,4,neq)
898 IVEC128_SELECT(32,u32,4,neq)
899 IVEC128_SELECT(32,32,4,neq)
901 IVEC128_SELECT(s8,s8,16,gt)
902 IVEC128_SELECT(s8,u8,16,gt)
903 IVEC128_SELECT(s8,8,16,gt)
904 IVEC128_SELECT(s8,s8,16,lt)
905 IVEC128_SELECT(s8,u8,16,lt)
906 IVEC128_SELECT(s8,8,16,lt)
908 IVEC128_SELECT(s16,s16,8,gt)
909 IVEC128_SELECT(s16,u16,8,gt)
910 IVEC128_SELECT(s16,16,8,gt)
911 IVEC128_SELECT(s16,s16,8,lt)
912 IVEC128_SELECT(s16,u16,8,lt)
913 IVEC128_SELECT(s16,16,8,lt)
916 #undef IVEC128_SELECT
/* F64vec2_COMP(op) defines the friend function cmp<op>(a, b), which
   returns _mm_cmp<op>_pd(a, b) as an F64vec2. */
#define F64vec2_COMP(op) \
friend F64vec2 cmp##op (const F64vec2 &a, const F64vec2 &b) \
{ \
    return _mm_cmp##op##_pd(a,b); \
}
/* F64vec2_COMI(op) defines the friend function comi<op>(a, b), which
   returns the int result of _mm_comi<op>_sd(a, b). */
#define F64vec2_COMI(op) \
friend int comi##op (const F64vec2 &a, const F64vec2 &b) \
{ \
    return _mm_comi##op##_sd(a,b); \
}
/* F64vec2_UCOMI(op) defines the friend function ucomi<op>(a, b), which
   returns the int result of _mm_ucomi<op>_sd(a, b). */
#define F64vec2_UCOMI(op) \
friend int ucomi##op (const F64vec2 &a, const F64vec2 &b) \
{ \
    return _mm_ucomi##op##_sd(a,b); \
}
1020 #undef F64vec2_UCOMI
1023 #if defined (_ENABLE_VEC_DEBUG)
1028 double *dp = (
double*)&a;
1029 os <<
"[1]:" << *(dp+1)
1039 double *dp = (
double*)&vec;
1047 double *dp = (
double*)&vec;
1082 #define F64vec2_SELECT(op) \
1083 inline F64vec2 select_##op (const F64vec2 &a, const F64vec2 &b, const F64vec2 &c, const F64vec2 &d) \
1085 F64vec2 mask = _mm_cmp##op##_pd(a,b); \
1086 return( (mask & c) | F64vec2((_mm_andnot_pd(mask,d)))); \
1096 #undef F64vec2_SELECT
1099 inline int F64vec2ToInt(
const F64vec2 &a)
1149 F32vec8(
float f7,
float f6,
float f5,
float f4,
float f3,
float f2,
float f1,
float f0)
1231 #pragma warning(push)
1232 #pragma warning(disable:4640)
1233 static const F32vec8 fvecf0pt5(0.5f);
1234 static const F32vec8 fvecf3pt0(3.0f);
1235 #pragma warning(pop)
1237 return (fvecf0pt5 * Ra0) * (fvecf3pt0 - (a * Ra0) * Ra0);
1276 } __f32vec8_abs_mask = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff,
1277 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
1282 #if defined (_ENABLE_VEC_DEBUG)
1287 float *fp = (
float*) &a;
1288 os <<
"[7]:" << *(fp+7)
1289 <<
" [6]:" << *(fp+6)
1290 <<
" [5]:" << *(fp+5)
1291 <<
" [4]:" << *(fp+4)
1292 <<
" [3]:" << *(fp+3)
1293 <<
" [2]:" << *(fp+2)
1294 <<
" [1]:" << *(fp+1)
1305 float *fp = (
float*)&vec;
1314 float *fp = (
float*)&vec;
1416 F64vec4(
double d3,
double d2,
double d1,
double d0)
1494 } __f64vec4_abs_mask = { 0xffffffff, 0x7fffffff, 0xffffffff, 0x7fffffff,
1495 0xffffffff, 0x7fffffff, 0xffffffff, 0x7fffffff};
1500 #if defined (_ENABLE_VEC_DEBUG)
1505 double *dp = (
double*) &a;
1506 os <<
"[3]:" << *(dp+3)
1507 <<
" [2]:" << *(dp+2)
1508 <<
" [3]:" << *(dp+1)
1519 double *dp = (
double*)&vec;
1527 double *dp = (
double*)&vec;
1620 #undef DVEC_DEFINE_OUTPUT_OPERATORS
__m128i _mm_sub_epi32(__m128i _A, __m128i _B)
void __cdecl _mm256_storeu_pd(double *, __m256d)
__m128i _mm_adds_epi8(__m128i _A, __m128i _B)
#define _CMP_NEQ_UQ
Definition: immintrin.h:59
F64vec2 F32vec4ToF64vec2(const F32vec4 &a)
Definition: dvec.h:1107
I16vec8 operator<<(int count)
Definition: dvec.h:428
EXPLICIT F32vec8(float f)
Definition: dvec.h:1155
__m128i _mm_mulhi_epi16(__m128i _A, __m128i _B)
#define _CMP_NGE_US
Definition: immintrin.h:65
I64vec2(__m128i mm)
Definition: dvec.h:176
__m256d __cdecl _mm256_sub_pd(__m256d, __m256d)
I16vec8 & operator<<=(const M128 &a)
Definition: dvec.h:429
Is16vec8 & operator+=(const I16vec8 &a)
Definition: dvec.h:465
friend F32vec8 operator-(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1183
__m128i _mm_xor_si128(__m128i _A, __m128i _B)
I32vec4 & operator|=(const M128 &a)
Definition: dvec.h:242
Is8vec16 & operator|=(const M128 &a)
Definition: dvec.h:668
I128vec1(__m128i mm)
Definition: dvec.h:160
friend F64vec4 andnot(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1454
__m256d __cdecl _mm256_div_pd(__m256d, __m256d)
__m128i _mm_srai_epi32(__m128i _A, int _Count)
I16vec8 & operator<<=(int count)
Definition: dvec.h:430
F32vec8(__m256 m)
Definition: dvec.h:1146
friend F64vec4 cmp_ge(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1468
Iu32vec4 operator>>(int count)
Definition: dvec.h:362
__m256 __cdecl _mm256_div_ps(__m256, __m256)
signed short & operator[](int i)
Definition: dvec.h:504
friend F64vec4 operator&(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1428
Is32vec4 operator<<(const M128 &a)
Definition: dvec.h:286
unsigned char & operator[](int i)
Definition: dvec.h:787
I64vec2 operator<<(int count)
Definition: dvec.h:198
__m256 __cdecl _mm256_rsqrt_ps(__m256)
__m256d __cdecl _mm256_cvtps_pd(__m128)
friend F32vec8 sqrt(const F32vec8 &a)
Definition: dvec.h:1207
__m128i _mm_packs_epi32(__m128i _A, __m128i _B)
__m128i _mm_sra_epi16(__m128i _A, __m128i _Count)
I64vec2 operator*(const Iu32vec4 &a, const Iu32vec4 &b)
Definition: dvec.h:393
friend F64vec2 abs(const F64vec2 &a)
Definition: dvec.h:995
__m256d __cdecl _mm256_andnot_pd(__m256d, __m256d)
Is32vec4 & operator-=(const I32vec4 &a)
Definition: dvec.h:283
friend F64vec4 operator|(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1429
__m128d _mm_loadu_pd(double const *_Dp)
Iu32vec4 operator>>(const M128 &a)
Definition: dvec.h:361
friend F64vec4 operator+(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1433
friend F64vec4 operator*(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1435
Iu16vec8 & operator&=(const M128 &a)
Definition: dvec.h:549
friend F32vec8 operator/(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1185
EXPLICIT F64vec2(double d)
Definition: dvec.h:935
#define _f64vec2_abs_mask
Definition: dvec.h:74
F64vec4 & operator^=(const F64vec4 &a)
Definition: dvec.h:1444
I16vec8(short s7, short s6, short s5, short s4, short s3, short s2, short s1, short s0)
Definition: dvec.h:408
I64vec2 & operator|=(const M128 &a)
Definition: dvec.h:189
Is32vec4 & operator=(const M128 &a)
Definition: dvec.h:274
__m128d _mm_cvtsi32_sd(__m128d _A, int _B)
Is8vec16 & operator-=(const I8vec16 &a)
Definition: dvec.h:673
#define _MM_2QW(element, vector)
Definition: dvec.h:99
Iu8vec16 & operator^=(const M128 &a)
Definition: dvec.h:749
__m256d __cdecl _mm256_set_pd(double, double, double, double)
__m256 __cdecl _mm256_loadu_ps(float const *)
void __cdecl _mm256_storeu_ps(float *, __m256)
friend F64vec4 cmp_nge(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1478
I16vec8 & operator+=(const I16vec8 &a)
Definition: dvec.h:422
Iu32vec4 & operator=(const M128 &a)
Definition: dvec.h:345
__m128i _mm_cmpgt_epi32(__m128i _A, __m128i _B)
I8vec16 & operator-=(const I8vec16 &a)
Definition: dvec.h:639
I16vec8()
Definition: dvec.h:406
I32vec4 & operator=(const M128 &a)
Definition: dvec.h:238
F64vec4 & operator+=(const F64vec4 &a)
Definition: dvec.h:1438
#define _CRTIMP
Definition: crtdefs.h:23
__m256d __cdecl _mm256_and_pd(__m256d, __m256d)
__m128i _mm_set1_epi64(__m64 _Q)
void __cdecl _mm_maskstore_ps(float *, __m128i, __m128)
friend F32vec8 andnot(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1204
I16vec8 & operator&=(const M128 &a)
Definition: dvec.h:417
M128 operator|(const M128 &a, const M128 &b)
Definition: dvec.h:147
__m128d _mm_set1_pd(double _A)
F64vec2 & operator*=(const F64vec2 &a)
Definition: dvec.h:953
Iu16vec8 & operator<<=(int count)
Definition: dvec.h:561
F64vec2 & operator^=(const F64vec2 &a)
Definition: dvec.h:957
Is16vec8()
Definition: dvec.h:449
__m128d _mm_unpacklo_pd(__m128d _A, __m128d _B)
friend F32vec8 rsqrt(const F32vec8 &a)
Definition: dvec.h:1213
F32vec8 select_nge(const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
Definition: dvec.h:1394
__m256 __cdecl _mm256_sqrt_ps(__m256)
__m128i _mm_castpd_si128(__m128d)
const __m128i get_mask128()
Definition: dvec.h:106
__m256 __cdecl _mm256_xor_ps(__m256, __m256)
F64vec4(double d3, double d2, double d1, double d0)
Definition: dvec.h:1416
friend F32vec8 cmp_eq(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1242
F32vec8 & operator+=(const F32vec8 &a)
Definition: dvec.h:1187
__m128i _mm_max_epi16(__m128i _A, __m128i _B)
iterator_traits< _InIt >::difference_type count(_InIt _First, _InIt _Last, const _Ty &_Val)
Definition: xutility:3086
#define _VEC_ASSERT(_Expression)
Definition: dvec.h:53
__m128d
Definition: emmintrin.h:48
Is16vec8 & operator|=(const M128 &a)
Definition: dvec.h:461
I32vec4 & operator+=(const I32vec4 &a)
Definition: dvec.h:246
__m128i _mm_unpacklo_epi16(__m128i _A, __m128i _B)
void maskload(F32vec8 &a, const float *p, const F32vec8 &m)
Definition: dvec.h:1352
__m256d
Definition: immintrin.h:38
Iu32vec4 & operator>>=(const M128 &a)
Definition: dvec.h:363
friend F64vec4 simd_max(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1484
void __cdecl _mm256_stream_ps(float *, __m256)
__m256d __cdecl _mm256_loadu_pd(double const *)
__m128i _mm_add_epi64(__m128i _A, __m128i _B)
M128 & operator&=(const M128 &a)
Definition: dvec.h:140
double _mm_cvtsd_f64(__m128d _A)
I16vec8 operator<<(const M128 &a)
Definition: dvec.h:427
#define _CMP_NLT_US
Definition: immintrin.h:60
const double & operator[](int i) const
Definition: dvec.h:1515
#define _CMP_EQ_OQ
Definition: immintrin.h:55
Is16vec8 simd_max(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:529
__m128i _mm_srli_epi16(__m128i _A, int _Count)
__int64 & operator[](int i)
Definition: dvec.h:214
uint_2 operator<<(const uint_2 &_Lhs, const uint_2 &_Rhs) __GPU
Definition: amp_short_vectors.h:22866
__m128d _mm_shuffle_pd(__m128d _A, __m128d _B, int _I)
Is16vec8(signed short s7, signed short s6, signed short s5, signed short s4, signed short s3, signed short s2, signed short s1, signed short s0)
Definition: dvec.h:451
__m128d m
Definition: dvec.h:71
Iu16vec8 & operator<<=(const M128 &a)
Definition: dvec.h:560
__m128 __cdecl _mm_maskload_ps(float const *, __m128i)
void store_nta(double *p, F64vec2 &a)
Definition: dvec.h:1079
Iu8vec16(unsigned char u15, unsigned char u14, unsigned char u13, unsigned char u12, unsigned char u11, unsigned char u10, unsigned char u9, unsigned char u8, unsigned char u7, unsigned char u6, unsigned char u5, unsigned char u4, unsigned char u3, unsigned char u2, unsigned char u1, unsigned char u0)
Definition: dvec.h:734
I64vec2()
Definition: dvec.h:175
__m128i _mm_cmplt_epi8(__m128i _A, __m128i _B)
Is16vec8 pack_sat(const Is32vec4 &a, const Is32vec4 &b)
Definition: dvec.h:812
__m128i _mm_set_epi16(short _W7, short _W6, short _W5, short _W4, short _W3, short _W2, short _W1, short _W0)
__m128i _mm_packs_epi16(__m128i _A, __m128i _B)
__m256 __cdecl _mm256_permute_ps(__m256, int)
I128vec1 & operator|=(const M128 &a)
Definition: dvec.h:164
__m128i _mm_add_epi8(__m128i _A, __m128i _B)
__m256 __cdecl _mm256_add_ps(__m256, __m256)
__m128d _mm_set_pd(double _Z, double _Y)
Iu16vec8 & operator=(const M128 &a)
Definition: dvec.h:547
Iu32vec4 & operator<<=(const M128 &a)
Definition: dvec.h:359
__m128i _mm_srl_epi32(__m128i _A, __m128i _Count)
Iu32vec4 & operator-=(const I32vec4 &a)
Definition: dvec.h:354
__m256d __cdecl _mm256_permute_pd(__m256d, int)
#define F64vec2_UCOMI(op)
Is8vec16(__m128i mm)
Definition: dvec.h:656
friend F64vec4 operator^(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1430
#define _MM_8UW(element, vector)
Definition: dvec.h:93
#define _MM_8W(element, vector)
Definition: dvec.h:94
F64vec2 & operator/=(const F64vec2 &a)
Definition: dvec.h:954
Is16vec8 operator<<(const M128 &a)
Definition: dvec.h:470
F32vec4 F64vec2ToF32vec4(const F64vec2 &a)
Definition: dvec.h:1113
Iu8vec16 & operator+=(const I8vec16 &a)
Definition: dvec.h:752
F32vec8 & operator-=(const F32vec8 &a)
Definition: dvec.h:1188
const signed short & operator[](int i) const
Definition: dvec.h:497
__m128i _mm_sra_epi32(__m128i _A, __m128i _Count)
I8vec16(__m128i mm)
Definition: dvec.h:621
Is32vec4 & operator|=(const M128 &a)
Definition: dvec.h:278
Is32vec4 & operator&=(const M128 &a)
Definition: dvec.h:277
I32vec4 & operator<<=(int count)
Definition: dvec.h:253
Is8vec16 & operator+=(const I8vec16 &a)
Definition: dvec.h:672
F32vec8 & operator^=(const F32vec8 &a)
Definition: dvec.h:1193
M128 andnot(const M128 &a, const M128 &b)
Definition: dvec.h:149
__m256d __cdecl _mm256_mul_pd(__m256d, __m256d)
__m128i _mm_srli_epi32(__m128i _A, int _Count)
Iu8vec16 & operator&=(const M128 &a)
Definition: dvec.h:747
Is32vec4 & operator+=(const I32vec4 &a)
Definition: dvec.h:282
double & operator[](int i)
Definition: dvec.h:1523
const __int64 & operator[](int i) const
Definition: dvec.h:207
__m128i _mm_set_epi32(int _I3, int _I2, int _I1, int _I0)
I32vec4 & operator&=(const M128 &a)
Definition: dvec.h:241
__m128i _mm_sll_epi16(__m128i _A, __m128i _Count)
const signed char & operator[](int i) const
Definition: dvec.h:700
__m128i _mm_avg_epu16(__m128i _A, __m128i _B)
friend F32vec8 operator*(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1184
F32vec4 F64vec4ToF32vec8(const F64vec4 &a)
Definition: dvec.h:1617
friend F64vec2 sqrt(const F64vec2 &a)
Definition: dvec.h:970
F64vec2()
Definition: dvec.h:926
#define _MM_16UB(element, vector)
Definition: dvec.h:90
Is16vec8(__m128i mm)
Definition: dvec.h:450
__m128i _mm_min_epi16(__m128i _A, __m128i _B)
F32vec8 select_eq(const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
Definition: dvec.h:1367
F64vec4 & operator|=(const F64vec4 &a)
Definition: dvec.h:1443
Is32vec4 cmpgt(const Is32vec4 &a, const Is32vec4 &b)
Definition: dvec.h:326
__m256i __cdecl _mm256_castpd_si256(__m256d)
#define _CMP_LT_OS
Definition: immintrin.h:56
__m128i _mm_unpacklo_epi32(__m128i _A, __m128i _B)
Iu32vec4()
Definition: dvec.h:339
int & operator[](int i)
Definition: dvec.h:316
I8vec16 & operator=(const M128 &a)
Definition: dvec.h:630
friend F32vec8 simd_max(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1266
Iu16vec8 & operator-=(const I16vec8 &a)
Definition: dvec.h:554
__m128i _mm_adds_epu8(__m128i _A, __m128i _B)
F32vec8(float f7, float f6, float f5, float f4, float f3, float f2, float f1, float f0)
Definition: dvec.h:1149
Is16vec8 sat_sub(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:527
void loadu(F64vec2 &a, double *p)
Definition: dvec.h:1069
Is8vec16(char s15, char s14, char s13, char s12, char s11, char s10, char s9, char s8, char s7, char s6, char s5, char s4, char s3, char s2, char s1, char s0)
Definition: dvec.h:657
__m128 _mm_cvtpd_ps(__m128d _A)
I64vec2 & operator=(const M128 &a)
Definition: dvec.h:185
#define _CMP_NGT_US
Definition: immintrin.h:67
friend float add_horizontal(const F32vec8 &a)
Definition: dvec.h:1196
F64vec2(__m128d m)
Definition: dvec.h:929
__m128i _mm_castps_si128(__m128)
Is16vec8 operator<<(int count)
Definition: dvec.h:471
friend F64vec4 cmp_nle(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1474
#define _CMP_GT_OS
Definition: immintrin.h:71
__m128i _mm_sll_epi32(__m128i _A, __m128i _Count)
__m128i _mm_unpackhi_epi16(__m128i _A, __m128i _B)
I64vec2 operator<<(const I64vec2 &a)
Definition: dvec.h:197
Is32vec4 & operator<<=(int count)
Definition: dvec.h:289
__m256d __cdecl _mm256_sqrt_pd(__m256d)
I16vec8 & operator-=(const I16vec8 &a)
Definition: dvec.h:423
unsigned short & operator[](int i)
Definition: dvec.h:592
__m128i _mm_slli_epi16(__m128i _A, int _Count)
int _mm_movemask_pd(__m128d _A)
F64vec2 IntToF64vec2(const F64vec2 &a, int b)
Definition: dvec.h:1119
__m256 __cdecl _mm256_sub_ps(__m256, __m256)
int _mm_cvttsd_si32(__m128d _A)
__m128i _mm_sad_epu8(__m128i _A, __m128i _B)
I8vec16 & operator|=(const M128 &a)
Definition: dvec.h:634
F64vec2_COMP(eq) F64vec2_COMP(lt) F64vec2_COMP(le) F64vec2_COMP(gt) F64vec2_COMP(ge) F64vec2_COMP(ngt) F64vec2_COMP(nge) F64vec2_COMP(neq) F64vec2_COMP(nlt) F64vec2_COMP(nle) friend F64vec2 simd_min(const F64vec2 &a
I8vec16 & operator^=(const M128 &a)
Definition: dvec.h:635
F32vec8 select_ge(const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
Definition: dvec.h:1379
__m128i _mm_max_epu8(__m128i _A, __m128i _B)
F32vec8 select_nle(const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
Definition: dvec.h:1388
int i[4]
Definition: dvec.h:70
I64vec2 sum_abs(const Iu8vec16 &a, const Iu8vec16 &b)
Definition: dvec.h:804
I16vec8(__m128i mm)
Definition: dvec.h:407
__m256 __cdecl _mm256_and_ps(__m256, __m256)
F64vec4()
Definition: dvec.h:1410
#define IVEC128_SELECT(vect12, vect34, element, selop)
Definition: dvec.h:868
F32vec8 select_gt(const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
Definition: dvec.h:1376
F64vec4 & operator-=(const F64vec4 &a)
Definition: dvec.h:1439
Iu16vec8 operator>>(int count)
Definition: dvec.h:563
I64vec2 & operator&=(const M128 &a)
Definition: dvec.h:188
__m256 vec
Definition: dvec.h:1138
#define _In_z_
Definition: sal.h:319
Iu32vec4 & operator+=(const I32vec4 &a)
Definition: dvec.h:353
Is8vec16 & operator^=(const M128 &a)
Definition: dvec.h:669
#define _In_
Definition: sal.h:314
friend F64vec2 operator*(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:948
const unsigned short & operator[](int i) const
Definition: dvec.h:585
#define F64vec2_SELECT(op)
Definition: dvec.h:1082
I128vec1 & operator&=(const M128 &a)
Definition: dvec.h:163
__m128d _mm_add_pd(__m128d _A, __m128d _B)
__m128d _mm_or_pd(__m128d _A, __m128d _B)
F32vec8 & operator/=(const F32vec8 &a)
Definition: dvec.h:1190
void storeu(double *p, const F64vec2 &a)
Definition: dvec.h:1073
int __cdecl _mm256_movemask_pd(__m256d)
EXPLICIT F32vec8(double d)
Definition: dvec.h:1158
friend F32vec8 operator&(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1177
Is8vec16 & operator=(const M128 &a)
Definition: dvec.h:664
__m256 __cdecl _mm256_movehdup_ps(__m256)
void __cdecl _mm256_maskstore_pd(double *, __m256i, __m256d)
Iu8vec16 & operator=(const M128 &a)
Definition: dvec.h:744
__m128i _mm_mul_epu32(__m128i _A, __m128i _B)
Iu8vec16(__m128i mm)
Definition: dvec.h:733
__m256d __cdecl _mm256_unpacklo_pd(__m256d, __m256d)
friend F32vec8 cmp_nlt(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1254
__m128i _mm_min_epu8(__m128i _A, __m128i _B)
I64vec2 & operator^=(const M128 &a)
Definition: dvec.h:190
friend F32vec8 rsqrt_nr(const F32vec8 &a)
Definition: dvec.h:1229
F64vec4 & operator&=(const F64vec4 &a)
Definition: dvec.h:1442
__m256d __cdecl _mm256_max_pd(__m256d, __m256d)
__m128d _mm_div_pd(__m128d _A, __m128d _B)
int __cdecl _mm256_movemask_ps(__m256)
I64vec2(__m64 q1, __m64 q0)
Definition: dvec.h:178
Is16vec8 & operator>>=(int count)
Definition: dvec.h:478
__m256 __cdecl _mm256_blendv_ps(__m256, __m256, __m256)
__m64
Definition: mmintrin.h:42
__m128d _mm_and_pd(__m128d _A, __m128d _B)
I64vec2 & operator>>=(const I64vec2 &a)
Definition: dvec.h:203
__m256d __cdecl _mm256_unpackhi_pd(__m256d, __m256d)
__m128i _mm_srl_epi16(__m128i _A, __m128i _Count)
F64vec2_COMI(eq) F64vec2_COMI(lt) F64vec2_COMI(le) F64vec2_COMI(gt) F64vec2_COMI(ge) F64vec2_COMI(neq) F64vec2_UCOMI(eq) F64vec2_UCOMI(lt) F64vec2_UCOMI(le) F64vec2_UCOMI(gt) F64vec2_UCOMI(ge) F64vec2_UCOMI(neq) const double &operator[](int i) const
Definition: dvec.h:1003
__m128 __cdecl _mm256_castps256_ps128(__m256)
M128 & operator^=(const M128 &a)
Definition: dvec.h:142
I32vec4 & operator<<=(const I32vec4 &a)
Definition: dvec.h:252
Is16vec8 simd_min(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:530
__m128d __cdecl _mm_maskload_pd(double const *, __m128i)
M128 operator^(const M128 &a, const M128 &b)
Definition: dvec.h:148
Is32vec4 mul_add(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:524
friend F32vec8 cmp_nle(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1256
I8vec16()
Definition: dvec.h:620
__m128i _mm_slli_epi64(__m128i _A, int _Count)
__m128i _mm_cmpgt_epi8(__m128i _A, __m128i _B)
void __cdecl _mm_maskstore_pd(double *, __m128i, __m128d)
__m256 __cdecl _mm256_set1_ps(float)
F32vec8 select_lt(const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
Definition: dvec.h:1370
__m128i _mm_unpackhi_epi32(__m128i _A, __m128i _B)
F64vec2 & operator|=(const F64vec2 &a)
Definition: dvec.h:956
F32vec8 & operator*=(const F32vec8 &a)
Definition: dvec.h:1189
friend F64vec4 cmp_eq(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1460
__m128d _mm_max_pd(__m128d _A, __m128d _B)
Is32vec4 & operator<<=(const M128 &a)
Definition: dvec.h:288
friend F64vec2 operator&(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:941
__m256d __cdecl _mm256_min_pd(__m256d, __m256d)
basic_ostream< char, char_traits< char > > ostream
Definition: iosfwd:678
friend F64vec4 operator-(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1434
I32vec4()
Definition: dvec.h:233
F32vec8 select_le(const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
Definition: dvec.h:1373
Iu16vec8(__m128i mm)
Definition: dvec.h:540
Is32vec4 & operator^=(const M128 &a)
Definition: dvec.h:279
__m128d __cdecl _mm256_castpd256_pd128(__m256d)
__m128i
Definition: emmintrin.h:44
__m256
Definition: immintrin.h:34
Is16vec8 & operator=(const M128 &a)
Definition: dvec.h:457
F64vec2 & operator+=(const F64vec2 &a)
Definition: dvec.h:951
F32vec8 select_nlt(const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
Definition: dvec.h:1385
#define _CMP_GE_OS
Definition: immintrin.h:70
__m128i _mm_srli_epi64(__m128i _A, int _Count)
M128()
Definition: dvec.h:134
Iu16vec8 & operator>>=(const M128 &a)
Definition: dvec.h:564
__m128i _mm_unpackhi_epi64(__m128i _A, __m128i _B)
Is32vec4 & operator>>=(const M128 &a)
Definition: dvec.h:293
I32vec4(__m128i mm)
Definition: dvec.h:234
void maskstore(float *p, const F32vec8 &a, const F32vec8 &m)
Definition: dvec.h:1359
Iu16vec8 & operator*=(const I16vec8 &a)
Definition: dvec.h:555
__m128d _mm_unpackhi_pd(__m128d _A, __m128d _B)
Is16vec8 & operator<<=(const M128 &a)
Definition: dvec.h:472
const F64vec2 &b return _mm_min_pd(a, b)
Is16vec8 operator>>(int count)
Definition: dvec.h:476
#define _MM_16B(element, vector)
Definition: dvec.h:91
Iu8vec16()
Definition: dvec.h:732
I64vec2 & operator<<=(int count)
Definition: dvec.h:200
Iu16vec8 operator>>(const M128 &a)
Definition: dvec.h:562
void _mm_storeu_pd(double *_Dp, __m128d _A)
__m128i _mm_cmpgt_epi16(__m128i _A, __m128i _B)
#define EXPLICIT
Definition: ivec.h:30
I64vec2 & operator>>=(int count)
Definition: dvec.h:204
I32vec4 & operator-=(const I32vec4 &a)
Definition: dvec.h:247
__m128d _mm_xor_pd(__m128d _A, __m128d _B)
#define _CMP_NLE_US
Definition: immintrin.h:61
friend F32vec8 cmp_le(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1246
#define IVEC128_ADD_SUB(vect, element, opsize)
Definition: dvec.h:841
I32vec4 & operator^=(const M128 &a)
Definition: dvec.h:243
I16vec8 & operator*=(const I16vec8 &a)
Definition: dvec.h:424
__m128d vec
Definition: dvec.h:922
F64vec4 & operator*=(const F64vec4 &a)
Definition: dvec.h:1440
__m256 __cdecl _mm256_mul_ps(__m256, __m256)
I32vec4 operator<<(const I32vec4 &a)
Definition: dvec.h:250
__m128i _mm_avg_epu8(__m128i _A, __m128i _B)
Iu16vec8()
Definition: dvec.h:539
I64vec2 & operator<<=(const I64vec2 &a)
Definition: dvec.h:199
__m128i _mm_mullo_epi16(__m128i _A, __m128i _B)
friend F64vec2 simd_max(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:992
__m128i _mm_subs_epu8(__m128i _A, __m128i _B)
friend double add_horizontal(const F64vec4 &a)
Definition: dvec.h:1447
__m256d vec
Definition: dvec.h:1405
_Check_return_ _In_z_ const char _Inout_ FILE * _File
Definition: stdio.h:226
const int & operator[](int i) const
Definition: dvec.h:309
const unsigned char & operator[](int i) const
Definition: dvec.h:780
Iu16vec8 & operator|=(const M128 &a)
Definition: dvec.h:550
Iu8vec16 & operator|=(const M128 &a)
Definition: dvec.h:748
F64vec2(double d1, double d0)
Definition: dvec.h:932
#define _MM_4DW(element, vector)
Definition: dvec.h:97
friend F32vec8 operator|(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1178
float & operator[](int i)
Definition: dvec.h:1310
__m128i _mm_cmpeq_epi16(__m128i _A, __m128i _B)
__m256 __cdecl _mm256_min_ps(__m256, __m256)
__m128i _mm_or_si128(__m128i _A, __m128i _B)
Is16vec8 & operator&=(const M128 &a)
Definition: dvec.h:460
__m128i _mm_sll_epi64(__m128i _A, __m128i _Count)
__m256 __cdecl _mm256_andnot_ps(__m256, __m256)
Is32vec4()
Definition: dvec.h:269
__m128i _mm_cmpeq_epi32(__m128i _A, __m128i _B)
M128(__m128i mm)
Definition: dvec.h:135
__m128 _mm_add_ss(__m128 _A, __m128 _B)
__m256 __cdecl _mm256_unpackhi_ps(__m256, __m256)
friend F64vec2 operator/(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:949
friend F32vec8 cmp_nge(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1260
__m128i _mm_adds_epu16(__m128i _A, __m128i _B)
friend F32vec8 operator+(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1182
friend F64vec4 sqrt(const F64vec4 &a)
Definition: dvec.h:1457
__m128i _mm_add_epi32(__m128i _A, __m128i _B)
friend F64vec4 cmp_gt(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1466
__m256i __cdecl _mm256_castps_si256(__m256)
friend F32vec8 cmp_ge(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1250
Is16vec8 & operator*=(const I16vec8 &a)
Definition: dvec.h:467
const float & operator[](int i) const
Definition: dvec.h:1301
__m256d __cdecl _mm256_maskload_pd(double const *, __m256i)
Iu32vec4 operator<<(int count)
Definition: dvec.h:358
__m128i _mm_unpacklo_epi8(__m128i _A, __m128i _B)
double & operator[](int i)
Definition: dvec.h:1043
I64vec2 unpack_high(const I64vec2 &a, const I64vec2 &b)
Definition: dvec.h:225
I16vec8 & operator^=(const M128 &a)
Definition: dvec.h:419
friend F64vec4 abs(const F64vec4 &a)
Definition: dvec.h:1488
__m128i vec
Definition: dvec.h:131
__m256 __cdecl _mm256_maskload_ps(float const *, __m256i)
__m256d __cdecl _mm256_blendv_pd(__m256d, __m256d, __m256d)
I64vec2 operator>>(const I64vec2 &a)
Definition: dvec.h:201
Is8vec16 & operator&=(const M128 &a)
Definition: dvec.h:667
Iu32vec4 & operator<<=(int count)
Definition: dvec.h:360
F32vec8 & operator=(float f)
Definition: dvec.h:1161
__m256d __cdecl _mm256_or_pd(__m256d, __m256d)
EXPLICIT F64vec4(double d)
Definition: dvec.h:1422
friend F64vec4 cmp_le(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1464
friend F64vec4 operator/(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1436
const union @88 __f64vec2_abs_mask_cheat
M128 operator&(const M128 &a, const M128 &b)
Definition: dvec.h:146
Iu32vec4 & operator&=(const M128 &a)
Definition: dvec.h:348
Iu32vec4 & operator>>=(int count)
Definition: dvec.h:364
I32vec4(int i3, int i2, int i1, int i0)
Definition: dvec.h:235
Iu16vec8 & operator^=(const M128 &a)
Definition: dvec.h:551
I64vec2 operator>>(int count)
Definition: dvec.h:202
__m256 __cdecl _mm256_or_ps(__m256, __m256)
friend F32vec8 simd_min(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1264
__m128d _mm_sub_pd(__m128d _A, __m128d _B)
I128vec1 & operator=(const M128 &a)
Definition: dvec.h:162
Is16vec8 & operator-=(const I16vec8 &a)
Definition: dvec.h:466
F64vec2 & operator&=(const F64vec2 &a)
Definition: dvec.h:955
__m128i _mm_slli_epi32(__m128i _A, int _Count)
I128vec1 & operator^=(const M128 &a)
Definition: dvec.h:165
__m128i _mm_sub_epi16(__m128i _A, __m128i _B)
void _mm_stream_pd(double *_Dp, __m128d _A)
__m256 __cdecl _mm256_rcp_ps(__m256)
Is16vec8 mul_high(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:523
F32vec8 & operator|=(const F32vec8 &a)
Definition: dvec.h:1192
friend F64vec4 cmp_lt(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1462
F32vec8 select_ngt(const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
Definition: dvec.h:1391
Iu16vec8 & operator>>=(int count)
Definition: dvec.h:565
Iu32vec4 & operator^=(const M128 &a)
Definition: dvec.h:350
I64vec2 & operator+=(const I64vec2 &a)
Definition: dvec.h:193
__m128i _mm_subs_epu16(__m128i _A, __m128i _B)
F32vec8 select_neq(const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
Definition: dvec.h:1382
__m256d __cdecl _mm256_set1_pd(double)
__m128i _mm_sub_epi8(__m128i _A, __m128i _B)
F64vec4(__m256d m)
Definition: dvec.h:1413
#define IVEC128_LOGICALS(vect, element)
Definition: dvec.h:817
Is32vec4 & operator>>=(int count)
Definition: dvec.h:294
friend F32vec8 abs(const F32vec8 &a)
Definition: dvec.h:1270
__m256 __cdecl _mm256_max_ps(__m256, __m256)
__m128i _mm_unpacklo_epi64(__m128i _A, __m128i _B)
__m128i _mm_andnot_si128(__m128i _A, __m128i _B)
const unsigned int & operator[](int i) const
Definition: dvec.h:379
Is32vec4 operator>>(int count)
Definition: dvec.h:292
friend F64vec2 operator^(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:943
__m256 __cdecl _mm256_set_ps(float, float, float, float, float, float, float, float)
Is16vec8 & operator>>=(const M128 &a)
Definition: dvec.h:477
__m256 __cdecl _mm256_cmp_ps(__m256, __m256, const int)
Iu32vec4 & operator|=(const M128 &a)
Definition: dvec.h:349
__m128i _mm_srai_epi16(__m128i _A, int _Count)
I64vec2 & operator-=(const I64vec2 &a)
Definition: dvec.h:194
friend F32vec8 cmp_lt(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1244
__m128i _mm_subs_epi8(__m128i _A, __m128i _B)
F64vec4 & operator/=(const F64vec4 &a)
Definition: dvec.h:1441
Is16vec8 & operator^=(const M128 &a)
Definition: dvec.h:462
friend F64vec2 operator+(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:946
F64vec2 & operator-=(const F64vec2 &a)
Definition: dvec.h:952
__m256d __cdecl _mm256_cmp_pd(__m256d, __m256d, const int)
_CRTIMP void __cdecl _wassert(_In_z_ const wchar_t *_Message, _In_z_ const wchar_t *_File, _In_ unsigned _Line)
I32vec4 operator<<(int count)
Definition: dvec.h:251
friend F32vec8 rcp(const F32vec8 &a)
Definition: dvec.h:1210
int move_mask(const F64vec2 &a)
Definition: dvec.h:1063
__m256d __cdecl _mm256_xor_pd(__m256d, __m256d)
I32vec4 cmpeq(const I32vec4 &a, const I32vec4 &b)
Definition: dvec.h:257
__m128d _mm_mul_pd(__m128d _A, __m128d _B)
friend F64vec2 andnot(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:967
__m256d __cdecl _mm256_add_pd(__m256d, __m256d)
Iu32vec4(__m128i mm)
Definition: dvec.h:340
friend F32vec8 cmp_ngt(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1258
float _mm_cvtss_f32(__m128 _A)
I8vec16(char s15, char s14, char s13, char s12, char s11, char s10, char s9, char s8, char s7, char s6, char s5, char s4, char s3, char s2, char s1, char s0)
Definition: dvec.h:622
#define _CMP_LE_OS
Definition: immintrin.h:57
__m128i _mm_add_epi16(__m128i _A, __m128i _B)
unsigned int & operator[](int i)
Definition: dvec.h:386
I8vec16 & operator+=(const I8vec16 &a)
Definition: dvec.h:638
Iu8vec16 packu_sat(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:814
friend F64vec4 cmp_nlt(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1472
friend F32vec8 cmp_neq(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1252
I128vec1()
Definition: dvec.h:159
Iu32vec4(unsigned int ui3, unsigned int ui2, unsigned int ui1, unsigned int ui0)
Definition: dvec.h:341
__m128i _mm_set_epi8(char _B15, char _B14, char _B13, char _B12, char _B11, char _B10, char _B9, char _B8, char _B7, char _B6, char _B5, char _B4, char _B3, char _B2, char _B1, char _B0)
Iu8vec16 & operator-=(const I8vec16 &a)
Definition: dvec.h:753
__m128i _mm_cmpeq_epi8(__m128i _A, __m128i _B)
I32vec4 cmpneq(const I32vec4 &a, const I32vec4 &b)
Definition: dvec.h:258
Iu16vec8 & operator+=(const I16vec8 &a)
Definition: dvec.h:553
friend F64vec4 cmp_ngt(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1476
__m128i _mm_unpackhi_epi8(__m128i _A, __m128i _B)
__m128i _mm_adds_epi16(__m128i _A, __m128i _B)
__m128 __cdecl _mm256_cvtpd_ps(__m256d)
__m128d _mm_cvtps_pd(__m128 _A)
Is8vec16()
Definition: dvec.h:655
F64vec4 F32vec4ToF64vec4(const F32vec4 &a)
Definition: dvec.h:1613
friend F64vec4 simd_min(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1482
__m128d _mm_andnot_pd(__m128d _A, __m128d _B)
__m128i _mm_sub_epi64(__m128i _A, __m128i _B)
Is32vec4 operator<<(int count)
Definition: dvec.h:287
#define _MM_4UDW(element, vector)
Definition: dvec.h:96
__m128i _mm_packus_epi16(__m128i _A, __m128i _B)
I8vec16 & operator&=(const M128 &a)
Definition: dvec.h:633
Is32vec4(__m128i mm)
Definition: dvec.h:270
Iu16vec8 operator<<(int count)
Definition: dvec.h:559
__m128i _mm_mulhi_epu16(__m128i _A, __m128i _B)
M128 & operator|=(const M128 &a)
Definition: dvec.h:141
F32vec8 & operator&=(const F32vec8 &a)
Definition: dvec.h:1191
signed char & operator[](int i)
Definition: dvec.h:707
F32vec8()
Definition: dvec.h:1143
void __cdecl _mm256_maskstore_ps(float *, __m256i, __m256)
Is32vec4 cmplt(const Is32vec4 &a, const Is32vec4 &b)
Definition: dvec.h:327
Is32vec4 operator>>(const M128 &a)
Definition: dvec.h:291
Is16vec8 & operator<<=(int count)
Definition: dvec.h:473
Is16vec8 sat_add(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:526
friend F32vec8 cmp_gt(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1248
Iu16vec8(unsigned short s7, unsigned short s6, unsigned short s5, unsigned short s4, unsigned short s3, unsigned short s2, unsigned short s1, unsigned short s0)
Definition: dvec.h:541
Is32vec4(int i3, int i2, int i1, int i0)
Definition: dvec.h:271
Is16vec8 operator>>(const M128 &a)
Definition: dvec.h:475
__m128i _mm_srl_epi64(__m128i _A, __m128i _Count)
__m128d _mm_add_sd(__m128d _A, __m128d _B)
friend F32vec8 operator^(const F32vec8 &a, const F32vec8 &b)
Definition: dvec.h:1179
__m128i _mm_and_si128(__m128i _A, __m128i _B)
void __cdecl _mm256_stream_pd(double *, __m256d)
__m128i _mm_madd_epi16(__m128i _A, __m128i _B)
__m128d _mm_sqrt_pd(__m128d _A)
__m128 __cdecl _mm256_extractf128_ps(__m256, const int)
friend F64vec4 cmp_neq(const F64vec4 &a, const F64vec4 &b)
Definition: dvec.h:1470
friend F64vec2 operator|(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:942
__m128i _mm_subs_epi16(__m128i _A, __m128i _B)
I64vec2 unpack_low(const I64vec2 &a, const I64vec2 &b)
Definition: dvec.h:224
Iu32vec4 operator<<(const M128 &a)
Definition: dvec.h:357
__m128d __cdecl _mm256_extractf128_pd(__m256d, const int)
friend F64vec2 operator-(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:947
friend double add_horizontal(const F64vec2 &a)
Definition: dvec.h:960
I16vec8 & operator=(const M128 &a)
Definition: dvec.h:414
friend F32vec8 rcp_nr(const F32vec8 &a)
Definition: dvec.h:1219
__m256 __cdecl _mm256_unpacklo_ps(__m256, __m256)
Iu16vec8 simd_avg(const Iu16vec8 &a, const Iu16vec8 &b)
Definition: dvec.h:611
I16vec8 & operator|=(const M128 &a)
Definition: dvec.h:418
Iu16vec8 operator<<(const M128 &a)
Definition: dvec.h:558