/* Reject any target that defines neither _M_IX86 nor _M_X64. */
16 #if !defined(_M_IX86) && !defined(_M_X64)
17 #error This header is specific to X86 and X64 targets
/* Reject compilation when _M_CEE_PURE is defined. */
24 #if defined (_M_CEE_PURE)
25 #error ERROR: Intel Architecture intrinsic functions not supported in the pure mode!
47 __int16 m256i_i16[16];
50 unsigned __int8 m256i_u8[32];
51 unsigned __int16 m256i_u16[16];
52 unsigned __int32 m256i_u32[8];
53 unsigned __int64 m256i_u64[4];
/*
 * Comparison-predicate immediates, one name per value in 0x00-0x1F.
 *
 * The table has two halves: every name in 0x10-0x1F is the name found at
 * (value - 0x10) with its trailing S/Q letter toggled (for example
 * _CMP_EQ_OQ is 0x00 and _CMP_EQ_OS is 0x10; _CMP_LT_OS is 0x01 and
 * _CMP_LT_OQ is 0x11). In other words, setting bit 4 flips the final letter.
 *
 * NOTE(review): presumably these are passed as the `const int` predicate of
 * the cmp intrinsics, with O/U meaning ordered/unordered and S/Q meaning
 * signaling/quiet as in Intel's documentation -- confirm against the Intel
 * Intrinsics Guide before relying on it.
 */
60 #define _CMP_EQ_OQ 0x00
61 #define _CMP_LT_OS 0x01
62 #define _CMP_LE_OS 0x02
63 #define _CMP_UNORD_Q 0x03
64 #define _CMP_NEQ_UQ 0x04
65 #define _CMP_NLT_US 0x05
66 #define _CMP_NLE_US 0x06
68 #define _CMP_ORD_Q 0x07
69 #define _CMP_EQ_UQ 0x08
70 #define _CMP_NGE_US 0x09
72 #define _CMP_NGT_US 0x0A
73 #define _CMP_FALSE_OQ 0x0B
74 #define _CMP_NEQ_OQ 0x0C
75 #define _CMP_GE_OS 0x0D
76 #define _CMP_GT_OS 0x0E
77 #define _CMP_TRUE_UQ 0x0F
/* 0x10-0x1F: the same sixteen names with the trailing S/Q letter toggled. */
78 #define _CMP_EQ_OS 0x10
79 #define _CMP_LT_OQ 0x11
80 #define _CMP_LE_OQ 0x12
81 #define _CMP_UNORD_S 0x13
82 #define _CMP_NEQ_US 0x14
83 #define _CMP_NLT_UQ 0x15
84 #define _CMP_NLE_UQ 0x16
86 #define _CMP_ORD_S 0x17
87 #define _CMP_EQ_US 0x18
88 #define _CMP_NGE_UQ 0x19
90 #define _CMP_NGT_UQ 0x1A
91 #define _CMP_FALSE_OS 0x1B
92 #define _CMP_NEQ_OS 0x1C
93 #define _CMP_GE_OQ 0x1D
95 #define _CMP_GT_OQ 0x1E
96 #define _CMP_TRUE_US 0x1F
776 #define _mm256_loadu2_m128( hiaddr, \
778 _mm256_set_m128(_mm_loadu_ps(hiaddr), _mm_loadu_ps(loaddr))
780 #define _mm256_loadu2_m128d( hiaddr, \
782 _mm256_set_m128d(_mm_loadu_pd(hiaddr), _mm_loadu_pd(loaddr))
784 #define _mm256_loadu2_m128i( hiaddr, \
786 _mm256_set_m128i(_mm_loadu_si128(hiaddr), _mm_loadu_si128(loaddr))
794 #define _mm256_storeu2_m128( hiaddr, loaddr, \
798 _mm_storeu_ps((loaddr), _mm256_castps256_ps128(_a)); \
799 _mm_storeu_ps((hiaddr), _mm256_extractf128_ps(_a, 0x1)); \
802 #define _mm256_storeu2_m128d( hiaddr, loaddr, \
806 _mm_storeu_pd((loaddr), _mm256_castpd256_pd128(_a)); \
807 _mm_storeu_pd((hiaddr), _mm256_extractf128_pd(_a, 0x1)); \
810 #define _mm256_storeu2_m128i( hiaddr, loaddr, \
814 _mm_storeu_si128((loaddr), _mm256_castsi256_si128(_a)); \
815 _mm_storeu_si128((hiaddr), _mm256_extractf128_si256(_a, 0x1)); \
/*
 * Ceil/floor convenience macros.
 *
 * Each one forwards `val` to the matching _mm256_round_pd / _mm256_round_ps
 * call with a fixed rounding-mode constant (_MM_FROUND_CEIL or
 * _MM_FROUND_FLOOR). Those constants are not defined in this span.
 */
975 #define _mm256_ceil_pd(val) _mm256_round_pd((val), _MM_FROUND_CEIL)
976 #define _mm256_floor_pd(val) _mm256_round_pd((val), _MM_FROUND_FLOOR)
990 #define _mm256_ceil_ps(val) _mm256_round_ps((val), _MM_FROUND_CEIL)
991 #define _mm256_floor_ps(val) _mm256_round_ps((val), _MM_FROUND_FLOOR)
/*
 * Readable names for the 256-bit integer test intrinsics.
 *
 *   _mm256_test_all_zeros(mask, val)      -> _mm256_testz_si256(mask, val)
 *   _mm256_test_all_ones(val)             -> _mm256_testc_si256(val, X)
 *   _mm256_test_mix_ones_zeros(mask, val) -> _mm256_testnzc_si256(mask, val)
 *
 * where X is _mm256_cmpeq_epi32(val, val), i.e. `val` compared with itself.
 */
1034 #define _mm256_test_all_zeros(mask, val) \
1035 _mm256_testz_si256((mask), (val))
/* Caution: `val` is expanded three times here, so it must be free of side effects. */
1038 #define _mm256_test_all_ones(val) \
1039 _mm256_testc_si256((val), _mm256_cmpeq_epi32((val),(val)))
1042 #define _mm256_test_mix_ones_zeros(mask, val) \
1043 _mm256_testnzc_si256((mask), (val))
1111 float,
float,
float,
float);
1113 char,
char,
char,
char,
1114 char,
char,
char,
char,
1115 char,
char,
char,
char,
1116 char,
char,
char,
char,
1117 char,
char,
char,
char,
1118 char,
char,
char,
char,
1119 char,
char,
char,
char);
1121 short,
short,
short,
short,
1122 short,
short,
short,
short,
1123 short,
short,
short,
short);
1125 int,
int,
int,
int);
/*
 * Build a 256-bit vector from two 128-bit vectors, argument order (hi, lo).
 *
 * All three variants (ps / pd / integer) follow the same recipe: `lo` is
 * widened with the matching 128-to-256 cast intrinsic, then `hi` is inserted
 * with the matching insertf128 intrinsic using selector 0x1.
 */
1129 #define _mm256_set_m128( hi, lo) \
1130 _mm256_insertf128_ps(_mm256_castps128_ps256(lo), (hi), 0x1)
1132 #define _mm256_set_m128d( hi, lo) \
1133 _mm256_insertf128_pd(_mm256_castpd128_pd256(lo), (hi), 0x1)
1135 #define _mm256_set_m128i( hi, lo) \
1136 _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 0x1)
1140 float,
float,
float,
float);
1142 char,
char,
char,
char,
1143 char,
char,
char,
char,
1144 char,
char,
char,
char,
1145 char,
char,
char,
char,
1146 char,
char,
char,
char,
1147 char,
char,
char,
char,
1148 char,
char,
char,
char);
1150 short,
short,
short,
short,
1151 short,
short,
short,
short,
1152 short,
short,
short,
short);
1154 int,
int,
int,
int);
/*
 * Reversed-argument forms of _mm256_set_m128 / _m128d / _m128i: these take
 * (lo, hi) and forward to the corresponding set macro as (hi, lo).
 */
1157 #define _mm256_setr_m128(lo, hi) _mm256_set_m128((hi), (lo))
1158 #define _mm256_setr_m128d(lo, hi) _mm256_set_m128d((hi), (lo))
1159 #define _mm256_setr_m128i(lo, hi) _mm256_set_m128i((hi), (lo))
/*
 * The "undefined" constructors are plain object-like aliases for the
 * corresponding setzero intrinsics, for both the 128-bit (_mm_) and 256-bit
 * (_mm256_) families. A call such as _mm256_undefined_ps() therefore expands
 * to _mm256_setzero_ps().
 */
1205 #define _mm_undefined_ps _mm_setzero_ps
1206 #define _mm_undefined_pd _mm_setzero_pd
1207 #define _mm_undefined_si128 _mm_setzero_si128
1208 #define _mm256_undefined_ps _mm256_setzero_ps
1209 #define _mm256_undefined_pd _mm256_setzero_pd
1210 #define _mm256_undefined_si256 _mm256_setzero_si256
/*
 * NOTE(review): presumably the register index to pass as the first
 * (unsigned int) argument of _xgetbv / _xsetbv -- confirm against the
 * intrinsic documentation.
 */
1216 #define _XCR_XFEATURE_ENABLED_MASK 0
/* Takes an unsigned int and returns an unsigned __int64. */
1219 extern unsigned __int64 __cdecl
_xgetbv(
unsigned int);
/* Takes an unsigned int and an unsigned __int64; returns nothing. */
1222 extern void __cdecl
_xsetbv(
unsigned int,
unsigned __int64);
/*
 * State save/restore intrinsic declarations.
 *
 * Common shape:
 *   - The save variants (_xsave, _xsaveopt, _xsavec, _xsaves, _fxsave) take a
 *     writable `void *` buffer.
 *   - The restore variants (_xrstor, _xrstors, _fxrstor) take a
 *     `void const *` buffer.
 *   - The x-prefixed variants take an additional unsigned __int64 argument;
 *     _fxsave / _fxrstor take the buffer only.
 *   - Each function has a 64-suffixed twin that is declared under
 *     `#if defined (_M_X64)`.
 *
 * NOTE(review): the unsigned __int64 argument is presumably a component
 * mask, and the buffers presumably have alignment/size requirements -- not
 * visible here, confirm against the intrinsic documentation.
 */
1229 extern void __cdecl
_xsave(
void *,
unsigned __int64);
1230 #if defined (_M_X64)
1231 extern void __cdecl _xsave64(
void *,
unsigned __int64);
1239 extern void __cdecl
_xsaveopt(
void *,
unsigned __int64);
1240 #if defined (_M_X64)
1241 extern void __cdecl _xsaveopt64(
void *,
unsigned __int64);
1248 extern void __cdecl
_xsavec(
void *,
unsigned __int64);
1249 #if defined (_M_X64)
1250 extern void __cdecl _xsavec64(
void *,
unsigned __int64);
1258 extern void __cdecl
_xrstor(
void const *,
unsigned __int64);
1259 #if defined (_M_X64)
1260 extern void __cdecl _xrstor64(
void const *,
unsigned __int64);
1268 extern void __cdecl
_xsaves(
void *,
unsigned __int64);
1269 #if defined (_M_X64)
1270 extern void __cdecl _xsaves64(
void *,
unsigned __int64);
1278 extern void __cdecl
_xrstors(
void const *,
unsigned __int64);
1279 #if defined (_M_X64)
1280 extern void __cdecl _xrstors64(
void const *,
unsigned __int64);
1287 extern void __cdecl
_fxsave(
void *);
1288 #if defined (_M_X64)
1289 extern void __cdecl _fxsave64(
void *);
1296 extern void __cdecl
_fxrstor(
void const *);
1297 #if defined (_M_X64)
1298 extern void __cdecl _fxrstor64(
void const *);
/*
 * Declared only when _M_X64 is defined. Takes a pointer to an
 * unsigned __int64 and returns an int.
 * NOTE(review): presumably the value is written through the pointer and the
 * return reports success -- confirm against the intrinsic documentation.
 */
1309 #if defined (_M_X64)
1310 extern int __cdecl _rdrand64_step(
unsigned __int64 *);
1313 #if defined (_M_X64)
/*
 * FS/GS base accessor declarations.
 *
 * The four readers take no arguments and return unsigned int (_u32) or
 * unsigned __int64 (_u64). The four writers take one value of the matching
 * width and return nothing.
 *
 * The readers use an explicit (void) parameter list, like _xbegin, _xend and
 * _xtest in this header. In C an empty () declares a function without a
 * prototype, so a call that wrongly passes arguments would not be diagnosed;
 * (void) makes it a real prototype and means the same thing in C++.
 *
 * NOTE(review): presumably these read/write the FS and GS segment base
 * registers -- semantics are not visible in this span.
 */
1317 extern unsigned int __cdecl _readfsbase_u32(void);
1318 extern unsigned int __cdecl _readgsbase_u32(void);
1319 extern unsigned __int64 __cdecl _readfsbase_u64(void);
1320 extern unsigned __int64 __cdecl _readgsbase_u64(void);
1325 extern void __cdecl _writefsbase_u32(
unsigned int);
1326 extern void __cdecl _writegsbase_u32(
unsigned int);
1327 extern void __cdecl _writefsbase_u64(
unsigned __int64);
1328 extern void __cdecl _writegsbase_u64(
unsigned __int64);
/*
 * Alternate spellings: _mm256_bslli_epi128 and _mm256_bsrli_epi128 are
 * object-like aliases for _mm256_slli_si256 and _mm256_srli_si256.
 */
1480 #define _mm256_bslli_epi128 _mm256_slli_si256
1482 #define _mm256_bsrli_epi128 _mm256_srli_si256
1805 extern unsigned int _bextr_u32(
unsigned int ,
1808 extern unsigned int _blsi_u32(
unsigned int);
1810 extern unsigned int _blsr_u32(
unsigned int);
1811 extern unsigned int _bzhi_u32(
unsigned int ,
1813 extern unsigned int _mulx_u32(
unsigned int ,
1816 extern unsigned int _pdep_u32(
unsigned int ,
1818 extern unsigned int _pext_u32(
unsigned int ,
1820 extern unsigned int _rorx_u32(
unsigned int ,
1821 const unsigned int );
1824 extern unsigned int _shlx_u32(
unsigned int ,
1826 extern unsigned int _shrx_u32(
unsigned int ,
1829 #if defined (_M_X64)
1830 extern unsigned __int64 _bextr_u64(
unsigned __int64 ,
1833 extern unsigned __int64 _blsi_u64(
unsigned __int64);
1834 extern unsigned __int64 _blsmsk_u64(
unsigned __int64);
1835 extern unsigned __int64 _blsr_u64(
unsigned __int64);
1836 extern unsigned __int64 _bzhi_u64(
unsigned __int64 ,
1838 extern unsigned __int64 _mulx_u64(
unsigned __int64 ,
1840 unsigned __int64 * );
1841 extern unsigned __int64 _pdep_u64(
unsigned __int64 ,
1843 extern unsigned __int64 _pext_u64(
unsigned __int64 ,
1845 extern unsigned __int64 _rorx_u64(
unsigned __int64 ,
1846 const unsigned int );
1847 extern __int64 _sarx_i64(__int64 ,
1849 extern unsigned __int64 _shlx_u64(
unsigned __int64 ,
1851 extern unsigned __int64 _shrx_u64(
unsigned __int64 ,
/*
 * _lzcnt / _tzcnt declarations. The _u32 forms map unsigned int to
 * unsigned int; the _u64 forms map unsigned __int64 to unsigned __int64 and
 * are declared under `#if defined (_M_X64)`.
 * NOTE(review): presumably leading-/trailing-zero bit counts -- the
 * behaviour for a zero input is not visible here; confirm against the
 * intrinsic documentation.
 */
1862 extern unsigned int _lzcnt_u32(
unsigned int);
1863 #if defined (_M_X64)
1864 extern unsigned __int64 _lzcnt_u64(
unsigned __int64);
1874 extern unsigned int _tzcnt_u32(
unsigned int);
1875 #if defined (_M_X64)
1876 extern unsigned __int64 _tzcnt_u64(
unsigned __int64);
/*
 * Takes an unsigned int and a `void *`; returns nothing.
 * NOTE(review): presumably the first argument is the invalidation type and
 * the second points at a descriptor -- not visible here, confirm.
 */
1884 extern void __cdecl
_invpcid(
unsigned int ,
void * );
/*
 * 64-bit HLE-suffixed store / interlocked declarations.
 *
 * Except for _Store64_HLERelease, each operation comes as an
 * _HLEAcquire / _HLERelease pair with identical signatures.
 * The store, exchange, exchange-add, and, or and xor forms take an
 * `__int64 volatile *` target plus an __int64 operand; the interlocked forms
 * return __int64 and the store returns nothing.
 *
 * NOTE(review): presumably the suffix selects the HLE acquire/release hint
 * and the return is the previous value -- not visible here, confirm.
 */
1918 extern void _Store64_HLERelease(__int64
volatile *,__int64);
1919 extern __int64 _InterlockedExchange64_HLEAcquire(__int64
volatile *,__int64);
1920 extern __int64 _InterlockedExchange64_HLERelease(__int64
volatile *,__int64);
1922 extern __int64 _InterlockedExchangeAdd64_HLEAcquire(__int64
volatile *,__int64);
1923 extern __int64 _InterlockedExchangeAdd64_HLERelease(__int64
volatile *,__int64);
1925 extern __int64 _InterlockedAnd64_HLEAcquire(__int64
volatile *,__int64);
1926 extern __int64 _InterlockedAnd64_HLERelease(__int64
volatile *,__int64);
1927 extern __int64 _InterlockedOr64_HLEAcquire(__int64
volatile *,__int64);
1928 extern __int64 _InterlockedOr64_HLERelease(__int64
volatile *,__int64);
1929 extern __int64 _InterlockedXor64_HLEAcquire(__int64
volatile *,__int64);
1930 extern __int64 _InterlockedXor64_HLERelease(__int64
volatile *,__int64);
/*
 * Bit-test-and-set/reset forms: return unsigned char. Note that, unlike the
 * declarations above, the pointer parameter here is a plain `__int64 *`
 * without the volatile qualifier.
 */
1932 extern unsigned char _interlockedbittestandset64_HLEAcquire(__int64 *,__int64);
1933 extern unsigned char _interlockedbittestandset64_HLERelease(__int64 *,__int64);
1934 extern unsigned char _interlockedbittestandreset64_HLEAcquire(__int64 *,__int64);
1935 extern unsigned char _interlockedbittestandreset64_HLERelease(__int64 *,__int64);
/*
 * Transactional (_xbegin / _xend / _xabort / _xtest) constants and
 * declarations.
 *
 * _XBEGIN_STARTED is the all-ones unsigned int value (~0u).
 * The _XABORT_* names are single-bit masks occupying bits 0 through 5.
 * _XABORT_CODE(x) extracts bits 31:24 of x as an unsigned char.
 *
 * NOTE(review): presumably _xbegin returns _XBEGIN_STARTED when a
 * transaction starts and otherwise a status word to test against the
 * _XABORT_* masks, with _XABORT_CODE recovering the value given to _xabort --
 * confirm against the Intel documentation.
 */
1939 #define _XBEGIN_STARTED (~0u)
1940 #define _XABORT_EXPLICIT (1 << 0)
1941 #define _XABORT_RETRY (1 << 1)
1942 #define _XABORT_CONFLICT (1 << 2)
1943 #define _XABORT_CAPACITY (1 << 3)
1944 #define _XABORT_DEBUG (1 << 4)
1945 #define _XABORT_NESTED (1 << 5)
1946 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
/* No arguments; returns unsigned int. */
1948 extern unsigned int __cdecl
_xbegin(
void);
/* No arguments; returns nothing. */
1949 extern void __cdecl
_xend(
void);
/* Takes a const unsigned int; returns nothing. */
1950 extern void __cdecl
_xabort(
const unsigned int);
/* No arguments; returns unsigned char. */
1951 extern unsigned char __cdecl
_xtest(
void);
1963 extern int __cdecl _rdseed64_step(
unsigned __int64 *);
1982 extern unsigned char __cdecl _addcarryx_u64(
unsigned char ,
1985 unsigned __int64 * );
/*
 * _load_be / _store_be helpers and their signed wrappers.
 * NOTE(review): "be" presumably stands for big-endian (byte-swapping
 * load/store) -- the semantics are not visible in this span.
 *
 * The _loadbe_i16/_i32/_i64 and _storebe_i16/_i32/_i64 macros also reference
 * _load_be_u32, _store_be_u16 and _store_be_u32, whose declarations are not
 * part of this span.
 */
1992 extern unsigned short __cdecl
_load_be_u16(
void const*);
1994 extern unsigned __int64 __cdecl
_load_be_u64(
void const*);
/* Signed loads: call the unsigned loader of the same width and cast the result. */
1995 #define _loadbe_i16(be_ptr) ((short) _load_be_u16(be_ptr))
1996 #define _loadbe_i32(be_ptr) ((int) _load_be_u32(be_ptr))
1997 #define _loadbe_i64(be_ptr) ((__int64)_load_be_u64(be_ptr))
2004 extern void __cdecl
_store_be_u64(
void *,
unsigned __int64);
/*
 * Signed stores: cast `val` to the unsigned type of the same width and
 * forward to the unsigned store. The 64-bit form casts through __int64 first.
 */
2005 #define _storebe_i16(be_ptr, val) _store_be_u16(be_ptr, (unsigned short)(val))
2006 #define _storebe_i32(be_ptr, val) _store_be_u32(be_ptr, (unsigned int)(val))
2007 #define _storebe_i64(be_ptr, val) _store_be_u64(be_ptr, (unsigned __int64)(__int64)(val))
2036 #if defined __cplusplus
void __cdecl _mm256_storeu_pd(double *, __m256d)
__m256 __cdecl _mm256_setzero_ps(void)
__m256i __cdecl _mm256_set_epi16(short, short, short, short, short, short, short, short, short, short, short, short, short, short, short, short)
__m256i __cdecl _mm256_i32gather_epi64(__int64 const *, __m128i, const int)
unsigned int _blsmsk_u32(unsigned int)
__m256d __cdecl _mm256_sub_pd(__m256d, __m256d)
int __cdecl _mm_testz_pd(__m128d, __m128d)
__m128i __cdecl _mm_broadcastw_epi16(__m128i)
void __cdecl _mm256_stream_si256(__m256i *, __m256i)
void __cdecl _store_be_u32(void *, unsigned int)
__m256i __cdecl _mm256_add_epi64(__m256i, __m256i)
__m128i __cdecl _mm_broadcastd_epi32(__m128i)
__m256 __cdecl _mm256_broadcast_ss(float const *)
void __cdecl _bnd_chk_ptr_lbounds(const void *)
void __cdecl _mm256_maskstore_epi64(__int64 *, __m256i, __m256i)
__m128d __cdecl _mm_permutevar_pd(__m128d, __m128i)
__m256d __cdecl _mm256_div_pd(__m256d, __m256d)
int __cdecl _mm256_testnzc_si256(__m256i, __m256i)
__m256i __cdecl _mm256_srli_epi32(__m256i, int)
long _InterlockedExchange_HLEAcquire(long volatile *, long)
__m128i __cdecl _mm_broadcastq_epi64(__m128i)
__m256i __cdecl _mm256_sll_epi16(__m256i, __m128i)
unsigned int __cdecl _load_be_u32(void const *)
__m256i __cdecl _mm256_srav_epi32(__m256i, __m256i)
__m256 __cdecl _mm256_div_ps(__m256, __m256)
void __cdecl _xabort(const unsigned int)
long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long)
__m256d __cdecl _mm256_blend_pd(__m256d, __m256d, const int)
__m256 __cdecl _mm256_rsqrt_ps(__m256)
__m256d __cdecl _mm256_cvtps_pd(__m128)
__m256i __cdecl _mm256_load_si256(__m256i const *)
__m256i __cdecl _mm256_sllv_epi64(__m256i, __m256i)
__m256 __cdecl _mm256_i32gather_ps(float const *, __m256i, const int)
__m256i __cdecl _mm256_subs_epu8(__m256i, __m256i)
__m256i __cdecl _mm256_unpackhi_epi32(__m256i, __m256i)
__m256i __cdecl _mm256_sub_epi16(__m256i, __m256i)
__m256d __cdecl _mm256_andnot_pd(__m256d, __m256d)
void __cdecl _xsave(void *, unsigned __int64)
__m256i __cdecl _mm256_sign_epi16(__m256i, __m256i)
__m256d __cdecl _mm256_round_pd(__m256d, int)
__m256i __cdecl _mm256_mulhrs_epi16(__m256i, __m256i)
__m256i __cdecl _mm256_shufflelo_epi16(__m256i, const int)
__m128 __cdecl _mm_cmp_ps(__m128, __m128, const int)
__m256d __cdecl _mm256_fmsubadd_pd(__m256d, __m256d, __m256d)
__m256i __cdecl _mm256_min_epu8(__m256i, __m256i)
__m256i __cdecl _mm256_srli_epi64(__m256i, int)
__m128 __cdecl _mm_fmadd_ss(__m128, __m128, __m128)
__m256i __cdecl _mm256_max_epi8(__m256i, __m256i)
void *__cdecl _bnd_load_ptr_bounds(const void **, const void *)
__m128 __cdecl _mm256_i64gather_ps(float const *, __m256i, const int)
__m256i __cdecl _mm256_max_epi32(__m256i, __m256i)
__m256d __cdecl _mm256_set_pd(double, double, double, double)
__m256 __cdecl _mm256_loadu_ps(float const *)
void __cdecl _mm256_storeu_ps(float *, __m256)
__m256d __cdecl _mm256_load_pd(double const *)
__m256i __cdecl _mm256_min_epu32(__m256i, __m256i)
__m256d __cdecl _mm256_and_pd(__m256d, __m256d)
int __cdecl _rdseed32_step(unsigned int *)
__m256i __cdecl _mm256_mullo_epi32(__m256i, __m256i)
void __cdecl _mm_maskstore_ps(float *, __m128i, __m128)
__m256i __cdecl _mm256_shuffle_epi8(__m256i, __m256i)
__m256 __cdecl _mm256_insertf128_ps(__m256, __m128, int)
__m256i __cdecl _mm256_broadcastb_epi8(__m128i)
__m256i __cdecl _mm256_cmpgt_epi32(__m256i, __m256i)
__m128i __cdecl _mm_i32gather_epi32(int const *, __m128i, const int)
__m256i __cdecl _mm256_cmpgt_epi16(__m256i, __m256i)
__m256i __cdecl _mm256_setr_epi16(short, short, short, short, short, short, short, short, short, short, short, short, short, short, short, short)
__m256d __cdecl _mm256_castps_pd(__m256)
unsigned int _blsr_u32(unsigned int)
__m256 __cdecl _mm256_sqrt_ps(__m256)
__m256d __cdecl _mm256_movedup_pd(__m256d)
__m128d __cdecl _mm_fnmsub_sd(__m128d, __m128d, __m128d)
__m256i __cdecl _mm256_unpacklo_epi16(__m256i, __m256i)
__m256d __cdecl _mm256_fnmadd_pd(__m256d, __m256d, __m256d)
__m256i __cdecl _mm256_permute2f128_si256(__m256i, __m256i, int)
__m128i __cdecl _mm_maskload_epi64(__int64 const *, __m128i)
long _InterlockedXor_HLEAcquire(long volatile *, long)
__m256d __cdecl _mm256_fnmsub_pd(__m256d, __m256d, __m256d)
__m256 __cdecl _mm256_xor_ps(__m256, __m256)
__m256i __cdecl _mm256_max_epu8(__m256i, __m256i)
__m256i __cdecl _mm256_inserti128_si256(__m256i, __m128i, const int)
long _InterlockedXor_HLERelease(long volatile *, long)
__m128i __cdecl _mm_maskload_epi32(int const *, __m128i)
__m128d __cdecl _mm_cmp_pd(__m128d, __m128d, const int)
__m256i __cdecl _mm256_avg_epu8(__m256i, __m256i)
const void *__cdecl _bnd_get_ptr_lbound(const void *)
unsigned int _rorx_u32(unsigned int, const unsigned int)
__m128d
Definition: emmintrin.h:57
__m256 __cdecl _mm256_load_ps(float const *)
__m256i __cdecl _mm256_add_epi32(__m256i, __m256i)
__m128 __cdecl _mm256_mask_i64gather_ps(__m128, float const *, __m256i, __m128, const int)
__m256i __cdecl _mm256_avg_epu16(__m256i, __m256i)
__m128i __cdecl _mm_sha256msg2_epu32(__m128i, __m128i)
unsigned char _interlockedbittestandreset_HLERelease(long *, long)
__m256d
Definition: immintrin.h:43
void __cdecl _mm256_stream_ps(float *, __m256)
__m256d __cdecl _mm256_loadu_pd(double const *)
void * align(size_t _Bound, size_t _Size, void *&_Ptr, size_t &_Space) _NOEXCEPT
Definition: memory:1985
long _InterlockedCompareExchange_HLERelease(long volatile *, long, long)
unsigned char __cdecl _addcarryx_u32(unsigned char, unsigned int, unsigned int, unsigned int *)
__m256i __cdecl _mm256_max_epu32(__m256i, __m256i)
unsigned int _mulx_u32(unsigned int, unsigned int, unsigned int *)
__m128 __cdecl _mm_maskload_ps(float const *, __m128i)
__m256i __cdecl _mm256_srli_epi16(__m256i, int)
__m128 __cdecl _mm_permutevar_ps(__m128, __m128i)
__m256 __cdecl _mm256_permutevar_ps(__m256, __m256i)
void __cdecl _xrstors(void const *, unsigned __int64)
__m256 __cdecl _mm256_castps128_ps256(__m128)
long _InterlockedOr_HLERelease(long volatile *, long)
void __cdecl _mm256_store_pd(double *, __m256d)
int __cdecl _mm_comi_sd(__m128d, __m128d, const int)
__m256 __cdecl _mm256_fmsubadd_ps(__m256, __m256, __m256)
__m256 __cdecl _mm256_permute_ps(__m256, int)
__m128d __cdecl _mm_fmsub_pd(__m128d, __m128d, __m128d)
__m256i __cdecl _mm256_setzero_si256(void)
void * _InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *, void *)
__m256 __cdecl _mm256_add_ps(__m256, __m256)
unsigned short __cdecl _load_be_u16(void const *)
__m256d __cdecl _mm256_permute_pd(__m256d, int)
int __cdecl _mm256_testnzc_pd(__m256d, __m256d)
unsigned int _shlx_u32(unsigned int, unsigned int)
__m256i __cdecl _mm256_cmpeq_epi64(__m256i, __m256i)
__m256d __cdecl _mm256_shuffle_pd(__m256d, __m256d, const int)
__m256i __cdecl _mm256_madd_epi16(__m256i, __m256i)
__m256i __cdecl _mm256_set1_epi64x(long long)
__m256i __cdecl _mm256_packs_epi16(__m256i, __m256i)
__m128d __cdecl _mm_fmadd_sd(__m128d, __m128d, __m128d)
__m256i __cdecl _mm256_blend_epi16(__m256i, __m256i, const int)
__m256d __cdecl _mm256_i64gather_pd(double const *, __m256i, const int)
__m256d __cdecl _mm256_permute2f128_pd(__m256d, __m256d, int)
__m256i __cdecl _mm256_adds_epu16(__m256i, __m256i)
__m256i __cdecl _mm256_maskload_epi32(int const *, __m256i)
__m256d __cdecl _mm256_mul_pd(__m256d, __m256d)
unsigned char __cdecl _xtest(void)
__m256d __cdecl _mm256_broadcast_sd(double const *)
int __cdecl _mm256_testc_pd(__m256d, __m256d)
__m256i __cdecl _mm256_sll_epi32(__m256i, __m128i)
__m256i __cdecl _mm256_set1_epi8(char)
void _StorePointer_HLERelease(void *volatile *, void *)
__m256i __cdecl _mm256_hsubs_epi16(__m256i, __m256i)
__m256i __cdecl _mm256_blend_epi32(__m256i, __m256i, const int)
__m256i __cdecl _mm256_alignr_epi8(__m256i, __m256i, const int)
__m256i __cdecl _mm256_mask_i64gather_epi64(__m256i, __int64 const *, __m256i, __m256i, const int)
__m256i __cdecl _mm256_unpackhi_epi16(__m256i, __m256i)
long _InterlockedExchangeAdd_HLERelease(long volatile *, long)
__m256 __cdecl _mm256_fmaddsub_ps(__m256, __m256, __m256)
__m256i __cdecl _mm256_cvtepi32_epi64(__m128i)
__m256i __cdecl _mm256_cmpeq_epi8(__m256i, __m256i)
__m256i __cdecl _mm256_adds_epu8(__m256i, __m256i)
__m256 __cdecl _mm256_castpd_ps(__m256d)
void __cdecl _mm256_maskstore_epi32(int *, __m256i, __m256i)
__m256i __cdecl _mm256_set_epi64x(__int64, __int64, __int64, __int64)
void *__cdecl _bnd_set_ptr_bounds(const void *, size_t)
__m256d __cdecl _mm256_fmsub_pd(__m256d, __m256d, __m256d)
__m256i __cdecl _mm256_castpd_si256(__m256d)
__m256i __cdecl _mm256_packs_epi32(__m256i, __m256i)
__m128 __cdecl _mm_fmsub_ss(__m128, __m128, __m128)
__m256i __cdecl _mm256_abs_epi32(__m256i)
__m128d __cdecl _mm_fmaddsub_pd(__m128d, __m128d, __m128d)
unsigned __int64 __cdecl _xgetbv(unsigned int)
__m256i __cdecl _mm256_cvtepu8_epi32(__m128i)
int __cdecl _rdrand16_step(unsigned short *)
__m256d __cdecl _mm256_cvtepi32_pd(__m128i)
__m128 __cdecl _mm_cvtph_ps(__m128i)
__m256i __cdecl _mm256_unpacklo_epi32(__m256i, __m256i)
__m256i __cdecl _mm256_cvtepi16_epi32(__m128i)
__m128d __cdecl _mm_broadcastsd_pd(__m128d)
__m256d __cdecl _mm256_sqrt_pd(__m256d)
__m256i __cdecl _mm256_max_epu16(__m256i, __m256i)
__m256i __cdecl _mm256_mask_i32gather_epi64(__m256i, __int64 const *, __m128i, __m256i, const int)
__m128 __cdecl _mm_fnmsub_ps(__m128, __m128, __m128)
int __cdecl _mm_testnzc_pd(__m128d, __m128d)
__m256 __cdecl _mm256_hsub_ps(__m256, __m256)
__m128i __cdecl _mm_sha1msg2_epu32(__m128i, __m128i)
void * _InterlockedExchangePointer_HLERelease(void *volatile *, void *)
__m256 __cdecl _mm256_sub_ps(__m256, __m256)
void __cdecl _bnd_chk_ptr_ubounds(const void *)
__m128i __cdecl _mm_srlv_epi32(__m128i, __m128i)
__m256i __cdecl _mm256_abs_epi16(__m256i)
int __cdecl _mm_testc_pd(__m128d, __m128d)
__m256i __cdecl _mm256_sign_epi32(__m256i, __m256i)
__m256i __cdecl _mm256_setr_epi32(int, int, int, int, int, int, int, int)
__m256i __cdecl _mm256_broadcastsi128_si256(__m128i)
__m256 __cdecl _mm256_moveldup_ps(__m256)
unsigned char _interlockedbittestandreset_HLEAcquire(long *, long)
__m128i __cdecl _mm_srav_epi32(__m128i, __m128i)
__m256i __cdecl _mm256_packus_epi16(__m256i, __m256i)
__m256d __cdecl _mm256_mask_i64gather_pd(__m256d, double const *, __m256i, __m256d, const int)
__m256 __cdecl _mm256_and_ps(__m256, __m256)
__m256d __cdecl _mm256_addsub_pd(__m256d, __m256d)
__m128i __cdecl _mm_i64gather_epi64(__int64 const *, __m128i, const int)
__m256i __cdecl _mm256_sub_epi8(__m256i, __m256i)
__m256i __cdecl _mm256_and_si256(__m256i, __m256i)
unsigned char _interlockedbittestandset_HLEAcquire(long *, long)
__m256i __cdecl _mm256_unpacklo_epi64(__m256i, __m256i)
__m128 __cdecl _mm_cmp_ss(__m128, __m128, const int)
__m256i __cdecl _mm256_castsi128_si256(__m128i)
__m128 __cdecl _mm_i32gather_ps(float const *, __m128i, const int)
__m128d __cdecl _mm_fmsub_sd(__m128d, __m128d, __m128d)
__m256i __cdecl _mm256_abs_epi8(__m256i)
__m256i __cdecl _mm256_mul_epi32(__m256i, __m256i)
union __declspec(intrin_type) __declspec(align(32)) __m256
Definition: immintrin.h:37
__m256i __cdecl _mm256_sign_epi8(__m256i, __m256i)
__m256 __cdecl _mm256_cvtph_ps(__m128i)
__m128i __cdecl _mm_sllv_epi64(__m128i, __m128i)
unsigned __int64 __cdecl _load_be_u64(void const *)
int __cdecl _mm256_movemask_pd(__m256d)
__m128i __cdecl _mm_i64gather_epi32(int const *, __m128i, const int)
__m256 __cdecl _mm256_castsi256_ps(__m256i)
__m256 __cdecl _mm256_movehdup_ps(__m256)
void _Store_HLERelease(long volatile *, long)
__m128 __cdecl _mm_fnmadd_ss(__m128, __m128, __m128)
void __cdecl _mm256_maskstore_pd(double *, __m256i, __m256d)
__m256i __cdecl _mm256_packus_epi32(__m256i, __m256i)
__m128d __cdecl _mm_mask_i32gather_pd(__m128d, double const *, __m128i, __m128d, const int)
__m256i __cdecl _mm256_set1_epi32(int)
__m256d __cdecl _mm256_i32gather_pd(double const *, __m128i, const int)
__m256d __cdecl _mm256_broadcast_pd(__m128d const *)
void *__cdecl _bnd_init_ptr_bounds(const void *)
__m256d __cdecl _mm256_hsub_pd(__m256d, __m256d)
__m256d __cdecl _mm256_unpacklo_pd(__m256d, __m256d)
__m256d __cdecl _mm256_max_pd(__m256d, __m256d)
__m256i __cdecl _mm256_cmpgt_epi64(__m256i, __m256i)
int __cdecl _mm256_movemask_ps(__m256)
void __cdecl _fxrstor(void const *)
__m256 __cdecl _mm256_fnmsub_ps(__m256, __m256, __m256)
__m256i __cdecl _mm256_srai_epi32(__m256i, int)
unsigned int _lzcnt_u32(unsigned int)
__m128i __cdecl _mm256_extractf128_si256(__m256i, const int)
__m256 __cdecl _mm256_dp_ps(__m256, __m256, const int)
__m256 __cdecl _mm256_blendv_ps(__m256, __m256, __m256)
__m256i __cdecl _mm256_mask_i32gather_epi32(__m256i, int const *, __m256i, __m256i, const int)
__m256i __cdecl _mm256_subs_epi8(__m256i, __m256i)
__m256i __cdecl _mm256_min_epu16(__m256i, __m256i)
__m256d __cdecl _mm256_unpackhi_pd(__m256d, __m256d)
__m256i __cdecl _mm256_mulhi_epu16(__m256i, __m256i)
__m128 __cdecl _mm256_castps256_ps128(__m256)
__m256i __cdecl _mm256_sll_epi64(__m256i, __m128i)
__m128i __cdecl _mm256_mask_i64gather_epi32(__m128i, int const *, __m256i, __m128i, const int)
__m256i __cdecl _mm256_cmpeq_epi32(__m256i, __m256i)
__m128 __cdecl _mm_broadcastss_ps(__m128)
__m128d __cdecl _mm_maskload_pd(double const *, __m128i)
long _InterlockedAnd_HLERelease(long volatile *, long)
__m256 __cdecl _mm256_mask_i32gather_ps(__m256, float const *, __m256i, __m256, const int)
__m256i __cdecl _mm256_hsub_epi32(__m256i, __m256i)
__m256 __cdecl _mm256_cvtepi32_ps(__m256i)
__m256i __cdecl _mm256_unpackhi_epi64(__m256i, __m256i)
__m256i __cdecl _mm256_hadd_epi16(__m256i, __m256i)
__m256 __cdecl _mm256_hadd_ps(__m256, __m256)
__m256i __cdecl _mm256_cmpeq_epi16(__m256i, __m256i)
void __cdecl _mm_maskstore_pd(double *, __m128i, __m128d)
__m128d __cdecl _mm_fmsubadd_pd(__m128d, __m128d, __m128d)
__m256i __cdecl _mm256_cvtepu8_epi16(__m128i)
__m128i __cdecl _mm256_cvtpd_epi32(__m256d)
__m256 __cdecl _mm256_set1_ps(float)
void __cdecl _xrstor(void const *, unsigned __int64)
__m256i __cdecl _mm256_maskload_epi64(__int64 const *, __m256i)
__m256i __cdecl _mm256_srl_epi32(__m256i, __m128i)
__m256i __cdecl _mm256_mpsadbw_epu8(__m256i, __m256i, const int)
__m256 __cdecl _mm256_addsub_ps(__m256, __m256)
__m256d __cdecl _mm256_min_pd(__m256d, __m256d)
__m256i __cdecl _mm256_mulhi_epi16(__m256i, __m256i)
__m256d __cdecl _mm256_fmadd_pd(__m256d, __m256d, __m256d)
__m256i __cdecl _mm256_srlv_epi32(__m256i, __m256i)
__m128 __cdecl _mm_broadcast_ss(float const *)
__m128d __cdecl _mm256_castpd256_pd128(__m256d)
int __cdecl _mm_comi_ss(__m128, __m128, const int)
__m128i
Definition: emmintrin.h:53
__m256
Definition: immintrin.h:39
__m256 __cdecl _mm256_setr_ps(float, float, float, float, float, float, float, float)
__m128i __cdecl _mm_broadcastb_epi8(__m128i)
__m128i __cdecl _mm_sha256rnds2_epu32(__m128i, __m128i, __m128i)
__m256i __cdecl _mm256_mul_epu32(__m256i, __m256i)
__m128d __cdecl _mm_fmadd_pd(__m128d, __m128d, __m128d)
int __cdecl _mm256_testz_si256(__m256i, __m256i)
__m256i __cdecl _mm256_setr_epi8(char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char)
__m128 __cdecl _mm_i64gather_ps(float const *, __m128i, const int)
void __cdecl _xsaveopt(void *, unsigned __int64)
__m256i __cdecl _mm256_broadcastw_epi16(__m128i)
__m256d __cdecl _mm256_permute4x64_pd(__m256d, const int)
__m256d __cdecl _mm256_setr_pd(double, double, double, double)
int __cdecl _mm256_testc_ps(__m256, __m256)
__m128
Definition: xmmintrin.h:75
void __cdecl _mm256_zeroall(void)
void __cdecl _mm256_store_ps(float *, __m256)
unsigned char _interlockedbittestandset_HLERelease(long *, long)
unsigned int _shrx_u32(unsigned int, unsigned int)
void __cdecl _mm256_storeu_si256(__m256i *, __m256i)
__m128 __cdecl _mm_fmsub_ps(__m128, __m128, __m128)
__m128i __cdecl _mm_sha1rnds4_epu32(__m128i, __m128i, const int)
__m128i __cdecl _mm_sllv_epi32(__m128i, __m128i)
__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64, __int64)
__m256i __cdecl _mm256_min_epi32(__m256i, __m256i)
__m256i __cdecl _mm256_cvtepi8_epi16(__m128i)
__m256i __cdecl _mm256_permute2x128_si256(__m256i, __m256i, const int)
__m256i __cdecl _mm256_stream_load_si256(__m256i const *)
__m256i __cdecl _mm256_or_si256(__m256i, __m256i)
__m256i __cdecl _mm256_add_epi8(__m256i, __m256i)
int __cdecl _mm_testz_ps(__m128, __m128)
__m256 __cdecl _mm256_mul_ps(__m256, __m256)
__m256i __cdecl _mm256_add_epi16(__m256i, __m256i)
__m256i __cdecl _mm256_cvtepi16_epi64(__m128i)
__m256i __cdecl _mm256_insertf128_si256(__m256i, __m128i, int)
int __cdecl _mm_testnzc_ps(__m128, __m128)
__m256i __cdecl _mm256_unpacklo_epi8(__m256i, __m256i)
void __cdecl _invpcid(unsigned int, void *)
__m256 __cdecl _mm256_fnmadd_ps(__m256, __m256, __m256)
__m256i __cdecl _mm256_cvtepi8_epi64(__m128i)
__m256i __cdecl _mm256_andnot_si256(__m256i, __m256i)
void __cdecl _mm256_zeroupper(void)
void __cdecl _store_be_u16(void *, unsigned short)
__m256i __cdecl _mm256_slli_epi64(__m256i, int)
__m128i __cdecl _mm256_extracti128_si256(__m256i, const int)
unsigned int __cdecl _xbegin(void)
__m256i __cdecl _mm256_hsub_epi16(__m256i, __m256i)
int __cdecl _rdseed16_step(unsigned short *)
__m128i __cdecl _mm256_cvtps_ph(__m256, int)
__int64 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *, __int64, __int64)
unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int)
__m128d __cdecl _mm_i32gather_pd(double const *, __m128i, const int)
__m128i __cdecl _mm256_castsi256_si128(__m256i)
__m256 __cdecl _mm256_min_ps(__m256, __m256)
__m256i __cdecl _mm256_slli_si256(__m256i, const int)
__m128d __cdecl _mm_mask_i64gather_pd(__m128d, double const *, __m128i, __m128d, const int)
__m128i __cdecl _mm_sha1nexte_epu32(__m128i, __m128i)
__m128 __cdecl _mm_fnmadd_ps(__m128, __m128, __m128)
__m256 __cdecl _mm256_andnot_ps(__m256, __m256)
void __cdecl _fxsave(void *)
__m256i __cdecl _mm256_set1_epi16(short)
__m256i __cdecl _mm256_slli_epi16(__m256i, int)
int _sarx_i32(int, unsigned int)
__m256 __cdecl _mm256_blend_ps(__m256, __m256, const int)
unsigned int _tzcnt_u32(unsigned int)
__m256 __cdecl _mm256_unpackhi_ps(__m256, __m256)
__m256i __cdecl _mm256_broadcastd_epi32(__m128i)
long _InterlockedCompareExchange_HLEAcquire(long volatile *, long, long)
__m256 __cdecl _mm256_round_ps(__m256, int)
__m256 __cdecl _mm256_permute2f128_ps(__m256, __m256, int)
unsigned int _pext_u32(unsigned int, unsigned int)
__m256d __cdecl _mm256_insertf128_pd(__m256d, __m128d, int)
void __cdecl _bnd_chk_ptr_bounds(const void *, size_t)
__m256i __cdecl _mm256_castps_si256(__m256)
__m256i __cdecl _mm256_xor_si256(__m256i, __m256i)
__m256i __cdecl _mm256_cvtepu16_epi32(__m128i)
__m256d __cdecl _mm256_maskload_pd(double const *, __m256i)
unsigned int _pdep_u32(unsigned int, unsigned int)
__m256d __cdecl _mm256_castsi256_pd(__m256i)
void __cdecl _store_be_u64(void *, unsigned __int64)
__m256i __cdecl _mm256_sra_epi32(__m256i, __m128i)
void * _InterlockedExchangePointer_HLEAcquire(void *volatile *, void *)
__m256 __cdecl _mm256_maskload_ps(float const *, __m256i)
int __cdecl _mm256_testz_pd(__m256d, __m256d)
void *__cdecl _bnd_copy_ptr_bounds(const void *, const void *)
__m256d __cdecl _mm256_blendv_pd(__m256d, __m256d, __m256d)
__m256i __cdecl _mm256_shuffle_epi32(__m256i, const int)
__m128i __cdecl _mm_mask_i32gather_epi32(__m128i, int const *, __m128i, __m128i, const int)
__m256d __cdecl _mm256_or_pd(__m256d, __m256d)
__m128i __cdecl _mm_mask_i64gather_epi64(__m128i, __int64 const *, __m128i, __m128i, const int)
__m256i __cdecl _mm256_srl_epi16(__m256i, __m128i)
void __cdecl _bnd_store_ptr_bounds(const void **, const void *)
__m256i __cdecl _mm256_cvtepi8_epi32(__m128i)
__m128 __cdecl _mm_fmaddsub_ps(__m128, __m128, __m128)
__m256i __cdecl _mm256_slli_epi32(__m256i, int)
__m256 __cdecl _mm256_or_ps(__m256, __m256)
__m128i __cdecl _mm_i32gather_epi64(__int64 const *, __m128i, const int)
__m128i __cdecl _mm_cvtps_ph(__m128, const int)
__m256i __cdecl _mm256_subs_epi16(__m256i, __m256i)
__m256i __cdecl _mm256_lddqu_si256(__m256i const *)
__m128i __cdecl _mm_sha1msg1_epu32(__m128i, __m128i)
__m256i __cdecl _mm256_hadd_epi32(__m256i, __m256i)
__m256i __cdecl _mm256_adds_epi8(__m256i, __m256i)
__m256i __cdecl _mm256_srli_si256(__m256i, const int)
__m256i __cdecl _mm256_maddubs_epi16(__m256i, __m256i)
__m128 __cdecl _mm_mask_i64gather_ps(__m128, float const *, __m128i, __m128, const int)
long _InterlockedOr_HLEAcquire(long volatile *, long)
__m256d __cdecl _mm256_setzero_pd(void)
__m128d __cdecl _mm_cmp_sd(__m128d, __m128d, const int)
__m128i __cdecl _mm_srlv_epi64(__m128i, __m128i)
__m256i __cdecl _mm256_cmpgt_epi8(__m256i, __m256i)
__m256 __cdecl _mm256_rcp_ps(__m256)
__m256i __cdecl _mm256_unpackhi_epi8(__m256i, __m256i)
__m256i __cdecl _mm256_sllv_epi32(__m256i, __m256i)
__m256i __cdecl _mm256_i32gather_epi32(int const *, __m256i, const int)
__m256 __cdecl _mm256_permutevar8x32_ps(__m256, __m256i)
void __cdecl _xsaves(void *, unsigned __int64)
__m256i __cdecl _mm256_cvtepu8_epi64(__m128i)
__m128d __cdecl _mm_fnmsub_pd(__m128d, __m128d, __m128d)
__m128d __cdecl _mm_i64gather_pd(double const *, __m128i, const int)
int __cdecl _mm256_testnzc_ps(__m256, __m256)
__m256i __cdecl _mm256_broadcastq_epi64(__m128i)
__m256d __cdecl _mm256_broadcastsd_pd(__m128d)
__m256i __cdecl _mm256_loadu_si256(__m256i const *)
__m256d __cdecl _mm256_set1_pd(double)
__m256 __cdecl _mm256_fmsub_ps(__m256, __m256, __m256)
__m256i __cdecl _mm256_srl_epi64(__m256i, __m128i)
__m128i __cdecl _mm256_cvttpd_epi32(__m256d)
__m256i __cdecl _mm256_min_epi8(__m256i, __m256i)
__m256d __cdecl _mm256_hadd_pd(__m256d, __m256d)
__m256 __cdecl _mm256_max_ps(__m256, __m256)
__m256i __cdecl _mm256_min_epi16(__m256i, __m256i)
__m256i __cdecl _mm256_srai_epi16(__m256i, int)
__m256i __cdecl _mm256_max_epi16(__m256i, __m256i)
__m128i __cdecl _mm256_i64gather_epi32(int const *, __m256i, const int)
__m256 __cdecl _mm256_set_ps(float, float, float, float, float, float, float, float)
__m256i __cdecl _mm256_blendv_epi8(__m256i, __m256i, __m256i)
__m256 __cdecl _mm256_cmp_ps(__m256, __m256, const int)
void *__cdecl _bnd_narrow_ptr_bounds(const void *, const void *, size_t)
unsigned int _bzhi_u32(unsigned int, unsigned int)
__m128i __cdecl _mm_sha256msg1_epu32(__m128i, __m128i)
__m256d __cdecl _mm256_permutevar_pd(__m256d, __m256i)
__m128 __cdecl _mm_fmsubadd_ps(__m128, __m128, __m128)
__m256i __cdecl _mm256_cvtepu16_epi64(__m128i)
__m256i __cdecl _mm256_sra_epi16(__m256i, __m128i)
void __cdecl _xsetbv(unsigned int, unsigned __int64)
__m128 __cdecl _mm_fmadd_ps(__m128, __m128, __m128)
__m256i __cdecl _mm256_cvtps_epi32(__m256)
__m256i __cdecl _mm256_sad_epu8(__m256i, __m256i)
int __cdecl _mm256_testz_ps(__m256, __m256)
__m256 __cdecl _mm256_broadcast_ps(__m128 const *)
__m256d __cdecl _mm256_cmp_pd(__m256d, __m256d, const int)
__m256d __cdecl _mm256_xor_pd(__m256d, __m256d)
__m256i __cdecl _mm256_i64gather_epi64(__int64 const *, __m256i, const int)
__m256d __cdecl _mm256_add_pd(__m256d, __m256d)
__m256 __cdecl _mm256_broadcastss_ps(__m128)
__m256i __cdecl _mm256_sub_epi32(__m256i, __m256i)
__m256d __cdecl _mm256_mask_i32gather_pd(__m256d, double const *, __m128i, __m256d, const int)
__m256i __cdecl _mm256_setr_epi64x(__int64, __int64, __int64, __int64)
int __cdecl _mm_testc_ps(__m128, __m128)
void __cdecl _mm_maskstore_epi64(__int64 *, __m128i, __m128i)
__m128 __cdecl _mm256_cvtpd_ps(__m256d)
__m256i __cdecl _mm256_permute4x64_epi64(__m256i, const int)
__m128i __cdecl _mm_mask_i32gather_epi64(__m128i, __int64 const *, __m128i, __m128i, const int)
__m128i __cdecl _mm_blend_epi32(__m128i, __m128i, const int)
unsigned int _blsi_u32(unsigned int)
const void *__cdecl _bnd_get_ptr_ubound(const void *)
int __cdecl _mm256_movemask_epi8(__m256i)
long _InterlockedAnd_HLEAcquire(long volatile *, long)
__m256i __cdecl _mm256_hadds_epi16(__m256i, __m256i)
__m256i __cdecl _mm256_adds_epi16(__m256i, __m256i)
__m128d __cdecl _mm_fnmadd_pd(__m128d, __m128d, __m128d)
__m256i
Definition: immintrin.h:54
__m256i __cdecl _mm256_permutevar8x32_epi32(__m256i, __m256i)
__m256i __cdecl _mm256_set_epi32(int, int, int, int, int, int, int, int)
__m256i __cdecl _mm256_set_epi8(char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char)
__m256i __cdecl _mm256_sub_epi64(__m256i, __m256i)
void __cdecl _mm256_maskstore_ps(float *, __m256i, __m256)
int __cdecl _mm256_testc_si256(__m256i, __m256i)
void * _InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *, void *)
__m256 __cdecl _mm256_fmadd_ps(__m256, __m256, __m256)
void __cdecl _mm_maskstore_epi32(int *, __m128i, __m128i)
__m128 __cdecl _mm_fnmsub_ss(__m128, __m128, __m128)
void __cdecl _xsavec(void *, unsigned __int64)
__m256d __cdecl _mm256_fmaddsub_pd(__m256d, __m256d, __m256d)
void __cdecl _mm256_store_si256(__m256i *, __m256i)
__m128d __cdecl _mm_fnmadd_sd(__m128d, __m128d, __m128d)
__m128i __cdecl _mm_mask_i64gather_epi32(__m128i, int const *, __m128i, __m128i, const int)
__m256i __cdecl _mm256_cvttps_epi32(__m256)
__m128 __cdecl _mm_permute_ps(__m128, int)
__m256i __cdecl _mm256_srlv_epi64(__m256i, __m256i)
__m256i __cdecl _mm256_cvtepu32_epi64(__m128i)
__m256 __cdecl _mm256_shuffle_ps(__m256, __m256, const int)
__m256i __cdecl _mm256_mullo_epi16(__m256i, __m256i)
__m256i __cdecl _mm256_shufflehi_epi16(__m256i, const int)
void __cdecl _mm256_stream_pd(double *, __m256d)
__m256d __cdecl _mm256_castpd128_pd256(__m128d)
long _InterlockedExchange_HLERelease(long volatile *, long)
__m128 __cdecl _mm256_extractf128_ps(__m256, const int)
__m128d __cdecl _mm256_extractf128_pd(__m256d, const int)
__m256i __cdecl _mm256_subs_epu16(__m256i, __m256i)
__m128 __cdecl _mm_mask_i32gather_ps(__m128, float const *, __m128i, __m128, const int)
int __cdecl _rdrand32_step(unsigned int *)
__m128d __cdecl _mm_permute_pd(__m128d, int)
__m256 __cdecl _mm256_unpacklo_ps(__m256, __m256)