19 #if defined (_M_CEE_PURE)
20 #error ERROR: Intel Architecture intrinsic functions not supported in the pure mode!
42 __int16 m256i_i16[16];
45 unsigned __int8 m256i_u8[32];
46 unsigned __int16 m256i_u16[16];
47 unsigned __int32 m256i_u32[8];
48 unsigned __int64 m256i_u64[4];
/*
 * _CMP_* predicate codes: 32 consecutive immediates, 0x00 through 0x1F.
 *
 * The table is laid out in two halves. Every code in 0x10-0x1F has the same
 * base name as the code exactly 0x10 below it, with only the final suffix
 * letter swapped (Q <-> S), e.g. _CMP_EQ_OQ (0x00) and _CMP_EQ_OS (0x10).
 *
 * NOTE(review): presumably these are the predicate immediates passed as the
 * trailing `const int` of the _mm*_cmp_* intrinsics, with O/U meaning
 * ordered/unordered and Q/S meaning quiet/signaling -- this listing does not
 * spell that out; confirm against the Intel intrinsics reference.
 */
/* First half: 0x00 - 0x0F. */
55 #define _CMP_EQ_OQ 0x00
56 #define _CMP_LT_OS 0x01
57 #define _CMP_LE_OS 0x02
58 #define _CMP_UNORD_Q 0x03
59 #define _CMP_NEQ_UQ 0x04
60 #define _CMP_NLT_US 0x05
61 #define _CMP_NLE_US 0x06
63 #define _CMP_ORD_Q 0x07
64 #define _CMP_EQ_UQ 0x08
65 #define _CMP_NGE_US 0x09
67 #define _CMP_NGT_US 0x0A
68 #define _CMP_FALSE_OQ 0x0B
69 #define _CMP_NEQ_OQ 0x0C
70 #define _CMP_GE_OS 0x0D
71 #define _CMP_GT_OS 0x0E
72 #define _CMP_TRUE_UQ 0x0F
/* Second half: 0x10 - 0x1F, same base names with the Q/S suffix flipped. */
73 #define _CMP_EQ_OS 0x10
74 #define _CMP_LT_OQ 0x11
75 #define _CMP_LE_OQ 0x12
76 #define _CMP_UNORD_S 0x13
77 #define _CMP_NEQ_US 0x14
78 #define _CMP_NLT_UQ 0x15
79 #define _CMP_NLE_UQ 0x16
81 #define _CMP_ORD_S 0x17
82 #define _CMP_EQ_US 0x18
83 #define _CMP_NGE_UQ 0x19
85 #define _CMP_NGT_UQ 0x1A
86 #define _CMP_FALSE_OS 0x1B
87 #define _CMP_NEQ_OS 0x1C
88 #define _CMP_GE_OQ 0x1D
90 #define _CMP_GT_OQ 0x1E
91 #define _CMP_TRUE_US 0x1F
759 #define _mm256_loadu2_m128( hiaddr, \
761 _mm256_set_m128(_mm_loadu_ps(hiaddr), _mm_loadu_ps(loaddr))
763 #define _mm256_loadu2_m128d( hiaddr, \
765 _mm256_set_m128d(_mm_loadu_pd(hiaddr), _mm_loadu_pd(loaddr))
767 #define _mm256_loadu2_m128i( hiaddr, \
769 _mm256_set_m128i(_mm_loadu_si128(hiaddr), _mm_loadu_si128(loaddr))
777 #define _mm256_storeu2_m128( hiaddr, loaddr, \
781 _mm_storeu_ps((loaddr), _mm256_castps256_ps128(_a)); \
782 _mm_storeu_ps((hiaddr), _mm256_extractf128_ps(_a, 0x1)); \
785 #define _mm256_storeu2_m128d( hiaddr, loaddr, \
789 _mm_storeu_pd((loaddr), _mm256_castpd256_pd128(_a)); \
790 _mm_storeu_pd((hiaddr), _mm256_extractf128_pd(_a, 0x1)); \
793 #define _mm256_storeu2_m128i( hiaddr, loaddr, \
797 _mm_storeu_si128((loaddr), _mm256_castsi256_si128(_a)); \
798 _mm_storeu_si128((hiaddr), _mm256_extractf128_si256(_a, 0x1)); \
/*
 * Ceil/floor convenience wrappers. Each macro forwards `val` (parenthesized,
 * expanded exactly once) to _mm256_round_pd or _mm256_round_ps together with
 * a fixed rounding-mode constant: _MM_FROUND_CEIL for the ceil forms and
 * _MM_FROUND_FLOOR for the floor forms. Those two constants are not defined
 * in this part of the listing.
 */
/* Double-precision forms (forward to _mm256_round_pd). */
958 #define _mm256_ceil_pd(val) _mm256_round_pd((val), _MM_FROUND_CEIL)
959 #define _mm256_floor_pd(val) _mm256_round_pd((val), _MM_FROUND_FLOOR)
/* Single-precision forms (forward to _mm256_round_ps). */
973 #define _mm256_ceil_ps(val) _mm256_round_ps((val), _MM_FROUND_CEIL)
974 #define _mm256_floor_ps(val) _mm256_round_ps((val), _MM_FROUND_FLOOR)
/* Forwards (mask, val), each parenthesized, unchanged to _mm256_testz_si256. */
1017 #define _mm256_test_all_zeros(mask, val) \
1018 _mm256_testz_si256((mask), (val))
/*
 * Passes `val` as the first operand of _mm256_testc_si256 and, as the second
 * operand, the result of comparing `val` with itself via _mm256_cmpeq_epi32.
 * `val` appears three times in the expansion, so an argument expression with
 * side effects is evaluated more than once.
 */
1021 #define _mm256_test_all_ones(val) \
1022 _mm256_testc_si256((val), _mm256_cmpeq_epi32((val),(val)))
/* Forwards (mask, val), each parenthesized, unchanged to _mm256_testnzc_si256. */
1025 #define _mm256_test_mix_ones_zeros(mask, val) \
1026 _mm256_testnzc_si256((mask), (val))
1094 float,
float,
float,
float);
1096 char,
char,
char,
char,
1097 char,
char,
char,
char,
1098 char,
char,
char,
char,
1099 char,
char,
char,
char,
1100 char,
char,
char,
char,
1101 char,
char,
char,
char,
1102 char,
char,
char,
char);
1104 short,
short,
short,
short,
1105 short,
short,
short,
short,
1106 short,
short,
short,
short);
1108 int,
int,
int,
int);
/*
 * Compose a 256-bit vector from two 128-bit arguments given as (hi, lo).
 * Each macro first widens `lo` with the matching _mm256_cast*128_*256 call,
 * then passes that result, `hi`, and the index 0x1 to the matching
 * _mm256_insertf128_* call. Both arguments are expanded exactly once.
 * The three variants differ only in the ps / pd / si256 family of the calls.
 */
1112 #define _mm256_set_m128( hi, lo) \
1113 _mm256_insertf128_ps(_mm256_castps128_ps256(lo), (hi), 0x1)
1115 #define _mm256_set_m128d( hi, lo) \
1116 _mm256_insertf128_pd(_mm256_castpd128_pd256(lo), (hi), 0x1)
1118 #define _mm256_set_m128i( hi, lo) \
1119 _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 0x1)
1123 float,
float,
float,
float);
1125 char,
char,
char,
char,
1126 char,
char,
char,
char,
1127 char,
char,
char,
char,
1128 char,
char,
char,
char,
1129 char,
char,
char,
char,
1130 char,
char,
char,
char,
1131 char,
char,
char,
char);
1133 short,
short,
short,
short,
1134 short,
short,
short,
short,
1135 short,
short,
short,
short);
1137 int,
int,
int,
int);
/*
 * Reversed-argument forms: each takes (lo, hi) and forwards to the
 * corresponding _mm256_set_m128* macro with the arguments swapped to
 * (hi, lo). Nothing else is added by these wrappers.
 */
1140 #define _mm256_setr_m128(lo, hi) _mm256_set_m128((hi), (lo))
1141 #define _mm256_setr_m128d(lo, hi) _mm256_set_m128d((hi), (lo))
1142 #define _mm256_setr_m128i(lo, hi) _mm256_set_m128i((hi), (lo))
/*
 * The *_undefined_* names are object-like aliases for the matching
 * *_setzero_* names. Only the identifier is substituted, so a call such as
 * _mm256_undefined_ps() becomes _mm256_setzero_ps() after preprocessing.
 */
/* 128-bit aliases. */
1188 #define _mm_undefined_ps _mm_setzero_ps
1189 #define _mm_undefined_pd _mm_setzero_pd
1190 #define _mm_undefined_si128 _mm_setzero_si128
/* 256-bit aliases. */
1191 #define _mm256_undefined_ps _mm256_setzero_ps
1192 #define _mm256_undefined_pd _mm256_setzero_pd
1193 #define _mm256_undefined_si256 _mm256_setzero_si256
/*
 * Defined as the constant 0.
 * NOTE(review): presumably the register index intended for the unsigned int
 * argument of _xgetbv / _xsetbv below -- the listing does not show a use;
 * confirm against the compiler documentation.
 */
1199 #define _XCR_XFEATURE_ENABLED_MASK 0
/* __cdecl; takes one unsigned int and returns an unsigned __int64. */
1202 extern unsigned __int64 __cdecl
_xgetbv(
unsigned int);
/* __cdecl; takes an unsigned int and an unsigned __int64, returns nothing. */
1205 extern void __cdecl
_xsetbv(
unsigned int,
unsigned __int64);
1212 extern void __cdecl
_xsave(
void *,
unsigned __int64);
1213 #if defined (_M_X64)
1214 extern void __cdecl _xsave64(
void *,
unsigned __int64);
1222 extern void __cdecl
_xsaveopt(
void *,
unsigned __int64);
1223 #if defined (_M_X64)
1224 extern void __cdecl _xsaveopt64(
void *,
unsigned __int64);
1232 extern void __cdecl
_xrstor(
void const *,
unsigned __int64);
1233 #if defined (_M_X64)
1234 extern void __cdecl _xrstor64(
void const *,
unsigned __int64);
1241 extern void __cdecl
_fxsave(
void *);
1242 #if defined (_M_X64)
1243 extern void __cdecl _fxsave64(
void *);
1250 extern void __cdecl
_fxrstor(
void const *);
1251 #if defined (_M_X64)
1252 extern void __cdecl _fxrstor64(
void const *);
1263 #if defined (_M_X64)
1264 extern int __cdecl _rdrand64_step(
unsigned __int64 *);
1267 #if defined (_M_X64)
1271 extern unsigned int __cdecl _readfsbase_u32();
1272 extern unsigned int __cdecl _readgsbase_u32();
1273 extern unsigned __int64 __cdecl _readfsbase_u64();
1274 extern unsigned __int64 __cdecl _readgsbase_u64();
1279 extern void __cdecl _writefsbase_u32(
unsigned int);
1280 extern void __cdecl _writegsbase_u32(
unsigned int);
1281 extern void __cdecl _writefsbase_u64(
unsigned __int64);
1282 extern void __cdecl _writegsbase_u64(
unsigned __int64);
1755 extern unsigned int _bextr_u32(
unsigned int ,
1758 extern unsigned int _blsi_u32(
unsigned int);
1760 extern unsigned int _blsr_u32(
unsigned int);
1761 extern unsigned int _bzhi_u32(
unsigned int ,
1763 extern unsigned int _mulx_u32(
unsigned int ,
1766 extern unsigned int _pdep_u32(
unsigned int ,
1768 extern unsigned int _pext_u32(
unsigned int ,
1770 extern unsigned int _rorx_u32(
unsigned int ,
1771 const unsigned int );
1774 extern unsigned int _shlx_u32(
unsigned int ,
1776 extern unsigned int _shrx_u32(
unsigned int ,
1779 #if defined (_M_X64)
1780 extern unsigned __int64 _bextr_u64(
unsigned __int64 ,
1783 extern unsigned __int64 _blsi_u64(
unsigned __int64);
1784 extern unsigned __int64 _blsmsk_u64(
unsigned __int64);
1785 extern unsigned __int64 _blsr_u64(
unsigned __int64);
1786 extern unsigned __int64 _bzhi_u64(
unsigned __int64 ,
1788 extern unsigned __int64 _mulx_u64(
unsigned __int64 ,
1790 unsigned __int64 * );
1791 extern unsigned __int64 _pdep_u64(
unsigned __int64 ,
1793 extern unsigned __int64 _pext_u64(
unsigned __int64 ,
1795 extern unsigned __int64 _rorx_u64(
unsigned __int64 ,
1796 const unsigned int );
1797 extern __int64 _sarx_i64(__int64 ,
1799 extern unsigned __int64 _shlx_u64(
unsigned __int64 ,
1801 extern unsigned __int64 _shrx_u64(
unsigned __int64 ,
1812 extern unsigned int _lzcnt_u32(
unsigned int);
1813 #if defined (_M_X64)
1814 extern unsigned __int64 _lzcnt_u64(
unsigned __int64);
1824 extern unsigned int _tzcnt_u32(
unsigned int);
1825 #if defined (_M_X64)
1826 extern unsigned __int64 _tzcnt_u64(
unsigned __int64);
1834 extern void __cdecl
_invpcid(
unsigned int ,
void * );
/*
 * 64-bit declarations with _HLEAcquire / _HLERelease name suffixes.
 * Apart from _Store64_HLERelease (only a Release form is visible here), each
 * operation is declared as an Acquire/Release pair with identical parameter
 * lists. None of these declarations carries an explicit calling convention.
 * NOTE(review): whether this group sits inside an architecture guard cannot
 * be determined from this listing.
 */
/* Store: (__int64 volatile *, __int64), no return value. */
1868 extern void _Store64_HLERelease(__int64
volatile *,__int64);
/* Exchange: (__int64 volatile *, __int64), returns __int64. */
1869 extern __int64 _InterlockedExchange64_HLEAcquire(__int64
volatile *,__int64);
1870 extern __int64 _InterlockedExchange64_HLERelease(__int64
volatile *,__int64);
/* ExchangeAdd: (__int64 volatile *, __int64), returns __int64. */
1872 extern __int64 _InterlockedExchangeAdd64_HLEAcquire(__int64
volatile *,__int64);
1873 extern __int64 _InterlockedExchangeAdd64_HLERelease(__int64
volatile *,__int64);
/* And / Or / Xor: (__int64 volatile *, __int64), each returns __int64. */
1875 extern __int64 _InterlockedAnd64_HLEAcquire(__int64
volatile *,__int64);
1876 extern __int64 _InterlockedAnd64_HLERelease(__int64
volatile *,__int64);
1877 extern __int64 _InterlockedOr64_HLEAcquire(__int64
volatile *,__int64);
1878 extern __int64 _InterlockedOr64_HLERelease(__int64
volatile *,__int64);
1879 extern __int64 _InterlockedXor64_HLEAcquire(__int64
volatile *,__int64);
1880 extern __int64 _InterlockedXor64_HLERelease(__int64
volatile *,__int64);
/*
 * Bit test-and-set / test-and-reset: return unsigned char. Unlike the
 * declarations above, these name their parameters (a, b) and take a plain
 * `__int64 *` rather than `__int64 volatile *`.
 */
1882 extern unsigned char _interlockedbittestandset64_HLEAcquire(__int64 *a,__int64 b);
1883 extern unsigned char _interlockedbittestandset64_HLERelease(__int64 *a,__int64 b);
1884 extern unsigned char _interlockedbittestandreset64_HLEAcquire(__int64 *a,__int64 b);
1885 extern unsigned char _interlockedbittestandreset64_HLERelease(__int64 *a,__int64 b);
/*
 * Transaction status values.
 *   _XBEGIN_STARTED   all bits set, as an unsigned int (~0u).
 *   _XABORT_*         six single-bit flags occupying bits 0 through 5.
 *   _XABORT_CODE(x)   shifts x right by 24, masks to the low 8 bits, and
 *                     converts the result to unsigned char; x is expanded once.
 * NOTE(review): presumably these are used to interpret the unsigned int
 * returned by _xbegin() -- the listing does not show that link; confirm with
 * the RTM documentation.
 */
1889 #define _XBEGIN_STARTED (~0u)
1890 #define _XABORT_EXPLICIT (1 << 0)
1891 #define _XABORT_RETRY (1 << 1)
1892 #define _XABORT_CONFLICT (1 << 2)
1893 #define _XABORT_CAPACITY (1 << 3)
1894 #define _XABORT_DEBUG (1 << 4)
1895 #define _XABORT_NESTED (1 << 5)
1896 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
/* __cdecl; no parameters, returns unsigned int. */
1898 extern unsigned int __cdecl
_xbegin(
void);
/* __cdecl; no parameters, no return value. */
1899 extern void __cdecl
_xend(
void);
/* __cdecl; takes one const unsigned int, no return value. */
1900 extern void __cdecl
_xabort(
const unsigned int);
/* __cdecl; no parameters, returns unsigned char. */
1901 extern unsigned char __cdecl
_xtest(
void);
1928 extern unsigned char __cdecl _addcarryx_u64(
unsigned char ,
1931 unsigned __int64 * );
1934 #if defined __cplusplus
void __cdecl _mm256_storeu_pd(double *, __m256d)
__m256 __cdecl _mm256_setzero_ps(void)
__m256i __cdecl _mm256_set_epi16(short, short, short, short, short, short, short, short, short, short, short, short, short, short, short, short)
__m256i __cdecl _mm256_i32gather_epi64(__int64 const *, __m128i, const int)
unsigned int _blsmsk_u32(unsigned int)
__m256d __cdecl _mm256_sub_pd(__m256d, __m256d)
int __cdecl _mm_testz_pd(__m128d, __m128d)
__m128i __cdecl _mm_broadcastw_epi16(__m128i)
void __cdecl _mm256_stream_si256(__m256i *, __m256i)
__m256i __cdecl _mm256_add_epi64(__m256i, __m256i)
__m128i __cdecl _mm_broadcastd_epi32(__m128i)
__m256 __cdecl _mm256_broadcast_ss(float const *)
void __cdecl _mm256_maskstore_epi64(__int64 *, __m256i, __m256i)
__m128d __cdecl _mm_permutevar_pd(__m128d, __m128i)
__m256d __cdecl _mm256_div_pd(__m256d, __m256d)
int __cdecl _mm256_testnzc_si256(__m256i, __m256i)
__m256i __cdecl _mm256_srli_epi32(__m256i, int)
long _InterlockedExchange_HLEAcquire(long volatile *, long)
__m128i __cdecl _mm_broadcastq_epi64(__m128i)
__m256i __cdecl _mm256_sll_epi16(__m256i, __m128i)
__m256i __cdecl _mm256_srav_epi32(__m256i, __m256i)
__m256 __cdecl _mm256_div_ps(__m256, __m256)
void __cdecl _xabort(const unsigned int)
long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long)
__m256d __cdecl _mm256_blend_pd(__m256d, __m256d, const int)
__m256 __cdecl _mm256_rsqrt_ps(__m256)
__m256d __cdecl _mm256_cvtps_pd(__m128)
__m256i __cdecl _mm256_load_si256(__m256i const *)
__m256i __cdecl _mm256_sllv_epi64(__m256i, __m256i)
__m256 __cdecl _mm256_i32gather_ps(float const *, __m256i, const int)
__m256i __cdecl _mm256_subs_epu8(__m256i, __m256i)
__m256i __cdecl _mm256_unpackhi_epi32(__m256i, __m256i)
__m256i __cdecl _mm256_sub_epi16(__m256i, __m256i)
__m256d __cdecl _mm256_andnot_pd(__m256d, __m256d)
void __cdecl _xsave(void *, unsigned __int64)
__m256i __cdecl _mm256_sign_epi16(__m256i, __m256i)
__m256d __cdecl _mm256_round_pd(__m256d, int)
__m256i __cdecl _mm256_mulhrs_epi16(__m256i, __m256i)
__m256i __cdecl _mm256_shufflelo_epi16(__m256i, const int)
__m128 __cdecl _mm_cmp_ps(__m128, __m128, const int)
__m256d __cdecl _mm256_fmsubadd_pd(__m256d, __m256d, __m256d)
__m256i __cdecl _mm256_min_epu8(__m256i, __m256i)
__m256i __cdecl _mm256_srli_epi64(__m256i, int)
__m128 __cdecl _mm_fmadd_ss(__m128, __m128, __m128)
__m256i __cdecl _mm256_max_epi8(__m256i, __m256i)
__m128 __cdecl _mm256_i64gather_ps(float const *, __m256i, const int)
__m256i __cdecl _mm256_max_epi32(__m256i, __m256i)
__m256d __cdecl _mm256_set_pd(double, double, double, double)
__m256 __cdecl _mm256_loadu_ps(float const *)
void __cdecl _mm256_storeu_ps(float *, __m256)
__m256d __cdecl _mm256_load_pd(double const *)
__m256i __cdecl _mm256_min_epu32(__m256i, __m256i)
__m256d __cdecl _mm256_and_pd(__m256d, __m256d)
int __cdecl _rdseed32_step(unsigned int *)
__m256i __cdecl _mm256_mullo_epi32(__m256i, __m256i)
void __cdecl _mm_maskstore_ps(float *, __m128i, __m128)
__m256i __cdecl _mm256_shuffle_epi8(__m256i, __m256i)
__m256 __cdecl _mm256_insertf128_ps(__m256, __m128, int)
__m256i __cdecl _mm256_broadcastb_epi8(__m128i)
__m256i __cdecl _mm256_cmpgt_epi32(__m256i, __m256i)
__m128i __cdecl _mm_i32gather_epi32(int const *, __m128i, const int)
__m256i __cdecl _mm256_cmpgt_epi16(__m256i, __m256i)
__m256i __cdecl _mm256_setr_epi16(short, short, short, short, short, short, short, short, short, short, short, short, short, short, short, short)
__m256d __cdecl _mm256_castps_pd(__m256)
unsigned int _blsr_u32(unsigned int)
__m256 __cdecl _mm256_sqrt_ps(__m256)
__m256d __cdecl _mm256_movedup_pd(__m256d)
__m128d __cdecl _mm_fnmsub_sd(__m128d, __m128d, __m128d)
__m256i __cdecl _mm256_unpacklo_epi16(__m256i, __m256i)
__m256d __cdecl _mm256_fnmadd_pd(__m256d, __m256d, __m256d)
__m256i __cdecl _mm256_permute2f128_si256(__m256i, __m256i, int)
__m128i __cdecl _mm_maskload_epi64(__int64 const *, __m128i)
long _InterlockedXor_HLEAcquire(long volatile *, long)
__m256d __cdecl _mm256_fnmsub_pd(__m256d, __m256d, __m256d)
__m256 __cdecl _mm256_xor_ps(__m256, __m256)
__m256i __cdecl _mm256_max_epu8(__m256i, __m256i)
__m256i __cdecl _mm256_inserti128_si256(__m256i, __m128i, const int)
long _InterlockedXor_HLERelease(long volatile *, long)
__m128i __cdecl _mm_maskload_epi32(int const *, __m128i)
__m128d __cdecl _mm_cmp_pd(__m128d, __m128d, const int)
__m256i __cdecl _mm256_avg_epu8(__m256i, __m256i)
unsigned int _rorx_u32(unsigned int, const unsigned int)
__m128d
Definition: emmintrin.h:48
__m256 __cdecl _mm256_load_ps(float const *)
__m256i __cdecl _mm256_add_epi32(__m256i, __m256i)
__m128 __cdecl _mm256_mask_i64gather_ps(__m128, float const *, __m256i, __m128, const int)
__m256i __cdecl _mm256_avg_epu16(__m256i, __m256i)
__m256d
Definition: immintrin.h:38
void __cdecl _mm256_stream_ps(float *, __m256)
__m256d __cdecl _mm256_loadu_pd(double const *)
unsigned char _interlockedbittestandset_HLEAcquire(long *a, long b)
long _InterlockedCompareExchange_HLERelease(long volatile *, long, long)
unsigned char __cdecl _addcarryx_u32(unsigned char, unsigned int, unsigned int, unsigned int *)
__m256i __cdecl _mm256_max_epu32(__m256i, __m256i)
unsigned int _mulx_u32(unsigned int, unsigned int, unsigned int *)
__m128 __cdecl _mm_maskload_ps(float const *, __m128i)
__m256i __cdecl _mm256_srli_epi16(__m256i, int)
__m128 __cdecl _mm_permutevar_ps(__m128, __m128i)
__m256 __cdecl _mm256_permutevar_ps(__m256, __m256i)
__m256 __cdecl _mm256_castps128_ps256(__m128)
long _InterlockedOr_HLERelease(long volatile *, long)
void __cdecl _mm256_store_pd(double *, __m256d)
__m256 __cdecl _mm256_fmsubadd_ps(__m256, __m256, __m256)
__m256 __cdecl _mm256_permute_ps(__m256, int)
__m128d __cdecl _mm_fmsub_pd(__m128d, __m128d, __m128d)
__m256i __cdecl _mm256_setzero_si256(void)
void * _InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *, void *)
__m256 __cdecl _mm256_add_ps(__m256, __m256)
__m256d __cdecl _mm256_permute_pd(__m256d, int)
int __cdecl _mm256_testnzc_pd(__m256d, __m256d)
unsigned int _shlx_u32(unsigned int, unsigned int)
__m256i __cdecl _mm256_cmpeq_epi64(__m256i, __m256i)
__m256d __cdecl _mm256_shuffle_pd(__m256d, __m256d, const int)
__m256i __cdecl _mm256_madd_epi16(__m256i, __m256i)
__m256i __cdecl _mm256_set1_epi64x(long long)
__m256i __cdecl _mm256_packs_epi16(__m256i, __m256i)
__m128d __cdecl _mm_fmadd_sd(__m128d, __m128d, __m128d)
__m256i __cdecl _mm256_blend_epi16(__m256i, __m256i, const int)
__m256d __cdecl _mm256_i64gather_pd(double const *, __m256i, const int)
__m256d __cdecl _mm256_permute2f128_pd(__m256d, __m256d, int)
__m256i __cdecl _mm256_adds_epu16(__m256i, __m256i)
__m256i __cdecl _mm256_maskload_epi32(int const *, __m256i)
__m256d __cdecl _mm256_mul_pd(__m256d, __m256d)
unsigned char __cdecl _xtest(void)
__m256d __cdecl _mm256_broadcast_sd(double const *)
int __cdecl _mm256_testc_pd(__m256d, __m256d)
__m256i __cdecl _mm256_sll_epi32(__m256i, __m128i)
__m256i __cdecl _mm256_set1_epi8(char)
void _StorePointer_HLERelease(void *volatile *, void *)
__m256i __cdecl _mm256_hsubs_epi16(__m256i, __m256i)
__m256i __cdecl _mm256_blend_epi32(__m256i, __m256i, const int)
__m256i __cdecl _mm256_alignr_epi8(__m256i, __m256i, const int)
__m256i __cdecl _mm256_mask_i64gather_epi64(__m256i, __int64 const *, __m256i, __m256i, const int)
__m256i __cdecl _mm256_unpackhi_epi16(__m256i, __m256i)
long _InterlockedExchangeAdd_HLERelease(long volatile *, long)
__m256 __cdecl _mm256_fmaddsub_ps(__m256, __m256, __m256)
__m256i __cdecl _mm256_cvtepi32_epi64(__m128i)
__m256i __cdecl _mm256_cmpeq_epi8(__m256i, __m256i)
__m256i __cdecl _mm256_adds_epu8(__m256i, __m256i)
__m256 __cdecl _mm256_castpd_ps(__m256d)
void __cdecl _mm256_maskstore_epi32(int *, __m256i, __m256i)
__m256i __cdecl _mm256_set_epi64x(__int64, __int64, __int64, __int64)
__m256d __cdecl _mm256_fmsub_pd(__m256d, __m256d, __m256d)
__m256i __cdecl _mm256_castpd_si256(__m256d)
__m256i __cdecl _mm256_packs_epi32(__m256i, __m256i)
__m128 __cdecl _mm_fmsub_ss(__m128, __m128, __m128)
__m256i __cdecl _mm256_abs_epi32(__m256i)
__m128d __cdecl _mm_fmaddsub_pd(__m128d, __m128d, __m128d)
unsigned __int64 __cdecl _xgetbv(unsigned int)
__m256i __cdecl _mm256_cvtepu8_epi32(__m128i)
int __cdecl _rdrand16_step(unsigned short *)
__m256d __cdecl _mm256_cvtepi32_pd(__m128i)
__m128 __cdecl _mm_cvtph_ps(__m128i)
__m256i __cdecl _mm256_unpacklo_epi32(__m256i, __m256i)
__m256i __cdecl _mm256_cvtepi16_epi32(__m128i)
__m128d __cdecl _mm_broadcastsd_pd(__m128d)
__m256d __cdecl _mm256_sqrt_pd(__m256d)
__m256i __cdecl _mm256_max_epu16(__m256i, __m256i)
__m256i __cdecl _mm256_mask_i32gather_epi64(__m256i, __int64 const *, __m128i, __m256i, const int)
__m128 __cdecl _mm_fnmsub_ps(__m128, __m128, __m128)
int __cdecl _mm_testnzc_pd(__m128d, __m128d)
__m256 __cdecl _mm256_hsub_ps(__m256, __m256)
void * _InterlockedExchangePointer_HLERelease(void *volatile *, void *)
__m256 __cdecl _mm256_sub_ps(__m256, __m256)
__m128i __cdecl _mm_srlv_epi32(__m128i, __m128i)
__m256i __cdecl _mm256_abs_epi16(__m256i)
int __cdecl _mm_testc_pd(__m128d, __m128d)
__m256i __cdecl _mm256_sign_epi32(__m256i, __m256i)
__m256i __cdecl _mm256_setr_epi32(int, int, int, int, int, int, int, int)
__m256i __cdecl _mm256_broadcastsi128_si256(__m128i)
__m256 __cdecl _mm256_moveldup_ps(__m256)
__m128i __cdecl _mm_srav_epi32(__m128i, __m128i)
__m256i __cdecl _mm256_packus_epi16(__m256i, __m256i)
__m256d __cdecl _mm256_mask_i64gather_pd(__m256d, double const *, __m256i, __m256d, const int)
__m256 __cdecl _mm256_and_ps(__m256, __m256)
__m256d __cdecl _mm256_addsub_pd(__m256d, __m256d)
__m128i __cdecl _mm_i64gather_epi64(__int64 const *, __m128i, const int)
__m256i __cdecl _mm256_sub_epi8(__m256i, __m256i)
__m256i __cdecl _mm256_and_si256(__m256i, __m256i)
__m256i __cdecl _mm256_unpacklo_epi64(__m256i, __m256i)
__m128 __cdecl _mm_cmp_ss(__m128, __m128, const int)
__m256i __cdecl _mm256_castsi128_si256(__m128i)
__m128 __cdecl _mm_i32gather_ps(float const *, __m128i, const int)
__m128d __cdecl _mm_fmsub_sd(__m128d, __m128d, __m128d)
__m256i __cdecl _mm256_abs_epi8(__m256i)
__m256i __cdecl _mm256_mul_epi32(__m256i, __m256i)
__m256i __cdecl _mm256_sign_epi8(__m256i, __m256i)
__m256 __cdecl _mm256_cvtph_ps(__m128i)
__m128i __cdecl _mm_sllv_epi64(__m128i, __m128i)
int __cdecl _mm256_movemask_pd(__m256d)
__m128i __cdecl _mm_i64gather_epi32(int const *, __m128i, const int)
__m256 __cdecl _mm256_castsi256_ps(__m256i)
__m256 __cdecl _mm256_movehdup_ps(__m256)
void _Store_HLERelease(long volatile *, long)
__m128 __cdecl _mm_fnmadd_ss(__m128, __m128, __m128)
void __cdecl _mm256_maskstore_pd(double *, __m256i, __m256d)
__m256i __cdecl _mm256_packus_epi32(__m256i, __m256i)
__m128d __cdecl _mm_mask_i32gather_pd(__m128d, double const *, __m128i, __m128d, const int)
__m256i __cdecl _mm256_set1_epi32(int)
__m256d __cdecl _mm256_i32gather_pd(double const *, __m128i, const int)
unsigned char _interlockedbittestandreset_HLERelease(long *a, long b)
__m256d __cdecl _mm256_broadcast_pd(__m128d const *)
__m256d __cdecl _mm256_hsub_pd(__m256d, __m256d)
__m256d __cdecl _mm256_unpacklo_pd(__m256d, __m256d)
__m256d __cdecl _mm256_max_pd(__m256d, __m256d)
__m256i __cdecl _mm256_cmpgt_epi64(__m256i, __m256i)
int __cdecl _mm256_movemask_ps(__m256)
void __cdecl _fxrstor(void const *)
__m256 __cdecl _mm256_fnmsub_ps(__m256, __m256, __m256)
__m256i __cdecl _mm256_srai_epi32(__m256i, int)
unsigned int _lzcnt_u32(unsigned int)
__m128i __cdecl _mm256_extractf128_si256(__m256i, const int)
__m256 __cdecl _mm256_dp_ps(__m256, __m256, const int)
__m256 __cdecl _mm256_blendv_ps(__m256, __m256, __m256)
__m256i __cdecl _mm256_mask_i32gather_epi32(__m256i, int const *, __m256i, __m256i, const int)
__m256i __cdecl _mm256_subs_epi8(__m256i, __m256i)
__m256i __cdecl _mm256_min_epu16(__m256i, __m256i)
__m256d __cdecl _mm256_unpackhi_pd(__m256d, __m256d)
__m256i __cdecl _mm256_mulhi_epu16(__m256i, __m256i)
__m128 __cdecl _mm256_castps256_ps128(__m256)
__m256i __cdecl _mm256_sll_epi64(__m256i, __m128i)
__m128i __cdecl _mm256_mask_i64gather_epi32(__m128i, int const *, __m256i, __m128i, const int)
__m256i __cdecl _mm256_cmpeq_epi32(__m256i, __m256i)
__m128 __cdecl _mm_broadcastss_ps(__m128)
__m128d __cdecl _mm_maskload_pd(double const *, __m128i)
long _InterlockedAnd_HLERelease(long volatile *, long)
__m256 __cdecl _mm256_mask_i32gather_ps(__m256, float const *, __m256i, __m256, const int)
__m256i __cdecl _mm256_hsub_epi32(__m256i, __m256i)
__m256 __cdecl _mm256_cvtepi32_ps(__m256i)
__m256i __cdecl _mm256_unpackhi_epi64(__m256i, __m256i)
__m256i __cdecl _mm256_hadd_epi16(__m256i, __m256i)
__m256 __cdecl _mm256_hadd_ps(__m256, __m256)
__m256i __cdecl _mm256_cmpeq_epi16(__m256i, __m256i)
void __cdecl _mm_maskstore_pd(double *, __m128i, __m128d)
__m128d __cdecl _mm_fmsubadd_pd(__m128d, __m128d, __m128d)
__m256i __cdecl _mm256_cvtepu8_epi16(__m128i)
__m128i __cdecl _mm256_cvtpd_epi32(__m256d)
__m256 __cdecl _mm256_set1_ps(float)
void __cdecl _xrstor(void const *, unsigned __int64)
__m256i __cdecl _mm256_maskload_epi64(__int64 const *, __m256i)
__m256i __cdecl _mm256_srl_epi32(__m256i, __m128i)
__m256i __cdecl _mm256_mpsadbw_epu8(__m256i, __m256i, const int)
__m256 __cdecl _mm256_addsub_ps(__m256, __m256)
__m256d __cdecl _mm256_min_pd(__m256d, __m256d)
__m256i __cdecl _mm256_mulhi_epi16(__m256i, __m256i)
__m256d __cdecl _mm256_fmadd_pd(__m256d, __m256d, __m256d)
__m256i __cdecl _mm256_srlv_epi32(__m256i, __m256i)
__m128 __cdecl _mm_broadcast_ss(float const *)
__m128d __cdecl _mm256_castpd256_pd128(__m256d)
__m128i
Definition: emmintrin.h:44
__m256
Definition: immintrin.h:34
__m256 __cdecl _mm256_setr_ps(float, float, float, float, float, float, float, float)
__m128i __cdecl _mm_broadcastb_epi8(__m128i)
__m256i __cdecl _mm256_mul_epu32(__m256i, __m256i)
__m128d __cdecl _mm_fmadd_pd(__m128d, __m128d, __m128d)
int __cdecl _mm256_testz_si256(__m256i, __m256i)
__m256i __cdecl _mm256_setr_epi8(char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char)
#define _CRT_ALIGN(x)
Definition: crtdefs.h:604
__m128 __cdecl _mm_i64gather_ps(float const *, __m128i, const int)
void __cdecl _xsaveopt(void *, unsigned __int64)
__m256i __cdecl _mm256_broadcastw_epi16(__m128i)
__m256d __cdecl _mm256_permute4x64_pd(__m256d, const int)
__m256d __cdecl _mm256_setr_pd(double, double, double, double)
int __cdecl _mm256_testc_ps(__m256, __m256)
__m128
Definition: xmmintrin.h:70
void __cdecl _mm256_zeroall(void)
void __cdecl _mm256_store_ps(float *, __m256)
unsigned int _shrx_u32(unsigned int, unsigned int)
void __cdecl _mm256_storeu_si256(__m256i *, __m256i)
__m128 __cdecl _mm_fmsub_ps(__m128, __m128, __m128)
__m128i __cdecl _mm_sllv_epi32(__m128i, __m128i)
__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64, __int64)
__m256i __cdecl _mm256_min_epi32(__m256i, __m256i)
__m256i __cdecl _mm256_cvtepi8_epi16(__m128i)
__m256i __cdecl _mm256_permute2x128_si256(__m256i, __m256i, const int)
__m256i __cdecl _mm256_stream_load_si256(__m256i const *)
__m256i __cdecl _mm256_or_si256(__m256i, __m256i)
__m256i __cdecl _mm256_add_epi8(__m256i, __m256i)
int __cdecl _mm_testz_ps(__m128, __m128)
__m256 __cdecl _mm256_mul_ps(__m256, __m256)
__m256i __cdecl _mm256_add_epi16(__m256i, __m256i)
__m256i __cdecl _mm256_cvtepi16_epi64(__m128i)
__m256i __cdecl _mm256_insertf128_si256(__m256i, __m128i, int)
int __cdecl _mm_testnzc_ps(__m128, __m128)
__m256i __cdecl _mm256_unpacklo_epi8(__m256i, __m256i)
void __cdecl _invpcid(unsigned int, void *)
__m256 __cdecl _mm256_fnmadd_ps(__m256, __m256, __m256)
__m256i __cdecl _mm256_cvtepi8_epi64(__m128i)
__m256i __cdecl _mm256_andnot_si256(__m256i, __m256i)
void __cdecl _mm256_zeroupper(void)
__m256i __cdecl _mm256_slli_epi64(__m256i, int)
__m128i __cdecl _mm256_extracti128_si256(__m256i, const int)
unsigned int __cdecl _xbegin(void)
__m256i __cdecl _mm256_hsub_epi16(__m256i, __m256i)
int __cdecl _rdseed16_step(unsigned short *)
__m128i __cdecl _mm256_cvtps_ph(__m256, int)
__int64 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *, __int64, __int64)
unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int)
__m128d __cdecl _mm_i32gather_pd(double const *, __m128i, const int)
__m128i __cdecl _mm256_castsi256_si128(__m256i)
__m256 __cdecl _mm256_min_ps(__m256, __m256)
__m256i __cdecl _mm256_slli_si256(__m256i, const int)
__m128d __cdecl _mm_mask_i64gather_pd(__m128d, double const *, __m128i, __m128d, const int)
__m128 __cdecl _mm_fnmadd_ps(__m128, __m128, __m128)
__m256 __cdecl _mm256_andnot_ps(__m256, __m256)
void __cdecl _fxsave(void *)
__m256i __cdecl _mm256_set1_epi16(short)
__m256i __cdecl _mm256_slli_epi16(__m256i, int)
int __cdecl _rdseed64_step(unsigned __int64 *)
int _sarx_i32(int, unsigned int)
__m256 __cdecl _mm256_blend_ps(__m256, __m256, const int)
unsigned int _tzcnt_u32(unsigned int)
__m256 __cdecl _mm256_unpackhi_ps(__m256, __m256)
__m256i __cdecl _mm256_broadcastd_epi32(__m128i)
long _InterlockedCompareExchange_HLEAcquire(long volatile *, long, long)
__m256 __cdecl _mm256_round_ps(__m256, int)
__m256 __cdecl _mm256_permute2f128_ps(__m256, __m256, int)
unsigned int _pext_u32(unsigned int, unsigned int)
__m256d __cdecl _mm256_insertf128_pd(__m256d, __m128d, int)
__m256i __cdecl _mm256_castps_si256(__m256)
__m256i __cdecl _mm256_xor_si256(__m256i, __m256i)
__m256i __cdecl _mm256_cvtepu16_epi32(__m128i)
__m256d __cdecl _mm256_maskload_pd(double const *, __m256i)
unsigned int _pdep_u32(unsigned int, unsigned int)
__m256d __cdecl _mm256_castsi256_pd(__m256i)
__m256i __cdecl _mm256_sra_epi32(__m256i, __m128i)
__m128i __cdecl _mm_cvtps_ph(__m128 m1, const int imm)
void * _InterlockedExchangePointer_HLEAcquire(void *volatile *, void *)
__m256 __cdecl _mm256_maskload_ps(float const *, __m256i)
int __cdecl _mm256_testz_pd(__m256d, __m256d)
__m256d __cdecl _mm256_blendv_pd(__m256d, __m256d, __m256d)
__m256i __cdecl _mm256_shuffle_epi32(__m256i, const int)
__m128i __cdecl _mm_mask_i32gather_epi32(__m128i, int const *, __m128i, __m128i, const int)
__m256d __cdecl _mm256_or_pd(__m256d, __m256d)
__m128i __cdecl _mm_mask_i64gather_epi64(__m128i, __int64 const *, __m128i, __m128i, const int)
__m256i __cdecl _mm256_srl_epi16(__m256i, __m128i)
__m256i __cdecl _mm256_cvtepi8_epi32(__m128i)
__m128 __cdecl _mm_fmaddsub_ps(__m128, __m128, __m128)
__m256i __cdecl _mm256_slli_epi32(__m256i, int)
__m256 __cdecl _mm256_or_ps(__m256, __m256)
__m128i __cdecl _mm_i32gather_epi64(__int64 const *, __m128i, const int)
__m256i __cdecl _mm256_subs_epi16(__m256i, __m256i)
__m256i __cdecl _mm256_lddqu_si256(__m256i const *)
__m256i __cdecl _mm256_hadd_epi32(__m256i, __m256i)
__m256i __cdecl _mm256_adds_epi8(__m256i, __m256i)
__m256i __cdecl _mm256_srli_si256(__m256i, const int)
__m256i __cdecl _mm256_maddubs_epi16(__m256i, __m256i)
__m128 __cdecl _mm_mask_i64gather_ps(__m128, float const *, __m128i, __m128, const int)
long _InterlockedOr_HLEAcquire(long volatile *, long)
__m256d __cdecl _mm256_setzero_pd(void)
__m128d __cdecl _mm_cmp_sd(__m128d, __m128d, const int)
__m128i __cdecl _mm_srlv_epi64(__m128i, __m128i)
unsigned char _interlockedbittestandreset_HLEAcquire(long *a, long b)
__m256i __cdecl _mm256_cmpgt_epi8(__m256i, __m256i)
__m256 __cdecl _mm256_rcp_ps(__m256)
__m256i __cdecl _mm256_unpackhi_epi8(__m256i, __m256i)
__m256i __cdecl _mm256_sllv_epi32(__m256i, __m256i)
__m256i __cdecl _mm256_i32gather_epi32(int const *, __m256i, const int)
__m256 __cdecl _mm256_permutevar8x32_ps(__m256, __m256i)
__m256i __cdecl _mm256_cvtepu8_epi64(__m128i)
__m128d __cdecl _mm_fnmsub_pd(__m128d, __m128d, __m128d)
__m128d __cdecl _mm_i64gather_pd(double const *, __m128i, const int)
int __cdecl _mm256_testnzc_ps(__m256, __m256)
__m256i __cdecl _mm256_broadcastq_epi64(__m128i)
__m256d __cdecl _mm256_broadcastsd_pd(__m128d)
__int64 m1
Definition: ivec.h:114
__m256i __cdecl _mm256_loadu_si256(__m256i const *)
__m256d __cdecl _mm256_set1_pd(double)
__m256 __cdecl _mm256_fmsub_ps(__m256, __m256, __m256)
__m256i __cdecl _mm256_srl_epi64(__m256i, __m128i)
__m128i __cdecl _mm256_cvttpd_epi32(__m256d)
__m256i __cdecl _mm256_min_epi8(__m256i, __m256i)
__m256d __cdecl _mm256_hadd_pd(__m256d, __m256d)
__m256 __cdecl _mm256_max_ps(__m256, __m256)
__m256i __cdecl _mm256_min_epi16(__m256i, __m256i)
__m256i __cdecl _mm256_srai_epi16(__m256i, int)
__m256i __cdecl _mm256_max_epi16(__m256i, __m256i)
__m128i __cdecl _mm256_i64gather_epi32(int const *, __m256i, const int)
__m256 __cdecl _mm256_set_ps(float, float, float, float, float, float, float, float)
__m256i __cdecl _mm256_blendv_epi8(__m256i, __m256i, __m256i)
__m256 __cdecl _mm256_cmp_ps(__m256, __m256, const int)
unsigned int _bzhi_u32(unsigned int, unsigned int)
__m256d __cdecl _mm256_permutevar_pd(__m256d, __m256i)
__m128 __cdecl _mm_fmsubadd_ps(__m128, __m128, __m128)
__m256i __cdecl _mm256_cvtepu16_epi64(__m128i)
__m256i __cdecl _mm256_sra_epi16(__m256i, __m128i)
void __cdecl _xsetbv(unsigned int, unsigned __int64)
__m128 __cdecl _mm_fmadd_ps(__m128, __m128, __m128)
__m256i __cdecl _mm256_cvtps_epi32(__m256)
__m256i __cdecl _mm256_sad_epu8(__m256i, __m256i)
int __cdecl _mm256_testz_ps(__m256, __m256)
__m256 __cdecl _mm256_broadcast_ps(__m128 const *)
__m256d __cdecl _mm256_cmp_pd(__m256d, __m256d, const int)
__m256d __cdecl _mm256_xor_pd(__m256d, __m256d)
__m256i __cdecl _mm256_i64gather_epi64(__int64 const *, __m256i, const int)
__m256d __cdecl _mm256_add_pd(__m256d, __m256d)
__m256 __cdecl _mm256_broadcastss_ps(__m128)
__m256i __cdecl _mm256_sub_epi32(__m256i, __m256i)
__m256d __cdecl _mm256_mask_i32gather_pd(__m256d, double const *, __m128i, __m256d, const int)
__m256i __cdecl _mm256_setr_epi64x(__int64, __int64, __int64, __int64)
int __cdecl _mm_testc_ps(__m128, __m128)
void __cdecl _mm_maskstore_epi64(__int64 *, __m128i, __m128i)
__m128 __cdecl _mm256_cvtpd_ps(__m256d)
__m256i __cdecl _mm256_permute4x64_epi64(__m256i, const int)
__m128i __cdecl _mm_mask_i32gather_epi64(__m128i, __int64 const *, __m128i, __m128i, const int)
__m128i __cdecl _mm_blend_epi32(__m128i, __m128i, const int)
unsigned int _blsi_u32(unsigned int)
int __cdecl _mm256_movemask_epi8(__m256i)
long _InterlockedAnd_HLEAcquire(long volatile *, long)
__m256i __cdecl _mm256_hadds_epi16(__m256i, __m256i)
__m256i __cdecl _mm256_adds_epi16(__m256i, __m256i)
__m128d __cdecl _mm_fnmadd_pd(__m128d, __m128d, __m128d)
__m256i
Definition: immintrin.h:49
__m256i __cdecl _mm256_permutevar8x32_epi32(__m256i, __m256i)
__m256i __cdecl _mm256_set_epi32(int, int, int, int, int, int, int, int)
__m256i __cdecl _mm256_set_epi8(char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char)
__m256i __cdecl _mm256_sub_epi64(__m256i, __m256i)
void __cdecl _mm256_maskstore_ps(float *, __m256i, __m256)
int __cdecl _mm256_testc_si256(__m256i, __m256i)
void * _InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *, void *)
__m256 __cdecl _mm256_fmadd_ps(__m256, __m256, __m256)
void __cdecl _mm_maskstore_epi32(int *, __m128i, __m128i)
__m128 __cdecl _mm_fnmsub_ss(__m128, __m128, __m128)
__m256d __cdecl _mm256_fmaddsub_pd(__m256d, __m256d, __m256d)
void __cdecl _mm256_store_si256(__m256i *, __m256i)
__m128d __cdecl _mm_fnmadd_sd(__m128d, __m128d, __m128d)
__m128i __cdecl _mm_mask_i64gather_epi32(__m128i, int const *, __m128i, __m128i, const int)
__m256i __cdecl _mm256_cvttps_epi32(__m256)
__m128 __cdecl _mm_permute_ps(__m128, int)
__m256i __cdecl _mm256_srlv_epi64(__m256i, __m256i)
__m256i __cdecl _mm256_cvtepu32_epi64(__m128i)
__m256 __cdecl _mm256_shuffle_ps(__m256, __m256, const int)
__m256i __cdecl _mm256_mullo_epi16(__m256i, __m256i)
__m256i __cdecl _mm256_shufflehi_epi16(__m256i, const int)
unsigned char _interlockedbittestandset_HLERelease(long *a, long b)
void __cdecl _mm256_stream_pd(double *, __m256d)
__m256d __cdecl _mm256_castpd128_pd256(__m128d)
long _InterlockedExchange_HLERelease(long volatile *, long)
__m128 __cdecl _mm256_extractf128_ps(__m256, const int)
__m128d __cdecl _mm256_extractf128_pd(__m256d, const int)
__m256i __cdecl _mm256_subs_epu16(__m256i, __m256i)
__m128 __cdecl _mm_mask_i32gather_ps(__m128, float const *, __m128i, __m128, const int)
int __cdecl _rdrand32_step(unsigned int *)
__m128d __cdecl _mm_permute_pd(__m128d, int)
__m256 __cdecl _mm256_unpacklo_ps(__m256, __m256)