#if !defined(_M_IX86) && !defined(_M_X64)
#error This header is specific to X86 and X64 targets
#if defined (_M_CEE_PURE)
#error ERROR: XMM intrinsics not supported in the pure mode!
#ifndef _MMINTRIN_H_INCLUDED
#ifdef _MM2_FUNCTIONALITY
#ifndef _MM_FUNCTIONALITY
#define _MM_FUNCTIONALITY
#ifdef _MM_FUNCTIONALITY
unsigned __int64 m128_u64[2];
unsigned __int8 m128_u8[16];
unsigned __int16 m128_u16[8];
unsigned __int32 m128_u32[4];
#if !defined _VCRT_BUILD && !defined _INC_MALLOC
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) (((fp3) << 6) | ((fp2) << 4) | \
                                      ((fp1) << 2) | ((fp0)))
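/*
 * Illustrative sketch, not part of the shipped header: _MM_SHUFFLE packs four
 * 2-bit lane selectors (fp3..fp0, each 0-3) into the 8-bit immediate expected
 * by _mm_shuffle_ps. The helper name below is hypothetical and assumes the
 * intrinsic declarations later in this header are visible.
 */
static __inline __m128 __example_reverse_ps(__m128 _V)
{
    /* Select lanes 3,2,1,0 of _V: _MM_SHUFFLE(0, 1, 2, 3) == 0x1B. */
    return _mm_shuffle_ps(_V, _V, _MM_SHUFFLE(0, 1, 2, 3));
}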
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) {          \
    __m128 _Tmp3, _Tmp2, _Tmp1, _Tmp0;                       \
    _Tmp0 = _mm_shuffle_ps((row0), (row1), 0x44);            \
    _Tmp2 = _mm_shuffle_ps((row0), (row1), 0xEE);            \
    _Tmp1 = _mm_shuffle_ps((row2), (row3), 0x44);            \
    _Tmp3 = _mm_shuffle_ps((row2), (row3), 0xEE);            \
    (row0) = _mm_shuffle_ps(_Tmp0, _Tmp1, 0x88);             \
    (row1) = _mm_shuffle_ps(_Tmp0, _Tmp1, 0xDD);             \
    (row2) = _mm_shuffle_ps(_Tmp2, _Tmp3, 0x88);             \
    (row3) = _mm_shuffle_ps(_Tmp2, _Tmp3, 0xDD);             \
}
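/*
 * Illustrative sketch, not part of the shipped header: _MM_TRANSPOSE4_PS
 * transposes a 4x4 float matrix held in four __m128 rows in place using only
 * shuffles. The helper below is hypothetical and assumes _M points to sixteen
 * 16-byte-aligned floats stored row-major.
 */
static __inline void __example_transpose_4x4(float *_M)
{
    __m128 _R0 = _mm_load_ps(_M + 0);
    __m128 _R1 = _mm_load_ps(_M + 4);
    __m128 _R2 = _mm_load_ps(_M + 8);
    __m128 _R3 = _mm_load_ps(_M + 12);
    _MM_TRANSPOSE4_PS(_R0, _R1, _R2, _R3);   /* rows now hold the columns */
    _mm_store_ps(_M + 0, _R0);
    _mm_store_ps(_M + 4, _R1);
    _mm_store_ps(_M + 8, _R2);
    _mm_store_ps(_M + 12, _R3);
}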
#define _MM_HINT_NTA  0
#define _MM_HINT_T0   1
#define _MM_HINT_T1   2
#define _MM_HINT_T2   3
#define _MM_HINT_ENTA 4
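/*
 * Illustrative sketch, not part of the shipped header: the _MM_HINT_* values
 * select the temporal-locality hint passed to _mm_prefetch (NTA bypasses the
 * cache hierarchy as far as possible; T0/T1/T2 target progressively farther
 * cache levels). The hypothetical loop below prefetches ahead while scaling a
 * buffer; _Count4 is the number of 4-float groups.
 */
static __inline void __example_scale_with_prefetch(float *_Dst, float const *_Src, int _Count4)
{
    __m128 _Two = _mm_set_ps1(2.0f);
    int _I;
    for (_I = 0; _I < _Count4 * 4; _I += 4)
    {
        /* Hint data several iterations ahead into all cache levels. */
        _mm_prefetch((char const *)(_Src + _I + 64), _MM_HINT_T0);
        _mm_storeu_ps(_Dst + _I, _mm_mul_ps(_mm_loadu_ps(_Src + _I), _Two));
    }
}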
#define _MM_ALIGN16 _VCRT_ALIGN(16)

#define _MM_EXCEPT_MASK      0x003f
#define _MM_EXCEPT_INVALID   0x0001
#define _MM_EXCEPT_DENORM    0x0002
#define _MM_EXCEPT_DIV_ZERO  0x0004
#define _MM_EXCEPT_OVERFLOW  0x0008
#define _MM_EXCEPT_UNDERFLOW 0x0010
#define _MM_EXCEPT_INEXACT   0x0020

#define _MM_MASK_MASK        0x1f80
#define _MM_MASK_INVALID     0x0080
#define _MM_MASK_DENORM      0x0100
#define _MM_MASK_DIV_ZERO    0x0200
#define _MM_MASK_OVERFLOW    0x0400
#define _MM_MASK_UNDERFLOW   0x0800
#define _MM_MASK_INEXACT     0x1000

#define _MM_ROUND_MASK        0x6000
#define _MM_ROUND_NEAREST     0x0000
#define _MM_ROUND_DOWN        0x2000
#define _MM_ROUND_UP          0x4000
#define _MM_ROUND_TOWARD_ZERO 0x6000

#define _MM_FLUSH_ZERO_MASK 0x8000
#define _MM_FLUSH_ZERO_ON   0x8000
#define _MM_FLUSH_ZERO_OFF  0x0000
#define _MM_SET_EXCEPTION_STATE(mask) \
    _mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (mask))
#define _MM_GET_EXCEPTION_STATE() \
    (_mm_getcsr() & _MM_EXCEPT_MASK)

#define _MM_SET_EXCEPTION_MASK(mask) \
    _mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (mask))
#define _MM_GET_EXCEPTION_MASK() \
    (_mm_getcsr() & _MM_MASK_MASK)

#define _MM_SET_ROUNDING_MODE(mode) \
    _mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (mode))
#define _MM_GET_ROUNDING_MODE() \
    (_mm_getcsr() & _MM_ROUND_MASK)

#define _MM_SET_FLUSH_ZERO_MODE(mode) \
    _mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (mode))
#define _MM_GET_FLUSH_ZERO_MODE() \
    (_mm_getcsr() & _MM_FLUSH_ZERO_MASK)
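/*
 * Illustrative sketch, not part of the shipped header: the _MM_SET_* and
 * _MM_GET_* macros above are read-modify-write wrappers around _mm_getcsr /
 * _mm_setcsr. The hypothetical helper below switches SSE math to
 * round-toward-zero with flush-to-zero enabled and returns the previous MXCSR
 * value so the caller can restore it later via _mm_setcsr.
 */
static __inline unsigned int __example_enter_fast_fp_mode(void)
{
    unsigned int _OldCsr = _mm_getcsr();
    _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    return _OldCsr;
}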
#if defined __cplusplus
extern "C" { /* Begin "C" */
#endif
extern __int64 _mm_cvtss_si64(__m128 _A);
extern __int64 _mm_cvttss_si64(__m128 _A);
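/*
 * Illustrative sketch, not part of the shipped header (the 64-bit conversions
 * above are X64-only): _mm_cvtss_si64 rounds according to the current MXCSR
 * rounding mode (round-to-nearest-even by default), while _mm_cvttss_si64
 * always truncates toward zero. The helper name is hypothetical.
 */
static __inline void __example_cvt_vs_cvtt(__int64 *_Rounded, __int64 *_Truncated)
{
    __m128 _V = _mm_set_ss(2.7f);
    *_Rounded   = _mm_cvtss_si64(_V);  /* 3 under the default rounding mode */
    *_Truncated = _mm_cvttss_si64(_V); /* 2: truncation toward zero         */
}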
extern int _m_pextrw(__m64, int);
extern int _m_pmovmskb(__m64);
extern void _m_maskmovq(__m64, __m64, char *);
extern void* __cdecl _mm_malloc(size_t _Siz, size_t _Al);
extern void __cdecl _mm_free(void *_P);
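/*
 * Illustrative sketch, not part of the shipped header: _mm_malloc returns
 * storage aligned to _Al bytes and must be released with _mm_free, never with
 * free. 16-byte alignment satisfies _mm_load_ps / _mm_store_ps. The helper
 * name is hypothetical.
 */
static __inline float *__example_alloc_aligned_floats(size_t _Count)
{
    float *_Buf = (float *)_mm_malloc(_Count * sizeof(float), 16);
    if (_Buf && _Count >= 4)
    {
        _mm_store_ps(_Buf, _mm_setzero_ps());  /* aligned store is now legal */
    }
    return _Buf;  /* caller releases with _mm_free(_Buf) */
}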
#define _mm_cvtps_pi32    _mm_cvt_ps2pi
#define _mm_cvttps_pi32   _mm_cvtt_ps2pi
#define _mm_cvtpi32_ps    _mm_cvt_pi2ps
#define _mm_extract_pi16  _m_pextrw
#define _mm_insert_pi16   _m_pinsrw
#define _mm_max_pi16      _m_pmaxsw
#define _mm_max_pu8       _m_pmaxub
#define _mm_min_pi16      _m_pminsw
#define _mm_min_pu8       _m_pminub
#define _mm_movemask_pi8  _m_pmovmskb
#define _mm_mulhi_pu16    _m_pmulhuw
#define _mm_shuffle_pi16  _m_pshufw
#define _mm_maskmove_si64 _m_maskmovq
#define _mm_avg_pu8       _m_pavgb
#define _mm_avg_pu16      _m_pavgw
#define _mm_sad_pu8       _m_psadbw

#define _mm_cvtss_si32    _mm_cvt_ss2si
#define _mm_cvttss_si32   _mm_cvtt_ss2si
#define _mm_cvtsi32_ss    _mm_cvt_si2ss
#define _mm_set1_ps       _mm_set_ps1
#define _mm_load1_ps      _mm_load_ps1
#define _mm_store1_ps     _mm_store_ps1
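/*
 * Illustrative sketch, not part of the shipped header: the defines above make
 * the portable Intel-style names interchangeable with the Microsoft-specific
 * spellings, e.g. _mm_set1_ps(x) expands to _mm_set_ps1(x). The hypothetical
 * helper below computes a * x + y per lane using only the portable names.
 */
static __inline __m128 __example_saxpy4(float _A, __m128 _X, __m128 _Y)
{
    return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(_A), _X), _Y);
}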
__m64 _Ext_val = _mm_cmpgt_pi16(_mm_setzero_si64(), _A);

_Tmp = _mm_cvtpi32_ps(_mm_setzero_ps(), _mm_unpackhi_pi16(_A, _Ext_val));
return (_mm_cvtpi32_ps(_mm_movelh_ps(_Tmp, _Tmp),
                       _mm_unpacklo_pi16(_A, _Ext_val)));
__m64 _Ext_val = _mm_setzero_si64();

_Tmp = _mm_cvtpi32_ps(_mm_setzero_ps(), _mm_unpackhi_pi16(_A, _Ext_val));
return (_mm_cvtpi32_ps(_mm_movelh_ps(_Tmp, _Tmp),
                       _mm_unpacklo_pi16(_A, _Ext_val)));
return _mm_packs_pi32(_mm_cvtps_pi32(_A),
                      _mm_cvtps_pi32(_mm_movehl_ps(_A, _A)));
__m64 _Ext_val = _mm_cmpgt_pi8(_mm_setzero_si64(), _A);
return _mm_cvtpi16_ps(_mm_unpacklo_pi8(_A, _Ext_val));

return _mm_cvtpu16_ps(_mm_unpacklo_pi8(_A, _mm_setzero_si64()));

return _mm_packs_pi16(_mm_cvtps_pi16(_A), _mm_setzero_si64());
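/*
 * Illustrative sketch, not part of the shipped header: the composite
 * conversions above mix MMX and SSE primitives and are only available on
 * 32-bit (x86) targets, where __m64 is supported. Code that uses them must
 * execute _mm_empty() before any subsequent x87 floating-point work. The
 * hypothetical helper widens four signed 16-bit values to float, scales them,
 * and narrows back with signed saturation.
 */
static __inline __m64 __example_scale_pi16(__m64 _V, float _Scale)
{
    return _mm_cvtps_pi16(_mm_mul_ps(_mm_cvtpi16_ps(_V), _mm_set_ps1(_Scale)));
}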
#if defined __cplusplus
} /* End "C" */
#endif