33 #if defined (_M_CEE_PURE)
34 #error ERROR: XMM intrinsics not supported in the pure mode!
40 #ifndef _MMINTRIN_H_INCLUDED
44 #ifdef _MM2_FUNCTIONALITY
46 #ifndef _MM_FUNCTIONALITY
47 #define _MM_FUNCTIONALITY
52 #ifdef _MM_FUNCTIONALITY
62 unsigned __int64 m128_u64[2];
67 unsigned __int8 m128_u8[16];
68 unsigned __int16 m128_u16[8];
69 unsigned __int32 m128_u32[4];
/*
 * Combine four selector fields into one integer: fp3 is shifted left by 6,
 * fp2 by 4, fp1 by 2, fp0 is used unshifted, and the four results are
 * OR-ed together. Every argument is parenthesized, so expressions may be
 * passed safely. The arguments are not masked; each is presumably expected
 * to fit in two bits (0..3) -- confirm against callers.
 * NOTE(review): presumably intended as the _Imm8 argument of
 * _mm_shuffle_ps -- confirm against the intrinsic's documentation.
 */
88 #define _MM_SHUFFLE(fp3,fp2,fp1,fp0) (((fp3) << 6) | ((fp2) << 4) | \
89 ((fp1) << 2) | ((fp0)))
102 #define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) { \
103 __m128 tmp3, tmp2, tmp1, tmp0; \
105 tmp0 = _mm_shuffle_ps((row0), (row1), 0x44); \
106 tmp2 = _mm_shuffle_ps((row0), (row1), 0xEE); \
107 tmp1 = _mm_shuffle_ps((row2), (row3), 0x44); \
108 tmp3 = _mm_shuffle_ps((row2), (row3), 0xEE); \
110 (row0) = _mm_shuffle_ps(tmp0, tmp1, 0x88); \
111 (row1) = _mm_shuffle_ps(tmp0, tmp1, 0xDD); \
112 (row2) = _mm_shuffle_ps(tmp2, tmp3, 0x88); \
113 (row3) = _mm_shuffle_ps(tmp2, tmp3, 0xDD); \
/*
 * Integer hint selectors. T0..T2 are numbered 1..3 and NTA is 0, so the
 * numeric order does not follow the declaration order.
 * NOTE(review): presumably the values accepted as the `_Sel` argument of
 * _mm_prefetch -- confirm against the intrinsic's documentation.
 */
118 #define _MM_HINT_T0 1
119 #define _MM_HINT_T1 2
120 #define _MM_HINT_T2 3
121 #define _MM_HINT_NTA 0
/*
 * Shorthand that expands to _CRT_ALIGN(16). _CRT_ALIGN is defined outside
 * this file; NOTE(review): presumably it requests 16-byte alignment for
 * the declaration it is attached to -- confirm against its definition.
 */
124 #define _MM_ALIGN16 _CRT_ALIGN(16)
/*
 * Exception-state bits of the word accessed through _mm_getcsr() and
 * _mm_setcsr() (used by _MM_SET_EXCEPTION_STATE / _MM_GET_EXCEPTION_STATE).
 * _MM_EXCEPT_MASK (0x003f) is the OR of the six single-bit flags that
 * follow it, which occupy bits 0 through 5.
 */
127 #define _MM_EXCEPT_MASK 0x003f
128 #define _MM_EXCEPT_INVALID 0x0001
129 #define _MM_EXCEPT_DENORM 0x0002
130 #define _MM_EXCEPT_DIV_ZERO 0x0004
131 #define _MM_EXCEPT_OVERFLOW 0x0008
132 #define _MM_EXCEPT_UNDERFLOW 0x0010
133 #define _MM_EXCEPT_INEXACT 0x0020
/*
 * Exception-mask bits of the word accessed through _mm_getcsr() and
 * _mm_setcsr() (used by _MM_SET_EXCEPTION_MASK / _MM_GET_EXCEPTION_MASK).
 * _MM_MASK_MASK (0x1f80) is the OR of the six single-bit values that
 * follow it, which occupy bits 7 through 12. Each _MM_MASK_x value equals
 * the like-named _MM_EXCEPT_x value shifted left by 7.
 */
135 #define _MM_MASK_MASK 0x1f80
136 #define _MM_MASK_INVALID 0x0080
137 #define _MM_MASK_DENORM 0x0100
138 #define _MM_MASK_DIV_ZERO 0x0200
139 #define _MM_MASK_OVERFLOW 0x0400
140 #define _MM_MASK_UNDERFLOW 0x0800
141 #define _MM_MASK_INEXACT 0x1000
/*
 * Rounding-mode field of the word accessed through _mm_getcsr() and
 * _mm_setcsr() (used by _MM_SET_ROUNDING_MODE / _MM_GET_ROUNDING_MODE).
 * The field is the two bits selected by _MM_ROUND_MASK (0x6000); the four
 * mode constants are the four possible values of that field. Note that
 * _MM_ROUND_TOWARD_ZERO has the same value as _MM_ROUND_MASK (both bits
 * set) and _MM_ROUND_NEAREST is zero.
 */
143 #define _MM_ROUND_MASK 0x6000
144 #define _MM_ROUND_NEAREST 0x0000
145 #define _MM_ROUND_DOWN 0x2000
146 #define _MM_ROUND_UP 0x4000
147 #define _MM_ROUND_TOWARD_ZERO 0x6000
/*
 * Flush-to-zero field of the word accessed through _mm_getcsr() and
 * _mm_setcsr() (used by _MM_SET_FLUSH_ZERO_MODE / _MM_GET_FLUSH_ZERO_MODE).
 * The field is the single bit 0x8000: _MM_FLUSH_ZERO_ON has that bit set,
 * _MM_FLUSH_ZERO_OFF is zero.
 */
149 #define _MM_FLUSH_ZERO_MASK 0x8000
150 #define _MM_FLUSH_ZERO_ON 0x8000
151 #define _MM_FLUSH_ZERO_OFF 0x0000
/*
 * Replace the exception-state field: read the current word with
 * _mm_getcsr(), clear the bits in _MM_EXCEPT_MASK, OR in `mask`, and write
 * the result back with _mm_setcsr(). This is a read-modify-write of two
 * separate calls. `mask` is not itself restricted to _MM_EXCEPT_MASK, so
 * any other bits set in the argument are OR-ed into the written value too.
 */
153 #define _MM_SET_EXCEPTION_STATE(mask) \
154 _mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (mask))
/* Yield the current _mm_getcsr() value restricted to the _MM_EXCEPT_MASK bits. */
155 #define _MM_GET_EXCEPTION_STATE() \
156 (_mm_getcsr() & _MM_EXCEPT_MASK)
/*
 * Replace the exception-mask field: read the current word with
 * _mm_getcsr(), clear the bits in _MM_MASK_MASK, OR in `mask`, and write
 * the result back with _mm_setcsr(). `mask` is not itself restricted to
 * _MM_MASK_MASK, so any other bits set in the argument are OR-ed into the
 * written value too.
 */
158 #define _MM_SET_EXCEPTION_MASK(mask) \
159 _mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (mask))
/* Yield the current _mm_getcsr() value restricted to the _MM_MASK_MASK bits. */
160 #define _MM_GET_EXCEPTION_MASK() \
161 (_mm_getcsr() & _MM_MASK_MASK)
/*
 * Replace the rounding-mode field: read the current word with
 * _mm_getcsr(), clear the bits in _MM_ROUND_MASK, OR in `mode`, and write
 * the result back with _mm_setcsr(). `mode` is not itself restricted to
 * _MM_ROUND_MASK; pass one of the _MM_ROUND_* constants.
 */
163 #define _MM_SET_ROUNDING_MODE(mode) \
164 _mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (mode))
/*
 * Yield the current _mm_getcsr() value restricted to the _MM_ROUND_MASK
 * bits, i.e. a value comparable against the _MM_ROUND_* constants.
 */
165 #define _MM_GET_ROUNDING_MODE() \
166 (_mm_getcsr() & _MM_ROUND_MASK)
/*
 * Replace the flush-to-zero field: read the current word with
 * _mm_getcsr(), clear the _MM_FLUSH_ZERO_MASK bit, OR in `mode`, and write
 * the result back with _mm_setcsr(). `mode` is not itself restricted to
 * _MM_FLUSH_ZERO_MASK; pass _MM_FLUSH_ZERO_ON or _MM_FLUSH_ZERO_OFF.
 */
168 #define _MM_SET_FLUSH_ZERO_MODE(mode) \
169 _mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (mode))
/*
 * Yield the current _mm_getcsr() value restricted to the
 * _MM_FLUSH_ZERO_MASK bit (either _MM_FLUSH_ZERO_ON or _MM_FLUSH_ZERO_OFF).
 */
170 #define _MM_GET_FLUSH_ZERO_MODE() \
171 (_mm_getcsr() & _MM_FLUSH_ZERO_MASK)
177 #if defined __cplusplus
/*
 * External declarations of two intrinsics that take an __m128 by value
 * and return a 64-bit integer (__int64). Only the signatures are visible
 * here. NOTE(review): going by the names, these presumably convert the
 * scalar single-precision element to a 64-bit integer, with the "cvtt"
 * form truncating -- confirm against the intrinsics documentation.
 */
271 extern __int64 _mm_cvtss_si64(
__m128 _A);
272 extern __int64 _mm_cvttss_si64(
__m128 _A);
/*
 * External __cdecl declarations of an allocate/release pair.
 * _mm_malloc takes two size_t arguments and returns void*; _mm_free takes
 * a void* and returns nothing. Only the signatures are visible here.
 * NOTE(review): `_Siz` and `_Al` are presumably the byte size and the
 * required alignment, and memory from _mm_malloc is presumably meant to be
 * released with _mm_free -- confirm against the CRT documentation.
 */
338 extern void* __cdecl
_mm_malloc(
size_t _Siz,
size_t _Al);
339 extern void __cdecl
_mm_free(
void *_P);
/*
 * Alternate intrinsic names. Each line defines a _mm_* identifier as an
 * object-like macro that expands to another intrinsic's identifier, so
 * code written against either spelling resolves to the same function.
 * Because these are object-like macros, every occurrence of the left-hand
 * identifier is replaced, not only call expressions.
 */
343 #define _mm_cvtss_si32 _mm_cvt_ss2si
344 #define _mm_cvtps_pi32 _mm_cvt_ps2pi
345 #define _mm_cvttss_si32 _mm_cvtt_ss2si
346 #define _mm_cvttps_pi32 _mm_cvtt_ps2pi
347 #define _mm_cvtsi32_ss _mm_cvt_si2ss
348 #define _mm_cvtpi32_ps _mm_cvt_pi2ps
349 #define _mm_extract_pi16 _m_pextrw
350 #define _mm_insert_pi16 _m_pinsrw
351 #define _mm_max_pi16 _m_pmaxsw
352 #define _mm_max_pu8 _m_pmaxub
353 #define _mm_min_pi16 _m_pminsw
354 #define _mm_min_pu8 _m_pminub
355 #define _mm_movemask_pi8 _m_pmovmskb
356 #define _mm_mulhi_pu16 _m_pmulhuw
357 #define _mm_shuffle_pi16 _m_pshufw
358 #define _mm_maskmove_si64 _m_maskmovq
359 #define _mm_avg_pu8 _m_pavgb
360 #define _mm_avg_pu16 _m_pavgw
361 #define _mm_sad_pu8 _m_psadbw
362 #define _mm_set1_ps _mm_set_ps1
363 #define _mm_load1_ps _mm_load_ps1
364 #define _mm_store1_ps _mm_store_ps1
484 #if defined __cplusplus
__m128 _mm_set_ps(float _A, float _B, float _C, float _D)
__m64 _m_psadbw(__m64, __m64)
__m128 _mm_and_ps(__m128 _A, __m128 _B)
void _mm_store_ps1(float *_V, __m128 _A)
__m128 _mm_setr_ps(float _A, float _B, float _C, float _D)
int _mm_ucomilt_ss(__m128 _A, __m128 _B)
__m128 _mm_movehl_ps(__m128, __m128)
__m128 _mm_rsqrt_ss(__m128 _A)
__m128 _mm_sqrt_ps(__m128 _A)
__m128 _mm_max_ps(__m128 _A, __m128 _B)
__m128 _mm_cmpgt_ss(__m128 _A, __m128 _B)
__m128 _mm_cmpngt_ps(__m128 _A, __m128 _B)
__m64 _mm_cvt_ps2pi(__m128 _A)
__inline __m64 _mm_cvtps_pi16(__m128 a)
Definition: xmmintrin.h:416
#define _mm_cmpgt_pi8
Definition: mmintrin.h:176
void _mm_stream_pi(__m64 *, __m64)
int _mm_comigt_ss(__m128 _A, __m128 _B)
void _mm_storeu_ps(float *_V, __m128 _A)
int _mm_ucomigt_ss(__m128 _A, __m128 _B)
__m128 _mm_cmpord_ss(__m128 _A, __m128 _B)
__inline __m128 _mm_cvtpi8_ps(__m64 a)
Definition: xmmintrin.h:431
__m64 _m_pinsrw(__m64, int, int)
__m128 _mm_cmpord_ps(__m128 _A, __m128 _B)
__m64 _mm_cvtt_ps2pi(__m128 _A)
__inline __m128 _mm_cvtpu8_ps(__m64 a)
Definition: xmmintrin.h:448
__m128 _mm_sqrt_ss(__m128 _A)
__m64 _m_pshufw(__m64, int)
#define _mm_free(a)
Definition: malloc.h:66
__m128 _mm_cvt_pi2ps(__m128, __m64)
__m128 _mm_shuffle_ps(__m128 _A, __m128 _B, unsigned int _Imm8)
void _mm_storeh_pi(__m64 *, __m128)
#define _mm_cvtpi32_ps
Definition: xmmintrin.h:348
int _mm_comile_ss(__m128 _A, __m128 _B)
__inline __m64 _mm_cvtps_pi8(__m128 a)
Definition: xmmintrin.h:462
int _mm_comige_ss(__m128 _A, __m128 _B)
__m128 _mm_cmpngt_ss(__m128 _A, __m128 _B)
void _m_maskmovq(__m64, __m64, char *)
__m128 _mm_loadu_ps(float const *_A)
#define _mm_malloc(a, b)
Definition: malloc.h:67
int _mm_ucomineq_ss(__m128 _A, __m128 _B)
int _mm_comineq_ss(__m128 _A, __m128 _B)
__m128 _mm_cmple_ps(__m128 _A, __m128 _B)
__m128 _mm_add_ps(__m128 _A, __m128 _B)
#define _mm_unpackhi_pi16
Definition: mmintrin.h:131
#define _mm_packs_pi16
Definition: mmintrin.h:127
void _mm_store_ss(float *_V, __m128 _A)
__m128 _mm_sub_ps(__m128 _A, __m128 _B)
__m128 _mm_div_ps(__m128 _A, __m128 _B)
__m128 _mm_set_ss(float _A)
__m128 _mm_cmplt_ss(__m128 _A, __m128 _B)
int _mm_ucomile_ss(__m128 _A, __m128 _B)
__m128 _mm_rcp_ps(__m128 _A)
__m128 _mm_cmpeq_ps(__m128 _A, __m128 _B)
void _mm_setcsr(unsigned int)
__m128 _mm_cmpneq_ps(__m128 _A, __m128 _B)
__m64
Definition: mmintrin.h:42
#define _mm_cmpgt_pi16
Definition: mmintrin.h:177
void _mm_stream_ps(float *, __m128)
__m128 _mm_andnot_ps(__m128 _A, __m128 _B)
__m128 _mm_cmpnlt_ps(__m128 _A, __m128 _B)
_In_ wchar_t _C
Definition: wchar.h:1295
__m128 _mm_cmpnlt_ss(__m128 _A, __m128 _B)
#define _CRT_ALIGN(x)
Definition: crtdefs.h:604
__m128 _mm_xor_ps(__m128 _A, __m128 _B)
int _mm_comieq_ss(__m128 _A, __m128 _B)
#define _mm_unpacklo_pi16
Definition: mmintrin.h:134
__m64 _m_pmaxub(__m64, __m64)
__m128
Definition: xmmintrin.h:70
__m128 _mm_load_ps1(float const *_A)
__inline __m128 _mm_cvtpi16_ps(__m64 a)
Definition: xmmintrin.h:378
int _mm_cvt_ss2si(__m128 _A)
__m128 _mm_mul_ps(__m128 _A, __m128 _B)
__inline __m128 _mm_cvtpu16_ps(__m64 a)
Definition: xmmintrin.h:397
int _mm_ucomige_ss(__m128 _A, __m128 _B)
__m128 _mm_set_ps1(float _A)
#define _mm_cvtps_pi32
Definition: xmmintrin.h:344
__m128 _mm_cmpneq_ss(__m128 _A, __m128 _B)
__m128 _mm_cvt_si2ss(__m128, int)
__m128 _mm_add_ss(__m128 _A, __m128 _B)
__m128 _mm_min_ss(__m128 _A, __m128 _B)
__m128 _mm_min_ps(__m128 _A, __m128 _B)
int _mm_ucomieq_ss(__m128 _A, __m128 _B)
int _mm_comilt_ss(__m128 _A, __m128 _B)
__m128 _mm_cmpnle_ss(__m128 _A, __m128 _B)
__m128 _mm_cmpgt_ps(__m128 _A, __m128 _B)
void _mm_store_ps(float *_V, __m128 _A)
__m64 _m_pmaxsw(__m64, __m64)
int _mm_movemask_ps(__m128 _A)
__m128 _mm_setzero_ps(void)
__m64 _m_pminub(__m64, __m64)
__m64 _mm_setzero_si64(void)
__m64 _m_pavgw(__m64, __m64)
__m128 _mm_rcp_ss(__m128 _A)
__m128 _mm_cmplt_ps(__m128 _A, __m128 _B)
__m64 _m_pmulhuw(__m64, __m64)
__m128 _mm_move_ss(__m128 _A, __m128 _B)
__m128 _mm_sub_ss(__m128 _A, __m128 _B)
__m128 _mm_loadr_ps(float const *_A)
__m128 _mm_cmpnle_ps(__m128 _A, __m128 _B)
__m128 _mm_cmpnge_ps(__m128 _A, __m128 _B)
__m128 _mm_cmpeq_ss(__m128 _A, __m128 _B)
__m128 _mm_unpackhi_ps(__m128 _A, __m128 _B)
__inline __m128 _mm_cvtpi32x2_ps(__m64 a, __m64 b)
Definition: xmmintrin.h:477
__m128 _mm_unpacklo_ps(__m128 _A, __m128 _B)
__m128 _mm_div_ss(__m128 _A, __m128 _B)
void _mm_storel_pi(__m64 *, __m128)
__m128 _mm_rsqrt_ps(__m128 _A)
__m128 _mm_max_ss(__m128 _A, __m128 _B)
__m128 _mm_or_ps(__m128 _A, __m128 _B)
__m128 _mm_cmple_ss(__m128 _A, __m128 _B)
void _mm_prefetch(char const *_A, int _Sel)
__m64 _m_pavgb(__m64, __m64)
__m128 _mm_loadh_pi(__m128, __m64 const *)
void _mm_storer_ps(float *_V, __m128 _A)
__m128 _mm_cmpunord_ps(__m128 _A, __m128 _B)
__m128 _mm_mul_ss(__m128 _A, __m128 _B)
__m128 _mm_loadl_pi(__m128, __m64 const *)
__m128 _mm_load_ps(float const *_A)
float _mm_cvtss_f32(__m128 _A)
__m128 _mm_cmpunord_ss(__m128 _A, __m128 _B)
int _mm_cvtt_ss2si(__m128 _A)
#define _mm_unpacklo_pi8
Definition: mmintrin.h:133
__m128 _mm_load_ss(float const *_A)
__m128 _mm_cmpnge_ss(__m128 _A, __m128 _B)
__m128 _mm_movelh_ps(__m128, __m128)
unsigned int _mm_getcsr(void)
__m128 _mm_cmpge_ps(__m128 _A, __m128 _B)
#define _mm_packs_pi32
Definition: mmintrin.h:128
__m64 _m_pminsw(__m64, __m64)
int _m_pextrw(__m64, int)
__m128 _mm_cmpge_ss(__m128 _A, __m128 _B)