30 #if !defined(_M_IX86) && !defined(_M_X64) 
   31 #error This header is specific to X86 and X64 targets 
   38 #if defined (_M_CEE_PURE) 
   39         #error ERROR: XMM intrinsics not supported in the pure mode! 
   45 #ifndef _MMINTRIN_H_INCLUDED 
   49 #ifdef _MM2_FUNCTIONALITY 
   51 #ifndef _MM_FUNCTIONALITY 
   52 #define _MM_FUNCTIONALITY 
   57 #ifdef _MM_FUNCTIONALITY 
   67      unsigned __int64    m128_u64[2];
 
   72      unsigned __int8     m128_u8[16];
 
   73      unsigned __int16    m128_u16[8];
 
   74      unsigned __int32    m128_u32[4];
 
   77 #if !defined _VCRT_BUILD && !defined _INC_MALLOC 
   93 #define _MM_SHUFFLE(fp3,fp2,fp1,fp0) (((fp3) << 6) | ((fp2) << 4) | \ 
   94                                      ((fp1) << 2) | ((fp0))) 
  107 #define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) {                 \ 
  108             __m128 _Tmp3, _Tmp2, _Tmp1, _Tmp0;                          \ 
  110             _Tmp0   = _mm_shuffle_ps((row0), (row1), 0x44);          \ 
  111             _Tmp2   = _mm_shuffle_ps((row0), (row1), 0xEE);          \ 
  112             _Tmp1   = _mm_shuffle_ps((row2), (row3), 0x44);          \ 
  113             _Tmp3   = _mm_shuffle_ps((row2), (row3), 0xEE);          \ 
  115             (row0) = _mm_shuffle_ps(_Tmp0, _Tmp1, 0x88);              \ 
  116             (row1) = _mm_shuffle_ps(_Tmp0, _Tmp1, 0xDD);              \ 
  117             (row2) = _mm_shuffle_ps(_Tmp2, _Tmp3, 0x88);              \ 
  118             (row3) = _mm_shuffle_ps(_Tmp2, _Tmp3, 0xDD);              \ 
  123 #define _MM_HINT_NTA    0 
  124 #define _MM_HINT_T0     1 
  125 #define _MM_HINT_T1     2 
  126 #define _MM_HINT_T2     3 
  127 #define _MM_HINT_ENTA   4 
  134 #define _MM_ALIGN16 _VCRT_ALIGN(16) 
  137 #define _MM_EXCEPT_MASK       0x003f 
  138 #define _MM_EXCEPT_INVALID    0x0001 
  139 #define _MM_EXCEPT_DENORM     0x0002 
  140 #define _MM_EXCEPT_DIV_ZERO   0x0004 
  141 #define _MM_EXCEPT_OVERFLOW   0x0008 
  142 #define _MM_EXCEPT_UNDERFLOW  0x0010 
  143 #define _MM_EXCEPT_INEXACT    0x0020 
  145 #define _MM_MASK_MASK         0x1f80 
  146 #define _MM_MASK_INVALID      0x0080 
  147 #define _MM_MASK_DENORM       0x0100 
  148 #define _MM_MASK_DIV_ZERO     0x0200 
  149 #define _MM_MASK_OVERFLOW     0x0400 
  150 #define _MM_MASK_UNDERFLOW    0x0800 
  151 #define _MM_MASK_INEXACT      0x1000 
  153 #define _MM_ROUND_MASK        0x6000 
  154 #define _MM_ROUND_NEAREST     0x0000 
  155 #define _MM_ROUND_DOWN        0x2000 
  156 #define _MM_ROUND_UP          0x4000 
  157 #define _MM_ROUND_TOWARD_ZERO 0x6000 
  159 #define _MM_FLUSH_ZERO_MASK   0x8000 
  160 #define _MM_FLUSH_ZERO_ON     0x8000 
  161 #define _MM_FLUSH_ZERO_OFF    0x0000 
  163 #define _MM_SET_EXCEPTION_STATE(mask)                               \ 
  164             _mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (mask)) 
  165 #define _MM_GET_EXCEPTION_STATE()                                   \ 
  166             (_mm_getcsr() & _MM_EXCEPT_MASK) 
  168 #define _MM_SET_EXCEPTION_MASK(mask)                                \ 
  169             _mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (mask)) 
  170 #define _MM_GET_EXCEPTION_MASK()                                    \ 
  171             (_mm_getcsr() & _MM_MASK_MASK) 
  173 #define _MM_SET_ROUNDING_MODE(mode)                                 \ 
  174             _mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (mode)) 
  175 #define _MM_GET_ROUNDING_MODE()                                     \ 
  176             (_mm_getcsr() & _MM_ROUND_MASK) 
  178 #define _MM_SET_FLUSH_ZERO_MODE(mode)                               \ 
  179             _mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (mode)) 
  180 #define _MM_GET_FLUSH_ZERO_MODE()                                   \ 
  181             (_mm_getcsr() & _MM_FLUSH_ZERO_MASK) 
  187 #if defined __cplusplus 
  287 extern __int64 _mm_cvtss_si64(__m128 _A);
 
  288 extern __int64 _mm_cvttss_si64(__m128 _A);
 
  312 extern int _m_pextrw(__m64, int);
 
  318 extern int _m_pmovmskb(__m64);
 
  321 extern void _m_maskmovq(__m64, __m64, char *);
 
  358 extern void* __cdecl _mm_malloc(size_t _Siz, size_t _Al);
 
  359 extern void __cdecl _mm_free(void *_P);
 
  364 #define _mm_cvtps_pi32    _mm_cvt_ps2pi 
  365 #define _mm_cvttps_pi32   _mm_cvtt_ps2pi 
  366 #define _mm_cvtpi32_ps    _mm_cvt_pi2ps 
  367 #define _mm_extract_pi16  _m_pextrw 
  368 #define _mm_insert_pi16   _m_pinsrw 
  369 #define _mm_max_pi16      _m_pmaxsw 
  370 #define _mm_max_pu8       _m_pmaxub 
  371 #define _mm_min_pi16      _m_pminsw 
  372 #define _mm_min_pu8       _m_pminub 
  373 #define _mm_movemask_pi8  _m_pmovmskb 
  374 #define _mm_mulhi_pu16    _m_pmulhuw 
  375 #define _mm_shuffle_pi16  _m_pshufw 
  376 #define _mm_maskmove_si64 _m_maskmovq 
  377 #define _mm_avg_pu8       _m_pavgb 
  378 #define _mm_avg_pu16      _m_pavgw 
  379 #define _mm_sad_pu8       _m_psadbw 
  381 #define _mm_cvtss_si32    _mm_cvt_ss2si 
  382 #define _mm_cvttss_si32   _mm_cvtt_ss2si 
  383 #define _mm_cvtsi32_ss    _mm_cvt_si2ss 
  384 #define _mm_set1_ps       _mm_set_ps1 
  385 #define _mm_load1_ps      _mm_load_ps1 
  386 #define _mm_store1_ps     _mm_store_ps1 
  404   __m64  _Ext_val = _mm_cmpgt_pi16(_mm_setzero_si64(), _A);
 
  406   _Tmp = _mm_cvtpi32_ps(_mm_setzero_ps(), _mm_unpackhi_pi16(_A, _Ext_val));
 
  408                         _mm_unpacklo_pi16(_A, _Ext_val)));
 
  423   __m64  _Ext_val = _mm_setzero_si64();
 
  425   _Tmp = _mm_cvtpi32_ps(_mm_setzero_ps(), _mm_unpackhi_pi16(_A, _Ext_val));
 
  427                         _mm_unpacklo_pi16(_A, _Ext_val)));
 
  441   return _mm_packs_pi32(_mm_cvtps_pi32(_A),
 
  456   __m64  _Ext_val = _mm_cmpgt_pi8(_mm_setzero_si64(), _A);
 
  458   return _mm_cvtpi16_ps(_mm_unpacklo_pi8(_A, _Ext_val));
 
  473   return _mm_cvtpu16_ps(_mm_unpacklo_pi8(_A, _mm_setzero_si64()));
 
  487   return _mm_packs_pi16(_mm_cvtps_pi16(_A), _mm_setzero_si64());
 
  508 #if defined __cplusplus 
union __declspec(intrin_type) __declspec(align(16)) __m128
Definition: xmmintrin.h:65
 
__m128 _mm_set_ps(float _A, float _B, float _C, float _D)
 
__m128 _mm_and_ps(__m128 _A, __m128 _B)
 
void _mm_store_ps1(float *_V, __m128 _A)
 
__m128 _mm_setr_ps(float _A, float _B, float _C, float _D)
 
int _mm_ucomilt_ss(__m128 _A, __m128 _B)
 
__m128 _mm_movehl_ps(__m128, __m128)
 
__m128 _mm_rsqrt_ss(__m128 _A)
 
__m128 _mm_sqrt_ps(__m128 _A)
 
__m128 _mm_max_ps(__m128 _A, __m128 _B)
 
__m128 _mm_cmpgt_ss(__m128 _A, __m128 _B)
 
__m128 _mm_cmpngt_ps(__m128 _A, __m128 _B)
 
int _mm_comigt_ss(__m128 _A, __m128 _B)
 
void _mm_storeu_ps(float *_V, __m128 _A)
 
int _mm_ucomigt_ss(__m128 _A, __m128 _B)
 
__m128 _mm_cmpord_ss(__m128 _A, __m128 _B)
 
__m128 _mm_cmpord_ps(__m128 _A, __m128 _B)
 
__m128 _mm_sqrt_ss(__m128 _A)
 
void * align(size_t _Bound, size_t _Size, void *&_Ptr, size_t &_Space) _NOEXCEPT
Definition: memory:1985
 
__m128 _mm_shuffle_ps(__m128 _A, __m128 _B, unsigned int _Imm8)
 
void _mm_storeh_pi(__m64 *, __m128)
 
int _mm_comile_ss(__m128 _A, __m128 _B)
 
int _mm_comige_ss(__m128 _A, __m128 _B)
 
__m128 _mm_cmpngt_ss(__m128 _A, __m128 _B)
 
__m128 _mm_loadu_ps(float const *_A)
 
int _mm_ucomineq_ss(__m128 _A, __m128 _B)
 
int _mm_comineq_ss(__m128 _A, __m128 _B)
 
__m128 _mm_cmple_ps(__m128 _A, __m128 _B)
 
__m128 _mm_add_ps(__m128 _A, __m128 _B)
 
void _mm_store_ss(float *_V, __m128 _A)
 
__m128 _mm_sub_ps(__m128 _A, __m128 _B)
 
__m128 _mm_div_ps(__m128 _A, __m128 _B)
 
__m128 _mm_set_ss(float _A)
 
__m128 _mm_cmplt_ss(__m128 _A, __m128 _B)
 
int _mm_ucomile_ss(__m128 _A, __m128 _B)
 
__m128 _mm_rcp_ps(__m128 _A)
 
__m128 _mm_cmpeq_ps(__m128 _A, __m128 _B)
 
void _mm_setcsr(unsigned int)
 
__m128 _mm_cmpneq_ps(__m128 _A, __m128 _B)
 
__m64
Definition: mmintrin.h:45
 
void _mm_stream_ps(float *, __m128)
 
__m128 _mm_andnot_ps(__m128 _A, __m128 _B)
 
__m128 _mm_cmpnlt_ps(__m128 _A, __m128 _B)
 
__m128 _mm_cmpnlt_ss(__m128 _A, __m128 _B)
 
__m128 _mm_xor_ps(__m128 _A, __m128 _B)
 
int _mm_comieq_ss(__m128 _A, __m128 _B)
 
__m128
Definition: xmmintrin.h:75
 
__m128 _mm_load_ps1(float const *_A)
 
int _mm_cvt_ss2si(__m128 _A)
 
__m128 _mm_mul_ps(__m128 _A, __m128 _B)
 
int _mm_ucomige_ss(__m128 _A, __m128 _B)
 
__m128 _mm_set_ps1(float _A)
 
__m128 _mm_cmpneq_ss(__m128 _A, __m128 _B)
 
__m128 _mm_cvt_si2ss(__m128, int)
 
__m128 _mm_add_ss(__m128 _A, __m128 _B)
 
__m128 _mm_min_ss(__m128 _A, __m128 _B)
 
__m128 _mm_min_ps(__m128 _A, __m128 _B)
 
int _mm_ucomieq_ss(__m128 _A, __m128 _B)
 
int _mm_comilt_ss(__m128 _A, __m128 _B)
 
__m128 _mm_cmpnle_ss(__m128 _A, __m128 _B)
 
__m128 _mm_cmpgt_ps(__m128 _A, __m128 _B)
 
void _mm_store_ps(float *_V, __m128 _A)
 
int _mm_movemask_ps(__m128 _A)
 
__m128 _mm_setzero_ps(void)
 
__m128 _mm_rcp_ss(__m128 _A)
 
__m128 _mm_cmplt_ps(__m128 _A, __m128 _B)
 
__m128 _mm_move_ss(__m128 _A, __m128 _B)
 
__m128 _mm_sub_ss(__m128 _A, __m128 _B)
 
__m128 _mm_loadr_ps(float const *_A)
 
__m128 _mm_cmpnle_ps(__m128 _A, __m128 _B)
 
__m128 _mm_cmpnge_ps(__m128 _A, __m128 _B)
 
__m128 _mm_cmpeq_ss(__m128 _A, __m128 _B)
 
__m128 _mm_unpackhi_ps(__m128 _A, __m128 _B)
 
__m128 _mm_unpacklo_ps(__m128 _A, __m128 _B)
 
__m128 _mm_div_ss(__m128 _A, __m128 _B)
 
void _mm_storel_pi(__m64 *, __m128)
 
__m128 _mm_rsqrt_ps(__m128 _A)
 
__m128 _mm_max_ss(__m128 _A, __m128 _B)
 
__m128 _mm_or_ps(__m128 _A, __m128 _B)
 
__m128 _mm_cmple_ss(__m128 _A, __m128 _B)
 
void _mm_prefetch(char const *_A, int _Sel)
 
__m128 _mm_loadh_pi(__m128, __m64 const *)
 
void _mm_storer_ps(float *_V, __m128 _A)
 
__m128 _mm_cmpunord_ps(__m128 _A, __m128 _B)
 
__m128 _mm_mul_ss(__m128 _A, __m128 _B)
 
__m128 _mm_loadl_pi(__m128, __m64 const *)
 
__m128 _mm_load_ps(float const *_A)
 
float _mm_cvtss_f32(__m128 _A)
 
__m128 _mm_cmpunord_ss(__m128 _A, __m128 _B)
 
int _mm_cvtt_ss2si(__m128 _A)
 
__m128 _mm_load_ss(float const *_A)
 
__m128 _mm_cmpnge_ss(__m128 _A, __m128 _B)
 
__m128 _mm_movelh_ps(__m128, __m128)
 
unsigned int _mm_getcsr(void)
 
__m128 _mm_cmpge_ps(__m128 _A, __m128 _B)
 
__m128 _mm_cmpge_ss(__m128 _A, __m128 _B)