16 #ifndef _IVEC_H_INCLUDED 
   17 #define _IVEC_H_INCLUDED 
   20 #if !defined __cplusplus 
   21     #error ERROR: This file is only supported in C++ compilations! 
   24 #if defined (_M_CEE_PURE) 
   25     #error ERROR: This file is not supported in the pure mode! 
   34         #define _VEC_ASSERT(_Expression) ((void)0) 
   45         #define _VEC_ASSERT(_Expression) (void)( (!!(_Expression)) || (_wassert(_CRT_WIDE(#_Expression), _CRT_WIDE(__FILE__), __LINE__), 0) ) 
   54 #if defined (_SILENCE_IVEC_C4799) 
   56     #pragma warning(disable: 4799) 
   62 #if defined (_ENABLE_VEC_DEBUG) 
   77 #define _MM_8UB(element,vector) (*((unsigned char*)&##vector + ##element)) 
   78 #define _MM_8B(element,vector) (*((signed char*)&##vector + ##element)) 
   80 #define _MM_4UW(element,vector) (*((unsigned short*)&##vector + ##element)) 
   81 #define _MM_4W(element,vector) (*((short*)&##vector + ##element)) 
   83 #define _MM_2UDW(element,vector) (*((unsigned int*)&##vector + ##element)) 
   84 #define _MM_2DW(element,vector) (*((int*)&##vector + ##element)) 
   86 #define _MM_QW (*((__int64*)&vec)) 
  100     M64(
__m64 _Mm)                           { vec = _Mm; }
 
  101     M64(__int64 _Mm)                         { vec = _mm_set_pi32((
int)(_Mm >> 32), (
int)_Mm); }
 
  102     M64(
int _I)                              { vec = _m_from_int(_I); }
 
  104     operator __m64()
 const                  { 
return vec; }
 
  107     M64& 
operator&=(
const M64 &_A)                   { 
return *
this = (M64) _m_pand(vec,_A); }
 
  108     M64& 
operator|=(
const M64 &_A)                   { 
return *
this = (M64) _m_por(vec,_A); }
 
  109     M64& 
operator^=(
const M64 &_A)                   { 
return *
this = (M64) _m_pxor(vec,_A); }
 
  113 const  union {__int64 m1; 
__m64 m2;} __mmx_all_ones_cheat =
 
  116 #define _mmx_all_ones ((M64)__mmx_all_ones_cheat.m2) 
  118 inline M64 
operator&(
const M64 &_A, 
const M64 &_B)    { 
return _m_pand(_A,_B); }
 
  119 inline M64 
operator|(
const M64 &_A, 
const M64 &_B)    { 
return _m_por(_A,_B); }
 
  120 inline M64 
operator^(
const M64 &_A, 
const M64 &_B)    { 
return _m_pxor(_A,_B); }
 
  121 inline M64 
andnot(
const M64 &_A, 
const M64 &_B)       { 
return _m_pandn(_A,_B); }
 
  128 class I64vec1 : 
public M64
 
  132     I64vec1(
__m64 _Mm) : M64(_Mm)             { }
 
  133     explicit I64vec1(
int _I) : M64(_I)        { }
 
  134     explicit I64vec1(__int64 _Mm) : M64(_Mm)  { }
 
  136     I64vec1& 
operator= (
const M64 &_A) { 
return *
this = (I64vec1) _A; }
 
  137     I64vec1& 
operator&=(
const M64 &_A) { 
return *
this = (I64vec1) _m_pand(vec,_A); }
 
  138     I64vec1& 
operator|=(
const M64 &_A) { 
return *
this = (I64vec1) _m_por(vec,_A); }
 
  139     I64vec1& 
operator^=(
const M64 &_A) { 
return *
this = (I64vec1) _m_pxor(vec,_A); }
 
  142     I64vec1 
operator<<(
const M64 &_A)                { 
return _m_psllq(vec,_A); }
 
  144     I64vec1& 
operator<<=(
const M64 &_A)              { 
return *
this = (I64vec1) _m_psllq(vec,_A); }
 
  145     I64vec1& 
operator<<=(
int _Count)                 { 
return *
this = (I64vec1) _m_psllqi(vec, _Count); }
 
  146     I64vec1 
operator>>(
const M64 &_A)                { 
return _m_psrlq(vec,_A); }
 
  147     I64vec1 
operator>>(
int _Count)                   { 
return _m_psrlqi(vec, _Count); }
 
  148     I64vec1& 
operator>>=(
const M64 &_A)              { 
return *
this = (I64vec1) _m_psrlq(vec,_A); }
 
  149     I64vec1& 
operator>>=(
int _Count)                 { 
return *
this = (I64vec1) _m_psrlqi(vec, _Count); }
 
  155 class I32vec2 : 
public M64
 
  159     I32vec2(
__m64 _Mm) : M64(_Mm) { }
 
  160     I32vec2(
int _I0, 
int _I1) { vec = _mm_set_pi32(_I0, _I1); }
 
  161     explicit I32vec2(
int _I) : M64 (_I) { }
 
  162     explicit I32vec2(__int64 _I): M64(_I) {}
 
  165     I32vec2& 
operator= (
const M64 &_A) { 
return *
this = (I32vec2)_A; }
 
  168     I32vec2& 
operator&=(
const M64 &_A) { 
return *
this = (I32vec2) _m_pand(vec,_A); }
 
  169     I32vec2& 
operator|=(
const M64 &_A) { 
return *
this = (I32vec2) _m_por(vec,_A); }
 
  170     I32vec2& 
operator^=(
const M64 &_A) { 
return *
this = (I32vec2) _m_pxor(vec,_A); }
 
  173     I32vec2& 
operator +=(
const I32vec2 &_A)          { 
return *
this = (I32vec2) _m_paddd(vec,_A); }
 
  174     I32vec2& 
operator -=(
const I32vec2 &_A)          { 
return *
this = (I32vec2) _m_psubd(vec,_A); }
 
  177     I32vec2 
operator<<(
const I32vec2 &_A)            { 
return _m_pslld(vec,_A); }
 
  178     I32vec2 
operator<<(
int _Count)                   { 
return _m_pslldi(vec,_Count); }
 
  179     I32vec2& 
operator<<=(
const I32vec2 &_A)          { 
return *
this = (I32vec2) _m_pslld(vec,_A); }
 
  180     I32vec2& 
operator<<=(
int _Count)                 { 
return *
this = (I32vec2) _m_pslldi(vec,_Count); }
 
  185 inline I32vec2 
cmpeq(
const I32vec2 &_A, 
const I32vec2 &_B)        { 
return _m_pcmpeqd(_A,_B); }
 
  186 inline I32vec2 
cmpneq(
const I32vec2 &_A, 
const I32vec2 &_B)       { 
return _m_pandn(_m_pcmpeqd(_A,_B), _mmx_all_ones); }
 
  188 inline I32vec2 
unpack_low(
const I32vec2 &_A, 
const I32vec2 &_B)   {
return _m_punpckldq(_A,_B); }
 
  189 inline I32vec2 
unpack_high(
const I32vec2 &_A, 
const I32vec2 &_B)  {
return _m_punpckhdq(_A,_B); }
 
  194 class Is32vec2 : 
public I32vec2
 
  198     Is32vec2(
__m64 _Mm) : I32vec2(_Mm) { }
 
  199     Is32vec2(
signed int _I0, 
signed int _I1) : I32vec2(_I0, _I1) {}
 
  200     explicit Is32vec2(
int _I) : I32vec2 (_I)      {}
 
  201     explicit Is32vec2(__int64 _I): I32vec2(_I)    {}
 
  204     Is32vec2& 
operator= (
const M64 &_A)      { 
return *
this = (Is32vec2)_A; }
 
  207     Is32vec2& 
operator&=(
const M64 &_A)      { 
return *
this = (Is32vec2) _m_pand(vec,_A); }
 
  208     Is32vec2& 
operator|=(
const M64 &_A)      { 
return *
this = (Is32vec2) _m_por(vec,_A); }
 
  209     Is32vec2& 
operator^=(
const M64 &_A)      { 
return *
this = (Is32vec2) _m_pxor(vec,_A); }
 
  212     Is32vec2& 
operator +=(
const I32vec2 &_A) { 
return *
this = (Is32vec2) _m_paddd(vec,_A); }
 
  213     Is32vec2& 
operator -=(
const I32vec2 &_A) { 
return *
this = (Is32vec2) _m_psubd(vec,_A); }
 
  216     Is32vec2 
operator<<(
const M64 &_A)       { 
return _m_pslld(vec,_A); }
 
  217     Is32vec2 
operator<<(
int _Count)          { 
return _m_pslldi(vec,_Count); }
 
  218     Is32vec2& 
operator<<=(
const M64 &_A)     { 
return *
this = (Is32vec2) _m_pslld(vec,_A); }
 
  219     Is32vec2& 
operator<<=(
int _Count)        { 
return *
this = (Is32vec2) _m_pslldi(vec,_Count); }
 
  221     Is32vec2 
operator>>(
const M64 &_A)       { 
return _m_psrad(vec, _A); }
 
  222     Is32vec2 
operator>>(
int _Count)          { 
return _m_psradi(vec, _Count); }
 
  223     Is32vec2& 
operator>>=(
const M64 &_A)     { 
return *
this = (Is32vec2) _m_psrad(vec, _A); }
 
  224     Is32vec2& 
operator>>=(
int _Count)        { 
return *
this = (Is32vec2) _m_psradi(vec, _Count); }
 
  226 #if defined (_ENABLE_VEC_DEBUG) 
  230         _Os << 
" [1]:" << 
_MM_2DW(1,_A)
 
  237     const int& operator[](
int _I)
const 
  244     int& operator[](
int _I)
 
  252 inline Is32vec2 
cmpeq(
const Is32vec2 &_A, 
const Is32vec2 &_B)         { 
return _m_pcmpeqd(_A,_B); }
 
  253 inline Is32vec2 
cmpneq(
const Is32vec2 &_A, 
const Is32vec2 &_B)        { 
return _m_pandn(_m_pcmpeqd(_A,_B), _mmx_all_ones); }
 
  254 inline Is32vec2 
cmpgt(
const Is32vec2 &_A, 
const Is32vec2 &_B)         { 
return _m_pcmpgtd(_A,_B); }
 
  255 inline Is32vec2 
cmplt(
const Is32vec2 &_A, 
const Is32vec2 &_B)         { 
return _m_pcmpgtd(_B,_A); }
 
  256 inline Is32vec2 cmple(
const Is32vec2 &_A, 
const Is32vec2 &_B)         { 
return _m_pandn(_m_pcmpgtd(_A,_B), _mmx_all_ones); }
 
  257 inline Is32vec2 cmpge(
const Is32vec2 &_A, 
const Is32vec2 &_B)         { 
return _m_pandn(_m_pcmpgtd(_B,_A), _mmx_all_ones); }
 
  259 inline Is32vec2 
unpack_low(
const Is32vec2 &_A, 
const Is32vec2 &_B)    { 
return _m_punpckldq(_A,_B); }
 
  260 inline Is32vec2 
unpack_high(
const Is32vec2 &_A, 
const Is32vec2 &_B)   { 
return _m_punpckhdq(_A,_B); }
 
  265 class Iu32vec2 : 
public I32vec2
 
  269     Iu32vec2(
__m64 _Mm) : I32vec2(_Mm) { }
 
  270     Iu32vec2(
unsigned int _I0, 
unsigned int _I1) : I32vec2(_I0, _I1) {}
 
  271     explicit Iu32vec2(
int _I) : I32vec2 (_I)      { }
 
  272     explicit Iu32vec2(__int64 _I) : I32vec2 (_I)  { }
 
  275     Iu32vec2& 
operator= (
const M64 &_A)      { 
return *
this = (Iu32vec2) _A; }
 
  278     Iu32vec2& 
operator&=(
const M64 &_A)      { 
return *
this = (Iu32vec2) _m_pand(vec,_A); }
 
  279     Iu32vec2& 
operator|=(
const M64 &_A)      { 
return *
this = (Iu32vec2) _m_por(vec,_A); }
 
  280     Iu32vec2& 
operator^=(
const M64 &_A)      { 
return *
this = (Iu32vec2) _m_pxor(vec,_A); }
 
  283     Iu32vec2& 
operator +=(
const I32vec2 &_A) { 
return *
this = (Iu32vec2) _m_paddd(vec,_A); }
 
  284     Iu32vec2& 
operator -=(
const I32vec2 &_A) { 
return *
this = (Iu32vec2) _m_psubd(vec,_A); }
 
  287     Iu32vec2 
operator<<(
const M64 &_A)       { 
return _m_pslld(vec,_A); }
 
  288     Iu32vec2 
operator<<(
int _Count)          { 
return _m_pslldi(vec,_Count); }
 
  289     Iu32vec2& 
operator<<=(
const M64 &_A)     { 
return *
this = (Iu32vec2) _m_pslld(vec,_A); }
 
  290     Iu32vec2& 
operator<<=(
int _Count)        { 
return *
this = (Iu32vec2) _m_pslldi(vec,_Count); }
 
  291     Iu32vec2 
operator>>(
const M64 &_A)       { 
return _m_psrld(vec,_A); }
 
  292     Iu32vec2 
operator>>(
int _Count)          { 
return _m_psrldi(vec,_Count); }
 
  293     Iu32vec2& 
operator>>=(
const M64 &_A)     { 
return *
this = (Iu32vec2) _m_psrld(vec,_A); }
 
  294     Iu32vec2& 
operator>>=(
int _Count)        { 
return *
this = (Iu32vec2) _m_psrldi(vec,_Count); }
 
  296 #if defined (_ENABLE_VEC_DEBUG) 
  307     const unsigned int& operator[](
int _I)
const 
  314     unsigned int& operator[](
int _I)
 
  322 inline Iu32vec2 
cmpeq(
const Iu32vec2 &_A, 
const Iu32vec2 &_B)         { 
return _m_pcmpeqd(_A,_B); }
 
  323 inline Iu32vec2 
cmpneq(
const Iu32vec2 &_A, 
const Iu32vec2 &_B)        { 
return _m_pandn(_m_pcmpeqd(_A,_B), _mmx_all_ones); }
 
  325 inline Iu32vec2 
unpack_low(
const Iu32vec2 &_A, 
const Iu32vec2 &_B)    {
return _m_punpckldq(_A,_B); }
 
  326 inline Iu32vec2 
unpack_high(
const Iu32vec2 &_A, 
const Iu32vec2 &_B)   {
return _m_punpckhdq(_A,_B); }
 
  331 class I16vec4 : 
public M64
 
  335     I16vec4(
__m64 _Mm) : M64(_Mm) { }
 
  336     I16vec4(
short _I0, 
short _I1, 
short _I2, 
short _I3)
 
  338         vec = _mm_set_pi16(_I0, _I1, _I2, _I3);
 
  340     explicit I16vec4(__int64 _I) : M64 (_I) { }
 
  341     explicit I16vec4(
int _I) : M64 (_I) { }
 
  344     I16vec4& 
operator= (
const M64 &_A)               { 
return *
this = (I16vec4) _A; }
 
  347     I16vec4& 
operator&=(
const M64 &_A)               { 
return *
this = (I16vec4) _m_pand(vec,_A); }
 
  348     I16vec4& 
operator|=(
const M64 &_A)               { 
return *
this = (I16vec4) _m_por(vec,_A); }
 
  349     I16vec4& 
operator^=(
const M64 &_A)               { 
return *
this = (I16vec4) _m_pxor(vec,_A); }
 
  352     I16vec4& 
operator +=(
const I16vec4 &_A)          { 
return *
this = (I16vec4)_m_paddw(vec,_A); }
 
  353     I16vec4& 
operator -=(
const I16vec4 &_A)          { 
return *
this = (I16vec4)_m_psubw(vec,_A); }
 
  354     I16vec4& 
operator *=(
const I16vec4 &_A)          { 
return *
this = (I16vec4)_m_pmullw(vec,_A); }
 
  357     I16vec4 
operator<<(
const I16vec4 &_A)            { 
return _m_psllw(vec,_A); }
 
  358     I16vec4 
operator<<(
int _Count)                   { 
return _m_psllwi(vec,_Count); }
 
  359     I16vec4& 
operator<<=(
const I16vec4 &_A)          { 
return *
this = (I16vec4)_m_psllw(vec,_A); }
 
  360     I16vec4& 
operator<<=(
int _Count)                 { 
return *
this = (I16vec4)_m_psllwi(vec,_Count); }
 
  363 inline I16vec4 
operator*(
const I16vec4 &_A, 
const I16vec4 &_B)    { 
return _m_pmullw(_A,_B); }
 
  364 inline I16vec4 
cmpeq(
const I16vec4 &_A, 
const I16vec4 &_B)        { 
return _m_pcmpeqw(_A,_B); }
 
  365 inline I16vec4 
cmpneq(
const I16vec4 &_A, 
const I16vec4 &_B)       { 
return _m_pandn(_m_pcmpeqw(_A,_B), _mmx_all_ones); }
 
  367 inline I16vec4 
unpack_low(
const I16vec4 &_A, 
const I16vec4 &_B)   { 
return _m_punpcklwd(_A,_B); }
 
  368 inline I16vec4 
unpack_high(
const I16vec4 &_A, 
const I16vec4 &_B)  { 
return _m_punpckhwd(_A,_B); }
 
  373 class Is16vec4 : 
public I16vec4
 
  377     Is16vec4(
__m64 _Mm) : I16vec4(_Mm) { }
 
  378     Is16vec4(
short _I0, 
short _I1, 
short _I2, 
short _I3) : I16vec4(_I0, _I1, _I2, _I3) { }
 
  379     explicit Is16vec4(__int64 _I) : I16vec4 (_I)  { }
 
  380     explicit Is16vec4(
int _I) : I16vec4 (_I)      { }
 
  383     Is16vec4& 
operator= (
const M64 &_A)      { 
return *
this = (Is16vec4) _A; }
 
  386     Is16vec4& 
operator&=(
const M64 &_A)      { 
return *
this = (Is16vec4) _m_pand(vec,_A); }
 
  387     Is16vec4& 
operator|=(
const M64 &_A)      { 
return *
this = (Is16vec4) _m_por(vec,_A); }
 
  388     Is16vec4& 
operator^=(
const M64 &_A)      { 
return *
this = (Is16vec4) _m_pxor(vec,_A); }
 
  391     Is16vec4& 
operator +=(
const I16vec4 &_A) { 
return *
this = (Is16vec4)_m_paddw(vec,_A); }
 
  392     Is16vec4& 
operator -=(
const I16vec4 &_A) { 
return *
this = (Is16vec4)_m_psubw(vec,_A); }
 
  393     Is16vec4& 
operator *=(
const I16vec4 &_A) { 
return *
this = (Is16vec4)_m_pmullw(vec,_A); }
 
  396     Is16vec4 
operator<<(
const M64 &_A)       { 
return _m_psllw(vec,_A); }
 
  397     Is16vec4 
operator<<(
int _Count)          { 
return _m_psllwi(vec,_Count); }
 
  398     Is16vec4& 
operator<<=(
const M64 &_A)     { 
return *
this = (Is16vec4)_m_psllw(vec,_A); }
 
  399     Is16vec4& 
operator<<=(
int _Count)        { 
return *
this = (Is16vec4)_m_psllwi(vec,_Count); }
 
  401     Is16vec4 
operator>>(
const M64 &_A)       { 
return _m_psraw(vec,_A); }
 
  402     Is16vec4 
operator>>(
int _Count)          { 
return _m_psrawi(vec,_Count); }
 
  403     Is16vec4& 
operator>>=(
const M64 &_A)     { 
return *
this = (Is16vec4) _m_psraw(vec,_A); }
 
  404     Is16vec4& 
operator>>=(
int _Count)        { 
return *
this = (Is16vec4) _m_psrawi(vec,_Count); }
 
  406 #if defined (_ENABLE_VEC_DEBUG) 
  410         _Os << 
"[3]:" << 
_MM_4W(3,_A)
 
  411             << 
" [2]:" << 
_MM_4W(2,_A)
 
  412             << 
" [1]:" << 
_MM_4W(1,_A)
 
  413             << 
" [0]:" << 
_MM_4W(0,_A);
 
  419     const short& operator[](
int _I)
const 
  426     short& operator[](
int _I)
 
  433 inline Is16vec4 
operator*(
const Is16vec4 &_A, 
const Is16vec4 &_B)     { 
return _m_pmullw(_A,_B); }
 
  436 inline Is16vec4 
cmpeq(
const Is16vec4 &_A, 
const Is16vec4 &_B)         { 
return _m_pcmpeqw(_A,_B); }
 
  437 inline Is16vec4 
cmpneq(
const Is16vec4 &_A, 
const Is16vec4 &_B)        { 
return _m_pandn(_m_pcmpeqw(_A,_B), _mmx_all_ones); }
 
  438 inline Is16vec4 
cmpgt(
const Is16vec4 &_A, 
const Is16vec4 &_B)         { 
return _m_pcmpgtw(_A,_B); }
 
  439 inline Is16vec4 
cmplt(
const Is16vec4 &_A, 
const Is16vec4 &_B)         { 
return _m_pcmpgtw(_B,_A); }
 
  440 inline Is16vec4 cmple(
const Is16vec4 &_A, 
const Is16vec4 &_B)         { 
return _m_pandn(_m_pcmpgtw(_A,_B), _mmx_all_ones); }
 
  441 inline Is16vec4 cmpge(
const Is16vec4 &_A, 
const Is16vec4 &_B)         { 
return _m_pandn(_m_pcmpgtw(_B,_A), _mmx_all_ones); }
 
  443 inline Is16vec4 
unpack_low(
const Is16vec4 &_A, 
const Is16vec4 &_B)    { 
return _m_punpcklwd(_A,_B); }
 
  444 inline Is16vec4 
unpack_high(
const Is16vec4 &_A, 
const Is16vec4 &_B)   { 
return _m_punpckhwd(_A,_B); }
 
  446 inline Is16vec4 
sat_add(
const Is16vec4 &_A, 
const Is16vec4 &_B)       { 
return _m_paddsw(_A,_B); }
 
  447 inline Is16vec4 
sat_sub(
const Is16vec4 &_A, 
const Is16vec4 &_B)       { 
return _m_psubsw(_A,_B); }
 
  448 inline Is16vec4 
mul_high(
const Is16vec4 &_A, 
const Is16vec4 &_B)      { 
return _m_pmulhw(_A,_B); }
 
  449 inline Is32vec2 
mul_add(
const Is16vec4 &_A, 
const Is16vec4 &_B)       { 
return _m_pmaddwd(_A,_B);}
 
  455 class Iu16vec4 : 
public I16vec4
 
  459     Iu16vec4(
__m64 _Mm) : I16vec4(_Mm) { }
 
  460     Iu16vec4(
unsigned short _Ui0, 
unsigned short _Ui1,
 
  461         unsigned short _Ui2, 
unsigned short _Ui3)
 
  462         : I16vec4(_Ui0, _Ui1, _Ui2, _Ui3) { }
 
  463     explicit Iu16vec4(__int64 _I) : I16vec4 (_I) { }
 
  464     explicit Iu16vec4(
int _I) : I16vec4 (_I) { }
 
  467     Iu16vec4& 
operator= (
const M64 &_A)      { 
return *
this = (Iu16vec4) _A; }
 
  470     Iu16vec4& 
operator&=(
const M64 &_A)      { 
return *
this = (Iu16vec4) _m_pand(vec,_A); }
 
  471     Iu16vec4& 
operator|=(
const M64 &_A)      { 
return *
this = (Iu16vec4) _m_por(vec,_A); }
 
  472     Iu16vec4& 
operator^=(
const M64 &_A)      { 
return *
this = (Iu16vec4) _m_pxor(vec,_A); }
 
  475     Iu16vec4& 
operator +=(
const I16vec4 &_A) { 
return *
this = (Iu16vec4)_m_paddw(vec,_A); }
 
  476     Iu16vec4& 
operator -=(
const I16vec4 &_A) { 
return *
this = (Iu16vec4)_m_psubw(vec,_A); }
 
  477     Iu16vec4& 
operator *=(
const I16vec4 &_A) { 
return *
this = (Iu16vec4)_m_pmullw(vec,_A); }
 
  480     Iu16vec4 
operator<<(
const M64 &_A)               { 
return _m_psllw(vec,_A); }
 
  481     Iu16vec4 
operator<<(
int _Count)                  { 
return _m_psllwi(vec,_Count); }
 
  482     Iu16vec4& 
operator<<=(
const M64 &_A)             { 
return *
this = (Iu16vec4)_m_psllw(vec,_A); }
 
  483     Iu16vec4& 
operator<<=(
int _Count)                { 
return *
this = (Iu16vec4)_m_psllwi(vec,_Count); }
 
  484     Iu16vec4 
operator>>(
const M64 &_A)               { 
return _m_psrlw(vec,_A); }
 
  485     Iu16vec4 
operator>>(
int _Count)                  { 
return _m_psrlwi(vec,_Count); }
 
  486     Iu16vec4& 
operator>>=(
const M64 &_A)             { 
return *
this = (Iu16vec4) _m_psrlw(vec,_A); }
 
  487     Iu16vec4& 
operator>>=(
int _Count)                { 
return *
this = (Iu16vec4) _m_psrlwi(vec,_Count); }
 
  489 #if defined (_ENABLE_VEC_DEBUG) 
  502     const unsigned short& operator[](
int _I)
const 
  509     unsigned short& operator[](
int _I)
 
  516 inline Iu16vec4 
operator*(
const Iu16vec4 &_A, 
const Iu16vec4 &_B)     { 
return _m_pmullw(_A,_B); }
 
  517 inline Iu16vec4 
cmpeq(
const Iu16vec4 &_A, 
const Iu16vec4 &_B)         { 
return _m_pcmpeqw(_A,_B); }
 
  518 inline Iu16vec4 
cmpneq(
const Iu16vec4 &_A, 
const Iu16vec4 &_B)        { 
return _m_pandn(_m_pcmpeqw(_A,_B), _mmx_all_ones); }
 
  520 inline Iu16vec4 
sat_add(
const Iu16vec4 &_A, 
const Iu16vec4 &_B)   { 
return _m_paddusw(_A,_B); }
 
  521 inline Iu16vec4 
sat_sub(
const Iu16vec4 &_A, 
const Iu16vec4 &_B)   { 
return _m_psubusw(_A,_B); }
 
  523 inline Iu16vec4 
unpack_low(
const Iu16vec4 &_A, 
const Iu16vec4 &_B)    { 
return _m_punpcklwd(_A,_B); }
 
  524 inline Iu16vec4 
unpack_high(
const Iu16vec4 &_A, 
const Iu16vec4 &_B)   { 
return _m_punpckhwd(_A,_B); }
 
  529 class I8vec8 : 
public M64
 
  533     I8vec8(
__m64 _Mm) : M64(_Mm) { }
 
  534     I8vec8(
char _S0, 
char _S1, 
char _S2, 
char _S3, 
char _S4, 
char _S5, 
char _S6, 
char _S7)
 
  536         vec = _mm_set_pi8(_S0, _S1, _S2, _S3, _S4, _S5, _S6, _S7);
 
  538     explicit I8vec8(__int64 _I) : M64 (_I) { }
 
  539     explicit I8vec8(
int _I) : M64 (_I) { }
 
  542     I8vec8& 
operator= (
const M64 &_A)        { 
return *
this = (I8vec8) _A; }
 
  545     I8vec8& 
operator&=(
const M64 &_A)        { 
return *
this = (I8vec8) _m_pand(vec,_A); }
 
  546     I8vec8& 
operator|=(
const M64 &_A)        { 
return *
this = (I8vec8) _m_por(vec,_A); }
 
  547     I8vec8& 
operator^=(
const M64 &_A)        { 
return *
this = (I8vec8) _m_pxor(vec,_A); }
 
  550     I8vec8& 
operator +=(
const I8vec8 &_A)    { 
return *
this = (I8vec8) _m_paddb(vec,_A); }
 
  551     I8vec8& 
operator -=(
const I8vec8 &_A)    { 
return *
this = (I8vec8) _m_psubb(vec,_A); }
 
  555 inline I8vec8 
cmpeq(
const I8vec8 &_A, 
const I8vec8 &_B)       { 
return _m_pcmpeqb(_A,_B); }
 
  556 inline I8vec8 
cmpneq(
const I8vec8 &_A, 
const I8vec8 &_B)      { 
return _m_pandn(_m_pcmpeqb(_A,_B), _mmx_all_ones); }
 
  558 inline I8vec8 
unpack_low(
const I8vec8 &_A, 
const I8vec8 &_B)  { 
return _m_punpcklbw(_A,_B); }
 
  559 inline I8vec8 
unpack_high(
const I8vec8 &_A, 
const I8vec8 &_B) { 
return _m_punpckhbw(_A,_B); }
 
  564 class Is8vec8 : 
public I8vec8
 
  568     Is8vec8(
__m64 _Mm) : I8vec8(_Mm) { }
 
  569     Is8vec8(
signed char _S0, 
signed char _S1, 
signed char _S2, 
signed char _S3,
 
  570         signed char _S4, 
signed char _S5, 
signed char _S6, 
signed char _S7)
 
  571         : I8vec8(_S0, _S1, _S2, _S3, _S4, _S5, _S6, _S7) { }
 
  572     explicit Is8vec8(__int64 _I) : I8vec8 (_I) { }
 
  573     explicit Is8vec8(
int _I) : I8vec8 (_I) { }
 
  576     Is8vec8& 
operator= (
const M64 &_A)       { 
return *
this = (Is8vec8) _A; }
 
  579     Is8vec8& 
operator&=(
const M64 &_A)       { 
return *
this = (Is8vec8) _m_pand(vec,_A); }
 
  580     Is8vec8& 
operator|=(
const M64 &_A)       { 
return *
this = (Is8vec8) _m_por(vec,_A); }
 
  581     Is8vec8& 
operator^=(
const M64 &_A)       { 
return *
this = (Is8vec8) _m_pxor(vec,_A); }
 
  584     Is8vec8& 
operator +=(
const I8vec8 &_A)   { 
return *
this = (Is8vec8) _m_paddb(vec,_A); }
 
  585     Is8vec8& 
operator -=(
const I8vec8 &_A)   { 
return *
this = (Is8vec8) _m_psubb(vec,_A); }
 
  587 #if defined (_ENABLE_VEC_DEBUG) 
  591         _Os << 
"[7]:" << short(
_MM_8B(7,_A))
 
  592             << 
" [6]:" << short(
_MM_8B(6,_A))
 
  593             << 
" [5]:" << short(
_MM_8B(5,_A))
 
  594             << 
" [4]:" << short(
_MM_8B(4,_A))
 
  595             << 
" [3]:" << short(
_MM_8B(3,_A))
 
  596             << 
" [2]:" << short(
_MM_8B(2,_A))
 
  597             << 
" [1]:" << short(
_MM_8B(1,_A))
 
  598             << 
" [0]:" << short(
_MM_8B(0,_A));
 
  604     const signed char& operator[](
int _I)
const 
  611     signed char& operator[](
int _I)
 
  619 inline Is8vec8 
cmpeq(
const Is8vec8 &_A, 
const Is8vec8 &_B)        { 
return _m_pcmpeqb(_A,_B); }
 
  620 inline Is8vec8 
cmpneq(
const Is8vec8 &_A, 
const Is8vec8 &_B)       { 
return _m_pandn(_m_pcmpeqb(_A,_B), _mmx_all_ones); }
 
  621 inline Is8vec8 
cmpgt(
const Is8vec8 &_A, 
const Is8vec8 &_B)        { 
return _m_pcmpgtb(_A,_B); }
 
  622 inline Is8vec8 
cmplt(
const Is8vec8 &_A, 
const Is8vec8 &_B)        { 
return _m_pcmpgtb(_B,_A); }
 
  623 inline Is8vec8 cmple(
const Is8vec8 &_A, 
const Is8vec8 &_B)        { 
return _m_pandn(_m_pcmpgtb(_A,_B), _mmx_all_ones); }
 
  624 inline Is8vec8 cmpge(
const Is8vec8 &_A, 
const Is8vec8 &_B)        { 
return _m_pandn(_m_pcmpgtb(_B,_A), _mmx_all_ones); }
 
  626 inline Is8vec8 
unpack_low(
const Is8vec8 &_A, 
const Is8vec8 &_B)   { 
return _m_punpcklbw(_A,_B); }
 
  627 inline Is8vec8 
unpack_high(
const Is8vec8 &_A, 
const Is8vec8 &_B)  { 
return _m_punpckhbw(_A,_B); }
 
  629 inline Is8vec8 
sat_add(
const Is8vec8 &_A, 
const Is8vec8 &_B)      { 
return _m_paddsb(_A,_B); }
 
  630 inline Is8vec8 
sat_sub(
const Is8vec8 &_A, 
const Is8vec8 &_B)      { 
return _m_psubsb(_A,_B); }
 
  635 class Iu8vec8 : 
public I8vec8
 
  639     Iu8vec8(
__m64 _Mm) : I8vec8(_Mm) { }
 
  640     Iu8vec8(
unsigned char _S0, 
unsigned char _S1, 
unsigned char _S2,
 
  641         unsigned char _S3, 
unsigned char _S4, 
unsigned char _S5,
 
  642         unsigned char _S6, 
unsigned char _S7)
 
  643         : I8vec8(_S0, _S1, _S2, _S3, _S4, _S5, _S6, _S7) { }
 
  644     explicit Iu8vec8(__int64 _I) : I8vec8 (_I) { }
 
  645     explicit Iu8vec8(
int _I) : I8vec8 (_I) { }
 
  648     Iu8vec8& 
operator= (
const M64 &_A)       { 
return *
this = (Iu8vec8) _A; }
 
  650     Iu8vec8& 
operator&=(
const M64 &_A)       { 
return *
this = (Iu8vec8) _m_pand(vec,_A); }
 
  651     Iu8vec8& 
operator|=(
const M64 &_A)       { 
return *
this = (Iu8vec8) _m_por(vec,_A); }
 
  652     Iu8vec8& 
operator^=(
const M64 &_A)       { 
return *
this = (Iu8vec8) _m_pxor(vec,_A); }
 
  654     Iu8vec8& 
operator +=(
const I8vec8 &_A)   { 
return *
this = (Iu8vec8) _m_paddb(vec,_A); }
 
  655     Iu8vec8& 
operator -=(
const I8vec8 &_A)   { 
return *
this = (Iu8vec8) _m_psubb(vec,_A); }
 
  657 #if defined (_ENABLE_VEC_DEBUG) 
  661         _Os << 
"[7]:"  << (
unsigned short) (
_MM_8UB(7,_A))
 
  662             << 
" [6]:" << (
unsigned short) (
_MM_8UB(6,_A))
 
  663             << 
" [5]:" << (
unsigned short) (
_MM_8UB(5,_A))
 
  664             << 
" [4]:" << (
unsigned short) (
_MM_8UB(4,_A))
 
  665             << 
" [3]:" << (
unsigned short) (
_MM_8UB(3,_A))
 
  666             << 
" [2]:" << (
unsigned short) (
_MM_8UB(2,_A))
 
  667             << 
" [1]:" << (
unsigned short) (
_MM_8UB(1,_A))
 
  668             << 
" [0]:" << (
unsigned short) (
_MM_8UB(0,_A));
 
  674     const unsigned char& operator[](
int _I)
const 
  681     unsigned char& operator[](
int _I)
 
  689 inline Iu8vec8 
cmpeq(
const Iu8vec8 &_A, 
const Iu8vec8 &_B)        { 
return _m_pcmpeqb(_A,_B); }
 
  690 inline Iu8vec8 
cmpneq(
const Iu8vec8 &_A, 
const Iu8vec8 &_B)       { 
return _m_pandn(_m_pcmpeqb(_A,_B), _mmx_all_ones); }
 
  692 inline Iu8vec8 
unpack_low(
const Iu8vec8 &_A, 
const Iu8vec8 &_B)   { 
return _m_punpcklbw(_A,_B); }
 
  693 inline Iu8vec8 
unpack_high(
const Iu8vec8 &_A, 
const Iu8vec8 &_B)  { 
return _m_punpckhbw(_A,_B); }
 
  695 inline Iu8vec8 
sat_add(
const Iu8vec8 &_A, 
const Iu8vec8 &_B)      { 
return _m_paddusb(_A,_B); }
 
  696 inline Iu8vec8 
sat_sub(
const Iu8vec8 &_A, 
const Iu8vec8 &_B)      { 
return _m_psubusb(_A,_B); }
 
  698 inline Is16vec4 
pack_sat(
const Is32vec2 &_A, 
const Is32vec2 &_B)      { 
return _m_packssdw(_A,_B); }
 
  699 inline Is8vec8 
pack_sat(
const Is16vec4 &_A, 
const Is16vec4 &_B)       { 
return _m_packsswb(_A,_B); }
 
  700 inline Iu8vec8 
packu_sat(
const Is16vec4 &_A, 
const Is16vec4 &_B)  { 
return _m_packuswb(_A,_B); }
 
  703 #define IVEC_LOGICALS(vect,element) \ 
  704 inline I##vect##vec##element operator& (const I##vect##vec##element &_A, const I##vect##vec##element &_B) \ 
  705 { return _m_pand( _A,_B); } \ 
  706 inline I##vect##vec##element operator| (const I##vect##vec##element &_A, const I##vect##vec##element &_B) \ 
  707 { return _m_por( _A,_B); } \ 
  708 inline I##vect##vec##element operator^ (const I##vect##vec##element &_A, const I##vect##vec##element &_B) \ 
  709 { return _m_pxor( _A,_B); } \ 
  710 inline I##vect##vec##element andnot (const I##vect##vec##element &_A, const I##vect##vec##element &_B) \ 
  711 { return _m_pandn( _A,_B); } 
  726 #define IVEC_ADD_SUB(vect,element,opsize) \ 
  727 inline I##vect##vec##element operator+ (const I##vect##vec##element &_A, const I##vect##vec##element &_B) \ 
  728 { return _m_padd##opsize( _A,_B); } \ 
  729 inline I##vect##vec##element operator- (const I##vect##vec##element &_A, const I##vect##vec##element &_B) \ 
  730 { return _m_psub##opsize( _A,_B); } 
  733 IVEC_ADD_SUB(u8,8, b)
 
  734 IVEC_ADD_SUB(s8,8, b)
 
  735 IVEC_ADD_SUB(16,4, w)
 
  736 IVEC_ADD_SUB(u16,4, w)
 
  737 IVEC_ADD_SUB(s16,4, w)
 
  738 IVEC_ADD_SUB(32,2, d)
 
  739 IVEC_ADD_SUB(u32,2, d)
 
  740 IVEC_ADD_SUB(s32,2, d)
 
  752 #define IVEC_SELECT(vect12,vect34,element,selop)               \ 
  753     inline I##vect34##vec##element select_##selop (            \ 
  754         const I##vect12##vec##element &_A,                      \ 
  755         const I##vect12##vec##element &_B,                      \ 
  756         const I##vect34##vec##element &_C,                      \ 
  757         const I##vect34##vec##element &_D)                      \ 
  759     I##vect12##vec##element _Mask = cmp##selop(_A,_B);            \ 
  760     return( (I##vect34##vec##element)(_Mask &_C ) |             \ 
  761             (I##vect34##vec##element)((_m_pandn(_Mask, _D ))));  \ 
  764 IVEC_SELECT(8,s8,8,eq)
 
  765 IVEC_SELECT(8,u8,8,eq)
 
  766 IVEC_SELECT(8,8,8,eq)
 
  767 IVEC_SELECT(8,s8,8,neq)
 
  768 IVEC_SELECT(8,u8,8,neq)
 
  769 IVEC_SELECT(8,8,8,neq)
 
  771 IVEC_SELECT(16,s16,4,eq)
 
  772 IVEC_SELECT(16,u16,4,eq)
 
  773 IVEC_SELECT(16,16,4,eq)
 
  774 IVEC_SELECT(16,s16,4,neq)
 
  775 IVEC_SELECT(16,u16,4,neq)
 
  776 IVEC_SELECT(16,16,4,neq)
 
  778 IVEC_SELECT(32,s32,2,eq)
 
  779 IVEC_SELECT(32,u32,2,eq)
 
  780 IVEC_SELECT(32,32,2,eq)
 
  781 IVEC_SELECT(32,s32,2,neq)
 
  782 IVEC_SELECT(32,u32,2,neq)
 
  783 IVEC_SELECT(32,32,2,neq)
 
  786 IVEC_SELECT(s8,s8,8,gt)
 
  787 IVEC_SELECT(s8,u8,8,gt)
 
  788 IVEC_SELECT(s8,8,8,gt)
 
  789 IVEC_SELECT(s8,s8,8,lt)
 
  790 IVEC_SELECT(s8,u8,8,lt)
 
  791 IVEC_SELECT(s8,8,8,lt)
 
  792 IVEC_SELECT(s8,s8,8,le)
 
  793 IVEC_SELECT(s8,u8,8,le)
 
  794 IVEC_SELECT(s8,8,8,le)
 
  795 IVEC_SELECT(s8,s8,8,ge)
 
  796 IVEC_SELECT(s8,u8,8,ge)
 
  797 IVEC_SELECT(s8,8,8,ge)
 
  799 IVEC_SELECT(s16,s16,4,gt)
 
  800 IVEC_SELECT(s16,u16,4,gt)
 
  801 IVEC_SELECT(s16,16,4,gt)
 
  802 IVEC_SELECT(s16,s16,4,lt)
 
  803 IVEC_SELECT(s16,u16,4,lt)
 
  804 IVEC_SELECT(s16,16,4,lt)
 
  805 IVEC_SELECT(s16,s16,4,le)
 
  806 IVEC_SELECT(s16,u16,4,le)
 
  807 IVEC_SELECT(s16,16,4,le)
 
  808 IVEC_SELECT(s16,s16,4,ge)
 
  809 IVEC_SELECT(s16,u16,4,ge)
 
  810 IVEC_SELECT(s16,16,4,ge)
 
  812 IVEC_SELECT(s32,s32,2,gt)
 
  813 IVEC_SELECT(s32,u32,2,gt)
 
  814 IVEC_SELECT(s32,32,2,gt)
 
  815 IVEC_SELECT(s32,s32,2,lt)
 
  816 IVEC_SELECT(s32,u32,2,lt)
 
  817 IVEC_SELECT(s32,32,2,lt)
 
  818 IVEC_SELECT(s32,s32,2,le)
 
  819 IVEC_SELECT(s32,u32,2,le)
 
  820 IVEC_SELECT(s32,32,2,le)
 
  821 IVEC_SELECT(s32,s32,2,ge)
 
  822 IVEC_SELECT(s32,u32,2,ge)
 
  823 IVEC_SELECT(s32,32,2,ge)
 
  828 inline static void empty(
void)      { _m_empty(); }
 
  830 #endif  // defined(_M_IX86) 
  832 #if defined (_SILENCE_IVEC_C4799) 
uint_2 operator|(const uint_2 &_Lhs, const uint_2 &_Rhs) __GPU
Definition: amp_short_vectors.h:22852
 
_CRTIMP void __cdecl _wassert(_In_z_ const wchar_t *_Message, _In_z_ const wchar_t *_File, _In_ unsigned _Line)
 
_Variant_copymove_layer_ & operator=(_Variant_copymove_layer_ &&_That) _NOEXCEPT_OP((conjunction< is_nothrow_move_constructible< _Types >...
 
#define _MM_4UW(element, vector)
Definition: ivec.h:80
 
#define _MM_8B(element, vector)
Definition: ivec.h:78
 
T & operator<<=(T &lhs, SafeInt< U, E > rhs)
Definition: safeint.h:1505
 
Is32vec4 cmplt(const Is32vec4 &_A, const Is32vec4 &_B)
Definition: dvec.h:325
 
uint_2 operator<<(const uint_2 &_Lhs, const uint_2 &_Rhs) __GPU
Definition: amp_short_vectors.h:22866
 
std::enable_if< details::_Is_extent_or_index< _Tuple_type< _Rank > >::value, _Tuple_type< _Rank > >::type operator*(const _Tuple_type< _Rank > &_Lhs, typename _Tuple_type< _Rank >::value_type _Rhs) __GPU
Definition: amp.h:890
 
I64vec2 unpack_high(const I64vec2 &_A, const I64vec2 &_B)
Definition: dvec.h:223
 
Is16vec8 mul_high(const Is16vec8 &_A, const Is16vec8 &_B)
Definition: dvec.h:521
 
T & operator+=(T &lhs, SafeInt< U, E > rhs)
Definition: safeint.h:1439
 
uint_2 operator^(const uint_2 &_Lhs, const uint_2 &_Rhs) __GPU
Definition: amp_short_vectors.h:22845
 
#define _CRTIMP
Definition: vcruntime.h:37
 
T & operator>>=(T &lhs, SafeInt< U, E > rhs)
Definition: safeint.h:1512
 
Iu8vec16 packu_sat(const Is16vec8 &_A, const Is16vec8 &_B)
Definition: dvec.h:812
 
Is16vec8 pack_sat(const Is32vec4 &_A, const Is32vec4 &_B)
Definition: dvec.h:810
 
launch & operator^=(launch &_Left, launch _Right)
Definition: future:86
 
#define _In_z_
Definition: sal.h:310
 
#define _In_
Definition: sal.h:305
 
_N wchar_t * _S1
Definition: wchar.h:163
 
T & operator*=(T &lhs, SafeInt< U, E > rhs)
Definition: safeint.h:1457
 
I64vec2 unpack_low(const I64vec2 &_A, const I64vec2 &_B)
Definition: dvec.h:222
 
Is32vec4 mul_add(const Is16vec8 &_A, const Is16vec8 &_B)
Definition: dvec.h:522
 
__m64
Definition: mmintrin.h:45
 
Is16vec8 sat_add(const Is16vec8 &_A, const Is16vec8 &_B)
Definition: dvec.h:524
 
basic_ostream< char, char_traits< char > > ostream
Definition: iosfwd:625
 
launch & operator&=(launch &_Left, launch _Right)
Definition: future:74
 
#define _MM_4W(element, vector)
Definition: ivec.h:81
 
M128 andnot(const M128 &_A, const M128 &_B)
Definition: dvec.h:147
 
#define _MM_2DW(element, vector)
Definition: ivec.h:84
 
uint_2 operator>>(const uint_2 &_Lhs, const uint_2 &_Rhs) __GPU
Definition: amp_short_vectors.h:22873
 
constexpr auto empty(const _Container &_Cont) -> decltype(_Cont.empty())
Definition: xutility:1491
 
#define _VEC_ASSERT(_Expression)
Definition: ivec.h:45
 
I32vec4 cmpneq(const I32vec4 &_A, const I32vec4 &_B)
Definition: dvec.h:256
 
_Diff _Count
Definition: algorithm:1941
 
I32vec4 cmpeq(const I32vec4 &_A, const I32vec4 &_B)
Definition: dvec.h:255
 
uint_2 operator&(const uint_2 &_Lhs, const uint_2 &_Rhs) __GPU
Definition: amp_short_vectors.h:22859
 
#define _MM_2UDW(element, vector)
Definition: ivec.h:83
 
#define _MM_8UB(element, vector)
Definition: ivec.h:77
 
Is32vec4 cmpgt(const Is32vec4 &_A, const Is32vec4 &_B)
Definition: dvec.h:324
 
launch & operator|=(launch &_Left, launch _Right)
Definition: future:80
 
Is16vec8 sat_sub(const Is16vec8 &_A, const Is16vec8 &_B)
Definition: dvec.h:525
 
T & operator-=(T &lhs, SafeInt< U, E > rhs)
Definition: safeint.h:1448