16 #ifndef _IVEC_H_INCLUDED
17 #define _IVEC_H_INCLUDED
20 #if !defined __cplusplus
21 #error ERROR: This file is only supported in C++ compilations!
24 #if defined (_M_CEE_PURE)
25 #error ERROR: This file is not supported in the pure mode!
34 #define _VEC_ASSERT(_Expression) ((void)0)
45 #define _VEC_ASSERT(_Expression) (void)( (!!(_Expression)) || (_wassert(_CRT_WIDE(#_Expression), _CRT_WIDE(__FILE__), __LINE__), 0) )
54 #if defined (_SILENCE_IVEC_C4799)
56 #pragma warning(disable: 4799)
62 #if defined (_ENABLE_VEC_DEBUG)
// Element accessors: reinterpret the storage of `vector` as an array of the
// named element type and yield element number `element` as an lvalue.
//
// The operands are used directly rather than pasted with `##`: pasting `&` or
// `+` onto an identifier does not form a single valid preprocessing token, so
// conforming preprocessors reject it. Both arguments are parenthesized so that
// expression arguments (for example `i + 1`) index as intended. The C-style
// casts are kept on purpose so the macros still accept const operands.
#define _MM_8UB(element,vector) (*((unsigned char*)&(vector) + (element)))
#define _MM_8B(element,vector) (*((signed char*)&(vector) + (element)))

#define _MM_4UW(element,vector) (*((unsigned short*)&(vector) + (element)))
#define _MM_4W(element,vector) (*((short*)&(vector) + (element)))

#define _MM_2UDW(element,vector) (*((unsigned int*)&(vector) + (element)))
#define _MM_2DW(element,vector) (*((int*)&(vector) + (element)))

// Views the object named `vec` at the point of use as a single __int64.
#define _MM_QW (*((__int64*)&vec))
100 M64(
__m64 _Mm) { vec = _Mm; }
101 M64(__int64 _Mm) { vec = _mm_set_pi32((
int)(_Mm >> 32), (
int)_Mm); }
102 M64(
int _I) { vec = _m_from_int(_I); }
104 operator __m64()
const {
return vec; }
107 M64&
operator&=(
const M64 &_A) {
return *
this = (M64) _m_pand(vec,_A); }
108 M64&
operator|=(
const M64 &_A) {
return *
this = (M64) _m_por(vec,_A); }
109 M64&
operator^=(
const M64 &_A) {
return *
this = (M64) _m_pxor(vec,_A); }
113 const union {__int64 m1;
__m64 m2;} __mmx_all_ones_cheat =
116 #define _mmx_all_ones ((M64)__mmx_all_ones_cheat.m2)
118 inline M64
operator&(
const M64 &_A,
const M64 &_B) {
return _m_pand(_A,_B); }
119 inline M64
operator|(
const M64 &_A,
const M64 &_B) {
return _m_por(_A,_B); }
120 inline M64
operator^(
const M64 &_A,
const M64 &_B) {
return _m_pxor(_A,_B); }
121 inline M64
andnot(
const M64 &_A,
const M64 &_B) {
return _m_pandn(_A,_B); }
128 class I64vec1 :
public M64
132 I64vec1(
__m64 _Mm) : M64(_Mm) { }
133 explicit I64vec1(
int _I) : M64(_I) { }
134 explicit I64vec1(__int64 _Mm) : M64(_Mm) { }
136 I64vec1& operator= (
const M64 &_A) {
return *
this = (I64vec1) _A; }
137 I64vec1&
operator&=(
const M64 &_A) {
return *
this = (I64vec1) _m_pand(vec,_A); }
138 I64vec1&
operator|=(
const M64 &_A) {
return *
this = (I64vec1) _m_por(vec,_A); }
139 I64vec1&
operator^=(
const M64 &_A) {
return *
this = (I64vec1) _m_pxor(vec,_A); }
142 I64vec1
operator<<(
const M64 &_A) {
return _m_psllq(vec,_A); }
144 I64vec1& operator<<=(
const M64 &_A) {
return *
this = (I64vec1) _m_psllq(vec,_A); }
145 I64vec1& operator<<=(
int _Count) {
return *
this = (I64vec1) _m_psllqi(vec, _Count); }
146 I64vec1
operator>>(
const M64 &_A) {
return _m_psrlq(vec,_A); }
147 I64vec1
operator>>(
int _Count) {
return _m_psrlqi(vec, _Count); }
148 I64vec1& operator>>=(
const M64 &_A) {
return *
this = (I64vec1) _m_psrlq(vec,_A); }
149 I64vec1& operator>>=(
int _Count) {
return *
this = (I64vec1) _m_psrlqi(vec, _Count); }
155 class I32vec2 :
public M64
159 I32vec2(
__m64 _Mm) : M64(_Mm) { }
160 I32vec2(
int _I0,
int _I1) { vec = _mm_set_pi32(_I0, _I1); }
161 explicit I32vec2(
int _I) : M64 (_I) { }
162 explicit I32vec2(__int64 _I): M64(_I) {}
165 I32vec2& operator= (
const M64 &_A) {
return *
this = (I32vec2)_A; }
168 I32vec2&
operator&=(
const M64 &_A) {
return *
this = (I32vec2) _m_pand(vec,_A); }
169 I32vec2&
operator|=(
const M64 &_A) {
return *
this = (I32vec2) _m_por(vec,_A); }
170 I32vec2&
operator^=(
const M64 &_A) {
return *
this = (I32vec2) _m_pxor(vec,_A); }
173 I32vec2& operator +=(
const I32vec2 &_A) {
return *
this = (I32vec2) _m_paddd(vec,_A); }
174 I32vec2& operator -=(
const I32vec2 &_A) {
return *
this = (I32vec2) _m_psubd(vec,_A); }
177 I32vec2
operator<<(
const I32vec2 &_A) {
return _m_pslld(vec,_A); }
178 I32vec2
operator<<(
int _Count) {
return _m_pslldi(vec,_Count); }
179 I32vec2& operator<<=(
const I32vec2 &_A) {
return *
this = (I32vec2) _m_pslld(vec,_A); }
180 I32vec2& operator<<=(
int _Count) {
return *
this = (I32vec2) _m_pslldi(vec,_Count); }
185 inline I32vec2
cmpeq(
const I32vec2 &_A,
const I32vec2 &_B) {
return _m_pcmpeqd(_A,_B); }
186 inline I32vec2
cmpneq(
const I32vec2 &_A,
const I32vec2 &_B) {
return _m_pandn(_m_pcmpeqd(_A,_B), _mmx_all_ones); }
188 inline I32vec2
unpack_low(
const I32vec2 &_A,
const I32vec2 &_B) {
return _m_punpckldq(_A,_B); }
189 inline I32vec2
unpack_high(
const I32vec2 &_A,
const I32vec2 &_B) {
return _m_punpckhdq(_A,_B); }
194 class Is32vec2 :
public I32vec2
198 Is32vec2(
__m64 _Mm) : I32vec2(_Mm) { }
199 Is32vec2(
signed int _I0,
signed int _I1) : I32vec2(_I0, _I1) {}
200 explicit Is32vec2(
int _I) : I32vec2 (_I) {}
201 explicit Is32vec2(__int64 _I): I32vec2(_I) {}
204 Is32vec2& operator= (
const M64 &_A) {
return *
this = (Is32vec2)_A; }
207 Is32vec2&
operator&=(
const M64 &_A) {
return *
this = (Is32vec2) _m_pand(vec,_A); }
208 Is32vec2&
operator|=(
const M64 &_A) {
return *
this = (Is32vec2) _m_por(vec,_A); }
209 Is32vec2&
operator^=(
const M64 &_A) {
return *
this = (Is32vec2) _m_pxor(vec,_A); }
212 Is32vec2& operator +=(
const I32vec2 &_A) {
return *
this = (Is32vec2) _m_paddd(vec,_A); }
213 Is32vec2& operator -=(
const I32vec2 &_A) {
return *
this = (Is32vec2) _m_psubd(vec,_A); }
216 Is32vec2
operator<<(
const M64 &_A) {
return _m_pslld(vec,_A); }
217 Is32vec2
operator<<(
int _Count) {
return _m_pslldi(vec,_Count); }
218 Is32vec2& operator<<=(
const M64 &_A) {
return *
this = (Is32vec2) _m_pslld(vec,_A); }
219 Is32vec2& operator<<=(
int _Count) {
return *
this = (Is32vec2) _m_pslldi(vec,_Count); }
221 Is32vec2
operator>>(
const M64 &_A) {
return _m_psrad(vec, _A); }
222 Is32vec2
operator>>(
int _Count) {
return _m_psradi(vec, _Count); }
223 Is32vec2& operator>>=(
const M64 &_A) {
return *
this = (Is32vec2) _m_psrad(vec, _A); }
224 Is32vec2& operator>>=(
int _Count) {
return *
this = (Is32vec2) _m_psradi(vec, _Count); }
226 #if defined (_ENABLE_VEC_DEBUG)
230 _Os <<
" [1]:" <<
_MM_2DW(1,_A)
237 const int& operator[](
int _I)
const
244 int& operator[](
int _I)
252 inline Is32vec2
cmpeq(
const Is32vec2 &_A,
const Is32vec2 &_B) {
return _m_pcmpeqd(_A,_B); }
253 inline Is32vec2
cmpneq(
const Is32vec2 &_A,
const Is32vec2 &_B) {
return _m_pandn(_m_pcmpeqd(_A,_B), _mmx_all_ones); }
254 inline Is32vec2
cmpgt(
const Is32vec2 &_A,
const Is32vec2 &_B) {
return _m_pcmpgtd(_A,_B); }
255 inline Is32vec2
cmplt(
const Is32vec2 &_A,
const Is32vec2 &_B) {
return _m_pcmpgtd(_B,_A); }
256 inline Is32vec2 cmple(
const Is32vec2 &_A,
const Is32vec2 &_B) {
return _m_pandn(_m_pcmpgtd(_A,_B), _mmx_all_ones); }
257 inline Is32vec2 cmpge(
const Is32vec2 &_A,
const Is32vec2 &_B) {
return _m_pandn(_m_pcmpgtd(_B,_A), _mmx_all_ones); }
259 inline Is32vec2
unpack_low(
const Is32vec2 &_A,
const Is32vec2 &_B) {
return _m_punpckldq(_A,_B); }
260 inline Is32vec2
unpack_high(
const Is32vec2 &_A,
const Is32vec2 &_B) {
return _m_punpckhdq(_A,_B); }
265 class Iu32vec2 :
public I32vec2
269 Iu32vec2(
__m64 _Mm) : I32vec2(_Mm) { }
270 Iu32vec2(
unsigned int _I0,
unsigned int _I1) : I32vec2(_I0, _I1) {}
271 explicit Iu32vec2(
int _I) : I32vec2 (_I) { }
272 explicit Iu32vec2(__int64 _I) : I32vec2 (_I) { }
275 Iu32vec2& operator= (
const M64 &_A) {
return *
this = (Iu32vec2) _A; }
278 Iu32vec2&
operator&=(
const M64 &_A) {
return *
this = (Iu32vec2) _m_pand(vec,_A); }
279 Iu32vec2&
operator|=(
const M64 &_A) {
return *
this = (Iu32vec2) _m_por(vec,_A); }
280 Iu32vec2&
operator^=(
const M64 &_A) {
return *
this = (Iu32vec2) _m_pxor(vec,_A); }
283 Iu32vec2& operator +=(
const I32vec2 &_A) {
return *
this = (Iu32vec2) _m_paddd(vec,_A); }
284 Iu32vec2& operator -=(
const I32vec2 &_A) {
return *
this = (Iu32vec2) _m_psubd(vec,_A); }
287 Iu32vec2
operator<<(
const M64 &_A) {
return _m_pslld(vec,_A); }
288 Iu32vec2
operator<<(
int _Count) {
return _m_pslldi(vec,_Count); }
289 Iu32vec2& operator<<=(
const M64 &_A) {
return *
this = (Iu32vec2) _m_pslld(vec,_A); }
290 Iu32vec2& operator<<=(
int _Count) {
return *
this = (Iu32vec2) _m_pslldi(vec,_Count); }
291 Iu32vec2
operator>>(
const M64 &_A) {
return _m_psrld(vec,_A); }
292 Iu32vec2
operator>>(
int _Count) {
return _m_psrldi(vec,_Count); }
293 Iu32vec2& operator>>=(
const M64 &_A) {
return *
this = (Iu32vec2) _m_psrld(vec,_A); }
294 Iu32vec2& operator>>=(
int _Count) {
return *
this = (Iu32vec2) _m_psrldi(vec,_Count); }
296 #if defined (_ENABLE_VEC_DEBUG)
307 const unsigned int& operator[](
int _I)
const
314 unsigned int& operator[](
int _I)
322 inline Iu32vec2
cmpeq(
const Iu32vec2 &_A,
const Iu32vec2 &_B) {
return _m_pcmpeqd(_A,_B); }
323 inline Iu32vec2
cmpneq(
const Iu32vec2 &_A,
const Iu32vec2 &_B) {
return _m_pandn(_m_pcmpeqd(_A,_B), _mmx_all_ones); }
325 inline Iu32vec2
unpack_low(
const Iu32vec2 &_A,
const Iu32vec2 &_B) {
return _m_punpckldq(_A,_B); }
326 inline Iu32vec2
unpack_high(
const Iu32vec2 &_A,
const Iu32vec2 &_B) {
return _m_punpckhdq(_A,_B); }
331 class I16vec4 :
public M64
335 I16vec4(
__m64 _Mm) : M64(_Mm) { }
336 I16vec4(
short _I0,
short _I1,
short _I2,
short _I3)
338 vec = _mm_set_pi16(_I0, _I1, _I2, _I3);
340 explicit I16vec4(__int64 _I) : M64 (_I) { }
341 explicit I16vec4(
int _I) : M64 (_I) { }
344 I16vec4& operator= (
const M64 &_A) {
return *
this = (I16vec4) _A; }
347 I16vec4&
operator&=(
const M64 &_A) {
return *
this = (I16vec4) _m_pand(vec,_A); }
348 I16vec4&
operator|=(
const M64 &_A) {
return *
this = (I16vec4) _m_por(vec,_A); }
349 I16vec4&
operator^=(
const M64 &_A) {
return *
this = (I16vec4) _m_pxor(vec,_A); }
352 I16vec4& operator +=(
const I16vec4 &_A) {
return *
this = (I16vec4)_m_paddw(vec,_A); }
353 I16vec4& operator -=(
const I16vec4 &_A) {
return *
this = (I16vec4)_m_psubw(vec,_A); }
354 I16vec4& operator *=(
const I16vec4 &_A) {
return *
this = (I16vec4)_m_pmullw(vec,_A); }
357 I16vec4
operator<<(
const I16vec4 &_A) {
return _m_psllw(vec,_A); }
358 I16vec4
operator<<(
int _Count) {
return _m_psllwi(vec,_Count); }
359 I16vec4& operator<<=(
const I16vec4 &_A) {
return *
this = (I16vec4)_m_psllw(vec,_A); }
360 I16vec4& operator<<=(
int _Count) {
return *
this = (I16vec4)_m_psllwi(vec,_Count); }
363 inline I16vec4
operator*(
const I16vec4 &_A,
const I16vec4 &_B) {
return _m_pmullw(_A,_B); }
364 inline I16vec4
cmpeq(
const I16vec4 &_A,
const I16vec4 &_B) {
return _m_pcmpeqw(_A,_B); }
365 inline I16vec4
cmpneq(
const I16vec4 &_A,
const I16vec4 &_B) {
return _m_pandn(_m_pcmpeqw(_A,_B), _mmx_all_ones); }
367 inline I16vec4
unpack_low(
const I16vec4 &_A,
const I16vec4 &_B) {
return _m_punpcklwd(_A,_B); }
368 inline I16vec4
unpack_high(
const I16vec4 &_A,
const I16vec4 &_B) {
return _m_punpckhwd(_A,_B); }
373 class Is16vec4 :
public I16vec4
377 Is16vec4(
__m64 _Mm) : I16vec4(_Mm) { }
378 Is16vec4(
short _I0,
short _I1,
short _I2,
short _I3) : I16vec4(_I0, _I1, _I2, _I3) { }
379 explicit Is16vec4(__int64 _I) : I16vec4 (_I) { }
380 explicit Is16vec4(
int _I) : I16vec4 (_I) { }
383 Is16vec4& operator= (
const M64 &_A) {
return *
this = (Is16vec4) _A; }
386 Is16vec4&
operator&=(
const M64 &_A) {
return *
this = (Is16vec4) _m_pand(vec,_A); }
387 Is16vec4&
operator|=(
const M64 &_A) {
return *
this = (Is16vec4) _m_por(vec,_A); }
388 Is16vec4&
operator^=(
const M64 &_A) {
return *
this = (Is16vec4) _m_pxor(vec,_A); }
391 Is16vec4& operator +=(
const I16vec4 &_A) {
return *
this = (Is16vec4)_m_paddw(vec,_A); }
392 Is16vec4& operator -=(
const I16vec4 &_A) {
return *
this = (Is16vec4)_m_psubw(vec,_A); }
393 Is16vec4& operator *=(
const I16vec4 &_A) {
return *
this = (Is16vec4)_m_pmullw(vec,_A); }
396 Is16vec4
operator<<(
const M64 &_A) {
return _m_psllw(vec,_A); }
397 Is16vec4
operator<<(
int _Count) {
return _m_psllwi(vec,_Count); }
398 Is16vec4& operator<<=(
const M64 &_A) {
return *
this = (Is16vec4)_m_psllw(vec,_A); }
399 Is16vec4& operator<<=(
int _Count) {
return *
this = (Is16vec4)_m_psllwi(vec,_Count); }
401 Is16vec4
operator>>(
const M64 &_A) {
return _m_psraw(vec,_A); }
402 Is16vec4
operator>>(
int _Count) {
return _m_psrawi(vec,_Count); }
403 Is16vec4& operator>>=(
const M64 &_A) {
return *
this = (Is16vec4) _m_psraw(vec,_A); }
404 Is16vec4& operator>>=(
int _Count) {
return *
this = (Is16vec4) _m_psrawi(vec,_Count); }
406 #if defined (_ENABLE_VEC_DEBUG)
410 _Os <<
"[3]:" <<
_MM_4W(3,_A)
411 <<
" [2]:" <<
_MM_4W(2,_A)
412 <<
" [1]:" <<
_MM_4W(1,_A)
413 <<
" [0]:" <<
_MM_4W(0,_A);
419 const short& operator[](
int _I)
const
426 short& operator[](
int _I)
433 inline Is16vec4
operator*(
const Is16vec4 &_A,
const Is16vec4 &_B) {
return _m_pmullw(_A,_B); }
436 inline Is16vec4
cmpeq(
const Is16vec4 &_A,
const Is16vec4 &_B) {
return _m_pcmpeqw(_A,_B); }
437 inline Is16vec4
cmpneq(
const Is16vec4 &_A,
const Is16vec4 &_B) {
return _m_pandn(_m_pcmpeqw(_A,_B), _mmx_all_ones); }
438 inline Is16vec4
cmpgt(
const Is16vec4 &_A,
const Is16vec4 &_B) {
return _m_pcmpgtw(_A,_B); }
439 inline Is16vec4
cmplt(
const Is16vec4 &_A,
const Is16vec4 &_B) {
return _m_pcmpgtw(_B,_A); }
440 inline Is16vec4 cmple(
const Is16vec4 &_A,
const Is16vec4 &_B) {
return _m_pandn(_m_pcmpgtw(_A,_B), _mmx_all_ones); }
441 inline Is16vec4 cmpge(
const Is16vec4 &_A,
const Is16vec4 &_B) {
return _m_pandn(_m_pcmpgtw(_B,_A), _mmx_all_ones); }
443 inline Is16vec4
unpack_low(
const Is16vec4 &_A,
const Is16vec4 &_B) {
return _m_punpcklwd(_A,_B); }
444 inline Is16vec4
unpack_high(
const Is16vec4 &_A,
const Is16vec4 &_B) {
return _m_punpckhwd(_A,_B); }
446 inline Is16vec4
sat_add(
const Is16vec4 &_A,
const Is16vec4 &_B) {
return _m_paddsw(_A,_B); }
447 inline Is16vec4
sat_sub(
const Is16vec4 &_A,
const Is16vec4 &_B) {
return _m_psubsw(_A,_B); }
448 inline Is16vec4
mul_high(
const Is16vec4 &_A,
const Is16vec4 &_B) {
return _m_pmulhw(_A,_B); }
449 inline Is32vec2
mul_add(
const Is16vec4 &_A,
const Is16vec4 &_B) {
return _m_pmaddwd(_A,_B);}
455 class Iu16vec4 :
public I16vec4
459 Iu16vec4(
__m64 _Mm) : I16vec4(_Mm) { }
460 Iu16vec4(
unsigned short _Ui0,
unsigned short _Ui1,
461 unsigned short _Ui2,
unsigned short _Ui3)
462 : I16vec4(_Ui0, _Ui1, _Ui2, _Ui3) { }
463 explicit Iu16vec4(__int64 _I) : I16vec4 (_I) { }
464 explicit Iu16vec4(
int _I) : I16vec4 (_I) { }
467 Iu16vec4& operator= (
const M64 &_A) {
return *
this = (Iu16vec4) _A; }
470 Iu16vec4&
operator&=(
const M64 &_A) {
return *
this = (Iu16vec4) _m_pand(vec,_A); }
471 Iu16vec4&
operator|=(
const M64 &_A) {
return *
this = (Iu16vec4) _m_por(vec,_A); }
472 Iu16vec4&
operator^=(
const M64 &_A) {
return *
this = (Iu16vec4) _m_pxor(vec,_A); }
475 Iu16vec4& operator +=(
const I16vec4 &_A) {
return *
this = (Iu16vec4)_m_paddw(vec,_A); }
476 Iu16vec4& operator -=(
const I16vec4 &_A) {
return *
this = (Iu16vec4)_m_psubw(vec,_A); }
477 Iu16vec4& operator *=(
const I16vec4 &_A) {
return *
this = (Iu16vec4)_m_pmullw(vec,_A); }
480 Iu16vec4
operator<<(
const M64 &_A) {
return _m_psllw(vec,_A); }
481 Iu16vec4
operator<<(
int _Count) {
return _m_psllwi(vec,_Count); }
482 Iu16vec4& operator<<=(
const M64 &_A) {
return *
this = (Iu16vec4)_m_psllw(vec,_A); }
483 Iu16vec4& operator<<=(
int _Count) {
return *
this = (Iu16vec4)_m_psllwi(vec,_Count); }
484 Iu16vec4
operator>>(
const M64 &_A) {
return _m_psrlw(vec,_A); }
485 Iu16vec4
operator>>(
int _Count) {
return _m_psrlwi(vec,_Count); }
486 Iu16vec4& operator>>=(
const M64 &_A) {
return *
this = (Iu16vec4) _m_psrlw(vec,_A); }
487 Iu16vec4& operator>>=(
int _Count) {
return *
this = (Iu16vec4) _m_psrlwi(vec,_Count); }
489 #if defined (_ENABLE_VEC_DEBUG)
502 const unsigned short& operator[](
int _I)
const
509 unsigned short& operator[](
int _I)
516 inline Iu16vec4
operator*(
const Iu16vec4 &_A,
const Iu16vec4 &_B) {
return _m_pmullw(_A,_B); }
517 inline Iu16vec4
cmpeq(
const Iu16vec4 &_A,
const Iu16vec4 &_B) {
return _m_pcmpeqw(_A,_B); }
518 inline Iu16vec4
cmpneq(
const Iu16vec4 &_A,
const Iu16vec4 &_B) {
return _m_pandn(_m_pcmpeqw(_A,_B), _mmx_all_ones); }
520 inline Iu16vec4
sat_add(
const Iu16vec4 &_A,
const Iu16vec4 &_B) {
return _m_paddusw(_A,_B); }
521 inline Iu16vec4
sat_sub(
const Iu16vec4 &_A,
const Iu16vec4 &_B) {
return _m_psubusw(_A,_B); }
523 inline Iu16vec4
unpack_low(
const Iu16vec4 &_A,
const Iu16vec4 &_B) {
return _m_punpcklwd(_A,_B); }
524 inline Iu16vec4
unpack_high(
const Iu16vec4 &_A,
const Iu16vec4 &_B) {
return _m_punpckhwd(_A,_B); }
529 class I8vec8 :
public M64
533 I8vec8(
__m64 _Mm) : M64(_Mm) { }
534 I8vec8(
char _S0,
char _S1,
char _S2,
char _S3,
char _S4,
char _S5,
char _S6,
char _S7)
536 vec = _mm_set_pi8(_S0, _S1, _S2, _S3, _S4, _S5, _S6, _S7);
538 explicit I8vec8(__int64 _I) : M64 (_I) { }
539 explicit I8vec8(
int _I) : M64 (_I) { }
542 I8vec8& operator= (
const M64 &_A) {
return *
this = (I8vec8) _A; }
545 I8vec8&
operator&=(
const M64 &_A) {
return *
this = (I8vec8) _m_pand(vec,_A); }
546 I8vec8&
operator|=(
const M64 &_A) {
return *
this = (I8vec8) _m_por(vec,_A); }
547 I8vec8&
operator^=(
const M64 &_A) {
return *
this = (I8vec8) _m_pxor(vec,_A); }
550 I8vec8& operator +=(
const I8vec8 &_A) {
return *
this = (I8vec8) _m_paddb(vec,_A); }
551 I8vec8& operator -=(
const I8vec8 &_A) {
return *
this = (I8vec8) _m_psubb(vec,_A); }
555 inline I8vec8
cmpeq(
const I8vec8 &_A,
const I8vec8 &_B) {
return _m_pcmpeqb(_A,_B); }
556 inline I8vec8
cmpneq(
const I8vec8 &_A,
const I8vec8 &_B) {
return _m_pandn(_m_pcmpeqb(_A,_B), _mmx_all_ones); }
558 inline I8vec8
unpack_low(
const I8vec8 &_A,
const I8vec8 &_B) {
return _m_punpcklbw(_A,_B); }
559 inline I8vec8
unpack_high(
const I8vec8 &_A,
const I8vec8 &_B) {
return _m_punpckhbw(_A,_B); }
564 class Is8vec8 :
public I8vec8
568 Is8vec8(
__m64 _Mm) : I8vec8(_Mm) { }
569 Is8vec8(
signed char _S0,
signed char _S1,
signed char _S2,
signed char _S3,
570 signed char _S4,
signed char _S5,
signed char _S6,
signed char _S7)
571 : I8vec8(_S0, _S1, _S2, _S3, _S4, _S5, _S6, _S7) { }
572 explicit Is8vec8(__int64 _I) : I8vec8 (_I) { }
573 explicit Is8vec8(
int _I) : I8vec8 (_I) { }
576 Is8vec8& operator= (
const M64 &_A) {
return *
this = (Is8vec8) _A; }
579 Is8vec8&
operator&=(
const M64 &_A) {
return *
this = (Is8vec8) _m_pand(vec,_A); }
580 Is8vec8&
operator|=(
const M64 &_A) {
return *
this = (Is8vec8) _m_por(vec,_A); }
581 Is8vec8&
operator^=(
const M64 &_A) {
return *
this = (Is8vec8) _m_pxor(vec,_A); }
584 Is8vec8& operator +=(
const I8vec8 &_A) {
return *
this = (Is8vec8) _m_paddb(vec,_A); }
585 Is8vec8& operator -=(
const I8vec8 &_A) {
return *
this = (Is8vec8) _m_psubb(vec,_A); }
587 #if defined (_ENABLE_VEC_DEBUG)
591 _Os <<
"[7]:" << short(
_MM_8B(7,_A))
592 <<
" [6]:" << short(
_MM_8B(6,_A))
593 <<
" [5]:" << short(
_MM_8B(5,_A))
594 <<
" [4]:" << short(
_MM_8B(4,_A))
595 <<
" [3]:" << short(
_MM_8B(3,_A))
596 <<
" [2]:" << short(
_MM_8B(2,_A))
597 <<
" [1]:" << short(
_MM_8B(1,_A))
598 <<
" [0]:" << short(
_MM_8B(0,_A));
604 const signed char& operator[](
int _I)
const
611 signed char& operator[](
int _I)
619 inline Is8vec8
cmpeq(
const Is8vec8 &_A,
const Is8vec8 &_B) {
return _m_pcmpeqb(_A,_B); }
620 inline Is8vec8
cmpneq(
const Is8vec8 &_A,
const Is8vec8 &_B) {
return _m_pandn(_m_pcmpeqb(_A,_B), _mmx_all_ones); }
621 inline Is8vec8
cmpgt(
const Is8vec8 &_A,
const Is8vec8 &_B) {
return _m_pcmpgtb(_A,_B); }
622 inline Is8vec8
cmplt(
const Is8vec8 &_A,
const Is8vec8 &_B) {
return _m_pcmpgtb(_B,_A); }
623 inline Is8vec8 cmple(
const Is8vec8 &_A,
const Is8vec8 &_B) {
return _m_pandn(_m_pcmpgtb(_A,_B), _mmx_all_ones); }
624 inline Is8vec8 cmpge(
const Is8vec8 &_A,
const Is8vec8 &_B) {
return _m_pandn(_m_pcmpgtb(_B,_A), _mmx_all_ones); }
626 inline Is8vec8
unpack_low(
const Is8vec8 &_A,
const Is8vec8 &_B) {
return _m_punpcklbw(_A,_B); }
627 inline Is8vec8
unpack_high(
const Is8vec8 &_A,
const Is8vec8 &_B) {
return _m_punpckhbw(_A,_B); }
629 inline Is8vec8
sat_add(
const Is8vec8 &_A,
const Is8vec8 &_B) {
return _m_paddsb(_A,_B); }
630 inline Is8vec8
sat_sub(
const Is8vec8 &_A,
const Is8vec8 &_B) {
return _m_psubsb(_A,_B); }
635 class Iu8vec8 :
public I8vec8
639 Iu8vec8(
__m64 _Mm) : I8vec8(_Mm) { }
640 Iu8vec8(
unsigned char _S0,
unsigned char _S1,
unsigned char _S2,
641 unsigned char _S3,
unsigned char _S4,
unsigned char _S5,
642 unsigned char _S6,
unsigned char _S7)
643 : I8vec8(_S0, _S1, _S2, _S3, _S4, _S5, _S6, _S7) { }
644 explicit Iu8vec8(__int64 _I) : I8vec8 (_I) { }
645 explicit Iu8vec8(
int _I) : I8vec8 (_I) { }
648 Iu8vec8& operator= (
const M64 &_A) {
return *
this = (Iu8vec8) _A; }
650 Iu8vec8&
operator&=(
const M64 &_A) {
return *
this = (Iu8vec8) _m_pand(vec,_A); }
651 Iu8vec8&
operator|=(
const M64 &_A) {
return *
this = (Iu8vec8) _m_por(vec,_A); }
652 Iu8vec8&
operator^=(
const M64 &_A) {
return *
this = (Iu8vec8) _m_pxor(vec,_A); }
654 Iu8vec8& operator +=(
const I8vec8 &_A) {
return *
this = (Iu8vec8) _m_paddb(vec,_A); }
655 Iu8vec8& operator -=(
const I8vec8 &_A) {
return *
this = (Iu8vec8) _m_psubb(vec,_A); }
657 #if defined (_ENABLE_VEC_DEBUG)
661 _Os <<
"[7]:" << (
unsigned short) (
_MM_8UB(7,_A))
662 <<
" [6]:" << (
unsigned short) (
_MM_8UB(6,_A))
663 <<
" [5]:" << (
unsigned short) (
_MM_8UB(5,_A))
664 <<
" [4]:" << (
unsigned short) (
_MM_8UB(4,_A))
665 <<
" [3]:" << (
unsigned short) (
_MM_8UB(3,_A))
666 <<
" [2]:" << (
unsigned short) (
_MM_8UB(2,_A))
667 <<
" [1]:" << (
unsigned short) (
_MM_8UB(1,_A))
668 <<
" [0]:" << (
unsigned short) (
_MM_8UB(0,_A));
674 const unsigned char& operator[](
int _I)
const
681 unsigned char& operator[](
int _I)
689 inline Iu8vec8
cmpeq(
const Iu8vec8 &_A,
const Iu8vec8 &_B) {
return _m_pcmpeqb(_A,_B); }
690 inline Iu8vec8
cmpneq(
const Iu8vec8 &_A,
const Iu8vec8 &_B) {
return _m_pandn(_m_pcmpeqb(_A,_B), _mmx_all_ones); }
692 inline Iu8vec8
unpack_low(
const Iu8vec8 &_A,
const Iu8vec8 &_B) {
return _m_punpcklbw(_A,_B); }
693 inline Iu8vec8
unpack_high(
const Iu8vec8 &_A,
const Iu8vec8 &_B) {
return _m_punpckhbw(_A,_B); }
695 inline Iu8vec8
sat_add(
const Iu8vec8 &_A,
const Iu8vec8 &_B) {
return _m_paddusb(_A,_B); }
696 inline Iu8vec8
sat_sub(
const Iu8vec8 &_A,
const Iu8vec8 &_B) {
return _m_psubusb(_A,_B); }
698 inline Is16vec4
pack_sat(
const Is32vec2 &_A,
const Is32vec2 &_B) {
return _m_packssdw(_A,_B); }
699 inline Is8vec8
pack_sat(
const Is16vec4 &_A,
const Is16vec4 &_B) {
return _m_packsswb(_A,_B); }
700 inline Iu8vec8
packu_sat(
const Is16vec4 &_A,
const Is16vec4 &_B) {
return _m_packuswb(_A,_B); }
// Generates the non-member operator&, operator|, operator^ and andnot() for
// the vector class named I<vect>vec<element>; each generated function forwards
// both operands to _m_pand / _m_por / _m_pxor / _m_pandn respectively.
#define IVEC_LOGICALS(vect,element) \
inline I##vect##vec##element operator&(const I##vect##vec##element &_A, const I##vect##vec##element &_B) \
    { return _m_pand(_A, _B); } \
inline I##vect##vec##element operator|(const I##vect##vec##element &_A, const I##vect##vec##element &_B) \
    { return _m_por(_A, _B); } \
inline I##vect##vec##element operator^(const I##vect##vec##element &_A, const I##vect##vec##element &_B) \
    { return _m_pxor(_A, _B); } \
inline I##vect##vec##element andnot(const I##vect##vec##element &_A, const I##vect##vec##element &_B) \
    { return _m_pandn(_A, _B); }
// Generates the non-member operator+ and operator- for the vector class named
// I<vect>vec<element>; `opsize` is appended to _m_padd / _m_psub to select the
// intrinsic that is called.
#define IVEC_ADD_SUB(vect,element,opsize) \
inline I##vect##vec##element operator+(const I##vect##vec##element &_A, const I##vect##vec##element &_B) \
    { return _m_padd##opsize(_A, _B); } \
inline I##vect##vec##element operator-(const I##vect##vec##element &_A, const I##vect##vec##element &_B) \
    { return _m_psub##opsize(_A, _B); }
733 IVEC_ADD_SUB(u8,8, b)
734 IVEC_ADD_SUB(s8,8, b)
735 IVEC_ADD_SUB(16,4, w)
736 IVEC_ADD_SUB(u16,4, w)
737 IVEC_ADD_SUB(s16,4, w)
738 IVEC_ADD_SUB(32,2, d)
739 IVEC_ADD_SUB(u32,2, d)
740 IVEC_ADD_SUB(s32,2, d)
// Generates select_<selop>(_A, _B, _C, _D) for the given vector classes.
// The generated function builds a mask with cmp<selop>(_A, _B) and returns
// (_Mask & _C) | _m_pandn(_Mask, _D), both halves converted to the result
// type I<vect34>vec<element>.
//
// The function-body braces are restored here: the listing had lost the lines
// that carried them and ended the macro on a dangling line continuation.
#define IVEC_SELECT(vect12,vect34,element,selop) \
inline I##vect34##vec##element select_##selop( \
    const I##vect12##vec##element &_A, \
    const I##vect12##vec##element &_B, \
    const I##vect34##vec##element &_C, \
    const I##vect34##vec##element &_D) \
{ \
    I##vect12##vec##element _Mask = cmp##selop(_A,_B); \
    return( (I##vect34##vec##element)(_Mask &_C ) | \
            (I##vect34##vec##element)((_m_pandn(_Mask, _D )))); \
}
764 IVEC_SELECT(8,s8,8,eq)
765 IVEC_SELECT(8,u8,8,eq)
766 IVEC_SELECT(8,8,8,eq)
767 IVEC_SELECT(8,s8,8,neq)
768 IVEC_SELECT(8,u8,8,neq)
769 IVEC_SELECT(8,8,8,neq)
771 IVEC_SELECT(16,s16,4,eq)
772 IVEC_SELECT(16,u16,4,eq)
773 IVEC_SELECT(16,16,4,eq)
774 IVEC_SELECT(16,s16,4,neq)
775 IVEC_SELECT(16,u16,4,neq)
776 IVEC_SELECT(16,16,4,neq)
778 IVEC_SELECT(32,s32,2,eq)
779 IVEC_SELECT(32,u32,2,eq)
780 IVEC_SELECT(32,32,2,eq)
781 IVEC_SELECT(32,s32,2,neq)
782 IVEC_SELECT(32,u32,2,neq)
783 IVEC_SELECT(32,32,2,neq)
786 IVEC_SELECT(s8,s8,8,gt)
787 IVEC_SELECT(s8,u8,8,gt)
788 IVEC_SELECT(s8,8,8,gt)
789 IVEC_SELECT(s8,s8,8,lt)
790 IVEC_SELECT(s8,u8,8,lt)
791 IVEC_SELECT(s8,8,8,lt)
792 IVEC_SELECT(s8,s8,8,le)
793 IVEC_SELECT(s8,u8,8,le)
794 IVEC_SELECT(s8,8,8,le)
795 IVEC_SELECT(s8,s8,8,ge)
796 IVEC_SELECT(s8,u8,8,ge)
797 IVEC_SELECT(s8,8,8,ge)
799 IVEC_SELECT(s16,s16,4,gt)
800 IVEC_SELECT(s16,u16,4,gt)
801 IVEC_SELECT(s16,16,4,gt)
802 IVEC_SELECT(s16,s16,4,lt)
803 IVEC_SELECT(s16,u16,4,lt)
804 IVEC_SELECT(s16,16,4,lt)
805 IVEC_SELECT(s16,s16,4,le)
806 IVEC_SELECT(s16,u16,4,le)
807 IVEC_SELECT(s16,16,4,le)
808 IVEC_SELECT(s16,s16,4,ge)
809 IVEC_SELECT(s16,u16,4,ge)
810 IVEC_SELECT(s16,16,4,ge)
812 IVEC_SELECT(s32,s32,2,gt)
813 IVEC_SELECT(s32,u32,2,gt)
814 IVEC_SELECT(s32,32,2,gt)
815 IVEC_SELECT(s32,s32,2,lt)
816 IVEC_SELECT(s32,u32,2,lt)
817 IVEC_SELECT(s32,32,2,lt)
818 IVEC_SELECT(s32,s32,2,le)
819 IVEC_SELECT(s32,u32,2,le)
820 IVEC_SELECT(s32,32,2,le)
821 IVEC_SELECT(s32,s32,2,ge)
822 IVEC_SELECT(s32,u32,2,ge)
823 IVEC_SELECT(s32,32,2,ge)
828 inline static void empty(
void) { _m_empty(); }
830 #endif // defined(_M_IX86)
832 #if defined (_SILENCE_IVEC_C4799)
uint_2 operator|(const uint_2 &_Lhs, const uint_2 &_Rhs) __GPU
Definition: amp_short_vectors.h:22852
_CRTIMP void __cdecl _wassert(_In_z_ const wchar_t *_Message, _In_z_ const wchar_t *_File, _In_ unsigned _Line)
#define _MM_4UW(element, vector)
Definition: ivec.h:80
#define _MM_8B(element, vector)
Definition: ivec.h:78
unsigned int _Count
Definition: xcomplex:668
Is32vec4 cmplt(const Is32vec4 &_A, const Is32vec4 &_B)
Definition: dvec.h:325
uint_2 operator<<(const uint_2 &_Lhs, const uint_2 &_Rhs) __GPU
Definition: amp_short_vectors.h:22866
std::enable_if< details::_Is_extent_or_index< _Tuple_type< _Rank > >::value, _Tuple_type< _Rank > >::type operator*(const _Tuple_type< _Rank > &_Lhs, typename _Tuple_type< _Rank >::value_type _Rhs) __GPU
Definition: amp.h:890
I64vec2 unpack_high(const I64vec2 &_A, const I64vec2 &_B)
Definition: dvec.h:223
Is16vec8 mul_high(const Is16vec8 &_A, const Is16vec8 &_B)
Definition: dvec.h:521
uint_2 operator^(const uint_2 &_Lhs, const uint_2 &_Rhs) __GPU
Definition: amp_short_vectors.h:22845
#define _CRTIMP
Definition: vcruntime.h:37
Iu8vec16 packu_sat(const Is16vec8 &_A, const Is16vec8 &_B)
Definition: dvec.h:812
Is16vec8 pack_sat(const Is32vec4 &_A, const Is32vec4 &_B)
Definition: dvec.h:810
launch & operator^=(launch &_Left, launch _Right)
Definition: future:87
#define _In_z_
Definition: sal.h:310
#define _In_
Definition: sal.h:305
I64vec2 unpack_low(const I64vec2 &_A, const I64vec2 &_B)
Definition: dvec.h:222
Is32vec4 mul_add(const Is16vec8 &_A, const Is16vec8 &_B)
Definition: dvec.h:522
__m64
Definition: mmintrin.h:45
Is16vec8 sat_add(const Is16vec8 &_A, const Is16vec8 &_B)
Definition: dvec.h:524
basic_ostream< char, char_traits< char > > ostream
Definition: iosfwd:679
launch & operator&=(launch &_Left, launch _Right)
Definition: future:75
#define _MM_4W(element, vector)
Definition: ivec.h:81
M128 andnot(const M128 &_A, const M128 &_B)
Definition: dvec.h:147
#define _MM_2DW(element, vector)
Definition: ivec.h:84
uint_2 operator>>(const uint_2 &_Lhs, const uint_2 &_Rhs) __GPU
Definition: amp_short_vectors.h:22873
constexpr auto empty(const _Container &_Cont) -> decltype(_Cont.empty())
Definition: xutility:1492
#define _VEC_ASSERT(_Expression)
Definition: ivec.h:45
I32vec4 cmpneq(const I32vec4 &_A, const I32vec4 &_B)
Definition: dvec.h:256
I32vec4 cmpeq(const I32vec4 &_A, const I32vec4 &_B)
Definition: dvec.h:255
uint_2 operator&(const uint_2 &_Lhs, const uint_2 &_Rhs) __GPU
Definition: amp_short_vectors.h:22859
#define _MM_2UDW(element, vector)
Definition: ivec.h:83
#define _MM_8UB(element, vector)
Definition: ivec.h:77
Is32vec4 cmpgt(const Is32vec4 &_A, const Is32vec4 &_B)
Definition: dvec.h:324
launch & operator|=(launch &_Left, launch _Right)
Definition: future:81
Is16vec8 sat_sub(const Is16vec8 &_A, const Is16vec8 &_B)
Definition: dvec.h:525