29 #ifndef _FVEC_H_INCLUDED
30 #define _FVEC_H_INCLUDED
33 #if !defined __cplusplus
34 #error ERROR: This file is only supported in C++ compilations!
37 #if defined (_M_CEE_PURE)
38 #error ERROR: This file is not supported in the pure mode!
// No-op form of _VEC_ASSERT: expands to ((void)0), so _Expression is never
// evaluated. NOTE(review): the preprocessor condition selecting this form is
// not visible in this listing -- confirm which build configuration uses it.
47 #define _VEC_ASSERT(_Expression) ((void)0)
// Checking form of _VEC_ASSERT: evaluates _Expression and, when it is false,
// calls _wassert with the stringized expression, the current file name (both
// widened through _CRT_WIDE) and the current line. The trailing ", 0" gives
// the right-hand side of || an int value; the whole expression is cast to void.
// NOTE(review): the preprocessor condition selecting this form is not visible
// in this listing -- confirm which build configuration uses it.
59 #define _VEC_ASSERT(_Expression) (void)( (!!(_Expression)) || (_wassert(_CRT_WIDE(#_Expression), _CRT_WIDE(__FILE__), __LINE__), 0) )
64 #if defined (_ENABLE_VEC_DEBUG)
// Expands to the `m` member of __f32vec4_abs_mask_cheat converted to F32vec4.
// NOTE(review): presumably a bit mask used to clear the sign bits in abs();
// the initializer of __f32vec4_abs_mask_cheat is not shown here -- confirm.
76 #define _f32vec4_abs_mask ((F32vec4)__f32vec4_abs_mask_cheat.m)
158 #pragma warning(push)
159 #pragma warning(disable : 4640)
162 static const F32vec4 fvecf0pt5(0.5f);
163 static const F32vec4 fvecf3pt0(3.0f);
165 return (fvecf0pt5 * _Ra0) * (fvecf3pt0 - (_A * _Ra0) * _Ra0);
// Generator for the F32vec4 comparison friends: Fvec32s4_COMP(op) defines a
// friend function named cmp<op> taking two F32vec4 operands and returning the
// result of the _mm_cmp<op>_ps intrinsic applied to them (token pasting builds
// both the function name and the intrinsic name from `op`).
173 #define Fvec32s4_COMP(op) \
174 friend F32vec4 cmp##op (const F32vec4 &_A, const F32vec4 &_B) { return _mm_cmp##op##_ps(_A,_B); }
195 #if defined (_ENABLE_VEC_DEBUG)
200 float *_Fp = (
float*)&_A;
201 _Os <<
"[3]:" << *(_Fp+3)
202 <<
" [2]:" << *(_Fp+2)
203 <<
" [1]:" << *(_Fp+1)
213 float *_Fp = (
float*)&vec;
221 float *_Fp = (
float*)&vec;
267 #define Fvec32s4_SELECT(op) \
268 inline F32vec4 select_##op (const F32vec4 &_A, const F32vec4 &_B, const F32vec4 &_C, const F32vec4 &_D) \
270 F32vec4 _Mask = _mm_cmp##op##_ps(_A,_B); \
271 return( (_Mask & _C) | F32vec4((_mm_andnot_ps(_Mask,_D)))); \
283 #undef Fvec32s4_SELECT
// simd_max for Is16vec4: returns the result of the _m_pmaxsw intrinsic applied
// to (_A, _B). Going by the intrinsic's name this is a packed signed-word
// maximum -- see the intrinsic reference for exact lane semantics.
289 inline Is16vec4
simd_max(
const Is16vec4 &_A,
const Is16vec4 &_B) {
return _m_pmaxsw(_A,_B);}
// simd_min for Is16vec4: returns the result of the _m_pminsw intrinsic applied
// to (_A, _B). Going by the intrinsic's name this is a packed signed-word
// minimum -- see the intrinsic reference for exact lane semantics.
290 inline Is16vec4
simd_min(
const Is16vec4 &_A,
const Is16vec4 &_B) {
return _m_pminsw(_A,_B);}
// simd_max for Iu8vec8: returns the result of the _m_pmaxub intrinsic applied
// to (_A, _B). Going by the intrinsic's name this is a packed unsigned-byte
// maximum -- see the intrinsic reference for exact lane semantics.
291 inline Iu8vec8
simd_max(
const Iu8vec8 &_A,
const Iu8vec8 &_B) {
return _m_pmaxub(_A,_B);}
// simd_min for Iu8vec8: returns the result of the _m_pminub intrinsic applied
// to (_A, _B). Going by the intrinsic's name this is a packed unsigned-byte
// minimum -- see the intrinsic reference for exact lane semantics.
292 inline Iu8vec8
simd_min(
const Iu8vec8 &_A,
const Iu8vec8 &_B) {
return _m_pminub(_A,_B);}
// simd_avg for Iu16vec4: returns the result of the _mm_avg_pu16 intrinsic
// applied to (_A, _B). NOTE(review): rounding behaviour is defined by the
// intrinsic, not by this wrapper -- consult the intrinsic reference.
295 inline Iu16vec4
simd_avg(
const Iu16vec4 &_A,
const Iu16vec4 &_B) {
return _mm_avg_pu16(_A,_B); }
// simd_avg for Iu8vec8: returns the result of the _mm_avg_pu8 intrinsic
// applied to (_A, _B). NOTE(review): rounding behaviour is defined by the
// intrinsic, not by this wrapper -- consult the intrinsic reference.
296 inline Iu8vec8
simd_avg(
const Iu8vec8 &_A,
const Iu8vec8 &_B) {
return _mm_avg_pu8(_A,_B); }
// move_mask for I8vec8: returns the int produced by the _m_pmovmskb intrinsic
// for _A. NOTE(review): per the intrinsic's name this presumably gathers one
// bit per byte lane into the low bits of the result -- confirm in the
// intrinsic reference.
299 inline int move_mask(
const I8vec8 &_A) {
return _m_pmovmskb(_A);}
// mul_high for Iu16vec4: returns the result of the _m_pmulhuw intrinsic
// applied to (_A, _B). Going by the intrinsic's name this keeps the high half
// of each unsigned-word product -- see the intrinsic reference.
302 inline Iu16vec4
mul_high(
const Iu16vec4 &_A,
const Iu16vec4 &_B) {
return _m_pmulhuw(_A,_B); }
// mask_move: forwards _A, _B and _Addr, in that order, to the _m_maskmovq
// intrinsic and returns nothing.
// NOTE(review): per the intrinsic's documentation _B presumably acts as the
// per-byte write mask for storing _A at _Addr -- confirm before relying on it.
305 inline void mask_move(
const I8vec8 &_A,
const I8vec8 &_B,
char *_Addr) { _m_maskmovq(_A, _B, _Addr); }
// store_nta for 64-bit values: passes the destination pointer _P and the
// value _A to the _mm_stream_pi intrinsic and returns nothing.
// NOTE(review): "nta"/"stream" suggests a non-temporal (cache-bypassing)
// store -- confirm in the intrinsic reference.
308 inline void store_nta(
__m64 *_P,
const M64 &_A) { _mm_stream_pi(_P,_A); }
313 inline int F32vec4ToInt(
const F32vec4 &_A)
321 inline Is32vec2 F32vec4ToIs32vec2 (
const F32vec4 &_A)
325 _Result = _mm_cvtt_ps2pi(_A);
326 return Is32vec2(_Result);
341 inline F32vec4 Is32vec2ToF32vec4(
const F32vec4 &_A,
const Is32vec2 &_B)
345 _Result = _mm_cvt_pi2ps(_A,_B);
// Constructs an F32vec1 from a float: vec is assigned _mm_set_ss(_F).
// Declared explicit, so a float does not convert to F32vec1 implicitly.
362 explicit F32vec1(
float _F) { vec =
_mm_set_ss(_F); }
// Constructs an F32vec1 from a double: _D is first cast to float (so any
// precision beyond float is dropped), then vec is assigned _mm_set_ss of that
// float. Declared explicit, so a double does not convert implicitly.
365 explicit F32vec1(
double _D) { vec =
_mm_set_ss((
float) _D); }
// Constructs an F32vec1 directly from an __m128: vec is set to _M unchanged.
// Not explicit, which is what lets the intrinsic results returned by the
// friend functions of this class convert to F32vec1.
368 F32vec1(
__m128 _M) { vec = _M; }
// Implicit conversion to __m128: returns the stored vec by value, allowing an
// F32vec1 to be passed wherever an intrinsic expects an __m128.
371 operator __m128()
const {
return vec; }
// Bitwise OR of two F32vec1 values: returns _mm_or_ps(_A, _B). Note that the
// _ps (packed) intrinsic is used, so the whole __m128 is OR-ed.
375 friend F32vec1
operator |(
const F32vec1 &_A,
const F32vec1 &_B) {
return _mm_or_ps(_A,_B); }
// Compound addition: assigns _mm_add_ss(vec, _A) to *this and returns a
// reference to *this.
384 F32vec1& operator +=(
const F32vec1 &_A) {
return *
this =
_mm_add_ss(vec,_A); }
// Compound subtraction: assigns _mm_sub_ss(vec, _A) to *this and returns a
// reference to *this.
385 F32vec1& operator -=(
const F32vec1 &_A) {
return *
this =
_mm_sub_ss(vec,_A); }
// Compound multiplication: assigns _mm_mul_ss(vec, _A) to *this and returns a
// reference to *this.
386 F32vec1& operator *=(
const F32vec1 &_A) {
return *
this =
_mm_mul_ss(vec,_A); }
// Compound division: assigns _mm_div_ss(vec, _A) to *this and returns a
// reference to *this. No check for a zero divisor is made here.
387 F32vec1& operator /=(
const F32vec1 &_A) {
return *
this =
_mm_div_ss(vec,_A); }
// rcp for F32vec1: returns _mm_rcp_ss(_A).
// NOTE(review): the intrinsic is documented as an approximate reciprocal, so
// the result is presumably not exactly 1/x -- confirm the precision in the
// intrinsic reference.
396 friend F32vec1
rcp(
const F32vec1 &_A) {
return _mm_rcp_ss(_A); }
402 friend F32vec1 rcp_nr(
const F32vec1 &_A)
410 #pragma warning(push)
411 #pragma warning(disable : 4640)
412 friend F32vec1 rsqrt_nr(
const F32vec1 &_A)
414 static const F32vec1 fvecf0pt5(0.5f);
415 static const F32vec1 fvecf3pt0(3.0f);
417 return (fvecf0pt5 * _Ra0) * (fvecf3pt0 - (_A * _Ra0) * _Ra0);
// Generator for the F32vec1 comparison friends: Fvec32s1_COMP(op) defines a
// friend function named cmp<op> taking two F32vec1 operands and returning the
// result of the _mm_cmp<op>_ss intrinsic applied to them (token pasting builds
// both the function name and the intrinsic name from `op`).
425 #define Fvec32s1_COMP(op) \
426 friend F32vec1 cmp##op (const F32vec1 &_A, const F32vec1 &_B) { return _mm_cmp##op##_ss(_A,_B); }
// simd_min for F32vec1: returns _mm_min_ss(_A, _B).
440 friend F32vec1
simd_min(
const F32vec1 &_A,
const F32vec1 &_B) {
return _mm_min_ss(_A,_B); }
// simd_max for F32vec1: returns _mm_max_ss(_A, _B).
441 friend F32vec1
simd_max(
const F32vec1 &_A,
const F32vec1 &_B) {
return _mm_max_ss(_A,_B); }
444 #if defined (_ENABLE_VEC_DEBUG)
449 float *_Fp = (
float*)&_A;
450 _Os <<
"float:" << *_Fp;
468 #define Fvec32s1_SELECT(op) \
469 inline F32vec1 select_##op (const F32vec1 &_A, const F32vec1 &_B, const F32vec1 &_C, const F32vec1 &_D) \
471 F32vec1 _Mask = _mm_cmp##op##_ss(_A,_B); \
472 return( (_Mask & _C) | F32vec1((_mm_andnot_ps(_Mask,_D)))); \
484 #undef Fvec32s1_SELECT
489 inline int F32vec1ToInt(
const F32vec1 &_A)
F32vec4(__m128 _M)
Definition: fvec.h:88
__m128 _mm_set_ps(float _A, float _B, float _C, float _D)
#define Fvec32s4_SELECT(op)
Definition: fvec.h:267
uint_2 operator|(const uint_2 &_Lhs, const uint_2 &_Rhs) __GPU
Definition: amp_short_vectors.h:22852
__m128 _mm_and_ps(__m128 _A, __m128 _B)
friend F32vec4 simd_max(const F32vec4 &_A, const F32vec4 &_B)
Definition: fvec.h:189
#define Fvec32s1_SELECT(op)
Definition: fvec.h:468
__m128 _mm_movehl_ps(__m128, __m128)
friend F32vec4 operator&(const F32vec4 &_A, const F32vec4 &_B)
Definition: fvec.h:113
__m128 _mm_rsqrt_ss(__m128 _A)
float rsqrt(float _X) __GPU_ONLY
Returns the reciprocal of the square root of the argument
Definition: amp_math.h:954
__m128 _mm_sqrt_ps(__m128 _A)
__m128 _mm_max_ps(__m128 _A, __m128 _B)
Is16vec8 simd_min(const Is16vec8 &_A, const Is16vec8 &_B)
Definition: dvec.h:528
void _mm_storeu_ps(float *_V, __m128 _A)
F32vec4 unpack_high(const F32vec4 &_A, const F32vec4 &_B)
Definition: fvec.h:233
void store_nta(float *_P, const F32vec4 &_A)
Definition: fvec.h:253
float sqrt(float _X) __GPU_ONLY
Calculates the square root of the argument
Definition: amp_math.h:1100
friend F32vec4 sqrt(const F32vec4 &_A)
Definition: fvec.h:142
F32vec4 & operator^=(const F32vec4 &_A)
Definition: fvec.h:131
__m128 _mm_sqrt_ss(__m128 _A)
F32vec4 & operator/=(const F32vec4 &_A)
Definition: fvec.h:128
uint_2 operator<<(const uint_2 &_Lhs, const uint_2 &_Rhs) __GPU
Definition: amp_short_vectors.h:22866
std::enable_if< details::_Is_extent_or_index< _Tuple_type< _Rank > >::value, _Tuple_type< _Rank > >::type operator*(const _Tuple_type< _Rank > &_Lhs, typename _Tuple_type< _Rank >::value_type _Rhs) __GPU
Definition: amp.h:890
__m128 _mm_shuffle_ps(__m128 _A, __m128 _B, unsigned int _Imm8)
friend float add_horizontal(const F32vec4 &_A)
Definition: fvec.h:134
__m128 _mm_loadu_ps(float const *_A)
Is16vec8 mul_high(const Is16vec8 &_A, const Is16vec8 &_B)
Definition: dvec.h:521
friend F32vec4 rsqrt(const F32vec4 &_A)
Definition: fvec.h:146
friend F32vec4 rsqrt_nr(const F32vec4 &_A)
Definition: fvec.h:160
uint_2 operator^(const uint_2 &_Lhs, const uint_2 &_Rhs) __GPU
Definition: amp_short_vectors.h:22845
float rcp(float _X) __GPU_ONLY
Calculates a fast, approximate reciprocal of the argument
Definition: amp.h:7493
__m128 _mm_add_ps(__m128 _A, __m128 _B)
__m128 _mm_sub_ps(__m128 _A, __m128 _B)
__m128 _mm_div_ps(__m128 _A, __m128 _B)
friend F32vec4 operator*(const F32vec4 &_A, const F32vec4 &_B)
Definition: fvec.h:120
F32vec4 & operator|=(const F32vec4 &_A)
Definition: fvec.h:130
__m128 _mm_set_ss(float _A)
friend F32vec4 operator^(const F32vec4 &_A, const F32vec4 &_B)
Definition: fvec.h:115
launch & operator^=(launch &_Left, launch _Right)
Definition: future:87
const float & operator[](int _I) const
Definition: fvec.h:209
std::enable_if< details::_Is_extent_or_index< _Tuple_type< _Rank > >::value, _Tuple_type< _Rank > >::type operator-(const _Tuple_type< _Rank > &_Lhs, const _Tuple_type< _Rank > &_Rhs) __GPU
Definition: amp.h:845
#define _In_z_
Definition: sal.h:310
Is16vec8 simd_max(const Is16vec8 &_A, const Is16vec8 &_B)
Definition: dvec.h:527
#define _In_
Definition: sal.h:305
std::enable_if< details::_Is_extent_or_index< _Tuple_type< _Rank > >::value, _Tuple_type< _Rank > >::type operator+(const _Tuple_type< _Rank > &_Lhs, const _Tuple_type< _Rank > &_Rhs) __GPU
Definition: amp.h:836
friend F32vec4 operator|(const F32vec4 &_A, const F32vec4 &_B)
Definition: fvec.h:114
__m128 _mm_rcp_ps(__m128 _A)
const F32vec4 & _B
Definition: fvec.h:188
void storeu(float *_P, const F32vec4 &_A)
Definition: fvec.h:247
__m64
Definition: mmintrin.h:45
void _mm_stream_ps(float *, __m128)
friend F32vec4 operator-(const F32vec4 &_A, const F32vec4 &_B)
Definition: fvec.h:119
friend F32vec4 operator+(const F32vec4 &_A, const F32vec4 &_B)
Definition: fvec.h:118
int move_mask(const F32vec4 &_A)
Definition: fvec.h:237
friend F32vec4 rcp_nr(const F32vec4 &_A)
Definition: fvec.h:150
#define _f32vec4_abs_mask
Definition: fvec.h:76
basic_ostream< char, char_traits< char > > ostream
Definition: iosfwd:679
friend F32vec4 abs(const F32vec4 &_A)
Definition: fvec.h:192
__m128 _mm_xor_ps(__m128 _A, __m128 _B)
launch & operator&=(launch &_Left, launch _Right)
Definition: future:75
__m128
Definition: xmmintrin.h:75
F32vec4 & operator=(float _F)
Definition: fvec.h:101
__m128 m
Definition: fvec.h:73
__m128 _mm_mul_ps(__m128 _A, __m128 _B)
#define _VEC_ASSERT(_Expression)
Definition: fvec.h:59
__m128 _mm_set_ps1(float _A)
Iu16vec8 simd_avg(const Iu16vec8 &_A, const Iu16vec8 &_B)
Definition: dvec.h:609
__m128 _mm_cvt_si2ss(__m128, int)
__m128 _mm_add_ss(__m128 _A, __m128 _B)
__m128 _mm_min_ss(__m128 _A, __m128 _B)
float & operator[](int _I)
Definition: fvec.h:217
F32vec4()
Definition: fvec.h:85
__m128 _mm_min_ps(__m128 _A, __m128 _B)
int _mm_movemask_ps(__m128 _A)
friend F32vec4 operator/(const F32vec4 &_A, const F32vec4 &_B)
Definition: fvec.h:121
F32vec4 & operator+=(const F32vec4 &_A)
Definition: fvec.h:125
__m128 _mm_rcp_ss(__m128 _A)
F32vec4(double _D)
Definition: fvec.h:97
__m128 vec
Definition: fvec.h:81
__m128 _mm_sub_ss(__m128 _A, __m128 _B)
F32vec4 & operator-=(const F32vec4 &_A)
Definition: fvec.h:126
__m128 _mm_unpackhi_ps(__m128 _A, __m128 _B)
__m128 _mm_unpacklo_ps(__m128 _A, __m128 _B)
void __cdecl _wassert(_In_z_ const wchar_t *_Message, _In_z_ const wchar_t *_File, _In_ unsigned _Line)
__m128 _mm_div_ss(__m128 _A, __m128 _B)
__m128 _mm_rsqrt_ps(__m128 _A)
uint_2 operator&(const uint_2 &_Lhs, const uint_2 &_Rhs) __GPU
Definition: amp_short_vectors.h:22859
__m128 _mm_max_ss(__m128 _A, __m128 _B)
__m128 _mm_or_ps(__m128 _A, __m128 _B)
F32vec4 & operator&=(const F32vec4 &_A)
Definition: fvec.h:129
int i[4]
Definition: fvec.h:72
__m128 _mm_mul_ss(__m128 _A, __m128 _B)
float _mm_cvtss_f32(__m128 _A)
void loadu(F32vec4 &_A, float *_P)
Definition: fvec.h:243
friend F32vec4 rcp(const F32vec4 &_A)
Definition: fvec.h:144
F32vec4 unpack_low(const F32vec4 &_A, const F32vec4 &_B)
Definition: fvec.h:229
F32vec4(float _F3, float _F2, float _F1, float _F0)
Definition: fvec.h:91
int _mm_cvtt_ss2si(__m128 _A)
Fvec32s4_COMP(eq) Fvec32s4_COMP(lt) Fvec32s4_COMP(le) Fvec32s4_COMP(gt) Fvec32s4_COMP(ge) Fvec32s4_COMP(neq) Fvec32s4_COMP(nlt) Fvec32s4_COMP(nle) Fvec32s4_COMP(ngt) Fvec32s4_COMP(nge) friend F32vec4 simd_min(const F32vec4 &_A
#define Fvec32s1_COMP(op)
launch & operator|=(launch &_Left, launch _Right)
Definition: future:81
const union @87 __f32vec4_abs_mask_cheat
F32vec4(float _F)
Definition: fvec.h:94
F32vec4 & operator*=(const F32vec4 &_A)
Definition: fvec.h:127
std::enable_if< details::_Is_extent_or_index< _Tuple_type< _Rank > >::value, _Tuple_type< _Rank > >::type operator/(const _Tuple_type< _Rank > &_Lhs, typename _Tuple_type< _Rank >::value_type _Rhs) __GPU
Definition: amp.h:908