29 #ifndef _FVEC_H_INCLUDED
30 #define _FVEC_H_INCLUDED
33 #if !defined __cplusplus
34 #error ERROR: This file is only supported in C++ compilations!
37 #if defined (_M_CEE_PURE)
38 #error ERROR: This file is not supported in the pure mode!
47 #define _VEC_ASSERT(_Expression) ((void)0)
59 #define _VEC_ASSERT(_Expression) (void)( (!!(_Expression)) || (_wassert(_CRT_WIDE(#_Expression), _CRT_WIDE(__FILE__), __LINE__), 0) )
64 #if defined (_ENABLE_VEC_DEBUG)
69 #pragma pack(push,_CRT_PACKING)
80 #define _f32vec4_abs_mask ((F32vec4)__f32vec4_abs_mask_cheat.m)
162 #pragma warning(push)
163 #pragma warning(disable : 4640)
166 static const F32vec4 fvecf0pt5(0.5f);
167 static const F32vec4 fvecf3pt0(3.0f);
169 return (fvecf0pt5 * Ra0) * (fvecf3pt0 - (a * Ra0) * Ra0);
177 #define Fvec32s4_COMP(op) \
178 friend F32vec4 cmp##op (const F32vec4 &a, const F32vec4 &b) { return _mm_cmp##op##_ps(a,b); }
199 #if defined (_ENABLE_VEC_DEBUG)
204 float *fp = (
float*)&a;
205 os <<
"[3]:" << *(fp+3)
206 <<
" [2]:" << *(fp+2)
207 <<
" [1]:" << *(fp+1)
217 float *fp = (
float*)&vec;
225 float *fp = (
float*)&vec;
271 #define Fvec32s4_SELECT(op) \
272 inline F32vec4 select_##op (const F32vec4 &a, const F32vec4 &b, const F32vec4 &c, const F32vec4 &d) \
274 F32vec4 mask = _mm_cmp##op##_ps(a,b); \
275 return( (mask & c) | F32vec4((_mm_andnot_ps(mask,d)))); \
287 #undef Fvec32s4_SELECT
412 #pragma warning(push)
413 #pragma warning(disable : 4640)
416 static const F32vec1 fvecf0pt5(0.5f);
417 static const F32vec1 fvecf3pt0(3.0f);
419 return (fvecf0pt5 * Ra0) * (fvecf3pt0 - (a * Ra0) * Ra0);
427 #define Fvec32s1_COMP(op) \
428 friend F32vec1 cmp##op (const F32vec1 &a, const F32vec1 &b) { return _mm_cmp##op##_ss(a,b); }
446 #if defined (_ENABLE_VEC_DEBUG)
451 float *fp = (
float*)&a;
452 os <<
"float:" << *fp;
470 #define Fvec32s1_SELECT(op) \
471 inline F32vec1 select_##op (const F32vec1 &a, const F32vec1 &b, const F32vec1 &c, const F32vec1 &d) \
473 F32vec1 mask = _mm_cmp##op##_ss(a,b); \
474 return( (mask & c) | F32vec1((_mm_andnot_ps(mask,d)))); \
486 #undef Fvec32s1_SELECT
491 inline int F32vec1ToInt(
const F32vec1 &a)
__m128 _mm_set_ps(float _A, float _B, float _C, float _D)
#define Fvec32s4_SELECT(op)
Definition: fvec.h:271
F32vec1 & operator*=(const F32vec1 &a)
Definition: fvec.h:388
__m128 _mm_and_ps(__m128 _A, __m128 _B)
void store_nta(float *p, const F32vec4 &a)
Definition: fvec.h:257
#define _mm_avg_pu16
Definition: xmmintrin.h:360
#define Fvec32s1_SELECT(op)
Definition: fvec.h:470
__m128 _mm_movehl_ps(__m128, __m128)
__m128 _mm_rsqrt_ss(__m128 _A)
int move_mask(const F32vec4 &a)
Definition: fvec.h:241
__m128 _mm_sqrt_ps(__m128 _A)
__m128 _mm_max_ps(__m128 _A, __m128 _B)
void mask_move(const I8vec8 &a, const I8vec8 &b, char *addr)
Definition: fvec.h:308
void _mm_stream_pi(__m64 *, __m64)
const F32vec4 & b
Definition: fvec.h:192
void _mm_storeu_ps(float *_V, __m128 _A)
F32vec1()
Definition: fvec.h:359
friend F32vec4 rsqrt_nr(const F32vec4 &a)
Definition: fvec.h:164
Fvec32s1_COMP(eq) Fvec32s1_COMP(lt) Fvec32s1_COMP(le) Fvec32s1_COMP(gt) Fvec32s1_COMP(ge) Fvec32s1_COMP(neq) Fvec32s1_COMP(nlt) Fvec32s1_COMP(nle) Fvec32s1_COMP(ngt) Fvec32s1_COMP(nge) friend F32vec1 simd_min(const F32vec1 &a
friend F32vec1 simd_max(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:443
#define _CRTIMP
Definition: crtdefs.h:23
F32vec4 & operator/=(const F32vec4 &a)
Definition: fvec.h:132
__m64 _mm_cvtt_ps2pi(__m128 _A)
friend F32vec1 operator|(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:377
F32vec4 unpack_high(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:237
friend F32vec4 rsqrt(const F32vec4 &a)
Definition: fvec.h:150
__m128 _mm_sqrt_ss(__m128 _A)
Iu16vec4 simd_avg(const Iu16vec4 &a, const Iu16vec4 &b)
Definition: fvec.h:298
uint_2 operator<<(const uint_2 &_Lhs, const uint_2 &_Rhs) __GPU
Definition: amp_short_vectors.h:22866
F32vec4 & operator|=(const F32vec4 &a)
Definition: fvec.h:134
friend F32vec4 rcp_nr(const F32vec4 &a)
Definition: fvec.h:154
F32vec1 & operator&=(const F32vec1 &a)
Definition: fvec.h:390
__m128 _mm_cvt_pi2ps(__m128, __m64)
__m128 _mm_shuffle_ps(__m128 _A, __m128 _B, unsigned int _Imm8)
F32vec1 & operator-=(const F32vec1 &a)
Definition: fvec.h:387
friend F32vec1 operator&(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:376
void _m_maskmovq(__m64, __m64, char *)
__m128 _mm_loadu_ps(float const *_A)
friend F32vec4 operator&(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:117
F32vec1 & operator/=(const F32vec1 &a)
Definition: fvec.h:389
friend F32vec1 sqrt(const F32vec1 &a)
Definition: fvec.h:396
__m128 _mm_add_ps(__m128 _A, __m128 _B)
F32vec4 & operator+=(const F32vec4 &a)
Definition: fvec.h:129
F32vec4 & operator^=(const F32vec4 &a)
Definition: fvec.h:135
F32vec1 & operator|=(const F32vec1 &a)
Definition: fvec.h:391
_CRTIMP void __cdecl _wassert(_In_z_ const wchar_t *_Message, _In_z_ const wchar_t *_File, _In_ unsigned _Line)
friend F32vec4 simd_max(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:193
__m128 _mm_sub_ps(__m128 _A, __m128 _B)
__m128 _mm_div_ps(__m128 _A, __m128 _B)
friend F32vec1 operator^(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:378
__m128 _mm_set_ss(float _A)
F32vec4 & operator-=(const F32vec4 &a)
Definition: fvec.h:130
EXPLICIT F32vec4(double d)
Definition: fvec.h:101
friend F32vec4 operator+(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:122
friend F32vec1 rcp(const F32vec1 &a)
Definition: fvec.h:398
F32vec4 Is32vec2ToF32vec4(const F32vec4 &a, const Is32vec2 &b)
Definition: fvec.h:344
Iu16vec4 mul_high(const Iu16vec4 &a, const Iu16vec4 &b)
Definition: fvec.h:305
friend F32vec1 rcp_nr(const F32vec1 &a)
Definition: fvec.h:404
#define _In_z_
Definition: sal.h:319
#define _In_
Definition: sal.h:314
friend F32vec4 rcp(const F32vec4 &a)
Definition: fvec.h:148
__m128 _mm_rcp_ps(__m128 _A)
friend F32vec4 operator/(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:125
F32vec4 & operator*=(const F32vec4 &a)
Definition: fvec.h:131
friend F32vec4 operator|(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:118
__m64
Definition: mmintrin.h:42
void _mm_stream_ps(float *, __m128)
friend F32vec1 operator*(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:383
#define _f32vec4_abs_mask
Definition: fvec.h:80
EXPLICIT F32vec1(double d)
Definition: fvec.h:367
basic_ostream< char, char_traits< char > > ostream
Definition: iosfwd:678
F32vec1 & operator+=(const F32vec1 &a)
Definition: fvec.h:386
#define _mm_avg_pu8
Definition: xmmintrin.h:359
EXPLICIT F32vec4(float f)
Definition: fvec.h:98
__m128 vec
Definition: fvec.h:355
__m128 _mm_xor_ps(__m128 _A, __m128 _B)
F32vec4(__m128 m)
Definition: fvec.h:92
__m64 _m_pmaxub(__m64, __m64)
friend F32vec4 abs(const F32vec4 &a)
Definition: fvec.h:196
__m128
Definition: xmmintrin.h:70
friend F32vec1 rsqrt(const F32vec1 &a)
Definition: fvec.h:400
#define EXPLICIT
Definition: ivec.h:30
__m128 m
Definition: fvec.h:77
__m128 _mm_mul_ps(__m128 _A, __m128 _B)
#define _VEC_ASSERT(_Expression)
Definition: fvec.h:59
float & operator[](int i)
Definition: fvec.h:221
Is32vec2 F32vec4ToIs32vec2(const F32vec4 &a)
Definition: fvec.h:324
__m128 _mm_set_ps1(float _A)
_Check_return_ _In_z_ const char _Inout_ FILE * _File
Definition: stdio.h:226
friend F32vec4 operator^(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:119
__m128 _mm_cvt_si2ss(__m128, int)
const union @91 __f32vec4_abs_mask_cheat
__m128 _mm_add_ss(__m128 _A, __m128 _B)
__m128 _mm_min_ss(__m128 _A, __m128 _B)
friend F32vec1 operator/(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:384
F32vec4()
Definition: fvec.h:89
__m128 _mm_min_ps(__m128 _A, __m128 _B)
friend F32vec1 operator+(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:381
friend F32vec1 rsqrt_nr(const F32vec1 &a)
Definition: fvec.h:414
int _mm_movemask_ps(__m128 _A)
__m64 _m_pminub(__m64, __m64)
F32vec4 unpack_low(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:233
int F32vec4ToInt(const F32vec4 &a)
Definition: fvec.h:316
__m128 _mm_rcp_ss(__m128 _A)
__m128 vec
Definition: fvec.h:85
const float & operator[](int i) const
Definition: fvec.h:213
__m64 _m_pmulhuw(__m64, __m64)
__m128 _mm_sub_ss(__m128 _A, __m128 _B)
friend F32vec4 operator-(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:123
friend F32vec1 operator-(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:382
F32vec4 & operator=(float f)
Definition: fvec.h:105
F32vec1 & operator^=(const F32vec1 &a)
Definition: fvec.h:392
const F32vec1 & b
Definition: fvec.h:442
F32vec4 & operator&=(const F32vec4 &a)
Definition: fvec.h:133
__m128 _mm_unpackhi_ps(__m128 _A, __m128 _B)
__m128 _mm_unpacklo_ps(__m128 _A, __m128 _B)
EXPLICIT F32vec1(float f)
Definition: fvec.h:364
__m128 _mm_div_ss(__m128 _A, __m128 _B)
void storeu(float *p, const F32vec4 &a)
Definition: fvec.h:251
__m128 _mm_rsqrt_ps(__m128 _A)
Is16vec4 simd_min(const Is16vec4 &a, const Is16vec4 &b)
Definition: fvec.h:293
Iu8vec8 simd_max(const Iu8vec8 &a, const Iu8vec8 &b)
Definition: fvec.h:294
__m128 _mm_max_ss(__m128 _A, __m128 _B)
friend F32vec4 sqrt(const F32vec4 &a)
Definition: fvec.h:146
__m128 _mm_or_ps(__m128 _A, __m128 _B)
int i[4]
Definition: fvec.h:76
Fvec32s4_COMP(eq) Fvec32s4_COMP(lt) Fvec32s4_COMP(le) Fvec32s4_COMP(gt) Fvec32s4_COMP(ge) Fvec32s4_COMP(neq) Fvec32s4_COMP(nlt) Fvec32s4_COMP(nle) Fvec32s4_COMP(ngt) Fvec32s4_COMP(nge) friend F32vec4 simd_min(const F32vec4 &a
__m128 _mm_mul_ss(__m128 _A, __m128 _B)
float _mm_cvtss_f32(__m128 _A)
F32vec1(__m128 m)
Definition: fvec.h:370
int _mm_cvtt_ss2si(__m128 _A)
void loadu(F32vec4 &a, float *p)
Definition: fvec.h:247
F32vec1(int i)
Definition: fvec.h:361
F32vec4(float f3, float f2, float f1, float f0)
Definition: fvec.h:95
F32vec4 IntToF32vec4(const F32vec4 &a, int i)
Definition: fvec.h:334
friend F32vec4 operator*(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:124
__m64 _m_pminsw(__m64, __m64)
const Is16vec4 &b return _m_pmaxsw(a, b)
friend float add_horizontal(const F32vec4 &a)
Definition: fvec.h:138