STLdoc
STLdocumentation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
fvec.h
Go to the documentation of this file.
1 /***
2 *** Copyright (C) 1985-2011 Intel Corporation. All rights reserved.
3 ***
4 *** The information and source code contained herein is the exclusive
5 *** property of Intel Corporation and may not be disclosed, examined
6 *** or reproduced in whole or in part without explicit written authorization
7 *** from the company.
8 ***
9 ****/
10 
11 /*
12  * Definition of a C++ class interface to Streaming SIMD Extension intrinsics.
13  *
14  *
15  * File name : fvec.h Fvec class definitions
16  *
17  * Concept: A C++ abstraction of Streaming SIMD Extensions designed to improve
18  *
19  * programmer productivity. Speed and accuracy are sacrificed for utility.
20  *
21  * Facilitates an easy transition to compiler intrinsics
22  *
23  * or assembly language.
24  *
25  * F32vec4: 4 packed single precision
26  * 32-bit floating point numbers
27 */
28 
29 #ifndef _FVEC_H_INCLUDED
30 #define _FVEC_H_INCLUDED
31 #ifndef RC_INVOKED
32 
33 #if !defined __cplusplus
34  #error ERROR: This file is only supported in C++ compilations!
35 #endif /* !defined __cplusplus */
36 
37 #if defined (_M_CEE_PURE)
38  #error ERROR: This file is not supported in the pure mode!
39 #else /* defined (_M_CEE_PURE) */
40 
41 #include <xmmintrin.h> /* SSE Intrinsic function definition include file */
42 #include <ivec.h>
43 #include <crtdefs.h>
44 
45 #ifndef _VEC_ASSERT
46 #ifdef NDEBUG
47  #define _VEC_ASSERT(_Expression) ((void)0)
48 #else /* NDEBUG */
49 #ifdef __cplusplus
50  extern "C" {
51 #endif /* __cplusplus */
52 
53  _CRTIMP void __cdecl _wassert(_In_z_ const wchar_t * _Message, _In_z_ const wchar_t *_File, _In_ unsigned _Line);
54 
55 #ifdef __cplusplus
56  }
57 #endif /* __cplusplus */
58 
59  #define _VEC_ASSERT(_Expression) (void)( (!!(_Expression)) || (_wassert(_CRT_WIDE(#_Expression), _CRT_WIDE(__FILE__), __LINE__), 0) )
60 #endif /* NDEBUG */
61 #endif /* _VEC_ASSERT */
62 
63 /* Define _ENABLE_VEC_DEBUG to enable std::ostream inserters for debug output */
64 #if defined (_ENABLE_VEC_DEBUG)
65  #include <iostream>
66 #endif /* defined (_ENABLE_VEC_DEBUG) */
67 
68 #ifdef _MSC_VER
69 #pragma pack(push,_CRT_PACKING)
70 #endif /* _MSC_VER */
71 
72 #pragma pack(push,16) /* Must ensure class & union 16-B aligned */
73 
/*
 * Constant used by abs(F32vec4): every 32-bit lane holds 0x7fffffff, i.e.
 * all bits set except the most significant one.
 *
 * The union is initialized through its first member (the int array) and
 * read back through the __m128 member `m`, which is what the
 * _f32vec4_abs_mask macro below hands to F32vec4.
 */
const union
{
    int i[4];
    __m128 m;
} __f32vec4_abs_mask_cheat = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};

/* The mask above viewed as an F32vec4 */
#define _f32vec4_abs_mask ((F32vec4)__f32vec4_abs_mask_cheat.m)
81 
82 class F32vec4
83 {
84 protected:
86 public:
87 
88  /* Constructors: __m128, 4 floats, 1 float */
89  F32vec4() {}
90 
91  /* initialize 4 SP FP with __m128 data type */
92  F32vec4(__m128 m) { vec = m;}
93 
94  /* initialize 4 SP FPs with 4 floats */
95  F32vec4(float f3, float f2, float f1, float f0) { vec= _mm_set_ps(f3,f2,f1,f0); }
96 
97  /* Explicitly initialize each of 4 SP FPs with same float */
98  EXPLICIT F32vec4(float f) { vec = _mm_set_ps1(f); }
99 
100  /* Explicitly initialize each of 4 SP FPs with same double */
101  EXPLICIT F32vec4(double d) { vec = _mm_set_ps1((float) d); }
102 
103  /* Assignment operations */
104 
105  F32vec4& operator =(float f) { vec = _mm_set_ps1(f); return *this; }
106 
107  F32vec4& operator =(double d)
108  {
109  vec = _mm_set_ps1((float) d);
110  return *this;
111  }
112 
113  /* Conversion functions */
114  operator __m128() const { return vec; } /* Convert to __m128 */
115 
116  /* Logical Operators */
117  friend F32vec4 operator &(const F32vec4 &a, const F32vec4 &b) { return _mm_and_ps(a,b); }
118  friend F32vec4 operator |(const F32vec4 &a, const F32vec4 &b) { return _mm_or_ps(a,b); }
119  friend F32vec4 operator ^(const F32vec4 &a, const F32vec4 &b) { return _mm_xor_ps(a,b); }
120 
121  /* Arithmetic Operators */
122  friend F32vec4 operator +(const F32vec4 &a, const F32vec4 &b) { return _mm_add_ps(a,b); }
123  friend F32vec4 operator -(const F32vec4 &a, const F32vec4 &b) { return _mm_sub_ps(a,b); }
124  friend F32vec4 operator *(const F32vec4 &a, const F32vec4 &b) { return _mm_mul_ps(a,b); }
125  friend F32vec4 operator /(const F32vec4 &a, const F32vec4 &b) { return _mm_div_ps(a,b); }
126 
127  F32vec4& operator =(const F32vec4 &a) { vec = a.vec; return *this; }
128  F32vec4& operator =(const __m128 &avec) { vec = avec; return *this; }
129  F32vec4& operator +=(const F32vec4 &a) { return *this = _mm_add_ps(vec,a); }
130  F32vec4& operator -=(const F32vec4 &a) { return *this = _mm_sub_ps(vec,a); }
131  F32vec4& operator *=(const F32vec4 &a) { return *this = _mm_mul_ps(vec,a); }
132  F32vec4& operator /=(const F32vec4 &a) { return *this = _mm_div_ps(vec,a); }
133  F32vec4& operator &=(const F32vec4 &a) { return *this = _mm_and_ps(vec,a); }
134  F32vec4& operator |=(const F32vec4 &a) { return *this = _mm_or_ps(vec,a); }
135  F32vec4& operator ^=(const F32vec4 &a) { return *this = _mm_xor_ps(vec,a); }
136 
137  /* Horizontal Add */
138  friend float add_horizontal(const F32vec4 &a)
139  {
140  F32vec4 ftemp = _mm_add_ps(a, _mm_movehl_ps(a, a));
141  ftemp = _mm_add_ss(ftemp, _mm_shuffle_ps(ftemp, ftemp, 1));
142  return _mm_cvtss_f32(ftemp);
143  }
144 
145  /* Square Root */
146  friend F32vec4 sqrt(const F32vec4 &a) { return _mm_sqrt_ps(a); }
147  /* Reciprocal */
148  friend F32vec4 rcp(const F32vec4 &a) { return _mm_rcp_ps(a); }
149  /* Reciprocal Square Root */
150  friend F32vec4 rsqrt(const F32vec4 &a) { return _mm_rsqrt_ps(a); }
151 
152  /* NewtonRaphson Reciprocal
153  [2 * rcpps(x) - (x * rcpps(x) * rcpps(x))] */
154  friend F32vec4 rcp_nr(const F32vec4 &a)
155  {
156  F32vec4 Ra0 = _mm_rcp_ps(a);
157  return _mm_sub_ps(_mm_add_ps(Ra0, Ra0), _mm_mul_ps(_mm_mul_ps(Ra0, a), Ra0));
158  }
159 
160  /* NewtonRaphson Reciprocal Square Root
161  0.5 * rsqrtps * (3 - x * rsqrtps(x) * rsqrtps(x)) */
162 #pragma warning(push)
163 #pragma warning(disable : 4640)
164  friend F32vec4 rsqrt_nr(const F32vec4 &a)
165  {
166  static const F32vec4 fvecf0pt5(0.5f);
167  static const F32vec4 fvecf3pt0(3.0f);
168  F32vec4 Ra0 = _mm_rsqrt_ps(a);
169  return (fvecf0pt5 * Ra0) * (fvecf3pt0 - (a * Ra0) * Ra0);
170  }
171 #pragma warning(pop)
172 
173  /* Compares: Mask is returned */
174  /* Macros expand to all compare intrinsics. Example:
175  friend F32vec4 cmpeq(const F32vec4 &a, const F32vec4 &b)
176  { return _mm_cmpeq_ps(a,b);} */
177  #define Fvec32s4_COMP(op) \
178  friend F32vec4 cmp##op (const F32vec4 &a, const F32vec4 &b) { return _mm_cmp##op##_ps(a,b); }
179  Fvec32s4_COMP(eq) /* expanded to cmpeq(a,b) */
180  Fvec32s4_COMP(lt) /* expanded to cmplt(a,b) */
181  Fvec32s4_COMP(le) /* expanded to cmple(a,b) */
182  Fvec32s4_COMP(gt) /* expanded to cmpgt(a,b) */
183  Fvec32s4_COMP(ge) /* expanded to cmpge(a,b) */
184  Fvec32s4_COMP(neq) /* expanded to cmpneq(a,b) */
185  Fvec32s4_COMP(nlt) /* expanded to cmpnlt(a,b) */
186  Fvec32s4_COMP(nle) /* expanded to cmpnle(a,b) */
187  Fvec32s4_COMP(ngt) /* expanded to cmpngt(a,b) */
188  Fvec32s4_COMP(nge) /* expanded to cmpnge(a,b) */
189  #undef Fvec32s4_COMP
190 
191  /* Min and Max */
192  friend F32vec4 simd_min(const F32vec4 &a, const F32vec4 &b) { return _mm_min_ps(a,b); }
193  friend F32vec4 simd_max(const F32vec4 &a, const F32vec4 &b) { return _mm_max_ps(a,b); }
194 
195  /* Absolute value */
196  friend F32vec4 abs(const F32vec4 &a) {return _mm_and_ps(a, _f32vec4_abs_mask); }
197 
198  /* Debug Features */
199 #if defined (_ENABLE_VEC_DEBUG)
200  /* Output */
201  friend std::ostream & operator<<(std::ostream & os, const F32vec4 &a)
202  {
203  /* To use: cout << "Elements of F32vec4 fvec are: " << fvec; */
204  float *fp = (float*)&a;
205  os << "[3]:" << *(fp+3)
206  << " [2]:" << *(fp+2)
207  << " [1]:" << *(fp+1)
208  << " [0]:" << *fp;
209  return os;
210  }
211 #endif /* defined (_ENABLE_VEC_DEBUG) */
212  /* Element Access Only, no modifications to elements*/
213  const float& operator[](int i) const
214  {
215  /* Assert enabled only during debug /DDEBUG */
216  _VEC_ASSERT((0 <= i) && (i <= 3)); /* User should only access elements 0-3 */
217  float *fp = (float*)&vec;
218  return *(fp+i);
219  }
220  /* Element Access and Modification*/
221  float& operator[](int i)
222  {
223  /* Assert enabled only during debug /DDEBUG */
224  _VEC_ASSERT((0 <= i) && (i <= 3)); /* User should only access elements 0-3 */
225  float *fp = (float*)&vec;
226  return *(fp+i);
227  }
228 };
229 
230  /* Miscellaneous */
231 
232 /* Interleave low order data elements of a and b into destination */
233 inline F32vec4 unpack_low(const F32vec4 &a, const F32vec4 &b)
234 { return _mm_unpacklo_ps(a, b); }
235 
236 /* Interleave high order data elements of a and b into target */
237 inline F32vec4 unpack_high(const F32vec4 &a, const F32vec4 &b)
238 { return _mm_unpackhi_ps(a, b); }
239 
240 /* Move Mask to Integer returns 4 bit mask formed of most significant bits of a */
241 inline int move_mask(const F32vec4 &a)
242 { return _mm_movemask_ps(a);}
243 
244  /* Data Motion Functions */
245 
246 /* Load Unaligned loadu_ps: Unaligned */
247 inline void loadu(F32vec4 &a, float *p)
248 { a = _mm_loadu_ps(p); }
249 
250 /* Store Temporal storeu_ps: Unaligned */
251 inline void storeu(float *p, const F32vec4 &a)
252 { _mm_storeu_ps(p, a); }
253 
254  /* Cacheability Support */
255 
256 /* Non-Temporal Store */
257 inline void store_nta(float *p, const F32vec4 &a)
258 { _mm_stream_ps(p,a);}
259 
260  /* Conditional Selects:*/
261 /*(a OP b)? c : d; where OP is any compare operator
262 Macros expand to conditional selects which use all compare intrinsics.
263 Example:
264 friend F32vec4 select_eq(const F32vec4 &a, const F32vec4 &b, const F32vec4 &c, const F32vec4 &d)
265 {
266  F32vec4 mask = _mm_cmpeq_ps(a,b);
267  return( (mask & c) | F32vec4((_mm_andnot_ps(mask,d))));
268 }
269 */
270 
271 #define Fvec32s4_SELECT(op) \
272 inline F32vec4 select_##op (const F32vec4 &a, const F32vec4 &b, const F32vec4 &c, const F32vec4 &d) \
273 { \
274  F32vec4 mask = _mm_cmp##op##_ps(a,b); \
275  return( (mask & c) | F32vec4((_mm_andnot_ps(mask,d)))); \
276 }
277 Fvec32s4_SELECT(eq) /* generates select_eq(a,b) */
278 Fvec32s4_SELECT(lt) /* generates select_lt(a,b) */
279 Fvec32s4_SELECT(le) /* generates select_le(a,b) */
280 Fvec32s4_SELECT(gt) /* generates select_gt(a,b) */
281 Fvec32s4_SELECT(ge) /* generates select_ge(a,b) */
282 Fvec32s4_SELECT(neq) /* generates select_neq(a,b) */
283 Fvec32s4_SELECT(nlt) /* generates select_nlt(a,b) */
284 Fvec32s4_SELECT(nle) /* generates select_nle(a,b) */
285 Fvec32s4_SELECT(ngt) /* generates select_ngt(a,b) */
286 Fvec32s4_SELECT(nge) /* generates select_nge(a,b) */
287 #undef Fvec32s4_SELECT
288 
289 /* Streaming SIMD Extensions Integer Intrinsic Functions */
290 
291 /* Max and Min */
292 inline Is16vec4 simd_max(const Is16vec4 &a, const Is16vec4 &b) { return _m_pmaxsw(a,b);}
293 inline Is16vec4 simd_min(const Is16vec4 &a, const Is16vec4 &b) { return _m_pminsw(a,b);}
294 inline Iu8vec8 simd_max(const Iu8vec8 &a, const Iu8vec8 &b) { return _m_pmaxub(a,b);}
295 inline Iu8vec8 simd_min(const Iu8vec8 &a, const Iu8vec8 &b) { return _m_pminub(a,b);}
296 
297 /* Average */
298 inline Iu16vec4 simd_avg(const Iu16vec4 &a, const Iu16vec4 &b) { return _mm_avg_pu16(a,b); }
299 inline Iu8vec8 simd_avg(const Iu8vec8 &a, const Iu8vec8 &b) { return _mm_avg_pu8(a,b); }
300 
301 /* Move ByteMask To Int: returns mask formed from most sig bits of each vec of a */
302 inline int move_mask(const I8vec8 &a) { return _m_pmovmskb(a);}
303 
304 /* Packed Multiply High Unsigned */
305 inline Iu16vec4 mul_high(const Iu16vec4 &a, const Iu16vec4 &b) { return _m_pmulhuw(a,b); }
306 
307 /* Byte Mask Write: Write bytes if most significant bit in each corresponding byte is set */
308 inline void mask_move(const I8vec8 &a, const I8vec8 &b, char *addr) { _m_maskmovq(a, b, addr); }
309 
310 /* Data Motion: Store Non Temporal */
311 inline void store_nta(__m64 *p, const M64 &a) { _mm_stream_pi(p,a); }
312 
313 /* Conversions between ivec <-> fvec */
314 
315 /* Convert first element of F32vec4 to int with truncation */
316 inline int F32vec4ToInt(const F32vec4 &a)
317 {
318 
319  return _mm_cvtt_ss2si(a);
320 
321 }
322 
323 /* Convert two lower SP FP values of a to Is32vec2 with truncation */
325 {
326 
327  __m64 result;
328  result = _mm_cvtt_ps2pi(a);
329  return Is32vec2(result);
330 
331 }
332 
333 /* Convert the 32-bit int i to an SP FP value; the upper three SP FP values are passed through from a. */
334 inline F32vec4 IntToF32vec4(const F32vec4 &a, int i)
335 {
336 
337  __m128 result;
338  result = _mm_cvt_si2ss(a,i);
339  return F32vec4(result);
340 
341 }
342 
343 /* Convert the two 32-bit integer values in b to two SP FP values; the upper two SP FP values are passed from a. */
344 inline F32vec4 Is32vec2ToF32vec4(const F32vec4 &a, const Is32vec2 &b)
345 {
346 
347  __m128 result;
348  result = _mm_cvt_pi2ps(a,b);
349  return F32vec4(result);
350 }
351 
352 class F32vec1
353 {
354 protected:
356 public:
357 
358  /* Constructors: 1 float */
359  F32vec1() {}
360 
361  F32vec1(int i) { vec = _mm_cvt_si2ss(vec,i);};
362 
363  /* Initialize each of 4 SP FPs with same float */
364  EXPLICIT F32vec1(float f) { vec = _mm_set_ss(f); }
365 
366  /* Initialize each of 4 SP FPs with same float */
367  EXPLICIT F32vec1(double d) { vec = _mm_set_ss((float) d); }
368 
369  /* initialize with __m128 data type */
370  F32vec1(__m128 m) { vec = m; }
371 
372  /* Conversion functions */
373  operator __m128() const { return vec; } /* Convert to float */
374 
375  /* Logical Operators */
376  friend F32vec1 operator &(const F32vec1 &a, const F32vec1 &b) { return _mm_and_ps(a,b); }
377  friend F32vec1 operator |(const F32vec1 &a, const F32vec1 &b) { return _mm_or_ps(a,b); }
378  friend F32vec1 operator ^(const F32vec1 &a, const F32vec1 &b) { return _mm_xor_ps(a,b); }
379 
380  /* Arithmetic Operators */
381  friend F32vec1 operator +(const F32vec1 &a, const F32vec1 &b) { return _mm_add_ss(a,b); }
382  friend F32vec1 operator -(const F32vec1 &a, const F32vec1 &b) { return _mm_sub_ss(a,b); }
383  friend F32vec1 operator *(const F32vec1 &a, const F32vec1 &b) { return _mm_mul_ss(a,b); }
384  friend F32vec1 operator /(const F32vec1 &a, const F32vec1 &b) { return _mm_div_ss(a,b); }
385 
386  F32vec1& operator +=(const F32vec1 &a) { return *this = _mm_add_ss(vec,a); }
387  F32vec1& operator -=(const F32vec1 &a) { return *this = _mm_sub_ss(vec,a); }
388  F32vec1& operator *=(const F32vec1 &a) { return *this = _mm_mul_ss(vec,a); }
389  F32vec1& operator /=(const F32vec1 &a) { return *this = _mm_div_ss(vec,a); }
390  F32vec1& operator &=(const F32vec1 &a) { return *this = _mm_and_ps(vec,a); }
391  F32vec1& operator |=(const F32vec1 &a) { return *this = _mm_or_ps(vec,a); }
392  F32vec1& operator ^=(const F32vec1 &a) { return *this = _mm_xor_ps(vec,a); }
393 
394 
395  /* Square Root */
396  friend F32vec1 sqrt(const F32vec1 &a) { return _mm_sqrt_ss(a); }
397  /* Reciprocal */
398  friend F32vec1 rcp(const F32vec1 &a) { return _mm_rcp_ss(a); }
399  /* Reciprocal Square Root */
400  friend F32vec1 rsqrt(const F32vec1 &a) { return _mm_rsqrt_ss(a); }
401 
402  /* NewtonRaphson Reciprocal
403  [2 * rcpss(x) - (x * rcpss(x) * rcpss(x))] */
404  friend F32vec1 rcp_nr(const F32vec1 &a)
405  {
406  F32vec1 Ra0 = _mm_rcp_ss(a);
407  return _mm_sub_ss(_mm_add_ss(Ra0, Ra0), _mm_mul_ss(_mm_mul_ss(Ra0, a), Ra0));
408  }
409 
410  /* NewtonRaphson Reciprocal Square Root
411  0.5 * rsqrtss * (3 - x * rsqrtss(x) * rsqrtss(x)) */
412 #pragma warning(push)
413 #pragma warning(disable : 4640)
414  friend F32vec1 rsqrt_nr(const F32vec1 &a)
415  {
416  static const F32vec1 fvecf0pt5(0.5f);
417  static const F32vec1 fvecf3pt0(3.0f);
418  F32vec1 Ra0 = _mm_rsqrt_ss(a);
419  return (fvecf0pt5 * Ra0) * (fvecf3pt0 - (a * Ra0) * Ra0);
420  }
421 #pragma warning(pop)
422 
423  /* Compares: Mask is returned */
424  /* Macros expand to all compare intrinsics. Example:
425  friend F32vec1 cmpeq(const F32vec1 &a, const F32vec1 &b)
426  { return _mm_cmpeq_ss(a,b);} */
427  #define Fvec32s1_COMP(op) \
428  friend F32vec1 cmp##op (const F32vec1 &a, const F32vec1 &b) { return _mm_cmp##op##_ss(a,b); }
429  Fvec32s1_COMP(eq) /* expanded to cmpeq(a,b) */
430  Fvec32s1_COMP(lt) /* expanded to cmplt(a,b) */
431  Fvec32s1_COMP(le) /* expanded to cmple(a,b) */
432  Fvec32s1_COMP(gt) /* expanded to cmpgt(a,b) */
433  Fvec32s1_COMP(ge) /* expanded to cmpge(a,b) */
434  Fvec32s1_COMP(neq) /* expanded to cmpneq(a,b) */
435  Fvec32s1_COMP(nlt) /* expanded to cmpnlt(a,b) */
436  Fvec32s1_COMP(nle) /* expanded to cmpnle(a,b) */
437  Fvec32s1_COMP(ngt) /* expanded to cmpngt(a,b) */
438  Fvec32s1_COMP(nge) /* expanded to cmpnge(a,b) */
439  #undef Fvec32s1_COMP
440 
441  /* Min and Max */
442  friend F32vec1 simd_min(const F32vec1 &a, const F32vec1 &b) { return _mm_min_ss(a,b); }
443  friend F32vec1 simd_max(const F32vec1 &a, const F32vec1 &b) { return _mm_max_ss(a,b); }
444 
445  /* Debug Features */
446 #if defined (_ENABLE_VEC_DEBUG)
447  /* Output */
448  friend std::ostream & operator<<(std::ostream & os, const F32vec1 &a)
449  {
450  /* To use: cout << "Elements of F32vec1 fvec are: " << fvec; */
451  float *fp = (float*)&a;
452  os << "float:" << *fp;
453  return os;
454  }
455 #endif /* defined (_ENABLE_VEC_DEBUG) */
456 
457 };
458 
459  /* Conditional Selects:*/
460 /*(a OP b)? c : d; where OP is any compare operator
461 Macros expand to conditional selects which use all compare intrinsics.
462 Example:
463 friend F32vec1 select_eq(const F32vec1 &a, const F32vec1 &b, const F32vec1 &c, const F32vec1 &d)
464 {
465  F32vec1 mask = _mm_cmpeq_ss(a,b);
466  return( (mask & c) | F32vec1((_mm_andnot_ps(mask,d))));
467 }
468 */
469 
470 #define Fvec32s1_SELECT(op) \
471 inline F32vec1 select_##op (const F32vec1 &a, const F32vec1 &b, const F32vec1 &c, const F32vec1 &d) \
472 { \
473  F32vec1 mask = _mm_cmp##op##_ss(a,b); \
474  return( (mask & c) | F32vec1((_mm_andnot_ps(mask,d)))); \
475 }
476 Fvec32s1_SELECT(eq) /* generates select_eq(a,b) */
477 Fvec32s1_SELECT(lt) /* generates select_lt(a,b) */
478 Fvec32s1_SELECT(le) /* generates select_le(a,b) */
479 Fvec32s1_SELECT(gt) /* generates select_gt(a,b) */
480 Fvec32s1_SELECT(ge) /* generates select_ge(a,b) */
481 Fvec32s1_SELECT(neq) /* generates select_neq(a,b) */
482 Fvec32s1_SELECT(nlt) /* generates select_nlt(a,b) */
483 Fvec32s1_SELECT(nle) /* generates select_nle(a,b) */
484 Fvec32s1_SELECT(ngt) /* generates select_ngt(a,b) */
485 Fvec32s1_SELECT(nge) /* generates select_nge(a,b) */
486 #undef Fvec32s1_SELECT
487 
488 /* Conversions between ivec <-> fvec */
489 
490 /* Convert F32vec1 to int */
491 inline int F32vec1ToInt(const F32vec1 &a)
492 {
493  return _mm_cvtt_ss2si(a);
494 }
495 
496 
497 
498 #pragma pack(pop) /* 16-B aligned */
499 
500 #ifdef _MSC_VER
501 #pragma pack(pop)
502 #endif /* _MSC_VER */
503 
504 #endif /* defined (_M_CEE_PURE) */
505 
506 #endif /* RC_INVOKED */
507 #endif /* _FVEC_H_INCLUDED */
Definition: fvec.h:82
__m128 _mm_set_ps(float _A, float _B, float _C, float _D)
#define Fvec32s4_SELECT(op)
Definition: fvec.h:271
F32vec1 & operator*=(const F32vec1 &a)
Definition: fvec.h:388
Definition: ivec.h:195
__m128 _mm_and_ps(__m128 _A, __m128 _B)
void store_nta(float *p, const F32vec4 &a)
Definition: fvec.h:257
#define _mm_avg_pu16
Definition: xmmintrin.h:360
#define Fvec32s1_SELECT(op)
Definition: fvec.h:470
__m128 _mm_movehl_ps(__m128, __m128)
__m128 _mm_rsqrt_ss(__m128 _A)
int move_mask(const F32vec4 &a)
Definition: fvec.h:241
__m128 _mm_sqrt_ps(__m128 _A)
__m128 _mm_max_ps(__m128 _A, __m128 _B)
void mask_move(const I8vec8 &a, const I8vec8 &b, char *addr)
Definition: fvec.h:308
void _mm_stream_pi(__m64 *, __m64)
const F32vec4 & b
Definition: fvec.h:192
void _mm_storeu_ps(float *_V, __m128 _A)
F32vec1()
Definition: fvec.h:359
friend F32vec4 rsqrt_nr(const F32vec4 &a)
Definition: fvec.h:164
Fvec32s1_COMP(eq) Fvec32s1_COMP(lt) Fvec32s1_COMP(le) Fvec32s1_COMP(gt) Fvec32s1_COMP(ge) Fvec32s1_COMP(neq) Fvec32s1_COMP(nlt) Fvec32s1_COMP(nle) Fvec32s1_COMP(ngt) Fvec32s1_COMP(nge) friend F32vec1 simd_min(const F32vec1 &a
friend F32vec1 simd_max(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:443
#define _CRTIMP
Definition: crtdefs.h:23
Definition: ivec.h:636
F32vec4 & operator/=(const F32vec4 &a)
Definition: fvec.h:132
__m64 _mm_cvtt_ps2pi(__m128 _A)
friend F32vec1 operator|(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:377
F32vec4 unpack_high(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:237
friend F32vec4 rsqrt(const F32vec4 &a)
Definition: fvec.h:150
__m128 _mm_sqrt_ss(__m128 _A)
Iu16vec4 simd_avg(const Iu16vec4 &a, const Iu16vec4 &b)
Definition: fvec.h:298
uint_2 operator<<(const uint_2 &_Lhs, const uint_2 &_Rhs) __GPU
Definition: amp_short_vectors.h:22866
F32vec4 & operator|=(const F32vec4 &a)
Definition: fvec.h:134
friend F32vec4 rcp_nr(const F32vec4 &a)
Definition: fvec.h:154
F32vec1 & operator&=(const F32vec1 &a)
Definition: fvec.h:390
__m128 _mm_cvt_pi2ps(__m128, __m64)
__m128 _mm_shuffle_ps(__m128 _A, __m128 _B, unsigned int _Imm8)
F32vec1 & operator-=(const F32vec1 &a)
Definition: fvec.h:387
friend F32vec1 operator&(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:376
void _m_maskmovq(__m64, __m64, char *)
__m128 _mm_loadu_ps(float const *_A)
friend F32vec4 operator&(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:117
F32vec1 & operator/=(const F32vec1 &a)
Definition: fvec.h:389
friend F32vec1 sqrt(const F32vec1 &a)
Definition: fvec.h:396
__m128 _mm_add_ps(__m128 _A, __m128 _B)
F32vec4 & operator+=(const F32vec4 &a)
Definition: fvec.h:129
Definition: ivec.h:94
F32vec4 & operator^=(const F32vec4 &a)
Definition: fvec.h:135
F32vec1 & operator|=(const F32vec1 &a)
Definition: fvec.h:391
_CRTIMP void __cdecl _wassert(_In_z_ const wchar_t *_Message, _In_z_ const wchar_t *_File, _In_ unsigned _Line)
friend F32vec4 simd_max(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:193
Definition: fvec.h:352
int _m_pmovmskb(__m64)
__m128 _mm_sub_ps(__m128 _A, __m128 _B)
__m128 _mm_div_ps(__m128 _A, __m128 _B)
friend F32vec1 operator^(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:378
__m128 _mm_set_ss(float _A)
F32vec4 & operator-=(const F32vec4 &a)
Definition: fvec.h:130
EXPLICIT F32vec4(double d)
Definition: fvec.h:101
friend F32vec4 operator+(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:122
friend F32vec1 rcp(const F32vec1 &a)
Definition: fvec.h:398
F32vec4 Is32vec2ToF32vec4(const F32vec4 &a, const Is32vec2 &b)
Definition: fvec.h:344
Iu16vec4 mul_high(const Iu16vec4 &a, const Iu16vec4 &b)
Definition: fvec.h:305
friend F32vec1 rcp_nr(const F32vec1 &a)
Definition: fvec.h:404
#define _In_z_
Definition: sal.h:319
#define _In_
Definition: sal.h:314
friend F32vec4 rcp(const F32vec4 &a)
Definition: fvec.h:148
__m128 _mm_rcp_ps(__m128 _A)
friend F32vec4 operator/(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:125
F32vec4 & operator*=(const F32vec4 &a)
Definition: fvec.h:131
friend F32vec4 operator|(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:118
Definition: ivec.h:456
__m64
Definition: mmintrin.h:42
void _mm_stream_ps(float *, __m128)
friend F32vec1 operator*(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:383
#define _f32vec4_abs_mask
Definition: fvec.h:80
EXPLICIT F32vec1(double d)
Definition: fvec.h:367
basic_ostream< char, char_traits< char > > ostream
Definition: iosfwd:678
F32vec1 & operator+=(const F32vec1 &a)
Definition: fvec.h:386
#define _mm_avg_pu8
Definition: xmmintrin.h:359
EXPLICIT F32vec4(float f)
Definition: fvec.h:98
__m128 vec
Definition: fvec.h:355
__m128 _mm_xor_ps(__m128 _A, __m128 _B)
F32vec4(__m128 m)
Definition: fvec.h:92
__m64 _m_pmaxub(__m64, __m64)
friend F32vec4 abs(const F32vec4 &a)
Definition: fvec.h:196
__m128
Definition: xmmintrin.h:70
friend F32vec1 rsqrt(const F32vec1 &a)
Definition: fvec.h:400
#define EXPLICIT
Definition: ivec.h:30
__m128 m
Definition: fvec.h:77
__m128 _mm_mul_ps(__m128 _A, __m128 _B)
#define _VEC_ASSERT(_Expression)
Definition: fvec.h:59
float & operator[](int i)
Definition: fvec.h:221
Is32vec2 F32vec4ToIs32vec2(const F32vec4 &a)
Definition: fvec.h:324
__m128 _mm_set_ps1(float _A)
Definition: ivec.h:530
_Check_return_ _In_z_ const char _Inout_ FILE * _File
Definition: stdio.h:226
friend F32vec4 operator^(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:119
__m128 _mm_cvt_si2ss(__m128, int)
const union @91 __f32vec4_abs_mask_cheat
__m128 _mm_add_ss(__m128 _A, __m128 _B)
__m128 _mm_min_ss(__m128 _A, __m128 _B)
friend F32vec1 operator/(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:384
F32vec4()
Definition: fvec.h:89
__m128 _mm_min_ps(__m128 _A, __m128 _B)
friend F32vec1 operator+(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:381
friend F32vec1 rsqrt_nr(const F32vec1 &a)
Definition: fvec.h:414
int _mm_movemask_ps(__m128 _A)
__m64 _m_pminub(__m64, __m64)
F32vec4 unpack_low(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:233
int F32vec4ToInt(const F32vec4 &a)
Definition: fvec.h:316
__m128 _mm_rcp_ss(__m128 _A)
Definition: ivec.h:374
__m128 vec
Definition: fvec.h:85
const float & operator[](int i) const
Definition: fvec.h:213
__m64 _m_pmulhuw(__m64, __m64)
__m128 _mm_sub_ss(__m128 _A, __m128 _B)
friend F32vec4 operator-(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:123
friend F32vec1 operator-(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:382
F32vec4 & operator=(float f)
Definition: fvec.h:105
F32vec1 & operator^=(const F32vec1 &a)
Definition: fvec.h:392
const F32vec1 & b
Definition: fvec.h:442
F32vec4 & operator&=(const F32vec4 &a)
Definition: fvec.h:133
__m128 _mm_unpackhi_ps(__m128 _A, __m128 _B)
__m128 _mm_unpacklo_ps(__m128 _A, __m128 _B)
EXPLICIT F32vec1(float f)
Definition: fvec.h:364
__m128 _mm_div_ss(__m128 _A, __m128 _B)
void storeu(float *p, const F32vec4 &a)
Definition: fvec.h:251
__m128 _mm_rsqrt_ps(__m128 _A)
Is16vec4 simd_min(const Is16vec4 &a, const Is16vec4 &b)
Definition: fvec.h:293
Iu8vec8 simd_max(const Iu8vec8 &a, const Iu8vec8 &b)
Definition: fvec.h:294
__m128 _mm_max_ss(__m128 _A, __m128 _B)
friend F32vec4 sqrt(const F32vec4 &a)
Definition: fvec.h:146
__m128 _mm_or_ps(__m128 _A, __m128 _B)
int i[4]
Definition: fvec.h:76
Fvec32s4_COMP(eq) Fvec32s4_COMP(lt) Fvec32s4_COMP(le) Fvec32s4_COMP(gt) Fvec32s4_COMP(ge) Fvec32s4_COMP(neq) Fvec32s4_COMP(nlt) Fvec32s4_COMP(nle) Fvec32s4_COMP(ngt) Fvec32s4_COMP(nge) friend F32vec4 simd_min(const F32vec4 &a
__m128 _mm_mul_ss(__m128 _A, __m128 _B)
float _mm_cvtss_f32(__m128 _A)
F32vec1(__m128 m)
Definition: fvec.h:370
int _mm_cvtt_ss2si(__m128 _A)
void loadu(F32vec4 &a, float *p)
Definition: fvec.h:247
F32vec1(int i)
Definition: fvec.h:361
F32vec4(float f3, float f2, float f1, float f0)
Definition: fvec.h:95
F32vec4 IntToF32vec4(const F32vec4 &a, int i)
Definition: fvec.h:334
friend F32vec4 operator*(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:124
__m64 _m_pminsw(__m64, __m64)
const Is16vec4 &b return _m_pmaxsw(a, b)
friend float add_horizontal(const F32vec4 &a)
Definition: fvec.h:138