xmmintrin.h
1 /***
2 *** Copyright (C) 1985-1999 Intel Corporation. All rights reserved.
3 ***
4 *** The information and source code contained herein is the exclusive
5 *** property of Intel Corporation and may not be disclosed, examined
6 *** or reproduced in whole or in part without explicit written authorization
7 *** from the company.
8 ***
9 ****/
10 
11 /*
12  * xmmintrin.h
13  *
14  * Principal header file for Streaming SIMD Extensions intrinsics
15  *
16  * The intrinsics package can be used in 2 ways, based on whether or not
17  * _MM_FUNCTIONALITY is defined; if it is, the C/x87 implementation
18  * will be used (the "faux intrinsics").
19  *
20  *
21  * Note that the __m128 datatype provided in _MM2_FUNCTIONALITY mode is
22  * implemented as a struct, will not be 128-bit aligned, will be passed
23  * via the stack, etc. _MM_FUNCTIONALITY mode is not intended for
24  * performance, just semantics.
25  *
26  */
27 
28 #pragma once
29 #ifndef __midl
30 #ifndef _INCLUDED_MM2
31 #define _INCLUDED_MM2
32 
33 #if defined (_M_CEE_PURE)
34  #error ERROR: XMM intrinsics not supported in the pure mode!
35 #else /* defined (_M_CEE_PURE) */
36 
37 /*
38  * the __m64 type is required for the integer Streaming SIMD Extensions intrinsics
39  */
40 #ifndef _MMINTRIN_H_INCLUDED
41 #include <mmintrin.h>
42 #endif /* _MMINTRIN_H_INCLUDED */
43 
44 #ifdef _MM2_FUNCTIONALITY
45 /* support old notation */
46 #ifndef _MM_FUNCTIONALITY
47 #define _MM_FUNCTIONALITY
48 #endif /* _MM_FUNCTIONALITY */
49 #endif /* _MM2_FUNCTIONALITY */
50 
51 #ifdef __ICL
52 #ifdef _MM_FUNCTIONALITY
53 #include "xmm_func.h"
54 #else /* _MM_FUNCTIONALITY */
55 /* using real intrinsics */
56 typedef long long __m128;
57 #endif /* _MM_FUNCTIONALITY */
58 #else /* __ICL */
59 
60 typedef union __declspec(intrin_type) _CRT_ALIGN(16) __m128 {
61  float m128_f32[4];
62  unsigned __int64 m128_u64[2];
63  __int8 m128_i8[16];
64  __int16 m128_i16[8];
65  __int32 m128_i32[4];
66  __int64 m128_i64[2];
67  unsigned __int8 m128_u8[16];
68  unsigned __int16 m128_u16[8];
69  unsigned __int32 m128_u32[4];
70  } __m128;
71 
72 #ifndef _INC_MALLOC
73 /* pick up _mm_malloc() and _mm_free() */
74 #include <malloc.h>
75 #endif /* _INC_MALLOC */
76 #endif /* __ICL */
77 
78  /*******************************************************/
79  /* MACRO for shuffle parameter for _mm_shuffle_ps(). */
80  /* Argument fp3 is a digit [0-3] that represents the fp  */
81  /* from argument "b" of _mm_shuffle_ps that will be      */
82  /* placed in fp3 of the result. fp2 is the same for fp2  */
83  /* in the result. fp1 is a digit [0-3] that represents   */
84  /* the fp from argument "a" of _mm_shuffle_ps that will  */
85  /* be placed in fp1 of the result. fp0 is the same for   */
86  /* fp0 of the result.                                    */
87  /*******************************************************/
88 #define _MM_SHUFFLE(fp3,fp2,fp1,fp0) (((fp3) << 6) | ((fp2) << 4) | \
89  ((fp1) << 2) | ((fp0)))
90 
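/*
 * Editorial usage sketch (not part of the original header), showing how the
 * _MM_SHUFFLE macro builds the 8-bit immediate consumed by _mm_shuffle_ps.
 * The helper name "shuffle_demo" is illustrative only.
 */
#include <xmmintrin.h>

static __m128 shuffle_demo(__m128 a, __m128 b)
{
    /* _MM_SHUFFLE(3, 2, 1, 0) == 0xE4: the result holds { a[0], a[1], b[2], b[3] } */
    return _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 2, 1, 0));
}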
91 
92  /*******************************************************/
93  /* MACRO for performing the transpose of a 4x4 matrix */
94  /* of single precision floating point values. */
95  /* Arguments row0, row1, row2, and row3 are __m128 */
96  /* values whose elements form the corresponding rows */
97  /* of a 4x4 matrix. The matrix transpose is returned */
98  /* in arguments row0, row1, row2, and row3 where row0 */
99  /* now holds column 0 of the original matrix, row1 now */
100  /* holds column 1 of the original matrix, etc. */
101  /*******************************************************/
102 #define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) { \
103  __m128 tmp3, tmp2, tmp1, tmp0; \
104  \
105  tmp0 = _mm_shuffle_ps((row0), (row1), 0x44); \
106  tmp2 = _mm_shuffle_ps((row0), (row1), 0xEE); \
107  tmp1 = _mm_shuffle_ps((row2), (row3), 0x44); \
108  tmp3 = _mm_shuffle_ps((row2), (row3), 0xEE); \
109  \
110  (row0) = _mm_shuffle_ps(tmp0, tmp1, 0x88); \
111  (row1) = _mm_shuffle_ps(tmp0, tmp1, 0xDD); \
112  (row2) = _mm_shuffle_ps(tmp2, tmp3, 0x88); \
113  (row3) = _mm_shuffle_ps(tmp2, tmp3, 0xDD); \
114  }
115 
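/*
 * Editorial usage sketch (not part of the original header): transposing a
 * 4x4 matrix of floats in place with _MM_TRANSPOSE4_PS. The function name
 * "transpose4x4" is illustrative; the array is assumed 16-byte aligned
 * because _mm_load_ps/_mm_store_ps require aligned addresses.
 */
#include <xmmintrin.h>

static void transpose4x4(float m[16])
{
    __m128 row0 = _mm_load_ps(&m[0]);
    __m128 row1 = _mm_load_ps(&m[4]);
    __m128 row2 = _mm_load_ps(&m[8]);
    __m128 row3 = _mm_load_ps(&m[12]);

    _MM_TRANSPOSE4_PS(row0, row1, row2, row3);  /* rows now hold the original columns */

    _mm_store_ps(&m[0],  row0);
    _mm_store_ps(&m[4],  row1);
    _mm_store_ps(&m[8],  row2);
    _mm_store_ps(&m[12], row3);
}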
116 
117 /* constants for use with _mm_prefetch */
118 #define _MM_HINT_T0 1
119 #define _MM_HINT_T1 2
120 #define _MM_HINT_T2 3
121 #define _MM_HINT_NTA 0
122 
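/*
 * Editorial usage sketch (not part of the original header): issuing a cache
 * hint with _mm_prefetch while walking an array. The prefetch distance of 16
 * elements is an arbitrary illustrative choice, not a recommendation.
 */
#include <xmmintrin.h>

static float sum_with_prefetch(const float *p, int n)
{
    float total = 0.0f;
    int i;

    for (i = 0; i < n; ++i) {
        if (i + 16 < n)
            _mm_prefetch((const char *)&p[i + 16], _MM_HINT_T0);  /* fetch into all cache levels */
        total += p[i];
    }
    return total;
}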
123 /* (this declspec not supported with 0.A or 0.B) */
124 #define _MM_ALIGN16 _CRT_ALIGN(16)
125 
126 /* MACRO functions for setting and reading the MXCSR */
127 #define _MM_EXCEPT_MASK 0x003f
128 #define _MM_EXCEPT_INVALID 0x0001
129 #define _MM_EXCEPT_DENORM 0x0002
130 #define _MM_EXCEPT_DIV_ZERO 0x0004
131 #define _MM_EXCEPT_OVERFLOW 0x0008
132 #define _MM_EXCEPT_UNDERFLOW 0x0010
133 #define _MM_EXCEPT_INEXACT 0x0020
134 
135 #define _MM_MASK_MASK 0x1f80
136 #define _MM_MASK_INVALID 0x0080
137 #define _MM_MASK_DENORM 0x0100
138 #define _MM_MASK_DIV_ZERO 0x0200
139 #define _MM_MASK_OVERFLOW 0x0400
140 #define _MM_MASK_UNDERFLOW 0x0800
141 #define _MM_MASK_INEXACT 0x1000
142 
143 #define _MM_ROUND_MASK 0x6000
144 #define _MM_ROUND_NEAREST 0x0000
145 #define _MM_ROUND_DOWN 0x2000
146 #define _MM_ROUND_UP 0x4000
147 #define _MM_ROUND_TOWARD_ZERO 0x6000
148 
149 #define _MM_FLUSH_ZERO_MASK 0x8000
150 #define _MM_FLUSH_ZERO_ON 0x8000
151 #define _MM_FLUSH_ZERO_OFF 0x0000
152 
153 #define _MM_SET_EXCEPTION_STATE(mask) \
154  _mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (mask))
155 #define _MM_GET_EXCEPTION_STATE() \
156  (_mm_getcsr() & _MM_EXCEPT_MASK)
157 
158 #define _MM_SET_EXCEPTION_MASK(mask) \
159  _mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (mask))
160 #define _MM_GET_EXCEPTION_MASK() \
161  (_mm_getcsr() & _MM_MASK_MASK)
162 
163 #define _MM_SET_ROUNDING_MODE(mode) \
164  _mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (mode))
165 #define _MM_GET_ROUNDING_MODE() \
166  (_mm_getcsr() & _MM_ROUND_MASK)
167 
168 #define _MM_SET_FLUSH_ZERO_MODE(mode) \
169  _mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (mode))
170 #define _MM_GET_FLUSH_ZERO_MODE() \
171  (_mm_getcsr() & _MM_FLUSH_ZERO_MASK)
172 
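/*
 * Editorial usage sketch (not part of the original header): saving, changing,
 * and restoring the MXCSR rounding mode with the macros above. The helper
 * name "demo_rounding_mode" is illustrative only.
 */
#include <xmmintrin.h>

static int demo_rounding_mode(void)
{
    unsigned int saved = _MM_GET_ROUNDING_MODE();   /* remember the current rounding bits */
    int truncated;

    _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);   /* conversions now truncate           */
    truncated = _mm_cvt_ss2si(_mm_set_ss(2.75f));   /* yields 2 under this mode           */

    _MM_SET_ROUNDING_MODE(saved);                   /* restore the previous mode          */
    return truncated;
}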
173  /*****************************************************/
174  /* INTRINSICS FUNCTION PROTOTYPES START HERE */
175  /*****************************************************/
176 
177 #if defined __cplusplus
178 extern "C" { /* Begin "C" */
179  /* Intrinsics use C name-mangling. */
180 #endif /* defined __cplusplus */
181 
182 /*
183  * FP, arithmetic
184  */
185 
186 extern __m128 _mm_add_ss(__m128 _A, __m128 _B);
187 extern __m128 _mm_add_ps(__m128 _A, __m128 _B);
188 extern __m128 _mm_sub_ss(__m128 _A, __m128 _B);
189 extern __m128 _mm_sub_ps(__m128 _A, __m128 _B);
190 extern __m128 _mm_mul_ss(__m128 _A, __m128 _B);
191 extern __m128 _mm_mul_ps(__m128 _A, __m128 _B);
192 extern __m128 _mm_div_ss(__m128 _A, __m128 _B);
193 extern __m128 _mm_div_ps(__m128 _A, __m128 _B);
194 extern __m128 _mm_sqrt_ss(__m128 _A);
195 extern __m128 _mm_sqrt_ps(__m128 _A);
196 extern __m128 _mm_rcp_ss(__m128 _A);
197 extern __m128 _mm_rcp_ps(__m128 _A);
198 extern __m128 _mm_rsqrt_ss(__m128 _A);
199 extern __m128 _mm_rsqrt_ps(__m128 _A);
200 extern __m128 _mm_min_ss(__m128 _A, __m128 _B);
201 extern __m128 _mm_min_ps(__m128 _A, __m128 _B);
202 extern __m128 _mm_max_ss(__m128 _A, __m128 _B);
203 extern __m128 _mm_max_ps(__m128 _A, __m128 _B);
204 
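/*
 * Editorial usage sketch (not part of the original header): the _ss forms
 * operate on the lowest lane only (the upper three lanes are copied from the
 * first operand), while the _ps forms operate on all four lanes.
 */
#include <xmmintrin.h>

static void add_demo(void)
{
    __m128 a   = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);      /* lanes 0..3 = 1, 2, 3, 4     */
    __m128 b   = _mm_set_ps(40.0f, 30.0f, 20.0f, 10.0f);  /* lanes 0..3 = 10, 20, 30, 40 */
    __m128 all = _mm_add_ps(a, b);                         /* 11, 22, 33, 44              */
    __m128 low = _mm_add_ss(a, b);                         /* 11, 2, 3, 4                 */

    (void)all;
    (void)low;
}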
205 /*
206  * FP, logical
207  */
208 
209 extern __m128 _mm_and_ps(__m128 _A, __m128 _B);
210 extern __m128 _mm_andnot_ps(__m128 _A, __m128 _B);
211 extern __m128 _mm_or_ps(__m128 _A, __m128 _B);
212 extern __m128 _mm_xor_ps(__m128 _A, __m128 _B);
213 
214 /*
215  * FP, comparison
216  */
217 
218 extern __m128 _mm_cmpeq_ss(__m128 _A, __m128 _B);
219 extern __m128 _mm_cmpeq_ps(__m128 _A, __m128 _B);
220 extern __m128 _mm_cmplt_ss(__m128 _A, __m128 _B);
221 extern __m128 _mm_cmplt_ps(__m128 _A, __m128 _B);
222 extern __m128 _mm_cmple_ss(__m128 _A, __m128 _B);
223 extern __m128 _mm_cmple_ps(__m128 _A, __m128 _B);
224 extern __m128 _mm_cmpgt_ss(__m128 _A, __m128 _B);
225 extern __m128 _mm_cmpgt_ps(__m128 _A, __m128 _B);
226 extern __m128 _mm_cmpge_ss(__m128 _A, __m128 _B);
227 extern __m128 _mm_cmpge_ps(__m128 _A, __m128 _B);
228 extern __m128 _mm_cmpneq_ss(__m128 _A, __m128 _B);
229 extern __m128 _mm_cmpneq_ps(__m128 _A, __m128 _B);
230 extern __m128 _mm_cmpnlt_ss(__m128 _A, __m128 _B);
231 extern __m128 _mm_cmpnlt_ps(__m128 _A, __m128 _B);
232 extern __m128 _mm_cmpnle_ss(__m128 _A, __m128 _B);
233 extern __m128 _mm_cmpnle_ps(__m128 _A, __m128 _B);
234 extern __m128 _mm_cmpngt_ss(__m128 _A, __m128 _B);
235 extern __m128 _mm_cmpngt_ps(__m128 _A, __m128 _B);
236 extern __m128 _mm_cmpnge_ss(__m128 _A, __m128 _B);
237 extern __m128 _mm_cmpnge_ps(__m128 _A, __m128 _B);
238 extern __m128 _mm_cmpord_ss(__m128 _A, __m128 _B);
239 extern __m128 _mm_cmpord_ps(__m128 _A, __m128 _B);
240 extern __m128 _mm_cmpunord_ss(__m128 _A, __m128 _B);
241 extern __m128 _mm_cmpunord_ps(__m128 _A, __m128 _B);
242 extern int _mm_comieq_ss(__m128 _A, __m128 _B);
243 extern int _mm_comilt_ss(__m128 _A, __m128 _B);
244 extern int _mm_comile_ss(__m128 _A, __m128 _B);
245 extern int _mm_comigt_ss(__m128 _A, __m128 _B);
246 extern int _mm_comige_ss(__m128 _A, __m128 _B);
247 extern int _mm_comineq_ss(__m128 _A, __m128 _B);
248 extern int _mm_ucomieq_ss(__m128 _A, __m128 _B);
249 extern int _mm_ucomilt_ss(__m128 _A, __m128 _B);
250 extern int _mm_ucomile_ss(__m128 _A, __m128 _B);
251 extern int _mm_ucomigt_ss(__m128 _A, __m128 _B);
252 extern int _mm_ucomige_ss(__m128 _A, __m128 _B);
253 extern int _mm_ucomineq_ss(__m128 _A, __m128 _B);
254 
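/*
 * Editorial usage sketch (not part of the original header): the packed
 * comparisons return an all-ones/all-zeros mask per lane, which
 * _mm_movemask_ps collapses into a 4-bit integer of sign bits.
 */
#include <xmmintrin.h>

static int any_greater(__m128 a, __m128 b)
{
    __m128 mask = _mm_cmpgt_ps(a, b);     /* 0xFFFFFFFF in each lane where a[i] > b[i] */
    return _mm_movemask_ps(mask) != 0;    /* nonzero if any lane compared true         */
}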
255 /*
256  * FP, conversions
257  */
258 
259 extern int _mm_cvt_ss2si(__m128 _A);
260 extern __m64 _mm_cvt_ps2pi(__m128 _A);
261 extern int _mm_cvtt_ss2si(__m128 _A);
262 extern __m64 _mm_cvtt_ps2pi(__m128 _A);
263 extern __m128 _mm_cvt_si2ss(__m128, int);
264 extern __m128 _mm_cvt_pi2ps(__m128, __m64);
265 extern float _mm_cvtss_f32(__m128 _A);
266 
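/*
 * Editorial usage sketch (not part of the original header): scalar
 * int <-> float round trips with _mm_cvt_si2ss and _mm_cvtss_f32. The
 * helper name "int_plus_half" is illustrative only.
 */
#include <xmmintrin.h>

static float int_plus_half(int x)
{
    __m128 v = _mm_cvt_si2ss(_mm_setzero_ps(), x);  /* lane 0 = (float)x, upper lanes 0 */
    v = _mm_add_ss(v, _mm_set_ss(0.5f));            /* add 0.5 to lane 0 only           */
    return _mm_cvtss_f32(v);                        /* extract lane 0                   */
}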
267 /*
268  * Support for 64-bit extension intrinsics
269  */
270 #if defined (_M_X64)
271 extern __int64 _mm_cvtss_si64(__m128 _A);
272 extern __int64 _mm_cvttss_si64(__m128 _A);
273 extern __m128 _mm_cvtsi64_ss(__m128 _A, __int64 _B);
274 #endif /* defined (_M_X64) */
275 
276 /*
277  * FP, misc
278  */
279 
280 extern __m128 _mm_shuffle_ps(__m128 _A, __m128 _B, unsigned int _Imm8);
281 extern __m128 _mm_unpackhi_ps(__m128 _A, __m128 _B);
282 extern __m128 _mm_unpacklo_ps(__m128 _A, __m128 _B);
283 extern __m128 _mm_loadh_pi(__m128, __m64 const*);
284 extern __m128 _mm_movehl_ps(__m128, __m128);
285 extern __m128 _mm_movelh_ps(__m128, __m128);
286 extern void _mm_storeh_pi(__m64 *, __m128);
287 extern __m128 _mm_loadl_pi(__m128, __m64 const*);
288 extern void _mm_storel_pi(__m64 *, __m128);
289 extern int _mm_movemask_ps(__m128 _A);
290 
291 
292 /*
293  * Integer extensions
294  */
295 extern int _m_pextrw(__m64, int);
296 extern __m64 _m_pinsrw(__m64, int, int);
297 extern __m64 _m_pmaxsw(__m64, __m64);
298 extern __m64 _m_pmaxub(__m64, __m64);
299 extern __m64 _m_pminsw(__m64, __m64);
300 extern __m64 _m_pminub(__m64, __m64);
301 extern int _m_pmovmskb(__m64);
302 extern __m64 _m_pmulhuw(__m64, __m64);
303 extern __m64 _m_pshufw(__m64, int);
304 extern void _m_maskmovq(__m64, __m64, char *);
305 extern __m64 _m_pavgb(__m64, __m64);
306 extern __m64 _m_pavgw(__m64, __m64);
307 extern __m64 _m_psadbw(__m64, __m64);
308 
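/*
 * Editorial usage sketch (not part of the original header): _m_psadbw
 * (alternate name _mm_sad_pu8) sums the absolute differences of eight
 * unsigned bytes. Note that the __m64 intrinsics are available only in
 * 32-bit x86 builds, and MMX state should be cleared with _mm_empty()
 * (from mmintrin.h) before subsequent x87 floating-point code.
 */
#include <xmmintrin.h>

static int sum_abs_diff8(__m64 a, __m64 b)
{
    __m64 sad = _m_psadbw(a, b);  /* sum of |a[i] - b[i]| over the 8 bytes  */
    int total = _m_to_int(sad);   /* the sum lives in the low 16 bits       */

    _mm_empty();                  /* leave MMX state                        */
    return total;
}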
309 /*
310  * memory & initialization
311  */
312 
313 extern __m128 _mm_set_ss(float _A);
314 extern __m128 _mm_set_ps1(float _A);
315 extern __m128 _mm_set_ps(float _A, float _B, float _C, float _D);
316 extern __m128 _mm_setr_ps(float _A, float _B, float _C, float _D);
317 extern __m128 _mm_setzero_ps(void);
318 extern __m128 _mm_load_ss(float const*_A);
319 extern __m128 _mm_load_ps1(float const*_A);
320 extern __m128 _mm_load_ps(float const*_A);
321 extern __m128 _mm_loadr_ps(float const*_A);
322 extern __m128 _mm_loadu_ps(float const*_A);
323 extern void _mm_store_ss(float *_V, __m128 _A);
324 extern void _mm_store_ps1(float *_V, __m128 _A);
325 extern void _mm_store_ps(float *_V, __m128 _A);
326 extern void _mm_storer_ps(float *_V, __m128 _A);
327 extern void _mm_storeu_ps(float *_V, __m128 _A);
328 extern void _mm_prefetch(char const*_A, int _Sel);
329 extern void _mm_stream_pi(__m64 *, __m64);
330 extern void _mm_stream_ps(float *, __m128);
331 extern __m128 _mm_move_ss(__m128 _A, __m128 _B);
332 
333 extern void _mm_sfence(void);
334 extern unsigned int _mm_getcsr(void);
335 extern void _mm_setcsr(unsigned int);
336 
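/*
 * Editorial usage sketch (not part of the original header): _mm_load_ps and
 * _mm_store_ps require 16-byte aligned addresses, while _mm_loadu_ps and
 * _mm_storeu_ps accept arbitrary addresses. _MM_ALIGN16 (defined above)
 * gives a local array the required alignment.
 */
#include <xmmintrin.h>

static void scale_demo(const float *src_unaligned, float *dst_unaligned)
{
    _MM_ALIGN16 float weights[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
    __m128 w = _mm_load_ps(weights);          /* aligned load          */
    __m128 v = _mm_loadu_ps(src_unaligned);   /* unaligned load        */

    v = _mm_mul_ps(v, w);                     /* multiply lane by lane */
    _mm_storeu_ps(dst_unaligned, v);          /* unaligned store       */
}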
337 #ifdef __ICL
338 extern void* __cdecl _mm_malloc(size_t _Siz, size_t _Al);
339 extern void __cdecl _mm_free(void *_P);
340 #endif /* __ICL */
341 
342 /* Alternate intrinsic names definition */
343 #define _mm_cvtss_si32 _mm_cvt_ss2si
344 #define _mm_cvtps_pi32 _mm_cvt_ps2pi
345 #define _mm_cvttss_si32 _mm_cvtt_ss2si
346 #define _mm_cvttps_pi32 _mm_cvtt_ps2pi
347 #define _mm_cvtsi32_ss _mm_cvt_si2ss
348 #define _mm_cvtpi32_ps _mm_cvt_pi2ps
349 #define _mm_extract_pi16 _m_pextrw
350 #define _mm_insert_pi16 _m_pinsrw
351 #define _mm_max_pi16 _m_pmaxsw
352 #define _mm_max_pu8 _m_pmaxub
353 #define _mm_min_pi16 _m_pminsw
354 #define _mm_min_pu8 _m_pminub
355 #define _mm_movemask_pi8 _m_pmovmskb
356 #define _mm_mulhi_pu16 _m_pmulhuw
357 #define _mm_shuffle_pi16 _m_pshufw
358 #define _mm_maskmove_si64 _m_maskmovq
359 #define _mm_avg_pu8 _m_pavgb
360 #define _mm_avg_pu16 _m_pavgw
361 #define _mm_sad_pu8 _m_psadbw
362 #define _mm_set1_ps _mm_set_ps1
363 #define _mm_load1_ps _mm_load_ps1
364 #define _mm_store1_ps _mm_store_ps1
365 
366  /******************************************************/
367  /* UTILITY INTRINSICS FUNCTION DEFINITIONS START HERE */
368  /******************************************************/
369 
370  /*********************************************************/
371  /* NAME : _mm_cvtpi16_ps */
372  /* DESCRIPTION : Convert 4 16-bit signed integer values */
373  /* to 4 single-precision float values */
374  /* IN : __m64 a */
375  /* OUT : none */
376  /* RETURN : __m128 : (float)a */
377  /*********************************************************/
378 __inline __m128 _mm_cvtpi16_ps(__m64 a)
379 {
380  __m128 tmp;
381  __m64 ext_val = _mm_cmpgt_pi16(_mm_setzero_si64(), a);
382 
383  tmp = _mm_cvtpi32_ps(_mm_setzero_ps(), _mm_unpackhi_pi16(a, ext_val));
384  return(_mm_cvtpi32_ps(_mm_movelh_ps(tmp, tmp),
385  _mm_unpacklo_pi16(a, ext_val)));
386 }
387 
388 
389  /***********************************************************/
390  /* NAME : _mm_cvtpu16_ps */
391  /* DESCRIPTION : Convert 4 16-bit unsigned integer values */
392  /* to 4 single-precision float values */
393  /* IN : __m64 a */
394  /* OUT : none */
395  /* RETURN : __m128 : (float)a */
396  /***********************************************************/
397 __inline __m128 _mm_cvtpu16_ps(__m64 a)
398 {
399  __m128 tmp;
400  __m64 ext_val = _mm_setzero_si64();
401 
402  tmp = _mm_cvtpi32_ps(_mm_setzero_ps(), _mm_unpackhi_pi16(a, ext_val));
403  return(_mm_cvtpi32_ps(_mm_movelh_ps(tmp, tmp),
404  _mm_unpacklo_pi16(a, ext_val)));
405 }
406 
407 
408  /******************************************************/
409  /* NAME : _mm_cvtps_pi16 */
410  /* DESCRIPTION : Convert 4 single-precision float */
411  /* values to 4 16-bit integer values */
412  /* IN : __m128 a */
413  /* OUT : none */
414  /* RETURN : __m64 : (short)a */
415  /******************************************************/
416 __inline __m64 _mm_cvtps_pi16(__m128 a)
417 {
418  return _mm_packs_pi32(_mm_cvtps_pi32(a),
419  _mm_cvtps_pi32(_mm_movehl_ps(a, a)));
420 }
421 
422 
423  /******************************************************/
424  /* NAME : _mm_cvtpi8_ps */
425  /* DESCRIPTION : Convert 4 8-bit integer values to 4 */
426  /* single-precision float values */
427  /* IN : __m64 a */
428  /* OUT : none */
429  /* RETURN : __m128 : (float)a */
430  /******************************************************/
431 __inline __m128 _mm_cvtpi8_ps(__m64 a)
432 {
433  __m64 ext_val = _mm_cmpgt_pi8(_mm_setzero_si64(), a);
434 
435  return _mm_cvtpi16_ps(_mm_unpacklo_pi8(a, ext_val));
436 }
437 
438 
439  /******************************************************/
440  /* NAME : _mm_cvtpu8_ps */
441  /* DESCRIPTION : Convert 4 8-bit unsigned integer */
442  /* values to 4 single-precision float */
443  /* values */
444  /* IN : __m64 a */
445  /* OUT : none */
446  /* RETURN : __m128 : (float)a */
447  /******************************************************/
448 __inline __m128 _mm_cvtpu8_ps(__m64 a)
449 {
450  return _mm_cvtpu16_ps(_mm_unpacklo_pi8(a, _mm_setzero_si64()));
451 }
452 
453 
454  /******************************************************/
455  /* NAME : _mm_cvtps_pi8 */
456  /* DESCRIPTION : Convert 4 single-precision float */
457  /* values to 4 8-bit integer values */
458  /* IN : __m128 a */
459  /* OUT : none */
460  /* RETURN : __m64 : (char)a */
461  /******************************************************/
462 __inline __m64 _mm_cvtps_pi8(__m128 a)
463 {
464  return _mm_packs_pi16(_mm_cvtps_pi16(a), _mm_setzero_si64());
465 }
466 
467 
468  /******************************************************/
469  /* NAME : _mm_cvtpi32x2_ps */
470  /* DESCRIPTION : Convert 4 32-bit integer values */
471  /* to 4 single-precision float values */
472  /* IN : __m64 a : operand 1 */
473  /* __m64 b : operand 2 */
474  /* OUT : none */
475  /* RETURN : __m128 : (float)a,(float)b */
476  /******************************************************/
477 __inline __m128 _mm_cvtpi32x2_ps(__m64 a, __m64 b)
478 {
479  return _mm_movelh_ps(_mm_cvtpi32_ps(_mm_setzero_ps(), a),
480  _mm_cvtpi32_ps(_mm_setzero_ps(), b));
481 }
482 
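/*
 * Editorial usage sketch (not part of the original header): the conversion
 * helpers above route data through MMX (__m64) registers, so they are
 * limited to 32-bit x86 builds and callers should issue _mm_empty() (from
 * mmintrin.h) before any subsequent x87 floating-point code. The helper
 * name "shorts_to_floats" is illustrative only.
 */
#include <xmmintrin.h>

static void shorts_to_floats(const short src[4], float dst[4])
{
    __m64  packed = _mm_set_pi16(src[3], src[2], src[1], src[0]);  /* pack four 16-bit ints */
    __m128 f      = _mm_cvtpi16_ps(packed);                        /* widen to four floats  */

    _mm_storeu_ps(dst, f);
    _mm_empty();                                                   /* clear MMX state       */
}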
483 
484 #if defined __cplusplus
485 }; /* End "C" */
486 #endif /* defined __cplusplus */
487 
488 #endif /* defined (_M_CEE_PURE) */
489 
490 #endif /* _INCLUDED_MM2 */
491 #endif /* __midl */