STLdoc
STLdocumentation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
xmmintrin.h
Go to the documentation of this file.
1 /***
2 *** Copyright (C) 1985-2015 Intel Corporation. All rights reserved.
3 ***
4 *** The information and source code contained herein is the exclusive
5 *** property of Intel Corporation and may not be disclosed, examined
6 *** or reproduced in whole or in part without explicit written authorization
7 *** from the company.
8 ***
9 ****/
10 
11 /*
12  * xmmintrin.h
13  *
14  * Principal header file for Streaming SIMD Extensions intrinsics
15  *
16  * The intrinsics package can be used in 2 ways, based on whether or not
17  * _MM_FUNCTIONALITY is defined; if it is, the C/x87 implementation
18  * will be used (the "faux intrinsics").
19  *
20  *
21  * Note that the __m128 datatype provided in _MM2_FUNCTIONALITY mode is
22  * implemented as a struct, will not be 128-bit aligned, will be passed
23  * via the stack, etc. _MM_FUNCTIONALITY mode is not intended for
24  * performance, just semantics.
25  *
26  */
27 
28 #pragma once
29 
30 #if !defined(_M_IX86) && !defined(_M_X64)
31 #error This header is specific to X86 and X64 targets
32 #endif
33 
34 #ifndef _INCLUDED_MM2
35 #define _INCLUDED_MM2
36 #ifndef __midl
37 
38 #if defined (_M_CEE_PURE)
39  #error ERROR: XMM intrinsics not supported in the pure mode!
40 #else /* defined (_M_CEE_PURE) */
41 
42 /*
43  * the m64 type is required for the integer Streaming SIMD Extensions intrinsics
44  */
45 #ifndef _MMINTRIN_H_INCLUDED
46 #include <mmintrin.h>
47 #endif /* _MMINTRIN_H_INCLUDED */
48 
49 #ifdef _MM2_FUNCTIONALITY
50 /* support old notation */
51 #ifndef _MM_FUNCTIONALITY
52 #define _MM_FUNCTIONALITY
53 #endif /* _MM_FUNCTIONALITY */
54 #endif /* _MM2_FUNCTIONALITY */
55 
56 #ifdef __ICL
57 #ifdef _MM_FUNCTIONALITY
58 #include "xmm_func.h"
59 #else /* _MM_FUNCTIONALITY */
60 /* using real intrinsics */
61 typedef long long __m128;
62 #endif /* _MM_FUNCTIONALITY */
63 #else /* __ICL */
64 
/*
 * __m128: the 128-bit SSE vector type.  Declared as a union of
 * element-view arrays so the packed contents can be inspected as floats
 * or as signed/unsigned integers of any width (e.g. in a debugger).
 * __declspec(align(16)) enforces the 16-byte alignment that aligned SSE
 * load/store instructions require; __declspec(intrin_type) marks the
 * type for intrinsic register mapping by this compiler.
 */
typedef union __declspec(intrin_type) __declspec(align(16)) __m128 {
 float m128_f32[4];              /* four single-precision floats */
 unsigned __int64 m128_u64[2];
 __int8 m128_i8[16];
 __int16 m128_i16[8];
 __int32 m128_i32[4];
 __int64 m128_i64[2];
 unsigned __int8 m128_u8[16];
 unsigned __int16 m128_u16[8];
 unsigned __int32 m128_u32[4];
 } __m128;
76 
77 #if !defined _VCRT_BUILD && !defined _INC_MALLOC
78 /* pick up _mm_malloc() and _mm_free() */
79 #include <malloc.h>
80 #endif /* !defined _VCRT_BUILD && !defined _INC_MALLOC */
81 #endif /* __ICL */
82 
83  /*******************************************************/
84  /* MACRO for shuffle parameter for _mm_shuffle_ps(). */
85  /* Argument fp3 is a digit[0123] that represents the fp*/
86  /* from argument "b" of mm_shuffle_ps that will be */
87  /* placed in fp3 of result. fp2 is the same for fp2 in */
88  /* result. fp1 is a digit[0123] that represents the fp */
89  /* from argument "a" of mm_shuffle_ps that will be */
90  * placed in fp1 of result. fp0 is the same for fp0 of */
91  /* result */
92  /*******************************************************/
/* Pack four 2-bit lane selectors into the 8-bit immediate expected by
 * _mm_shuffle_ps(): fp0 occupies bits 1:0 up through fp3 in bits 7:6. */
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
    ((fp0) | ((fp1) << 2) | ((fp2) << 4) | ((fp3) << 6))
95 
96 
97  /*******************************************************/
98  /* MACRO for performing the transpose of a 4x4 matrix */
99  /* of single precision floating point values. */
100  /* Arguments row0, row1, row2, and row3 are __m128 */
101  /* values whose elements form the corresponding rows */
102  /* of a 4x4 matrix. The matrix transpose is returned */
103  /* in arguments row0, row1, row2, and row3 where row0 */
104  /* now holds column 0 of the original matrix, row1 now */
105  /* holds column 1 of the original matrix, etc. */
106  /*******************************************************/
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) { \
 __m128 _Tmp3, _Tmp2, _Tmp1, _Tmp0; \
 \
 /* Pass 1: interleave 64-bit halves.  0x44 = _MM_SHUFFLE(1,0,1,0) */ \
 /* pairs the low two elements of each input; 0xEE = */ \
 /* _MM_SHUFFLE(3,2,3,2) pairs the high two. */ \
 _Tmp0 = _mm_shuffle_ps((row0), (row1), 0x44); \
 _Tmp2 = _mm_shuffle_ps((row0), (row1), 0xEE); \
 _Tmp1 = _mm_shuffle_ps((row2), (row3), 0x44); \
 _Tmp3 = _mm_shuffle_ps((row2), (row3), 0xEE); \
 \
 /* Pass 2: gather even (0x88 = _MM_SHUFFLE(2,0,2,0)) and odd */ \
 /* (0xDD = _MM_SHUFFLE(3,1,3,1)) elements; each row now holds one */ \
 /* column of the original matrix. */ \
 (row0) = _mm_shuffle_ps(_Tmp0, _Tmp1, 0x88); \
 (row1) = _mm_shuffle_ps(_Tmp0, _Tmp1, 0xDD); \
 (row2) = _mm_shuffle_ps(_Tmp2, _Tmp3, 0x88); \
 (row3) = _mm_shuffle_ps(_Tmp2, _Tmp3, 0xDD); \
 }
120 
121 
/* constants for use with _mm_prefetch (passed as the _Sel argument). */
/* Locality semantics below are per the Intel instruction reference */
/* for PREFETCHh -- TODO(review): confirm against current SDM. */
#define _MM_HINT_NTA 0  /* non-temporal: fetch minimizing cache pollution */
#define _MM_HINT_T0 1   /* temporal, all cache levels (closest to CPU)    */
#define _MM_HINT_T1 2   /* temporal, L2 and farther                       */
#define _MM_HINT_T2 3   /* temporal, L3 and farther                       */
#define _MM_HINT_ENTA 4 /* exclusive (write-intent) variant of NTA        */
// The values below are not yet supported.
//#define _MM_HINT_ET0 5
//#define _MM_HINT_ET1 6
//#define _MM_HINT_ET2 7
132 
133 /* (this declspec not supported with 0.A or 0.B) */
134 #define _MM_ALIGN16 _VCRT_ALIGN(16)
135 
/* MACRO functions for setting and reading the MXCSR */
/* The constants below name the MXCSR fields, as their mask values show: */
/* exception flags in bits 0-5, exception masks in bits 7-12, rounding  */
/* control in bits 13-14, flush-to-zero in bit 15.                      */
#define _MM_EXCEPT_MASK 0x003f      /* all six sticky exception flags */
#define _MM_EXCEPT_INVALID 0x0001
#define _MM_EXCEPT_DENORM 0x0002
#define _MM_EXCEPT_DIV_ZERO 0x0004
#define _MM_EXCEPT_OVERFLOW 0x0008
#define _MM_EXCEPT_UNDERFLOW 0x0010
#define _MM_EXCEPT_INEXACT 0x0020

#define _MM_MASK_MASK 0x1f80        /* all six exception-mask bits */
#define _MM_MASK_INVALID 0x0080
#define _MM_MASK_DENORM 0x0100
#define _MM_MASK_DIV_ZERO 0x0200
#define _MM_MASK_OVERFLOW 0x0400
#define _MM_MASK_UNDERFLOW 0x0800
#define _MM_MASK_INEXACT 0x1000

#define _MM_ROUND_MASK 0x6000       /* two-bit rounding-control field */
#define _MM_ROUND_NEAREST 0x0000
#define _MM_ROUND_DOWN 0x2000
#define _MM_ROUND_UP 0x4000
#define _MM_ROUND_TOWARD_ZERO 0x6000

#define _MM_FLUSH_ZERO_MASK 0x8000  /* flush-to-zero control bit */
#define _MM_FLUSH_ZERO_ON 0x8000
#define _MM_FLUSH_ZERO_OFF 0x0000
162 
/* Each _MM_SET_* macro performs a read-modify-write of MXCSR: clear the */
/* target field with the corresponding *_MASK, then OR in the caller's   */
/* value.  Callers must pass a value that fits the field.                */

/* Exception-flag field (bits 0-5): set / read the sticky flags. */
#define _MM_SET_EXCEPTION_STATE(mask) \
 _mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (mask))
#define _MM_GET_EXCEPTION_STATE() \
 (_mm_getcsr() & _MM_EXCEPT_MASK)

/* Exception-mask field (bits 7-12): use the _MM_MASK_* constants. */
#define _MM_SET_EXCEPTION_MASK(mask) \
 _mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (mask))
#define _MM_GET_EXCEPTION_MASK() \
 (_mm_getcsr() & _MM_MASK_MASK)

/* Rounding-control field (bits 13-14): one of the _MM_ROUND_* values. */
#define _MM_SET_ROUNDING_MODE(mode) \
 _mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (mode))
#define _MM_GET_ROUNDING_MODE() \
 (_mm_getcsr() & _MM_ROUND_MASK)

/* Flush-to-zero bit (bit 15): _MM_FLUSH_ZERO_ON or _MM_FLUSH_ZERO_OFF. */
#define _MM_SET_FLUSH_ZERO_MODE(mode) \
 _mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (mode))
#define _MM_GET_FLUSH_ZERO_MODE() \
 (_mm_getcsr() & _MM_FLUSH_ZERO_MASK)
182 
183  /*****************************************************/
184  /* INTRINSICS FUNCTION PROTOTYPES START HERE */
185  /*****************************************************/
186 
187 #if defined __cplusplus
188 extern "C" { /* Begin "C" */
189  /* Intrinsics use C name-mangling. */
190 #endif /* defined __cplusplus */
191 
192 /*
193  * FP, arithmetic
194  */
195 
196 extern __m128 _mm_add_ss(__m128 _A, __m128 _B);
197 extern __m128 _mm_add_ps(__m128 _A, __m128 _B);
198 extern __m128 _mm_sub_ss(__m128 _A, __m128 _B);
199 extern __m128 _mm_sub_ps(__m128 _A, __m128 _B);
200 extern __m128 _mm_mul_ss(__m128 _A, __m128 _B);
201 extern __m128 _mm_mul_ps(__m128 _A, __m128 _B);
202 extern __m128 _mm_div_ss(__m128 _A, __m128 _B);
203 extern __m128 _mm_div_ps(__m128 _A, __m128 _B);
204 extern __m128 _mm_sqrt_ss(__m128 _A);
205 extern __m128 _mm_sqrt_ps(__m128 _A);
206 extern __m128 _mm_rcp_ss(__m128 _A);
207 extern __m128 _mm_rcp_ps(__m128 _A);
208 extern __m128 _mm_rsqrt_ss(__m128 _A);
209 extern __m128 _mm_rsqrt_ps(__m128 _A);
210 extern __m128 _mm_min_ss(__m128 _A, __m128 _B);
211 extern __m128 _mm_min_ps(__m128 _A, __m128 _B);
212 extern __m128 _mm_max_ss(__m128 _A, __m128 _B);
213 extern __m128 _mm_max_ps(__m128 _A, __m128 _B);
214 
215 /*
216  * FP, logical
217  */
218 
219 extern __m128 _mm_and_ps(__m128 _A, __m128 _B);
220 extern __m128 _mm_andnot_ps(__m128 _A, __m128 _B);
221 extern __m128 _mm_or_ps(__m128 _A, __m128 _B);
222 extern __m128 _mm_xor_ps(__m128 _A, __m128 _B);
223 
224 /*
225  * FP, comparison
226  */
227 
228 extern __m128 _mm_cmpeq_ss(__m128 _A, __m128 _B);
229 extern __m128 _mm_cmpeq_ps(__m128 _A, __m128 _B);
230 extern __m128 _mm_cmplt_ss(__m128 _A, __m128 _B);
231 extern __m128 _mm_cmplt_ps(__m128 _A, __m128 _B);
232 extern __m128 _mm_cmple_ss(__m128 _A, __m128 _B);
233 extern __m128 _mm_cmple_ps(__m128 _A, __m128 _B);
234 extern __m128 _mm_cmpgt_ss(__m128 _A, __m128 _B);
235 extern __m128 _mm_cmpgt_ps(__m128 _A, __m128 _B);
236 extern __m128 _mm_cmpge_ss(__m128 _A, __m128 _B);
237 extern __m128 _mm_cmpge_ps(__m128 _A, __m128 _B);
238 extern __m128 _mm_cmpneq_ss(__m128 _A, __m128 _B);
239 extern __m128 _mm_cmpneq_ps(__m128 _A, __m128 _B);
240 extern __m128 _mm_cmpnlt_ss(__m128 _A, __m128 _B);
241 extern __m128 _mm_cmpnlt_ps(__m128 _A, __m128 _B);
242 extern __m128 _mm_cmpnle_ss(__m128 _A, __m128 _B);
243 extern __m128 _mm_cmpnle_ps(__m128 _A, __m128 _B);
244 extern __m128 _mm_cmpngt_ss(__m128 _A, __m128 _B);
245 extern __m128 _mm_cmpngt_ps(__m128 _A, __m128 _B);
246 extern __m128 _mm_cmpnge_ss(__m128 _A, __m128 _B);
247 extern __m128 _mm_cmpnge_ps(__m128 _A, __m128 _B);
248 extern __m128 _mm_cmpord_ss(__m128 _A, __m128 _B);
249 extern __m128 _mm_cmpord_ps(__m128 _A, __m128 _B);
250 extern __m128 _mm_cmpunord_ss(__m128 _A, __m128 _B);
251 extern __m128 _mm_cmpunord_ps(__m128 _A, __m128 _B);
252 extern int _mm_comieq_ss(__m128 _A, __m128 _B);
253 extern int _mm_comilt_ss(__m128 _A, __m128 _B);
254 extern int _mm_comile_ss(__m128 _A, __m128 _B);
255 extern int _mm_comigt_ss(__m128 _A, __m128 _B);
256 extern int _mm_comige_ss(__m128 _A, __m128 _B);
257 extern int _mm_comineq_ss(__m128 _A, __m128 _B);
258 extern int _mm_ucomieq_ss(__m128 _A, __m128 _B);
259 extern int _mm_ucomilt_ss(__m128 _A, __m128 _B);
260 extern int _mm_ucomile_ss(__m128 _A, __m128 _B);
261 extern int _mm_ucomigt_ss(__m128 _A, __m128 _B);
262 extern int _mm_ucomige_ss(__m128 _A, __m128 _B);
263 extern int _mm_ucomineq_ss(__m128 _A, __m128 _B);
264 
265 /*
266  * FP, conversions
267  */
268 
269 extern int _mm_cvt_ss2si(__m128 _A);
270 extern int _mm_cvtt_ss2si(__m128 _A);
271 extern __m128 _mm_cvt_si2ss(__m128, int);
272 extern float _mm_cvtss_f32(__m128 _A);
273 
274 #if defined(_M_IX86)
275 /*
276  * Support for MMX extension intrinsics
277  */
278 extern __m64 _mm_cvt_ps2pi(__m128 _A);
279 extern __m64 _mm_cvtt_ps2pi(__m128 _A);
280 extern __m128 _mm_cvt_pi2ps(__m128, __m64);
281 #endif
282 
283 #if defined (_M_X64)
284 /*
285  * Support for 64-bit intrinsics
286  */
287 extern __int64 _mm_cvtss_si64(__m128 _A);
288 extern __int64 _mm_cvttss_si64(__m128 _A);
289 extern __m128 _mm_cvtsi64_ss(__m128 _A, __int64 _B);
290 #endif /* defined (_M_X64) */
291 
292 /*
293  * FP, misc
294  */
295 
296 extern __m128 _mm_shuffle_ps(__m128 _A, __m128 _B, unsigned int _Imm8);
297 extern __m128 _mm_unpackhi_ps(__m128 _A, __m128 _B);
298 extern __m128 _mm_unpacklo_ps(__m128 _A, __m128 _B);
299 extern __m128 _mm_loadh_pi(__m128, __m64 const*);
302 extern void _mm_storeh_pi(__m64 *, __m128);
303 extern __m128 _mm_loadl_pi(__m128, __m64 const*);
304 extern void _mm_storel_pi(__m64 *, __m128);
305 extern int _mm_movemask_ps(__m128 _A);
306 
307 
308 #if defined(_M_IX86)
309 /*
310  * Integer (MMX) extensions
311  */
312 extern int _m_pextrw(__m64, int);
313 extern __m64 _m_pinsrw(__m64, int, int);
314 extern __m64 _m_pmaxsw(__m64, __m64);
315 extern __m64 _m_pmaxub(__m64, __m64);
316 extern __m64 _m_pminsw(__m64, __m64);
317 extern __m64 _m_pminub(__m64, __m64);
318 extern int _m_pmovmskb(__m64);
319 extern __m64 _m_pmulhuw(__m64, __m64);
320 extern __m64 _m_pshufw(__m64, int);
321 extern void _m_maskmovq(__m64, __m64, char *);
322 extern __m64 _m_pavgb(__m64, __m64);
323 extern __m64 _m_pavgw(__m64, __m64);
324 extern __m64 _m_psadbw(__m64, __m64);
325 #endif
326 
327 /*
328  * memory & initialization
329  */
330 
331 extern __m128 _mm_set_ss(float _A);
332 extern __m128 _mm_set_ps1(float _A);
333 extern __m128 _mm_set_ps(float _A, float _B, float _C, float _D);
334 extern __m128 _mm_setr_ps(float _A, float _B, float _C, float _D);
335 extern __m128 _mm_setzero_ps(void);
336 extern __m128 _mm_load_ss(float const*_A);
337 extern __m128 _mm_load_ps1(float const*_A);
338 extern __m128 _mm_load_ps(float const*_A);
339 extern __m128 _mm_loadr_ps(float const*_A);
340 extern __m128 _mm_loadu_ps(float const*_A);
341 extern void _mm_store_ss(float *_V, __m128 _A);
342 extern void _mm_store_ps1(float *_V, __m128 _A);
343 extern void _mm_store_ps(float *_V, __m128 _A);
344 extern void _mm_storer_ps(float *_V, __m128 _A);
345 extern void _mm_storeu_ps(float *_V, __m128 _A);
346 extern void _mm_prefetch(char const*_A, int _Sel);
347 #if defined(_M_IX86)
348 extern void _mm_stream_pi(__m64 *, __m64);
349 #endif
350 extern void _mm_stream_ps(float *, __m128);
351 extern __m128 _mm_move_ss(__m128 _A, __m128 _B);
352 
353 extern void _mm_sfence(void);
354 extern unsigned int _mm_getcsr(void);
355 extern void _mm_setcsr(unsigned int);
356 
357 #ifdef __ICL
358 extern void* __cdecl _mm_malloc(size_t _Siz, size_t _Al);
359 extern void __cdecl _mm_free(void *_P);
360 #endif /* __ICL */
361 
362 /* Alternate intrinsic names definition */
363 #if defined(_M_IX86)
364 #define _mm_cvtps_pi32 _mm_cvt_ps2pi
365 #define _mm_cvttps_pi32 _mm_cvtt_ps2pi
366 #define _mm_cvtpi32_ps _mm_cvt_pi2ps
367 #define _mm_extract_pi16 _m_pextrw
368 #define _mm_insert_pi16 _m_pinsrw
369 #define _mm_max_pi16 _m_pmaxsw
370 #define _mm_max_pu8 _m_pmaxub
371 #define _mm_min_pi16 _m_pminsw
372 #define _mm_min_pu8 _m_pminub
373 #define _mm_movemask_pi8 _m_pmovmskb
374 #define _mm_mulhi_pu16 _m_pmulhuw
375 #define _mm_shuffle_pi16 _m_pshufw
376 #define _mm_maskmove_si64 _m_maskmovq
377 #define _mm_avg_pu8 _m_pavgb
378 #define _mm_avg_pu16 _m_pavgw
379 #define _mm_sad_pu8 _m_psadbw
380 #endif
381 #define _mm_cvtss_si32 _mm_cvt_ss2si
382 #define _mm_cvttss_si32 _mm_cvtt_ss2si
383 #define _mm_cvtsi32_ss _mm_cvt_si2ss
384 #define _mm_set1_ps _mm_set_ps1
385 #define _mm_load1_ps _mm_load_ps1
386 #define _mm_store1_ps _mm_store_ps1
387 
388  /******************************************************/
389  /* UTILITY INTRINSICS FUNCTION DEFINITIONS START HERE */
390  /******************************************************/
391 
392 #if defined(_M_IX86)
393  /*********************************************************/
394  /* NAME : _mm_cvtpi16_ps */
395  /* DESCRIPTION : Convert 4 16-bit signed integer values */
396  /* to 4 single-precision float values */
397  /* IN : __m64 _A */
398  /* OUT : none */
399  /* RETURN : __m128 : (float)_A */
400  /*********************************************************/
401 __inline __m128 _mm_cvtpi16_ps(__m64 _A)
402 {
403  __m128 _Tmp;
404  __m64 _Ext_val = _mm_cmpgt_pi16(_mm_setzero_si64(), _A);
405 
406  _Tmp = _mm_cvtpi32_ps(_mm_setzero_ps(), _mm_unpackhi_pi16(_A, _Ext_val));
407  return(_mm_cvtpi32_ps(_mm_movelh_ps(_Tmp, _Tmp),
408  _mm_unpacklo_pi16(_A, _Ext_val)));
409 }
410 
411 
412  /***********************************************************/
413  /* NAME : _mm_cvtpu16_ps */
414  /* DESCRIPTION : Convert 4 16-bit unsigned integer values */
415  /* to 4 single-precision float values */
416  /* IN : __m64 _A */
417  /* OUT : none */
418  /* RETURN : __m128 : (float)_A */
419  /***********************************************************/
420 __inline __m128 _mm_cvtpu16_ps(__m64 _A)
421 {
422  __m128 _Tmp;
423  __m64 _Ext_val = _mm_setzero_si64();
424 
425  _Tmp = _mm_cvtpi32_ps(_mm_setzero_ps(), _mm_unpackhi_pi16(_A, _Ext_val));
426  return(_mm_cvtpi32_ps(_mm_movelh_ps(_Tmp, _Tmp),
427  _mm_unpacklo_pi16(_A, _Ext_val)));
428 }
429 
430 
431  /******************************************************/
432  /* NAME : _mm_cvtps_pi16 */
433  /* DESCRIPTION : Convert 4 single-precision float */
434  /* values to 4 16-bit integer values */
435  /* IN : __m128 a */
436  /* OUT : none */
437  /* RETURN : __m64 : (short)a */
438  /******************************************************/
439 __inline __m64 _mm_cvtps_pi16(__m128 _A)
440 {
441  return _mm_packs_pi32(_mm_cvtps_pi32(_A),
442  _mm_cvtps_pi32(_mm_movehl_ps(_A, _A)));
443 }
444 
445 
446  /******************************************************/
447  /* NAME : _mm_cvtpi8_ps */
448  /* DESCRIPTION : Convert 4 8-bit integer values to 4 */
449  /* single-precision float values */
450  /* IN : __m64 _A */
451  /* OUT : none */
452  /* RETURN : __m128 : (float)_A */
453  /******************************************************/
454 __inline __m128 _mm_cvtpi8_ps(__m64 _A)
455 {
456  __m64 _Ext_val = _mm_cmpgt_pi8(_mm_setzero_si64(), _A);
457 
458  return _mm_cvtpi16_ps(_mm_unpacklo_pi8(_A, _Ext_val));
459 }
460 
461 
462  /******************************************************/
463  /* NAME : _mm_cvtpu8_ps */
464  /* DESCRIPTION : Convert 4 8-bit unsigned integer */
465  /* values to 4 single-precision float */
466  /* values */
467  /* IN : __m64 _A */
468  /* OUT : none */
469  /* RETURN : __m128 : (float)_A */
470  /******************************************************/
471 __inline __m128 _mm_cvtpu8_ps(__m64 _A)
472 {
473  return _mm_cvtpu16_ps(_mm_unpacklo_pi8(_A, _mm_setzero_si64()));
474 }
475 
476 
477  /******************************************************/
478  /* NAME : _mm_cvtps_pi8 */
479  /* DESCRIPTION : Convert 4 single-precision float */
480  /* values to 4 8-bit integer values */
481  /* IN : __m128 _A */
482  /* OUT : none */
483  /* RETURN : __m64 : (char)_A */
484  /******************************************************/
485 __inline __m64 _mm_cvtps_pi8(__m128 _A)
486 {
487  return _mm_packs_pi16(_mm_cvtps_pi16(_A), _mm_setzero_si64());
488 }
489 
490 
491  /******************************************************/
492  /* NAME : _mm_cvtpi32x2_ps */
493  /* DESCRIPTION : Convert 4 32-bit integer values */
494  /* to 4 single-precision float values */
495  /* IN : __m64 _A : operand 1 */
496  /* __m64 _B : operand 2 */
497  /* OUT : none */
498  /* RETURN : __m128 : (float)_A,(float)_B */
499  /******************************************************/
500 __inline __m128 _mm_cvtpi32x2_ps(__m64 _A, __m64 _B)
501 {
502  return _mm_movelh_ps(_mm_cvt_pi2ps(_mm_setzero_ps(), _A),
503  _mm_cvt_pi2ps(_mm_setzero_ps(), _B));
504 }
505 #endif // _M_IX86
506 
507 
508 #if defined __cplusplus
509 }; /* End "C" */
510 #endif /* defined __cplusplus */
511 
512 #endif /* defined (_M_CEE_PURE) */
513 #endif /* __midl */
514 #endif /* _INCLUDED_MM2 */
union __declspec(intrin_type) __declspec(align(16)) __m128
Definition: xmmintrin.h:65
__m128 _mm_set_ps(float _A, float _B, float _C, float _D)
__m128 _mm_and_ps(__m128 _A, __m128 _B)
void _mm_store_ps1(float *_V, __m128 _A)
__m128 _mm_setr_ps(float _A, float _B, float _C, float _D)
int _mm_ucomilt_ss(__m128 _A, __m128 _B)
__m128 _mm_movehl_ps(__m128, __m128)
__m128 _mm_rsqrt_ss(__m128 _A)
__m128 _mm_sqrt_ps(__m128 _A)
__m128 _mm_max_ps(__m128 _A, __m128 _B)
__m128 _mm_cmpgt_ss(__m128 _A, __m128 _B)
__m128 _mm_cmpngt_ps(__m128 _A, __m128 _B)
int _mm_comigt_ss(__m128 _A, __m128 _B)
void _mm_storeu_ps(float *_V, __m128 _A)
int _mm_ucomigt_ss(__m128 _A, __m128 _B)
__m128 _mm_cmpord_ss(__m128 _A, __m128 _B)
__m128 _mm_cmpord_ps(__m128 _A, __m128 _B)
__m128 _mm_sqrt_ss(__m128 _A)
void * align(size_t _Bound, size_t _Size, void *&_Ptr, size_t &_Space) _NOEXCEPT
Definition: memory:1985
__m128 _mm_shuffle_ps(__m128 _A, __m128 _B, unsigned int _Imm8)
void _mm_storeh_pi(__m64 *, __m128)
int _mm_comile_ss(__m128 _A, __m128 _B)
int _mm_comige_ss(__m128 _A, __m128 _B)
__m128 _mm_cmpngt_ss(__m128 _A, __m128 _B)
__m128 _mm_loadu_ps(float const *_A)
int _mm_ucomineq_ss(__m128 _A, __m128 _B)
int _mm_comineq_ss(__m128 _A, __m128 _B)
__m128 _mm_cmple_ps(__m128 _A, __m128 _B)
__m128 _mm_add_ps(__m128 _A, __m128 _B)
void _mm_store_ss(float *_V, __m128 _A)
__m128 _mm_sub_ps(__m128 _A, __m128 _B)
__m128 _mm_div_ps(__m128 _A, __m128 _B)
__m128 _mm_set_ss(float _A)
__m128 _mm_cmplt_ss(__m128 _A, __m128 _B)
int _mm_ucomile_ss(__m128 _A, __m128 _B)
__m128 _mm_rcp_ps(__m128 _A)
__m128 _mm_cmpeq_ps(__m128 _A, __m128 _B)
void _mm_setcsr(unsigned int)
__m128 _mm_cmpneq_ps(__m128 _A, __m128 _B)
__m64
Definition: mmintrin.h:45
void _mm_stream_ps(float *, __m128)
__m128 _mm_andnot_ps(__m128 _A, __m128 _B)
__m128 _mm_cmpnlt_ps(__m128 _A, __m128 _B)
__m128 _mm_cmpnlt_ss(__m128 _A, __m128 _B)
__m128 _mm_xor_ps(__m128 _A, __m128 _B)
int _mm_comieq_ss(__m128 _A, __m128 _B)
__m128
Definition: xmmintrin.h:75
__m128 _mm_load_ps1(float const *_A)
int _mm_cvt_ss2si(__m128 _A)
__m128 _mm_mul_ps(__m128 _A, __m128 _B)
void _mm_sfence(void)
int _mm_ucomige_ss(__m128 _A, __m128 _B)
__m128 _mm_set_ps1(float _A)
__m128 _mm_cmpneq_ss(__m128 _A, __m128 _B)
__m128 _mm_cvt_si2ss(__m128, int)
__m128 _mm_add_ss(__m128 _A, __m128 _B)
__m128 _mm_min_ss(__m128 _A, __m128 _B)
__m128 _mm_min_ps(__m128 _A, __m128 _B)
int _mm_ucomieq_ss(__m128 _A, __m128 _B)
int _mm_comilt_ss(__m128 _A, __m128 _B)
__m128 _mm_cmpnle_ss(__m128 _A, __m128 _B)
__m128 _mm_cmpgt_ps(__m128 _A, __m128 _B)
void _mm_store_ps(float *_V, __m128 _A)
int _mm_movemask_ps(__m128 _A)
__m128 _mm_setzero_ps(void)
__m128 _mm_rcp_ss(__m128 _A)
__m128 _mm_cmplt_ps(__m128 _A, __m128 _B)
__m128 _mm_move_ss(__m128 _A, __m128 _B)
__m128 _mm_sub_ss(__m128 _A, __m128 _B)
__m128 _mm_loadr_ps(float const *_A)
__m128 _mm_cmpnle_ps(__m128 _A, __m128 _B)
__m128 _mm_cmpnge_ps(__m128 _A, __m128 _B)
__m128 _mm_cmpeq_ss(__m128 _A, __m128 _B)
__m128 _mm_unpackhi_ps(__m128 _A, __m128 _B)
__m128 _mm_unpacklo_ps(__m128 _A, __m128 _B)
__m128 _mm_div_ss(__m128 _A, __m128 _B)
void _mm_storel_pi(__m64 *, __m128)
__m128 _mm_rsqrt_ps(__m128 _A)
__m128 _mm_max_ss(__m128 _A, __m128 _B)
__m128 _mm_or_ps(__m128 _A, __m128 _B)
__m128 _mm_cmple_ss(__m128 _A, __m128 _B)
void _mm_prefetch(char const *_A, int _Sel)
__m128 _mm_loadh_pi(__m128, __m64 const *)
void _mm_storer_ps(float *_V, __m128 _A)
__m128 _mm_cmpunord_ps(__m128 _A, __m128 _B)
__m128 _mm_mul_ss(__m128 _A, __m128 _B)
__m128 _mm_loadl_pi(__m128, __m64 const *)
__m128 _mm_load_ps(float const *_A)
float _mm_cvtss_f32(__m128 _A)
__m128 _mm_cmpunord_ss(__m128 _A, __m128 _B)
int _mm_cvtt_ss2si(__m128 _A)
__m128 _mm_load_ss(float const *_A)
__m128 _mm_cmpnge_ss(__m128 _A, __m128 _B)
__m128 _mm_movelh_ps(__m128, __m128)
unsigned int _mm_getcsr(void)
__m128 _mm_cmpge_ps(__m128 _A, __m128 _B)
__m128 _mm_cmpge_ss(__m128 _A, __m128 _B)