STLdoc
STLdocumentation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
emmintrin.h
Go to the documentation of this file.
1 /***
2 *** Copyright (C) 1985-2015 Intel Corporation. All rights reserved.
3 ***
4 *** The information and source code contained herein is the exclusive
5 *** property of Intel Corporation and may not be disclosed, examined
6 *** or reproduced in whole or in part without explicit written authorization
7 *** from the company.
8 ***
9 ****/
10 
11 /*
12  * emmintrin.h
13  *
14  * Principal header file for Willamette New Instruction (SSE2) intrinsics
15  *
16  * The intrinsics package can be used in 2 ways, based on whether or not
17  * _EMM_FUNCTIONALITY is defined; if it is, the C implementation
18  * will be used (the "functional intrinsics").
19  */
20 
21 #pragma once
22 
23 #if !defined(_M_IX86) && !defined(_M_X64)
24 #error This header is specific to X86 and X64 targets
25 #endif
26 
27 #ifndef _INCLUDED_EMM
28 #define _INCLUDED_EMM
29 #ifndef __midl
30 
31 #if !defined _M_IX86 && !defined _M_X64
32  #error This header is specific to X86 and X64 targets
33 #endif
34 
35 #if defined (_M_CEE_PURE)
36  #error ERROR: EMM intrinsics not supported in the pure mode!
37 #else /* defined (_M_CEE_PURE) */
38 
39 /*
40  * the __m128 & __m64 types are required for the intrinsics
41  */
42 #include <xmmintrin.h>
43 
44 typedef union __declspec(intrin_type) __declspec(align(16)) __m128i { /* 16-byte-aligned union of integer views */
45  __int8 m128i_i8[16]; /* 16 x signed 8-bit */
46  __int16 m128i_i16[8]; /* 8 x signed 16-bit */
47  __int32 m128i_i32[4]; /* 4 x signed 32-bit */
48  __int64 m128i_i64[2]; /* 2 x signed 64-bit */
49  unsigned __int8 m128i_u8[16]; /* 16 x unsigned 8-bit */
50  unsigned __int16 m128i_u16[8]; /* 8 x unsigned 16-bit */
51  unsigned __int32 m128i_u32[4]; /* 4 x unsigned 32-bit */
52  unsigned __int64 m128i_u64[2]; /* 2 x unsigned 64-bit */
53 } __m128i;
54 
55 typedef struct __declspec(intrin_type) __declspec(align(16)) __m128d { /* 16-byte-aligned pair of doubles */
56  double m128d_f64[2];
57 } __m128d;
58 
59 /*
60  * Macro function for shuffle: combines two selectors as (x << 1) | y, i.e. x lands in bit 1 and y in bit 0 when each is 0 or 1
61  */
62 #define _MM_SHUFFLE2(x,y) (((x)<<1) | (y))
63 
64  /*****************************************************/
65  /* INTRINSICS FUNCTION PROTOTYPES START HERE */
66  /*****************************************************/
67 
68 #if defined __cplusplus
69 extern "C" { /* Begin "C" */
70  /* Intrinsics use C name-mangling. */
71 #endif /* defined __cplusplus */
72 
73 /*
74  * DP, arithmetic
75  */
76 
77 extern __m128d _mm_add_sd(__m128d _A, __m128d _B);
78 extern __m128d _mm_add_pd(__m128d _A, __m128d _B);
79 extern __m128d _mm_sub_sd(__m128d _A, __m128d _B);
80 extern __m128d _mm_sub_pd(__m128d _A, __m128d _B);
81 extern __m128d _mm_mul_sd(__m128d _A, __m128d _B);
82 extern __m128d _mm_mul_pd(__m128d _A, __m128d _B);
83 extern __m128d _mm_sqrt_sd(__m128d _A, __m128d _B);
84 extern __m128d _mm_sqrt_pd(__m128d _A);
85 extern __m128d _mm_div_sd(__m128d _A, __m128d _B);
86 extern __m128d _mm_div_pd(__m128d _A, __m128d _B);
87 extern __m128d _mm_min_sd(__m128d _A, __m128d _B);
88 extern __m128d _mm_min_pd(__m128d _A, __m128d _B);
89 extern __m128d _mm_max_sd(__m128d _A, __m128d _B);
90 extern __m128d _mm_max_pd(__m128d _A, __m128d _B);
91 
92 /*
93  * DP, logicals
94  */
95 
96 extern __m128d _mm_and_pd(__m128d _A, __m128d _B);
97 extern __m128d _mm_andnot_pd(__m128d _A, __m128d _B);
98 extern __m128d _mm_or_pd(__m128d _A, __m128d _B);
99 extern __m128d _mm_xor_pd(__m128d _A, __m128d _B);
100 
101 /*
102  * DP, comparisons
103  */
104 
105 extern __m128d _mm_cmpeq_sd(__m128d _A, __m128d _B);
106 extern __m128d _mm_cmpeq_pd(__m128d _A, __m128d _B);
107 extern __m128d _mm_cmplt_sd(__m128d _A, __m128d _B);
108 extern __m128d _mm_cmplt_pd(__m128d _A, __m128d _B);
109 extern __m128d _mm_cmple_sd(__m128d _A, __m128d _B);
110 extern __m128d _mm_cmple_pd(__m128d _A, __m128d _B);
111 extern __m128d _mm_cmpgt_sd(__m128d _A, __m128d _B);
112 extern __m128d _mm_cmpgt_pd(__m128d _A, __m128d _B);
113 extern __m128d _mm_cmpge_sd(__m128d _A, __m128d _B);
114 extern __m128d _mm_cmpge_pd(__m128d _A, __m128d _B);
115 extern __m128d _mm_cmpneq_sd(__m128d _A, __m128d _B);
116 extern __m128d _mm_cmpneq_pd(__m128d _A, __m128d _B);
117 extern __m128d _mm_cmpnlt_sd(__m128d _A, __m128d _B);
118 extern __m128d _mm_cmpnlt_pd(__m128d _A, __m128d _B);
119 extern __m128d _mm_cmpnle_sd(__m128d _A, __m128d _B);
120 extern __m128d _mm_cmpnle_pd(__m128d _A, __m128d _B);
121 extern __m128d _mm_cmpngt_sd(__m128d _A, __m128d _B);
122 extern __m128d _mm_cmpngt_pd(__m128d _A, __m128d _B);
123 extern __m128d _mm_cmpnge_sd(__m128d _A, __m128d _B);
124 extern __m128d _mm_cmpnge_pd(__m128d _A, __m128d _B);
125 extern __m128d _mm_cmpord_pd(__m128d _A, __m128d _B);
126 extern __m128d _mm_cmpord_sd(__m128d _A, __m128d _B);
127 extern __m128d _mm_cmpunord_pd(__m128d _A, __m128d _B);
128 extern __m128d _mm_cmpunord_sd(__m128d _A, __m128d _B);
129 extern int _mm_comieq_sd(__m128d _A, __m128d _B);
130 extern int _mm_comilt_sd(__m128d _A, __m128d _B);
131 extern int _mm_comile_sd(__m128d _A, __m128d _B);
132 extern int _mm_comigt_sd(__m128d _A, __m128d _B);
133 extern int _mm_comige_sd(__m128d _A, __m128d _B);
134 extern int _mm_comineq_sd(__m128d _A, __m128d _B);
135 extern int _mm_ucomieq_sd(__m128d _A, __m128d _B);
136 extern int _mm_ucomilt_sd(__m128d _A, __m128d _B);
137 extern int _mm_ucomile_sd(__m128d _A, __m128d _B);
138 extern int _mm_ucomigt_sd(__m128d _A, __m128d _B);
139 extern int _mm_ucomige_sd(__m128d _A, __m128d _B);
140 extern int _mm_ucomineq_sd(__m128d _A, __m128d _B);
141 
142 /*
143  * DP, converts
144  */
145 
146 extern __m128d _mm_cvtepi32_pd(__m128i _A);
147 extern __m128i _mm_cvtpd_epi32(__m128d _A);
148 extern __m128i _mm_cvttpd_epi32(__m128d _A);
149 extern __m128 _mm_cvtepi32_ps(__m128i _A);
150 extern __m128i _mm_cvtps_epi32(__m128 _A);
151 extern __m128i _mm_cvttps_epi32(__m128 _A);
152 extern __m128 _mm_cvtpd_ps(__m128d _A);
153 extern __m128d _mm_cvtps_pd(__m128 _A);
154 extern __m128 _mm_cvtsd_ss(__m128 _A, __m128d _B);
155 extern __m128d _mm_cvtss_sd(__m128d _A, __m128 _B);
156 
157 extern int _mm_cvtsd_si32(__m128d _A);
158 extern int _mm_cvttsd_si32(__m128d _A);
159 extern __m128d _mm_cvtsi32_sd(__m128d _A, int _B);
160 
161 #if defined(_M_IX86)
162 extern __m64 _mm_cvtpd_pi32(__m128d _A);
163 extern __m64 _mm_cvttpd_pi32(__m128d _A);
164 extern __m128d _mm_cvtpi32_pd(__m64 _A);
165 #endif
166 
167 /*
168  * DP, misc
169  */
170 
171 extern __m128d _mm_unpackhi_pd(__m128d _A, __m128d _B);
172 extern __m128d _mm_unpacklo_pd(__m128d _A, __m128d _B);
173 extern int _mm_movemask_pd(__m128d _A);
174 extern __m128d _mm_shuffle_pd(__m128d _A, __m128d _B, int _I);
175 
176 /*
177  * DP, loads
178  */
179 
180 extern __m128d _mm_load_pd(double const*_Dp);
181 extern __m128d _mm_load1_pd(double const*_Dp);
182 extern __m128d _mm_loadr_pd(double const*_Dp);
183 extern __m128d _mm_loadu_pd(double const*_Dp);
184 extern __m128d _mm_load_sd(double const*_Dp);
185 extern __m128d _mm_loadh_pd(__m128d _A, double const*_Dp);
186 extern __m128d _mm_loadl_pd(__m128d _A, double const*_Dp);
187 
188 /*
189  * DP, sets
190  */
191 
192 extern __m128d _mm_set_sd(double _W);
193 extern __m128d _mm_set1_pd(double _A);
194 extern __m128d _mm_set_pd(double _Z, double _Y);
195 extern __m128d _mm_setr_pd(double _Y, double _Z);
196 extern __m128d _mm_setzero_pd(void);
197 extern __m128d _mm_move_sd(__m128d _A, __m128d _B);
198 
199 /*
200  * DP, stores
201  */
202 
203 extern void _mm_store_sd(double *_Dp, __m128d _A);
204 extern void _mm_store1_pd(double *_Dp, __m128d _A);
205 extern void _mm_store_pd(double *_Dp, __m128d _A);
206 extern void _mm_storeu_pd(double *_Dp, __m128d _A);
207 extern void _mm_storer_pd(double *_Dp, __m128d _A);
208 extern void _mm_storeh_pd(double *_Dp, __m128d _A);
209 extern void _mm_storel_pd(double *_Dp, __m128d _A);
210 
211 /*
212  * Integer, arithmetic
213  */
214 
215 extern __m128i _mm_add_epi8(__m128i _A, __m128i _B);
216 extern __m128i _mm_add_epi16(__m128i _A, __m128i _B);
217 extern __m128i _mm_add_epi32(__m128i _A, __m128i _B);
218 #if defined(_M_IX86)
219 extern __m64 _mm_add_si64(__m64 _A, __m64 _B);
220 #endif
221 extern __m128i _mm_add_epi64(__m128i _A, __m128i _B);
222 extern __m128i _mm_adds_epi8(__m128i _A, __m128i _B);
223 extern __m128i _mm_adds_epi16(__m128i _A, __m128i _B);
224 extern __m128i _mm_adds_epu8(__m128i _A, __m128i _B);
225 extern __m128i _mm_adds_epu16(__m128i _A, __m128i _B);
226 extern __m128i _mm_avg_epu8(__m128i _A, __m128i _B);
227 extern __m128i _mm_avg_epu16(__m128i _A, __m128i _B);
228 extern __m128i _mm_madd_epi16(__m128i _A, __m128i _B);
229 extern __m128i _mm_max_epi16(__m128i _A, __m128i _B);
230 extern __m128i _mm_max_epu8(__m128i _A, __m128i _B);
231 extern __m128i _mm_min_epi16(__m128i _A, __m128i _B);
232 extern __m128i _mm_min_epu8(__m128i _A, __m128i _B);
233 extern __m128i _mm_mulhi_epi16(__m128i _A, __m128i _B);
234 extern __m128i _mm_mulhi_epu16(__m128i _A, __m128i _B);
235 extern __m128i _mm_mullo_epi16(__m128i _A, __m128i _B);
236 #if defined(_M_IX86)
237 extern __m64 _mm_mul_su32(__m64 _A, __m64 _B);
238 #endif
239 extern __m128i _mm_mul_epu32(__m128i _A, __m128i _B);
240 extern __m128i _mm_sad_epu8(__m128i _A, __m128i _B);
241 extern __m128i _mm_sub_epi8(__m128i _A, __m128i _B);
242 extern __m128i _mm_sub_epi16(__m128i _A, __m128i _B);
243 extern __m128i _mm_sub_epi32(__m128i _A, __m128i _B);
244 #if defined(_M_IX86)
245 extern __m64 _mm_sub_si64(__m64 _A, __m64 _B);
246 #endif
247 extern __m128i _mm_sub_epi64(__m128i _A, __m128i _B);
248 extern __m128i _mm_subs_epi8(__m128i _A, __m128i _B);
249 extern __m128i _mm_subs_epi16(__m128i _A, __m128i _B);
250 extern __m128i _mm_subs_epu8(__m128i _A, __m128i _B);
251 extern __m128i _mm_subs_epu16(__m128i _A, __m128i _B);
252 
253 /*
254  * Integer, logicals
255  */
256 
257 extern __m128i _mm_and_si128(__m128i _A, __m128i _B);
258 extern __m128i _mm_andnot_si128(__m128i _A, __m128i _B);
259 extern __m128i _mm_or_si128(__m128i _A, __m128i _B);
260 extern __m128i _mm_xor_si128(__m128i _A, __m128i _B);
261 
262 /*
263  * Integer, shifts
264  */
265 
266 extern __m128i _mm_slli_si128(__m128i _A, int _Imm);
267 extern __m128i _mm_slli_epi16(__m128i _A, int _Count);
268 extern __m128i _mm_sll_epi16(__m128i _A, __m128i _Count);
269 extern __m128i _mm_slli_epi32(__m128i _A, int _Count);
270 extern __m128i _mm_sll_epi32(__m128i _A, __m128i _Count);
271 extern __m128i _mm_slli_epi64(__m128i _A, int _Count);
272 extern __m128i _mm_sll_epi64(__m128i _A, __m128i _Count);
273 extern __m128i _mm_srai_epi16(__m128i _A, int _Count);
274 extern __m128i _mm_sra_epi16(__m128i _A, __m128i _Count);
275 extern __m128i _mm_srai_epi32(__m128i _A, int _Count);
276 extern __m128i _mm_sra_epi32(__m128i _A, __m128i _Count);
277 extern __m128i _mm_srli_si128(__m128i _A, int _Imm);
278 extern __m128i _mm_srli_epi16(__m128i _A, int _Count);
279 extern __m128i _mm_srl_epi16(__m128i _A, __m128i _Count);
280 extern __m128i _mm_srli_epi32(__m128i _A, int _Count);
281 extern __m128i _mm_srl_epi32(__m128i _A, __m128i _Count);
282 extern __m128i _mm_srli_epi64(__m128i _A, int _Count);
283 extern __m128i _mm_srl_epi64(__m128i _A, __m128i _Count);
284 
285 /*
286  * Integer, comparisons
287  */
288 
289 extern __m128i _mm_cmpeq_epi8(__m128i _A, __m128i _B);
290 extern __m128i _mm_cmpeq_epi16(__m128i _A, __m128i _B);
291 extern __m128i _mm_cmpeq_epi32(__m128i _A, __m128i _B);
292 extern __m128i _mm_cmpgt_epi8(__m128i _A, __m128i _B);
293 extern __m128i _mm_cmpgt_epi16(__m128i _A, __m128i _B);
294 extern __m128i _mm_cmpgt_epi32(__m128i _A, __m128i _B);
295 extern __m128i _mm_cmplt_epi8(__m128i _A, __m128i _B);
296 extern __m128i _mm_cmplt_epi16(__m128i _A, __m128i _B);
297 extern __m128i _mm_cmplt_epi32(__m128i _A, __m128i _B);
298 
299 /*
300  * Integer, converts
301  */
302 
303 extern __m128i _mm_cvtsi32_si128(int _A);
304 extern int _mm_cvtsi128_si32(__m128i _A);
305 
306 /*
307  * Integer, misc
308  */
309 
310 extern __m128i _mm_packs_epi16(__m128i _A, __m128i _B);
311 extern __m128i _mm_packs_epi32(__m128i _A, __m128i _B);
312 extern __m128i _mm_packus_epi16(__m128i _A, __m128i _B);
313 extern int _mm_extract_epi16(__m128i _A, int _Imm);
314 extern __m128i _mm_insert_epi16(__m128i _A, int _B, int _Imm);
315 extern int _mm_movemask_epi8(__m128i _A);
316 extern __m128i _mm_shuffle_epi32(__m128i _A, int _Imm);
317 extern __m128i _mm_shufflehi_epi16(__m128i _A, int _Imm);
318 extern __m128i _mm_shufflelo_epi16(__m128i _A, int _Imm);
319 extern __m128i _mm_unpackhi_epi8(__m128i _A, __m128i _B);
320 extern __m128i _mm_unpackhi_epi16(__m128i _A, __m128i _B);
321 extern __m128i _mm_unpackhi_epi32(__m128i _A, __m128i _B);
322 extern __m128i _mm_unpackhi_epi64(__m128i _A, __m128i _B);
323 extern __m128i _mm_unpacklo_epi8(__m128i _A, __m128i _B);
324 extern __m128i _mm_unpacklo_epi16(__m128i _A, __m128i _B);
325 extern __m128i _mm_unpacklo_epi32(__m128i _A, __m128i _B);
326 extern __m128i _mm_unpacklo_epi64(__m128i _A, __m128i _B);
327 
328 /*
329  * Integer, loads
330  */
331 
332 extern __m128i _mm_load_si128(__m128i const*_P);
333 extern __m128i _mm_loadu_si128(__m128i const*_P);
334 extern __m128i _mm_loadl_epi64(__m128i const*_P);
335 
336 /*
337  * Integer, sets
338  */
339 
340 #if defined(_M_IX86)
341 extern __m128i _mm_set_epi64(__m64 _Q1, __m64 _Q0);
342 #endif
343 extern __m128i _mm_set_epi64x(__int64 _I1,__int64 _I0);
344 extern __m128i _mm_set_epi32(int _I3, int _I2, int _I1, int _I0);
345 extern __m128i _mm_set_epi16(short _W7, short _W6, short _W5, short _W4,
346  short _W3, short _W2, short _W1, short _W0);
347 extern __m128i _mm_set_epi8(char _B15, char _B14, char _B13, char _B12,
348  char _B11, char _B10, char _B9, char _B8,
349  char _B7, char _B6, char _B5, char _B4,
350  char _B3, char _B2, char _B1, char _B0);
351 #if defined(_M_IX86)
352 extern __m128i _mm_set1_epi64(__m64 _Q);
353 #endif
354 extern __m128i _mm_set1_epi64x(__int64 _I); /* reserved-style name, as in _mm_set1_epi32(int _I) */
355 extern __m128i _mm_set1_epi32(int _I);
356 extern __m128i _mm_set1_epi16(short _W);
357 extern __m128i _mm_set1_epi8(char _B);
358 extern __m128i _mm_setl_epi64(__m128i _Q);
359 #if defined(_M_IX86)
360 extern __m128i _mm_setr_epi64(__m64 _Q0, __m64 _Q1);
361 #endif
362 extern __m128i _mm_setr_epi32(int _I0, int _I1, int _I2, int _I3);
363 extern __m128i _mm_setr_epi16(short _W0, short _W1, short _W2, short _W3,
364  short _W4, short _W5, short _W6, short _W7);
365 extern __m128i _mm_setr_epi8(char _B0, char _B1, char _B2, char _B3, /* names ascend, as in _mm_setr_epi16/_epi32 */
366  char _B4, char _B5, char _B6, char _B7,
367  char _B8, char _B9, char _B10, char _B11,
368  char _B12, char _B13, char _B14, char _B15);
369 extern __m128i _mm_setzero_si128(void);
370 
371 /*
372  * Integer, stores
373  */
374 
375 extern void _mm_store_si128(__m128i *_P, __m128i _B);
376 extern void _mm_storeu_si128(__m128i *_P, __m128i _B);
377 extern void _mm_storel_epi64(__m128i *_P, __m128i _Q);
378 extern void _mm_maskmoveu_si128(__m128i _D, __m128i _N, char *_P);
379 
380 /*
381  * Integer, moves
382  */
383 
384 extern __m128i _mm_move_epi64(__m128i _Q);
385 #if defined(_M_IX86)
386 extern __m128i _mm_movpi64_epi64(__m64 _Q);
387 extern __m64 _mm_movepi64_pi64(__m128i _Q);
388 #endif
389 
390 /*
391  * Cacheability support
392  */
393 
394 extern void _mm_stream_pd(double *_Dp, __m128d _A);
395 extern void _mm_stream_si128(__m128i *_P, __m128i _A);
396 extern void _mm_clflush(void const*_P);
397 extern void _mm_lfence(void);
398 extern void _mm_mfence(void);
399 extern void _mm_stream_si32(int *_P, int _I);
400 extern void _mm_pause(void);
401 
402 /*
403  * New convert to float
404  */
405 
406 extern double _mm_cvtsd_f64(__m128d _A);
407 
408 /*
409  * Support for casting between various SP, DP, INT vector types.
410  * Note that these do no conversion of values, they just change
411  * the type.
412  */
413 
414 extern __m128 _mm_castpd_ps(__m128d);
415 extern __m128i _mm_castpd_si128(__m128d);
416 extern __m128d _mm_castps_pd(__m128);
417 extern __m128i _mm_castps_si128(__m128);
418 extern __m128 _mm_castsi128_ps(__m128i);
419 extern __m128d _mm_castsi128_pd(__m128i);
420 
421 /*
422  * Support for 64-bit extension intrinsics
423  */
424 
425 #if defined (_M_X64)
426 extern __int64 _mm_cvtsd_si64(__m128d);
427 extern __int64 _mm_cvttsd_si64(__m128d);
428 extern __m128d _mm_cvtsi64_sd(__m128d, __int64);
429 extern __m128i _mm_cvtsi64_si128(__int64);
430 extern __int64 _mm_cvtsi128_si64(__m128i);
431 /* Alternate intrinsic name definitions */
432 #define _mm_stream_si64 _mm_stream_si64x
433 #endif /* defined (_M_X64) */
434 
435 #if defined __cplusplus
436 } /* End "C" */
437 #endif /* defined __cplusplus */
438 
439 #endif /* defined (_M_CEE_PURE) */
440 #endif /* __midl */
441 #endif /* _INCLUDED_EMM */
__m128i _mm_sub_epi32(__m128i _A, __m128i _B)
__m128i _mm_adds_epi8(__m128i _A, __m128i _B)
__m128i _mm_cvttps_epi32(__m128 _A)
int _mm_movemask_epi8(__m128i _A)
void _mm_store_sd(double *_Dp, __m128d _A)
__m128i _mm_mulhi_epi16(__m128i _A, __m128i _B)
__m128d _mm_cmpngt_pd(__m128d _A, __m128d _B)
int _mm_comineq_sd(__m128d _A, __m128d _B)
int _mm_extract_epi16(__m128i _A, int _Imm)
__m128i _mm_xor_si128(__m128i _A, __m128i _B)
__m128d _mm_move_sd(__m128d _A, __m128d _B)
__m128i _mm_loadu_si128(__m128i const *_P)
__m128d _mm_load_sd(double const *_Dp)
__m128i _mm_srai_epi32(__m128i _A, int _Count)
__m128d _mm_cmpord_sd(__m128d _A, __m128d _B)
__m128i _mm_packs_epi32(__m128i _A, __m128i _B)
__m128d _mm_min_pd(__m128d _A, __m128d _B)
__m128d _mm_cmpeq_pd(__m128d _A, __m128d _B)
__m128i _mm_sra_epi16(__m128i _A, __m128i _Count)
int _mm_ucomilt_sd(__m128d _A, __m128d _B)
__m128d _mm_cmpneq_sd(__m128d _A, __m128d _B)
int _mm_comieq_sd(__m128d _A, __m128d _B)
__m128d _mm_loadu_pd(double const *_Dp)
int _mm_ucomigt_sd(__m128d _A, __m128d _B)
__m128d _mm_cvtsi32_sd(__m128d _A, int _B)
__m128d _mm_cmplt_pd(__m128d _A, __m128d _B)
unsigned int _Count
Definition: xcomplex:668
__m128i _mm_move_epi64(__m128i _Q)
__m128i _mm_cmpgt_epi32(__m128i _A, __m128i _B)
__m128d _mm_castps_pd(__m128)
__m128d _mm_cmpgt_sd(__m128d _A, __m128d _B)
void _mm_pause(void)
__m128d _mm_set1_pd(double _A)
__m128d _mm_unpacklo_pd(__m128d _A, __m128d _B)
int _mm_ucomieq_sd(__m128d _A, __m128d _B)
void _mm_store1_pd(double *_Dp, __m128d _A)
__m128i _mm_castpd_si128(__m128d)
__m128d _mm_cmple_pd(__m128d _A, __m128d _B)
__m128i _mm_max_epi16(__m128i _A, __m128i _B)
__m128d _mm_cmpunord_sd(__m128d _A, __m128d _B)
__m128d _mm_cmpge_sd(__m128d _A, __m128d _B)
__m128d
Definition: emmintrin.h:57
__m128i _mm_unpacklo_epi16(__m128i _A, __m128i _B)
int _mm_comige_sd(__m128d _A, __m128d _B)
void * align(size_t _Bound, size_t _Size, void *&_Ptr, size_t &_Space) _NOEXCEPT
Definition: memory:1985
__m128i _mm_add_epi64(__m128i _A, __m128i _B)
__m128i _mm_set1_epi16(short _W)
int _mm_comigt_sd(__m128d _A, __m128d _B)
double _mm_cvtsd_f64(__m128d _A)
__m128i _mm_set1_epi8(char _B)
__m128i _mm_setl_epi64(__m128i _Q)
__m128i _mm_set1_epi64x(__int64 i)
__m128i _mm_srli_epi16(__m128i _A, int _Count)
__m128d _mm_loadh_pd(__m128d _A, double const *_Dp)
__m128d _mm_shuffle_pd(__m128d _A, __m128d _B, int _I)
__m128i _mm_cmplt_epi8(__m128i _A, __m128i _B)
int _mm_cvtsd_si32(__m128d _A)
__m128i _mm_set_epi16(short _W7, short _W6, short _W5, short _W4, short _W3, short _W2, short _W1, short _W0)
__m128i _mm_packs_epi16(__m128i _A, __m128i _B)
__m128i _mm_add_epi8(__m128i _A, __m128i _B)
__m128i _mm_setr_epi8(char _B15, char _B14, char _B13, char _B12, char _B11, char _B10, char _B9, char _B8, char _B7, char _B6, char _B5, char _B4, char _B3, char _B2, char _B1, char _B0)
__m128d _mm_set_pd(double _Z, double _Y)
__m128i _mm_srl_epi32(__m128i _A, __m128i _Count)
__m128i _mm_set1_epi32(int _I)
__m128i _mm_sra_epi32(__m128i _A, __m128i _Count)
__m128i _mm_setzero_si128(void)
__m128i _mm_srli_epi32(__m128i _A, int _Count)
__m128d _mm_cmpnge_sd(__m128d _A, __m128d _B)
__m128d _mm_cmplt_sd(__m128d _A, __m128d _B)
__m128i _mm_set_epi32(int _I3, int _I2, int _I1, int _I0)
__m128i _mm_sll_epi16(__m128i _A, __m128i _Count)
__m128i _mm_avg_epu16(__m128i _A, __m128i _B)
__m128i _mm_insert_epi16(__m128i _A, int _B, int _Imm)
__m128d _mm_load_pd(double const *_Dp)
__m128i _mm_min_epi16(__m128i _A, __m128i _B)
void _mm_mfence(void)
__m128i _mm_unpacklo_epi32(__m128i _A, __m128i _B)
__m128i _mm_adds_epu8(__m128i _A, __m128i _B)
__m128d _mm_sqrt_sd(__m128d _A, __m128d _B)
__m128 _mm_cvtpd_ps(__m128d _A)
int _mm_ucomile_sd(__m128d _A, __m128d _B)
__m128i _mm_castps_si128(__m128)
__m128i _mm_sll_epi32(__m128i _A, __m128i _Count)
__m128i _mm_unpackhi_epi16(__m128i _A, __m128i _B)
__m128d _mm_cmpnle_pd(__m128d _A, __m128d _B)
__m128i _mm_slli_epi16(__m128i _A, int _Count)
int _mm_movemask_pd(__m128d _A)
__m128d _mm_cmpngt_sd(__m128d _A, __m128d _B)
int _mm_cvttsd_si32(__m128d _A)
__m128i _mm_sad_epu8(__m128i _A, __m128i _B)
__m128i _mm_setr_epi16(short _W0, short _W1, short _W2, short _W3, short _W4, short _W5, short _W6, short _W7)
__m128i _mm_cvtpd_epi32(__m128d _A)
__m128i _mm_max_epu8(__m128i _A, __m128i _B)
__m128d _mm_cmpneq_pd(__m128d _A, __m128d _B)
int i[4]
Definition: dvec.h:68
int _mm_comile_sd(__m128d _A, __m128d _B)
__m128d _mm_cmpnlt_sd(__m128d _A, __m128d _B)
__m128d _mm_cmpnge_pd(__m128d _A, __m128d _B)
__m128d _mm_add_pd(__m128d _A, __m128d _B)
__m128d _mm_or_pd(__m128d _A, __m128d _B)
__m128d _mm_castsi128_pd(__m128i)
__m128i _mm_mul_epu32(__m128i _A, __m128i _B)
__m128i _mm_min_epu8(__m128i _A, __m128i _B)
__m128d _mm_cmpord_pd(__m128d _A, __m128d _B)
__m128i _mm_cmplt_epi32(__m128i _A, __m128i _B)
__m128d _mm_div_pd(__m128d _A, __m128d _B)
void _mm_storel_pd(double *_Dp, __m128d _A)
__m128d _mm_cvtepi32_pd(__m128i _A)
int _mm_comilt_sd(__m128d _A, __m128d _B)
__m128i _mm_slli_si128(__m128i _A, int _Imm)
__m64
Definition: mmintrin.h:45
__m128i _mm_srli_si128(__m128i _A, int _Imm)
__m128d _mm_and_pd(__m128d _A, __m128d _B)
__m128i _mm_srl_epi16(__m128i _A, __m128i _Count)
__m128 _mm_castpd_ps(__m128d)
void _mm_store_pd(double *_Dp, __m128d _A)
__m128i _mm_slli_epi64(__m128i _A, int _Count)
__m128i _mm_cmpgt_epi8(__m128i _A, __m128i _B)
void _mm_clflush(void const *_P)
int _mm_ucomige_sd(__m128d _A, __m128d _B)
__m128i _mm_unpackhi_epi32(__m128i _A, __m128i _B)
__m128d _mm_cmpgt_pd(__m128d _A, __m128d _B)
__m128d _mm_max_pd(__m128d _A, __m128d _B)
__m128 _mm_cvtepi32_ps(__m128i _A)
__m128i _mm_cvttpd_epi32(__m128d _A)
__m128i
Definition: emmintrin.h:53
void _mm_store_si128(__m128i *_P, __m128i _B)
__m128d _mm_cmpnlt_pd(__m128d _A, __m128d _B)
__m128i _mm_srli_epi64(__m128i _A, int _Count)
__m128i _mm_set_epi64x(__int64 _I1, __int64 _I0)
__m128i _mm_unpackhi_epi64(__m128i _A, __m128i _B)
int _mm_cvtsi128_si32(__m128i _A)
__m128d _mm_unpackhi_pd(__m128d _A, __m128d _B)
__m128
Definition: xmmintrin.h:75
void _mm_storeu_pd(double *_Dp, __m128d _A)
__m128i _mm_cmpgt_epi16(__m128i _A, __m128i _B)
void _mm_storeh_pd(double *_Dp, __m128d _A)
__m128d _mm_load1_pd(double const *_Dp)
__m128d _mm_xor_pd(__m128d _A, __m128d _B)
__m128d _mm_loadr_pd(double const *_Dp)
__m128i _mm_shufflelo_epi16(__m128i _A, int _Imm)
__m128d _mm_loadl_pd(__m128d _A, double const *_Dp)
__m128i _mm_cmplt_epi16(__m128i _A, __m128i _B)
__m128i _mm_avg_epu8(__m128i _A, __m128i _B)
__m128i _mm_cvtsi32_si128(int _A)
__m128i _mm_mullo_epi16(__m128i _A, __m128i _B)
__m128d _mm_setzero_pd(void)
__m128i _mm_subs_epu8(__m128i _A, __m128i _B)
void _mm_storer_pd(double *_Dp, __m128d _A)
__m128i _mm_cmpeq_epi16(__m128i _A, __m128i _B)
__m128i _mm_or_si128(__m128i _A, __m128i _B)
__m128i _mm_shufflehi_epi16(__m128i _A, int _Imm)
__m128i _mm_sll_epi64(__m128i _A, __m128i _Count)
__m128i _mm_cmpeq_epi32(__m128i _A, __m128i _B)
__m128i _mm_load_si128(__m128i const *_P)
__m128i _mm_adds_epu16(__m128i _A, __m128i _B)
__m128i _mm_setr_epi32(int _I0, int _I1, int _I2, int _I3)
__m128i _mm_add_epi32(__m128i _A, __m128i _B)
union __declspec(intrin_type) __declspec(align(16)) __m128i
Definition: emmintrin.h:44
__m128d _mm_cmpnle_sd(__m128d _A, __m128d _B)
__m128i _mm_unpacklo_epi8(__m128i _A, __m128i _B)
void _mm_storel_epi64(__m128i *_P, __m128i _Q)
__m128d _mm_sub_pd(__m128d _A, __m128d _B)
__m128 _mm_castsi128_ps(__m128i)
__m128d _mm_sub_sd(__m128d _A, __m128d _B)
__m128d _mm_cvtss_sd(__m128d _A, __m128 _B)
__m128i _mm_slli_epi32(__m128i _A, int _Count)
__m128i _mm_sub_epi16(__m128i _A, __m128i _B)
void _mm_stream_pd(double *_Dp, __m128d _A)
__m128d _mm_cmple_sd(__m128d _A, __m128d _B)
__m128d _mm_mul_sd(__m128d _A, __m128d _B)
int _mm_ucomineq_sd(__m128d _A, __m128d _B)
__m128i _mm_subs_epu16(__m128i _A, __m128i _B)
__m128i _mm_sub_epi8(__m128i _A, __m128i _B)
__m128i _mm_shuffle_epi32(__m128i _A, int _Imm)
void _mm_storeu_si128(__m128i *_P, __m128i _B)
__m128i _mm_unpacklo_epi64(__m128i _A, __m128i _B)
void _mm_stream_si32(int *_P, int _I)
__m128i _mm_andnot_si128(__m128i _A, __m128i _B)
__m128d _mm_max_sd(__m128d _A, __m128d _B)
__m128d _mm_div_sd(__m128d _A, __m128d _B)
__m128i _mm_loadl_epi64(__m128i const *_P)
__m128d _mm_set_sd(double _W)
__m128i _mm_srai_epi16(__m128i _A, int _Count)
__m128i _mm_cvtps_epi32(__m128 _A)
__m128i _mm_subs_epi8(__m128i _A, __m128i _B)
__m128d _mm_mul_pd(__m128d _A, __m128d _B)
__m128d _mm_min_sd(__m128d _A, __m128d _B)
__m128i _mm_add_epi16(__m128i _A, __m128i _B)
void _mm_maskmoveu_si128(__m128i _D, __m128i _N, char *_P)
__m128i _mm_set_epi8(char _B15, char _B14, char _B13, char _B12, char _B11, char _B10, char _B9, char _B8, char _B7, char _B6, char _B5, char _B4, char _B3, char _B2, char _B1, char _B0)
__m128i _mm_cmpeq_epi8(__m128i _A, __m128i _B)
__m128i _mm_unpackhi_epi8(__m128i _A, __m128i _B)
__m128i _mm_adds_epi16(__m128i _A, __m128i _B)
__m128d _mm_cvtps_pd(__m128 _A)
__m128d _mm_andnot_pd(__m128d _A, __m128d _B)
__m128i _mm_sub_epi64(__m128i _A, __m128i _B)
__m128 _mm_cvtsd_ss(__m128 _A, __m128d _B)
__m128d _mm_cmpeq_sd(__m128d _A, __m128d _B)
__m128i _mm_packus_epi16(__m128i _A, __m128i _B)
__m128i _mm_mulhi_epu16(__m128i _A, __m128i _B)
__m128d _mm_cmpge_pd(__m128d _A, __m128d _B)
__m128i _mm_srl_epi64(__m128i _A, __m128i _Count)
__m128d _mm_setr_pd(double _Y, double _Z)
void _mm_lfence(void)
__m128d _mm_add_sd(__m128d _A, __m128d _B)
__m128i _mm_and_si128(__m128i _A, __m128i _B)
__m128i _mm_madd_epi16(__m128i _A, __m128i _B)
void _mm_stream_si128(__m128i *_P, __m128i _A)
__m128d _mm_sqrt_pd(__m128d _A)
__m128i _mm_subs_epi16(__m128i _A, __m128i _B)
__m128d _mm_cmpunord_pd(__m128d _A, __m128d _B)