STLdoc
STLdocumentation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
ammintrin.h
Go to the documentation of this file.
1 /****
2 * Copyright (C) 2007-2008 Advanced Micro Devices Inc. All rights reserved.
3 *
4 * The information and source code contained herein is the exclusive
5 * property of Advanced Micro Devices and may not be disclosed, examined
6 * or reproduced in whole or in part without explicit written authorization
7 * from the company.
8 *
9 * ammintrin.h - Definitions for AMD-specific intrinsics
10 *
11 ****/
12 
13 #pragma once
14 
15 #if !defined(_M_IX86) && !defined(_M_X64)
16 #error This header is specific to X86 and X64 targets
17 #endif
18 
19 #ifndef _INCLUDED_AMM
20 #define _INCLUDED_AMM
21 #ifndef __midl
22 
23 #if !defined _M_IX86 && !defined _M_X64
24  #error This header is specific to X86 and X64 targets
25 #endif
26 
27 #if defined (_M_CEE_PURE)
28  #error ERROR: This file is not supported in the pure mode!
29 #else /* defined (_M_CEE_PURE) */
30 
31 #if defined __cplusplus
32 extern "C" { /* Intrinsics use C name-mangling. */
33 #endif /* defined __cplusplus */
34 
35 /*
36  * Vector integer comparison control macros
37  */
38 
39 #define _MM_PCOMCTRL_LT 0
40 #define _MM_PCOMCTRL_LE 1
41 #define _MM_PCOMCTRL_GT 2
42 #define _MM_PCOMCTRL_GE 3
43 #define _MM_PCOMCTRL_EQ 4
44 #define _MM_PCOMCTRL_NEQ 5
45 #define _MM_PCOMCTRL_FALSE 6
46 #define _MM_PCOMCTRL_TRUE 7
47 
48 /*
49  * MACRO functions for vector integer comparisons
50  */
51 
52 #define _mm_comlt_epu8(v1, v2) _mm_com_epu8(v1, v2, _MM_PCOMCTRL_LT)
53 #define _mm_comle_epu8(v1, v2) _mm_com_epu8(v1, v2, _MM_PCOMCTRL_LE)
54 #define _mm_comgt_epu8(v1, v2) _mm_com_epu8(v1, v2, _MM_PCOMCTRL_GT)
55 #define _mm_comge_epu8(v1, v2) _mm_com_epu8(v1, v2, _MM_PCOMCTRL_GE)
56 #define _mm_comeq_epu8(v1, v2) _mm_com_epu8(v1, v2, _MM_PCOMCTRL_EQ)
57 #define _mm_comneq_epu8(v1, v2) _mm_com_epu8(v1, v2, _MM_PCOMCTRL_NEQ)
58 #define _mm_comfalse_epu8(v1, v2) _mm_com_epu8(v1, v2, _MM_PCOMCTRL_FALSE)
59 #define _mm_comtrue_epu8(v1, v2) _mm_com_epu8(v1, v2, _MM_PCOMCTRL_TRUE)
60 
61 #define _mm_comlt_epu16(v1, v2) _mm_com_epu16(v1, v2, _MM_PCOMCTRL_LT)
62 #define _mm_comle_epu16(v1, v2) _mm_com_epu16(v1, v2, _MM_PCOMCTRL_LE)
63 #define _mm_comgt_epu16(v1, v2) _mm_com_epu16(v1, v2, _MM_PCOMCTRL_GT)
64 #define _mm_comge_epu16(v1, v2) _mm_com_epu16(v1, v2, _MM_PCOMCTRL_GE)
65 #define _mm_comeq_epu16(v1, v2) _mm_com_epu16(v1, v2, _MM_PCOMCTRL_EQ)
66 #define _mm_comneq_epu16(v1, v2) _mm_com_epu16(v1, v2, _MM_PCOMCTRL_NEQ)
67 #define _mm_comfalse_epu16(v1, v2) _mm_com_epu16(v1, v2, _MM_PCOMCTRL_FALSE)
68 #define _mm_comtrue_epu16(v1, v2) _mm_com_epu16(v1, v2, _MM_PCOMCTRL_TRUE)
69 
70 #define _mm_comlt_epu32(v1, v2) _mm_com_epu32(v1, v2, _MM_PCOMCTRL_LT)
71 #define _mm_comle_epu32(v1, v2) _mm_com_epu32(v1, v2, _MM_PCOMCTRL_LE)
72 #define _mm_comgt_epu32(v1, v2) _mm_com_epu32(v1, v2, _MM_PCOMCTRL_GT)
73 #define _mm_comge_epu32(v1, v2) _mm_com_epu32(v1, v2, _MM_PCOMCTRL_GE)
74 #define _mm_comeq_epu32(v1, v2) _mm_com_epu32(v1, v2, _MM_PCOMCTRL_EQ)
75 #define _mm_comneq_epu32(v1, v2) _mm_com_epu32(v1, v2, _MM_PCOMCTRL_NEQ)
76 #define _mm_comfalse_epu32(v1, v2) _mm_com_epu32(v1, v2, _MM_PCOMCTRL_FALSE)
77 #define _mm_comtrue_epu32(v1, v2) _mm_com_epu32(v1, v2, _MM_PCOMCTRL_TRUE)
78 
79 #define _mm_comlt_epu64(v1, v2) _mm_com_epu64(v1, v2, _MM_PCOMCTRL_LT)
80 #define _mm_comle_epu64(v1, v2) _mm_com_epu64(v1, v2, _MM_PCOMCTRL_LE)
81 #define _mm_comgt_epu64(v1, v2) _mm_com_epu64(v1, v2, _MM_PCOMCTRL_GT)
82 #define _mm_comge_epu64(v1, v2) _mm_com_epu64(v1, v2, _MM_PCOMCTRL_GE)
83 #define _mm_comeq_epu64(v1, v2) _mm_com_epu64(v1, v2, _MM_PCOMCTRL_EQ)
84 #define _mm_comneq_epu64(v1, v2) _mm_com_epu64(v1, v2, _MM_PCOMCTRL_NEQ)
85 #define _mm_comfalse_epu64(v1, v2) _mm_com_epu64(v1, v2, _MM_PCOMCTRL_FALSE)
86 #define _mm_comtrue_epu64(v1, v2) _mm_com_epu64(v1, v2, _MM_PCOMCTRL_TRUE)
87 
88 #define _mm_comlt_epi8(v1, v2) _mm_com_epi8(v1, v2, _MM_PCOMCTRL_LT)
89 #define _mm_comle_epi8(v1, v2) _mm_com_epi8(v1, v2, _MM_PCOMCTRL_LE)
90 #define _mm_comgt_epi8(v1, v2) _mm_com_epi8(v1, v2, _MM_PCOMCTRL_GT)
91 #define _mm_comge_epi8(v1, v2) _mm_com_epi8(v1, v2, _MM_PCOMCTRL_GE)
92 #define _mm_comeq_epi8(v1, v2) _mm_com_epi8(v1, v2, _MM_PCOMCTRL_EQ)
93 #define _mm_comneq_epi8(v1, v2) _mm_com_epi8(v1, v2, _MM_PCOMCTRL_NEQ)
94 #define _mm_comfalse_epi8(v1, v2) _mm_com_epi8(v1, v2, _MM_PCOMCTRL_FALSE)
95 #define _mm_comtrue_epi8(v1, v2) _mm_com_epi8(v1, v2, _MM_PCOMCTRL_TRUE)
96 
97 #define _mm_comlt_epi16(v1, v2) _mm_com_epi16(v1, v2, _MM_PCOMCTRL_LT)
98 #define _mm_comle_epi16(v1, v2) _mm_com_epi16(v1, v2, _MM_PCOMCTRL_LE)
99 #define _mm_comgt_epi16(v1, v2) _mm_com_epi16(v1, v2, _MM_PCOMCTRL_GT)
100 #define _mm_comge_epi16(v1, v2) _mm_com_epi16(v1, v2, _MM_PCOMCTRL_GE)
101 #define _mm_comeq_epi16(v1, v2) _mm_com_epi16(v1, v2, _MM_PCOMCTRL_EQ)
102 #define _mm_comneq_epi16(v1, v2) _mm_com_epi16(v1, v2, _MM_PCOMCTRL_NEQ)
103 #define _mm_comfalse_epi16(v1, v2) _mm_com_epi16(v1, v2, _MM_PCOMCTRL_FALSE)
104 #define _mm_comtrue_epi16(v1, v2) _mm_com_epi16(v1, v2, _MM_PCOMCTRL_TRUE)
105 
106 #define _mm_comlt_epi32(v1, v2) _mm_com_epi32(v1, v2, _MM_PCOMCTRL_LT)
107 #define _mm_comle_epi32(v1, v2) _mm_com_epi32(v1, v2, _MM_PCOMCTRL_LE)
108 #define _mm_comgt_epi32(v1, v2) _mm_com_epi32(v1, v2, _MM_PCOMCTRL_GT)
109 #define _mm_comge_epi32(v1, v2) _mm_com_epi32(v1, v2, _MM_PCOMCTRL_GE)
110 #define _mm_comeq_epi32(v1, v2) _mm_com_epi32(v1, v2, _MM_PCOMCTRL_EQ)
111 #define _mm_comneq_epi32(v1, v2) _mm_com_epi32(v1, v2, _MM_PCOMCTRL_NEQ)
112 #define _mm_comfalse_epi32(v1, v2) _mm_com_epi32(v1, v2, _MM_PCOMCTRL_FALSE)
113 #define _mm_comtrue_epi32(v1, v2) _mm_com_epi32(v1, v2, _MM_PCOMCTRL_TRUE)
114 
115 #define _mm_comlt_epi64(v1, v2) _mm_com_epi64(v1, v2, _MM_PCOMCTRL_LT)
116 #define _mm_comle_epi64(v1, v2) _mm_com_epi64(v1, v2, _MM_PCOMCTRL_LE)
117 #define _mm_comgt_epi64(v1, v2) _mm_com_epi64(v1, v2, _MM_PCOMCTRL_GT)
118 #define _mm_comge_epi64(v1, v2) _mm_com_epi64(v1, v2, _MM_PCOMCTRL_GE)
119 #define _mm_comeq_epi64(v1, v2) _mm_com_epi64(v1, v2, _MM_PCOMCTRL_EQ)
120 #define _mm_comneq_epi64(v1, v2) _mm_com_epi64(v1, v2, _MM_PCOMCTRL_NEQ)
121 #define _mm_comfalse_epi64(v1, v2) _mm_com_epi64(v1, v2, _MM_PCOMCTRL_FALSE)
122 #define _mm_comtrue_epi64(v1, v2) _mm_com_epi64(v1, v2, _MM_PCOMCTRL_TRUE)
123 
124 /* SSE5 intrinsics */
125 
126 /* Float/double multiply-accumulate */
147 
148 /* Integer multiply-accumulate */
161 
162 /* Horizontal add/subtract */
178 
179 /* Vector conditional moves */
182 
183 /* Vector shifts and rotates */
200 
201 /* Vector integer comparisons */
202 
211 
212 /* Precision control */
213 
218 
219 /* Control values for permute2 intrinsics */
220 #define _MM_PERMUTE2_COPY 0 /* just copy the selected value */
221 /* Note that using the constant 1 would have the same effect as 0 */
222 #define _MM_PERMUTE2_ZEROIF1 2 /* zero selected value if src3 bit is 1 */
223 #define _MM_PERMUTE2_ZEROIF0 3 /* zero selected value if src3 bit is 0 */
224 
225 /* Permutation */
228 
229 
230 /* YMM versions */
248 
249 /* LWP intrinsics */
250 void __llwpcb(void *);
251 void *__slwpcb();
252 void __lwpval32(unsigned int, unsigned int, unsigned int);
253 unsigned char __lwpins32(unsigned int, unsigned int, unsigned int);
254 #if defined (_M_X64)
255 void __lwpval64(unsigned __int64, unsigned int, unsigned int);
256 unsigned char __lwpins64(unsigned __int64, unsigned int, unsigned int);
257 #endif /* defined (_M_X64) */
258 
259 /* BMI intrinsics */
260 unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int);
261 unsigned int _andn_u32(unsigned int, unsigned int);
262 unsigned int _tzcnt_u32(unsigned int);
263 unsigned int _lzcnt_u32(unsigned int);
264 unsigned int _blsr_u32(unsigned int);
265 unsigned int _blsmsk_u32(unsigned int);
266 unsigned int _blsi_u32(unsigned int);
267 #if defined (_M_X64)
268 unsigned __int64 _bextr_u64(unsigned __int64, unsigned int, unsigned int);
269 unsigned __int64 _andn_u64(unsigned __int64, unsigned __int64);
270 unsigned __int64 _tzcnt_u64(unsigned __int64);
271 unsigned __int64 _lzcnt_u64(unsigned __int64);
272 unsigned __int64 _blsr_u64(unsigned __int64);
273 unsigned __int64 _blsmsk_u64(unsigned __int64);
274 unsigned __int64 _blsi_u64(unsigned __int64);
275 #endif /* defined (_M_X64) */
276 
277 /* TBM intrinsics */
278 unsigned int _bextri_u32(unsigned int, unsigned int);
279 unsigned int _blcfill_u32(unsigned int);
280 unsigned int _blsfill_u32(unsigned int);
281 unsigned int _blcs_u32(unsigned int);
282 unsigned int _tzmsk_u32(unsigned int);
283 unsigned int _blcic_u32(unsigned int);
284 unsigned int _blsic_u32(unsigned int);
285 unsigned int _t1mskc_u32(unsigned int);
286 unsigned int _blcmsk_u32(unsigned int);
287 unsigned int _blci_u32(unsigned int);
288 #if defined (_M_X64)
289 unsigned __int64 _bextri_u64(unsigned __int64, unsigned int);
290 unsigned __int64 _blcfill_u64(unsigned __int64);
291 unsigned __int64 _blsfill_u64(unsigned __int64);
292 unsigned __int64 _blcs_u64(unsigned __int64);
293 unsigned __int64 _tzmsk_u64(unsigned __int64);
294 unsigned __int64 _blcic_u64(unsigned __int64);
295 unsigned __int64 _blsic_u64(unsigned __int64);
296 unsigned __int64 _t1mskc_u64(unsigned __int64);
297 unsigned __int64 _blcmsk_u64(unsigned __int64);
298 unsigned __int64 _blci_u64(unsigned __int64);
299 #endif /* defined (_M_X64) */
300 
301 void _mm_monitorx(void const *, unsigned int, unsigned int);
302 void _mm_mwaitx(unsigned int, unsigned int, unsigned int);
303 
304 void _mm_clzero(void const *);
305 
306 #if defined __cplusplus
307 }; /* End "C" */
308 #endif /* defined __cplusplus */
309 
310 #endif /* defined (_M_CEE_PURE) */
311 #endif /* __midl */
312 #endif /* _INCLUDED_AMM */
void _mm_clzero(void const *)
__m256d _mm256_maddsub_pd(__m256d, __m256d, __m256d)
__m128i _mm_shl_epi64(__m128i, __m128i)
__m128i _mm_roti_epi8(__m128i, int)
__m128 _mm_msub_ss(__m128, __m128, __m128)
__m128 _mm_maddsub_ps(__m128, __m128, __m128)
__m128i _mm_sha_epi8(__m128i, __m128i)
__m128i _mm_com_epi32(__m128i, __m128i, int)
__m128i _mm_hsubd_epi16(__m128i)
__m128 _mm_nmacc_ss(__m128, __m128, __m128)
__m128d _mm_permute2_pd(__m128d, __m128d, __m128i, int)
__m128i _mm_com_epu8(__m128i, __m128i, int)
__m128i _mm_macchi_epi32(__m128i, __m128i, __m128i)
__m128i _mm_maddd_epi16(__m128i, __m128i, __m128i)
unsigned int _blsi_u32(unsigned int)
__m128i _mm_roti_epi32(__m128i, int)
__m128i _mm_sha_epi64(__m128i, __m128i)
__m128i _mm_haddd_epi8(__m128i)
__m256 _mm256_msub_ps(__m256, __m256, __m256)
__m128i _mm_cmov_si128(__m128i, __m128i, __m128i)
__m128i _mm_haddq_epi16(__m128i)
unsigned int _blcmsk_u32(unsigned int)
__m128 _mm_permute2_ps(__m128, __m128, __m128i, int)
__m128d
Definition: emmintrin.h:57
__m128i _mm_maddsd_epi16(__m128i, __m128i, __m128i)
__m256d
Definition: immintrin.h:43
__m128 _mm_macc_ps(__m128, __m128, __m128)
__m128i _mm_macc_epi32(__m128i, __m128i, __m128i)
__m128i _mm_shl_epi8(__m128i, __m128i)
unsigned int _blcfill_u32(unsigned int)
unsigned int _tzmsk_u32(unsigned int)
__m128d _mm_maddsub_pd(__m128d, __m128d, __m128d)
__m128i _mm_rot_epi16(__m128i, __m128i)
__m256d _mm256_nmsub_pd(__m256d, __m256d, __m256d)
__m128d _mm_msub_pd(__m128d, __m128d, __m128d)
__m128i _mm_roti_epi16(__m128i, int)
__m128 _mm_nmsub_ss(__m128, __m128, __m128)
__m128d _mm_nmacc_pd(__m128d, __m128d, __m128d)
void __lwpval32(unsigned int, unsigned int, unsigned int)
__m256 _mm256_macc_ps(__m256, __m256, __m256)
__m256d _mm256_permute2_pd(__m256d, __m256d, __m256i, int)
__m128i _mm_com_epu16(__m128i, __m128i, int)
__m128d _mm_frcz_sd(__m128d, __m128d)
__m128i _mm_shl_epi32(__m128i, __m128i)
__m256d _mm256_frcz_pd(__m256d)
__m128 _mm_nmacc_ps(__m128, __m128, __m128)
__m128i _mm_perm_epi8(__m128i, __m128i, __m128i)
__m256 _mm256_msubadd_ps(__m256, __m256, __m256)
__m128i _mm_rot_epi64(__m128i, __m128i)
void __llwpcb(void *)
unsigned int _blcs_u32(unsigned int)
__m128i _mm_sha_epi32(__m128i, __m128i)
__m256 _mm256_nmsub_ps(__m256, __m256, __m256)
__m128i _mm_maccsd_epi16(__m128i, __m128i, __m128i)
__m128i _mm_com_epi8(__m128i, __m128i, int)
unsigned int _blsic_u32(unsigned int)
__m128i _mm_macc_epi16(__m128i, __m128i, __m128i)
__m128i _mm_haddq_epu32(__m128i)
__m128i _mm_hsubq_epi32(__m128i)
__m128i _mm_maccd_epi16(__m128i, __m128i, __m128i)
void * __slwpcb()
__m256 _mm256_permute2_ps(__m256, __m256, __m256i, int)
__m128i _mm_rot_epi32(__m128i, __m128i)
__m256d _mm256_msub_pd(__m256d, __m256d, __m256d)
__m128i
Definition: emmintrin.h:53
__m256
Definition: immintrin.h:39
unsigned char __lwpins32(unsigned int, unsigned int, unsigned int)
void _mm_monitorx(void const *, unsigned int, unsigned int)
__m128 _mm_msubadd_ps(__m128, __m128, __m128)
__m128
Definition: xmmintrin.h:75
unsigned int _andn_u32(unsigned int, unsigned int)
__m256d _mm256_msubadd_pd(__m256d, __m256d, __m256d)
unsigned int _bextri_u32(unsigned int, unsigned int)
__m128i _mm_haddq_epi32(__m128i)
unsigned int _blsmsk_u32(unsigned int)
__m128d _mm_nmsub_sd(__m128d, __m128d, __m128d)
__m128d _mm_msubadd_pd(__m128d, __m128d, __m128d)
__m128i _mm_macclo_epi32(__m128i, __m128i, __m128i)
__m128i _mm_haddd_epu8(__m128i)
unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int)
__m128i _mm_com_epu32(__m128i, __m128i, int)
__m128i _mm_haddw_epu8(__m128i)
__m128i _mm_maccshi_epi32(__m128i, __m128i, __m128i)
unsigned int _t1mskc_u32(unsigned int)
__m128d _mm_macc_sd(__m128d, __m128d, __m128d)
__m128i _mm_shl_epi16(__m128i, __m128i)
__m128i _mm_maccslo_epi32(__m128i, __m128i, __m128i)
unsigned int _blsr_u32(unsigned int)
unsigned int _tzcnt_u32(unsigned int)
__m128i _mm_maccs_epi16(__m128i, __m128i, __m128i)
__m128i _mm_sha_epi16(__m128i, __m128i)
__m128d _mm_msub_sd(__m128d, __m128d, __m128d)
__m256 _mm256_frcz_ps(__m256)
__m128 _mm_macc_ss(__m128, __m128, __m128)
__m256d _mm256_macc_pd(__m256d, __m256d, __m256d)
__m128i _mm_roti_epi64(__m128i, int)
__m128i _mm_haddq_epi8(__m128i)
__m128i _mm_haddd_epu16(__m128i)
__m128d _mm_macc_pd(__m128d, __m128d, __m128d)
__m128d _mm_nmsub_pd(__m128d, __m128d, __m128d)
__m128d _mm_frcz_pd(__m128d)
__m128 _mm_msub_ps(__m128, __m128, __m128)
__m128i _mm_haddw_epi8(__m128i)
void _mm_mwaitx(unsigned int, unsigned int, unsigned int)
__m256d _mm256_nmacc_pd(__m256d, __m256d, __m256d)
unsigned int _blcic_u32(unsigned int)
unsigned int _blsfill_u32(unsigned int)
__m128 _mm_frcz_ss(__m128, __m128)
__m128i _mm_haddd_epi16(__m128i)
__m256i _mm256_cmov_si256(__m256i, __m256i, __m256i)
__m128i _mm_hsubw_epi8(__m128i)
__m128i _mm_haddq_epu16(__m128i)
unsigned int _blci_u32(unsigned int)
unsigned int _lzcnt_u32(unsigned int)
__m128i _mm_rot_epi8(__m128i, __m128i)
__m128i _mm_com_epi64(__m128i, __m128i, int)
__m128 _mm_frcz_ps(__m128)
__m128d _mm_nmacc_sd(__m128d, __m128d, __m128d)
__m256 _mm256_maddsub_ps(__m256, __m256, __m256)
__m256i
Definition: immintrin.h:54
__m128i _mm_com_epi16(__m128i, __m128i, int)
__m128i _mm_haddq_epu8(__m128i)
__m128 _mm_nmsub_ps(__m128, __m128, __m128)
__m256 _mm256_nmacc_ps(__m256, __m256, __m256)
__m128i _mm_maccs_epi32(__m128i, __m128i, __m128i)
__m128i _mm_com_epu64(__m128i, __m128i, int)