STLdoc
STLdocumentation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
smmintrin.h
Go to the documentation of this file.
1 /***
2 *** Copyright (C) 1985-2015 Intel Corporation. All rights reserved.
3 ***
4 *** The information and source code contained herein is the exclusive
5 *** property of Intel Corporation and may not be disclosed, examined
6 *** or reproduced in whole or in part without explicit written authorization
7 *** from the company.
8 ***
9 ****/
10 
11 /*
12  * smmintrin.h
13  *
14  * Principal header file for Intel(R) Core(TM) 2 Duo processor
15  * SSE4.1 intrinsics
16  */
17 
18 #pragma once
19 
20 #if !defined(_M_IX86) && !defined(_M_X64)
21 #error This header is specific to X86 and X64 targets
22 #endif
23 
24 #ifndef _INCLUDED_SMM
25 #define _INCLUDED_SMM
26 #ifndef __midl
27 
28 #if defined (_M_CEE_PURE)
29  #error ERROR: EMM intrinsics not supported in the pure mode!
30 #else /* defined (_M_CEE_PURE) */
31 
32 #include <tmmintrin.h>
33 
34 
35 /*
36  * Rounding mode macros
37  */
38 
/* Rounding-direction selectors (values 0x00-0x04). */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04

/* Exception-control flag, OR-ed with one of the selectors above. */
#define _MM_FROUND_RAISE_EXC 0x00
#define _MM_FROUND_NO_EXC 0x08

/*
 * Ready-made selector/flag combinations.
 *
 * Each expansion is fully parenthesized so the macro behaves as a single
 * value inside any larger expression (e.g. `_MM_FROUND_RINT & 0x03`);
 * without the parentheses the `|` would bind looser than a neighbouring
 * `&`, `==`, `*`, ... and silently change the result.
 */
#define _MM_FROUND_NINT      (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_FLOOR     (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_CEIL      (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_TRUNC     (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_RINT      (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
54 
55 /*
56  * MACRO functions for ceil/floor intrinsics
57  */
58 
/* Single precision: packed (_ps) and scalar (_ss) ceil/floor. */
#define _mm_ceil_ps(v)       _mm_round_ps((v), _MM_FROUND_CEIL)
#define _mm_floor_ps(v)      _mm_round_ps((v), _MM_FROUND_FLOOR)
#define _mm_ceil_ss(d, v)    _mm_round_ss((d), (v), _MM_FROUND_CEIL)
#define _mm_floor_ss(d, v)   _mm_round_ss((d), (v), _MM_FROUND_FLOOR)

/* Double precision: packed (_pd) and scalar (_sd) ceil/floor. */
#define _mm_ceil_pd(v)       _mm_round_pd((v), _MM_FROUND_CEIL)
#define _mm_floor_pd(v)      _mm_round_pd((v), _MM_FROUND_FLOOR)
#define _mm_ceil_sd(d, v)    _mm_round_sd((d), (v), _MM_FROUND_CEIL)
#define _mm_floor_sd(d, v)   _mm_round_sd((d), (v), _MM_FROUND_FLOOR)

/*
 * MACRO functions for packed integer 128-bit comparison intrinsics.
 * Each one forwards to the matching _mm_test*_si128 intrinsic.
 */

#define _mm_test_all_zeros(m, v)      _mm_testz_si128((m), (v))

/* Compares the argument against an all-ones value built by _mm_cmpeq_epi32(v, v);
 * note that the argument expression is expanded three times. */
#define _mm_test_all_ones(v) \
    _mm_testc_si128((v), _mm_cmpeq_epi32((v),(v)))

#define _mm_test_mix_ones_zeros(m, v) _mm_testnzc_si128((m), (v))
81 
82 #if __cplusplus
83 extern "C" {
84 #endif /* __cplusplus */
85 
86  // Integer blend instructions - select data from 2 sources
87  // using constant or variable mask
88 
89  extern __m128i _mm_blend_epi16 (__m128i, __m128i, const int /* mask */);
91 
92  // Float single precision blend instructions - select data
93  // from 2 sources using constant/variable mask
94 
95  extern __m128 _mm_blend_ps (__m128, __m128, const int /* mask */);
96  extern __m128 _mm_blendv_ps(__m128, __m128, __m128 /* mask */);
97 
98  // Float double precision blend instructions - select data
99  // from 2 sources using constant/variable mask
100 
101  extern __m128d _mm_blend_pd (__m128d, __m128d, const int /* mask */);
102  extern __m128d _mm_blendv_pd(__m128d, __m128d, __m128d /* mask */);
103 
104  // Dot product instructions with mask-defined summing and zeroing
105  // of result's parts
106 
107  extern __m128 _mm_dp_ps(__m128, __m128, const int /* mask */);
108  extern __m128d _mm_dp_pd(__m128d, __m128d, const int /* mask */);
109 
110  // Packed integer 64-bit comparison, zeroing or filling with ones
111  // corresponding parts of result
112 
114 
115  // Min/max packed integer instructions
116 
119 
122 
127 
128  // Packed integer 32-bit multiplication with truncation
129  // of upper halves of results
130 
132 
133  // Packed integer 32-bit multiplication of 2 pairs of operands
134  // producing two 64-bit results
135 
137 
138  // Packed integer 128-bit bitwise comparison.
139  // return 1 if (val 'and' mask) == 0
140 
141  extern int _mm_testz_si128(__m128i /* mask */, __m128i /* val */);
142 
143  // Packed integer 128-bit bitwise comparison.
144  // return 1 if (val 'and_not' mask) == 0
145 
146  extern int _mm_testc_si128(__m128i /* mask */, __m128i /* val */);
147 
148  // Packed integer 128-bit bitwise comparison
149  // ZF = ((val 'and' mask) == 0) CF = ((val 'and_not' mask) == 0)
150  // return 1 if both ZF and CF are 0
151 
152  extern int _mm_testnzc_si128(__m128i /* mask */, __m128i /* val */);
153 
154  // Insert single precision float into packed single precision
155  // array element selected by index.
156  // The bits [7-6] of the 3d parameter define src index,
157  // the bits [5-4] define dst index, and bits [3-0] define zeroing
158  // mask for dst
159 
160  extern __m128 _mm_insert_ps(__m128 /* dst */, __m128 /* src */, const int /* index */);
161 
162  // Helper macro to create index-parameter value for _mm_insert_ps
163 
164 #define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) \
165  (((srcField)<<6) | ((dstField)<<4) | (zeroMask))
166 
167  // Extract binary representation of single precision float from
168  // packed single precision array element selected by index
169 
170  extern int _mm_extract_ps(__m128 /* src */, const int /* index */);
171 
172  // Extract single precision float from packed single precision
173  // array element selected by index into dest
174 
175 #define _MM_EXTRACT_FLOAT(dest, src, ndx) \
176  *((int*)&(dest)) = _mm_extract_ps((src), (ndx))
177 
178  // Extract specified single precision float element
179  // into the lower part of __m128
180 
181 #define _MM_PICK_OUT_PS(src, num) \
182  _mm_insert_ps(_mm_setzero_ps(), (src), \
183  _MM_MK_INSERTPS_NDX((num), 0, 0x0e))
184 
185  // Insert integer into packed integer array element
186  // selected by index
187 
188  extern __m128i _mm_insert_epi8 (__m128i /* dst */, int /* src */, const int /* index */);
189  extern __m128i _mm_insert_epi32(__m128i /* dst */, int /* src */, const int /* index */);
190 
191 #if defined (_M_X64)
192  extern __m128i _mm_insert_epi64(__m128i /* dst */, __int64 /* src */, const int /* index */);
193 #endif /* defined (_M_X64) */
194  // Extract integer from packed integer array element
195  // selected by index
196 
197  extern int _mm_extract_epi8 (__m128i /* src */, const int /* index */);
198  extern int _mm_extract_epi32(__m128i /* src */, const int /* index */);
199 
200 #if defined (_M_X64)
201  extern __int64 _mm_extract_epi64(__m128i /* src */, const int /* index */);
202 #endif /* defined (_M_X64) */
203 
204  // Horizontal packed word minimum and its index in
205  // result[15:0] and result[18:16] respectively
206 
208 
209  // Packed/single float double precision rounding
210 
211  extern __m128d _mm_round_pd(__m128d /* val */, int /* iRoundMode */);
212  extern __m128d _mm_round_sd(__m128d /* dst */, __m128d /* val */, int /* iRoundMode */);
213 
214  // Packed/single float single precision rounding
215 
216  extern __m128 _mm_round_ps(__m128 /* val */, int /* iRoundMode */);
217  extern __m128 _mm_round_ss(__m128 /* dst */, __m128 /* val */, int /* iRoundMode */);
218 
219  // Packed integer sign-extension
220 
227 
228  // Packed integer zero-extension
229 
236 
237 
238  // Pack 8 double words from 2 operands into 8 words of result
239  // with unsigned saturation
240 
242 
243  // Sum absolute 8-bit integer difference of adjacent groups of 4 byte
244  // integers in operands. Starting offsets within operands are
245  // determined by mask
246 
247  extern __m128i _mm_mpsadbw_epu8(__m128i /* s1 */, __m128i /* s2 */, const int /* mask */);
248 
249  /*
250  * Load double quadword using non-temporal aligned hint
251  */
252 
253  extern __m128i _mm_stream_load_si128(const __m128i*);
254 
255 #if defined __cplusplus
256 }; /* End "C" */
257 #endif /* defined __cplusplus */
258 
259 #endif /* defined (_M_CEE_PURE) */
260 #endif /* __midl */
261 #endif /* _INCLUDED_SMM */
__m128i _mm_cvtepu16_epi64(__m128i)
__m128i _mm_max_epi32(__m128i, __m128i)
__m128i _mm_blendv_epi8(__m128i, __m128i, __m128i mask)
__m128i _mm_cvtepu16_epi32(__m128i)
__m128 _mm_blend_ps(__m128, __m128, const int)
__m128i _mm_cvtepi8_epi16(__m128i)
__m128i _mm_blend_epi16(__m128i, __m128i, const int)
__m128i _mm_cvtepu8_epi64(__m128i)
__m128i _mm_packus_epi32(__m128i, __m128i)
__m128d
Definition: emmintrin.h:57
int _mm_extract_epi8(__m128i, const int)
__m128i _mm_mpsadbw_epu8(__m128i, __m128i, const int)
__m128i _mm_cvtepi16_epi32(__m128i)
__m128i _mm_cmpeq_epi64(__m128i, __m128i)
__m128i _mm_cvtepi32_epi64(__m128i)
int _mm_testnzc_si128(__m128i, __m128i)
__m128i _mm_minpos_epu16(__m128i)
__m128i _mm_insert_epi32(__m128i, int, const int)
int _mm_testz_si128(__m128i, __m128i)
__m128i _mm_min_epi8(__m128i, __m128i)
__m128i _mm_cvtepu8_epi16(__m128i)
__m128i _mm_cvtepu32_epi64(__m128i)
__m128 _mm_round_ss(__m128, __m128, int)
__m128i _mm_cvtepi8_epi32(__m128i)
__m128i _mm_min_epi32(__m128i, __m128i)
__m128 _mm_insert_ps(__m128, __m128, const int)
__m128i
Definition: emmintrin.h:53
__m128d _mm_blendv_pd(__m128d, __m128d, __m128d)
int _mm_extract_epi32(__m128i, const int)
__m128 _mm_round_ps(__m128, int)
__m128
Definition: xmmintrin.h:75
int _mm_testc_si128(__m128i, __m128i)
__m128d _mm_round_pd(__m128d, int)
int _mm_extract_ps(__m128, const int)
__m128i _mm_cvtepi8_epi64(__m128i)
__m128d _mm_dp_pd(__m128d, __m128d, const int)
__m128i _mm_max_epi8(__m128i, __m128i)
__m128i _mm_stream_load_si128(const __m128i *)
__m128d _mm_blend_pd(__m128d, __m128d, const int)
__m128d _mm_round_sd(__m128d, __m128d, int)
__m128i _mm_max_epu32(__m128i, __m128i)
__m128i _mm_cvtepi16_epi64(__m128i)
__m128i _mm_min_epu16(__m128i, __m128i)
__m128i _mm_min_epu32(__m128i, __m128i)
__m128i _mm_insert_epi8(__m128i, int, const int)
__m128i _mm_max_epu16(__m128i, __m128i)
__m128i _mm_mul_epi32(__m128i, __m128i)
__m128i _mm_mullo_epi32(__m128i, __m128i)
__m128 _mm_blendv_ps(__m128, __m128, __m128)
__m128i _mm_cvtepu8_epi32(__m128i)
__m128 _mm_dp_ps(__m128, __m128, const int)