STLdoc
STLdocumentation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
smmintrin.h
Go to the documentation of this file.
1 /***
2 *** Copyright (C) 1985-2007 Intel Corporation. All rights reserved.
3 ***
4 *** The information and source code contained herein is the exclusive
5 *** property of Intel Corporation and may not be disclosed, examined
6 *** or reproduced in whole or in part without explicit written authorization
7 *** from the company.
8 ***
9 ****/
10 
11 /*
12  * smmintrin.h
13  *
14  * Principal header file for Intel(R) Core(TM) 2 Duo processor
15  * SSE4.1 intrinsics
16  */
17 
18 #pragma once
19 #ifndef __midl
20 #ifndef _INCLUDED_SMM
21 #define _INCLUDED_SMM
22 
23 #if defined (_M_CEE_PURE)
24  #error ERROR: EMM intrinsics not supported in the pure mode!
25 #else /* defined (_M_CEE_PURE) */
26 
27 #include <tmmintrin.h>
28 
29 
/*
 * Rounding mode macros
 *
 * The first group selects a rounding direction, the second group selects
 * exception behaviour; one value from each group is OR-ed together to form
 * the rounding-mode argument of the _mm_round_* intrinsics.
 */

#define _MM_FROUND_TO_NEAREST_INT    0x00
#define _MM_FROUND_TO_NEG_INF        0x01
#define _MM_FROUND_TO_POS_INF        0x02
#define _MM_FROUND_TO_ZERO           0x03
#define _MM_FROUND_CUR_DIRECTION     0x04

#define _MM_FROUND_RAISE_EXC         0x00
#define _MM_FROUND_NO_EXC            0x08

/*
 * Ready-made direction/exception combinations.
 *
 * Every expansion is wrapped in parentheses so it remains a single operand
 * when it appears inside a larger expression, for example
 * (_MM_FROUND_NEARBYINT & 0x03) or (mode == _MM_FROUND_CEIL).  Without the
 * parentheses the neighbouring operator would bind to the right-hand
 * operand of '|' only.
 */
#define _MM_FROUND_NINT      (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_FLOOR     (_MM_FROUND_TO_NEG_INF     | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_CEIL      (_MM_FROUND_TO_POS_INF     | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_TRUNC     (_MM_FROUND_TO_ZERO        | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_RINT      (_MM_FROUND_CUR_DIRECTION  | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION  | _MM_FROUND_NO_EXC)
49 
/*
 * Ceil/floor convenience macros.
 *
 * Each one forwards its operands to the matching _mm_round_* intrinsic and
 * supplies _MM_FROUND_CEIL or _MM_FROUND_FLOOR as the rounding mode.
 */

/* Double precision */
#define _mm_ceil_pd(v)        _mm_round_pd((v), _MM_FROUND_CEIL)
#define _mm_floor_pd(v)       _mm_round_pd((v), _MM_FROUND_FLOOR)
#define _mm_ceil_sd(d, v)     _mm_round_sd((d), (v), _MM_FROUND_CEIL)
#define _mm_floor_sd(d, v)    _mm_round_sd((d), (v), _MM_FROUND_FLOOR)

/* Single precision */
#define _mm_ceil_ps(v)        _mm_round_ps((v), _MM_FROUND_CEIL)
#define _mm_floor_ps(v)       _mm_round_ps((v), _MM_FROUND_FLOOR)
#define _mm_ceil_ss(d, v)     _mm_round_ss((d), (v), _MM_FROUND_CEIL)
#define _mm_floor_ss(d, v)    _mm_round_ss((d), (v), _MM_FROUND_FLOOR)

/*
 * Convenience macros over the packed integer 128-bit comparison
 * intrinsics (_mm_testz_si128, _mm_testc_si128, _mm_testnzc_si128).
 */

#define _mm_test_all_zeros(m, v)         _mm_testz_si128((m), (v))

/* Caution: the argument is expanded three times, so it is evaluated more
 * than once; avoid passing expressions with side effects. */
#define _mm_test_all_ones(v) \
    _mm_testc_si128((v), _mm_cmpeq_epi32((v), (v)))

#define _mm_test_mix_ones_zeros(m, v)    _mm_testnzc_si128((m), (v))
76 
/* Give every declaration below C linkage when the header is compiled as
 * C++.  Tested with defined() so a C compiler never has to evaluate the
 * undefined identifier, matching the test used where the block is closed. */
#if defined (__cplusplus)
extern "C" {
#endif /* defined (__cplusplus) */
80 
81  // Integer blend instructions - select data from 2 sources
82  // using constant/variable mask
83 
84  extern __m128i _mm_blend_epi16 (__m128i v1, __m128i v2,
85  const int mask);
86  extern __m128i _mm_blendv_epi8 (__m128i v1, __m128i v2, __m128i mask);
87 
88  // Float single precision blend instructions - select data
89  // from 2 sources using constant/variable mask
90 
91  extern __m128 _mm_blend_ps (__m128 v1, __m128 v2, const int mask);
92  extern __m128 _mm_blendv_ps(__m128 v1, __m128 v2, __m128 v3);
93 
94  // Float double precision blend instructions - select data
95  // from 2 sources using constant/variable mask
96 
97  extern __m128d _mm_blend_pd (__m128d v1, __m128d v2, const int mask);
98  extern __m128d _mm_blendv_pd(__m128d v1, __m128d v2, __m128d v3);
99 
100  // Dot product instructions with mask-defined summing and zeroing
101  // of result's parts
102 
103  extern __m128 _mm_dp_ps(__m128 val1, __m128 val2, const int mask);
104  extern __m128d _mm_dp_pd(__m128d val1, __m128d val2, const int mask);
105 
106  // Packed integer 64-bit comparison, zeroing or filling with ones
107  // corresponding parts of result
108 
109  extern __m128i _mm_cmpeq_epi64(__m128i val1, __m128i val2);
110 
111  // Min/max packed integer instructions
112 
113  extern __m128i _mm_min_epi8 (__m128i val1, __m128i val2);
114  extern __m128i _mm_max_epi8 (__m128i val1, __m128i val2);
115 
116  extern __m128i _mm_min_epu16(__m128i val1, __m128i val2);
117  extern __m128i _mm_max_epu16(__m128i val1, __m128i val2);
118 
119  extern __m128i _mm_min_epi32(__m128i val1, __m128i val2);
120  extern __m128i _mm_max_epi32(__m128i val1, __m128i val2);
121  extern __m128i _mm_min_epu32(__m128i val1, __m128i val2);
122  extern __m128i _mm_max_epu32(__m128i val1, __m128i val2);
123 
124  // Packed integer 32-bit multiplication with truncation
125  // of upper halves of results
126 
127  extern __m128i _mm_mullo_epi32(__m128i a, __m128i b);
128 
129  // Packed integer 32-bit multiplication of 2 pairs of operands
130  // producing two 64-bit results
131 
132  extern __m128i _mm_mul_epi32(__m128i a, __m128i b);
133 
134  // Packed integer 128-bit bitwise comparison.
135  // return 1 if (val 'and' mask) == 0
136 
137  extern int _mm_testz_si128(__m128i mask, __m128i val);
138 
139  // Packed integer 128-bit bitwise comparison.
140  // return 1 if (val 'and_not' mask) == 0
141 
142  extern int _mm_testc_si128(__m128i mask, __m128i val);
143 
144  // Packed integer 128-bit bitwise comparison
145  // ZF = ((val 'and' mask) == 0) CF = ((val 'and_not' mask) == 0)
146  // return 1 if both ZF and CF are 0
147 
148  extern int _mm_testnzc_si128(__m128i mask, __m128i s2);
149 
150  // Insert single precision float into packed single precision
151  // array element selected by index.
152  // The bits [7-6] of the 3d parameter define src index,
153  // the bits [5-4] define dst index, and bits [3-0] define zeroing
154  // mask for dst
155 
156  extern __m128 _mm_insert_ps(__m128 dst, __m128 src, const int ndx);
157 
/*
 * Builds the ndx argument of _mm_insert_ps: srcField is shifted into
 * bits [7-6], dstField into bits [5-4], and zeroMask is OR-ed in unshifted.
 */
#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) \
    ((zeroMask) | ((dstField) << 4) | ((srcField) << 6))
162 
163  // Extract binary representation of single precision float from
164  // packed single precision array element selected by index
165 
166  extern int _mm_extract_ps(__m128 src, const int ndx);
167 
/*
 * Extract the single precision float in the element of src selected by ndx
 * into dest.  The int returned by _mm_extract_ps((src), (ndx)) is stored
 * through an int* alias of dest, so dest must be an addressable lvalue.
 *
 * The expansion is one parenthesized assignment expression; without the
 * outer parentheses an adjacent operator (==, ?:, ...) would bind to the
 * right-hand side of the assignment instead of to the whole macro.
 *
 * NOTE(review): the store goes through an int* that aliases a float
 * object; compilers that optimize on strict aliasing may need a
 * memcpy-based variant -- confirm before using outside MSVC.
 */
#define _MM_EXTRACT_FLOAT(dest, src, ndx) \
    (*((int*)&(dest)) = _mm_extract_ps((src), (ndx)))
173 
/*
 * Extract the single precision float element num of src into the lower
 * part of an __m128.  Implemented as _mm_insert_ps into _mm_setzero_ps()
 * with source field num, destination field 0 and zero mask 0x0e.
 */
#define _MM_PICK_OUT_PS(src, num) \
    _mm_insert_ps(_mm_setzero_ps(), (src), _MM_MK_INSERTPS_NDX((num), 0, 0x0e))
180 
181  // Insert integer into packed integer array element
182  // selected by index
183 
184  extern __m128i _mm_insert_epi8 (__m128i dst, int s, const int ndx);
185  extern __m128i _mm_insert_epi32(__m128i dst, int s, const int ndx);
186 
187 #if defined (_M_X64)
188  extern __m128i _mm_insert_epi64(__m128i dst, __int64 s, const int ndx);
189 #endif /* defined (_M_X64) */
190  // Extract integer from packed integer array element
191  // selected by index
192 
193  extern int _mm_extract_epi8 (__m128i src, const int ndx);
194  extern int _mm_extract_epi32(__m128i src, const int ndx);
195 
196 #if defined (_M_X64)
197  extern __int64 _mm_extract_epi64(__m128i src, const int ndx);
198 #endif /* defined (_M_X64) */
199 
200  // Horizontal packed word minimum and its index in
201  // result[15:0] and result[18:16] respectively
202 
203  extern __m128i _mm_minpos_epu16(__m128i shortValues);
204 
205  // Packed/single float double precision rounding
206 
207  extern __m128d _mm_round_pd(__m128d val, int iRoundMode);
208  extern __m128d _mm_round_sd(__m128d dst, __m128d val, int iRoundMode);
209 
210  // Packed/single float single precision rounding
211 
212  extern __m128 _mm_round_ps(__m128 val, int iRoundMode);
213  extern __m128 _mm_round_ss(__m128 dst, __m128 val, int iRoundMode);
214 
215  // Packed integer sign-extension
216 
217  extern __m128i _mm_cvtepi8_epi32 (__m128i byteValues);
218  extern __m128i _mm_cvtepi16_epi32(__m128i shortValues);
219  extern __m128i _mm_cvtepi8_epi64 (__m128i byteValues);
220  extern __m128i _mm_cvtepi32_epi64(__m128i intValues);
221  extern __m128i _mm_cvtepi16_epi64(__m128i shortValues);
222  extern __m128i _mm_cvtepi8_epi16 (__m128i byteValues);
223 
224  // Packed integer zero-extension
225 
226  extern __m128i _mm_cvtepu8_epi32 (__m128i byteValues);
227  extern __m128i _mm_cvtepu16_epi32(__m128i shortValues);
228  extern __m128i _mm_cvtepu8_epi64 (__m128i shortValues);
229  extern __m128i _mm_cvtepu32_epi64(__m128i intValues);
230  extern __m128i _mm_cvtepu16_epi64(__m128i shortValues);
231  extern __m128i _mm_cvtepu8_epi16 (__m128i byteValues);
232 
233 
234  // Pack 8 double words from 2 operands into 8 words of result
235  // with unsigned saturation
236 
237  extern __m128i _mm_packus_epi32(__m128i val1, __m128i val2);
238 
239  // Sum absolute 8-bit integer difference of adjacent groups of 4 byte
240  // integers in operands. Starting offsets within operands are
241  // determined by mask
242 
243  extern __m128i _mm_mpsadbw_epu8(__m128i s1, __m128i s2, const int msk);
244 
245  /*
246  * Load double quadword using non-temporal aligned hint
247  */
248 
250 
/* Close the extern "C" linkage block.  A linkage specification is ended by
 * the brace alone; no trailing semicolon belongs here. */
#if defined __cplusplus
} /* End "C" */
#endif /* defined __cplusplus */
254 
255 #endif /* defined (_M_CEE_PURE) */
256 
257 #endif /* _INCLUDED_SMM */
258 #endif /* __midl */
__m128d _mm_round_sd(__m128d dst, __m128d val, int iRoundMode)
__m128 _mm_insert_ps(__m128 dst, __m128 src, const int ndx)
__m128 _mm_dp_ps(__m128 val1, __m128 val2, const int mask)
__m128i _mm_cvtepi32_epi64(__m128i intValues)
__m128i _mm_minpos_epu16(__m128i shortValues)
__m128i _mm_cvtepi8_epi16(__m128i byteValues)
__m128i _mm_cvtepu32_epi64(__m128i intValues)
__m128i _mm_min_epu32(__m128i val1, __m128i val2)
__m128i _mm_blendv_epi8(__m128i v1, __m128i v2, __m128i mask)
__m128d _mm_dp_pd(__m128d val1, __m128d val2, const int mask)
__m128 _mm_blendv_ps(__m128 v1, __m128 v2, __m128 v3)
__m128d
Definition: emmintrin.h:48
__m128i _mm_stream_load_si128(__m128i *v1)
__m128 _mm_round_ss(__m128 dst, __m128 val, int iRoundMode)
__m128i _mm_cvtepi16_epi32(__m128i shortValues)
__m128 _mm_blend_ps(__m128 v1, __m128 v2, const int mask)
int _mm_testnzc_si128(__m128i mask, __m128i s2)
__m128 _mm_round_ps(__m128 val, int iRoundMode)
__m128i _mm_cvtepu8_epi16(__m128i byteValues)
__m128i _mm_min_epi32(__m128i val1, __m128i val2)
__m128i _mm_max_epu32(__m128i val1, __m128i val2)
int _mm_extract_epi32(__m128i src, const int ndx)
__m128i _mm_cvtepu16_epi32(__m128i shortValues)
int _mm_extract_epi8(__m128i src, const int ndx)
__m128i _mm_cvtepi8_epi32(__m128i byteValues)
__m128i _mm_cvtepu8_epi64(__m128i shortValues)
__m128d _mm_round_pd(__m128d val, int iRoundMode)
__m128i _mm_insert_epi32(__m128i dst, int s, const int ndx)
__m128i _mm_min_epi8(__m128i val1, __m128i val2)
__m128i _mm_mul_epi32(__m128i a, __m128i b)
__m128i
Definition: emmintrin.h:44
__m128
Definition: xmmintrin.h:70
__m128i _mm_mullo_epi32(__m128i a, __m128i b)
__m128i _mm_cmpeq_epi64(__m128i val1, __m128i val2)
__m128i _mm_max_epi8(__m128i val1, __m128i val2)
__m128i _mm_blend_epi16(__m128i v1, __m128i v2, const int mask)
int _mm_extract_ps(__m128 src, const int ndx)
__m128d _mm_blend_pd(__m128d v1, __m128d v2, const int mask)
__m128i _mm_min_epu16(__m128i val1, __m128i val2)
__m128i _mm_cvtepu8_epi32(__m128i byteValues)
__m128i _mm_cvtepi16_epi64(__m128i shortValues)
__m128d _mm_blendv_pd(__m128d v1, __m128d v2, __m128d v3)
__m128i _mm_max_epi32(__m128i val1, __m128i val2)
int _mm_testc_si128(__m128i mask, __m128i val)
__m128i _mm_cvtepu16_epi64(__m128i shortValues)
__m128i _mm_max_epu16(__m128i val1, __m128i val2)
int _mm_testz_si128(__m128i mask, __m128i val)
__m128i _mm_insert_epi8(__m128i dst, int s, const int ndx)
__m128i _mm_packus_epi32(__m128i val1, __m128i val2)
__m128i _mm_mpsadbw_epu8(__m128i s1, __m128i s2, const int msk)
__m128i _mm_cvtepi8_epi64(__m128i byteValues)