STLdoc/VS2013/smmintrin_8h_source.html

 /***

 *** Copyright (C) 1985-2007 Intel Corporation.  All rights reserved.

 ***

 *** The information and source code contained herein is the exclusive

 *** property of Intel Corporation and may not be disclosed, examined

 *** or reproduced in whole or in part without explicit written authorization

 *** from the company.

 ***

 ****/


 /*

  * smmintrin.h

  *

  * Principal header file for Intel(R) Core(TM) 2 Duo processor

  * SSE4.1 intrinsics

  */


 #pragma once

 #ifndef __midl

 #ifndef _INCLUDED_SMM

 #define _INCLUDED_SMM


 #if defined (_M_CEE_PURE)

         #error ERROR: EMM intrinsics not supported in the pure mode!

 #else  /* defined (_M_CEE_PURE) */


 #include <tmmintrin.h>


 /*

  * Rounding mode macros

  */


 /* Rounding-direction selectors (values 0x00-0x04). */
 #define _MM_FROUND_TO_NEAREST_INT    0x00
 #define _MM_FROUND_TO_NEG_INF        0x01
 #define _MM_FROUND_TO_POS_INF        0x02
 #define _MM_FROUND_TO_ZERO           0x03
 #define _MM_FROUND_CUR_DIRECTION     0x04

 /* Flag bit 0x08, OR-ed with one of the selectors above. */
 #define _MM_FROUND_RAISE_EXC         0x00
 #define _MM_FROUND_NO_EXC            0x08

 /*
  * Ready-made selector | flag combinations.
  *
  * Every expansion is wrapped in parentheses so that it acts as a single
  * value wherever it is used.  `|` binds more loosely than `+`, `&` and
  * `==`, so without the parentheses an expression such as
  * `_MM_FROUND_NEARBYINT & 0x03` would parse as `0x04 | (0x08 & 0x03)`.
  */
 #define _MM_FROUND_NINT      (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
 #define _MM_FROUND_FLOOR     (_MM_FROUND_TO_NEG_INF     | _MM_FROUND_RAISE_EXC)
 #define _MM_FROUND_CEIL      (_MM_FROUND_TO_POS_INF     | _MM_FROUND_RAISE_EXC)
 #define _MM_FROUND_TRUNC     (_MM_FROUND_TO_ZERO        | _MM_FROUND_RAISE_EXC)
 #define _MM_FROUND_RINT      (_MM_FROUND_CUR_DIRECTION  | _MM_FROUND_RAISE_EXC)
 #define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION  | _MM_FROUND_NO_EXC)


 /*

  * MACRO functions for ceil/floor intrinsics

  */


 /* Ceiling: forward to the matching round intrinsic with _MM_FROUND_CEIL. */
 #define _mm_ceil_pd(v)      _mm_round_pd((v), _MM_FROUND_CEIL)
 #define _mm_ceil_sd(d, v)   _mm_round_sd((d), (v), _MM_FROUND_CEIL)
 #define _mm_ceil_ps(v)      _mm_round_ps((v), _MM_FROUND_CEIL)
 #define _mm_ceil_ss(d, v)   _mm_round_ss((d), (v), _MM_FROUND_CEIL)

 /* Floor: forward to the matching round intrinsic with _MM_FROUND_FLOOR. */
 #define _mm_floor_pd(v)     _mm_round_pd((v), _MM_FROUND_FLOOR)
 #define _mm_floor_sd(d, v)  _mm_round_sd((d), (v), _MM_FROUND_FLOOR)
 #define _mm_floor_ps(v)     _mm_round_ps((v), _MM_FROUND_FLOOR)
 #define _mm_floor_ss(d, v)  _mm_round_ss((d), (v), _MM_FROUND_FLOOR)


 /*
  * MACRO functions for packed integer 128-bit comparison intrinsics.
  */

 /* Forwards (mask, val) unchanged to _mm_testz_si128. */
 #define _mm_test_all_zeros(mask, val)      _mm_testz_si128((mask), (val))

 /*
  * Expands to _mm_testc_si128(val, _mm_cmpeq_epi32(val, val)).
  *
  * `val` appears three times in the expansion, so an argument with side
  * effects is evaluated more than once.
  *
  * The expansion line must directly follow the backslash: a blank line
  * after the continuation ends the macro with an empty body and leaves
  * the call as a stray expression at file scope.
  */
 #define _mm_test_all_ones(val) \
               _mm_testc_si128((val), _mm_cmpeq_epi32((val),(val)))

 /* Forwards (mask, val) unchanged to _mm_testnzc_si128. */
 #define _mm_test_mix_ones_zeros(mask, val) _mm_testnzc_si128((mask), (val))


 /*
  * Give the declarations below C linkage when compiled as C++.
  * Tested with defined() so that a C translation unit never evaluates an
  * undefined identifier, and so the test matches the closing guard at the
  * end of the declarations.
  */
 #if defined (__cplusplus)
 extern "C" {
 #endif  /* defined (__cplusplus) */


         // Integer blend instructions - select data from 2 sources
         // using constant/variable mask
         //
         // _mm_blend_epi16 takes the mask as a `const int`; _mm_blendv_epi8
         // takes it as a third __m128i operand.
         // NOTE(review): the `const int` mask presumably has to be a
         // compile-time constant - confirm against the compiler docs.

         extern __m128i _mm_blend_epi16 (__m128i v1, __m128i v2,

                                         const int mask);

         extern __m128i _mm_blendv_epi8 (__m128i v1, __m128i v2, __m128i mask);


         // Float single precision blend instructions - select data
         // from 2 sources using constant/variable mask
         //
         // NOTE(review): `v3` of _mm_blendv_ps is presumably the variable
         // mask (the integer variant names that operand `mask`) - confirm.

         extern __m128  _mm_blend_ps (__m128  v1, __m128  v2, const int mask);

         extern __m128  _mm_blendv_ps(__m128  v1, __m128  v2, __m128 v3);


         // Float double precision blend instructions - select data
         // from 2 sources using constant/variable mask
         //
         // Same shape as the single precision pair, on __m128d operands.

         extern __m128d _mm_blend_pd (__m128d v1, __m128d v2, const int mask);

         extern __m128d _mm_blendv_pd(__m128d v1, __m128d v2, __m128d v3);


         // Dot product instructions with mask-defined summing and zeroing
         // of result's parts
         //
         // Both forms take two vector operands of the same type plus a
         // `const int` mask and return a vector of that type.
         // NOTE(review): the exact bit layout of `mask` is not described
         // in this header - consult the intrinsics reference.

         extern __m128  _mm_dp_ps(__m128  val1, __m128  val2, const int mask);

         extern __m128d _mm_dp_pd(__m128d val1, __m128d val2, const int mask);


         // Packed integer 64-bit comparison, zeroing or filling with ones
         // corresponding parts of result

         extern __m128i _mm_cmpeq_epi64(__m128i val1, __m128i val2);


         // Min/max packed integer instructions
         //
         // Every variant takes two __m128i operands and returns __m128i.
         // NOTE(review): the suffix presumably encodes lane type and width
         // (epi = signed, epu = unsigned; 8/16/32 = bits per lane) -
         // confirm against the intrinsics reference.

         extern __m128i _mm_min_epi8 (__m128i val1, __m128i val2);

         extern __m128i _mm_max_epi8 (__m128i val1, __m128i val2);


         extern __m128i _mm_min_epu16(__m128i val1, __m128i val2);

         extern __m128i _mm_max_epu16(__m128i val1, __m128i val2);


         extern __m128i _mm_min_epi32(__m128i val1, __m128i val2);

         extern __m128i _mm_max_epi32(__m128i val1, __m128i val2);

         extern __m128i _mm_min_epu32(__m128i val1, __m128i val2);

         extern __m128i _mm_max_epu32(__m128i val1, __m128i val2);


         // Packed integer 32-bit multiplication with truncation
         // of upper halves of results
         // (each product is cut back to 32 bits, per the comment above)

         extern __m128i _mm_mullo_epi32(__m128i a, __m128i b);


         // Packed integer 32-bit multiplication of 2 pairs of operands
         // producing two 64-bit results
         // NOTE(review): which two of the 32-bit lanes of `a` and `b` form
         // the pairs is not stated here - consult the intrinsics reference.

         extern __m128i _mm_mul_epi32(__m128i a, __m128i b);


         // Packed integer 128-bit bitwise comparison.
         // return 1 if (val 'and' mask) == 0

         extern int _mm_testz_si128(__m128i mask, __m128i val);


         // Packed integer 128-bit bitwise comparison.
         // return 1 if (val 'and_not' mask) == 0

         extern int _mm_testc_si128(__m128i mask, __m128i val);


         // Packed integer 128-bit bitwise comparison
         // ZF = ((val 'and' mask) == 0)  CF = ((val 'and_not' mask) == 0)
         // return 1 if both ZF and CF are 0
         //
         // The second parameter is named `val` so that the prototype
         // matches the formula above and the two sibling declarations.

         extern int _mm_testnzc_si128(__m128i mask, __m128i val);


         // Insert single precision float into packed single precision
         // array element selected by index.
         // Bits [7-6] of the 3rd parameter define the src index,
         // bits [5-4] define the dst index, and bits [3-0] define the
         // zeroing mask for dst.
         // _MM_MK_INSERTPS_NDX in this header builds that value from its
         // three fields.

         extern __m128 _mm_insert_ps(__m128 dst, __m128 src, const int ndx);


         // Helper macro to create ndx-parameter value for _mm_insert_ps


 /*
  * Packs the three fields of the _mm_insert_ps index into one int:
  * srcField is shifted to bits 7-6, dstField to bits 5-4, and zeroMask is
  * OR-ed into the low bits.  The fields are not range-masked, so callers
  * must pass values that fit their bit widths.
  *
  * The expansion line must directly follow the backslash: a blank line
  * after the continuation ends the macro with an empty body.
  */
 #define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) \
         (((srcField)<<6) | ((dstField)<<4) | (zeroMask))


         // Extract binary representation of single precision float from
         // packed single precision array element selected by index
         //
         // The result is returned as an int (the bit pattern), not as a
         // float; _MM_EXTRACT_FLOAT in this header stores it into a
         // caller-supplied destination.

         extern int _mm_extract_ps(__m128 src, const int ndx);


         // Extract single precision float from packed single precision

         // array element selected by index into dest


 /*
  * Stores the int returned by _mm_extract_ps(src, ndx) into `dest` by
  * writing through an int* alias of &dest, so `dest` must be an lvalue.
  *
  * NOTE(review): the expansion is a bare assignment expression and
  * type-puns through a cast pointer; both are kept exactly as they were
  * so existing uses expand identically - confirm the target compiler's
  * aliasing rules before relying on this elsewhere.
  *
  * The expansion line must directly follow the backslash: a blank line
  * after the continuation ends the macro with an empty body.
  */
 #define _MM_EXTRACT_FLOAT(dest, src, ndx) \
         *((int*)&(dest)) = _mm_extract_ps((src), (ndx))


         // Extract specified single precision float element

         // into the lower part of __m128


 /*
  * Expands to _mm_insert_ps(_mm_setzero_ps(), src, ndx) where ndx is
  * _MM_MK_INSERTPS_NDX(num, 0, 0x0e): source field `num`, destination
  * field 0, zero mask 0x0e.
  *
  * Each continuation backslash must be directly followed by the next
  * expansion line: a blank line after it truncates the macro there.
  */
 #define _MM_PICK_OUT_PS(src, num) \
         _mm_insert_ps(_mm_setzero_ps(), (src), \
                       _MM_MK_INSERTPS_NDX((num), 0, 0x0e))


         // Insert integer into packed integer array element
         // selected by index
         //
         // `s` is the value to insert and `ndx` the element index; the
         // updated vector is returned.
         // NOTE(review): `ndx` is `const int` and presumably has to be a
         // compile-time constant - confirm against the compiler docs.

         extern __m128i _mm_insert_epi8 (__m128i dst, int s, const int ndx);

         extern __m128i _mm_insert_epi32(__m128i dst, int s, const int ndx);


         // The 64-bit variant is declared only for x64 builds (_M_X64).
 #if defined (_M_X64)

         extern __m128i _mm_insert_epi64(__m128i dst, __int64 s, const int ndx);

 #endif  /* defined (_M_X64) */

         // Extract integer from packed integer array element
         // selected by index
         //
         // Both the 8-bit and the 32-bit variant return a plain int.
         // NOTE(review): whether the 8-bit result is zero- or
         // sign-extended is not stated here - consult the intrinsics
         // reference.

         extern int   _mm_extract_epi8 (__m128i src, const int ndx);

         extern int   _mm_extract_epi32(__m128i src, const int ndx);


         // The 64-bit variant is declared only for x64 builds (_M_X64).
 #if defined (_M_X64)

         extern __int64 _mm_extract_epi64(__m128i src, const int ndx);

 #endif  /* defined (_M_X64) */


         // Horizontal packed word minimum and its index in
         // result[15:0] and result[18:16] respectively
         //
         // NOTE(review): the contents of the remaining result bits are not
         // stated here - consult the intrinsics reference.

         extern __m128i _mm_minpos_epu16(__m128i shortValues);


         // Packed/single float double precision rounding
         //
         // iRoundMode is one of the _MM_FROUND_* values defined earlier in
         // this header; the _mm_ceil_* / _mm_floor_* macros pass
         // _MM_FROUND_CEIL / _MM_FROUND_FLOOR here.
         // NOTE(review): iRoundMode is a plain `int`, unlike the
         // `const int` immediates elsewhere in this header; it presumably
         // still has to be a compile-time constant - confirm.

         extern __m128d _mm_round_pd(__m128d val, int iRoundMode);

         extern __m128d _mm_round_sd(__m128d dst, __m128d val, int iRoundMode);


         // Packed/single float single precision rounding
         //
         // Same parameter shape as the double precision pair, on __m128.

         extern __m128  _mm_round_ps(__m128  val, int iRoundMode);

         extern __m128  _mm_round_ss(__m128 dst, __m128  val, int iRoundMode);


         // Packed integer sign-extension
         //
         // Names read _mm_cvtepi<from>_epi<to>; the parameter name
         // (byteValues / shortValues / intValues) mirrors the source width.
         // NOTE(review): which source lanes are widened is not stated
         // here - consult the intrinsics reference.

         extern __m128i _mm_cvtepi8_epi32 (__m128i byteValues);

         extern __m128i _mm_cvtepi16_epi32(__m128i shortValues);

         extern __m128i _mm_cvtepi8_epi64 (__m128i byteValues);

         extern __m128i _mm_cvtepi32_epi64(__m128i intValues);

         extern __m128i _mm_cvtepi16_epi64(__m128i shortValues);

         extern __m128i _mm_cvtepi8_epi16 (__m128i byteValues);


         // Packed integer zero-extension
         //
         // Names read _mm_cvtepu<from>_epi<to>; the parameter name
         // (byteValues / shortValues / intValues) mirrors the source width,
         // so the 8-bit source of _mm_cvtepu8_epi64 is `byteValues`, as in
         // the other 8-bit conversions.

         extern __m128i _mm_cvtepu8_epi32 (__m128i byteValues);

         extern __m128i _mm_cvtepu16_epi32(__m128i shortValues);

         extern __m128i _mm_cvtepu8_epi64 (__m128i byteValues);

         extern __m128i _mm_cvtepu32_epi64(__m128i intValues);

         extern __m128i _mm_cvtepu16_epi64(__m128i shortValues);

         extern __m128i _mm_cvtepu8_epi16 (__m128i byteValues);


         // Pack 8 double words from 2 operands into 8 words of result
         // with unsigned saturation
         // (4 double words come from each of val1 and val2)
         // NOTE(review): the order of val1 / val2 lanes in the result is
         // not stated here - consult the intrinsics reference.

         extern __m128i _mm_packus_epi32(__m128i val1, __m128i val2);


         // Sum absolute 8-bit integer difference of adjacent groups of 4 byte
         // integers in operands. Starting offsets within operands are
         // determined by mask
         //
         // NOTE(review): the bit layout of `msk` is not described in this
         // header - consult the intrinsics reference.

         extern __m128i _mm_mpsadbw_epu8(__m128i s1, __m128i s2, const int msk);


         /*
          * Load double quadword using non-temporal aligned hint
          *
          * Takes a pointer to the __m128i to load and returns the loaded
          * value.
          * NOTE(review): the pointer is not const-qualified although this
          * is described as a load; check whether the toolchain accepts
          * `const __m128i *` before changing the prototype.
          * NOTE(review): "aligned" presumably means v1 must be 16-byte
          * aligned - confirm against the intrinsics reference.
          */


         extern __m128i _mm_stream_load_si128(__m128i* v1);


 /*
  * Close the extern "C" linkage block opened above the declarations.
  * A linkage specification ends at its closing brace; no semicolon follows
  * it (a trailing `;` is an extra empty declaration that pedantic C++
  * builds diagnose).
  */
 #if defined (__cplusplus)
 } /* End "C" */
 #endif  /* defined (__cplusplus) */


 #endif  /* defined (_M_CEE_PURE) */


 #endif  /* _INCLUDED_SMM */

 #endif  /* __midl */

_mm_round_sd
__m128d _mm_round_sd(__m128d dst, __m128d val, int iRoundMode)

_mm_insert_ps
__m128 _mm_insert_ps(__m128 dst, __m128 src, const int ndx)

_mm_dp_ps
__m128 _mm_dp_ps(__m128 val1, __m128 val2, const int mask)

_mm_cvtepi32_epi64
__m128i _mm_cvtepi32_epi64(__m128i intValues)

_mm_minpos_epu16
__m128i _mm_minpos_epu16(__m128i shortValues)

_mm_cvtepi8_epi16
__m128i _mm_cvtepi8_epi16(__m128i byteValues)

_mm_cvtepu32_epi64
__m128i _mm_cvtepu32_epi64(__m128i intValues)

_mm_min_epu32
__m128i _mm_min_epu32(__m128i val1, __m128i val2)

_mm_blendv_epi8
__m128i _mm_blendv_epi8(__m128i v1, __m128i v2, __m128i mask)

_mm_dp_pd
__m128d _mm_dp_pd(__m128d val1, __m128d val2, const int mask)

_mm_blendv_ps
__m128 _mm_blendv_ps(__m128 v1, __m128 v2, __m128 v3)

__m128d
__m128d
Definition: emmintrin.h:48

_mm_stream_load_si128
__m128i _mm_stream_load_si128(__m128i *v1)

_mm_round_ss
__m128 _mm_round_ss(__m128 dst, __m128 val, int iRoundMode)

_mm_cvtepi16_epi32
__m128i _mm_cvtepi16_epi32(__m128i shortValues)

tmmintrin.h

_mm_blend_ps
__m128 _mm_blend_ps(__m128 v1, __m128 v2, const int mask)

_mm_testnzc_si128
int _mm_testnzc_si128(__m128i mask, __m128i s2)

_mm_round_ps
__m128 _mm_round_ps(__m128 val, int iRoundMode)

_mm_cvtepu8_epi16
__m128i _mm_cvtepu8_epi16(__m128i byteValues)

_mm_min_epi32
__m128i _mm_min_epi32(__m128i val1, __m128i val2)

_mm_max_epu32
__m128i _mm_max_epu32(__m128i val1, __m128i val2)

_mm_extract_epi32
int _mm_extract_epi32(__m128i src, const int ndx)

_mm_cvtepu16_epi32
__m128i _mm_cvtepu16_epi32(__m128i shortValues)

_mm_extract_epi8
int _mm_extract_epi8(__m128i src, const int ndx)

_mm_cvtepi8_epi32
__m128i _mm_cvtepi8_epi32(__m128i byteValues)

_mm_cvtepu8_epi64
__m128i _mm_cvtepu8_epi64(__m128i shortValues)

_mm_round_pd
__m128d _mm_round_pd(__m128d val, int iRoundMode)

_mm_insert_epi32
__m128i _mm_insert_epi32(__m128i dst, int s, const int ndx)

_mm_min_epi8
__m128i _mm_min_epi8(__m128i val1, __m128i val2)

_mm_mul_epi32
__m128i _mm_mul_epi32(__m128i a, __m128i b)

__m128i
__m128i
Definition: emmintrin.h:44

__m128
__m128
Definition: xmmintrin.h:70

_mm_mullo_epi32
__m128i _mm_mullo_epi32(__m128i a, __m128i b)

_mm_cmpeq_epi64
__m128i _mm_cmpeq_epi64(__m128i val1, __m128i val2)

_mm_max_epi8
__m128i _mm_max_epi8(__m128i val1, __m128i val2)

_mm_blend_epi16
__m128i _mm_blend_epi16(__m128i v1, __m128i v2, const int mask)

_mm_extract_ps
int _mm_extract_ps(__m128 src, const int ndx)

_mm_blend_pd
__m128d _mm_blend_pd(__m128d v1, __m128d v2, const int mask)

_mm_min_epu16
__m128i _mm_min_epu16(__m128i val1, __m128i val2)

_mm_cvtepu8_epi32
__m128i _mm_cvtepu8_epi32(__m128i byteValues)

_mm_cvtepi16_epi64
__m128i _mm_cvtepi16_epi64(__m128i shortValues)

_mm_blendv_pd
__m128d _mm_blendv_pd(__m128d v1, __m128d v2, __m128d v3)

_mm_max_epi32
__m128i _mm_max_epi32(__m128i val1, __m128i val2)

_mm_testc_si128
int _mm_testc_si128(__m128i mask, __m128i val)

_mm_cvtepu16_epi64
__m128i _mm_cvtepu16_epi64(__m128i shortValues)

_mm_max_epu16
__m128i _mm_max_epu16(__m128i val1, __m128i val2)

_mm_testz_si128
int _mm_testz_si128(__m128i mask, __m128i val)

_mm_insert_epi8
__m128i _mm_insert_epi8(__m128i dst, int s, const int ndx)

_mm_packus_epi32
__m128i _mm_packus_epi32(__m128i val1, __m128i val2)

_mm_mpsadbw_epu8
__m128i _mm_mpsadbw_epu8(__m128i s1, __m128i s2, const int msk)

_mm_cvtepi8_epi64
__m128i _mm_cvtepi8_epi64(__m128i byteValues)