dvec.h File Reference
#include <immintrin.h>
#include <fvec.h>
#include <vcruntime.h>


Classes

class  M128
 
class  I128vec1
 
class  I64vec2
 
class  I32vec4
 
class  Is32vec4
 
class  Iu32vec4
 
class  I16vec8
 
class  Is16vec8
 
class  Iu16vec8
 
class  I8vec16
 
class  Is8vec16
 
class  Iu8vec16
 
class  F64vec2
 
class  F32vec8
 
class  F64vec4
 

Macros

#define _VEC_ASSERT(_Expression)   (void)( (!!(_Expression)) || (_wassert(_CRT_WIDE(#_Expression), _CRT_WIDE(__FILE__), __LINE__), 0) )
 
#define _f64vec2_abs_mask   ((F64vec2)__f64vec2_abs_mask_cheat.m)
 
#define _MM_16UB(element, vector)   (*((unsigned char*)&##vector + ##element))
 
#define _MM_16B(element, vector)   (*((signed char*)&##vector + ##element))
 
#define _MM_8UW(element, vector)   (*((unsigned short*)&##vector + ##element))
 
#define _MM_8W(element, vector)   (*((short*)&##vector + ##element))
 
#define _MM_4UDW(element, vector)   (*((unsigned int*)&##vector + ##element))
 
#define _MM_4DW(element, vector)   (*((int*)&##vector + ##element))
 
#define _MM_2QW(element, vector)   (*((__int64*)&##vector + ##element))
 
#define IVEC128_LOGICALS(vect, element)
 
#define IVEC128_ADD_SUB(vect, element, opsize)
 
#define IVEC128_SELECT(vect12, vect34, element, selop)
 
#define F64vec2_COMP(op)   friend F64vec2 cmp##op (const F64vec2 &_A, const F64vec2 &_B) { return _mm_cmp##op##_pd(_A,_B); }
 
#define F64vec2_COMI(op)   friend int comi##op (const F64vec2 &_A, const F64vec2 &_B) { return _mm_comi##op##_sd(_A,_B); }
 
#define F64vec2_UCOMI(op)   friend int ucomi##op (const F64vec2 &_A, const F64vec2 &_B) { return _mm_ucomi##op##_sd(_A,_B); }
 
#define F64vec2_SELECT(op)
 

Functions

void __cdecl _wassert (_In_z_ const wchar_t *_Message, _In_z_ const wchar_t *_File, _In_ unsigned _Line)
 
const __m128i get_mask128 ()
 
M128 operator& (const M128 &_A, const M128 &_B)
 
M128 operator| (const M128 &_A, const M128 &_B)
 
M128 operator^ (const M128 &_A, const M128 &_B)
 
M128 andnot (const M128 &_A, const M128 &_B)
 
I64vec2 unpack_low (const I64vec2 &_A, const I64vec2 &_B)
 
I64vec2 unpack_high (const I64vec2 &_A, const I64vec2 &_B)
 
I32vec4 cmpeq (const I32vec4 &_A, const I32vec4 &_B)
 
I32vec4 cmpneq (const I32vec4 &_A, const I32vec4 &_B)
 
I32vec4 unpack_low (const I32vec4 &_A, const I32vec4 &_B)
 
I32vec4 unpack_high (const I32vec4 &_A, const I32vec4 &_B)
 
Is32vec4 cmpeq (const Is32vec4 &_A, const Is32vec4 &_B)
 
Is32vec4 cmpneq (const Is32vec4 &_A, const Is32vec4 &_B)
 
Is32vec4 cmpgt (const Is32vec4 &_A, const Is32vec4 &_B)
 
Is32vec4 cmplt (const Is32vec4 &_A, const Is32vec4 &_B)
 
Is32vec4 unpack_low (const Is32vec4 &_A, const Is32vec4 &_B)
 
Is32vec4 unpack_high (const Is32vec4 &_A, const Is32vec4 &_B)
 
I64vec2 operator* (const Iu32vec4 &_A, const Iu32vec4 &_B)
 
Iu32vec4 cmpeq (const Iu32vec4 &_A, const Iu32vec4 &_B)
 
Iu32vec4 cmpneq (const Iu32vec4 &_A, const Iu32vec4 &_B)
 
Iu32vec4 unpack_low (const Iu32vec4 &_A, const Iu32vec4 &_B)
 
Iu32vec4 unpack_high (const Iu32vec4 &_A, const Iu32vec4 &_B)
 
I16vec8 operator* (const I16vec8 &_A, const I16vec8 &_B)
 
I16vec8 cmpeq (const I16vec8 &_A, const I16vec8 &_B)
 
I16vec8 cmpneq (const I16vec8 &_A, const I16vec8 &_B)
 
I16vec8 unpack_low (const I16vec8 &_A, const I16vec8 &_B)
 
I16vec8 unpack_high (const I16vec8 &_A, const I16vec8 &_B)
 
Is16vec8 operator* (const Is16vec8 &_A, const Is16vec8 &_B)
 
Is16vec8 cmpeq (const Is16vec8 &_A, const Is16vec8 &_B)
 
Is16vec8 cmpneq (const Is16vec8 &_A, const Is16vec8 &_B)
 
Is16vec8 cmpgt (const Is16vec8 &_A, const Is16vec8 &_B)
 
Is16vec8 cmplt (const Is16vec8 &_A, const Is16vec8 &_B)
 
Is16vec8 unpack_low (const Is16vec8 &_A, const Is16vec8 &_B)
 
Is16vec8 unpack_high (const Is16vec8 &_A, const Is16vec8 &_B)
 
Is16vec8 mul_high (const Is16vec8 &_A, const Is16vec8 &_B)
 
Is32vec4 mul_add (const Is16vec8 &_A, const Is16vec8 &_B)
 
Is16vec8 sat_add (const Is16vec8 &_A, const Is16vec8 &_B)
 
Is16vec8 sat_sub (const Is16vec8 &_A, const Is16vec8 &_B)
 
Is16vec8 simd_max (const Is16vec8 &_A, const Is16vec8 &_B)
 
Is16vec8 simd_min (const Is16vec8 &_A, const Is16vec8 &_B)
 
Iu16vec8 operator* (const Iu16vec8 &_A, const Iu16vec8 &_B)
 
Iu16vec8 cmpeq (const Iu16vec8 &_A, const Iu16vec8 &_B)
 
Iu16vec8 cmpneq (const Iu16vec8 &_A, const Iu16vec8 &_B)
 
Iu16vec8 unpack_low (const Iu16vec8 &_A, const Iu16vec8 &_B)
 
Iu16vec8 unpack_high (const Iu16vec8 &_A, const Iu16vec8 &_B)
 
Iu16vec8 sat_add (const Iu16vec8 &_A, const Iu16vec8 &_B)
 
Iu16vec8 sat_sub (const Iu16vec8 &_A, const Iu16vec8 &_B)
 
Iu16vec8 simd_avg (const Iu16vec8 &_A, const Iu16vec8 &_B)
 
I16vec8 mul_high (const Iu16vec8 &_A, const Iu16vec8 &_B)
 
I8vec16 cmpeq (const I8vec16 &_A, const I8vec16 &_B)
 
I8vec16 cmpneq (const I8vec16 &_A, const I8vec16 &_B)
 
I8vec16 unpack_low (const I8vec16 &_A, const I8vec16 &_B)
 
I8vec16 unpack_high (const I8vec16 &_A, const I8vec16 &_B)
 
Is8vec16 cmpeq (const Is8vec16 &_A, const Is8vec16 &_B)
 
Is8vec16 cmpneq (const Is8vec16 &_A, const Is8vec16 &_B)
 
Is8vec16 cmpgt (const Is8vec16 &_A, const Is8vec16 &_B)
 
Is8vec16 cmplt (const Is8vec16 &_A, const Is8vec16 &_B)
 
Is8vec16 unpack_low (const Is8vec16 &_A, const Is8vec16 &_B)
 
Is8vec16 unpack_high (const Is8vec16 &_A, const Is8vec16 &_B)
 
Is8vec16 sat_add (const Is8vec16 &_A, const Is8vec16 &_B)
 
Is8vec16 sat_sub (const Is8vec16 &_A, const Is8vec16 &_B)
 
Iu8vec16 cmpeq (const Iu8vec16 &_A, const Iu8vec16 &_B)
 
Iu8vec16 cmpneq (const Iu8vec16 &_A, const Iu8vec16 &_B)
 
Iu8vec16 unpack_low (const Iu8vec16 &_A, const Iu8vec16 &_B)
 
Iu8vec16 unpack_high (const Iu8vec16 &_A, const Iu8vec16 &_B)
 
Iu8vec16 sat_add (const Iu8vec16 &_A, const Iu8vec16 &_B)
 
Iu8vec16 sat_sub (const Iu8vec16 &_A, const Iu8vec16 &_B)
 
I64vec2 sum_abs (const Iu8vec16 &_A, const Iu8vec16 &_B)
 
Iu8vec16 simd_avg (const Iu8vec16 &_A, const Iu8vec16 &_B)
 
Iu8vec16 simd_max (const Iu8vec16 &_A, const Iu8vec16 &_B)
 
Iu8vec16 simd_min (const Iu8vec16 &_A, const Iu8vec16 &_B)
 
Is16vec8 pack_sat (const Is32vec4 &_A, const Is32vec4 &_B)
 
Is8vec16 pack_sat (const Is16vec8 &_A, const Is16vec8 &_B)
 
Iu8vec16 packu_sat (const Is16vec8 &_A, const Is16vec8 &_B)
 
F64vec2 unpack_low (const F64vec2 &_A, const F64vec2 &_B)
 
F64vec2 unpack_high (const F64vec2 &_A, const F64vec2 &_B)
 
int move_mask (const F64vec2 &_A)
 
void loadu (F64vec2 &_A, double *_P)
 
void storeu (double *_P, const F64vec2 &_A)
 
void store_nta (double *_P, F64vec2 &_A)
 
 F64vec2_SELECT (eq), F64vec2_SELECT (lt), F64vec2_SELECT (le), F64vec2_SELECT (gt), F64vec2_SELECT (ge), F64vec2_SELECT (neq), F64vec2_SELECT (nlt), F64vec2_SELECT (nle): macro invocations that generate select_eq through select_nle for F64vec2
 
int F64vec2ToInt (const F64vec2 &_A)
 
F64vec2 F32vec4ToF64vec2 (const F32vec4 &_A)
 
F32vec4 F64vec2ToF32vec4 (const F64vec2 &_A)
 
F64vec2 IntToF64vec2 (const F64vec2 &_A, int _B)
 
F32vec8 unpack_low (const F32vec8 &_A, const F32vec8 &_B)
 
F32vec8 unpack_high (const F32vec8 &_A, const F32vec8 &_B)
 
int move_mask (const F32vec8 &_A)
 
void loadu (F32vec8 &_A, const float *_P)
 
void storeu (float *_P, const F32vec8 &_A)
 
void store_nta (float *_P, const F32vec8 &_A)
 
void maskload (F32vec8 &_A, const float *_P, const F32vec8 &_M)
 
void maskload (F32vec4 &_A, const float *_P, const F32vec4 &_M)
 
void maskstore (float *_P, const F32vec8 &_A, const F32vec8 &_M)
 
void maskstore (float *_P, const F32vec4 &_A, const F32vec4 &_M)
 
F32vec8 select_eq (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
 
F32vec8 select_lt (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
 
F32vec8 select_le (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
 
F32vec8 select_gt (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
 
F32vec8 select_ge (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
 
F32vec8 select_neq (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
 
F32vec8 select_nlt (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
 
F32vec8 select_nle (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
 
F32vec8 select_ngt (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
 
F32vec8 select_nge (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
 
F64vec4 unpack_low (const F64vec4 &_A, const F64vec4 &_B)
 
F64vec4 unpack_high (const F64vec4 &_A, const F64vec4 &_B)
 
int move_mask (const F64vec4 &_A)
 
void loadu (F64vec4 &_A, double *_P)
 
void storeu (double *_P, const F64vec4 &_A)
 
void store_nta (double *_P, const F64vec4 &_A)
 
void maskload (F64vec4 &_A, const double *_P, const F64vec4 &_M)
 
void maskload (F64vec2 &_A, const double *_P, const F64vec2 &_M)
 
void maskstore (double *_P, const F64vec4 &_A, const F64vec4 &_M)
 
void maskstore (double *_P, const F64vec2 &_A, const F64vec2 &_M)
 
F64vec4 select_eq (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
 
F64vec4 select_lt (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
 
F64vec4 select_le (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
 
F64vec4 select_gt (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
 
F64vec4 select_ge (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
 
F64vec4 select_neq (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
 
F64vec4 select_nlt (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
 
F64vec4 select_nle (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
 
F64vec4 select_ngt (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
 
F64vec4 select_nge (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
 
F64vec4 F32vec4ToF64vec4 (const F32vec4 &_A)
 
F32vec4 F64vec4ToF32vec8 (const F64vec4 &_A)
 

Variables

union {
   int   i [4]
   __m128d   m
} __f64vec2_abs_mask_cheat = {-1, 0x7fffffff, -1, 0x7fffffff}
 

Macro Definition Documentation

#define _f64vec2_abs_mask   ((F64vec2)__f64vec2_abs_mask_cheat.m)
#define _MM_16B(element, vector)   (*((signed char*)&##vector + ##element))
#define _MM_16UB(element, vector)   (*((unsigned char*)&##vector + ##element))
#define _MM_2QW(element, vector)   (*((__int64*)&##vector + ##element))
#define _MM_4DW(element, vector)   (*((int*)&##vector + ##element))
#define _MM_4UDW(element, vector)   (*((unsigned int*)&##vector + ##element))
#define _MM_8UW(element, vector)   (*((unsigned short*)&##vector + ##element))
#define _MM_8W(element, vector)   (*((short*)&##vector + ##element))
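Example: a minimal usage sketch (not part of dvec.h) that reads the lanes of a 128-bit integer vector with the element-access macros above. It assumes an SSE2-capable MSVC build; the &##/## pasting in these macros relies on MSVC's traditional preprocessor.

#include <dvec.h>
#include <cstdio>

int main()
{
    I32vec4 v = _mm_set_epi32(40, 30, 20, 10);          // lane 0 = 10 ... lane 3 = 40
    for (int e = 0; e < 4; ++e)
        std::printf("lane %d = %d\n", e, _MM_4DW(e, v)); // reads lane e as an int
    return 0;
}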
#define _VEC_ASSERT(_Expression)   (void)( (!!(_Expression)) || (_wassert(_CRT_WIDE(#_Expression), _CRT_WIDE(__FILE__), __LINE__), 0) )
#define F64vec2_COMI(op)   friend int comi##op (const F64vec2 &_A, const F64vec2 &_B) { return _mm_comi##op##_sd(_A,_B); }
#define F64vec2_COMP(op)   friend F64vec2 cmp##op (const F64vec2 &_A, const F64vec2 &_B) { return _mm_cmp##op##_pd(_A,_B); }
#define F64vec2_SELECT(op)
Value:
inline F64vec2 select_##op (const F64vec2 &_A, const F64vec2 &_B, const F64vec2 &_C, const F64vec2 &_D) \
{ \
F64vec2 _Mask = _mm_cmp##op##_pd(_A,_B); \
return( (_Mask & _C) | F64vec2((_mm_andnot_pd(_Mask,_D)))); \
}
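Example: an assumed usage sketch (not header text) of the select_eq that F64vec2_SELECT(eq) generates. Per lane it yields _C where _A == _B and _D elsewhere.

#include <dvec.h>
#include <cstdio>

int main()
{
    F64vec2 a = _mm_set_pd(2.0, 1.0);     // lanes { 1.0, 2.0 }
    F64vec2 b = _mm_set_pd(9.0, 1.0);     // lanes { 1.0, 9.0 }
    F64vec2 c = _mm_set_pd(100.0, 100.0);
    F64vec2 d = _mm_set_pd(-1.0, -1.0);
    F64vec2 r = select_eq(a, b, c, d);    // lanes { 100.0, -1.0 }
    double out[2];
    storeu(out, r);
    std::printf("%f %f\n", out[0], out[1]);
    return 0;
}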
#define F64vec2_UCOMI (   op)    friend int ucomi##op (const F64vec2 &_A, const F64vec2 &_B) { return _mm_ucomi##op##_sd(_A,_B); }
#define IVEC128_ADD_SUB(vect, element, opsize)
Value:
inline I##vect##vec##element operator+ (const I##vect##vec##element &_A, const I##vect##vec##element &_B) \
{ return _mm_add_##opsize( _A,_B); } \
inline I##vect##vec##element operator- (const I##vect##vec##element &_A, const I##vect##vec##element &_B) \
{ return _mm_sub_##opsize( _A,_B); }
#define IVEC128_LOGICALS(vect, element)
Value:
inline I##vect##vec##element operator& (const I##vect##vec##element &_A, const I##vect##vec##element &_B) \
{ return _mm_and_si128( _A,_B); } \
inline I##vect##vec##element operator| (const I##vect##vec##element &_A, const I##vect##vec##element &_B) \
{ return _mm_or_si128( _A,_B); } \
inline I##vect##vec##element operator^ (const I##vect##vec##element &_A, const I##vect##vec##element &_B) \
{ return _mm_xor_si128( _A,_B); } \
inline I##vect##vec##element andnot (const I##vect##vec##element &_A, const I##vect##vec##element &_B) \
{ return _mm_andnot_si128( _A,_B); }
#define IVEC128_SELECT(vect12, vect34, element, selop)
Value:
inline I##vect34##vec##element select_##selop ( \
const I##vect12##vec##element &_A, \
const I##vect12##vec##element &_B, \
const I##vect34##vec##element &_C, \
const I##vect34##vec##element &_D) \
{ \
I##vect12##vec##element _Mask = cmp##selop(_A,_B); \
return ( I##vect34##vec##element (_Mask & _C ) | \
I##vect34##vec##element ((_mm_andnot_si128(_Mask, _D )))); \
}
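Example: a sketch of one function this macro can generate, assuming dvec.h instantiates IVEC128_SELECT for the signed 32-bit types (Is32vec4 converts to its I32vec4 base where the first two parameters require it):

#include <dvec.h>
#include <cstdio>

int main()
{
    Is32vec4 a = _mm_set_epi32(4, 3, 2, 1);
    Is32vec4 b = _mm_set_epi32(4, 0, 2, 0);
    Is32vec4 c = _mm_set1_epi32(111);
    Is32vec4 d = _mm_set1_epi32(-1);
    Is32vec4 r = select_eq(a, b, c, d);   // lanes { -1, 111, -1, 111 }
    for (int e = 0; e < 4; ++e)
        std::printf("%d ", _MM_4DW(e, r));
    return 0;
}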

Function Documentation

void __cdecl _wassert (_In_z_ const wchar_t *_Message, _In_z_ const wchar_t *_File, _In_ unsigned _Line)

inline M128 andnot (const M128 &_A, const M128 &_B)
{ return _mm_andnot_si128(_A,_B); }
inline I32vec4 cmpeq (const I32vec4 &_A, const I32vec4 &_B)
{ return _mm_cmpeq_epi32(_A,_B); }

inline Is32vec4 cmpeq (const Is32vec4 &_A, const Is32vec4 &_B)
{ return _mm_cmpeq_epi32(_A,_B); }

inline Iu32vec4 cmpeq (const Iu32vec4 &_A, const Iu32vec4 &_B)
{ return _mm_cmpeq_epi32(_A,_B); }

inline I16vec8 cmpeq (const I16vec8 &_A, const I16vec8 &_B)
{ return _mm_cmpeq_epi16(_A,_B); }

inline Is16vec8 cmpeq (const Is16vec8 &_A, const Is16vec8 &_B)
{ return _mm_cmpeq_epi16(_A,_B); }

inline Iu16vec8 cmpeq (const Iu16vec8 &_A, const Iu16vec8 &_B)
{ return _mm_cmpeq_epi16(_A,_B); }

inline I8vec16 cmpeq (const I8vec16 &_A, const I8vec16 &_B)
{ return _mm_cmpeq_epi8(_A,_B); }

inline Is8vec16 cmpeq (const Is8vec16 &_A, const Is8vec16 &_B)
{ return _mm_cmpeq_epi8(_A,_B); }

inline Iu8vec16 cmpeq (const Iu8vec16 &_A, const Iu8vec16 &_B)
{ return _mm_cmpeq_epi8(_A,_B); }
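Example: an assumed usage sketch. cmpeq sets every bit of a lane where the operands match and clears it elsewhere, producing a mask suitable for later blending.

#include <dvec.h>
#include <cstdio>

int main()
{
    Is32vec4 a = _mm_set_epi32(4, 3, 2, 1);
    Is32vec4 b = _mm_set_epi32(4, 0, 2, 0);
    Is32vec4 m = cmpeq(a, b);   // lanes { 0, 0xFFFFFFFF, 0, 0xFFFFFFFF }
    for (int e = 0; e < 4; ++e)
        std::printf("lane %d: 0x%08X\n", e, (unsigned)_MM_4DW(e, m));
    return 0;
}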
inline Is32vec4 cmpgt (const Is32vec4 &_A, const Is32vec4 &_B)
{ return _mm_cmpgt_epi32(_A,_B); }

inline Is16vec8 cmpgt (const Is16vec8 &_A, const Is16vec8 &_B)
{ return _mm_cmpgt_epi16(_A,_B); }

inline Is8vec16 cmpgt (const Is8vec16 &_A, const Is8vec16 &_B)
{ return _mm_cmpgt_epi8(_A,_B); }

inline Is32vec4 cmplt (const Is32vec4 &_A, const Is32vec4 &_B)
{ return _mm_cmpgt_epi32(_B,_A); }

inline Is16vec8 cmplt (const Is16vec8 &_A, const Is16vec8 &_B)
{ return _mm_cmpgt_epi16(_B,_A); }

inline Is8vec16 cmplt (const Is8vec16 &_A, const Is8vec16 &_B)
{ return _mm_cmplt_epi8(_A,_B); }
inline I32vec4 cmpneq (const I32vec4 &_A, const I32vec4 &_B)
{ return _mm_andnot_si128(_mm_cmpeq_epi32(_A,_B), get_mask128()); }

inline Is32vec4 cmpneq (const Is32vec4 &_A, const Is32vec4 &_B)
{ return _mm_andnot_si128(_mm_cmpeq_epi32(_A,_B), get_mask128()); }

inline Iu32vec4 cmpneq (const Iu32vec4 &_A, const Iu32vec4 &_B)
{ return _mm_andnot_si128(_mm_cmpeq_epi32(_A,_B), get_mask128()); }

inline I16vec8 cmpneq (const I16vec8 &_A, const I16vec8 &_B)
{ return _mm_andnot_si128(_mm_cmpeq_epi16(_A,_B), get_mask128()); }

inline Is16vec8 cmpneq (const Is16vec8 &_A, const Is16vec8 &_B)
{ return _mm_andnot_si128(_mm_cmpeq_epi16(_A,_B), get_mask128()); }

inline Iu16vec8 cmpneq (const Iu16vec8 &_A, const Iu16vec8 &_B)
{ return _mm_andnot_si128(_mm_cmpeq_epi16(_A,_B), get_mask128()); }

inline I8vec16 cmpneq (const I8vec16 &_A, const I8vec16 &_B)
{ return _mm_andnot_si128(_mm_cmpeq_epi8(_A,_B), get_mask128()); }

inline Is8vec16 cmpneq (const Is8vec16 &_A, const Is8vec16 &_B)
{ return _mm_andnot_si128(_mm_cmpeq_epi8(_A,_B), get_mask128()); }

inline Iu8vec16 cmpneq (const Iu8vec16 &_A, const Iu8vec16 &_B)
{ return _mm_andnot_si128(_mm_cmpeq_epi8(_A,_B), get_mask128()); }
inline F64vec2 F32vec4ToF64vec2 (const F32vec4 &_A)
{ return _mm_cvtps_pd(_A); }

inline F64vec4 F32vec4ToF64vec4 (const F32vec4 &_A)
{ return _mm256_cvtps_pd(_A); }

inline int F64vec2ToInt (const F64vec2 &_A)
{ return _mm_cvttsd_si32(_A); }

(F64vec2ToInt is declared directly after the F64vec2_SELECT(eq) through F64vec2_SELECT(nle) macro invocations, which is why some renderings show it as "F64vec2_SELECT ( eq )".)

inline F32vec4 F64vec2ToF32vec4 (const F64vec2 &_A)
{ return _mm_cvtpd_ps(_A); }

inline F32vec4 F64vec4ToF32vec8 (const F64vec4 &_A)
{ return _mm256_cvtpd_ps(_A); }
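Example: an assumed usage sketch widening the two low floats of an F32vec4 (from <fvec.h>, which dvec.h includes) to doubles.

#include <dvec.h>
#include <cstdio>

int main()
{
    F32vec4 f = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);  // lanes { 1, 2, 3, 4 }
    F64vec2 d = F32vec4ToF64vec2(f);                 // widens lanes 0 and 1
    double out[2];
    storeu(out, d);
    std::printf("%f %f\n", out[0], out[1]);          // 1.000000 2.000000
    return 0;
}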
inline const __m128i get_mask128 ()
{
    static const __m128i _Mask128 = _mm_set1_epi64x(0xffffffffffffffffi64);
    return _Mask128;
}
inline F64vec2 IntToF64vec2 (const F64vec2 &_A, int _B)
{ return _mm_cvtsi32_sd(_A,_B); }
inline void loadu (F64vec2 &_A, double *_P)
{ _A = _mm_loadu_pd(_P); }

inline void loadu (F32vec8 &_A, const float *_P)
{ _A = _mm256_loadu_ps(_P); }

inline void loadu (F64vec4 &_A, double *_P)
{ _A = _mm256_loadu_pd(_P); }
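Example: a minimal sketch (not part of dvec.h) of an unaligned round trip. Because loadu and storeu use unaligned loads and stores, the arrays need no 16- or 32-byte alignment.

#include <dvec.h>
#include <cstdio>

int main()
{
    double src[2] = { 1.5, 2.5 };
    double dst[2] = { 0.0, 0.0 };
    F64vec2 v;
    loadu(v, src);    // unaligned 128-bit load
    storeu(dst, v);   // unaligned 128-bit store
    std::printf("%f %f\n", dst[0], dst[1]);
    return 0;
}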
inline void maskload (F32vec8 &_A, const float *_P, const F32vec8 &_M)
{ _A = _mm256_maskload_ps(_P, _mm256_castps_si256(_M)); }

inline void maskload (F32vec4 &_A, const float *_P, const F32vec4 &_M)
{ _A = _mm_maskload_ps(_P, _mm_castps_si128(_M)); }

inline void maskload (F64vec4 &_A, const double *_P, const F64vec4 &_M)
{ _A = _mm256_maskload_pd(_P, _mm256_castpd_si256(_M)); }

inline void maskload (F64vec2 &_A, const double *_P, const F64vec2 &_M)
{ _A = _mm_maskload_pd(_P, _mm_castpd_si128(_M)); }
inline void maskstore (float *_P, const F32vec8 &_A, const F32vec8 &_M)
{ _mm256_maskstore_ps(_P, _mm256_castps_si256(_M), _A); }

inline void maskstore (float *_P, const F32vec4 &_A, const F32vec4 &_M)
{ _mm_maskstore_ps(_P, _mm_castps_si128(_M), _A); }

inline void maskstore (double *_P, const F64vec4 &_A, const F64vec4 &_M)
{ _mm256_maskstore_pd(_P, _mm256_castpd_si256(_M), _A); }

inline void maskstore (double *_P, const F64vec2 &_A, const F64vec2 &_M)
{ _mm_maskstore_pd(_P, _mm_castpd_si128(_M), _A); }
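Example: an assumed usage sketch (requires AVX). The sign bit of each mask lane decides whether that lane is loaded or stored; masked-off load lanes read as 0.0, and masked-off store lanes are left untouched in memory.

#include <dvec.h>
#include <cstdio>

int main()
{
    double src[2] = { 1.0, 2.0 };
    double dst[2] = { -9.0, -9.0 };
    // Lane 0 enabled (sign bit set), lane 1 disabled.
    F64vec2 mask = _mm_castsi128_pd(_mm_set_epi64x(0, ~0LL));
    F64vec2 v;
    maskload(v, src, mask);    // v = { 1.0, 0.0 }
    maskstore(dst, v, mask);   // dst = { 1.0, -9.0 }
    std::printf("%f %f\n", dst[0], dst[1]);
    return 0;
}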
inline int move_mask (const F64vec2 &_A)
{ return _mm_movemask_pd(_A); }

inline int move_mask (const F32vec8 &_A)
{ return _mm256_movemask_ps(_A); }

inline int move_mask (const F64vec4 &_A)
{ return _mm256_movemask_pd(_A); }
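Example: a minimal sketch (not part of dvec.h). move_mask packs the sign bit of each lane into the low bits of an int, which is handy for branching on a comparison result.

#include <dvec.h>
#include <cstdio>

int main()
{
    F64vec2 v = _mm_set_pd(-4.0, 3.0);   // lane 0 = 3.0, lane 1 = -4.0
    int bits = move_mask(v);             // bit 0 = sign(lane 0), bit 1 = sign(lane 1)
    std::printf("mask = %d\n", bits);    // prints 2
    return 0;
}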
inline Is32vec4 mul_add (const Is16vec8 &_A, const Is16vec8 &_B)
{ return _mm_madd_epi16(_A,_B); }

inline Is16vec8 mul_high (const Is16vec8 &_A, const Is16vec8 &_B)
{ return _mm_mulhi_epi16(_A,_B); }

inline I16vec8 mul_high (const Iu16vec8 &_A, const Iu16vec8 &_B)
{ return _mm_mulhi_epu16(_A,_B); }
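Example: an assumed usage sketch. mul_add multiplies 16-bit lanes into 32-bit products and sums adjacent pairs, the core step of many dot-product kernels.

#include <dvec.h>
#include <cstdio>

int main()
{
    Is16vec8 a = _mm_set1_epi16(3);
    Is16vec8 b = _mm_set1_epi16(1000);
    Is32vec4 s = mul_add(a, b);          // each 32-bit lane = 3*1000 + 3*1000
    std::printf("%d\n", _MM_4DW(0, s));  // prints 6000
    return 0;
}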
inline M128 operator& (const M128 &_A, const M128 &_B)
{ return _mm_and_si128(_A,_B); }

inline I64vec2 operator* (const Iu32vec4 &_A, const Iu32vec4 &_B)
{ return _mm_mul_epu32(_A,_B); }

inline I16vec8 operator* (const I16vec8 &_A, const I16vec8 &_B)
{ return _mm_mullo_epi16(_A,_B); }

inline Is16vec8 operator* (const Is16vec8 &_A, const Is16vec8 &_B)
{ return _mm_mullo_epi16(_A,_B); }

inline Iu16vec8 operator* (const Iu16vec8 &_A, const Iu16vec8 &_B)
{ return _mm_mullo_epi16(_A,_B); }

inline M128 operator^ (const M128 &_A, const M128 &_B)
{ return _mm_xor_si128(_A,_B); }

inline M128 operator| (const M128 &_A, const M128 &_B)
{ return _mm_or_si128(_A,_B); }
inline Is16vec8 pack_sat (const Is32vec4 &_A, const Is32vec4 &_B)
{ return _mm_packs_epi32(_A,_B); }

inline Is8vec16 pack_sat (const Is16vec8 &_A, const Is16vec8 &_B)
{ return _mm_packs_epi16(_A,_B); }

inline Iu8vec16 packu_sat (const Is16vec8 &_A, const Is16vec8 &_B)
{ return _mm_packus_epi16(_A,_B); }
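Example: an assumed usage sketch. pack_sat narrows with signed saturation, clamping out-of-range values to the target type's limits instead of truncating them.

#include <dvec.h>
#include <cstdio>

int main()
{
    Is32vec4 a = _mm_set_epi32(100000, -100000, 1, -1);
    Is16vec8 p = pack_sat(a, a);   // -100000 -> -32768, 100000 -> 32767
    std::printf("%d %d\n", _MM_8W(2, p), _MM_8W(3, p));  // -32768 32767
    return 0;
}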
inline Is16vec8 sat_add (const Is16vec8 &_A, const Is16vec8 &_B)
{ return _mm_adds_epi16(_A,_B); }

inline Iu16vec8 sat_add (const Iu16vec8 &_A, const Iu16vec8 &_B)
{ return _mm_adds_epu16(_A,_B); }

inline Is8vec16 sat_add (const Is8vec16 &_A, const Is8vec16 &_B)
{ return _mm_adds_epi8(_A,_B); }

inline Iu8vec16 sat_add (const Iu8vec16 &_A, const Iu8vec16 &_B)
{ return _mm_adds_epu8(_A,_B); }

inline Is16vec8 sat_sub (const Is16vec8 &_A, const Is16vec8 &_B)
{ return _mm_subs_epi16(_A,_B); }

inline Iu16vec8 sat_sub (const Iu16vec8 &_A, const Iu16vec8 &_B)
{ return _mm_subs_epu16(_A,_B); }

inline Is8vec16 sat_sub (const Is8vec16 &_A, const Is8vec16 &_B)
{ return _mm_subs_epi8(_A,_B); }

inline Iu8vec16 sat_sub (const Iu8vec16 &_A, const Iu8vec16 &_B)
{ return _mm_subs_epu8(_A,_B); }
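Example: a minimal sketch (not part of dvec.h). Saturated arithmetic clamps at the type's limits instead of wrapping around.

#include <dvec.h>
#include <cstdio>

int main()
{
    Iu8vec16 a = _mm_set1_epi8((char)250);
    Iu8vec16 b = _mm_set1_epi8(10);
    Iu8vec16 s = sat_add(a, b);   // 250 + 10 saturates to 255, not 4
    std::printf("%u\n", (unsigned)_MM_16UB(0, s));  // prints 255
    return 0;
}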
inline F32vec8 select_eq (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
{ return _mm256_blendv_ps(_D, _C, _mm256_cmp_ps(_A, _B, _CMP_EQ_OQ)); }

inline F64vec4 select_eq (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
{ return _mm256_blendv_pd(_D, _C, _mm256_cmp_pd(_A, _B, _CMP_EQ_OQ)); }

inline F32vec8 select_ge (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
{ return _mm256_blendv_ps(_D, _C, _mm256_cmp_ps(_A, _B, _CMP_GE_OS)); }

inline F64vec4 select_ge (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
{ return _mm256_blendv_pd(_D, _C, _mm256_cmp_pd(_A, _B, _CMP_GE_OS)); }

inline F32vec8 select_gt (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
{ return _mm256_blendv_ps(_D, _C, _mm256_cmp_ps(_A, _B, _CMP_GT_OS)); }

inline F64vec4 select_gt (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
{ return _mm256_blendv_pd(_D, _C, _mm256_cmp_pd(_A, _B, _CMP_GT_OS)); }

inline F32vec8 select_le (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
{ return _mm256_blendv_ps(_D, _C, _mm256_cmp_ps(_A, _B, _CMP_LE_OS)); }

inline F64vec4 select_le (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
{ return _mm256_blendv_pd(_D, _C, _mm256_cmp_pd(_A, _B, _CMP_LE_OS)); }

inline F32vec8 select_lt (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
{ return _mm256_blendv_ps(_D, _C, _mm256_cmp_ps(_A, _B, _CMP_LT_OS)); }

inline F64vec4 select_lt (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
{ return _mm256_blendv_pd(_D, _C, _mm256_cmp_pd(_A, _B, _CMP_LT_OS)); }
inline F32vec8 select_neq (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
{ return _mm256_blendv_ps(_D, _C, _mm256_cmp_ps(_A, _B, _CMP_NEQ_UQ)); }

inline F64vec4 select_neq (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
{ return _mm256_blendv_pd(_D, _C, _mm256_cmp_pd(_A, _B, _CMP_NEQ_UQ)); }

inline F32vec8 select_nge (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
{ return _mm256_blendv_ps(_D, _C, _mm256_cmp_ps(_A, _B, _CMP_NGE_US)); }

inline F64vec4 select_nge (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
{ return _mm256_blendv_pd(_D, _C, _mm256_cmp_pd(_A, _B, _CMP_NGE_US)); }

inline F32vec8 select_ngt (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
{ return _mm256_blendv_ps(_D, _C, _mm256_cmp_ps(_A, _B, _CMP_NGT_US)); }

inline F64vec4 select_ngt (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
{ return _mm256_blendv_pd(_D, _C, _mm256_cmp_pd(_A, _B, _CMP_NGT_US)); }

inline F32vec8 select_nle (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
{ return _mm256_blendv_ps(_D, _C, _mm256_cmp_ps(_A, _B, _CMP_NLE_US)); }

inline F64vec4 select_nle (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
{ return _mm256_blendv_pd(_D, _C, _mm256_cmp_pd(_A, _B, _CMP_NLE_US)); }

inline F32vec8 select_nlt (const F32vec8 &_A, const F32vec8 &_B, const F32vec8 &_C, const F32vec8 &_D)
{ return _mm256_blendv_ps(_D, _C, _mm256_cmp_ps(_A, _B, _CMP_NLT_US)); }

inline F64vec4 select_nlt (const F64vec4 &_A, const F64vec4 &_B, const F64vec4 &_C, const F64vec4 &_D)
{ return _mm256_blendv_pd(_D, _C, _mm256_cmp_pd(_A, _B, _CMP_NLT_US)); }
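Example: an assumed usage sketch (requires AVX) building a branch-free per-lane clamp from the eight-float select_lt documented above.

#include <dvec.h>
#include <cstdio>

int main()
{
    F32vec8 x = _mm256_set_ps(8, 7, 6, 5, 4, 3, 2, 1);
    F32vec8 limit = _mm256_set1_ps(4.0f);
    // Where x < limit keep x, otherwise use limit: min(x, limit) per lane.
    F32vec8 clamped = select_lt(x, limit, x, limit);
    float out[8];
    storeu(out, clamped);
    for (int e = 0; e < 8; ++e)
        std::printf("%g ", out[e]);   // 1 2 3 4 4 4 4 4
    return 0;
}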
inline Iu16vec8 simd_avg (const Iu16vec8 &_A, const Iu16vec8 &_B)
{ return _mm_avg_epu16(_A,_B); }

inline Iu8vec16 simd_avg (const Iu8vec16 &_A, const Iu8vec16 &_B)
{ return _mm_avg_epu8(_A,_B); }

inline Is16vec8 simd_max (const Is16vec8 &_A, const Is16vec8 &_B)
{ return _mm_max_epi16(_A,_B); }

inline Iu8vec16 simd_max (const Iu8vec16 &_A, const Iu8vec16 &_B)
{ return _mm_max_epu8(_A,_B); }

inline Is16vec8 simd_min (const Is16vec8 &_A, const Is16vec8 &_B)
{ return _mm_min_epi16(_A,_B); }

inline Iu8vec16 simd_min (const Iu8vec16 &_A, const Iu8vec16 &_B)
{ return _mm_min_epu8(_A,_B); }
inline void store_nta (double *_P, F64vec2 &_A)
{ _mm_stream_pd(_P,_A); }

inline void store_nta (float *_P, const F32vec8 &_A)
{ _mm256_stream_ps(_P, _A); }

inline void store_nta (double *_P, const F64vec4 &_A)
{ _mm256_stream_pd(_P, _A); }

inline void storeu (double *_P, const F64vec2 &_A)
{ _mm_storeu_pd(_P, _A); }

inline void storeu (float *_P, const F32vec8 &_A)
{ _mm256_storeu_ps(_P, _A); }

inline void storeu (double *_P, const F64vec4 &_A)
{ _mm256_storeu_pd(_P, _A); }
inline I64vec2 sum_abs (const Iu8vec16 &_A, const Iu8vec16 &_B)
{ return _mm_sad_epu8(_A,_B); }
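Example: an assumed usage sketch. sum_abs wraps PSADBW, which produces one 64-bit sum of absolute byte differences per 8-byte half, a common building block in motion estimation.

#include <dvec.h>
#include <cstdio>

int main()
{
    Iu8vec16 a = _mm_set1_epi8(9);
    Iu8vec16 b = _mm_set1_epi8(5);
    I64vec2 s = sum_abs(a, b);   // each half: 8 bytes * |9 - 5| = 32
    std::printf("%lld %lld\n", _MM_2QW(0, s), _MM_2QW(1, s));  // 32 32
    return 0;
}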
inline I64vec2 unpack_high (const I64vec2 &_A, const I64vec2 &_B)
{ return _mm_unpackhi_epi64(_A,_B); }

inline I32vec4 unpack_high (const I32vec4 &_A, const I32vec4 &_B)
{ return _mm_unpackhi_epi32(_A,_B); }

inline Is32vec4 unpack_high (const Is32vec4 &_A, const Is32vec4 &_B)
{ return _mm_unpackhi_epi32(_A,_B); }

inline Iu32vec4 unpack_high (const Iu32vec4 &_A, const Iu32vec4 &_B)
{ return _mm_unpackhi_epi32(_A,_B); }

inline I16vec8 unpack_high (const I16vec8 &_A, const I16vec8 &_B)
{ return _mm_unpackhi_epi16(_A,_B); }

inline Is16vec8 unpack_high (const Is16vec8 &_A, const Is16vec8 &_B)
{ return _mm_unpackhi_epi16(_A,_B); }

inline Iu16vec8 unpack_high (const Iu16vec8 &_A, const Iu16vec8 &_B)
{ return _mm_unpackhi_epi16(_A,_B); }

inline I8vec16 unpack_high (const I8vec16 &_A, const I8vec16 &_B)
{ return _mm_unpackhi_epi8(_A,_B); }

inline Is8vec16 unpack_high (const Is8vec16 &_A, const Is8vec16 &_B)
{ return _mm_unpackhi_epi8(_A,_B); }

inline Iu8vec16 unpack_high (const Iu8vec16 &_A, const Iu8vec16 &_B)
{ return _mm_unpackhi_epi8(_A,_B); }

inline F64vec2 unpack_high (const F64vec2 &_A, const F64vec2 &_B)
{ return _mm_unpackhi_pd(_A, _B); }

inline F32vec8 unpack_high (const F32vec8 &_A, const F32vec8 &_B)
{ return _mm256_unpackhi_ps(_A, _B); }

inline F64vec4 unpack_high (const F64vec4 &_A, const F64vec4 &_B)
{ return _mm256_unpackhi_pd(_A, _B); }
inline I64vec2 unpack_low (const I64vec2 &_A, const I64vec2 &_B)
{ return _mm_unpacklo_epi64(_A,_B); }

inline I32vec4 unpack_low (const I32vec4 &_A, const I32vec4 &_B)
{ return _mm_unpacklo_epi32(_A,_B); }

inline Is32vec4 unpack_low (const Is32vec4 &_A, const Is32vec4 &_B)
{ return _mm_unpacklo_epi32(_A,_B); }

inline Iu32vec4 unpack_low (const Iu32vec4 &_A, const Iu32vec4 &_B)
{ return _mm_unpacklo_epi32(_A,_B); }

inline I16vec8 unpack_low (const I16vec8 &_A, const I16vec8 &_B)
{ return _mm_unpacklo_epi16(_A,_B); }

inline Is16vec8 unpack_low (const Is16vec8 &_A, const Is16vec8 &_B)
{ return _mm_unpacklo_epi16(_A,_B); }

inline Iu16vec8 unpack_low (const Iu16vec8 &_A, const Iu16vec8 &_B)
{ return _mm_unpacklo_epi16(_A,_B); }

inline I8vec16 unpack_low (const I8vec16 &_A, const I8vec16 &_B)
{ return _mm_unpacklo_epi8(_A,_B); }

inline Is8vec16 unpack_low (const Is8vec16 &_A, const Is8vec16 &_B)
{ return _mm_unpacklo_epi8(_A,_B); }

inline Iu8vec16 unpack_low (const Iu8vec16 &_A, const Iu8vec16 &_B)
{ return _mm_unpacklo_epi8(_A,_B); }

inline F64vec2 unpack_low (const F64vec2 &_A, const F64vec2 &_B)
{ return _mm_unpacklo_pd(_A, _B); }

inline F32vec8 unpack_low (const F32vec8 &_A, const F32vec8 &_B)
{ return _mm256_unpacklo_ps(_A, _B); }

inline F64vec4 unpack_low (const F64vec4 &_A, const F64vec4 &_B)
{ return _mm256_unpacklo_pd(_A, _B); }
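Example: a minimal sketch (not part of dvec.h). unpack_low and unpack_high interleave the low and high halves of two vectors, the standard building block for transposes.

#include <dvec.h>
#include <cstdio>

int main()
{
    F64vec2 a = _mm_set_pd(2.0, 1.0);     // lanes { 1.0, 2.0 }
    F64vec2 b = _mm_set_pd(20.0, 10.0);   // lanes { 10.0, 20.0 }
    double lo[2], hi[2];
    storeu(lo, unpack_low(a, b));         // { 1.0, 10.0 }
    storeu(hi, unpack_high(a, b));        // { 2.0, 20.0 }
    std::printf("%g %g / %g %g\n", lo[0], lo[1], hi[0], hi[1]);
    return 0;
}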

Variable Documentation

const union {
   int   i [4]
   __m128d   m
} __f64vec2_abs_mask_cheat