dvec.h File Reference
#include <immintrin.h>
#include <fvec.h>
#include <crtdefs.h>


Classes

class  M128
 
class  I128vec1
 
class  I64vec2
 
class  I32vec4
 
class  Is32vec4
 
class  Iu32vec4
 
class  I16vec8
 
class  Is16vec8
 
class  Iu16vec8
 
class  I8vec16
 
class  Is8vec16
 
class  Iu8vec16
 
class  F64vec2
 
class  F32vec8
 
class  F64vec4
 

Macros

#define _VEC_ASSERT(_Expression)   (void)( (!!(_Expression)) || (_wassert(_CRT_WIDE(#_Expression), _CRT_WIDE(__FILE__), __LINE__), 0) )
 
#define _f64vec2_abs_mask   ((F64vec2)__f64vec2_abs_mask_cheat.m)
 
#define _MM_16UB(element, vector)   (*((unsigned char*)&##vector + ##element))
 
#define _MM_16B(element, vector)   (*((signed char*)&##vector + ##element))
 
#define _MM_8UW(element, vector)   (*((unsigned short*)&##vector + ##element))
 
#define _MM_8W(element, vector)   (*((short*)&##vector + ##element))
 
#define _MM_4UDW(element, vector)   (*((unsigned int*)&##vector + ##element))
 
#define _MM_4DW(element, vector)   (*((int*)&##vector + ##element))
 
#define _MM_2QW(element, vector)   (*((__int64*)&##vector + ##element))
 
#define IVEC128_LOGICALS(vect, element)
 
#define IVEC128_ADD_SUB(vect, element, opsize)
 
#define IVEC128_SELECT(vect12, vect34, element, selop)
 
#define F64vec2_COMP(op)   friend F64vec2 cmp##op (const F64vec2 &a, const F64vec2 &b) { return _mm_cmp##op##_pd(a,b); }
 
#define F64vec2_COMI(op)   friend int comi##op (const F64vec2 &a, const F64vec2 &b) { return _mm_comi##op##_sd(a,b); }
 
#define F64vec2_UCOMI(op)   friend int ucomi##op (const F64vec2 &a, const F64vec2 &b) { return _mm_ucomi##op##_sd(a,b); }
 
#define F64vec2_SELECT(op)
 

Functions

_CRTIMP void __cdecl _wassert (_In_z_ const wchar_t *_Message, _In_z_ const wchar_t *_File, _In_ unsigned _Line)
 
const __m128i get_mask128 ()
 
M128 operator& (const M128 &a, const M128 &b)
 
M128 operator| (const M128 &a, const M128 &b)
 
M128 operator^ (const M128 &a, const M128 &b)
 
M128 andnot (const M128 &a, const M128 &b)
 
I64vec2 unpack_low (const I64vec2 &a, const I64vec2 &b)
 
I64vec2 unpack_high (const I64vec2 &a, const I64vec2 &b)
 
I32vec4 cmpeq (const I32vec4 &a, const I32vec4 &b)
 
I32vec4 cmpneq (const I32vec4 &a, const I32vec4 &b)
 
I32vec4 unpack_low (const I32vec4 &a, const I32vec4 &b)
 
I32vec4 unpack_high (const I32vec4 &a, const I32vec4 &b)
 
Is32vec4 cmpeq (const Is32vec4 &a, const Is32vec4 &b)
 
Is32vec4 cmpneq (const Is32vec4 &a, const Is32vec4 &b)
 
Is32vec4 cmpgt (const Is32vec4 &a, const Is32vec4 &b)
 
Is32vec4 cmplt (const Is32vec4 &a, const Is32vec4 &b)
 
Is32vec4 unpack_low (const Is32vec4 &a, const Is32vec4 &b)
 
Is32vec4 unpack_high (const Is32vec4 &a, const Is32vec4 &b)
 
I64vec2 operator* (const Iu32vec4 &a, const Iu32vec4 &b)
 
Iu32vec4 cmpeq (const Iu32vec4 &a, const Iu32vec4 &b)
 
Iu32vec4 cmpneq (const Iu32vec4 &a, const Iu32vec4 &b)
 
Iu32vec4 unpack_low (const Iu32vec4 &a, const Iu32vec4 &b)
 
Iu32vec4 unpack_high (const Iu32vec4 &a, const Iu32vec4 &b)
 
I16vec8 operator* (const I16vec8 &a, const I16vec8 &b)
 
I16vec8 cmpeq (const I16vec8 &a, const I16vec8 &b)
 
I16vec8 cmpneq (const I16vec8 &a, const I16vec8 &b)
 
I16vec8 unpack_low (const I16vec8 &a, const I16vec8 &b)
 
I16vec8 unpack_high (const I16vec8 &a, const I16vec8 &b)
 
Is16vec8 operator* (const Is16vec8 &a, const Is16vec8 &b)
 
Is16vec8 cmpeq (const Is16vec8 &a, const Is16vec8 &b)
 
Is16vec8 cmpneq (const Is16vec8 &a, const Is16vec8 &b)
 
Is16vec8 cmpgt (const Is16vec8 &a, const Is16vec8 &b)
 
Is16vec8 cmplt (const Is16vec8 &a, const Is16vec8 &b)
 
Is16vec8 unpack_low (const Is16vec8 &a, const Is16vec8 &b)
 
Is16vec8 unpack_high (const Is16vec8 &a, const Is16vec8 &b)
 
Is16vec8 mul_high (const Is16vec8 &a, const Is16vec8 &b)
 
Is32vec4 mul_add (const Is16vec8 &a, const Is16vec8 &b)
 
Is16vec8 sat_add (const Is16vec8 &a, const Is16vec8 &b)
 
Is16vec8 sat_sub (const Is16vec8 &a, const Is16vec8 &b)
 
Is16vec8 simd_max (const Is16vec8 &a, const Is16vec8 &b)
 
Is16vec8 simd_min (const Is16vec8 &a, const Is16vec8 &b)
 
Iu16vec8 operator* (const Iu16vec8 &a, const Iu16vec8 &b)
 
Iu16vec8 cmpeq (const Iu16vec8 &a, const Iu16vec8 &b)
 
Iu16vec8 cmpneq (const Iu16vec8 &a, const Iu16vec8 &b)
 
Iu16vec8 unpack_low (const Iu16vec8 &a, const Iu16vec8 &b)
 
Iu16vec8 unpack_high (const Iu16vec8 &a, const Iu16vec8 &b)
 
Iu16vec8 sat_add (const Iu16vec8 &a, const Iu16vec8 &b)
 
Iu16vec8 sat_sub (const Iu16vec8 &a, const Iu16vec8 &b)
 
Iu16vec8 simd_avg (const Iu16vec8 &a, const Iu16vec8 &b)
 
I16vec8 mul_high (const Iu16vec8 &a, const Iu16vec8 &b)
 
I8vec16 cmpeq (const I8vec16 &a, const I8vec16 &b)
 
I8vec16 cmpneq (const I8vec16 &a, const I8vec16 &b)
 
I8vec16 unpack_low (const I8vec16 &a, const I8vec16 &b)
 
I8vec16 unpack_high (const I8vec16 &a, const I8vec16 &b)
 
Is8vec16 cmpeq (const Is8vec16 &a, const Is8vec16 &b)
 
Is8vec16 cmpneq (const Is8vec16 &a, const Is8vec16 &b)
 
Is8vec16 cmpgt (const Is8vec16 &a, const Is8vec16 &b)
 
Is8vec16 cmplt (const Is8vec16 &a, const Is8vec16 &b)
 
Is8vec16 unpack_low (const Is8vec16 &a, const Is8vec16 &b)
 
Is8vec16 unpack_high (const Is8vec16 &a, const Is8vec16 &b)
 
Is8vec16 sat_add (const Is8vec16 &a, const Is8vec16 &b)
 
Is8vec16 sat_sub (const Is8vec16 &a, const Is8vec16 &b)
 
Iu8vec16 cmpeq (const Iu8vec16 &a, const Iu8vec16 &b)
 
Iu8vec16 cmpneq (const Iu8vec16 &a, const Iu8vec16 &b)
 
Iu8vec16 unpack_low (const Iu8vec16 &a, const Iu8vec16 &b)
 
Iu8vec16 unpack_high (const Iu8vec16 &a, const Iu8vec16 &b)
 
Iu8vec16 sat_add (const Iu8vec16 &a, const Iu8vec16 &b)
 
Iu8vec16 sat_sub (const Iu8vec16 &a, const Iu8vec16 &b)
 
I64vec2 sum_abs (const Iu8vec16 &a, const Iu8vec16 &b)
 
Iu8vec16 simd_avg (const Iu8vec16 &a, const Iu8vec16 &b)
 
Iu8vec16 simd_max (const Iu8vec16 &a, const Iu8vec16 &b)
 
Iu8vec16 simd_min (const Iu8vec16 &a, const Iu8vec16 &b)
 
Is16vec8 pack_sat (const Is32vec4 &a, const Is32vec4 &b)
 
Is8vec16 pack_sat (const Is16vec8 &a, const Is16vec8 &b)
 
Iu8vec16 packu_sat (const Is16vec8 &a, const Is16vec8 &b)
 
F64vec2 unpack_low (const F64vec2 &a, const F64vec2 &b)
 
F64vec2 unpack_high (const F64vec2 &a, const F64vec2 &b)
 
int move_mask (const F64vec2 &a)
 
void loadu (F64vec2 &a, double *p)
 
void storeu (double *p, const F64vec2 &a)
 
void store_nta (double *p, F64vec2 &a)
 
 F64vec2_SELECT (eq) F64vec2_SELECT(lt) F64vec2_SELECT(le) F64vec2_SELECT(gt) F64vec2_SELECT(ge) F64vec2_SELECT(neq) F64vec2_SELECT(nlt) F64vec2_SELECT(nle) inline int F64vec2ToInt(const F64vec2 &a)
 
F64vec2 F32vec4ToF64vec2 (const F32vec4 &a)
 
F32vec4 F64vec2ToF32vec4 (const F64vec2 &a)
 
F64vec2 IntToF64vec2 (const F64vec2 &a, int b)
 
F32vec8 unpack_low (const F32vec8 &a, const F32vec8 &b)
 
F32vec8 unpack_high (const F32vec8 &a, const F32vec8 &b)
 
int move_mask (const F32vec8 &a)
 
void loadu (F32vec8 &a, const float *p)
 
void storeu (float *p, const F32vec8 &a)
 
void store_nta (float *p, const F32vec8 &a)
 
void maskload (F32vec8 &a, const float *p, const F32vec8 &m)
 
void maskload (F32vec4 &a, const float *p, const F32vec4 &m)
 
void maskstore (float *p, const F32vec8 &a, const F32vec8 &m)
 
void maskstore (float *p, const F32vec4 &a, const F32vec4 &m)
 
F32vec8 select_eq (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
 
F32vec8 select_lt (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
 
F32vec8 select_le (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
 
F32vec8 select_gt (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
 
F32vec8 select_ge (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
 
F32vec8 select_neq (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
 
F32vec8 select_nlt (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
 
F32vec8 select_nle (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
 
F32vec8 select_ngt (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
 
F32vec8 select_nge (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
 
F64vec4 unpack_low (const F64vec4 &a, const F64vec4 &b)
 
F64vec4 unpack_high (const F64vec4 &a, const F64vec4 &b)
 
int move_mask (const F64vec4 &a)
 
void loadu (F64vec4 &a, double *p)
 
void storeu (double *p, const F64vec4 &a)
 
void store_nta (double *p, const F64vec4 &a)
 
void maskload (F64vec4 &a, const double *p, const F64vec4 &m)
 
void maskload (F64vec2 &a, const double *p, const F64vec2 &m)
 
void maskstore (double *p, const F64vec4 &a, const F64vec4 &m)
 
void maskstore (double *p, const F64vec2 &a, const F64vec2 &m)
 
F64vec4 select_eq (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
 
F64vec4 select_lt (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
 
F64vec4 select_le (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
 
F64vec4 select_gt (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
 
F64vec4 select_ge (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
 
F64vec4 select_neq (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
 
F64vec4 select_nlt (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
 
F64vec4 select_nle (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
 
F64vec4 select_ngt (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
 
F64vec4 select_nge (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
 
F64vec4 F32vec4ToF64vec4 (const F32vec4 &a)
 
F32vec4 F64vec4ToF32vec8 (const F64vec4 &a)
 

Variables

union {
   int   i [4]
   __m128d   m
} __f64vec2_abs_mask_cheat = {0xffffffff, 0x7fffffff, 0xffffffff, 0x7fffffff}
 

Macro Definition Documentation

#define _f64vec2_abs_mask   ((F64vec2)__f64vec2_abs_mask_cheat.m)
#define _MM_16B(element, vector)   (*((signed char*)&##vector + ##element))

#define _MM_16UB(element, vector)   (*((unsigned char*)&##vector + ##element))

#define _MM_2QW(element, vector)   (*((__int64*)&##vector + ##element))

#define _MM_4DW(element, vector)   (*((int*)&##vector + ##element))

#define _MM_4UDW(element, vector)   (*((unsigned int*)&##vector + ##element))

#define _MM_8UW(element, vector)   (*((unsigned short*)&##vector + ##element))

#define _MM_8W(element, vector)   (*((short*)&##vector + ##element))
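A minimal usage sketch (assuming an MSVC-style toolchain, since dvec.h and its token pasting on the & operator are Microsoft-specific): the _MM_* macros reinterpret a vector variable as an array of lanes, so they need an lvalue vector.

#include <dvec.h>
#include <cstdio>

int main() {
    I32vec4 v = _mm_setr_epi32(10, 20, 30, 40);   // lanes 0..3
    for (int lane = 0; lane < 4; ++lane)          // _MM_4DW reads v as int[4]
        std::printf("lane %d = %d\n", lane, _MM_4DW(lane, v));
    return 0;
}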
#define _VEC_ASSERT(_Expression)   (void)( (!!(_Expression)) || (_wassert(_CRT_WIDE(#_Expression), _CRT_WIDE(__FILE__), __LINE__), 0) )
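_VEC_ASSERT is the header's internal assertion: when the expression is false it calls _wassert with the stringized expression, file, and line, then aborts. A hypothetical sketch of the kind of bounds check the vector classes perform with it in debug builds:

#include <dvec.h>

int main() {
    int lane = 5;
    _VEC_ASSERT((lane >= 0) && (lane < 4));   // false: _wassert reports the expression and aborts
    return 0;
}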
#define F64vec2_COMI(op)   friend int comi##op (const F64vec2 &a, const F64vec2 &b) { return _mm_comi##op##_sd(a,b); }

#define F64vec2_COMP(op)   friend F64vec2 cmp##op (const F64vec2 &a, const F64vec2 &b) { return _mm_cmp##op##_pd(a,b); }
#define F64vec2_SELECT(op)
Value:
inline F64vec2 select_##op (const F64vec2 &a, const F64vec2 &b, const F64vec2 &c, const F64vec2 &d) \
{ \
F64vec2 mask = _mm_cmp##op##_pd(a,b); \
return( (mask & c) | F64vec2((_mm_andnot_pd(mask,d)))); \
}
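The macro generates the F64vec2 select_* functions: the compare mask picks c lanes where the comparison holds and d lanes elsewhere. A minimal sketch using the generated select_gt (note the F64vec2 constructor takes (d1, d0), high lane first):

#include <dvec.h>
#include <cstdio>

int main() {
    F64vec2 a(4.0, 1.0), b(2.0, 3.0);     // ctor order (d1, d0): lane 1 first
    F64vec2 c(1.0, 1.0), d(-1.0, -1.0);
    F64vec2 r = select_gt(a, b, c, d);    // lane1: 4>2 -> 1.0; lane0: 1>3 -> -1.0
    double out[2];
    storeu(out, r);                       // out[0] = -1.0, out[1] = 1.0
    std::printf("%f %f\n", out[0], out[1]);
    return 0;
}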
#define F64vec2_UCOMI(op)   friend int ucomi##op (const F64vec2 &a, const F64vec2 &b) { return _mm_ucomi##op##_sd(a,b); }
#define IVEC128_ADD_SUB(vect, element, opsize)
Value:
inline I##vect##vec##element operator+ (const I##vect##vec##element &a, const I##vect##vec##element &b) \
{ return _mm_add_##opsize( a,b); } \
inline I##vect##vec##element operator- (const I##vect##vec##element &a, const I##vect##vec##element &b) \
{ return _mm_sub_##opsize( a,b); }
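For instance, IVEC128_ADD_SUB(s32, 4, epi32) generates wrapping operator+ and operator- for Is32vec4 on top of _mm_add_epi32/_mm_sub_epi32. A sketch of the wrap-around behavior, assuming the header instantiates the macro for Is32vec4 (contrast with sat_add below):

#include <dvec.h>
#include <climits>

int main() {
    Is32vec4 a = _mm_set1_epi32(INT_MAX);
    Is32vec4 b = _mm_set1_epi32(1);
    Is32vec4 sum = a + b;                     // wraps to INT_MIN in every lane
    return _MM_4DW(0, sum) == INT_MIN ? 0 : 1;
}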
#define IVEC128_LOGICALS(vect, element)
Value:
inline I##vect##vec##element operator& (const I##vect##vec##element &a, const I##vect##vec##element &b) \
{ return _mm_and_si128( a,b); } \
inline I##vect##vec##element operator| (const I##vect##vec##element &a, const I##vect##vec##element &b) \
{ return _mm_or_si128( a,b); } \
inline I##vect##vec##element operator^ (const I##vect##vec##element &a, const I##vect##vec##element &b) \
{ return _mm_xor_si128( a,b); } \
inline I##vect##vec##element andnot (const I##vect##vec##element &a, const I##vect##vec##element &b) \
{ return _mm_andnot_si128( a,b); }
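Note the operand order that andnot inherits from _mm_andnot_si128: the first operand is the one complemented, so andnot(a, b) computes (~a) & b. A small sketch, assuming the header instantiates the macro for I32vec4:

#include <dvec.h>

int main() {
    I32vec4 a = _mm_set1_epi32(0x0F0F0F0F);
    I32vec4 b = _mm_set1_epi32(-1);
    I32vec4 r = andnot(a, b);                        // (~a) & b = 0xF0F0F0F0 per lane
    return _MM_4DW(0, r) == (int)0xF0F0F0F0 ? 0 : 1;
}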
#define IVEC128_SELECT(vect12, vect34, element, selop)
Value:
inline I##vect34##vec##element select_##selop ( \
const I##vect12##vec##element &a, \
const I##vect12##vec##element &b, \
const I##vect34##vec##element &c, \
const I##vect34##vec##element &d) \
{ \
I##vect12##vec##element mask = cmp##selop(a,b); \
return ( I##vect34##vec##element (mask & c ) | \
I##vect34##vec##element ((_mm_andnot_si128(mask, d )))); \
}
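The expansion is the classic mask-and-merge select. Spelled out with the documented compare and logical wrappers, the generated function is equivalent to this sketch (select_gt_sketch is a hypothetical name; the macro would emit select_gt):

#include <dvec.h>

// Pick c lanes where a > b, d lanes otherwise.
inline Is32vec4 select_gt_sketch(const Is32vec4 &a, const Is32vec4 &b,
                                 const Is32vec4 &c, const Is32vec4 &d)
{
    Is32vec4 mask = cmpgt(a, b);    // ~0 where a > b, 0 elsewhere
    return (mask & c) | Is32vec4(_mm_andnot_si128(mask, d));
}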

Function Documentation

_CRTIMP void __cdecl _wassert (_In_z_ const wchar_t *_Message, _In_z_ const wchar_t *_File, _In_ unsigned _Line)

M128 andnot (const M128 &a, const M128 &b)
inline
{ return _mm_andnot_si128(a,b); }

I32vec4 cmpeq (const I32vec4 &a, const I32vec4 &b)
inline
{ return _mm_cmpeq_epi32(a,b); }

Is32vec4 cmpeq (const Is32vec4 &a, const Is32vec4 &b)
inline
{ return _mm_cmpeq_epi32(a,b); }

Iu32vec4 cmpeq (const Iu32vec4 &a, const Iu32vec4 &b)
inline
{ return _mm_cmpeq_epi32(a,b); }

I16vec8 cmpeq (const I16vec8 &a, const I16vec8 &b)
inline
{ return _mm_cmpeq_epi16(a,b); }

Is16vec8 cmpeq (const Is16vec8 &a, const Is16vec8 &b)
inline
{ return _mm_cmpeq_epi16(a,b); }

Iu16vec8 cmpeq (const Iu16vec8 &a, const Iu16vec8 &b)
inline
{ return _mm_cmpeq_epi16(a,b); }

I8vec16 cmpeq (const I8vec16 &a, const I8vec16 &b)
inline
{ return _mm_cmpeq_epi8(a,b); }

Is8vec16 cmpeq (const Is8vec16 &a, const Is8vec16 &b)
inline
{ return _mm_cmpeq_epi8(a,b); }

Iu8vec16 cmpeq (const Iu8vec16 &a, const Iu8vec16 &b)
inline
{ return _mm_cmpeq_epi8(a,b); }
Is32vec4 cmpgt (const Is32vec4 &a, const Is32vec4 &b)
inline
{ return _mm_cmpgt_epi32(a,b); }

Is16vec8 cmpgt (const Is16vec8 &a, const Is16vec8 &b)
inline
{ return _mm_cmpgt_epi16(a,b); }

Is8vec16 cmpgt (const Is8vec16 &a, const Is8vec16 &b)
inline
{ return _mm_cmpgt_epi8(a,b); }

Is32vec4 cmplt (const Is32vec4 &a, const Is32vec4 &b)
inline
{ return _mm_cmpgt_epi32(b,a); }

Is16vec8 cmplt (const Is16vec8 &a, const Is16vec8 &b)
inline
{ return _mm_cmpgt_epi16(b,a); }

Is8vec16 cmplt (const Is8vec16 &a, const Is8vec16 &b)
inline
{ return _mm_cmplt_epi8(a,b); }
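These comparisons return per-lane masks (all ones where the predicate holds, zero elsewhere), not booleans, and the 32- and 16-bit cmplt overloads simply swap operands on cmpgt, since the underlying ISA only provides signed greater-than compares at those widths. A small sketch:

#include <dvec.h>

int main() {
    Is32vec4 a = _mm_setr_epi32(1, 5, 3, 7);
    Is32vec4 b = _mm_setr_epi32(2, 2, 3, 9);
    Is32vec4 m = cmplt(a, b);           // lanes: ~0, 0, 0, ~0
    return _MM_4DW(0, m) == -1 ? 0 : 1;
}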
I32vec4 cmpneq (const I32vec4 &a, const I32vec4 &b)
inline
{ return _mm_andnot_si128(_mm_cmpeq_epi32(a,b), get_mask128()); }

Is32vec4 cmpneq (const Is32vec4 &a, const Is32vec4 &b)
inline
{ return _mm_andnot_si128(_mm_cmpeq_epi32(a,b), get_mask128()); }

Iu32vec4 cmpneq (const Iu32vec4 &a, const Iu32vec4 &b)
inline
{ return _mm_andnot_si128(_mm_cmpeq_epi32(a,b), get_mask128()); }

I16vec8 cmpneq (const I16vec8 &a, const I16vec8 &b)
inline
{ return _mm_andnot_si128(_mm_cmpeq_epi16(a,b), get_mask128()); }

Is16vec8 cmpneq (const Is16vec8 &a, const Is16vec8 &b)
inline
{ return _mm_andnot_si128(_mm_cmpeq_epi16(a,b), get_mask128()); }

Iu16vec8 cmpneq (const Iu16vec8 &a, const Iu16vec8 &b)
inline
{ return _mm_andnot_si128(_mm_cmpeq_epi16(a,b), get_mask128()); }

I8vec16 cmpneq (const I8vec16 &a, const I8vec16 &b)
inline
{ return _mm_andnot_si128(_mm_cmpeq_epi8(a,b), get_mask128()); }

Is8vec16 cmpneq (const Is8vec16 &a, const Is8vec16 &b)
inline
{ return _mm_andnot_si128(_mm_cmpeq_epi8(a,b), get_mask128()); }

Iu8vec16 cmpneq (const Iu8vec16 &a, const Iu8vec16 &b)
inline
{ return _mm_andnot_si128(_mm_cmpeq_epi8(a,b), get_mask128()); }
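SSE2 has no integer not-equal compare, so every cmpneq overload inverts the cmpeq mask by ANDNOT-ing it against the all-ones vector from get_mask128(). An equivalent sketch in terms of the documented wrappers (assuming the header's andnot instantiation for I16vec8):

#include <dvec.h>

int main() {
    I16vec8 a = _mm_set1_epi16(1), b = _mm_set1_epi16(2);
    I16vec8 viaWrapper = cmpneq(a, b);                             // ~0 in every lane here
    I16vec8 byHand = andnot(cmpeq(a, b), I16vec8(get_mask128()));  // same mask
    return _MM_8W(0, viaWrapper) == _MM_8W(0, byHand) ? 0 : 1;
}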
F64vec2 F32vec4ToF64vec2 (const F32vec4 &a)
inline
{ return _mm_cvtps_pd(a); }

F64vec4 F32vec4ToF64vec4 (const F32vec4 &a)
inline
{ return _mm256_cvtps_pd(a); }

F64vec2_SELECT (eq) F64vec2_SELECT (lt) F64vec2_SELECT (le) F64vec2_SELECT (gt) F64vec2_SELECT (ge) F64vec2_SELECT (neq) F64vec2_SELECT (nlt) F64vec2_SELECT (nle) inline int F64vec2ToInt (const F64vec2 &a)
{ return _mm_cvttsd_si32(a); }

F32vec4 F64vec2ToF32vec4 (const F64vec2 &a)
inline
{ return _mm_cvtpd_ps(a); }

F32vec4 F64vec4ToF32vec8 (const F64vec4 &a)
inline
{ return _mm256_cvtpd_ps(a); }

const __m128i get_mask128 ()
inline
{
    static const __m128i mask128 = _mm_set1_epi64(M64(0xffffffffffffffffi64));
    return mask128;
}

F64vec2 IntToF64vec2 (const F64vec2 &a, int b)
inline
{ return _mm_cvtsi32_sd(a,b); }
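A conversion round-trip sketch: F32vec4ToF64vec2 widens the two low float lanes, F64vec2ToF32vec4 narrows them back into the low half, and F64vec2ToInt truncates lane 0 toward zero:

#include <dvec.h>

int main() {
    F32vec4 f(4.0f, 3.0f, 2.0f, 1.5f);    // ctor order (f3, f2, f1, f0)
    F64vec2 d = F32vec4ToF64vec2(f);      // lanes: 1.5, 2.0
    F32vec4 back = F64vec2ToF32vec4(d);   // 1.5, 2.0 in the low lanes
    (void)back;
    return F64vec2ToInt(d) == 1 ? 0 : 1;  // truncates 1.5 -> 1
}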
void loadu (F64vec2 &a, double *p)
inline
{ a = _mm_loadu_pd(p); }

void loadu (F32vec8 &a, const float *p)
inline
{ a = _mm256_loadu_ps(p); }

void loadu (F64vec4 &a, double *p)
inline
{ a = _mm256_loadu_pd(p); }
void maskload (F32vec8 &a, const float *p, const F32vec8 &m)
inline
{ a = _mm256_maskload_ps(p, _mm256_castps_si256(m)); }

void maskload (F32vec4 &a, const float *p, const F32vec4 &m)
inline
{ a = _mm_maskload_ps(p, _mm_castps_si128(m)); }

void maskload (F64vec4 &a, const double *p, const F64vec4 &m)
inline
{ a = _mm256_maskload_pd(p, _mm256_castpd_si256(m)); }

void maskload (F64vec2 &a, const double *p, const F64vec2 &m)
inline
{ a = _mm_maskload_pd(p, _mm_castpd_si128(m)); }
void maskstore (float *p, const F32vec8 &a, const F32vec8 &m)
inline
{ _mm256_maskstore_ps(p, _mm256_castps_si256(m), a); }

void maskstore (float *p, const F32vec4 &a, const F32vec4 &m)
inline
{ _mm_maskstore_ps(p, _mm_castps_si128(m), a); }

void maskstore (double *p, const F64vec4 &a, const F64vec4 &m)
inline
{ _mm256_maskstore_pd(p, _mm256_castpd_si256(m), a); }

void maskstore (double *p, const F64vec2 &a, const F64vec2 &m)
inline
{ _mm_maskstore_pd(p, _mm_castpd_si128(m), a); }
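A tail-handling sketch (AVX assumed): the mask selects which elements are touched, a lane being active when its sign bit is set, so a 3-element tail can be loaded and stored without running past the buffer:

#include <dvec.h>

int main() {
    double buf[3] = {1.0, 2.0, 3.0};
    F64vec4 m = _mm256_castsi256_pd(_mm256_setr_epi64x(-1, -1, -1, 0));  // 3 active lanes
    F64vec4 v = _mm256_setzero_pd();
    maskload(v, buf, m);     // loads buf[0..2]; the inactive lane reads as 0.0
    maskstore(buf, v, m);    // writes only the three active lanes back
    return buf[2] == 3.0 ? 0 : 1;
}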
int move_mask (const F64vec2 &a)
inline
{ return _mm_movemask_pd(a); }

int move_mask (const F32vec8 &a)
inline
{ return _mm256_movemask_ps(a); }

int move_mask (const F64vec4 &a)
inline
{ return _mm256_movemask_pd(a); }
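move_mask packs the sign bit of each lane into the low bits of an int, which is handy for branching on comparison results. Sketch:

#include <dvec.h>

int main() {
    F64vec2 v(-1.0, 2.0);        // lane1 = -1.0, lane0 = 2.0
    int bits = move_mask(v);     // bit i = sign of lane i, so 0b10 here
    return bits == 2 ? 0 : 1;
}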
Is32vec4 mul_add (const Is16vec8 &a, const Is16vec8 &b)
inline
{ return _mm_madd_epi16(a,b); }
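mul_add wraps PMADDWD: it multiplies corresponding 16-bit lanes and adds adjacent pairs of the 32-bit products, a common dot-product building block. Sketch:

#include <dvec.h>

int main() {
    Is16vec8 a = _mm_set1_epi16(3);
    Is16vec8 b = _mm_set1_epi16(4);
    Is32vec4 pairs = mul_add(a, b);        // every 32-bit lane = 3*4 + 3*4 = 24
    return _MM_4DW(0, pairs) == 24 ? 0 : 1;
}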
Is16vec8 mul_high (const Is16vec8 &a, const Is16vec8 &b)
inline
{ return _mm_mulhi_epi16(a,b); }

I16vec8 mul_high (const Iu16vec8 &a, const Iu16vec8 &b)
inline
{ return _mm_mulhi_epu16(a,b); }
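mul_high keeps the high 16 bits of each 32-bit product, while operator* keeps the low 16, so the pair reconstructs a full product. Sketch:

#include <dvec.h>

int main() {
    Is16vec8 a = _mm_set1_epi16(1000);
    Is16vec8 b = _mm_set1_epi16(1000);
    Is16vec8 hi = mul_high(a, b);     // 1000000 >> 16 = 15 per lane
    Is16vec8 lo = a * b;              // 1000000 & 0xFFFF = 16960 per lane
    int full = (_MM_8W(0, hi) << 16) | (unsigned short)_MM_8W(0, lo);
    return full == 1000000 ? 0 : 1;
}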
M128 operator& (const M128 &a, const M128 &b)
inline
{ return _mm_and_si128(a,b); }

I64vec2 operator* (const Iu32vec4 &a, const Iu32vec4 &b)
inline
{ return _mm_mul_epu32(a,b); }

I16vec8 operator* (const I16vec8 &a, const I16vec8 &b)
inline
{ return _mm_mullo_epi16(a,b); }

Is16vec8 operator* (const Is16vec8 &a, const Is16vec8 &b)
inline
{ return _mm_mullo_epi16(a,b); }

Iu16vec8 operator* (const Iu16vec8 &a, const Iu16vec8 &b)
inline
{ return _mm_mullo_epi16(a,b); }

M128 operator^ (const M128 &a, const M128 &b)
inline
{ return _mm_xor_si128(a,b); }

M128 operator| (const M128 &a, const M128 &b)
inline
{ return _mm_or_si128(a,b); }
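Note that the Iu32vec4 operator* wraps PMULUDQ, which forms full 64-bit products from lanes 0 and 2 only (the even lanes), while the 16-bit overloads keep the low 16 bits of every product. Sketch:

#include <dvec.h>

int main() {
    Iu32vec4 a = _mm_setr_epi32(100000, 7, 3, 9);
    Iu32vec4 b = _mm_setr_epi32(100000, 8, 4, 10);
    I64vec2 wide = a * b;             // lane0 = 100000*100000, lane1 = 3*4
    return _MM_2QW(0, wide) == 10000000000LL ? 0 : 1;
}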
Is16vec8 pack_sat (const Is32vec4 &a, const Is32vec4 &b)
inline
{ return _mm_packs_epi32(a,b); }

Is8vec16 pack_sat (const Is16vec8 &a, const Is16vec8 &b)
inline
{ return _mm_packs_epi16(a,b); }

Iu8vec16 packu_sat (const Is16vec8 &a, const Is16vec8 &b)
inline
{ return _mm_packus_epi16(a,b); }
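pack_sat narrows with signed saturation; packu_sat narrows to unsigned, clamping negative inputs to 0 and large ones to 255. Sketch:

#include <dvec.h>

int main() {
    Is16vec8 w = _mm_setr_epi16(-5, 300, 64, 127, 0, 0, 0, 0);
    Is8vec16 s = pack_sat(w, w);    // bytes: -5, 127, 64, 127, ... (clamped to [-128, 127])
    Iu8vec16 u = packu_sat(w, w);   // bytes:  0, 255, 64, 127, ... (clamped to [0, 255])
    return (_MM_16B(1, s) == 127 && _MM_16UB(0, u) == 0) ? 0 : 1;
}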
Is16vec8 sat_add (const Is16vec8 &a, const Is16vec8 &b)
inline
{ return _mm_adds_epi16(a,b); }

Iu16vec8 sat_add (const Iu16vec8 &a, const Iu16vec8 &b)
inline
{ return _mm_adds_epu16(a,b); }

Is8vec16 sat_add (const Is8vec16 &a, const Is8vec16 &b)
inline
{ return _mm_adds_epi8(a,b); }

Iu8vec16 sat_add (const Iu8vec16 &a, const Iu8vec16 &b)
inline
{ return _mm_adds_epu8(a,b); }

Is16vec8 sat_sub (const Is16vec8 &a, const Is16vec8 &b)
inline
{ return _mm_subs_epi16(a,b); }

Iu16vec8 sat_sub (const Iu16vec8 &a, const Iu16vec8 &b)
inline
{ return _mm_subs_epu16(a,b); }

Is8vec16 sat_sub (const Is8vec16 &a, const Is8vec16 &b)
inline
{ return _mm_subs_epi8(a,b); }

Iu8vec16 sat_sub (const Iu8vec16 &a, const Iu8vec16 &b)
inline
{ return _mm_subs_epu8(a,b); }
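Unlike the wrapping operator+ generated by IVEC128_ADD_SUB, sat_add clamps at the type's limits. Sketch, assuming the header's operator+ instantiation for Is16vec8:

#include <dvec.h>

int main() {
    Is16vec8 big = _mm_set1_epi16(30000);
    Is16vec8 saturated = sat_add(big, big);   // clamps: every lane = 32767
    Is16vec8 wrapped = big + big;             // wraps:  every lane = -5536
    return (_MM_8W(0, saturated) == 32767 && _MM_8W(0, wrapped) == -5536) ? 0 : 1;
}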
F32vec8 select_eq (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
inline
{ return _mm256_blendv_ps(d, c, _mm256_cmp_ps(a, b, _CMP_EQ_OQ)); }

F64vec4 select_eq (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
inline
{ return _mm256_blendv_pd(d, c, _mm256_cmp_pd(a, b, _CMP_EQ_OQ)); }

F32vec8 select_ge (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
inline
{ return _mm256_blendv_ps(d, c, _mm256_cmp_ps(a, b, _CMP_GE_OS)); }

F64vec4 select_ge (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
inline
{ return _mm256_blendv_pd(d, c, _mm256_cmp_pd(a, b, _CMP_GE_OS)); }

F32vec8 select_gt (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
inline
{ return _mm256_blendv_ps(d, c, _mm256_cmp_ps(a, b, _CMP_GT_OS)); }

F64vec4 select_gt (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
inline
{ return _mm256_blendv_pd(d, c, _mm256_cmp_pd(a, b, _CMP_GT_OS)); }

F32vec8 select_le (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
inline
{ return _mm256_blendv_ps(d, c, _mm256_cmp_ps(a, b, _CMP_LE_OS)); }

F64vec4 select_le (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
inline
{ return _mm256_blendv_pd(d, c, _mm256_cmp_pd(a, b, _CMP_LE_OS)); }

F32vec8 select_lt (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
inline
{ return _mm256_blendv_ps(d, c, _mm256_cmp_ps(a, b, _CMP_LT_OS)); }

F64vec4 select_lt (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
inline
{ return _mm256_blendv_pd(d, c, _mm256_cmp_pd(a, b, _CMP_LT_OS)); }

F32vec8 select_neq (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
inline
{ return _mm256_blendv_ps(d, c, _mm256_cmp_ps(a, b, _CMP_NEQ_UQ)); }

F64vec4 select_neq (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
inline
{ return _mm256_blendv_pd(d, c, _mm256_cmp_pd(a, b, _CMP_NEQ_UQ)); }

F32vec8 select_nge (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
inline
{ return _mm256_blendv_ps(d, c, _mm256_cmp_ps(a, b, _CMP_NGE_US)); }

F64vec4 select_nge (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
inline
{ return _mm256_blendv_pd(d, c, _mm256_cmp_pd(a, b, _CMP_NGE_US)); }

F32vec8 select_ngt (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
inline
{ return _mm256_blendv_ps(d, c, _mm256_cmp_ps(a, b, _CMP_NGT_US)); }

F64vec4 select_ngt (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
inline
{ return _mm256_blendv_pd(d, c, _mm256_cmp_pd(a, b, _CMP_NGT_US)); }

F32vec8 select_nle (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
inline
{ return _mm256_blendv_ps(d, c, _mm256_cmp_ps(a, b, _CMP_NLE_US)); }

F64vec4 select_nle (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
inline
{ return _mm256_blendv_pd(d, c, _mm256_cmp_pd(a, b, _CMP_NLE_US)); }

F32vec8 select_nlt (const F32vec8 &a, const F32vec8 &b, const F32vec8 &c, const F32vec8 &d)
inline
{ return _mm256_blendv_ps(d, c, _mm256_cmp_ps(a, b, _CMP_NLT_US)); }

F64vec4 select_nlt (const F64vec4 &a, const F64vec4 &b, const F64vec4 &c, const F64vec4 &d)
inline
{ return _mm256_blendv_pd(d, c, _mm256_cmp_pd(a, b, _CMP_NLT_US)); }
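The AVX select_* family blends per lane with _mm256_blendv: lanes where the comparison holds come from c, the rest from d. A clamp-to-zero sketch:

#include <dvec.h>

int main() {
    F32vec8 x = _mm256_setr_ps(-1.f, 2.f, -3.f, 4.f, -5.f, 6.f, -7.f, 8.f);
    F32vec8 zero = _mm256_setzero_ps();
    F32vec8 r = select_lt(x, zero, zero, x);   // negative lanes -> 0, others unchanged
    float out[8];
    storeu(out, r);                            // 0, 2, 0, 4, 0, 6, 0, 8
    return out[0] == 0.f ? 0 : 1;
}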
Iu16vec8 simd_avg (const Iu16vec8 &a, const Iu16vec8 &b)
inline
{ return _mm_avg_epu16(a,b); }

Iu8vec16 simd_avg (const Iu8vec16 &a, const Iu8vec16 &b)
inline
{ return _mm_avg_epu8(a,b); }

Is16vec8 simd_max (const Is16vec8 &a, const Is16vec8 &b)
inline
{ return _mm_max_epi16(a,b); }

Iu8vec16 simd_max (const Iu8vec16 &a, const Iu8vec16 &b)
inline
{ return _mm_max_epu8(a,b); }

Is16vec8 simd_min (const Is16vec8 &a, const Is16vec8 &b)
inline
{ return _mm_min_epi16(a,b); }

Iu8vec16 simd_min (const Iu8vec16 &a, const Iu8vec16 &b)
inline
{ return _mm_min_epu8(a,b); }
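simd_avg rounds up, computing (a + b + 1) >> 1 per lane; simd_max and simd_min select per lane. Sketch:

#include <dvec.h>

int main() {
    Iu8vec16 a = _mm_set1_epi8(1);
    Iu8vec16 b = _mm_set1_epi8(2);
    Iu8vec16 avg = simd_avg(a, b);   // (1 + 2 + 1) >> 1 = 2 (rounds up)
    Iu8vec16 hi  = simd_max(a, b);   // 2 in every lane
    return (_MM_16UB(0, avg) == 2 && _MM_16UB(0, hi) == 2) ? 0 : 1;
}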
void store_nta (double *p, F64vec2 &a)
inline
{ _mm_stream_pd(p,a); }

void store_nta (float *p, const F32vec8 &a)
inline
{ _mm256_stream_ps(p, a); }

void store_nta (double *p, const F64vec4 &a)
inline
{ _mm256_stream_pd(p, a); }

void storeu (double *p, const F64vec2 &a)
inline
{ _mm_storeu_pd(p, a); }

void storeu (float *p, const F32vec8 &a)
inline
{ _mm256_storeu_ps(p, a); }

void storeu (double *p, const F64vec4 &a)
inline
{ _mm256_storeu_pd(p, a); }
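loadu and storeu tolerate unaligned pointers; store_nta is a streaming (non-temporal) store that bypasses the cache, useful for large output buffers that will not be re-read soon, and it requires an aligned destination. Sketch:

#include <dvec.h>

int main() {
    double src[2] = {1.0, 2.0};
    alignas(16) double dst[2];
    F64vec2 v = _mm_setzero_pd();
    loadu(v, src);         // unaligned load
    storeu(dst, v);        // unaligned store
    store_nta(dst, v);     // non-temporal store; dst must be 16-byte aligned
    return dst[0] == 1.0 ? 0 : 1;
}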
I64vec2 sum_abs (const Iu8vec16 &a, const Iu8vec16 &b)
inline
{ return _mm_sad_epu8(a,b); }
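sum_abs wraps PSADBW: for each 8-byte half it sums the absolute byte differences into a 16-bit total, returned in the two 64-bit lanes; it is the workhorse of motion-estimation and checksum-style kernels. Sketch:

#include <dvec.h>

int main() {
    Iu8vec16 a = _mm_set1_epi8(10);
    Iu8vec16 b = _mm_set1_epi8(3);
    I64vec2 sad = sum_abs(a, b);        // each 64-bit lane = 8 * |10 - 3| = 56
    return _MM_2QW(0, sad) == 56 ? 0 : 1;
}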
I64vec2 unpack_high (const I64vec2 &a, const I64vec2 &b)
inline
{ return _mm_unpackhi_epi64(a,b); }

I32vec4 unpack_high (const I32vec4 &a, const I32vec4 &b)
inline
{ return _mm_unpackhi_epi32(a,b); }

Is32vec4 unpack_high (const Is32vec4 &a, const Is32vec4 &b)
inline
{ return _mm_unpackhi_epi32(a,b); }

Iu32vec4 unpack_high (const Iu32vec4 &a, const Iu32vec4 &b)
inline
{ return _mm_unpackhi_epi32(a,b); }

I16vec8 unpack_high (const I16vec8 &a, const I16vec8 &b)
inline
{ return _mm_unpackhi_epi16(a,b); }

Is16vec8 unpack_high (const Is16vec8 &a, const Is16vec8 &b)
inline
{ return _mm_unpackhi_epi16(a,b); }

Iu16vec8 unpack_high (const Iu16vec8 &a, const Iu16vec8 &b)
inline
{ return _mm_unpackhi_epi16(a,b); }

I8vec16 unpack_high (const I8vec16 &a, const I8vec16 &b)
inline
{ return _mm_unpackhi_epi8(a,b); }

Is8vec16 unpack_high (const Is8vec16 &a, const Is8vec16 &b)
inline
{ return _mm_unpackhi_epi8(a,b); }

Iu8vec16 unpack_high (const Iu8vec16 &a, const Iu8vec16 &b)
inline
{ return _mm_unpackhi_epi8(a,b); }

F64vec2 unpack_high (const F64vec2 &a, const F64vec2 &b)
inline
{ return _mm_unpackhi_pd(a, b); }

F32vec8 unpack_high (const F32vec8 &a, const F32vec8 &b)
inline
{ return _mm256_unpackhi_ps(a, b); }

F64vec4 unpack_high (const F64vec4 &a, const F64vec4 &b)
inline
{ return _mm256_unpackhi_pd(a, b); }
I64vec2 unpack_low (const I64vec2 &a, const I64vec2 &b)
inline
{ return _mm_unpacklo_epi64(a,b); }

I32vec4 unpack_low (const I32vec4 &a, const I32vec4 &b)
inline
{ return _mm_unpacklo_epi32(a,b); }

Is32vec4 unpack_low (const Is32vec4 &a, const Is32vec4 &b)
inline
{ return _mm_unpacklo_epi32(a,b); }

Iu32vec4 unpack_low (const Iu32vec4 &a, const Iu32vec4 &b)
inline
{ return _mm_unpacklo_epi32(a,b); }

I16vec8 unpack_low (const I16vec8 &a, const I16vec8 &b)
inline
{ return _mm_unpacklo_epi16(a,b); }

Is16vec8 unpack_low (const Is16vec8 &a, const Is16vec8 &b)
inline
{ return _mm_unpacklo_epi16(a,b); }

Iu16vec8 unpack_low (const Iu16vec8 &a, const Iu16vec8 &b)
inline
{ return _mm_unpacklo_epi16(a,b); }

I8vec16 unpack_low (const I8vec16 &a, const I8vec16 &b)
inline
{ return _mm_unpacklo_epi8(a,b); }

Is8vec16 unpack_low (const Is8vec16 &a, const Is8vec16 &b)
inline
{ return _mm_unpacklo_epi8(a,b); }

Iu8vec16 unpack_low (const Iu8vec16 &a, const Iu8vec16 &b)
inline
{ return _mm_unpacklo_epi8(a,b); }

F64vec2 unpack_low (const F64vec2 &a, const F64vec2 &b)
inline
{ return _mm_unpacklo_pd(a, b); }

F32vec8 unpack_low (const F32vec8 &a, const F32vec8 &b)
inline
{ return _mm256_unpacklo_ps(a, b); }

F64vec4 unpack_low (const F64vec4 &a, const F64vec4 &b)
inline
{ return _mm256_unpacklo_pd(a, b); }
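unpack_low and unpack_high interleave lanes from the low or high halves of their operands (for the 256-bit types, within each 128-bit half). Sketch:

#include <dvec.h>

int main() {
    I32vec4 a = _mm_setr_epi32(0, 1, 2, 3);
    I32vec4 b = _mm_setr_epi32(4, 5, 6, 7);
    I32vec4 lo = unpack_low(a, b);     // lanes: 0, 4, 1, 5
    I32vec4 hi = unpack_high(a, b);    // lanes: 2, 6, 3, 7
    return (_MM_4DW(1, lo) == 4 && _MM_4DW(0, hi) == 2) ? 0 : 1;
}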

Variable Documentation

const { ... } __f64vec2_abs_mask_cheat

int i[4]

__m128d m