51#ifndef VSPLINE_STD_SIMD_TYPE_H
52#define VSPLINE_STD_SIMD_TYPE_H
54#ifndef VSPLINE_VECTOR_NBYTES
56#define VSPLINE_VECTOR_NBYTES 64
61#include <experimental/simd>
81template <
typename _value_type ,
84:
private std::experimental::simd
86 std::experimental::simd_abi::fixed_size < _vsize >
89 typedef std::experimental::simd_abi::fixed_size < _vsize >
abi_t ;
95 typedef std::experimental::simd < value_type , abi_t >
base_t ;
96 typedef std::experimental::simd < int , abi_t >
index_type ;
97 using typename base_t::mask_type ;
111 using base_t::operator[] ;
142 return ( *
static_cast < base_t *
> (
this ) ) ;
147 return ( *
static_cast < const base_t *
const > (
this ) ) ;
168 #define BUILD_FROM_CONTAINER(SIZE_TYPE,VSZ) \
169 template < typename U , template < typename , SIZE_TYPE > class V > \
170 simd_type & operator= ( const V < U , VSZ > & rhs ) \
172 for ( size_type i = 0 ; i < vsize ; i++ ) \
173 to_base() [ i ] = value_type ( rhs [ i ] ) ; \
176 template < typename U , template < typename , SIZE_TYPE > class V > \
177 simd_type ( const V < U , VSZ > & ini ) \
184 #undef BUILD_FROM_CONTAINER
200 static const IT ceiling = std::numeric_limits < IT > :: max() ;
201 assert ( (
vsize - 1 ) <= std::size_t ( ceiling ) ) ;
205 ix [ i ] =
int ( i ) ;
213 std::size_t step = 1 )
216 static const IT ceiling = std::numeric_limits < IT > :: max() ;
217 assert ( start + (
vsize - 1 ) * step <= std::size_t ( ceiling ) ) ;
242 osr << it [ i ] <<
", " ;
243 osr << it [
vsize - 1 ] <<
")" ;
266 std::experimental::element_aligned_tag() ) ;
280 template <
typename index_type >
286 {
return p_src [ indexes[i] ] ; } ) ;
291 template <
typename index_type >
295 for ( std::size_t i = 0 ; i <
vsize ; i++ )
296 (*
this)[i] = p_src [ indexes [ i ] ] ;
303 template <
typename index_type >
307 gather ( p_src , indexes ) ;
315 std::experimental::element_aligned_tag() ) ;
322 template <
typename index_type >
329 base_t dummy ( [&] (
const size_t & i )
330 {
return p_trg [ indexes[i] ] =
to_base()[i] ; } ) ;
335 template <
typename index_type >
339 for ( std::size_t i = 0 ; i <
vsize ; i++ )
340 p_trg [ indexes [ i ] ] = (*
this)[i] ;
351 const std::size_t & step )
354 gather ( p_src , indexes ) ;
358 const std::size_t & step )
const
372 #define BROADCAST_STD_FUNC(FUNC) \
373 friend simd_type FUNC ( simd_type arg ) \
375 return FUNC ( arg.to_base() ) ; \
386 arg ( arg < 0 ) = - arg ;
419 for ( std::size_t i = 0 ; i <
size() ; i++ )
420 result[i] = std::cos ( arg[i] ) ;
427 for ( std::size_t i = 0 ; i <
size() ; i++ )
428 result[i] = std::sin ( arg[i] ) ;
439 #undef BROADCAST_STD_FUNC
441 #define BROADCAST_STD_FUNC2(FUNC) \
442 friend simd_type FUNC ( simd_type arg1 , \
445 return FUNC ( arg1.to_base() , arg2.to_base() ) ; \
454 #undef BROADCAST_STD_FUNC2
456 #define BROADCAST_STD_FUNC3(FUNC) \
457 friend simd_type FUNC ( simd_type arg1 , \
461 return FUNC ( arg1.to_base() , arg2.to_base() , arg3.to_base() ) ; \
466 #undef BROADCAST_STD_FUNC3
474 #define INTEGRAL_ONLY \
475 static_assert ( std::is_integral < value_type > :: value , \
476 "this operation is only allowed for integral types" ) ;
479 static_assert ( std::is_same < value_type , bool > :: value , \
480 "this operation is only allowed for booleans" ) ;
488 #define OPEQ_FUNC(OPFUNC,OPEQ,CONSTRAINT) \
489 simd_type & OPFUNC ( value_type rhs ) \
492 to_base() OPEQ rhs ; \
495 simd_type & OPFUNC ( simd_type rhs ) \
498 to_base() OPEQ rhs.to_base() ; \
523 #define C_PROMOTE(A,B) \
524 typename std::conditional \
525 < std::is_same < A , B > :: value , \
527 decltype ( std::declval < A > () \
528 + std::declval < B > () ) \
534 #define OP_FUNC(OPFUNC,OP,CONSTRAINT) \
535 template < typename RHST , \
536 typename = typename std::enable_if \
537 < std::is_fundamental < RHST > :: value \
540 simd_type < C_PROMOTE ( value_type , RHST ) , size() > \
541 OPFUNC ( simd_type < RHST , vsize > _rhs ) const \
544 simd_type < C_PROMOTE ( value_type , RHST ) , vsize > lhs ( *this ) ; \
545 simd_type < C_PROMOTE ( value_type , RHST ) , vsize > rhs ( _rhs ) ; \
546 return lhs.to_base() OP rhs.to_base() ; \
548 template < typename RHST , \
549 typename = typename std::enable_if \
550 < std::is_fundamental < RHST > :: value \
553 simd_type < C_PROMOTE ( value_type , RHST ) , vsize > \
554 OPFUNC ( RHST _rhs ) const \
557 simd_type < C_PROMOTE ( value_type , RHST ) , vsize > lhs ( *this ) ; \
558 C_PROMOTE ( value_type , RHST ) rhs ( _rhs ) ; \
559 return lhs.to_base() OP rhs ; \
561 template < typename LHST , \
562 typename = typename std::enable_if \
563 < std::is_fundamental < LHST > :: value \
566 friend simd_type < C_PROMOTE ( LHST , value_type ) , vsize > \
567 OPFUNC ( LHST _lhs , simd_type _rhs ) \
570 C_PROMOTE ( value_type , LHST ) lhs ( _lhs ) ; \
571 simd_type < C_PROMOTE ( LHST , value_type ) , vsize > rhs ( _rhs ) ; \
572 return lhs OP rhs.to_base() ; \
613 #define OP_FUNC(OPFUNC,OP,CONSTRAINT) \
614 simd_type OPFUNC() const \
616 return OP this->to_base() ; \
628 #define COMPARE_FUNC(OP,OPFUNC) \
629 friend mask_type OPFUNC ( simd_type lhs , \
632 return lhs.to_base() OP rhs.to_base() ; \
634 friend mask_type OPFUNC ( simd_type lhs , \
637 return lhs.to_base() OP rhs ; \
639 friend mask_type OPFUNC ( value_type lhs , \
642 return lhs OP rhs.to_base() ; \
670 typedef std::experimental::where_expression < mask_type , base_t >
we_t ;
685 #define OPEQ_FUNC(OPFUNC,OPEQ,CONSTRAINT) \
686 simd_type & OPFUNC ( value_type rhs ) \
689 we_t ( whether , whither.to_base() ) OPEQ rhs ; \
692 simd_type & OPFUNC ( simd_type rhs ) \
695 we_t ( whether , whither.to_base() ) OPEQ rhs.to_base() ; \
736 #define CLAMP(FNAME,REL) \
737 simd_type FNAME ( simd_type threshold ) const \
739 return REL ( to_base() , threshold.to_base() ) ; \
741 simd_type FNAME ( value_type threshold ) const \
743 return REL ( to_base() , threshold ) ; \
758 for ( std::size_t e = 0 ; e <
vsize ; e++ )
void load(const value_type *const p_src)
friend simd_type abs(simd_type arg)
std::experimental::simd_abi::fixed_size< _vsize > abi_t
COMPARE_FUNC(==, operator==)
void store(value_type *const p_trg) const
simd_type(const simd_type &)=default
COMPARE_FUNC(<, operator<)
void rgather(const value_type *const p_src, const std::size_t &step)
COMPARE_FUNC(>, operator>)
friend std::istream & operator>>(std::istream &isr, simd_type it)
static const simd_type Zero()
static constexpr size_type size()
simd_type(const value_type &ini)
std::experimental::simd< value_type, abi_t > base_t
static const index_type IndexesFromZero()
void scatter(value_type *const p_trg, const index_type &indexes) const
friend std::ostream & operator<<(std::ostream &osr, simd_type it)
simd_type & operator=(const value_type &rhs)
static const simd_type iota()
std::experimental::simd< int, abi_t > index_type
static const size_type vsize
COMPARE_FUNC(!=, operator!=)
const base_t & to_base() const
void rscatter(value_type *p_trg, const std::size_t &step) const
static const index_type IndexesFrom(std::size_t start, std::size_t step=1)
simd_type(const value_type *const p_src, const index_type &indexes)
COMPARE_FUNC(>=, operator>=)
simd_type< int, vsize > index_type
void gather(const value_type *const p_src, const index_type &indexes)
std::experimental::where_expression< mask_type, base_t > we_t
masked_type operator()(mask_type mask)
simd_type(const base_t &ini)
static const simd_type One()
COMPARE_FUNC(<=, operator<=)
#define BUILD_FROM_CONTAINER(SIZE_TYPE, VSZ)
#define BROADCAST_STD_FUNC3(FUNC)
#define OPEQ_FUNC(OPFUNC, OPEQ, CONSTRAINT)
#define BROADCAST_STD_FUNC(FUNC)
#define OP_FUNC(OPFUNC, OP, CONSTRAINT)
#define CLAMP(FNAME, REL)
#define BROADCAST_STD_FUNC2(FUNC)
masked_type(mask_type _whether, simd_type &_whither)