vspline 1.1.0
Generic C++11 Code for Uniform B-Splines
std_simd_type.h
Go to the documentation of this file.
1/************************************************************************/
2/* */
3/* vspline - a set of generic tools for creation and evaluation */
4/* of uniform b-splines */
5/* */
6/* Copyright 2015 - 2023 by Kay F. Jahnke */
7/* */
8/* The git repository for this software is at */
9/* */
10/* https://bitbucket.org/kfj/vspline */
11/* */
12/* Please direct questions, bug reports, and contributions to */
13/* */
14/* kfjahnke+vspline@gmail.com */
15/* */
16/* Permission is hereby granted, free of charge, to any person */
17/* obtaining a copy of this software and associated documentation */
18/* files (the "Software"), to deal in the Software without */
19/* restriction, including without limitation the rights to use, */
20/* copy, modify, merge, publish, distribute, sublicense, and/or */
21/* sell copies of the Software, and to permit persons to whom the */
22/* Software is furnished to do so, subject to the following */
23/* conditions: */
24/* */
25/* The above copyright notice and this permission notice shall be */
26/* included in all copies or substantial portions of the */
27/* Software. */
28/* */
29/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND */
30/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES */
31/* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND */
32/* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT */
33/* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, */
34/* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING */
35/* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR */
36/* OTHER DEALINGS IN THE SOFTWARE. */
37/* */
38/************************************************************************/
39
40/*! \file std_simd_type.h
41
42 \brief SIMD type derived from std::simd
43
44 To use this header, an implementation of std::simd has to be
45 installed, and the -std=c++17 option is needed as well.
46 It has been tried with clang++ and g++; you'll need a
47 recent version.
48
49*/
50
51#ifndef VSPLINE_STD_SIMD_TYPE_H
52#define VSPLINE_STD_SIMD_TYPE_H
53
54#ifndef VSPLINE_VECTOR_NBYTES
55
56#define VSPLINE_VECTOR_NBYTES 64
57
58#endif
59
60#include <iostream>
61#include <experimental/simd>
62
63namespace vspline
64{
65
66/// class template simd_type provides a fixed-size SIMD type.
67/// This implementation of vspline::simd_type uses std::simd as
68/// base class. This class is used as a stand-in for Vc::SimdArray
69/// - it does not cover the whole interface, but a reasonably
70/// large subset - my choice of SimdArray functionality is what
71/// I need in lux/vspline.
72/// Most of the 'loop variant' of simd_type has been ported to
73/// use std::simd instead, making use of std::simd's
74/// - constructors
75/// - copy_from and copy_to
76/// - masks and where expressions
77/// - operator functions
78/// - overloads for several mathematical functions
79/// - min/max
80
81template < typename _value_type ,
82 std::size_t _vsize >
83struct simd_type
84: private std::experimental::simd
85 < _value_type ,
86 std::experimental::simd_abi::fixed_size < _vsize >
87 >
88{
89 typedef std::experimental::simd_abi::fixed_size < _vsize > abi_t ;
90
91 typedef std::size_t size_type ;
92 typedef _value_type value_type ;
93 static const size_type vsize = _vsize ;
94
95 typedef std::experimental::simd < value_type , abi_t > base_t ;
96 typedef std::experimental::simd < int , abi_t > index_type ;
97 using typename base_t::mask_type ;
98
99 // provide the size as a constexpr
100
101 static constexpr size_type size()
102 {
103 return vsize ;
104 }
105
108
109 // operator[] is mapped to std::simd element access
110
111 using base_t::operator[] ;
112
113 // assignment from a value_type. The assignment is coded as a loop,
114 // but it should be obvious to the compiler's loop vectorizer that
115 // the loop is a 'SIMD operation in disguise', so here we have the
116 // first appearance of 'goading'.
117
119 {
120 to_base() = base_t ( rhs ) ;
121 return *this ;
122 }
123
124 // c'tor from value_type. Initialization is delegated to the
125 // corresponding base class c'tor.
126
127 simd_type ( const value_type & ini )
128 : base_t ( ini )
129 { }
130
131 simd_type ( const base_t & ini )
132 : base_t ( ini )
133 { }
134
135 // these two c'tors are left in default mode
136
137 simd_type() = default ;
138 simd_type ( const simd_type & ) = default ;
139
141 {
142 return ( * static_cast < base_t * > ( this ) ) ;
143 }
144
145 const base_t & to_base() const
146 {
147 return ( * static_cast < const base_t * const > ( this ) ) ;
148 }
149
150 // assignment from equally-sized container. Most containers use std::size_t
151 // for the template argument defining the number of elements they hold,
152 // but some (notably vigra::TinyVector) use int, which is probably a relic
153 // from times when non-type template arguments were of a restricted type
154 // set only. An instantiation of the macro below for SIZE_TYPE int would
155 // admit equally-sized vigra::TinyVectors; here only std::size_t is used.
156 // the c'tor from an equally-sized container also uses the corresponding
157 // operator= overload, so we use one macro for both.
158 // we also need two different variants of vsize for g++; clang++ accepts
159 // size_type vsize for both places where VSZ is used, but g++ requires
160 // an integer.
161 // Note that the rhs can use any elementary type which can be legally
162 // assigned to value_type. This allows transport of information from
163 // differently typed objects, but there are no further constraints on
164 // the types involved, which may degrade precision. It's the user's
165 // responsibility to make sure such assignments have the desired effect
166 // and overload them if necessary.
167
168 #define BUILD_FROM_CONTAINER(SIZE_TYPE,VSZ) \
169 template < typename U , template < typename , SIZE_TYPE > class V > \
170 simd_type & operator= ( const V < U , VSZ > & rhs ) \
171 { \
172 for ( size_type i = 0 ; i < vsize ; i++ ) \
173 to_base() [ i ] = value_type ( rhs [ i ] ) ; \
174 return *this ; \
175 } \
176 template < typename U , template < typename , SIZE_TYPE > class V > \
177 simd_type ( const V < U , VSZ > & ini ) \
178 { \
179 *this = ini ; \
180 }
181
182 BUILD_FROM_CONTAINER(std::size_t,vsize)
183
184 #undef BUILD_FROM_CONTAINER
185
186 static const simd_type iota()
187 {
188 simd_type result ;
189 for ( size_type i = 0 ; i < vsize ; i++ )
190 result [ i ] = value_type ( i ) ;
191 return result ;
192 }
193
194 // mimick Vc's IndexesFromZero. This function produces an index
195 // vector filled with indexes starting with zero.
196
198 {
199 typedef typename index_type::value_type IT ;
200 static const IT ceiling = std::numeric_limits < IT > :: max() ;
201 assert ( ( vsize - 1 ) <= std::size_t ( ceiling ) ) ;
202
203 index_type ix ;
204 for ( size_type i = 0 ; i < vsize ; i++ )
205 ix [ i ] = int ( i ) ;
206 return ix ;
207 }
208
209 // variant which starts from a different starting point and optionally
210 // uses steps other than one.
211
212 static const index_type IndexesFrom ( std::size_t start ,
213 std::size_t step = 1 )
214 {
215 typedef typename index_type::value_type IT ;
216 static const IT ceiling = std::numeric_limits < IT > :: max() ;
217 assert ( start + ( vsize - 1 ) * step <= std::size_t ( ceiling ) ) ;
218
219 return ( IndexesFromZero() * int(step) ) + int(start) ;
220 }
221
222 // functions Zero and One produce simd_type objects filled with
223 // 0, or 1, respectively
224
225 static const simd_type Zero()
226 {
227 return simd_type ( value_type ( 0 ) ) ;
228 }
229
230 static const simd_type One()
231 {
232 return simd_type ( value_type ( 1 ) ) ;
233 }
234
235 // echo the vector to a std::ostream, read it from an istream
236
237 friend std::ostream & operator<< ( std::ostream & osr ,
238 simd_type it )
239 {
240 osr << "(" ;
241 for ( size_type i = 0 ; i < vsize - 1 ; i++ )
242 osr << it [ i ] << ", " ;
243 osr << it [ vsize - 1 ] << ")" ;
244 return osr ;
245 }
246
247 friend std::istream & operator>> ( std::istream & isr ,
248 simd_type it )
249 {
250 for ( size_type i = 0 ; i < vsize ; i++ )
251 isr >> it [ i ] ;
252 return isr ;
253 }
254
255 // memory access functions, which load and store vector data.
256 // We start out with functions transporting data from memory into
257 // the simd_type. Some of these operations have corresponding
258 // c'tors which use the member function to initialize to_base().
259
260 // load delegates to std::simd::copy_from. TODO: consider
261 // overalignment
262
263 void load ( const value_type * const p_src )
264 {
265 to_base().copy_from ( p_src ,
266 std::experimental::element_aligned_tag() ) ;
267 }
268
269 // std::simd does not offer gather/scatter, but it offers a
270 // c'tor taking a functor to set the elements. In theory, this
271 // is a good idea, because the optimizer might realize that
272 // the sum of invocations of the functor can be represented
273 // by a gather/scatter operation, but how well this works is
274 // a different matter and mileage varies.
275
276#define GS_LAMBDA
277
278#ifdef GS_LAMBDA
279
280 template < typename index_type >
281 void gather ( const value_type * const p_src ,
282 const index_type & indexes )
283 {
284 // assign base_t object created by gen-type c'tor
285 to_base() = base_t ( [&] ( const size_t & i )
286 { return p_src [ indexes[i] ] ; } ) ;
287 }
288
289#else
290
291 template < typename index_type >
292 void gather ( const value_type * const p_src ,
293 const index_type & indexes )
294 {
295 for ( std::size_t i = 0 ; i < vsize ; i++ )
296 (*this)[i] = p_src [ indexes [ i ] ] ;
297 }
298
299#endif
300
301 // c'tor from pointer and indexes, uses gather
302
303 template < typename index_type >
304 simd_type ( const value_type * const p_src ,
305 const index_type & indexes )
306 {
307 gather ( p_src , indexes ) ;
308 }
309
310 // store saves the content of the vector to memory
311
312 void store ( value_type * const p_trg ) const
313 {
314 to_base().copy_to ( p_trg ,
315 std::experimental::element_aligned_tag() ) ;
316 }
317
318 // scatter is the reverse operation to gather
319
320#ifdef GS_LAMBDA
321
322 template < typename index_type >
323 void scatter ( value_type * const p_trg ,
324 const index_type & indexes ) const
325 {
326 // gen-type c'tor is only used for side effects; let the compiler
327 // figure out that the result is unused.
328
329 base_t dummy ( [&] ( const size_t & i )
330 { return p_trg [ indexes[i] ] = to_base()[i] ; } ) ;
331 }
332
333#else
334
335 template < typename index_type >
336 void scatter ( value_type * const p_trg ,
337 const index_type & indexes ) const
338 {
339 for ( std::size_t i = 0 ; i < vsize ; i++ )
340 p_trg [ indexes [ i ] ] = (*this)[i] ;
341 }
342
343#endif
344
345 // 'regular' gather and scatter, accessing strided memory so that the
346 // first address visited is p_src/p_trg, and successive addresses are
347 // 'step' apart - in units of T. Might also be done with goading, the
348 // loop should autovectorize.
349
350 void rgather ( const value_type * const p_src ,
351 const std::size_t & step )
352 {
353 auto indexes = IndexesFrom ( 0 , step ) ;
354 gather ( p_src , indexes ) ;
355 }
356
357 void rscatter ( value_type * p_trg ,
358 const std::size_t & step ) const
359 {
360 auto indexes = IndexesFrom ( 0 , step ) ;
361 scatter ( p_trg , indexes ) ;
362 }
363
364 // apply functions from namespace std to each element in a vector,
365 // or to each corresponding set of elements in a set of vectors
366 // - going up to three for fma.
367 // many standard functions autovectorize well. Note that the
368 // autovectorization of standard functions often needs additional
369 // compiler flags, like, e.g., -fno-math-errno for clang++, to
370 // produce hardware SIMD instructions.
371
// generates a friend function FUNC taking one simd_type. It calls FUNC
// on the argument's base class object; the result is converted back
// to simd_type on return.

#define BROADCAST_STD_FUNC(FUNC) \
  friend simd_type FUNC ( simd_type arg ) \
  { \
    const base_t & lanes = arg.to_base() ; \
    return FUNC ( lanes ) ; \
  }
377
378 // TODO: getting zero back for negative args, hence no BROADCAST_STD_FUNC
379 // this happens with clang++ only, I opened an issue with VcDevel/std-simd:
380 // https://github.com/VcDevel/std-simd/issues/31
381
382// BROADCAST_STD_FUNC(abs)
383
384 friend simd_type abs ( simd_type arg )
385 {
386 arg ( arg < 0 ) = - arg ;
387 return arg ;
388 }
389
390 BROADCAST_STD_FUNC(trunc)
391
392 BROADCAST_STD_FUNC(round)
393 BROADCAST_STD_FUNC(floor)
398
399 // the support for autovectorization of trigonometric functions is
400 // sketchy - e.g. the clang++ reference does not mention them as
401 // functions which autovectorize. Vc offers hand-coded trigonometric
402 // functions which might be worth while porting to std::simd, but so
403 // far I haven't seen this happen. In my application, this results in
404 // bad performance when these functions are used.
405
410
411 // TODO: odd: with clang++, sin and cos don't perform as expected;
412 // using a loop does the trick:
413
414#ifdef __clang__
415
416 friend simd_type cos ( simd_type arg )
417 {
418 simd_type result ;
419 for ( std::size_t i = 0 ; i < size() ; i++ )
420 result[i] = std::cos ( arg[i] ) ;
421 return result ;
422 }
423
424 friend simd_type sin ( simd_type arg )
425 {
426 simd_type result ;
427 for ( std::size_t i = 0 ; i < size() ; i++ )
428 result[i] = std::sin ( arg[i] ) ;
429 return result ;
430 }
431
432#else
433
436
437#endif
438
439 #undef BROADCAST_STD_FUNC
440
// generates a friend function FUNC taking two simd_type arguments. It
// calls FUNC on the arguments' base class objects; the result is
// converted back to simd_type on return.

#define BROADCAST_STD_FUNC2(FUNC) \
  friend simd_type FUNC ( simd_type arg1 , \
                          simd_type arg2 ) \
  { \
    const base_t & lanes1 = arg1.to_base() ; \
    const base_t & lanes2 = arg2.to_base() ; \
    return FUNC ( lanes1 , lanes2 ) ; \
  }
447
448 // a short note on atan2: Vc provides a hand-written vectorized version
449 // of atan2 which is especially fast and superior to autovectorized code.
450
453
454 #undef BROADCAST_STD_FUNC2
455
// generates a friend function FUNC taking three simd_type arguments. It
// calls FUNC on the arguments' base class objects; the result is
// converted back to simd_type on return.

#define BROADCAST_STD_FUNC3(FUNC) \
  friend simd_type FUNC ( simd_type arg1 , \
                          simd_type arg2 , \
                          simd_type arg3 ) \
  { \
    const base_t & lanes1 = arg1.to_base() ; \
    const base_t & lanes2 = arg2.to_base() ; \
    const base_t & lanes3 = arg3.to_base() ; \
    return FUNC ( lanes1 , lanes2 , lanes3 ) ; \
  }
463
465
466 #undef BROADCAST_STD_FUNC3
467
468 // macros used for the parameter 'CONSTRAINT' in the definitions
469 // further down. Some operations are only allowed for integral types
470 // or booleans. This might be enforced by enable_if, here we use a
471 // static_assert with a clear error message.
472 // TODO: might relax constraints by using 'std::is_convertible'
473
474 #define INTEGRAL_ONLY \
475 static_assert ( std::is_integral < value_type > :: value , \
476 "this operation is only allowed for integral types" ) ;
477
478 #define BOOL_ONLY \
479 static_assert ( std::is_same < value_type , bool > :: value , \
480 "this operation is only allowed for booleans" ) ;
481
482 // augmented assignment operators. Some operators are only applicable
483 // to specific data types, which is enforced by 'CONSTRAINT'.
484 // One might consider widening the scope by making these operator
485 // functions templates and accepting arbitrary indexable types.
486 // Only value_type and simd_type itself are taken as rhs arguments.
487
488 #define OPEQ_FUNC(OPFUNC,OPEQ,CONSTRAINT) \
489 simd_type & OPFUNC ( value_type rhs ) \
490 { \
491 CONSTRAINT \
492 to_base() OPEQ rhs ; \
493 return *this ; \
494 } \
495 simd_type & OPFUNC ( simd_type rhs ) \
496 { \
497 CONSTRAINT \
498 to_base() OPEQ rhs.to_base() ; \
499 return *this ; \
500 }
501
502 OPEQ_FUNC(operator+=,+=,)
503 OPEQ_FUNC(operator-=,-=,)
504 OPEQ_FUNC(operator*=,*=,)
505 OPEQ_FUNC(operator/=,/=,)
506
507 OPEQ_FUNC(operator%=,%=,INTEGRAL_ONLY)
508 OPEQ_FUNC(operator&=,&=,INTEGRAL_ONLY)
509 OPEQ_FUNC(operator|=,|=,INTEGRAL_ONLY)
510 OPEQ_FUNC(operator^=,^=,INTEGRAL_ONLY)
511 OPEQ_FUNC(operator<<=,<<=,INTEGRAL_ONLY)
512 OPEQ_FUNC(operator>>=,>>=,INTEGRAL_ONLY)
513
514 #undef OPEQ_FUNC
515
516 // we use a simple scheme for type promotion: the promoted type
517 // of two values should be the same as the type we would receive
518 // when adding the two values. That's standard C semantics, but
519 // it won't widen the result type to avoid overflow or increase
520 // precision - such conversions have to be made by user code if
521 // necessary.
522
523 #define C_PROMOTE(A,B) \
524 typename std::conditional \
525 < std::is_same < A , B > :: value , \
526 A , \
527 decltype ( std::declval < A > () \
528 + std::declval < B > () ) \
529 > :: type
530
531 // binary operators and left and right scalar operations with
532 // value_type, unary operators -, ! and ~
533
// OP_FUNC generates a binary operator in three overloads:
// - simd_type OP simd_type < RHST , vsize >
// - simd_type OP fundamental RHST
// - fundamental LHST OP simd_type (as a friend)
// In each overload, both operands are first converted to the promoted
// type given by C_PROMOTE, then the operation is applied to the base
// class object(s). The result is a simd_type of the promoted type.
// CONSTRAINT is expanded at the start of each function body.

#define OP_FUNC(OPFUNC,OP,CONSTRAINT) \
  template < typename RHST , \
             typename = typename std::enable_if \
                        < std::is_fundamental < RHST > :: value \
                        > :: type \
           > \
  simd_type < C_PROMOTE ( value_type , RHST ) , vsize > \
  OPFUNC ( simd_type < RHST , vsize > _rhs ) const \
  { \
    CONSTRAINT \
    typedef simd_type < C_PROMOTE ( value_type , RHST ) , vsize > wide_t ; \
    wide_t lhs ( *this ) ; \
    wide_t rhs ( _rhs ) ; \
    return lhs.to_base() OP rhs.to_base() ; \
  } \
  template < typename RHST , \
             typename = typename std::enable_if \
                        < std::is_fundamental < RHST > :: value \
                        > :: type \
           > \
  simd_type < C_PROMOTE ( value_type , RHST ) , vsize > \
  OPFUNC ( RHST _rhs ) const \
  { \
    CONSTRAINT \
    typedef C_PROMOTE ( value_type , RHST ) wide_value_t ; \
    typedef simd_type < wide_value_t , vsize > wide_t ; \
    wide_t lhs ( *this ) ; \
    wide_value_t rhs ( _rhs ) ; \
    return lhs.to_base() OP rhs ; \
  } \
  template < typename LHST , \
             typename = typename std::enable_if \
                        < std::is_fundamental < LHST > :: value \
                        > :: type \
           > \
  friend simd_type < C_PROMOTE ( LHST , value_type ) , vsize > \
  OPFUNC ( LHST _lhs , simd_type _rhs ) \
  { \
    CONSTRAINT \
    typedef C_PROMOTE ( LHST , value_type ) wide_value_t ; \
    typedef simd_type < wide_value_t , vsize > wide_t ; \
    wide_value_t lhs ( _lhs ) ; \
    wide_t rhs ( _rhs ) ; \
    return lhs OP rhs.to_base() ; \
  }
574
575 // binary operators and left and right scalar operations with
576 // value_type, unary operators -, ! and ~
577
578 // #define OP_FUNC(OPFUNC,OP,CONSTRAINT) \
579 // simd_type OPFUNC ( simd_type rhs ) const \
580 // { \
581 // CONSTRAINT \
582 // return this->to_base() OP rhs.to_base() ; \
583 // } \
584 // simd_type OPFUNC ( value_type rhs ) const \
585 // { \
586 // CONSTRAINT \
587 // return this->to_base() OP rhs ; \
588 // } \
589 // friend simd_type OPFUNC ( value_type lhs , \
590 // simd_type rhs ) \
591 // { \
592 // CONSTRAINT \
593 // return lhs OP rhs.to_base() ; \
594 // }
595 //
596 OP_FUNC(operator+,+,)
597 OP_FUNC(operator-,-,)
598 OP_FUNC(operator*,*,)
599 OP_FUNC(operator/,/,)
600
601 OP_FUNC(operator%,%,INTEGRAL_ONLY)
602 OP_FUNC(operator&,&,INTEGRAL_ONLY)
603 OP_FUNC(operator|,|,INTEGRAL_ONLY)
604 OP_FUNC(operator^,^,INTEGRAL_ONLY)
605 OP_FUNC(operator<<,<<,INTEGRAL_ONLY)
606 OP_FUNC(operator>>,>>,INTEGRAL_ONLY)
607
608 OP_FUNC(operator&&,&&,BOOL_ONLY)
609 OP_FUNC(operator||,||,BOOL_ONLY)
610
611 #undef OP_FUNC
612
613 #define OP_FUNC(OPFUNC,OP,CONSTRAINT) \
614 simd_type OPFUNC() const \
615 { \
616 return OP this->to_base() ; \
617 }
618
619 OP_FUNC(operator-,-,)
620 OP_FUNC(operator!,!,BOOL_ONLY)
621 OP_FUNC(operator~,~,INTEGRAL_ONLY)
622
623 #undef OP_FUNC
624
625 // provide methods to produce a mask on comparing a vector
626 // with another vector or a value_type.
627
628 #define COMPARE_FUNC(OP,OPFUNC) \
629 friend mask_type OPFUNC ( simd_type lhs , \
630 simd_type rhs ) \
631 { \
632 return lhs.to_base() OP rhs.to_base() ; \
633 } \
634 friend mask_type OPFUNC ( simd_type lhs , \
635 value_type rhs ) \
636 { \
637 return lhs.to_base() OP rhs ; \
638 } \
639 friend mask_type OPFUNC ( value_type lhs , \
640 simd_type rhs ) \
641 { \
642 return lhs OP rhs.to_base() ; \
643 }
644
645 COMPARE_FUNC(<,operator<) ;
646 COMPARE_FUNC(<=,operator<=) ;
647 COMPARE_FUNC(>,operator>) ;
648 COMPARE_FUNC(>=,operator>=) ;
649 COMPARE_FUNC(==,operator==) ;
650 COMPARE_FUNC(!=,operator!=) ;
651
652 #undef COMPARE_FUNC
653
654 // note: std::simd's mask_type has associated functions any_of, all_of
655 // and none_of, so we needn't define them for this backend
656
657 // next we define a masked vector as an object holding two references:
658 // one reference to a mask type, determining which of the vector's
659 // elements will be 'open' to an effect, and one reference to a vector,
660 // which will be affected by the operation.
661 // The resulting object will only be viable as long as the referred-to
662 // mask and vector are alive - it's meant as a construct to be processed
663 // in the same scope, as the lhs of an assignment, typically using
664 // notation introduced by Vc: a vector's operator() is overloaded to
665 // produce a masked_type when called with a mask_type object, and
666 // the resulting masked_type object is then assigned to.
667 // Note that this does not have any effect on those values in 'whither'
668 // for which the mask is false. They remain unchanged.
669
670 typedef std::experimental::where_expression < mask_type , base_t > we_t ;
671
672 struct masked_type
673 {
674 mask_type whether ; // if the mask is true at whether[i]
675 simd_type & whither ; // whither[i] will be assigned to
676
678 simd_type & _whither )
679 : whether ( _whether ) ,
680 whither ( _whither )
681 { }
682
683 // for the masked vector, we define the complete set of assignments:
684
685 #define OPEQ_FUNC(OPFUNC,OPEQ,CONSTRAINT) \
686 simd_type & OPFUNC ( value_type rhs ) \
687 { \
688 CONSTRAINT \
689 we_t ( whether , whither.to_base() ) OPEQ rhs ; \
690 return whither ; \
691 } \
692 simd_type & OPFUNC ( simd_type rhs ) \
693 { \
694 CONSTRAINT \
695 we_t ( whether , whither.to_base() ) OPEQ rhs.to_base() ; \
696 return whither ; \
697 }
698
699 OPEQ_FUNC(operator=,=,)
700 OPEQ_FUNC(operator+=,+=,)
701 OPEQ_FUNC(operator-=,-=,)
702 OPEQ_FUNC(operator*=,*=,)
703 OPEQ_FUNC(operator/=,/=,)
704 OPEQ_FUNC(operator%=,%=,INTEGRAL_ONLY)
705 OPEQ_FUNC(operator&=,&=,INTEGRAL_ONLY)
706 OPEQ_FUNC(operator|=,|=,INTEGRAL_ONLY)
707 OPEQ_FUNC(operator^=,^=,INTEGRAL_ONLY)
708 OPEQ_FUNC(operator<<=,<<=,INTEGRAL_ONLY)
709 OPEQ_FUNC(operator>>=,>>=,INTEGRAL_ONLY)
710
711 #undef OPEQ_FUNC
712
713 #undef INTEGRAL_ONLY
714 #undef BOOL_ONLY
715
716 } ;
717
718 // mimicking Vc, we define operator() with a mask_type argument
719 // to produce a masked_type object, which can be used later on to
720 // masked-assign to the referred-to vector. With this definition
721 // we can use the same syntax Vc uses, e.g. v1 ( v1 > v2 ) = v3
722 // This helps write code which compiles with Vc and without,
723 // because this idiom is 'very Vc'.
724
726 {
727 return masked_type ( mask , *this ) ;
728 }
729
730 // member functions at_least and at_most. These functions provide the
731 // same functionality as max, or min, respectively. Given simd_type X
732 // and some threshold Y, X.at_least ( Y ) == max ( X , Y )
733 // Having the functionality as a member function makes it easy to
734 // implement, e.g., min as: min ( X , Y ) { return X.at_most ( Y ) ; }
735
736 #define CLAMP(FNAME,REL) \
737 simd_type FNAME ( simd_type threshold ) const \
738 { \
739 return REL ( to_base() , threshold.to_base() ) ; \
740 } \
741 simd_type FNAME ( value_type threshold ) const \
742 { \
743 return REL ( to_base() , threshold ) ; \
744 } \
745
746 CLAMP(at_least,max)
747 CLAMP(at_most,min)
748
749 #undef CLAMP
750
751 // sum of vector elements. Note that there is no type promotion; the
752 // summation is done to value_type. Caller must make sure that overflow
753 // is not a problem.
754
756 {
757 value_type s ( 0 ) ;
758 for ( std::size_t e = 0 ; e < vsize ; e++ )
759 s += (*this) [ e ] ;
760 return s ;
761 }
762} ;
763
764} ;
765
766#endif // VSPLINE_STD_SIMD_TYPE_H
void load(const value_type *const p_src)
friend simd_type abs(simd_type arg)
std::size_t size_type
Definition: std_simd_type.h:91
std::experimental::simd_abi::fixed_size< _vsize > abi_t
Definition: std_simd_type.h:89
COMPARE_FUNC(==, operator==)
value_type sum() const
void store(value_type *const p_trg) const
_value_type value_type
Definition: std_simd_type.h:92
simd_type(const simd_type &)=default
COMPARE_FUNC(<, operator<)
void rgather(const value_type *const p_src, const std::size_t &step)
COMPARE_FUNC(>, operator>)
friend std::istream & operator>>(std::istream &isr, simd_type it)
Definition: simd_type.h:360
static const simd_type Zero()
static constexpr size_type size()
simd_type(const value_type &ini)
std::experimental::simd< value_type, abi_t > base_t
Definition: std_simd_type.h:95
static const index_type IndexesFromZero()
void scatter(value_type *const p_trg, const index_type &indexes) const
friend std::ostream & operator<<(std::ostream &osr, simd_type it)
Definition: simd_type.h:350
simd_type & operator=(const value_type &rhs)
Definition: simd_type.h:209
static const simd_type iota()
std::experimental::simd< int, abi_t > index_type
Definition: std_simd_type.h:96
static const size_type vsize
Definition: simd_type.h:162
COMPARE_FUNC(!=, operator!=)
const base_t & to_base() const
void rscatter(value_type *p_trg, const std::size_t &step) const
static const index_type IndexesFrom(std::size_t start, std::size_t step=1)
simd_type(const value_type *const p_src, const index_type &indexes)
COMPARE_FUNC(>=, operator>=)
simd_type< int, vsize > index_type
Definition: simd_type.h:185
void gather(const value_type *const p_src, const index_type &indexes)
std::experimental::where_expression< mask_type, base_t > we_t
masked_type operator()(mask_type mask)
Definition: simd_type.h:834
simd_type(const base_t &ini)
static const simd_type One()
index_type IndexType
COMPARE_FUNC(<=, operator<=)
Definition: basis.h:79
#define BUILD_FROM_CONTAINER(SIZE_TYPE, VSZ)
#define BROADCAST_STD_FUNC3(FUNC)
#define OPEQ_FUNC(OPFUNC, OPEQ, CONSTRAINT)
#define BROADCAST_STD_FUNC(FUNC)
#define OP_FUNC(OPFUNC, OP, CONSTRAINT)
#define CLAMP(FNAME, REL)
#define INTEGRAL_ONLY
#define BOOL_ONLY
#define BROADCAST_STD_FUNC2(FUNC)
masked_type(mask_type _whether, simd_type &_whither)