convolve_8h_source.html

/************************************************************************/

/*                                                                      */

/*    vspline - a set of generic tools for creation and evaluation      */

/*              of uniform b-splines                                    */

/*                                                                      */

/*            Copyright 2015 - 2023 by Kay F. Jahnke                    */

/*                                                                      */

/*    The git repository for this software is at                        */

/*                                                                      */

/*    https://bitbucket.org/kfj/vspline                                 */

/*                                                                      */

/*    Please direct questions, bug reports, and contributions to        */

/*                                                                      */

/*    kfjahnke+vspline@gmail.com                                        */

/*                                                                      */

/*    Permission is hereby granted, free of charge, to any person       */

/*    obtaining a copy of this software and associated documentation    */

/*    files (the "Software"), to deal in the Software without           */

/*    restriction, including without limitation the rights to use,      */

/*    copy, modify, merge, publish, distribute, sublicense, and/or      */

/*    sell copies of the Software, and to permit persons to whom the    */

/*    Software is furnished to do so, subject to the following          */

/*    conditions:                                                       */

/*                                                                      */

/*    The above copyright notice and this permission notice shall be    */

/*    included in all copies or substantial portions of the             */

/*    Software.                                                         */

/*                                                                      */

/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */

/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */

/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */

/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */

/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */

/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */

/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */

/*    OTHER DEALINGS IN THE SOFTWARE.                                   */

/*                                                                      */

/************************************************************************/


/*! \file convolve.h


    \brief separable convolution of nD arrays


    This file provides the core filtering code for convolution, which

    can be used by itself to filter 1D arrays, or is used with the

    'wielding' code in filter.h to filter nD arrays. The latter use is

    what's used throughout most of vspline, since it provides automatic

    multithreading and vectorization by buffering the data and applying

    the 1D code to the buffer.


    The implementation of convolution in this file can safely operate

    in-place. The actual convolution operation is done using a small

    kernel-sized circular buffer, which is multiplied with an adequately

    shifted and rotated representation of the kernel. This is done

    avoiding conditionals as best as possible. The 1D data are extrapolated

    with one of the boundary condition codes known to class extrapolator

    (see extrapolate.h). This is done transparently by putting extrapolated

    data into the small circular buffer where this is needed.


    The code is trivial insofar as it only uses indexed assignments, addition

    and multiplication. So it can operate on a wide variety of data types,

    prominently among them SIMD vector types.


    Note how I use the kernel front-to-back, in the same forward sequence as

    the data it is applied to. This is contrary to the normal convention of

    using the kernel values back-to-front. Inside vspline, where only

    symmetrical kernels are used, this makes no difference, but when vspline's

    convolution code is used for other convolutions, this has to be kept in

    mind.

*/


#ifndef VSPLINE_CONVOLVE_H

#define VSPLINE_CONVOLVE_H


#include "common.h"

#include "extrapolate.h"

#include "filter.h"


namespace vspline {


/// fir_filter_specs holds the parameters for a filter performing
/// a convolution along a single axis. In vspline, the place where
/// the specifications for a filter are fixed and the place where
/// it is finally created are far apart: the filter is created
/// in the separate worker threads. So this structure serves as
/// a vehicle to transport the arguments.
/// Note the specification of 'headroom': this allows for
/// non-symmetrical and even kernels. When applying the kernel
/// to obtain output[i], the kernel is applied to
/// input [ i - headroom ] , ... , input [ i - headroom + ksize - 1 ]
///
/// The structure merely stores the 'kernel' pointer: the coefficients
/// are neither copied nor owned here, so the memory they live in has
/// to remain valid for as long as filters are created from the specs.
///
/// The constructor asserts 0 <= headroom < ksize. The upper bound makes
/// sure the kernel window contains the sample it is applied to; the
/// lower bound is needed because the filtering code in this file stashes
/// ksize - headroom trailing values in a buffer of only ksize elements.

struct fir_filter_specs
{
  vspline::bc_code bc ;     // boundary conditions
  int ksize ;               // kernel size
  int headroom ;            // part of kernel 'to the left'
  const xlf_type * kernel ; // pointer to kernel values (not owned)

  /// \param _bc       boundary condition code used for extrapolation
  /// \param _ksize    number of kernel coefficients
  /// \param _headroom number of coefficients applied 'left' of the
  ///                  current sample; must be in [ 0 , _ksize )
  /// \param _kernel   pointer to _ksize kernel coefficients

  fir_filter_specs ( vspline::bc_code _bc ,
                     int _ksize ,
                     int _headroom ,
                     const xlf_type * _kernel )
  : bc ( _bc ) ,
    ksize ( _ksize ) ,
    headroom ( _headroom ) ,
    kernel ( _kernel )
  {
    // a negative headroom would require more than ksize stashed
    // tail values, which the filter's tail buffer can't hold

    assert ( headroom >= 0 ) ;
    assert ( headroom < ksize ) ;
  }
} ;


/// class fir_filter provides the 'solve' routine which convolves

/// a 1D signal with selectable extrapolation. Here, the convolution

/// kernel is applied to the incoming signal and the result is written

/// to the specified output location. Note that this operation

/// can be done in-place, but input and output may also be different.

/// While most of the time this routine will be invoked by class

/// convolve (below), it is also directly used by the specialized

/// code for 1D filtering.

/// Note how we conveniently inherit from the specs class. This also

/// enables us to use an instance of fir_filter or class convolve

/// as specs argument to create further filters with the same arguments.


// TODO: some kernels are symmetric, which might be exploited.


// TODO: special code for filters with 0-valued coefficients, like

//       sinc-derived half band filters


template < typename in_type ,

           typename out_type = in_type ,

           typename _math_type = out_type >

struct fir_filter

: public fir_filter_specs

{

  // this filter type does not need storage of intermediate results.


  static const bool is_single_pass { true } ;


  typedef vigra::MultiArrayView < 1 , in_type > in_buffer_type ;

  typedef vigra::MultiArrayView < 1 , out_type > out_buffer_type ;

  typedef _math_type math_type ;


  // we put all state data into a single area of memory called 'reactor'.

  // The separate parts holding the small circular buffer, the repeated

  // kernel and the tail buffer are implemented as views to 'reactor'.

  // This way, all data participating in the arithmetics are as close

  // together in memory as possible.

  // note how the current implementation does therefore hold the kernel

  // values in the 'reactor' as simdized types (if math_type is simdized).

  // this may be suboptimal, since the kernel values might be supplied

  // as scalars and could be kept in a smaller area of memory.

  // TODO: investigate


  using allocator_t

  = typename vspline::allocator_traits < math_type > :: type ;


  vigra::MultiArray < 1 , math_type , allocator_t > reactor ;

  vigra::MultiArrayView < 1 , math_type > circular_buffer ;

  vigra::MultiArrayView < 1 , math_type > kernel_values ;

  vigra::MultiArrayView < 1 , math_type > tail_buffer ;


  fir_filter ( const fir_filter_specs & specs )

  : fir_filter_specs ( specs ) ,

    reactor ( vigra::Shape1 ( specs.ksize * 4 ) )

  {

    circular_buffer = reactor.subarray

      ( vigra::Shape1 ( 0 ) , vigra::Shape1 ( ksize ) ) ;


    kernel_values = reactor.subarray

      ( vigra::Shape1 ( ksize ) , vigra::Shape1 ( ksize * 3 ) ) ;


    tail_buffer = reactor.subarray

      ( vigra::Shape1 ( ksize * 3 ) , vigra::Shape1 ( ksize * 4 ) ) ;


    for ( int i = 0 ; i < ksize ; i++ )

      kernel_values [ i ] = kernel_values [ i + ksize ] = kernel [ i ] ;

  } ;


  /// calling code may have to set up buffers with additional

  /// space around the actual data to allow filtering code to

  /// 'run up' to the data, shedding margin effects in the

  /// process. We stay on the safe side and return the width

  /// of the whole kernel, which is always sufficient to

  /// provide safe runup.


  int get_support_width() const

  {

    return ksize ;

  }


  /// public 'solve' routine. This is for calls 'from outside',

  /// like when this object is used by itself, not as a base class

  /// of class convolve below.

  /// an extrapolator for the boundary condition code 'bc'

  /// (see fir_filter_specs) is made, then the call is delegated

  /// to the protected routine below which accepts an extrapolator

  /// on top of input and output.


  void solve ( const in_buffer_type & input ,

               out_buffer_type & output )

  {

    int size = output.size() ;

    extrapolator < in_buffer_type > source ( bc , input ) ;

    solve ( input , output , source ) ;

  }


protected:


  /// protected solve routine taking an extrapolator on top of

  /// input and output. This way, the derived class (class convolve)

  /// can maintain an extrapolator fixed to it's buffer and reuse

  /// it for subsequent calls to this routine.

  /// we use the following strategy:

  /// - keep a small circular buffer as large as the kernel

  /// - have two kernels concatenated in another buffer

  /// - by pointing into the concatenated kernels, we can always

  ///   have ksize kernel values in sequence so that this sequence

  ///   is correct for the values in the circular buffer.

  /// this strategy avoids conditionals as best as possible and

  /// should be easy to optimize. the actual code is a bit more

  /// complex to account for the fact that at the beginning and

  /// end of the data, a few extrapolated values are used. The

  /// central loop can directly read from input without using the

  /// extrapolator, which is most efficient.


  void solve ( const in_buffer_type & input ,

               out_buffer_type & output ,

               const extrapolator < in_buffer_type > & source )

  {

    if ( ksize < 1 )

    {

      // if kernel size is zero or even negative, then,

      // if operation isn't in-place, copy input to output


      if ( (void*) ( input.data() ) != (void*) ( output.data() ) )

      {

        for ( std::ptrdiff_t i = 0 ; i < output.size() ; i++ )

          output[i] = out_type ( input[i] ) ;

      }


      return ; // we're done prematurely

    }

    else if ( ksize == 1 )

    {

      // for kernel size 1 we perform the multiplication of the

      // single kernel value with the input in a simple loop without

      // using the circular buffering mechanism below. This is an

      // optimization, the circular buffer code can also handle

      // single-value kernels.


      math_type factor ( kernel[0] ) ;


      for ( std::ptrdiff_t i = 0 ; i < output.size() ; i++ )

        output[i] = out_type ( factor * math_type ( input[i] ) ) ;


      return ; // we're done prematurely

    }


    int si = - headroom ; // read position

    int ti = 0 ;          // store position


    // initialize circular buffer using the extrapolator

    // note: initially I coded to fetch only the first 'headroom'

    // values from the extrapolator, then up to ksize straight

    // from 'input'. but this is *not* correct: 'input' may by

    // very small, and with a large kernel we also need the

    // extrapolator further on after the input is already

    // consumed. So this is the correct way of doing it:


    for ( int i = 0 ; i < ksize ; i++ , si++ )

      circular_buffer[i] = source ( si ) ;


    // see how many full cycles we can run, directly accessing

    // 'input' without resorting to extrapolation


    int size = output.size() ;

    int leftover = size - si ;

    int full_cycles = 0 ;

    if ( leftover > 0 )

      full_cycles = leftover / ksize ;


    // stash the trailing extrapolated values: we want to be able

    // to operate in-place, and if we write to the buffer we can't

    // use the extrapolator over it anymore. note how we only fill

    // in ksize - headroom values. this is all we'll need, the buffer

    // may be slightly larger.


    int ntail = ksize - headroom ;

    int z = size ;

    for ( int i = 0 ; i < ntail ; i++ , z++ )

      tail_buffer[i] = source ( z ) ;


    // central loop, reading straight from input without extrapolation


    for ( int cycle = 0 ; cycle < full_cycles ; cycle++ )

    {

      auto p_kernel = kernel_values.data() + ksize ;

      auto p_data = circular_buffer.data() ;


      for ( int i = 0 ; i < ksize ; )

      {

        // perform the actual convolution

        // TODO: exploit symmetry


        math_type result = circular_buffer[0] * p_kernel[0] ;


      // KFJ 2019-02-12 tentative use of fma


#ifdef USE_FMA

        for ( int j = 1 ; j < ksize ; j++ )

          result = fma ( circular_buffer[j] , p_kernel[j] , result ) ;

#else

        for ( int j = 1 ; j < ksize ; j++ )

          result += circular_buffer[j] * p_kernel[j] ;

#endif


        // stash result


        output [ ti ] = out_type ( result ) ;


        // fetch next input value


        * p_data = input [ si ] ;


        // adjust pointers and indices


        ++ si ;

        ++ ti ;

        ++ i ;


        if ( i == ksize )

          break ;


        ++ p_data ;

        -- p_kernel ;

      }

    }


    // produce the last few values, resorting to tail_buffer

    // where it is necessary


    while ( ti < size )

    {

      auto p_kernel = kernel_values.data() + ksize ;

      auto p_data = circular_buffer.data() ;


      for ( int i = 0 ; i < ksize && ti < size ; i++ )

      {

        math_type result = circular_buffer[0] * p_kernel[0] ;

        for ( int j = 1 ; j < ksize ; j++ )

          result += circular_buffer[j] * p_kernel[j] ;


        output [ ti ] = out_type ( result ) ;


        if ( si < size )

          // still sweet

          * p_data = input [ si ] ;

        else

          // input used up, use stashed extrapolated values

          * p_data = tail_buffer [ si - size ] ;


        ++ si ;

        ++ ti ;


        ++ p_data ;

        -- p_kernel ;

      }

    }

  }

} ;


/// class convolve couples the 1D convolution code of class fir_filter
/// (above) with a buffer of vectorized data. The calling code - the
/// 'wielding' code in filter.h, which is shared with the b-spline
/// prefilter - tells the object what is to be buffered; the data are
/// then moved into the buffer, filtered there and moved back out.
///
/// template arguments:
/// - _vtype:         template yielding the vectorized type
/// - _math_ele_type: elementary type used for the arithmetic
/// - _vsize:         number of lanes of the vectorized type

template < template < typename , size_t > class _vtype ,
           typename _math_ele_type ,
           size_t _vsize >
struct convolve
: public buffer_handling < _vtype , _math_ele_type , _vsize > ,
  public vspline::fir_filter < _vtype < _math_ele_type , _vsize > >
{
  // the elementary math type is made available for queries

  using math_ele_type = _math_ele_type ;

  // shorthand for the buffer_handling base class, from which
  // we pull a few names into this scope

  using buffer_handling_type
  = buffer_handling < _vtype , _math_ele_type , _vsize > ;

  using typename buffer_handling_type::vtype ;
  using buffer_handling_type::vsize ;
  using buffer_handling_type::init ;

  // the buffer is part of the object's state. It's an array of
  // vtype, allocated with the allocator vspline prescribes for vtype.

  using allocator_t
  = typename vspline::allocator_traits < vtype > :: type ;

  using buffer_type = vigra::MultiArray < 1 , vtype , allocator_t > ;
  using buffer_view_type = vigra::MultiArrayView < 1 , vtype > ;

  buffer_type buffer ;

  // the extrapolator is set up once, on the buffer, and kept

  extrapolator < buffer_view_type > buffer_extrapolator ;

  // the fir_filter base class holds the code doing the actual work;
  // we make its 'solve' overloads and 'headroom' visible here.

  using simdized_math_type = _vtype < _math_ele_type , _vsize > ;
  using filter_type = vspline::fir_filter < simdized_math_type > ;

  using filter_type::solve ;
  using filter_type::headroom ;

  // arg_type tells interested code which type of initializer
  // ('specs') this filter is constructed from

  using arg_type = fir_filter_specs ;

  /// construct the filter from 'specs', allocate a buffer of 'size'
  /// vtype elements, bind the extrapolator to it and have the
  /// buffer_handling component use the entire buffer both as the
  /// window for incoming and for outgoing data.

  convolve ( const fir_filter_specs & specs , size_t size )
  : filter_type ( specs ) ,
    buffer ( size ) ,
    buffer_extrapolator ( specs.bc , buffer )
  {
    init ( buffer , buffer ) ;
  }

  /// filter the buffer's content in-place. This calls the 'solve'
  /// overload taking an extrapolator, passing the one held by this
  /// object, so that no new extrapolator has to be made per call.

  void operator() ()
  {
    solve ( buffer , buffer , buffer_extrapolator ) ;
  }

  /// factory function yielding a plain fir_filter built from the
  /// same kind of specs, but with freely chosen data types. This is
  /// used when processing 1D data, where the normal buffering
  /// mechanism may be sidestepped.

  template < typename in_type ,
             typename out_type = in_type ,
             typename math_type = out_type >
  static vspline::fir_filter < in_type , out_type , math_type >
         get_raw_filter ( const fir_filter_specs & specs )
  {
    using raw_filter_type
    = vspline::fir_filter < in_type , out_type , math_type > ;

    return raw_filter_type ( specs ) ;
  }
} ;


} ; // namespace vspline


#endif // VSPLINE_CONVOLVE_H

vspline::buffer_handling
buffer_handling provides services needed for interfacing with a buffer of simdized/goading data....
Definition: filter.h:227

vspline::buffer_handling< _vtype, _math_ele_type, _vsize >::vsize
@ vsize
Definition: filter.h:230

vspline::buffer_handling< _vtype, _math_ele_type, _vsize >::vtype
_vtype< dtype, vsize > vtype
Definition: filter.h:232

vspline::buffer_handling< _vtype, _math_ele_type, _vsize >::init
void init(vigra::MultiArrayView< 1, vtype > &_in_window, vigra::MultiArrayView< 1, vtype > &_out_window)
Definition: filter.h:237

common.h
definitions common to all files in this project, utility code

extrapolate.h
extrapolation of 1D data sets with specific boundary conditions

filter.h
generic implementation of separable filtering for nD arrays

vspline
Definition: basis.h:79

vspline::xlf_type
long double xlf_type
Definition: common.h:102

vspline::bc_code
bc_code
This enumeration is used for codes connected to boundary conditions. There are two aspects to boundar...
Definition: common.h:71

vspline::allocator_traits
vspline creates vigra::MultiArrays of vectorized types. As long as the vectorized types are Vc::SimdA...
Definition: common.h:267

vspline::convolve
class convolve provides the combination of class fir_filter above with a vector-friendly buffer....
Definition: convolve.h:387

vspline::convolve::arg_type
fir_filter_specs arg_type
Definition: convolve.h:426

vspline::convolve::buffer_extrapolator
extrapolator< buffer_view_type > buffer_extrapolator
Definition: convolve.h:413

vspline::convolve::buffer
buffer_type buffer
Definition: convolve.h:409

vspline::convolve::buffer_view_type
vigra::MultiArrayView< 1, vtype > buffer_view_type
Definition: convolve.h:407

vspline::convolve::convolve
convolve(const fir_filter_specs &specs, size_t size)
Definition: convolve.h:433

vspline::convolve::operator()
void operator()()
Definition: convolve.h:447

vspline::convolve::solve
void solve(const in_buffer_type &input, out_buffer_type &output)
public 'solve' routine. This is for calls 'from outside', like when this object is used by itself,...
Definition: convolve.h:200

vspline::convolve::buffer_handling_type
buffer_handling< _vtype, _math_ele_type, _vsize > buffer_handling_type
Definition: convolve.h:395

vspline::convolve::allocator_t
typename vspline::allocator_traits< vtype > ::type allocator_t
Definition: convolve.h:404

vspline::convolve::simdized_math_type
_vtype< _math_ele_type, _vsize > simdized_math_type
Definition: convolve.h:418

vspline::convolve::buffer_type
vigra::MultiArray< 1, vtype, allocator_t > buffer_type
Definition: convolve.h:406

vspline::convolve::math_ele_type
_math_ele_type math_ele_type
Definition: convolve.h:390

vspline::convolve::get_raw_filter
static vspline::fir_filter< in_type, out_type, math_type > get_raw_filter(const fir_filter_specs &specs)
Definition: convolve.h:461

vspline::convolve::filter_type
vspline::fir_filter< simdized_math_type > filter_type
Definition: convolve.h:419

vspline::extrapolator
struct extrapolator is a helper class providing extrapolated values for a 1D buffer indexed with poss...
Definition: extrapolate.h:70

vspline::fir_filter_specs
fir_filter_specs holds the parameters for a filter performing a convolution along a single axis....
Definition: convolve.h:93

vspline::fir_filter_specs::fir_filter_specs
fir_filter_specs(vspline::bc_code _bc, int _ksize, int _headroom, const xlf_type *_kernel)
Definition: convolve.h:99

vspline::fir_filter_specs::ksize
int ksize
Definition: convolve.h:95

vspline::fir_filter_specs::headroom
int headroom
Definition: convolve.h:96

vspline::fir_filter_specs::kernel
const xlf_type * kernel
Definition: convolve.h:97

vspline::fir_filter_specs::bc
vspline::bc_code bc
Definition: convolve.h:94

vspline::fir_filter
class fir_filter provides the 'solve' routine which convolves a 1D signal with selectable extrapolati...
Definition: convolve.h:134

vspline::fir_filter::math_type
_math_type math_type
Definition: convolve.h:141

vspline::fir_filter::is_single_pass
static const bool is_single_pass
Definition: convolve.h:137

vspline::fir_filter::circular_buffer
vigra::MultiArrayView< 1, math_type > circular_buffer
Definition: convolve.h:158

vspline::fir_filter::kernel_values
vigra::MultiArrayView< 1, math_type > kernel_values
Definition: convolve.h:159

vspline::fir_filter::get_support_width
int get_support_width() const
calling code may have to set up buffers with additional space around the actual data to allow filteri...
Definition: convolve.h:187

vspline::fir_filter::reactor
vigra::MultiArray< 1, math_type, allocator_t > reactor
Definition: convolve.h:157

vspline::fir_filter::solve
void solve(const in_buffer_type &input, out_buffer_type &output)
public 'solve' routine. This is for calls 'from outside', like when this object is used by itself,...
Definition: convolve.h:200

vspline::fir_filter::out_buffer_type
vigra::MultiArrayView< 1, out_type > out_buffer_type
Definition: convolve.h:140

vspline::fir_filter::allocator_t
typename vspline::allocator_traits< math_type > ::type allocator_t
Definition: convolve.h:155

vspline::fir_filter::solve
void solve(const in_buffer_type &input, out_buffer_type &output, const extrapolator< in_buffer_type > &source)
protected solve routine taking an extrapolator on top of input and output. This way,...
Definition: convolve.h:227

vspline::fir_filter::in_buffer_type
vigra::MultiArrayView< 1, in_type > in_buffer_type
Definition: convolve.h:139

vspline::fir_filter::tail_buffer
vigra::MultiArrayView< 1, math_type > tail_buffer
Definition: convolve.h:160

vspline::fir_filter::fir_filter
fir_filter(const fir_filter_specs &specs)
Definition: convolve.h:163