Skip to content
Snippets Groups Projects
Select Git revision
  • 7ae31caaf746655674400265876c1dba3c4af4d2
  • master default protected
  • develop-GA
  • timeFstatmap
  • add-higher-spindown-components
  • develop-DK
  • adds-header-to-grid-search
  • v1.2
  • v1.1.2
  • v1.1.0
  • v1.0.1
11 results

cudaTransientFstatExpWindow.cu

Blame
  • Forked from Gregory Ashton / PyFstat
    98 commits behind the upstream repository.
    David Keitel's avatar
    David Keitel authored
     -optional import of pyCUDA package only if requested
     -including kernel .cu files in packaging
    7ae31caa
    History
    cudaTransientFstatExpWindow.cu 4.30 KiB
    __global__ void cudaTransientFstatExpWindow ( float *input,
                                                  unsigned int numAtoms,
                                                  unsigned int TAtom,
                                                  unsigned int t0_data,
                                                  unsigned int win_t0,
                                                  unsigned int win_dt0,
                                                  unsigned int win_tau,
                                                  unsigned int win_dtau,
                                                  unsigned int Fmn_rows,
                                                  unsigned int Fmn_cols,
                                                  float *Fmn
                                                )
    {
    
      /* match CUDA thread indexing and high-level (t0,tau) indexing */
      unsigned int m         = blockDim.x * blockIdx.x + threadIdx.x; // t0:  row
      unsigned int n         = blockDim.y * blockIdx.y + threadIdx.y; // tau: column
      /* unraveled 1D index for 2D output array */
      unsigned int outidx    = Fmn_cols * m + n;
    
      /* hardcoded copy from lalpulsar */
      unsigned int TRANSIENT_EXP_EFOLDING = 3;
    
      if ( (m < Fmn_rows) && (n < Fmn_cols) ) {
    
        /* compute Fstat-atom index i_t0 in [0, numAtoms) */
        unsigned int TAtomHalf = TAtom/2; // integer division
        unsigned int t0 = win_t0 + m * win_dt0;
        int i_tmp = ( t0 - t0_data + TAtomHalf ) / TAtom; // integer round: floor(x+0.5)
        if ( i_tmp < 0 ) {
            i_tmp = 0;
        }
        unsigned int i_t0 = (unsigned int)i_tmp;
        if ( i_t0 >= numAtoms ) {
            i_t0 = numAtoms - 1;
        }
    
        /* translate n into an atoms end-index for this search interval [t0, t0+Tcoh],
         * giving the index range of atoms to sum over
         */
        unsigned int tau = win_tau + n * win_dtau;
    
        /* get end-time t1 of this transient-window search
         * for given tau, what Tcoh should the exponential window cover?
         * for speed reasons we want to truncate Tcoh = tau * TRANSIENT_EXP_EFOLDING
         * with the e-folding factor chosen such that the window-value
         * is practically negligible after that, where it will be set to 0
         */
    //     unsigned int t1 = lround( win_t0 + TRANSIENT_EXP_EFOLDING * win_tau);
        unsigned int t1 = t0 + TRANSIENT_EXP_EFOLDING * tau;
    
        /* compute window end-time Fstat-atom index i_t1 in [0, numAtoms) */
        i_tmp = ( t1 - t0_data + TAtomHalf ) / TAtom  - 1; // integer round: floor(x+0.5)
        if ( i_tmp < 0 ) {
            i_tmp = 0;
        }
        unsigned int i_t1 = (unsigned int)i_tmp;
        if ( i_t1 >= numAtoms ) {
            i_t1 = numAtoms - 1;
        }
    
        /* now we have two valid atoms-indices [i_t0, i_t1]
         * spanning our Fstat-window to sum over
         */
    
        float Ad    = 0.0f;
        float Bd    = 0.0f;
        float Cd    = 0.0f;
        float Fa_re = 0.0f;
        float Fa_im = 0.0f;
        float Fb_re = 0.0f;
        float Fb_im = 0.0f;
    
        unsigned short input_cols = 7; // must match input matrix!
    
        /* sum up atoms */
        for ( unsigned int i=i_t0; i<=i_t1; i++ ) {
    
          unsigned int t_i = t0_data + i * TAtom;
    
          float win_i = 0.0;
          if ( t_i >= t0 && t_i <= t1 ) {
            float x = 1.0 * ( t_i - t0 ) / tau;
            win_i = exp ( -x );
          }
    
          float win2_i = win_i * win_i;
    
          Ad    += input[i*input_cols+0] * win2_i; // a2_alpha
          Bd    += input[i*input_cols+1] * win2_i; // b2_alpha
          Cd    += input[i*input_cols+2] * win2_i; // ab_alpha
          Fa_re += input[i*input_cols+3] * win_i; // Fa_alpha_re
          Fa_im += input[i*input_cols+4] * win_i; // Fa_alpha_im
          Fb_re += input[i*input_cols+5] * win_i; // Fb_alpha_re
          Fb_im += input[i*input_cols+6] * win_i; // Fb_alpha_im
    
        }
    
        /* get determinant */
        float Dd = ( Ad * Bd - Cd * Cd );
        float DdInv = 0.0f;
        /* safety catch as in XLALWeightMultiAMCoeffs():
         * make it so that in the end F=0 instead of -nan
         */
        if ( Dd > 0.0 ) {
          DdInv  = 1.0 / Dd;
        }
    
        /* from XLALComputeFstatFromFaFb */
        float F  = DdInv * (  Bd * ( Fa_re*Fa_re + Fa_im*Fa_im )
                            + Ad * ( Fb_re*Fb_re + Fb_im*Fb_im )
                            - 2.0 * Cd * ( Fa_re * Fb_re + Fa_im * Fb_im )
                           );
    
        /* store result in Fstat-matrix
         * at unraveled index of element {m,n}
         */
        Fmn[outidx] = F;
    
      } // ( (m < Fmn_rows) && (n < Fmn_cols) )
    
    } // cudaTransientFstatExpWindow()