tcw_fstat_map_funcs.py 19.5 KB
Newer Older
1
2
""" Additional helper functions dealing with transient-CW F(t0,tau) maps """

3
4
import numpy as np
import os
5
6
7
8
9
10
import logging

# optional imports
import importlib as imp


David Keitel's avatar
David Keitel committed
11
def _optional_import ( modulename, shorthand=None ):
    '''
    Import a module/submodule only if it's available.

    using importlib instead of __import__
    because the latter doesn't handle sub.modules

    On success the module is bound into this module's globals()
    under `shorthand` (or `modulename` if no shorthand is given),
    mimicking a plain `import modulename [as shorthand]`.

    Also including a special check to fail more gracefully
    when CUDA_DEVICE is set to too high a number:
    a genuinely missing module returns False, while any other
    ImportError (e.g. a module that exists but fails during its
    own import) is re-raised so the real cause is not hidden.

    Parameters
    ----------
    modulename : str
        full (dotted) name of the module to import
    shorthand : str, optional
        name to bind the module to in globals(); defaults to modulename

    Returns
    -------
    success : bool
        True if the import succeeded, False if the module was not found
    '''

    if shorthand is None:
        shorthand    = modulename
        shorthandbit = ''
    else:
        shorthandbit = ' as '+shorthand

    try:
        globals()[shorthand] = imp.import_module(modulename)
        logging.debug('Successfully imported module %s%s.'
                      % (modulename, shorthandbit))
        success = True
    except ImportError as e:
        # Python 2 formats the message without quotes,
        # Python 3 with quotes; accept both.
        if str(e) in ('No module named '+modulename,
                      "No module named '{}'".format(modulename)):
            logging.debug('No module {:s} found.'.format(modulename))
            success = False
        else:
            raise

    return success


43
44
45
46
47
48
49
50
51
52
53
54
55
56
class pyTransientFstatMap(object):
    '''
    simplified object class for a F(t0,tau) F-stat map (not 2F!)
    based on LALSuite's transientFstatMap_t type
    replacing the gsl matrix with a numpy array

    F_mn:   2D array of 2F values
    maxF:   maximum of F (not 2F!)
    t0_ML:  maximum likelihood transient start time t0 estimate
    tau_ML: maximum likelihood transient duration tau estimate
    '''

    def __init__(self, N_t0Range, N_tauRange):
        map_shape = (N_t0Range, N_tauRange)
        self.F_mn = np.zeros(map_shape, dtype=np.float32)
        # Starting the running maximum below zero guarantees that at
        # least one update happens, so t0_ML/tau_ML always end up sane
        # even when the only bin has F=0.
        self.maxF = float(-1.0)
        self.t0_ML = float(0.0)
        self.tau_ML = float(0.0)


66
67
68
69
70
71
# dictionary of the actual callable F-stat map functions we support,
# if the corresponding modules are available.
# The lambdas defer the lookup of lalpulsar and of
# pycuda_compute_transient_fstat_map until call time, so this module
# can be imported even when a backend is missing.
# NOTE(review): the trailing False passed to ComputeTransientFstatMap
# presumably disables an optional flag of the LALSuite API — confirm
# against the lalpulsar documentation.
fstatmap_versions = {
                     'lal':    lambda multiFstatAtoms, windowRange:
                               getattr(lalpulsar,'ComputeTransientFstatMap')
                                ( multiFstatAtoms, windowRange, False ),
                     'pycuda': lambda multiFstatAtoms, windowRange:
                               pycuda_compute_transient_fstat_map
                                ( multiFstatAtoms, windowRange )
                    }


78
def init_transient_fstat_map_features ( wantCuda=False, cudaDeviceName=None ):
    '''
    Initialization of available modules (or "features") for F-stat maps.

    Returns a dictionary of method names, to match fstatmap_versions
    each key's value set to True only if
    all required modules are importable on this system.

    If wantCuda is set and pycuda is fully available, also initializes
    the CUDA driver, selects a device (optionally matched by name or by
    the $CUDA_DEVICE environment variable) and returns a context for it;
    otherwise the returned context is None.

    Parameters
    ----------
    wantCuda : bool
        if True, try to set up a CUDA device context
    cudaDeviceName : str, optional
        (partial) device name to match against the available devices;
        if None, $CUDA_DEVICE (or device 0) is used

    Returns
    -------
    features : dict
        maps method names ('lal', 'pycuda') to availability booleans
    gpu_context : pycuda context or None
    '''

    features = {}

    have_lal           = _optional_import('lal')
    have_lalpulsar     = _optional_import('lalpulsar')
    features['lal']    = have_lal and have_lalpulsar

    # import GPU features
    have_pycuda          = _optional_import('pycuda')
    have_pycuda_drv      = _optional_import('pycuda.driver', 'drv')
    have_pycuda_gpuarray = _optional_import('pycuda.gpuarray', 'gpuarray')
    have_pycuda_tools    = _optional_import('pycuda.tools', 'cudatools')
    have_pycuda_compiler = _optional_import('pycuda.compiler', 'cudacomp')
    features['pycuda']   = ( have_pycuda_drv and have_pycuda_gpuarray and
                            have_pycuda_tools and have_pycuda_compiler )

    logging.debug('Got the following features for transient F-stat maps:')
    logging.debug(features)

    if wantCuda and features['pycuda']:
        logging.debug('CUDA version: '+'.'.join(map(str,drv.get_version())))

        drv.init()
        logging.debug('Starting with default pyCUDA context,' \
                      ' then checking all available devices...')
        try:
            context0 = pycuda.tools.make_default_context()
        except pycuda._driver.LogicError as e:
            # fail gracefully when $CUDA_DEVICE points past the
            # last available device
            if str(e) == 'cuDeviceGet failed: invalid device ordinal':
                devn = int(os.environ['CUDA_DEVICE'])
                raise RuntimeError('Requested CUDA device number {} exceeds' \
                                   ' number of available devices!' \
                                   ' Please change through environment' \
                                   ' variable $CUDA_DEVICE.'.format(devn))
            else:
                # re-raise unchanged, preserving type and traceback
                raise

        num_gpus = drv.Device.count()
        logging.debug('Found {} CUDA device(s).'.format(num_gpus))

        devices = []
        # unicode dtype (not 'S32' bytes) so that the substring match
        # and ','.join() below also work on Python 3
        devnames = np.empty(num_gpus,dtype='U32')
        for n in range(num_gpus):
            devn = drv.Device(n)
            devices.append(devn)
            devnames[n] = devn.name().replace(' ','-').replace('_','-')
            logging.debug('device {}: model: {}, RAM: {}MB'.format(
                n, devnames[n], devn.total_memory()/(2.**20) ))

        # remember which device the default context was created on
        if 'CUDA_DEVICE' in os.environ:
            devnum0 = int(os.environ['CUDA_DEVICE'])
        else:
            devnum0 = 0

        matchbit = ''
        if cudaDeviceName:
            # allow partial matches in device names
            devmatches = [devidx for devidx, devname in enumerate(devnames)
                          if cudaDeviceName in devname]
            if len(devmatches) == 0:
                context0.detach()
                raise RuntimeError('Requested CUDA device "{}" not found.' \
                                   ' Available devices: [{}]'.format(
                                      cudaDeviceName,','.join(devnames)))
            else:
                devnum = devmatches[0]
                if len(devmatches) > 1:
                    logging.warning('Found {} CUDA devices matching name "{}".' \
                                    ' Choosing first one with index {}.'.format(
                                        len(devmatches),cudaDeviceName,devnum))
            os.environ['CUDA_DEVICE'] = str(devnum)
            matchbit =  '(matched to user request "{}")'.format(cudaDeviceName)
        elif 'CUDA_DEVICE' in os.environ:
            devnum = int(os.environ['CUDA_DEVICE'])
        else:
            devnum = 0
        devn = devices[devnum]
        logging.info('Choosing CUDA device {},' \
                     ' of {} devices present: {}{}...'.format(
                         devnum, num_gpus, devn.name(), matchbit))
        # reuse the default context if it already lives on the chosen
        # device, else swap it out for a fresh one
        if devnum == devnum0:
            gpu_context = context0
        else:
            context0.pop()
            gpu_context = pycuda.tools.make_default_context()
            gpu_context.push()

        _print_GPU_memory_MB('Available')
    else:
        gpu_context = None

    return features, gpu_context
178
179


David Keitel's avatar
David Keitel committed
180
181
182
183
def call_compute_transient_fstat_map ( version,
                                       features,
                                       multiFstatAtoms=None,
                                       windowRange=None ):
    '''Choose which version of the ComputeTransientFstatMap function to call.'''

    # guard clauses: unknown method name first, then missing dependencies
    if version not in fstatmap_versions:
        raise Exception('Transient F-stat map method "{}"' \
                        ' not implemented!'.format(version))
    if not features[version]:
        raise Exception('Required module(s) for transient F-stat map' \
                        ' method "{}" not available!'.format(version))

    FstatMap = fstatmap_versions[version](multiFstatAtoms, windowRange)
    return FstatMap
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228


def reshape_FstatAtomsVector ( atomsVector ):
    '''
    Make a dictionary of ndarrays out of a atoms "vector" structure.

    The input is a "vector"-like structure with times as the higher hierarchical
    level and a set of "atoms" quantities defined at each timestamp.
    The output is a dictionary with an entry for each quantity,
    which is a 1D ndarray over timestamps for that one quantity.

    Parameters
    ----------
    atomsVector : object
        any object with an integer `.length` and an iterable `.data`
        of per-timestamp atoms (e.g. a lalpulsar FstatAtomVector)

    Returns
    -------
    atomsDict : dict of ndarrays
        one 1D array per atom field, plus separate float32 real and
        imaginary parts of the complex Fa_alpha, Fb_alpha
        (as needed for transfer to the GPU kernels)
    '''

    numAtoms = atomsVector.length
    atomsDict = {}
    atom_fieldnames = ['timestamp', 'Fa_alpha', 'Fb_alpha',
                       'a2_alpha', 'ab_alpha', 'b2_alpha']
    atom_dtypes     = [np.uint32, complex, complex,
                       np.float32, np.float32, np.float32]
    # np.empty is the idiomatic way to allocate an uninitialized array
    # (every element is overwritten in the fill loop below)
    for field, dtype in zip(atom_fieldnames, atom_dtypes):
        atomsDict[field] = np.empty(numAtoms, dtype=dtype)

    for n, atom in enumerate(atomsVector.data):
        for field in atom_fieldnames:
            # getattr() is the idiomatic spelling of __getattribute__()
            atomsDict[field][n] = getattr(atom, field)

    # split the complex amplitudes into float32 re/im parts
    atomsDict['Fa_alpha_re'] = np.float32(atomsDict['Fa_alpha'].real)
    atomsDict['Fa_alpha_im'] = np.float32(atomsDict['Fa_alpha'].imag)
    atomsDict['Fb_alpha_re'] = np.float32(atomsDict['Fb_alpha'].real)
    atomsDict['Fb_alpha_im'] = np.float32(atomsDict['Fb_alpha'].imag)

    return atomsDict


David Keitel's avatar
David Keitel committed
229
def _get_absolute_kernel_path ( kernel ):
    '''Return the absolute path to the .cu source of the named pyCUDA kernel,
    located in the pyCUDAkernels subdirectory next to this module.'''
    here = os.path.realpath(__file__)
    pyfstatdir = os.path.dirname(os.path.abspath(here))
    return os.path.join(pyfstatdir, 'pyCUDAkernels', kernel + '.cu')


David Keitel's avatar
David Keitel committed
235
236
237
238
239
240
241
def _print_GPU_memory_MB ( key ):
    '''
    Log free and total GPU memory in MB, prefixed by `key`.

    Relies on the module-level `drv` (pycuda.driver) binding created
    by _optional_import() during init_transient_fstat_map_features().
    '''
    # drv.mem_get_info() returns (free, total) in bytes; query the
    # driver once instead of twice and name the fields accordingly
    # (the old local name 'mem_used_MB' actually held the FREE amount).
    mem_free_bytes, mem_total_bytes = drv.mem_get_info()
    mem_free_MB  = mem_free_bytes/(2.**20)
    mem_total_MB = mem_total_bytes/(2.**20)
    logging.debug('{} GPU memory: {:.4f} / {:.4f} MB free'.format(
                      key, mem_free_MB, mem_total_MB))


242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
def pycuda_compute_transient_fstat_map ( multiFstatAtoms, windowRange ):
    '''
    GPU version of the function to compute transient-window "F-statistic map"
    over start-time and timescale {t0, tau}.
    Based on XLALComputeTransientFstatMap from LALSuite,
    (C) 2009 Reinhard Prix, licensed under GPL

    Returns a 2D matrix F_mn,
    with m = index over start-times t0,
    and  n = index over timescales tau,
    in steps of dt0  in [t0,  t0+t0Band],
    and         dtau in [tau, tau+tauBand]
    as defined in windowRange input.

    NOTE: for windowRange.type == TRANSIENT_NONE the windowRange
    object passed in by the caller is modified in place (see below).

    Raises ValueError on an unknown or unsupported window type.
    '''

    if ( windowRange.type >= lalpulsar.TRANSIENT_LAST ):
        raise ValueError ('Unknown window-type ({}) passed as input.' \
                          ' Allowed are [0,{}].'.format(
                              windowRange.type, lalpulsar.TRANSIENT_LAST-1))

    # internal dict for search/setup parameters
    tCWparams = {}

    # first combine all multi-atoms
    # into a single atoms-vector with *unique* timestamps
    tCWparams['TAtom'] = multiFstatAtoms.data[0].TAtom
    TAtomHalf          = int(tCWparams['TAtom']/2) # integer division
    atoms = lalpulsar.mergeMultiFstatAtomsBinned ( multiFstatAtoms,
                                                   tCWparams['TAtom'] )

    # make a combined input matrix of all atoms vectors, for transfer to GPU
    # column order is fixed here and must match what the CUDA kernels expect
    tCWparams['numAtoms'] = atoms.length
    atomsDict = reshape_FstatAtomsVector(atoms)
    atomsInputMatrix = np.column_stack ( (atomsDict['a2_alpha'],
                                          atomsDict['b2_alpha'],
                                          atomsDict['ab_alpha'],
                                          atomsDict['Fa_alpha_re'],
                                          atomsDict['Fa_alpha_im'],
                                          atomsDict['Fb_alpha_re'],
                                          atomsDict['Fb_alpha_im'])
                                       )

    # actual data spans [t0_data, t0_data + tCWparams['numAtoms'] * TAtom]
    # in steps of TAtom
    tCWparams['t0_data'] = int(atoms.data[0].timestamp)
    tCWparams['t1_data'] = int(atoms.data[tCWparams['numAtoms']-1].timestamp
                               + tCWparams['TAtom'])

    # NOTE(review): TAtomHalf is only used in the debug output below —
    # presumably kept for parity with the LALSuite implementation; confirm.
    logging.debug('Transient F-stat map:' \
                  ' t0_data={:d}, t1_data={:d}'.format(
                      tCWparams['t0_data'], tCWparams['t1_data']))
    logging.debug('Transient F-stat map:' \
                  ' numAtoms={:d}, TAtom={:d},' \
                  ' TAtomHalf={:d}'.format(
                      tCWparams['numAtoms'], tCWparams['TAtom'], TAtomHalf))

    # special treatment of window_type = none
    # ==> replace by rectangular window spanning all the data
    # (this mutates the caller's windowRange object in place)
    if ( windowRange.type == lalpulsar.TRANSIENT_NONE ):
        windowRange.type    = lalpulsar.TRANSIENT_RECTANGULAR
        windowRange.t0      = tCWparams['t0_data']
        windowRange.t0Band  = 0
        windowRange.dt0     = tCWparams['TAtom'] # irrelevant
        windowRange.tau     = tCWparams['numAtoms'] * tCWparams['TAtom']
        windowRange.tauBand = 0;
        windowRange.dtau    = tCWparams['TAtom'] # irrelevant

    """ NOTE: indices {i,j} enumerate *actual* atoms and their timestamps t_i,
    * while the indices {m,n} enumerate the full grid of values
    * in [t0_min, t0_max]x[Tcoh_min, Tcoh_max] in steps of deltaT.
    * This allows us to deal with gaps in the data in a transparent way.
    *
    * NOTE2: we operate on the 'binned' atoms returned
    * from XLALmergeMultiFstatAtomsBinned(),
    * which means we can safely assume all atoms to be lined up
    * perfectly on a 'deltaT' binned grid.
    *
    * The mapping used will therefore be {i,j} -> {m,n}:
    *   m = offs_i  / deltaT
    *   start-time offset from t0_min measured in deltaT
    *   n = Tcoh_ij / deltaT
    *   duration Tcoh_ij measured in deltaT,
    *
    * where
    *   offs_i  = t_i - t0_min
    *   Tcoh_ij = t_j - t_i + deltaT
    *
    """

    # We allocate a matrix  {m x n} = t0Range * TcohRange elements
    # covering the full transient window-range [t0,t0+t0Band]x[tau,tau+tauBand]
    # (the "+ 1" includes both endpoints of each band)
    tCWparams['N_t0Range']  = int(np.floor( 1.0*windowRange.t0Band /
                                            windowRange.dt0 ) + 1)
    tCWparams['N_tauRange'] = int(np.floor( 1.0*windowRange.tauBand /
                                            windowRange.dtau ) + 1)
    FstatMap = pyTransientFstatMap ( tCWparams['N_t0Range'],
                                     tCWparams['N_tauRange'] )

    logging.debug('Transient F-stat map:' \
                  ' N_t0Range={:d}, N_tauRange={:d},' \
                  ' total grid points: {:d}'.format(
                      tCWparams['N_t0Range'], tCWparams['N_tauRange'],
                      tCWparams['N_t0Range']*tCWparams['N_tauRange']))

    # dispatch to the window-type-specific GPU implementation
    if ( windowRange.type == lalpulsar.TRANSIENT_RECTANGULAR ):
        FstatMap.F_mn = pycuda_compute_transient_fstat_map_rect (
                           atomsInputMatrix, windowRange, tCWparams )
    elif ( windowRange.type == lalpulsar.TRANSIENT_EXPONENTIAL ):
        FstatMap.F_mn = pycuda_compute_transient_fstat_map_exp (
                           atomsInputMatrix, windowRange, tCWparams )
    else:
        raise ValueError('Invalid transient window type {}' \
                         ' not in [{}, {}].'.format(
                            windowRange.type, lalpulsar.TRANSIENT_NONE,
                            lalpulsar.TRANSIENT_LAST-1))

    # out of loop: get max2F and ML estimates over the m x n matrix
    FstatMap.maxF = FstatMap.F_mn.max()
    maxidx = np.unravel_index ( FstatMap.F_mn.argmax(),
                               (tCWparams['N_t0Range'],
                                tCWparams['N_tauRange']))
    FstatMap.t0_ML  = windowRange.t0  + maxidx[0] * windowRange.dt0
    FstatMap.tau_ML = windowRange.tau + maxidx[1] * windowRange.dtau

    logging.debug('Done computing transient F-stat map.' \
                  ' maxF={:.4f}, t0_ML={}, tau_ML={}'.format(
                      FstatMap.maxF , FstatMap.t0_ML, FstatMap.tau_ML))

    return FstatMap


David Keitel's avatar
David Keitel committed
373
374
375
376
377
378
379
def pycuda_compute_transient_fstat_map_rect ( atomsInputMatrix,
                                              windowRange,
                                              tCWparams ):
    '''
    only GPU-parallizing outer loop,
    keeping partial sums with memory in kernel

    Computes the F-stat map for a rectangular transient window by
    compiling and launching the 'cudaTransientFstatRectWindow' CUDA
    kernel on the atoms matrix.

    Parameters
    ----------
    atomsInputMatrix : ndarray
        column-stacked per-atom quantities (see caller)
    windowRange : lalpulsar transientWindowRange
        defines the {t0, tau} search grid
    tCWparams : dict
        setup parameters (numAtoms, TAtom, t0_data, N_t0Range, N_tauRange)

    Returns
    -------
    F_mn : ndarray (N_t0Range x N_tauRange, float32)
        the F-stat map copied back from the GPU
    '''

    # gpu data setup and transfer
    _print_GPU_memory_MB('Initial')
    input_gpu = gpuarray.to_gpu ( atomsInputMatrix )
    Fmn_gpu   = gpuarray.GPUArray ( (tCWparams['N_t0Range'],
                                     tCWparams['N_tauRange']),
                                    dtype=np.float32 )
    _print_GPU_memory_MB('After input+output allocation:')

    # GPU kernel: compile the .cu source and prepare the call signature
    # ('P'=pointer, 'I'=unsigned int; must match the kernel's parameters)
    kernel = 'cudaTransientFstatRectWindow'
    kernelfile = _get_absolute_kernel_path ( kernel )
    partial_Fstat_cuda_code = cudacomp.SourceModule(open(kernelfile,'r').read())
    partial_Fstat_cuda = partial_Fstat_cuda_code.get_function(kernel)
    partial_Fstat_cuda.prepare('PIIIIIIIIP')

    # GPU grid setup: 1D launch over the t0 range only
    # (the tau loop runs inside the kernel)
    blockRows = min(1024,tCWparams['N_t0Range'])
    blockCols = 1
    gridRows  = int(np.ceil(1.0*tCWparams['N_t0Range']/blockRows))
    gridCols  = 1

    # running the kernel
    logging.debug('Calling pyCUDA kernel with a grid of {}*{}={} blocks' \
                  ' of {}*{}={} threads each: {} total threads...'.format(
                      gridRows, gridCols, gridRows*gridCols,
                      blockRows, blockCols, blockRows*blockCols,
                      gridRows*gridCols*blockRows*blockCols))
    partial_Fstat_cuda.prepared_call ( (gridRows,gridCols),
                                       (blockRows,blockCols,1),
                                       input_gpu.gpudata,
                                       tCWparams['numAtoms'],
                                       tCWparams['TAtom'],
                                       tCWparams['t0_data'],
                                       windowRange.t0, windowRange.dt0,
                                       windowRange.tau, windowRange.dtau,
                                       tCWparams['N_tauRange'],
                                       Fmn_gpu.gpudata )

    # return results to host
    F_mn = Fmn_gpu.get()

    _print_GPU_memory_MB('Final')

    return F_mn


David Keitel's avatar
David Keitel committed
427
428
429
def pycuda_compute_transient_fstat_map_exp ( atomsInputMatrix,
                                             windowRange,
                                             tCWparams ):
    '''exponential window, inner and outer loop GPU-parallelized

    Computes the F-stat map for an exponential transient window by
    compiling and launching the 'cudaTransientFstatExpWindow' CUDA
    kernel on the atoms matrix.

    Parameters
    ----------
    atomsInputMatrix : ndarray
        column-stacked per-atom quantities (see caller)
    windowRange : lalpulsar transientWindowRange
        defines the {t0, tau} search grid
    tCWparams : dict
        setup parameters (numAtoms, TAtom, t0_data, N_t0Range, N_tauRange)

    Returns
    -------
    F_mn : ndarray (N_t0Range x N_tauRange, float32)
        the F-stat map copied back from the GPU
    '''

    # gpu data setup and transfer
    _print_GPU_memory_MB('Initial')
    input_gpu = gpuarray.to_gpu ( atomsInputMatrix )
    Fmn_gpu   = gpuarray.GPUArray ( (tCWparams['N_t0Range'],
                                     tCWparams['N_tauRange']),
                                    dtype=np.float32 )
    _print_GPU_memory_MB('After input+output allocation:')

    # GPU kernel: compile the .cu source and prepare the call signature
    # (one more 'I' than the rect version: N_t0Range is passed too)
    kernel = 'cudaTransientFstatExpWindow'
    kernelfile = _get_absolute_kernel_path ( kernel )
    partial_Fstat_cuda_code = cudacomp.SourceModule(open(kernelfile,'r').read())
    partial_Fstat_cuda = partial_Fstat_cuda_code.get_function(kernel)
    partial_Fstat_cuda.prepare('PIIIIIIIIIP')

    # GPU grid setup: 2D launch over both the t0 and tau ranges
    blockRows = min(32,tCWparams['N_t0Range'])
    blockCols = min(32,tCWparams['N_tauRange'])
    gridRows  = int(np.ceil(1.0*tCWparams['N_t0Range']/blockRows))
    gridCols  = int(np.ceil(1.0*tCWparams['N_tauRange']/blockCols))

    # running the kernel
    logging.debug('Calling kernel with a grid of {}*{}={} blocks' \
                  ' of {}*{}={} threads each: {} total threads...'.format(
                      gridRows, gridCols, gridRows*gridCols,
                      blockRows, blockCols, blockRows*blockCols,
                      gridRows*gridCols*blockRows*blockCols))
    partial_Fstat_cuda.prepared_call ( (gridRows,gridCols),
                                       (blockRows,blockCols,1),
                                       input_gpu.gpudata,
                                       tCWparams['numAtoms'],
                                       tCWparams['TAtom'],
                                       tCWparams['t0_data'],
                                       windowRange.t0, windowRange.dt0,
                                       windowRange.tau, windowRange.dtau,
                                       tCWparams['N_t0Range'],
                                       tCWparams['N_tauRange'],
                                       Fmn_gpu.gpudata )

    # return results to host
    F_mn = Fmn_gpu.get()

    _print_GPU_memory_MB('Final')

    return F_mn