Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
David Keitel
PyFstat
Commits
7d8e0ae6
Commit
7d8e0ae6
authored
Feb 09, 2018
by
Gregory Ashton
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' into develop-GA
parents
25aab707
d55178b6
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
818 additions
and
18 deletions
+818
-18
examples/transient_examples/short_transient_search_gridded.py
...ples/transient_examples/short_transient_search_gridded.py
+2
-1
pyfstat/core.py
pyfstat/core.py
+36
-6
pyfstat/grid_based_searches.py
pyfstat/grid_based_searches.py
+46
-8
pyfstat/mcmc_based_searches.py
pyfstat/mcmc_based_searches.py
+8
-3
pyfstat/pyCUDAkernels/cudaTransientFstatExpWindow.cu
pyfstat/pyCUDAkernels/cudaTransientFstatExpWindow.cu
+127
-0
pyfstat/pyCUDAkernels/cudaTransientFstatRectWindow.cu
pyfstat/pyCUDAkernels/cudaTransientFstatRectWindow.cu
+119
-0
pyfstat/tcw_fstat_map_funcs.py
pyfstat/tcw_fstat_map_funcs.py
+477
-0
setup.py
setup.py
+3
-0
No files found.
examples/transient_examples/short_transient_search_gridded.py
View file @
7d8e0ae6
...
...
@@ -50,7 +50,8 @@ search2 = pyfstat.TransientGridSearch(
minStartTime
=
minStartTime
,
maxStartTime
=
maxStartTime
,
transientWindowType
=
'rect'
,
t0Band
=
Tspan
-
2
*
Tsft
,
tauBand
=
Tspan
,
BSGL
=
False
,
outputTransientFstatMap
=
True
)
outputTransientFstatMap
=
True
,
tCWFstatMapVersion
=
'lal'
)
search2
.
run
()
search2
.
print_max_twoF
()
...
...
pyfstat/core.py
View file @
7d8e0ae6
...
...
@@ -13,6 +13,7 @@ import scipy.optimize
import
lal
import
lalpulsar
import
pyfstat.helper_functions
as
helper_functions
import
pyfstat.tcw_fstat_map_funcs
as
tcw
# workaround for matplotlib on X-less remote logins
if
'DISPLAY'
in
os
.
environ
:
...
...
@@ -334,7 +335,8 @@ class ComputeFstat(BaseSearchClass):
dt0
=
None
,
dtau
=
None
,
detectors
=
None
,
minCoverFreq
=
None
,
maxCoverFreq
=
None
,
injectSources
=
None
,
injectSqrtSX
=
None
,
assumeSqrtSX
=
None
,
SSBprec
=
None
):
SSBprec
=
None
,
tCWFstatMapVersion
=
'lal'
,
cudaDeviceName
=
None
):
"""
Parameters
----------
...
...
@@ -382,6 +384,11 @@ class ComputeFstat(BaseSearchClass):
SSBprec : int
Flag to set the SSB calculation: 0=Newtonian, 1=relativistic,
2=relativisitic optimised, 3=DMoff, 4=NO_SPIN
tCWFstatMapVersion: str
Choose between standard 'lal' implementation,
'pycuda' for gpu, and some others for devel/debug.
cudaDeviceName: str
GPU name to be matched against drv.Device output.
"""
...
...
@@ -624,13 +631,14 @@ class ComputeFstat(BaseSearchClass):
self
.
windowRange
.
dt0
=
self
.
Tsft
self
.
windowRange
.
dtau
=
self
.
Tsft
# special treatment of window_type = none ==> replace by rectangular window spanning all the data
# special treatment of window_type = none
# ==> replace by rectangular window spanning all the data
if
self
.
windowRange
.
type
==
lalpulsar
.
TRANSIENT_NONE
:
self
.
windowRange
.
t0
=
int
(
self
.
minStartTime
)
self
.
windowRange
.
t0Band
=
0
self
.
windowRange
.
tau
=
int
(
self
.
maxStartTime
-
self
.
minStartTime
)
self
.
windowRange
.
tauBand
=
0
else
:
# user-set bands and spacings
else
:
# user-set bands and spacings
if
self
.
t0Band
is
None
:
self
.
windowRange
.
t0Band
=
0
else
:
...
...
@@ -652,6 +660,11 @@ class ComputeFstat(BaseSearchClass):
if
self
.
dtau
:
self
.
windowRange
.
dtau
=
self
.
dtau
logging
.
info
(
'Initialising transient FstatMap features...'
)
self
.
tCWFstatMapFeatures
,
self
.
gpu_context
=
(
tcw
.
init_transient_fstat_map_features
(
self
.
tCWFstatMapVersion
==
'pycuda'
,
self
.
cudaDeviceName
))
def
get_fullycoherent_twoF
(
self
,
tstart
,
tend
,
F0
,
F1
,
F2
,
Alpha
,
Delta
,
asini
=
None
,
period
=
None
,
ecc
=
None
,
tp
=
None
,
argp
=
None
):
...
...
@@ -694,9 +707,13 @@ class ComputeFstat(BaseSearchClass):
# F-stat computation
self
.
windowRange
.
tau
=
int
(
2
*
self
.
Tsft
)
self
.
FstatMap
=
lalpulsar
.
ComputeTransientFstatMap
(
self
.
FstatResults
.
multiFatoms
[
0
],
self
.
windowRange
,
False
)
F_mn
=
self
.
FstatMap
.
F_mn
.
data
self
.
FstatMap
=
tcw
.
call_compute_transient_fstat_map
(
self
.
tCWFstatMapVersion
,
self
.
tCWFstatMapFeatures
,
self
.
FstatResults
.
multiFatoms
[
0
],
self
.
windowRange
)
if
self
.
tCWFstatMapVersion
==
'lal'
:
F_mn
=
self
.
FstatMap
.
F_mn
.
data
else
:
F_mn
=
self
.
FstatMap
.
F_mn
twoF
=
2
*
np
.
max
(
F_mn
)
if
self
.
BSGL
is
False
:
...
...
@@ -931,6 +948,15 @@ class ComputeFstat(BaseSearchClass):
raise
RuntimeError
(
'Cannot print atoms vector to file: no FstatResults.multiFatoms, or it is None!'
)
def
__del__
(
self
):
"""
In pyCuda case without autoinit,
we need to make sure the context is removed at the end
"""
if
hasattr
(
self
,
'gpu_context'
)
and
self
.
gpu_context
:
self
.
gpu_context
.
detach
()
class
SemiCoherentSearch
(
ComputeFstat
):
""" A semi-coherent search """
...
...
@@ -961,6 +987,8 @@ class SemiCoherentSearch(ComputeFstat):
self
.
transientWindowType
=
'rect'
self
.
t0Band
=
None
self
.
tauBand
=
None
self
.
tCWFstatMapVersion
=
'lal'
self
.
cudaDeviceName
=
None
self
.
init_computefstatistic_single_point
()
self
.
init_semicoherent_parameters
()
...
...
@@ -1100,6 +1128,8 @@ class SemiCoherentGlitchSearch(ComputeFstat):
self
.
transientWindowType
=
'rect'
self
.
t0Band
=
None
self
.
tauBand
=
None
self
.
tCWFstatMapVersion
=
'lal'
self
.
cudaDeviceName
=
None
self
.
binary
=
False
self
.
init_computefstatistic_single_point
()
...
...
pyfstat/grid_based_searches.py
View file @
7d8e0ae6
...
...
@@ -370,7 +370,8 @@ class TransientGridSearch(GridSearch):
transientWindowType
=
None
,
t0Band
=
None
,
tauBand
=
None
,
dt0
=
None
,
dtau
=
None
,
outputTransientFstatMap
=
False
,
outputAtoms
=
False
):
outputAtoms
=
False
,
tCWFstatMapVersion
=
'lal'
,
cudaDeviceName
=
None
):
"""
Parameters
----------
...
...
@@ -403,6 +404,11 @@ class TransientGridSearch(GridSearch):
outputTransientFstatMap: bool
if true, write output files for (t0,tau) Fstat maps
(one file for each doppler grid point!)
tCWFstatMapVersion: str
Choose between standard 'lal' implementation,
'pycuda' for gpu, and some others for devel/debug.
cudaDeviceName: str
GPU name to be matched against drv.Device output.
For all other parameters, see `pyfstat.ComputeFStat` for details
"""
...
...
@@ -428,7 +434,9 @@ class TransientGridSearch(GridSearch):
minStartTime
=
self
.
minStartTime
,
maxStartTime
=
self
.
maxStartTime
,
BSGL
=
self
.
BSGL
,
SSBprec
=
self
.
SSBprec
,
injectSources
=
self
.
injectSources
,
assumeSqrtSX
=
self
.
assumeSqrtSX
)
assumeSqrtSX
=
self
.
assumeSqrtSX
,
tCWFstatMapVersion
=
self
.
tCWFstatMapVersion
,
cudaDeviceName
=
self
.
cudaDeviceName
)
self
.
search
.
get_det_stat
=
self
.
search
.
get_fullycoherent_twoF
def
run
(
self
,
return_data
=
False
):
...
...
@@ -442,19 +450,36 @@ class TransientGridSearch(GridSearch):
self
.
inititate_search_object
()
data
=
[]
if
self
.
outputTransientFstatMap
:
tCWfilebase
=
os
.
path
.
splitext
(
self
.
out_file
)[
0
]
+
'_tCW_'
logging
.
info
(
'Will save per-Doppler Fstatmap'
\
' results to {}*.dat'
.
format
(
tCWfilebase
))
for
vals
in
tqdm
(
self
.
input_data
):
detstat
=
self
.
search
.
get_det_stat
(
*
vals
)
windowRange
=
getattr
(
self
.
search
,
'windowRange'
,
None
)
FstatMap
=
getattr
(
self
.
search
,
'FstatMap'
,
None
)
thisCand
=
list
(
vals
)
+
[
detstat
]
if
getattr
(
self
,
'transientWindowType'
,
None
):
if
self
.
tCWFstatMapVersion
==
'lal'
:
F_mn
=
FstatMap
.
F_mn
.
data
else
:
F_mn
=
FstatMap
.
F_mn
if
self
.
outputTransientFstatMap
:
tCWfile
=
os
.
path
.
splitext
(
self
.
out_file
)[
0
]
+
'_tCW_%.16f_%.16f_%.16f_%.16g_%.16g.dat'
%
(
vals
[
2
],
vals
[
5
],
vals
[
6
],
vals
[
3
],
vals
[
4
])
# freq alpha delta f1dot f2dot
fo
=
lal
.
FileOpen
(
tCWfile
,
'w'
)
lalpulsar
.
write_transientFstatMap_to_fp
(
fo
,
FstatMap
,
windowRange
,
None
)
del
fo
# instead of lal.FileClose() which is not SWIG-exported
Fmn
=
FstatMap
.
F_mn
.
data
maxidx
=
np
.
unravel_index
(
Fmn
.
argmax
(),
Fmn
.
shape
)
# per-Doppler filename convention:
# freq alpha delta f1dot f2dot
tCWfile
=
(
tCWfilebase
+
'%.16f_%.16f_%.16f_%.16g_%.16g.dat'
%
(
vals
[
2
],
vals
[
5
],
vals
[
6
],
vals
[
3
],
vals
[
4
])
)
if
self
.
tCWFstatMapVersion
==
'lal'
:
fo
=
lal
.
FileOpen
(
tCWfile
,
'w'
)
lalpulsar
.
write_transientFstatMap_to_fp
(
fo
,
FstatMap
,
windowRange
,
None
)
# instead of lal.FileClose(),
# which is not SWIG-exported:
del
fo
else
:
self
.
write_F_mn
(
tCWfile
,
F_mn
,
windowRange
)
maxidx
=
np
.
unravel_index
(
F_mn
.
argmax
(),
F_mn
.
shape
)
thisCand
+=
[
windowRange
.
t0
+
maxidx
[
0
]
*
windowRange
.
dt0
,
windowRange
.
tau
+
maxidx
[
1
]
*
windowRange
.
dtau
]
data
.
append
(
thisCand
)
...
...
@@ -468,6 +493,19 @@ class TransientGridSearch(GridSearch):
self
.
save_array_to_disk
(
data
)
self
.
data
=
data
def
write_F_mn
(
self
,
tCWfile
,
F_mn
,
windowRange
):
with
open
(
tCWfile
,
'w'
)
as
tfp
:
tfp
.
write
(
'# t0 [s] tau [s] 2F
\n
'
)
for
m
,
F_m
in
enumerate
(
F_mn
):
this_t0
=
windowRange
.
t0
+
m
*
windowRange
.
dt0
for
n
,
this_F
in
enumerate
(
F_m
):
this_tau
=
windowRange
.
tau
+
n
*
windowRange
.
dtau
;
tfp
.
write
(
' %10d %10d %- 11.8g
\n
'
%
(
this_t0
,
this_tau
,
2.0
*
this_F
))
def
__del__
(
self
):
if
hasattr
(
self
,
'search'
):
self
.
search
.
__del__
()
class
SliceGridSearch
(
GridSearch
):
""" Slice gridded search using ComputeFstat """
...
...
pyfstat/mcmc_based_searches.py
View file @
7d8e0ae6
...
...
@@ -82,6 +82,9 @@ class MCMCSearch(core.BaseSearchClass):
('none' instead of None explicitly calls the transient-window function,
but with the full range, for debugging)
Currently only supported for nsegs=1.
tCWFstatMapVersion: str
Choose between standard 'lal' implementation,
'pycuda' for gpu, and some others for devel/debug.
Attributes
----------
...
...
@@ -115,7 +118,7 @@ class MCMCSearch(core.BaseSearchClass):
rhohatmax
=
1000
,
binary
=
False
,
BSGL
=
False
,
SSBprec
=
None
,
minCoverFreq
=
None
,
maxCoverFreq
=
None
,
injectSources
=
None
,
assumeSqrtSX
=
None
,
transientWindowType
=
None
):
transientWindowType
=
None
,
tCWFstatMapVersion
=
'lal'
):
if
os
.
path
.
isdir
(
outdir
)
is
False
:
os
.
mkdir
(
outdir
)
...
...
@@ -161,7 +164,8 @@ class MCMCSearch(core.BaseSearchClass):
transientWindowType
=
self
.
transientWindowType
,
minStartTime
=
self
.
minStartTime
,
maxStartTime
=
self
.
maxStartTime
,
binary
=
self
.
binary
,
injectSources
=
self
.
injectSources
,
assumeSqrtSX
=
self
.
assumeSqrtSX
,
SSBprec
=
self
.
SSBprec
)
assumeSqrtSX
=
self
.
assumeSqrtSX
,
SSBprec
=
self
.
SSBprec
,
tCWFstatMapVersion
=
self
.
tCWFstatMapVersion
)
if
self
.
minStartTime
is
None
:
self
.
minStartTime
=
self
.
search
.
minStartTime
if
self
.
maxStartTime
is
None
:
...
...
@@ -2212,7 +2216,8 @@ class MCMCTransientSearch(MCMCSearch):
transientWindowType
=
self
.
transientWindowType
,
minStartTime
=
self
.
minStartTime
,
maxStartTime
=
self
.
maxStartTime
,
BSGL
=
self
.
BSGL
,
binary
=
self
.
binary
,
injectSources
=
self
.
injectSources
)
injectSources
=
self
.
injectSources
,
tCWFstatMapVersion
=
self
.
tCWFstatMapVersion
)
if
self
.
minStartTime
is
None
:
self
.
minStartTime
=
self
.
search
.
minStartTime
if
self
.
maxStartTime
is
None
:
...
...
pyfstat/pyCUDAkernels/cudaTransientFstatExpWindow.cu
0 → 100644
View file @
7d8e0ae6
__global__
void
cudaTransientFstatExpWindow
(
float
*
input
,
unsigned
int
numAtoms
,
unsigned
int
TAtom
,
unsigned
int
t0_data
,
unsigned
int
win_t0
,
unsigned
int
win_dt0
,
unsigned
int
win_tau
,
unsigned
int
win_dtau
,
unsigned
int
Fmn_rows
,
unsigned
int
Fmn_cols
,
float
*
Fmn
)
{
/* match CUDA thread indexing and high-level (t0,tau) indexing */
unsigned
int
m
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
// t0: row
unsigned
int
n
=
blockDim
.
y
*
blockIdx
.
y
+
threadIdx
.
y
;
// tau: column
/* unraveled 1D index for 2D output array */
unsigned
int
outidx
=
Fmn_cols
*
m
+
n
;
/* hardcoded copy from lalpulsar */
unsigned
int
TRANSIENT_EXP_EFOLDING
=
3
;
if
(
(
m
<
Fmn_rows
)
&&
(
n
<
Fmn_cols
)
)
{
/* compute Fstat-atom index i_t0 in [0, numAtoms) */
unsigned
int
TAtomHalf
=
TAtom
/
2
;
// integer division
unsigned
int
t0
=
win_t0
+
m
*
win_dt0
;
/* integer round: floor(x+0.5) */
int
i_tmp
=
(
t0
-
t0_data
+
TAtomHalf
)
/
TAtom
;
if
(
i_tmp
<
0
)
{
i_tmp
=
0
;
}
unsigned
int
i_t0
=
(
unsigned
int
)
i_tmp
;
if
(
i_t0
>=
numAtoms
)
{
i_t0
=
numAtoms
-
1
;
}
/* translate n into an atoms end-index
* for this search interval [t0, t0+Tcoh],
* giving the index range of atoms to sum over
*/
unsigned
int
tau
=
win_tau
+
n
*
win_dtau
;
/* get end-time t1 of this transient-window search
* for given tau, what Tcoh should the exponential window cover?
* for speed reasons we want to truncate
* Tcoh = tau * TRANSIENT_EXP_EFOLDING
* with the e-folding factor chosen such that the window-value
* is practically negligible after that, where it will be set to 0
*/
// unsigned int t1 = lround( win_t0 + TRANSIENT_EXP_EFOLDING * win_tau);
unsigned
int
t1
=
t0
+
TRANSIENT_EXP_EFOLDING
*
tau
;
/* compute window end-time Fstat-atom index i_t1 in [0, numAtoms)
* using integer round: floor(x+0.5)
*/
i_tmp
=
(
t1
-
t0_data
+
TAtomHalf
)
/
TAtom
-
1
;
if
(
i_tmp
<
0
)
{
i_tmp
=
0
;
}
unsigned
int
i_t1
=
(
unsigned
int
)
i_tmp
;
if
(
i_t1
>=
numAtoms
)
{
i_t1
=
numAtoms
-
1
;
}
/* now we have two valid atoms-indices [i_t0, i_t1]
* spanning our Fstat-window to sum over
*/
float
Ad
=
0.0
f
;
float
Bd
=
0.0
f
;
float
Cd
=
0.0
f
;
float
Fa_re
=
0.0
f
;
float
Fa_im
=
0.0
f
;
float
Fb_re
=
0.0
f
;
float
Fb_im
=
0.0
f
;
unsigned
short
input_cols
=
7
;
// must match input matrix!
/* sum up atoms */
for
(
unsigned
int
i
=
i_t0
;
i
<=
i_t1
;
i
++
)
{
unsigned
int
t_i
=
t0_data
+
i
*
TAtom
;
float
win_i
=
0.0
;
if
(
t_i
>=
t0
&&
t_i
<=
t1
)
{
float
x
=
1.0
*
(
t_i
-
t0
)
/
tau
;
win_i
=
exp
(
-
x
);
}
float
win2_i
=
win_i
*
win_i
;
Ad
+=
input
[
i
*
input_cols
+
0
]
*
win2_i
;
// a2_alpha
Bd
+=
input
[
i
*
input_cols
+
1
]
*
win2_i
;
// b2_alpha
Cd
+=
input
[
i
*
input_cols
+
2
]
*
win2_i
;
// ab_alpha
Fa_re
+=
input
[
i
*
input_cols
+
3
]
*
win_i
;
// Fa_alpha_re
Fa_im
+=
input
[
i
*
input_cols
+
4
]
*
win_i
;
// Fa_alpha_im
Fb_re
+=
input
[
i
*
input_cols
+
5
]
*
win_i
;
// Fb_alpha_re
Fb_im
+=
input
[
i
*
input_cols
+
6
]
*
win_i
;
// Fb_alpha_im
}
/* get determinant */
float
Dd
=
(
Ad
*
Bd
-
Cd
*
Cd
);
float
DdInv
=
0.0
f
;
/* safety catch as in XLALWeightMultiAMCoeffs():
* make it so that in the end F=0 instead of -nan
*/
if
(
Dd
>
0.0
)
{
DdInv
=
1.0
/
Dd
;
}
/* from XLALComputeFstatFromFaFb */
float
F
=
DdInv
*
(
Bd
*
(
Fa_re
*
Fa_re
+
Fa_im
*
Fa_im
)
+
Ad
*
(
Fb_re
*
Fb_re
+
Fb_im
*
Fb_im
)
-
2.0
*
Cd
*
(
Fa_re
*
Fb_re
+
Fa_im
*
Fb_im
)
);
/* store result in Fstat-matrix
* at unraveled index of element {m,n}
*/
Fmn
[
outidx
]
=
F
;
}
// ( (m < Fmn_rows) && (n < Fmn_cols) )
}
// cudaTransientFstatExpWindow()
pyfstat/pyCUDAkernels/cudaTransientFstatRectWindow.cu
0 → 100644
View file @
7d8e0ae6
__global__
void
cudaTransientFstatRectWindow
(
float
*
input
,
unsigned
int
numAtoms
,
unsigned
int
TAtom
,
unsigned
int
t0_data
,
unsigned
int
win_t0
,
unsigned
int
win_dt0
,
unsigned
int
win_tau
,
unsigned
int
win_dtau
,
unsigned
int
N_tauRange
,
float
*
Fmn
)
{
/* match CUDA thread indexing and high-level (t0,tau) indexing */
// assume 1D block, grid setup
unsigned
int
m
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
// t0: row
unsigned
short
input_cols
=
7
;
// must match input matrix!
/* compute Fstat-atom index i_t0 in [0, numAtoms) */
unsigned
int
TAtomHalf
=
TAtom
/
2
;
// integer division
unsigned
int
t0
=
win_t0
+
m
*
win_dt0
;
/* integer round: floor(x+0.5) */
int
i_tmp
=
(
t0
-
t0_data
+
TAtomHalf
)
/
TAtom
;
if
(
i_tmp
<
0
)
{
i_tmp
=
0
;
}
unsigned
int
i_t0
=
(
unsigned
int
)
i_tmp
;
if
(
i_t0
>=
numAtoms
)
{
i_t0
=
numAtoms
-
1
;
}
float
Ad
=
0.0
f
;
float
Bd
=
0.0
f
;
float
Cd
=
0.0
f
;
float
Fa_re
=
0.0
f
;
float
Fa_im
=
0.0
f
;
float
Fb_re
=
0.0
f
;
float
Fb_im
=
0.0
f
;
unsigned
int
i_t1_last
=
i_t0
;
/* INNER loop over timescale-parameter tau
* NOT parallelized so that we can still use the i_t1_last trick
* (empirically seems to be faster than 2D CUDA version)
*/
for
(
unsigned
int
n
=
0
;
n
<
N_tauRange
;
n
++
)
{
if
(
(
m
<
N_tauRange
)
&&
(
n
<
N_tauRange
)
)
{
/* translate n into an atoms end-index
* for this search interval [t0, t0+Tcoh],
* giving the index range of atoms to sum over
*/
unsigned
int
tau
=
win_tau
+
n
*
win_dtau
;
/* get end-time t1 of this transient-window search */
unsigned
int
t1
=
t0
+
tau
;
/* compute window end-time Fstat-atom index i_t1 in [0, numAtoms)
* using integer round: floor(x+0.5)
*/
i_tmp
=
(
t1
-
t0_data
+
TAtomHalf
)
/
TAtom
-
1
;
if
(
i_tmp
<
0
)
{
i_tmp
=
0
;
}
unsigned
int
i_t1
=
(
unsigned
int
)
i_tmp
;
if
(
i_t1
>=
numAtoms
)
{
i_t1
=
numAtoms
-
1
;
}
/* now we have two valid atoms-indices [i_t0, i_t1]
* spanning our Fstat-window to sum over
*/
for
(
unsigned
int
i
=
i_t1_last
;
i
<=
i_t1
;
i
++
)
{
/* sum up atoms,
* special optimiziation in the rectangular-window case:
* just add on to previous tau values,
* ie re-use the sum over [i_t0, i_t1_last]
from the pevious tau-loop iteration
*/
Ad
+=
input
[
i
*
input_cols
+
0
];
// a2_alpha
Bd
+=
input
[
i
*
input_cols
+
1
];
// b2_alpha
Cd
+=
input
[
i
*
input_cols
+
2
];
// ab_alpha
Fa_re
+=
input
[
i
*
input_cols
+
3
];
// Fa_alpha_re
Fa_im
+=
input
[
i
*
input_cols
+
4
];
// Fa_alpha_im
Fb_re
+=
input
[
i
*
input_cols
+
5
];
// Fb_alpha_re
Fb_im
+=
input
[
i
*
input_cols
+
6
];
// Fb_alpha_im
/* keep track of up to where we summed for the next iteration */
i_t1_last
=
i_t1
+
1
;
}
/* get determinant */
float
Dd
=
(
Ad
*
Bd
-
Cd
*
Cd
);
float
DdInv
=
0.0
f
;
/* safety catch as in XLALWeightMultiAMCoeffs():
* make it so that in the end F=0 instead of -nan
*/
if
(
Dd
>
0.0
)
{
DdInv
=
1.0
/
Dd
;
}
/* from XLALComputeFstatFromFaFb */
float
F
=
DdInv
*
(
Bd
*
(
Fa_re
*
Fa_re
+
Fa_im
*
Fa_im
)
+
Ad
*
(
Fb_re
*
Fb_re
+
Fb_im
*
Fb_im
)
-
2.0
*
Cd
*
(
Fa_re
*
Fb_re
+
Fa_im
*
Fb_im
)
);
/* store result in Fstat-matrix
* at unraveled index of element {m,n}
*/
unsigned
int
outidx
=
m
*
N_tauRange
+
n
;
Fmn
[
outidx
]
=
F
;
}
// if ( (m < N_tauRange) && (n < N_tauRange) )
}
// for ( unsigned int n = 0; n < N_tauRange; n ++ )
}
// cudaTransientFstatRectWindow()
pyfstat/tcw_fstat_map_funcs.py
0 → 100644
View file @
7d8e0ae6
""" Additional helper functions dealing with transient-CW F(t0,tau) maps """
import
numpy
as
np
import
os
import
sys
import
logging
# optional imports
import
importlib
as
imp
def
_optional_import
(
modulename
,
shorthand
=
None
):
'''
Import a module/submodule only if it's available.
using importlib instead of __import__
because the latter doesn't handle sub.modules
Also including a special check to fail more gracefully
when CUDA_DEVICE is set to too high a number.
'''
if
shorthand
is
None
:
shorthand
=
modulename
shorthandbit
=
''
else
:
shorthandbit
=
' as '
+
shorthand
try
:
globals
()[
shorthand
]
=
imp
.
import_module
(
modulename
)
logging
.
debug
(
'Successfully imported module %s%s.'
%
(
modulename
,
shorthandbit
))
success
=
True
except
ImportError
,
e
:
if
e
.
message
==
'No module named '
+
modulename
:
logging
.
debug
(
'No module {:s} found.'
.
format
(
modulename
))
success
=
False
else
:
raise
return
success
class
pyTransientFstatMap
(
object
):
'''
simplified object class for a F(t0,tau) F-stat map (not 2F!)
based on LALSuite's transientFstatMap_t type
replacing the gsl matrix with a numpy array
F_mn: 2D array of 2F values
maxF: maximum of F (not 2F!)
t0_ML: maximum likelihood transient start time t0 estimate
tau_ML: maximum likelihood transient duration tau estimate
'''
def
__init__
(
self
,
N_t0Range
,
N_tauRange
):
self
.
F_mn
=
np
.
zeros
((
N_t0Range
,
N_tauRange
),
dtype
=
np
.
float32
)
# Initializing maxF to a negative value ensures
# that we always update at least once and hence return
# sane t0_d_ML, tau_d_ML
# even if there is only a single bin where F=0 happens.
self
.
maxF
=
float
(
-
1.0
)
self
.
t0_ML
=
float
(
0.0
)
self
.
tau_ML
=
float
(
0.0
)
# dictionary of the actual callable F-stat map functions we support,
# if the corresponding modules are available.
fstatmap_versions
=
{
'lal'
:
lambda
multiFstatAtoms
,
windowRange
:
getattr
(
lalpulsar
,
'ComputeTransientFstatMap'
)
(
multiFstatAtoms
,
windowRange
,
False
),
'pycuda'
:
lambda
multiFstatAtoms
,
windowRange
:
pycuda_compute_transient_fstat_map
(
multiFstatAtoms
,
windowRange
)
}
def
init_transient_fstat_map_features
(
wantCuda
=
False
,
cudaDeviceName
=
None
):
'''
Initialization of available modules (or "features") for F-stat maps.
Returns a dictionary of method names, to match fstatmap_versions
each key's value set to True only if
all required modules are importable on this system.
'''
features
=
{}
have_lal
=
_optional_import
(
'lal'
)
have_lalpulsar
=
_optional_import
(
'lalpulsar'
)
features
[
'lal'
]
=
have_lal
and
have_lalpulsar
# import GPU features
have_pycuda
=
_optional_import
(
'pycuda'
)
have_pycuda_drv
=
_optional_import
(
'pycuda.driver'
,
'drv'
)
have_pycuda_gpuarray
=
_optional_import
(
'pycuda.gpuarray'
,
'gpuarray'
)
have_pycuda_tools
=
_optional_import
(
'pycuda.tools'
,
'cudatools'
)
have_pycuda_compiler
=
_optional_import
(
'pycuda.compiler'
,
'cudacomp'
)
features
[
'pycuda'
]
=
(
have_pycuda_drv
and
have_pycuda_gpuarray
and
have_pycuda_tools
and
have_pycuda_compiler
)
logging
.
debug
(
'Got the following features for transient F-stat maps:'
)
logging
.
debug
(
features
)
if
wantCuda
and
features
[
'pycuda'
]: