Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Gregory Ashton
PyFstat
Commits
7d8e0ae6
Commit
7d8e0ae6
authored
Feb 09, 2018
by
Gregory Ashton
Browse files
Merge branch 'master' into develop-GA
parents
25aab707
d55178b6
Changes
8
Show whitespace changes
Inline
Side-by-side
examples/transient_examples/short_transient_search_gridded.py
View file @
7d8e0ae6
...
@@ -50,7 +50,8 @@ search2 = pyfstat.TransientGridSearch(
...
@@ -50,7 +50,8 @@ search2 = pyfstat.TransientGridSearch(
minStartTime
=
minStartTime
,
maxStartTime
=
maxStartTime
,
minStartTime
=
minStartTime
,
maxStartTime
=
maxStartTime
,
transientWindowType
=
'rect'
,
t0Band
=
Tspan
-
2
*
Tsft
,
tauBand
=
Tspan
,
transientWindowType
=
'rect'
,
t0Band
=
Tspan
-
2
*
Tsft
,
tauBand
=
Tspan
,
BSGL
=
False
,
BSGL
=
False
,
outputTransientFstatMap
=
True
)
outputTransientFstatMap
=
True
,
tCWFstatMapVersion
=
'lal'
)
search2
.
run
()
search2
.
run
()
search2
.
print_max_twoF
()
search2
.
print_max_twoF
()
...
...
pyfstat/core.py
View file @
7d8e0ae6
...
@@ -13,6 +13,7 @@ import scipy.optimize
...
@@ -13,6 +13,7 @@ import scipy.optimize
import
lal
import
lal
import
lalpulsar
import
lalpulsar
import
pyfstat.helper_functions
as
helper_functions
import
pyfstat.helper_functions
as
helper_functions
import
pyfstat.tcw_fstat_map_funcs
as
tcw
# workaround for matplotlib on X-less remote logins
# workaround for matplotlib on X-less remote logins
if
'DISPLAY'
in
os
.
environ
:
if
'DISPLAY'
in
os
.
environ
:
...
@@ -334,7 +335,8 @@ class ComputeFstat(BaseSearchClass):
...
@@ -334,7 +335,8 @@ class ComputeFstat(BaseSearchClass):
dt0
=
None
,
dtau
=
None
,
dt0
=
None
,
dtau
=
None
,
detectors
=
None
,
minCoverFreq
=
None
,
maxCoverFreq
=
None
,
detectors
=
None
,
minCoverFreq
=
None
,
maxCoverFreq
=
None
,
injectSources
=
None
,
injectSqrtSX
=
None
,
assumeSqrtSX
=
None
,
injectSources
=
None
,
injectSqrtSX
=
None
,
assumeSqrtSX
=
None
,
SSBprec
=
None
):
SSBprec
=
None
,
tCWFstatMapVersion
=
'lal'
,
cudaDeviceName
=
None
):
"""
"""
Parameters
Parameters
----------
----------
...
@@ -382,6 +384,11 @@ class ComputeFstat(BaseSearchClass):
...
@@ -382,6 +384,11 @@ class ComputeFstat(BaseSearchClass):
SSBprec : int
SSBprec : int
Flag to set the SSB calculation: 0=Newtonian, 1=relativistic,
Flag to set the SSB calculation: 0=Newtonian, 1=relativistic,
2=relativisitic optimised, 3=DMoff, 4=NO_SPIN
2=relativisitic optimised, 3=DMoff, 4=NO_SPIN
tCWFstatMapVersion: str
Choose between standard 'lal' implementation,
'pycuda' for gpu, and some others for devel/debug.
cudaDeviceName: str
GPU name to be matched against drv.Device output.
"""
"""
...
@@ -624,7 +631,8 @@ class ComputeFstat(BaseSearchClass):
...
@@ -624,7 +631,8 @@ class ComputeFstat(BaseSearchClass):
self
.
windowRange
.
dt0
=
self
.
Tsft
self
.
windowRange
.
dt0
=
self
.
Tsft
self
.
windowRange
.
dtau
=
self
.
Tsft
self
.
windowRange
.
dtau
=
self
.
Tsft
# special treatment of window_type = none ==> replace by rectangular window spanning all the data
# special treatment of window_type = none
# ==> replace by rectangular window spanning all the data
if
self
.
windowRange
.
type
==
lalpulsar
.
TRANSIENT_NONE
:
if
self
.
windowRange
.
type
==
lalpulsar
.
TRANSIENT_NONE
:
self
.
windowRange
.
t0
=
int
(
self
.
minStartTime
)
self
.
windowRange
.
t0
=
int
(
self
.
minStartTime
)
self
.
windowRange
.
t0Band
=
0
self
.
windowRange
.
t0Band
=
0
...
@@ -652,6 +660,11 @@ class ComputeFstat(BaseSearchClass):
...
@@ -652,6 +660,11 @@ class ComputeFstat(BaseSearchClass):
if
self
.
dtau
:
if
self
.
dtau
:
self
.
windowRange
.
dtau
=
self
.
dtau
self
.
windowRange
.
dtau
=
self
.
dtau
logging
.
info
(
'Initialising transient FstatMap features...'
)
self
.
tCWFstatMapFeatures
,
self
.
gpu_context
=
(
tcw
.
init_transient_fstat_map_features
(
self
.
tCWFstatMapVersion
==
'pycuda'
,
self
.
cudaDeviceName
))
def
get_fullycoherent_twoF
(
self
,
tstart
,
tend
,
F0
,
F1
,
F2
,
Alpha
,
Delta
,
def
get_fullycoherent_twoF
(
self
,
tstart
,
tend
,
F0
,
F1
,
F2
,
Alpha
,
Delta
,
asini
=
None
,
period
=
None
,
ecc
=
None
,
tp
=
None
,
asini
=
None
,
period
=
None
,
ecc
=
None
,
tp
=
None
,
argp
=
None
):
argp
=
None
):
...
@@ -694,9 +707,13 @@ class ComputeFstat(BaseSearchClass):
...
@@ -694,9 +707,13 @@ class ComputeFstat(BaseSearchClass):
# F-stat computation
# F-stat computation
self
.
windowRange
.
tau
=
int
(
2
*
self
.
Tsft
)
self
.
windowRange
.
tau
=
int
(
2
*
self
.
Tsft
)
self
.
FstatMap
=
lalpulsar
.
ComputeTransientFstatMap
(
self
.
FstatMap
=
tcw
.
call_compute_transient_fstat_map
(
self
.
FstatResults
.
multiFatoms
[
0
],
self
.
windowRange
,
False
)
self
.
tCWFstatMapVersion
,
self
.
tCWFstatMapFeatures
,
self
.
FstatResults
.
multiFatoms
[
0
],
self
.
windowRange
)
if
self
.
tCWFstatMapVersion
==
'lal'
:
F_mn
=
self
.
FstatMap
.
F_mn
.
data
F_mn
=
self
.
FstatMap
.
F_mn
.
data
else
:
F_mn
=
self
.
FstatMap
.
F_mn
twoF
=
2
*
np
.
max
(
F_mn
)
twoF
=
2
*
np
.
max
(
F_mn
)
if
self
.
BSGL
is
False
:
if
self
.
BSGL
is
False
:
...
@@ -931,6 +948,15 @@ class ComputeFstat(BaseSearchClass):
...
@@ -931,6 +948,15 @@ class ComputeFstat(BaseSearchClass):
raise
RuntimeError
(
'Cannot print atoms vector to file: no FstatResults.multiFatoms, or it is None!'
)
raise
RuntimeError
(
'Cannot print atoms vector to file: no FstatResults.multiFatoms, or it is None!'
)
def __del__(self):
    """
    In pyCuda case without autoinit,
    we need to make sure the context is removed at the end.

    This finaliser can run more than once for the same object:
    TransientGridSearch.__del__ calls it explicitly on its search object,
    and the interpreter calls it again when that object is collected.
    The context handle is therefore cleared before detaching, so that
    detach() is issued at most once per context.
    """
    # getattr with a default also covers objects whose initialisation
    # stopped before gpu_context was ever assigned
    gpu_context = getattr(self, 'gpu_context', None)
    if gpu_context:
        # forget the handle first: a repeated call then finds None
        # and does nothing, even if detach() itself raises
        self.gpu_context = None
        gpu_context.detach()
class
SemiCoherentSearch
(
ComputeFstat
):
class
SemiCoherentSearch
(
ComputeFstat
):
""" A semi-coherent search """
""" A semi-coherent search """
...
@@ -961,6 +987,8 @@ class SemiCoherentSearch(ComputeFstat):
...
@@ -961,6 +987,8 @@ class SemiCoherentSearch(ComputeFstat):
self
.
transientWindowType
=
'rect'
self
.
transientWindowType
=
'rect'
self
.
t0Band
=
None
self
.
t0Band
=
None
self
.
tauBand
=
None
self
.
tauBand
=
None
self
.
tCWFstatMapVersion
=
'lal'
self
.
cudaDeviceName
=
None
self
.
init_computefstatistic_single_point
()
self
.
init_computefstatistic_single_point
()
self
.
init_semicoherent_parameters
()
self
.
init_semicoherent_parameters
()
...
@@ -1100,6 +1128,8 @@ class SemiCoherentGlitchSearch(ComputeFstat):
...
@@ -1100,6 +1128,8 @@ class SemiCoherentGlitchSearch(ComputeFstat):
self
.
transientWindowType
=
'rect'
self
.
transientWindowType
=
'rect'
self
.
t0Band
=
None
self
.
t0Band
=
None
self
.
tauBand
=
None
self
.
tauBand
=
None
self
.
tCWFstatMapVersion
=
'lal'
self
.
cudaDeviceName
=
None
self
.
binary
=
False
self
.
binary
=
False
self
.
init_computefstatistic_single_point
()
self
.
init_computefstatistic_single_point
()
...
...
pyfstat/grid_based_searches.py
View file @
7d8e0ae6
...
@@ -370,7 +370,8 @@ class TransientGridSearch(GridSearch):
...
@@ -370,7 +370,8 @@ class TransientGridSearch(GridSearch):
transientWindowType
=
None
,
t0Band
=
None
,
tauBand
=
None
,
transientWindowType
=
None
,
t0Band
=
None
,
tauBand
=
None
,
dt0
=
None
,
dtau
=
None
,
dt0
=
None
,
dtau
=
None
,
outputTransientFstatMap
=
False
,
outputTransientFstatMap
=
False
,
outputAtoms
=
False
):
outputAtoms
=
False
,
tCWFstatMapVersion
=
'lal'
,
cudaDeviceName
=
None
):
"""
"""
Parameters
Parameters
----------
----------
...
@@ -403,6 +404,11 @@ class TransientGridSearch(GridSearch):
...
@@ -403,6 +404,11 @@ class TransientGridSearch(GridSearch):
outputTransientFstatMap: bool
outputTransientFstatMap: bool
if true, write output files for (t0,tau) Fstat maps
if true, write output files for (t0,tau) Fstat maps
(one file for each doppler grid point!)
(one file for each doppler grid point!)
tCWFstatMapVersion: str
Choose between standard 'lal' implementation,
'pycuda' for gpu, and some others for devel/debug.
cudaDeviceName: str
GPU name to be matched against drv.Device output.
For all other parameters, see `pyfstat.ComputeFStat` for details
For all other parameters, see `pyfstat.ComputeFStat` for details
"""
"""
...
@@ -428,7 +434,9 @@ class TransientGridSearch(GridSearch):
...
@@ -428,7 +434,9 @@ class TransientGridSearch(GridSearch):
minStartTime
=
self
.
minStartTime
,
maxStartTime
=
self
.
maxStartTime
,
minStartTime
=
self
.
minStartTime
,
maxStartTime
=
self
.
maxStartTime
,
BSGL
=
self
.
BSGL
,
SSBprec
=
self
.
SSBprec
,
BSGL
=
self
.
BSGL
,
SSBprec
=
self
.
SSBprec
,
injectSources
=
self
.
injectSources
,
injectSources
=
self
.
injectSources
,
assumeSqrtSX
=
self
.
assumeSqrtSX
)
assumeSqrtSX
=
self
.
assumeSqrtSX
,
tCWFstatMapVersion
=
self
.
tCWFstatMapVersion
,
cudaDeviceName
=
self
.
cudaDeviceName
)
self
.
search
.
get_det_stat
=
self
.
search
.
get_fullycoherent_twoF
self
.
search
.
get_det_stat
=
self
.
search
.
get_fullycoherent_twoF
def
run
(
self
,
return_data
=
False
):
def
run
(
self
,
return_data
=
False
):
...
@@ -442,19 +450,36 @@ class TransientGridSearch(GridSearch):
...
@@ -442,19 +450,36 @@ class TransientGridSearch(GridSearch):
self
.
inititate_search_object
()
self
.
inititate_search_object
()
data
=
[]
data
=
[]
if
self
.
outputTransientFstatMap
:
tCWfilebase
=
os
.
path
.
splitext
(
self
.
out_file
)[
0
]
+
'_tCW_'
logging
.
info
(
'Will save per-Doppler Fstatmap'
\
' results to {}*.dat'
.
format
(
tCWfilebase
))
for
vals
in
tqdm
(
self
.
input_data
):
for
vals
in
tqdm
(
self
.
input_data
):
detstat
=
self
.
search
.
get_det_stat
(
*
vals
)
detstat
=
self
.
search
.
get_det_stat
(
*
vals
)
windowRange
=
getattr
(
self
.
search
,
'windowRange'
,
None
)
windowRange
=
getattr
(
self
.
search
,
'windowRange'
,
None
)
FstatMap
=
getattr
(
self
.
search
,
'FstatMap'
,
None
)
FstatMap
=
getattr
(
self
.
search
,
'FstatMap'
,
None
)
thisCand
=
list
(
vals
)
+
[
detstat
]
thisCand
=
list
(
vals
)
+
[
detstat
]
if
getattr
(
self
,
'transientWindowType'
,
None
):
if
getattr
(
self
,
'transientWindowType'
,
None
):
if
self
.
tCWFstatMapVersion
==
'lal'
:
F_mn
=
FstatMap
.
F_mn
.
data
else
:
F_mn
=
FstatMap
.
F_mn
if
self
.
outputTransientFstatMap
:
if
self
.
outputTransientFstatMap
:
tCWfile
=
os
.
path
.
splitext
(
self
.
out_file
)[
0
]
+
'_tCW_%.16f_%.16f_%.16f_%.16g_%.16g.dat'
%
(
vals
[
2
],
vals
[
5
],
vals
[
6
],
vals
[
3
],
vals
[
4
])
# freq alpha delta f1dot f2dot
# per-Doppler filename convention:
# freq alpha delta f1dot f2dot
tCWfile
=
(
tCWfilebase
+
'%.16f_%.16f_%.16f_%.16g_%.16g.dat'
%
(
vals
[
2
],
vals
[
5
],
vals
[
6
],
vals
[
3
],
vals
[
4
])
)
if
self
.
tCWFstatMapVersion
==
'lal'
:
fo
=
lal
.
FileOpen
(
tCWfile
,
'w'
)
fo
=
lal
.
FileOpen
(
tCWfile
,
'w'
)
lalpulsar
.
write_transientFstatMap_to_fp
(
fo
,
FstatMap
,
windowRange
,
None
)
lalpulsar
.
write_transientFstatMap_to_fp
(
del
fo
# instead of lal.FileClose() which is not SWIG-exported
fo
,
FstatMap
,
windowRange
,
None
)
Fmn
=
FstatMap
.
F_mn
.
data
# instead of lal.FileClose(),
maxidx
=
np
.
unravel_index
(
Fmn
.
argmax
(),
Fmn
.
shape
)
# which is not SWIG-exported:
del
fo
else
:
self
.
write_F_mn
(
tCWfile
,
F_mn
,
windowRange
)
maxidx
=
np
.
unravel_index
(
F_mn
.
argmax
(),
F_mn
.
shape
)
thisCand
+=
[
windowRange
.
t0
+
maxidx
[
0
]
*
windowRange
.
dt0
,
thisCand
+=
[
windowRange
.
t0
+
maxidx
[
0
]
*
windowRange
.
dt0
,
windowRange
.
tau
+
maxidx
[
1
]
*
windowRange
.
dtau
]
windowRange
.
tau
+
maxidx
[
1
]
*
windowRange
.
dtau
]
data
.
append
(
thisCand
)
data
.
append
(
thisCand
)
...
@@ -468,6 +493,19 @@ class TransientGridSearch(GridSearch):
...
@@ -468,6 +493,19 @@ class TransientGridSearch(GridSearch):
self
.
save_array_to_disk
(
data
)
self
.
save_array_to_disk
(
data
)
self
.
data
=
data
self
.
data
=
data
def write_F_mn(self, tCWfile, F_mn, windowRange):
    """
    Write a transient F-stat map to a plain-text file.

    Parameters
    ----------
    tCWfile: str
        Path of the output file; it is opened in 'w' mode.
    F_mn: 2D iterable
        F-stat values; the outer index m steps t0, the inner index n
        steps tau.
    windowRange: object
        Must provide t0, dt0, tau and dtau; row m, column n is written
        with t0 + m*dt0 and tau + n*dtau.

    The file holds one header line followed by one line per (m, n)
    entry with t0, tau and 2*F.
    """
    row_format = ' %10d %10d %- 11.8g\n'
    with open(tCWfile, 'w') as out:
        out.write('# t0 [s] tau [s] 2F\n')
        for row, F_row in enumerate(F_mn):
            # the start time is shared by every entry of this row
            start = windowRange.t0 + row * windowRange.dt0
            for col, F_value in enumerate(F_row):
                duration = windowRange.tau + col * windowRange.dtau
                out.write(row_format % (start, duration, 2.0 * F_value))
def __del__(self):
    """
    Forward finalisation to the wrapped search object, if one was created.

    Nothing happens when no `search` attribute exists yet.
    """
    if not hasattr(self, 'search'):
        return
    self.search.__del__()
class
SliceGridSearch
(
GridSearch
):
class
SliceGridSearch
(
GridSearch
):
""" Slice gridded search using ComputeFstat """
""" Slice gridded search using ComputeFstat """
...
...
pyfstat/mcmc_based_searches.py
View file @
7d8e0ae6
...
@@ -82,6 +82,9 @@ class MCMCSearch(core.BaseSearchClass):
...
@@ -82,6 +82,9 @@ class MCMCSearch(core.BaseSearchClass):
('none' instead of None explicitly calls the transient-window function,
('none' instead of None explicitly calls the transient-window function,
but with the full range, for debugging)
but with the full range, for debugging)
Currently only supported for nsegs=1.
Currently only supported for nsegs=1.
tCWFstatMapVersion: str
Choose between standard 'lal' implementation,
'pycuda' for gpu, and some others for devel/debug.
Attributes
Attributes
----------
----------
...
@@ -115,7 +118,7 @@ class MCMCSearch(core.BaseSearchClass):
...
@@ -115,7 +118,7 @@ class MCMCSearch(core.BaseSearchClass):
rhohatmax
=
1000
,
binary
=
False
,
BSGL
=
False
,
rhohatmax
=
1000
,
binary
=
False
,
BSGL
=
False
,
SSBprec
=
None
,
minCoverFreq
=
None
,
maxCoverFreq
=
None
,
SSBprec
=
None
,
minCoverFreq
=
None
,
maxCoverFreq
=
None
,
injectSources
=
None
,
assumeSqrtSX
=
None
,
injectSources
=
None
,
assumeSqrtSX
=
None
,
transientWindowType
=
None
):
transientWindowType
=
None
,
tCWFstatMapVersion
=
'lal'
):
if
os
.
path
.
isdir
(
outdir
)
is
False
:
if
os
.
path
.
isdir
(
outdir
)
is
False
:
os
.
mkdir
(
outdir
)
os
.
mkdir
(
outdir
)
...
@@ -161,7 +164,8 @@ class MCMCSearch(core.BaseSearchClass):
...
@@ -161,7 +164,8 @@ class MCMCSearch(core.BaseSearchClass):
transientWindowType
=
self
.
transientWindowType
,
transientWindowType
=
self
.
transientWindowType
,
minStartTime
=
self
.
minStartTime
,
maxStartTime
=
self
.
maxStartTime
,
minStartTime
=
self
.
minStartTime
,
maxStartTime
=
self
.
maxStartTime
,
binary
=
self
.
binary
,
injectSources
=
self
.
injectSources
,
binary
=
self
.
binary
,
injectSources
=
self
.
injectSources
,
assumeSqrtSX
=
self
.
assumeSqrtSX
,
SSBprec
=
self
.
SSBprec
)
assumeSqrtSX
=
self
.
assumeSqrtSX
,
SSBprec
=
self
.
SSBprec
,
tCWFstatMapVersion
=
self
.
tCWFstatMapVersion
)
if
self
.
minStartTime
is
None
:
if
self
.
minStartTime
is
None
:
self
.
minStartTime
=
self
.
search
.
minStartTime
self
.
minStartTime
=
self
.
search
.
minStartTime
if
self
.
maxStartTime
is
None
:
if
self
.
maxStartTime
is
None
:
...
@@ -2212,7 +2216,8 @@ class MCMCTransientSearch(MCMCSearch):
...
@@ -2212,7 +2216,8 @@ class MCMCTransientSearch(MCMCSearch):
transientWindowType
=
self
.
transientWindowType
,
transientWindowType
=
self
.
transientWindowType
,
minStartTime
=
self
.
minStartTime
,
maxStartTime
=
self
.
maxStartTime
,
minStartTime
=
self
.
minStartTime
,
maxStartTime
=
self
.
maxStartTime
,
BSGL
=
self
.
BSGL
,
binary
=
self
.
binary
,
BSGL
=
self
.
BSGL
,
binary
=
self
.
binary
,
injectSources
=
self
.
injectSources
)
injectSources
=
self
.
injectSources
,
tCWFstatMapVersion
=
self
.
tCWFstatMapVersion
)
if
self
.
minStartTime
is
None
:
if
self
.
minStartTime
is
None
:
self
.
minStartTime
=
self
.
search
.
minStartTime
self
.
minStartTime
=
self
.
search
.
minStartTime
if
self
.
maxStartTime
is
None
:
if
self
.
maxStartTime
is
None
:
...
...
pyfstat/pyCUDAkernels/cudaTransientFstatExpWindow.cu
0 → 100644
View file @
7d8e0ae6
/*
 * Transient F-statistic map for an exponential window.
 *
 * Each thread handles one (t0, tau) cell: thread x-index m selects the
 * window start time t0 = win_t0 + m * win_dt0, thread y-index n selects
 * the timescale tau = win_tau + n * win_dtau.
 *
 * input : atoms matrix with 7 floats per atom, in the order
 *         a2, b2, ab, Fa_re, Fa_im, Fb_re, Fb_im
 * Fmn   : output, Fmn_rows x Fmn_cols, element {m,n} stored at
 *         index Fmn_cols * m + n
 */
__global__ void cudaTransientFstatExpWindow ( float *input,
                                              unsigned int numAtoms,
                                              unsigned int TAtom,
                                              unsigned int t0_data,
                                              unsigned int win_t0,
                                              unsigned int win_dt0,
                                              unsigned int win_tau,
                                              unsigned int win_dtau,
                                              unsigned int Fmn_rows,
                                              unsigned int Fmn_cols,
                                              float *Fmn
                                            )
{
  /* match CUDA thread indexing and high-level (t0,tau) indexing */
  const unsigned int m = blockDim.x * blockIdx.x + threadIdx.x; // t0:  row
  const unsigned int n = blockDim.y * blockIdx.y + threadIdx.y; // tau: column

  /* threads that fall outside the output matrix have nothing to do */
  if ( ( m >= Fmn_rows ) || ( n >= Fmn_cols ) ) {
    return;
  }

  /* hardcoded copy from lalpulsar */
  const unsigned int TRANSIENT_EXP_EFOLDING = 3;
  const unsigned short input_cols = 7; // must match input matrix!

  const unsigned int TAtomHalf = TAtom / 2; // integer division
  const unsigned int t0  = win_t0 + m * win_dt0;
  const unsigned int tau = win_tau + n * win_dtau;

  /* the window is truncated at Tcoh = tau * TRANSIENT_EXP_EFOLDING after t0;
   * atoms beyond this end-time t1 get window value 0
   */
  const unsigned int t1 = t0 + TRANSIENT_EXP_EFOLDING * tau;

  /* window start-time Fstat-atom index i_t0, clamped to [0, numAtoms),
   * using integer round: floor(x+0.5)
   */
  int idx = ( t0 - t0_data + TAtomHalf ) / TAtom;
  unsigned int i_t0 = ( idx < 0 ) ? 0u : (unsigned int)idx;
  if ( i_t0 >= numAtoms ) {
    i_t0 = numAtoms - 1;
  }

  /* window end-time Fstat-atom index i_t1, clamped the same way */
  idx = ( t1 - t0_data + TAtomHalf ) / TAtom - 1;
  unsigned int i_t1 = ( idx < 0 ) ? 0u : (unsigned int)idx;
  if ( i_t1 >= numAtoms ) {
    i_t1 = numAtoms - 1;
  }

  /* accumulators for the window-weighted sums over atoms [i_t0, i_t1] */
  float Ad    = 0.0f;
  float Bd    = 0.0f;
  float Cd    = 0.0f;
  float Fa_re = 0.0f;
  float Fa_im = 0.0f;
  float Fb_re = 0.0f;
  float Fb_im = 0.0f;

  for ( unsigned int i = i_t0; i <= i_t1; i ++ ) {

    const unsigned int t_i = t0_data + i * TAtom;
    const float *atom = input + i * input_cols;

    /* exponential window value at this atom, 0 outside [t0, t1] */
    float win_i = 0.0f;
    if ( t_i >= t0 && t_i <= t1 ) {
      float x = 1.0 * ( t_i - t0 ) / tau;
      win_i = exp( -x );
    }
    const float win2_i = win_i * win_i;

    /* antenna-pattern terms carry the squared window,
     * Fa and Fb carry the window itself
     */
    Ad    += atom[0] * win2_i; // a2_alpha
    Bd    += atom[1] * win2_i; // b2_alpha
    Cd    += atom[2] * win2_i; // ab_alpha
    Fa_re += atom[3] * win_i;  // Fa_alpha_re
    Fa_im += atom[4] * win_i;  // Fa_alpha_im
    Fb_re += atom[5] * win_i;  // Fb_alpha_re
    Fb_im += atom[6] * win_i;  // Fb_alpha_im

  }

  /* get determinant */
  const float Dd = ( Ad * Bd - Cd * Cd );

  /* safety catch as in XLALWeightMultiAMCoeffs():
   * make it so that in the end F=0 instead of -nan
   */
  float DdInv = 0.0f;
  if ( Dd > 0.0 ) {
    DdInv = 1.0 / Dd;
  }

  /* from XLALComputeFstatFromFaFb */
  const float F = DdInv * ( Bd * ( Fa_re * Fa_re + Fa_im * Fa_im )
                            + Ad * ( Fb_re * Fb_re + Fb_im * Fb_im )
                            - 2.0 * Cd * ( Fa_re * Fb_re + Fa_im * Fb_im )
                          );

  /* store result in Fstat-matrix at unraveled index of element {m,n} */
  Fmn[Fmn_cols * m + n] = F;

} // cudaTransientFstatExpWindow()
pyfstat/pyCUDAkernels/cudaTransientFstatRectWindow.cu
0 → 100644
View file @
7d8e0ae6
/*
 * Transient F-statistic map for a rectangular window.
 *
 * Each thread handles one window start time t0 = win_t0 + m * win_dt0
 * (1D block and grid setup assumed) and loops over all N_tauRange
 * timescales tau = win_tau + n * win_dtau itself.
 *
 * input : atoms matrix with 7 floats per atom, in the order
 *         a2, b2, ab, Fa_re, Fa_im, Fb_re, Fb_im
 * Fmn   : output, element {m,n} stored at index m * N_tauRange + n
 */
__global__ void cudaTransientFstatRectWindow ( float *input,
                                               unsigned int numAtoms,
                                               unsigned int TAtom,
                                               unsigned int t0_data,
                                               unsigned int win_t0,
                                               unsigned int win_dt0,
                                               unsigned int win_tau,
                                               unsigned int win_dtau,
                                               unsigned int N_tauRange,
                                               float *Fmn
                                             )
{
  /* match CUDA thread indexing and high-level (t0,tau) indexing */
  const unsigned int m = blockDim.x * blockIdx.x + threadIdx.x; // t0: row

  /* Rows at or beyond N_tauRange produce no output.
   * NOTE(review): the row index is bounded by the number of tau steps
   * because the kernel receives no separate count of t0 steps; this looks
   * correct only if the launcher uses equally many t0 and tau steps --
   * confirm against the calling code.
   */
  if ( m >= N_tauRange ) {
    return;
  }

  const unsigned short input_cols = 7; // must match input matrix!

  /* window start-time Fstat-atom index i_t0, clamped to [0, numAtoms),
   * using integer round: floor(x+0.5)
   */
  const unsigned int TAtomHalf = TAtom / 2; // integer division
  const unsigned int t0 = win_t0 + m * win_dt0;
  int idx = ( t0 - t0_data + TAtomHalf ) / TAtom;
  unsigned int i_t0 = ( idx < 0 ) ? 0u : (unsigned int)idx;
  if ( i_t0 >= numAtoms ) {
    i_t0 = numAtoms - 1;
  }

  /* running sums, carried over from one tau value to the next */
  float Ad    = 0.0f;
  float Bd    = 0.0f;
  float Cd    = 0.0f;
  float Fa_re = 0.0f;
  float Fa_im = 0.0f;
  float Fb_re = 0.0f;
  float Fb_im = 0.0f;

  /* first atom not yet included in the running sums */
  unsigned int i_t1_last = i_t0;

  /* INNER loop over timescale-parameter tau
   * NOT parallelized so that we can still use the i_t1_last trick
   * (empirically seems to be faster than 2D CUDA version)
   */
  for ( unsigned int n = 0; n < N_tauRange; n ++ ) {

    /* translate n into an atoms end-index
     * for this search interval [t0, t0+Tcoh],
     * giving the index range of atoms to sum over
     */
    const unsigned int tau = win_tau + n * win_dtau;

    /* get end-time t1 of this transient-window search */
    const unsigned int t1 = t0 + tau;

    /* window end-time Fstat-atom index i_t1, clamped to [0, numAtoms),
     * using integer round: floor(x+0.5)
     */
    idx = ( t1 - t0_data + TAtomHalf ) / TAtom - 1;
    unsigned int i_t1 = ( idx < 0 ) ? 0u : (unsigned int)idx;
    if ( i_t1 >= numAtoms ) {
      i_t1 = numAtoms - 1;
    }

    /* sum up atoms,
     * special optimization in the rectangular-window case:
     * just add on to previous tau values,
     * ie re-use the sum over [i_t0, i_t1_last]
     * from the previous tau-loop iteration
     */
    for ( unsigned int i = i_t1_last; i <= i_t1; i ++ ) {

      const float *atom = input + i * input_cols;

      Ad    += atom[0]; // a2_alpha
      Bd    += atom[1]; // b2_alpha
      Cd    += atom[2]; // ab_alpha
      Fa_re += atom[3]; // Fa_alpha_re
      Fa_im += atom[4]; // Fa_alpha_im
      Fb_re += atom[5]; // Fb_alpha_re
      Fb_im += atom[6]; // Fb_alpha_im

      /* keep track of up to where we summed for the next iteration;
       * set inside the loop so that it only moves
       * when at least one atom was actually added
       */
      i_t1_last = i_t1 + 1;

    }

    /* get determinant */
    const float Dd = ( Ad * Bd - Cd * Cd );

    /* safety catch as in XLALWeightMultiAMCoeffs():
     * make it so that in the end F=0 instead of -nan
     */
    float DdInv = 0.0f;
    if ( Dd > 0.0 ) {
      DdInv = 1.0 / Dd;
    }

    /* from XLALComputeFstatFromFaFb */
    const float F = DdInv * ( Bd * ( Fa_re * Fa_re + Fa_im * Fa_im )
                              + Ad * ( Fb_re * Fb_re + Fb_im * Fb_im )
                              - 2.0 * Cd * ( Fa_re * Fb_re + Fa_im * Fb_im )
                            );

    /* store result in Fstat-matrix at unraveled index of element {m,n} */
    Fmn[m * N_tauRange + n] = F;

  } // for ( unsigned int n = 0; n < N_tauRange; n ++ )

} // cudaTransientFstatRectWindow()
pyfstat/tcw_fstat_map_funcs.py
0 → 100644
View file @
7d8e0ae6
""" Additional helper functions dealing with transient-CW F(t0,tau) maps """
import
numpy
as
np
import
os
import
sys
import
logging
# optional imports
import
importlib
as
imp
def
_optional_import
(
modulename
,
shorthand
=
None
):
'''
Import a module/submodule only if it's available.
using importlib instead