samplegeneration.py 14.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
"""
Provide the :func:`generate_sample()` method, which is at the heart of
the sample generation process.
"""

# -----------------------------------------------------------------------------
# IMPORTS
# -----------------------------------------------------------------------------

from __future__ import print_function

import numpy as np

from lal import LIGOTimeGPS
15
from pycbc.psd import interpolate, inverse_spectrum_truncation
16
17
18
19
20
21
22
23
24
25
26
27
from pycbc.psd.analytical import aLIGOZeroDetHighPower
from pycbc.noise import noise_from_psd
from pycbc.filter import sigma

from .hdffiles import get_strain_from_hdf_file
from .waveforms import get_detector_signals, get_waveform


# -----------------------------------------------------------------------------
# FUNCTION DEFINITIONS
# -----------------------------------------------------------------------------

28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def signal_whiten(psd, signal, segment_duration, max_filter_duration, 
                  trunc_method='hann', low_frequency_cutoff=None):
    # Estimate the noise spectrum, no need for segment_duration, because we already get in line 193.
    psd = interpolate(psd, signal.delta_f)
    max_filter_len = int(max_filter_duration * signal.sample_rate)

    # Interpolate and smooth to the desired corruption length
    psd = inverse_spectrum_truncation(psd,
               max_filter_len=max_filter_len,
               low_frequency_cutoff=low_frequency_cutoff,
               trunc_method=trunc_method)

    # Whiten the data by the asd
    white = (signal.to_frequencyseries() / psd**0.5).to_timeseries()
    return white

44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
def generate_sample(static_arguments,
                    event_tuple,
                    waveform_params=None):
    """
    Generate a single sample (or example) by taking a piece of LIGO
    background noise (real or synthetic, depending on `event_tuple`),
    optionally injecting a simulated waveform (depending on
    `waveform_params`) and post-processing the result (whitening,
    band-passing).
    
    Args:
        static_arguments (dict): A dictionary containing global
            technical parameters for the sample generation, for example
            the target_sampling_rate of the output.
        event_tuple (tuple): A tuple `(event_time, file_path)`, which
            specifies the GPS time at which to make an injection and
            the path of the HDF file which contains said GPS time.
            If `file_path` is `None`, synthetic noise will be used
            instead and the `event_time` only serves as a seed for
            the corresponding (random) noise generator.
        waveform_params (dict): A dictionary containing the randomly
            sampled parameters that are passed as inputs to the
            waveform model (e.g., the masses, spins, position, ...).

    Returns:
        A tuple `(sample, injection_parameters)`, which contains the
        generated `sample` itself (a dict with keys `{'event_time',
        'h1_strain', 'l1_strain'}`), and the `injection_parameters`,
        which are either `None` (in case no injection was made), or a
        dict containing the `waveform_params` and some additional
        parameters (e.g., single detector SNRs).
    """

    # -------------------------------------------------------------------------
    # Define shortcuts for some elements of self.static_arguments
    # -------------------------------------------------------------------------

    # Read out frequency-related arguments
    original_sampling_rate = static_arguments['original_sampling_rate']
    target_sampling_rate = static_arguments['target_sampling_rate']
    f_lower = static_arguments['f_lower']
    delta_f = static_arguments['delta_f']
    fd_length = static_arguments['fd_length']

    # Get the width of the noise sample that we either select from the raw
    # HDF files, or generate synthetically
    noise_interval_width = static_arguments['noise_interval_width']

    # Get how many seconds before and after the event time to use
    seconds_before_event = static_arguments['seconds_before_event']
    seconds_after_event = static_arguments['seconds_after_event']

    # Get the event time and the dict containing the HDF file path
    event_time, hdf_file_paths = event_tuple

    # -------------------------------------------------------------------------
    # Get the background noise (either from data or synthetically)
    # -------------------------------------------------------------------------

    # If the event_time is None, we generate synthetic noise
    if hdf_file_paths is None:

        # Create an artificial PSD for the noise
        # TODO: Is this the best choice for this task?
        psd = aLIGOZeroDetHighPower(length=fd_length,
                                    delta_f=delta_f,
                                    low_freq_cutoff=f_lower)

        # Actually generate the noise using the PSD and LALSimulation
        noise = dict()
        for i, det in enumerate(('H1', 'L1')):

            # Compute the length of the noise sample in time steps
            noise_length = noise_interval_width * target_sampling_rate

            # Generate the noise for this detector
            noise[det] = noise_from_psd(length=noise_length,
                                        delta_t=(1.0 / target_sampling_rate),
                                        psd=psd,
                                        seed=(2 * event_time + i))

            # Manually fix the noise start time to match the fake event time.
            # However, for some reason, the correct setter method seems broken?
            start_time = event_time - noise_interval_width / 2
            # noinspection PyProtectedMember
            noise[det]._epoch = LIGOTimeGPS(start_time)

    # Otherwise we select the noise from the corresponding HDF file
    else:

        kwargs = dict(hdf_file_paths=hdf_file_paths,
                      gps_time=event_time,
                      interval_width=noise_interval_width,
                      original_sampling_rate=original_sampling_rate,
                      target_sampling_rate=target_sampling_rate,
                      as_pycbc_timeseries=True)
        noise = get_strain_from_hdf_file(**kwargs)

    # -------------------------------------------------------------------------
    # If applicable, make an injection
    # -------------------------------------------------------------------------

    # If no waveform parameters are given, we are not making an injection.
    # In this case, there are no detector signals and no injection
    # parameters, and the strain is simply equal to the noise
    if waveform_params is None:
        detector_signals = None
        injection_parameters = None
152
        output_signals = None
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
        strain = noise

    # Otherwise, we need to simulate a waveform for the given waveform_params
    # and add it into the noise to create the strain
    else:

        # ---------------------------------------------------------------------
        # Simulate the waveform with the given injection parameters
        # ---------------------------------------------------------------------

        # Actually simulate the waveform with these parameters
        waveform = get_waveform(static_arguments=static_arguments,
                                waveform_params=waveform_params)

        # Get the detector signals by projecting on the antenna patterns
        detector_signals = \
            get_detector_signals(static_arguments=static_arguments,
                                 waveform_params=waveform_params,
                                 event_time=event_time,
                                 waveform=waveform)

174
175
176
177
        # Store the output_signal
        output_signals = {}
        output_signals = detector_signals.copy()

178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
        # ---------------------------------------------------------------------
        # Add the waveform into the noise as is to calculate the NOMF-SNR
        # ---------------------------------------------------------------------

        # Store the dummy strain, the PSDs and the SNRs for the two detectors
        strain_ = {}
        psds = {}
        snrs = {}

        # Calculate these quantities for both detectors
        for det in ('H1', 'L1'):

            # Add the simulated waveform into the noise to get the dummy strain
            strain_[det] = noise[det].add_into(detector_signals[det])

            # Estimate the Power Spectral Density from the dummy strain
            psds[det] = strain_[det].psd(4)
            psds[det] = interpolate(psds[det], delta_f=delta_f)

            # Use the PSD estimate to calculate the optimal matched
            # filtering SNR for this injection and this detector
            snrs[det] = sigma(htilde=detector_signals[det],
                              psd=psds[det],
                              low_frequency_cutoff=f_lower)

        # Calculate the network optimal matched filtering SNR for this
        # injection (which we need for scaling to the chosen injection SNR)
        nomf_snr = np.sqrt(snrs['H1']**2 + snrs['L1']**2)

        # ---------------------------------------------------------------------
        # Add the waveform into the noise with the chosen injection SNR
        # ---------------------------------------------------------------------

        # Compute the rescaling factor
        injection_snr = waveform_params['injection_snr']
        scale_factor = 1.0 * injection_snr / nomf_snr

        strain = {}
        for det in ('H1', 'L1'):

            # Add the simulated waveform into the noise, using a scaling
            # factor to ensure that the resulting NOMF-SNR equals the chosen
            # injection SNR
            strain[det] = noise[det].add_into(scale_factor *
                                              detector_signals[det])
223
            output_signals[det] = scale_factor * output_signals[det]
224
225
226
227
228
229
230
231
232
233
        # ---------------------------------------------------------------------
        # Store some information about the injection we just made
        # ---------------------------------------------------------------------

        # Store the information we have computed ourselves
        injection_parameters = {'scale_factor': scale_factor,
                                'h1_snr': snrs['H1'],
                                'l1_snr': snrs['L1']}

        # Also add the waveform parameters we have sampled
234
        for key, value in iter(waveform_params.items()):
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
            injection_parameters[key] = value

    # -------------------------------------------------------------------------
    # Whiten and bandpass the strain (also for noise-only samples)
    # -------------------------------------------------------------------------

    for det in ('H1', 'L1'):

        # Get the whitening parameters
        segment_duration = static_arguments['whitening_segment_duration']
        max_filter_duration = static_arguments['whitening_max_filter_duration']

        # Whiten the strain (using the built-in whitening of PyCBC)
        # We don't need to remove the corrupted samples here, because we
        # crop the strain later on
        strain[det] = \
            strain[det].whiten(segment_duration=segment_duration,
                               max_filter_duration=max_filter_duration,
                               remove_corrupted=False)

255
256
257
258
259
260
261
        if waveform_params is not None:
            output_signals[det] = \
                signal_whiten(psd = psds[det], 
                              signal = output_signals[det], 
                              segment_duration = segment_duration, 
                              max_filter_duration = max_filter_duration)
        
262
263
264
265
266
267
268
269
270
271
        # Get the limits for the bandpass
        bandpass_lower = static_arguments['bandpass_lower']
        bandpass_upper = static_arguments['bandpass_upper']

        # Apply a high-pass to remove everything below `bandpass_lower`;
        # If bandpass_lower = 0, do not apply any high-pass filter.
        if bandpass_lower != 0:
            strain[det] = strain[det].highpass_fir(frequency=bandpass_lower,
                                                   remove_corrupted=False,
                                                   order=512)
272
273
274
275
            if waveform_params is not None:
                output_signals[det] = output_signals[det].highpass_fir(frequency=bandpass_lower,
                                                       remove_corrupted=False,
                                                       order=512)
276
277
278
279
280
281
        # Apply a low-pass filter to remove everything above `bandpass_upper`.
        # If bandpass_upper = sampling rate, do not apply any low-pass filter.
        if bandpass_upper != target_sampling_rate:
            strain[det] = strain[det].lowpass_fir(frequency=bandpass_upper,
                                                  remove_corrupted=False,
                                                  order=512)
282
283
284
285
            if waveform_params is not None:
                output_signals[det] = output_signals[det].lowpass_fir(frequency=bandpass_upper,
                                                      remove_corrupted=False,
                                                      order=512)
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
    # -------------------------------------------------------------------------
    # Cut strain (and signal) time series to the pre-specified length
    # -------------------------------------------------------------------------

    for det in ('H1', 'L1'):

        # Define some shortcuts for slicing
        a = event_time - seconds_before_event
        b = event_time + seconds_after_event

        # Cut the strain to the desired length
        strain[det] = strain[det].time_slice(a, b)

        # If we've made an injection, also cut the simulated signal
        if waveform_params is not None:

            # Cut the detector signals to the specified length
            detector_signals[det] = detector_signals[det].time_slice(a, b)
304
            output_signals[det] = output_signals[det].time_slice(a, b)
305
306
307
308
309
310
311

            # Also add the detector signals to the injection parameters
            injection_parameters['h1_signal'] = \
                np.array(detector_signals['H1'])
            injection_parameters['l1_signal'] = \
                np.array(detector_signals['L1'])

312
313
314
315
316
            injection_parameters['h1_output_signal'] = \
                np.array(output_signals['H1'])
            injection_parameters['l1_output_signal'] = \
                np.array(output_signals['L1'])

317
318
319
320
321
322
323
324
325
326
327
328
    # -------------------------------------------------------------------------
    # Collect all available information about this sample and return results
    # -------------------------------------------------------------------------

    # The whitened strain is numerically on the order of O(1), so we can save
    # it as a 32-bit float (unlike the original signal, which is down to
    # O(10^-{30}) and thus requires 64-bit floats).
    sample = {'event_time': event_time,
              'h1_strain': np.array(strain['H1']).astype(np.float32),
              'l1_strain': np.array(strain['L1']).astype(np.float32)}

    return sample, injection_parameters