diff --git a/python3_samples/.DS_Store b/python3_samples/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..749e222af6d2280dd99e24f4c7942a3c7586b65e
Binary files /dev/null and b/python3_samples/.DS_Store differ
diff --git a/python3_samples/config_files/.DS_Store b/python3_samples/config_files/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..2db5130bf4e2d5f5c211a5667f8578af23fa86c3
Binary files /dev/null and b/python3_samples/config_files/.DS_Store differ
diff --git a/python3_samples/config_files/default.json b/python3_samples/config_files/default.json
new file mode 100755
index 0000000000000000000000000000000000000000..0b83109509f11e58d60e0010ae568869c4a79904
--- /dev/null
+++ b/python3_samples/config_files/default.json
@@ -0,0 +1,12 @@
+{
+    "random_seed": 42,
+    "background_data_directory": null,
+    "dq_bits": [0, 1, 2, 3],
+    "inj_bits": [0, 1, 2, 4],
+    "waveform_params_file_name": "waveform_params.ini",
+    "max_runtime": 60,
+    "n_injection_samples": 32,
+    "n_noise_samples": 16,
+    "n_processes": 4,
+    "output_file_name": "default.hdf"
+}
diff --git a/python3_samples/config_files/waveform_params.ini b/python3_samples/config_files/waveform_params.ini
new file mode 100755
index 0000000000000000000000000000000000000000..c6a3477608477cce042a8af37110bc5fbcd8e477
--- /dev/null
+++ b/python3_samples/config_files/waveform_params.ini
@@ -0,0 +1,116 @@
+; -----------------------------------------------------------------------------
+; DECLARE ARGUMENTS
+; -----------------------------------------------------------------------------
+
+[variable_args]
+; Waveform parameters that vary between samples (drawn from the priors below)
+mass1 =
+mass2 =
+spin1z =
+spin2z =
+ra =
+dec =
+coa_phase =
+inclination =
+polarization =
+injection_snr =
+
+
+[static_args]
+; Waveform parameters that are the same for every sample
+approximant = SEOBNRv4
+domain = time
+f_lower = 18
+distance = 100
+waveform_length = 128
+
+; Width of the background noise interval (in seconds) around the event_time,
+; which is used to make the injection. It should be larger than
+;   sample_length = seconds_before_event + seconds_after_event
+; (see below), because we need to crop off the edges that are corrupted by
+; the whitening.
+noise_interval_width = 16
+
+; original_sampling_rate = sampling rate of the raw HDF files (usually 4096 Hz)
+; target_sampling_rate = desired sampling rate of the sample generation output
+original_sampling_rate = 4096
+target_sampling_rate = 2048
+
+; Define parameters for the whitening procedure. See the documentation of the
+; pycbc.types.TimeSeries.whiten() method for an explanation of what these
+; values mean exactly.
+whitening_segment_duration = 4
+whitening_max_filter_duration = 4
+
+; Define the lower and upper bound for the bandpass filter (in Hertz)
+bandpass_lower = 20
+bandpass_upper = 2048
+
+; Define how to align the sample around the event time. By convention, the
+; event time is the H1 time!
+; The sum of these values will be the sample_length!
+seconds_before_event = 5.5
+seconds_after_event = 2.5
+
+; alpha for the Tukey window that is used to "fade on" the waveforms.
+; It represents the fraction of the window inside the cosine-tapered region.
+; To turn off the "fade on", simply choose tukey_alpha = 0.
+tukey_alpha = 0.25 + + +; ----------------------------------------------------------------------------- +; DEFINE DISTRIBUTIONS FOR PARAMETERS +; ----------------------------------------------------------------------------- + +[prior-mass1] +; Prior for mass1 +name = uniform +min-mass1 = 10. +max-mass1 = 80. + + +[prior-mass2] +; Prior for mass2 +name = uniform +min-mass2 = 10. +max-mass2 = 80. + + +[prior-spin1z] +; Prior for spin1z +name = uniform +min-spin1z = 0 +max-spin1z = 0.998 + + +[prior-spin2z] +; Prior for spin2z +name = uniform +min-spin2z = 0 +max-spin2z = 0.998 + + +[prior-injection_snr] +; Prior for the injection SNR +name = uniform +min-injection_snr = 5 +max-injection_snr = 20 + + +[prior-coa_phase] +; Coalescence phase prior +name = uniform_angle + + +[prior-inclination] +; Inclination prior +name = sin_angle + + +[prior-ra+dec] +; Sky position prior +name = uniform_sky + + +[prior-polarization] +; Polarization prior +name = uniform_angle diff --git a/python3_samples/generate_sample.py b/python3_samples/generate_sample.py new file mode 100644 index 0000000000000000000000000000000000000000..d98ff98da6618cdba3252d5470b73c1ae13e121e --- /dev/null +++ b/python3_samples/generate_sample.py @@ -0,0 +1,306 @@ +""" +The "main script" of this repository: Read in a configuration file and +generate synthetic GW data according to the provided specifications. +""" + +# ----------------------------------------------------------------------------- +# IMPORTS +# ----------------------------------------------------------------------------- + +from __future__ import print_function + +import argparse +import numpy as np +import os +import sys +import time + +from itertools import count +from multiprocessing import Process, Queue +from tqdm import tqdm + +from utils.configfiles import read_ini_config, read_json_config +from utils.hdffiles import NoiseTimeline +from utils.samplefiles import SampleFile +from utils.samplegeneration import generate_sample +from utils.waveforms import WaveformParameterGenerator + +# ----------------------------------------------------------------------------- +# MAIN CODE +# ----------------------------------------------------------------------------- + +if __name__ == '__main__': + + # ------------------------------------------------------------------------- + # Preliminaries + # ------------------------------------------------------------------------- + + # Disable output buffering ('flush' option is not available for Python 2) + #sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0) + + # Start the stopwatch + script_start = time.time() + + print('') + print('GENERATE A GW DATA SAMPLE FILE') + print('') + + # ------------------------------------------------------------------------- + # Parse the command line arguments + # ------------------------------------------------------------------------- + + # Set up the parser and add arguments + parser = argparse.ArgumentParser(description='Generate a GW data sample.') + parser.add_argument('--config-file', + help='Name of the JSON configuration file which ' + 'controls the sample generation process.', + default='default.json') + + # Parse the arguments that were passed when calling this script + print('Parsing command line arguments...', end=' ') + command_line_arguments = vars(parser.parse_args()) + print('Done!') + + # ------------------------------------------------------------------------- + # Read in JSON config file specifying the sample generation process + # 
-------------------------------------------------------------------------
+
+    # Build the full path to the config file
+    json_config_name = command_line_arguments['config_file']
+    json_config_path = os.path.join('.', 'config_files', json_config_name)
+
+    # Read the JSON configuration into a dict
+    print('Reading and validating JSON configuration file...', end=' ')
+    config = read_json_config(json_config_path)
+    print('Done!')
+
+    # -------------------------------------------------------------------------
+    # Read in INI config file specifying the static_args and variable_args
+    # -------------------------------------------------------------------------
+
+    # Build the full path to the waveform params file
+    ini_config_name = config['waveform_params_file_name']
+    ini_config_path = os.path.join('.', 'config_files', ini_config_name)
+
+    # Read in the variable_arguments and static_arguments
+    print('Reading and validating INI configuration file...', end=' ')
+    variable_arguments, static_arguments = read_ini_config(ini_config_path)
+    print('Done!\n')
+
+    # -------------------------------------------------------------------------
+    # Shortcuts and random seed
+    # -------------------------------------------------------------------------
+
+    # Set the random seed for this script
+    np.random.seed(config['random_seed'])
+
+    # Define some useful shortcuts
+    random_seed = config['random_seed']
+    max_runtime = config['max_runtime']
+    bkg_data_dir = config['background_data_directory']
+
+    # -------------------------------------------------------------------------
+    # Construct a generator for sampling waveform parameters
+    # -------------------------------------------------------------------------
+
+    # Initialize a waveform parameter generator that can sample injection
+    # parameters from the distributions specified in the config file
+    waveform_parameter_generator = \
+        WaveformParameterGenerator(config_file=ini_config_path,
+                                   random_seed=random_seed)
+
+    # Wrap it in a generator expression so that we can easily sample
+    # from it by calling next(waveform_parameters)
+    waveform_parameters = \
+        (waveform_parameter_generator.draw() for _ in iter(int, 1))
+
+    # -------------------------------------------------------------------------
+    # Construct a generator for sampling valid noise times
+    # -------------------------------------------------------------------------
+
+    # If the 'background_data_directory' is None, we will use synthetic noise
+    if config['background_data_directory'] is None:
+
+        print('Using synthetic noise! (background_data_directory = None)\n')
+
+        # Create an iterator that returns a fake "event time", which we will
+        # use as a seed for the RNG to ensure the reproducibility of the
+        # generated synthetic noise.
+        # For the HDF file path that contains that time, we always yield
+        # None, so that we know that we need to generate synthetic noise.
+        noise_times = ((1000000000 + _, None) for _ in count())
+
+    # Otherwise, we set up a timeline object for the background noise, that
+    # is, we read in all HDF files in the raw_data_directory and figure out
+    # which parts of them are usable (i.e., have the right data quality and
+    # injection bits set, as specified in the config file).
+    else:
+
+        print('Using real noise from LIGO recordings! '
+              '(background_data_directory = {})'.format(bkg_data_dir))
+        print('Reading in raw data. This may take several minutes...',
+              end=' ')
+
+        # Create a timeline object by running over all HDF files once
+        noise_timeline = NoiseTimeline(background_data_directory=bkg_data_dir,
+                                       random_seed=random_seed)
+
+        # Create a noise time generator so that we can sample valid noise
+        # times simply by calling next(noise_times)
+        delta_t = int(static_arguments['noise_interval_width'] / 2)
+        noise_times = (noise_timeline.sample(delta_t=delta_t,
+                                             dq_bits=config['dq_bits'],
+                                             inj_bits=config['inj_bits'],
+                                             return_paths=True)
+                       for _ in iter(int, 1))
+
+        print('Done!\n')
+
+    # -------------------------------------------------------------------------
+    # Define a convenience function to generate arguments for the simulation
+    # -------------------------------------------------------------------------
+
+    def generate_arguments(injection=True):
+
+        # Only sample waveform parameters if we are making an injection
+        waveform_params = next(waveform_parameters) if injection else None
+
+        # Return all necessary arguments as a dictionary
+        return dict(static_arguments=static_arguments,
+                    event_tuple=next(noise_times),
+                    waveform_params=waveform_params)
+
+    # -------------------------------------------------------------------------
+    # Finally: Create our samples!
+    # -------------------------------------------------------------------------
+
+    # Keep track of all the samples (and parameters) we have generated
+    samples = dict(injection_samples=[], noise_samples=[])
+    injection_parameters = dict(injection_samples=[], noise_samples=[])
+
+    print('Generating samples containing an injection...')
+    n_samples = config['n_injection_samples']
+    print('Number of samples:', n_samples)
+    arguments_generator = \
+        (generate_arguments(injection=True) for _ in iter(int, 1))
+
+    # Collect the results for all injection samples. Note that the
+    # results_list must be initialized once, *before* the loop; otherwise
+    # all but the last result would be discarded.
+    sample_type = 'injection_samples'
+    results_list = []
+    for _ in tqdm(range(n_samples)):
+        arguments = next(arguments_generator)
+        result = generate_sample(**arguments)
+        results_list.append(result)
+
+    # -------------------------------------------------------------------------
+    # Process results in the results_list
+    # -------------------------------------------------------------------------
+
+    # Separate the samples and the injection parameters
+    samples[sample_type], injection_parameters[sample_type] = \
+        zip(*results_list)
+
+    # Sort all results by the event_time
+    idx = np.argsort([_['event_time'] for _ in list(samples[sample_type])])
+    samples[sample_type] = \
+        list([samples[sample_type][i] for i in idx])
+    injection_parameters[sample_type] = \
+        list([injection_parameters[sample_type][i] for i in idx])
+
+    # -------------------------------------------------------------------------
+    # Now do the same thing for the samples containing pure noise
+    # -------------------------------------------------------------------------
+
+    print('Generating samples containing pure noise...')
+    n_samples = config['n_noise_samples']
+    print('Number of samples:', n_samples)
+    arguments_generator = \
+        (generate_arguments(injection=False) for _ in iter(int, 1))
+
+    sample_type = 'noise_samples'
+    results_list = []
+    for _ in tqdm(range(n_samples)):
+        arguments = next(arguments_generator)
+        result = generate_sample(**arguments)
+        results_list.append(result)
+
+    # Separate the samples and the injection parameters (which are always
+    # None for noise samples)
+    samples[sample_type], injection_parameters[sample_type] = \
+        zip(*results_list)
+
+    # Sort all results by the event_time
+    idx = np.argsort([_['event_time'] for _ in list(samples[sample_type])])
+    samples[sample_type] = \
+        list([samples[sample_type][i] for i in idx])
+    injection_parameters[sample_type] = \
+        list([injection_parameters[sample_type][i] for i in idx])
+
+    print('Sample generation completed!\n')
+
+    # -------------------------------------------------------------------------
+    # Compute the normalization parameters for this file
+    # -------------------------------------------------------------------------
+
+    print('Computing normalization parameters for sample...', end=' ')
+
+    # Gather all samples (with and without injection) in one list
+    all_samples = list(samples['injection_samples'] + samples['noise_samples'])
+
+    # Group all samples by detector
+    h1_samples = [_['h1_strain'] for _ in all_samples]
+    l1_samples = [_['l1_strain'] for _ in all_samples]
+
+    # Stack recordings along first axis
+    h1_samples = np.vstack(h1_samples)
+    l1_samples = np.vstack(l1_samples)
+
+    # Compute the mean and standard deviation for both detectors as the
+    # median of the means / standard deviations for each sample. This is
+    # more robust to outliers than computing "global" parameters by
+    # concatenating all samples and treating them as a single, long
+    # time series.
+    normalization_parameters = \
+        dict(h1_mean=np.median(np.mean(h1_samples, axis=1), axis=0),
+             l1_mean=np.median(np.mean(l1_samples, axis=1), axis=0),
+             h1_std=np.median(np.std(h1_samples, axis=1), axis=0),
+             l1_std=np.median(np.std(l1_samples, axis=1), axis=0))
+
+    print('Done!\n')
+
+    # -------------------------------------------------------------------------
+    # Create a SampleFile dict from list of samples and save it as an HDF file
+    # -------------------------------------------------------------------------
+
+    print('Saving the results to an HDF file...', end=' ')
+
+    # Initialize the dictionary that we use to create a SampleFile object
+    sample_file_dict = dict(command_line_arguments=command_line_arguments,
+                            injection_parameters=dict(),
+                            injection_samples=dict(),
+                            noise_samples=dict(),
+                            normalization_parameters=normalization_parameters,
+                            static_arguments=static_arguments)
+
+    # Collect and add samples (with and without injection)
+    for sample_type in ('injection_samples', 'noise_samples'):
+        for key in ('event_time', 'h1_strain', 'l1_strain'):
+            if samples[sample_type]:
+                value = np.array([_[key] for _ in list(samples[sample_type])])
+            else:
+                value = None
+            sample_file_dict[sample_type][key] = value
+
+    # Collect and add injection_parameters (ignore noise samples here,
+    # because for those, the injection_parameters are always None)
+    other_keys = ['h1_signal', 'h1_output_signal', 'h1_snr',
+                  'l1_signal', 'l1_output_signal', 'l1_snr',
+                  'scale_factor']
+    for key in list(variable_arguments + other_keys):
+        if injection_parameters['injection_samples']:
+            value = np.array([_[key] for _ in
+                              injection_parameters['injection_samples']])
+        else:
+            value = None
+        sample_file_dict['injection_parameters'][key] = value
+
+    # Construct the path for the output HDF file
+    output_dir = os.path.join('.', 'output')
+    if not os.path.exists(output_dir):
+        os.mkdir(output_dir)
+    sample_file_path = os.path.join(output_dir, config['output_file_name'])
+
+    # Create the SampleFile object and save it to the specified output file
+    sample_file = SampleFile(data=sample_file_dict)
+    sample_file.to_hdf(file_path=sample_file_path)
+
+    print('Done!')
+
+    # Get file size in MB and print the result
+    sample_file_size = os.path.getsize(sample_file_path) / 1024**2
+    print('Size of resulting HDF file: {:.2f} MB'.format(sample_file_size))
+    print('')
+
+    # -------------------------------------------------------------------------
+    # Postliminaries
+    # -------------------------------------------------------------------------
+
+    # PyCBC always creates a copy of the waveform parameters file, which we
+    # can delete at the end of the sample generation process
+    duplicate_path = os.path.join('.', config['waveform_params_file_name'])
+    if os.path.exists(duplicate_path):
+        os.remove(duplicate_path)
+
+    # Print the total run time
+    print('Total runtime: {:.1f} seconds!'.format(time.time() - script_start))
+    print('')
diff --git a/python3_samples/output/.DS_Store b/python3_samples/output/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..88760d6b4f82f62024c05bb0a0f4b8d34d378264
Binary files /dev/null and b/python3_samples/output/.DS_Store differ
diff --git a/python3_samples/utils/__init__.py b/python3_samples/utils/__init__.py
new file mode 100755
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git
a/python3_samples/utils/__pycache__/__init__.cpython-39.pyc b/python3_samples/utils/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..26517d9b974eea945630a2a15ba2903dfaf7f135 Binary files /dev/null and b/python3_samples/utils/__pycache__/__init__.cpython-39.pyc differ diff --git a/python3_samples/utils/__pycache__/configfiles.cpython-39.pyc b/python3_samples/utils/__pycache__/configfiles.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0a56010086881851a061239a47d3d40083bdb64b Binary files /dev/null and b/python3_samples/utils/__pycache__/configfiles.cpython-39.pyc differ diff --git a/python3_samples/utils/__pycache__/hdffiles.cpython-39.pyc b/python3_samples/utils/__pycache__/hdffiles.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..297bd1cb160ef013ac80326a409ba565f6071ef5 Binary files /dev/null and b/python3_samples/utils/__pycache__/hdffiles.cpython-39.pyc differ diff --git a/python3_samples/utils/__pycache__/samplefiles.cpython-39.pyc b/python3_samples/utils/__pycache__/samplefiles.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4a7afc6bbaf3df9c8b931984aef4200fcba35ad8 Binary files /dev/null and b/python3_samples/utils/__pycache__/samplefiles.cpython-39.pyc differ diff --git a/python3_samples/utils/__pycache__/samplegeneration.cpython-39.pyc b/python3_samples/utils/__pycache__/samplegeneration.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b2b966df5f1085f2f8503556c1df4fddaccf1c89 Binary files /dev/null and b/python3_samples/utils/__pycache__/samplegeneration.cpython-39.pyc differ diff --git a/python3_samples/utils/__pycache__/staticargs.cpython-39.pyc b/python3_samples/utils/__pycache__/staticargs.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9f7b313531af13020cade7da87a7899e4553a5a7 Binary files /dev/null and b/python3_samples/utils/__pycache__/staticargs.cpython-39.pyc differ diff --git a/python3_samples/utils/__pycache__/waveforms.cpython-39.pyc b/python3_samples/utils/__pycache__/waveforms.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e660db95e1a7a22bd1f8dd4d6e39b9cd2ceaed57 Binary files /dev/null and b/python3_samples/utils/__pycache__/waveforms.cpython-39.pyc differ diff --git a/python3_samples/utils/configfiles.py b/python3_samples/utils/configfiles.py new file mode 100755 index 0000000000000000000000000000000000000000..cf99e116e85d943f94c0aceebf95c30a110ad8a9 --- /dev/null +++ b/python3_samples/utils/configfiles.py @@ -0,0 +1,98 @@ +""" +Provide functions for reading and parsing configuration files. +""" + +# ----------------------------------------------------------------------------- +# IMPORTS +# ----------------------------------------------------------------------------- + +import json +import os + +from pycbc.workflow import WorkflowConfigParser +from pycbc.distributions import read_params_from_config + +from .staticargs import amend_static_args, typecast_static_args + + +# ----------------------------------------------------------------------------- +# FUNCTION DEFINITIONS +# ----------------------------------------------------------------------------- + +def read_ini_config(file_path): + """ + Read in a `*.ini` config file, which is used mostly to specify the + waveform simulation (for example, the waveform model, the parameter + space for the binary black holes, etc.) and return its contents. 
+
+    Args:
+        file_path (str): Path to the `*.ini` config file to be read in.
+
+    Returns:
+        A tuple `(variable_arguments, static_arguments)` where
+
+        * `variable_arguments` should simply be a list of all the
+          parameters which get randomly sampled from the specified
+          distributions, usually using an instance of
+          :class:`utils.waveforms.WaveformParameterGenerator`.
+        * `static_arguments` should be a dictionary containing the keys
+          and values of the parameters that are the same for each
+          example that is generated (i.e., the non-physical parameters
+          such as the waveform model and the sampling rate).
+    """
+
+    # Make sure the config file actually exists
+    if not os.path.exists(file_path):
+        raise IOError('Specified configuration file does not exist: '
+                      '{}'.format(file_path))
+
+    # Set up a parser for the PyCBC config file
+    workflow_config_parser = WorkflowConfigParser(configFiles=[file_path])
+
+    # Read the variable_arguments and static_arguments using the parser
+    variable_arguments, static_arguments = \
+        read_params_from_config(workflow_config_parser)
+
+    # Typecast and amend the static arguments
+    static_arguments = typecast_static_args(static_arguments)
+    static_arguments = amend_static_args(static_arguments)
+
+    return variable_arguments, static_arguments
+
+
+def read_json_config(file_path):
+    """
+    Read in a `*.json` config file, which is used to specify the
+    sample generation process itself (for example, the number of
+    samples to generate, the number of concurrent processes to use,
+    etc.) and return its contents.
+
+    Args:
+        file_path (str): Path to the `*.json` config file to be read in.
+
+    Returns:
+        A `dict` containing the contents of the given JSON file.
+    """
+
+    # Make sure the config file actually exists
+    if not os.path.exists(file_path):
+        raise IOError('Specified configuration file does not exist: '
+                      '{}'.format(file_path))
+
+    # Open the config file and load the JSON contents as a dict
+    with open(file_path, 'r') as json_file:
+        config = json.load(json_file)
+
+    # Define the required keys for the config file in a set
+    required_keys = {'background_data_directory', 'dq_bits', 'inj_bits',
+                     'waveform_params_file_name', 'max_runtime',
+                     'n_injection_samples', 'n_noise_samples', 'n_processes',
+                     'random_seed', 'output_file_name'}
+
+    # Make sure no required keys are missing
+    missing_keys = required_keys.difference(set(config.keys()))
+    if missing_keys:
+        raise KeyError('Missing required key(s) in JSON configuration file: '
+                       '{}'.format(', '.join(list(missing_keys))))
+
+    return config
diff --git a/python3_samples/utils/hdffiles.py b/python3_samples/utils/hdffiles.py
new file mode 100755
index 0000000000000000000000000000000000000000..47310ef41893405049c1945adb0851f7bb0590e8
--- /dev/null
+++ b/python3_samples/utils/hdffiles.py
@@ -0,0 +1,602 @@
+"""
+Provide classes and functions for reading and writing HDF files.
+""" + +# ----------------------------------------------------------------------------- +# IMPORTS +# ----------------------------------------------------------------------------- + +from __future__ import print_function + +import numpy as np +import h5py +import os +import sys + +from pycbc.catalog import Catalog +from pycbc.types.timeseries import TimeSeries +from lal import LIGOTimeGPS + + +# ----------------------------------------------------------------------------- +# FUNCTION DEFINITIONS +# ----------------------------------------------------------------------------- + +def get_file_paths(directory, extensions=None): + """ + Take a directory and return the paths to all files in this + directory and its subdirectories. Optionally filter out only + files specific extensions. + + Args: + directory (str): Path to a directory. + extensions (list): List of allowed file extensions, + for example: `['hdf', 'h5']`. + + Returns: + List of paths of all files matching the above descriptions. + """ + + file_paths = [] + + # Walk over the directory and find all files + for path, dirs, files in os.walk(directory): + for f in files: + file_paths.append(os.path.join(path, f)) + + # If a list of extensions is provided, only keep the corresponding files + if extensions is not None: + file_paths = [_ for _ in file_paths if any([_.endswith(ext) for + ext in extensions])] + + return file_paths + + +# ----------------------------------------------------------------------------- + + +def get_strain_from_hdf_file(hdf_file_paths, + gps_time, + interval_width, + original_sampling_rate=4096, + target_sampling_rate=4096, + as_pycbc_timeseries=False): + """ + For a given `gps_time`, select the interval of length + `interval_width` (centered around `gps_time`) from the HDF files + specified in `hdf_file_paths`, and resample them to the given + `target_sampling_rate`. + + Args: + hdf_file_paths (dict): A dictionary with keys `{'H1', 'L1'}`, + which holds the paths to the HDF files containing the + interval around `gps_time`. + gps_time (int): A (valid) background noise time (GPS timestamp). + interval_width (int): The length of the strain sample (in + seconds) to be selected from the HDF files. + original_sampling_rate (int): The original sampling rate (in + Hertz) of the HDF files sample. Default is 4096. + target_sampling_rate (int): The sampling rate (in Hertz) to + which the strain should be down-sampled (if desired). Must + be a divisor of the `original_sampling_rate`. + as_pycbc_timeseries (bool): Whether to return the strain as a + dict of numpy arrays or as a dict of objects of type + `pycbc.types.timeseries.TimeSeries`. + + Returns: + A dictionary with keys `{'H1', 'L1'}`. For each key, the + dictionary contains a strain sample (as a numpy array) of the + given length, centered around `gps_time`, (down)-sampled to + the desired `target_sampling_rate`. + """ + + # ------------------------------------------------------------------------- + # Perform some basic sanity checks on the arguments + # ------------------------------------------------------------------------- + + assert isinstance(gps_time, int), \ + 'time is not an integer!' 
+    assert isinstance(interval_width, int), \
+        'interval_width is not an integer!'
+    assert isinstance(original_sampling_rate, int), \
+        'original_sampling_rate is not an integer!'
+    assert isinstance(target_sampling_rate, int), \
+        'target_sampling_rate is not an integer!'
+    assert original_sampling_rate % target_sampling_rate == 0, \
+        'Invalid target_sampling_rate: Not a divisor of ' \
+        'original_sampling_rate!'
+
+    # -------------------------------------------------------------------------
+    # Read out the strain from the HDF files
+    # -------------------------------------------------------------------------
+
+    # Compute the offset = half the interval width (intervals are centered
+    # around the given gps_time)
+    offset = int(interval_width / 2)
+
+    # Compute the resampling factor
+    sampling_factor = int(original_sampling_rate / target_sampling_rate)
+
+    # Store the sample we have selected from the HDF files
+    sample = dict()
+
+    # Loop over both detectors
+    for detector in ('H1', 'L1'):
+
+        # Extract the path to the HDF file
+        file_path = hdf_file_paths[detector]
+
+        # Read in the HDF file and select the noise sample
+        with h5py.File(file_path, 'r') as hdf_file:
+
+            # Get the start_time and compute array indices
+            start_time = int(hdf_file['meta']['GPSstart'][()])
+            start_idx = \
+                (gps_time - start_time - offset) * original_sampling_rate
+            end_idx = \
+                (gps_time - start_time + offset) * original_sampling_rate
+
+            # Select the sample from the strain
+            strain = np.array(hdf_file['strain']['Strain'])
+            sample[detector] = strain[start_idx:end_idx]
+
+        # Down-sample the selected sample to the target_sampling_rate
+        sample[detector] = sample[detector][::sampling_factor]
+
+    # -------------------------------------------------------------------------
+    # Convert to PyCBC time series, if necessary
+    # -------------------------------------------------------------------------
+
+    # If we just want a plain numpy array, we can return it right away
+    if not as_pycbc_timeseries:
+        return sample
+
+    # Otherwise we need to convert the numpy arrays to time series first
+    else:
+
+        # Initialize an empty dict for the time series results
+        timeseries = dict()
+
+        # Convert the strain of both detectors to a TimeSeries object
+        for detector in ('H1', 'L1'):
+
+            timeseries[detector] = \
+                TimeSeries(initial_array=sample[detector],
+                           delta_t=1.0/target_sampling_rate,
+                           epoch=LIGOTimeGPS(gps_time - offset))
+
+        return timeseries
+
+
+# -----------------------------------------------------------------------------
+# CLASS DEFINITIONS
+# -----------------------------------------------------------------------------
+
+class NoiseTimeline:
+    """
+    A ``NoiseTimeline`` object stores information about the data
+    quality and hardware injection flags of the files in the given
+    `background_data_directory`. This information is read in only
+    once at the beginning of the sample generation and can then be
+    utilized to quickly sample "valid" noise times, that is, GPS times
+    where the files in `background_data_directory` provide data which
+    pass certain desired quality criteria.
+
+    Args:
+        background_data_directory (str): Path to the directory which
+            contains the raw data (HDF files). These files may also be
+            distributed over several subdirectories.
+        random_seed (int): Seed for the random number generator which
+            is used for sampling valid noise times.
+        verbose (bool): Whether or not this instance should print
+            logging information to the command line.
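+
+    Example:
+        A minimal usage sketch (the directory path here is hypothetical):
+
+        .. code-block:: python
+
+            timeline = NoiseTimeline(background_data_directory='./raw_data',
+                                     random_seed=42)
+            gps_time = timeline.sample(delta_t=8)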
+ """ + + def __init__(self, + background_data_directory, + random_seed=42, + verbose=False): + + # Store the directory and sampling rate of the raw HDF files + self.background_data_directory = background_data_directory + + # Print debug messages or not? + self.verbose = verbose + + # Create a new random number generator with the given seed to + # decouple this from the global numpy RNG (for reproducibility) + self.rng = np.random.RandomState(seed=random_seed) + + # Get the list of all HDF files in the specified directory + self.vprint('Getting HDF file paths...', end=' ') + self.hdf_file_paths = get_file_paths(self.background_data_directory, + extensions=['hdf', 'h5']) + self.vprint('Done!') + + # Read in the meta information and masks from HDF files + self.vprint('Reading information from HDF files', end=' ') + self.hdf_files = self._get_hdf_files() + self.vprint('Done!') + + # Build the timeline for these HDF files + self.vprint('Building timeline object...', end=' ') + self.timeline = self._build_timeline() + self.vprint('Done!') + + # ------------------------------------------------------------------------- + + def vprint(self, string, *args, **kwargs): + """ + Verbose printing: Wrapper around `print()` to only call it if + `self.verbose` is set to true. + + Args: + string (str): String to be printed if `self.verbose` + is `True`. + *args: Arguments passed to `print()`. + **kwargs: Keyword arguments passed to `print()`. + """ + + if self.verbose: + print(string, *args, **kwargs) + sys.stdout.flush() + + # ------------------------------------------------------------------------- + + def _get_hdf_files(self): + + # Keep track of all the files whose information we need to store + hdf_files = [] + + # Open every HDF file once to read in the meta information as well + # as the injection and data quality (DQ) masks + n_files = len(self.hdf_file_paths) + for i, hdf_file_path in enumerate(self.hdf_file_paths): + with h5py.File(hdf_file_path, 'r') as f: + + self.vprint('({:>4}/{:>4})...'.format(i, n_files), end=' ') + + # Select necessary information from the HDF file + start_time = f['meta']['GPSstart'][()] + detector = f['meta']['Detector'][()].decode('utf-8') + duration = f['meta']['Duration'][()] + inj_mask = np.array(f['quality']['injections']['Injmask'], + dtype=np.int32) + dq_mask = np.array(f['quality']['simple']['DQmask'], + dtype=np.int32) + + # Perform some basic sanity checks + assert detector in ['H1', 'L1'], \ + 'Invalid detector {}!'.format(detector) + assert duration == len(inj_mask) == len(dq_mask), \ + 'Length of InjMask or DQMask does not match the duration!' 
+
+                # Collect this information in a dict
+                hdf_files.append(dict(file_path=hdf_file_path,
+                                      start_time=start_time,
+                                      detector=detector,
+                                      duration=duration,
+                                      inj_mask=inj_mask,
+                                      dq_mask=dq_mask))
+
+            self.vprint('\033[15D\033[K', end='')
+
+        # Sort the read-in HDF files by their start time and return them
+        self.vprint('({:>4}/{:>4})...'.format(n_files, n_files), end=' ')
+        return sorted(hdf_files, key=lambda _: _['start_time'])
+
+    # -------------------------------------------------------------------------
+
+    def _build_timeline(self):
+
+        # Get the size of the arrays that we need to initialize
+        n_entries = self.gps_end_time - self.gps_start_time
+
+        # Initialize the empty timeline
+        timeline = dict(h1_inj_mask=np.zeros(n_entries, dtype=np.int32),
+                        l1_inj_mask=np.zeros(n_entries, dtype=np.int32),
+                        h1_dq_mask=np.zeros(n_entries, dtype=np.int32),
+                        l1_dq_mask=np.zeros(n_entries, dtype=np.int32))
+
+        # Add information from HDF files to timeline
+        for hdf_file in self.hdf_files:
+
+            # Define some shortcuts
+            detector = hdf_file['detector']
+            dq_mask = hdf_file['dq_mask']
+            inj_mask = hdf_file['inj_mask']
+
+            # Map start/end from GPS time to array indices
+            idx_start = hdf_file['start_time'] - self.gps_start_time
+            idx_end = idx_start + hdf_file['duration']
+
+            # Add the mask information to the correct detector
+            if detector == 'H1':
+                timeline['h1_inj_mask'][idx_start:idx_end] = inj_mask
+                timeline['h1_dq_mask'][idx_start:idx_end] = dq_mask
+            else:
+                timeline['l1_inj_mask'][idx_start:idx_end] = inj_mask
+                timeline['l1_dq_mask'][idx_start:idx_end] = dq_mask
+
+        # Return the completed timeline
+        return timeline
+
+    # -------------------------------------------------------------------------
+
+    def is_valid(self,
+                 gps_time,
+                 delta_t=16,
+                 dq_bits=(0, 1, 2, 3),
+                 inj_bits=(0, 1, 2, 4)):
+        """
+        For a given `gps_time`, check if it is a valid time to sample
+        noise from by checking if all data points in the interval
+        `[gps_time - delta_t, gps_time + delta_t]` have the specified
+        `dq_bits` and `inj_bits` set.
+
+        .. seealso:: For more information about the `dq_bits` and
+            `inj_bits`, check out the website of the GW Open Science
+            Center, which explains these for the case of O1:
+
+                https://www.gw-openscience.org/archive/dataset/O1
+
+        Args:
+            gps_time (int): The GPS time whose validity we are checking.
+            delta_t (int): The number of seconds around `gps_time`
+                which we also want to be valid (because the sample will
+                be an interval).
+            dq_bits (tuple): The Data Quality Bits which one would like
+                to require (see note above).
+                *For example:* `dq_bits=(0, 1, 2, 3)` means that the
+                data quality needs to pass all tests up to `CAT3`.
+            inj_bits (tuple): The Injection Bits which one would like
+                to require (see note above).
+                *For example:* `inj_bits=(0, 1, 2, 4)` means that only
+                continuous wave (CW) injections are permitted; all
+                recordings containing any other type of injection
+                will be invalid for sampling.
+
+        Returns:
+            `True` if `gps_time` is valid, otherwise `False`.
+        """
+
+        # ---------------------------------------------------------------------
+        # Perform some basic sanity checks
+        # ---------------------------------------------------------------------
+
+        assert isinstance(gps_time, int), \
+            'Received GPS time that is not an integer!'
+        assert delta_t >= 0, \
+            'Received an invalid value for delta_t!'
+        assert set(dq_bits).issubset(set(range(7))), \
+            'Invalid Data Quality bit specification passed to is_valid()!'
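+        # Worked example (illustration only): with dq_bits=(0, 1, 2, 3),
+        # the minimum mask value computed further below is
+        # 2**0 + 2**1 + 2**2 + 2**3 = 15, i.e., bits 0-3 all set.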
+ assert set(inj_bits).issubset(set(range(5))), \ + 'Invalid Injection bit specification passed to is_valid()!' + + # --------------------------------------------------------------------- + # Check if given time is too close to a real event + # --------------------------------------------------------------------- + + # Get GPS times of all confirmed mergers + catalog = Catalog() + real_event_times = [catalog.mergers[_].time for _ in catalog.names] + + # Check if gps_time is too close to any of these times + if any(abs(gps_time - _) <= delta_t for _ in real_event_times): + return False + + # --------------------------------------------------------------------- + # Check if the given time is too close to the edge within its HDF file + # --------------------------------------------------------------------- + + # Loop over all HDF files to find the one that contains the given + # gps_time. Here, we do not distinguish between H1 and L1, because + # we assume that the files for the detectors are aligned on a grid. + for hdf_file in self.hdf_files: + + # Get the start and end time for the current HDF file + start_time = hdf_file['start_time'] + end_time = start_time + hdf_file['duration'] + + # Find the file that contains the given gps_time + if start_time < gps_time < end_time: + + # Check if it is far away enough from the edges: If not, it + # is not a valid time; otherwise we can still stop searching + if not start_time + delta_t < gps_time < end_time - delta_t: + return False + else: + break + + # --------------------------------------------------------------------- + # Select the environment around the specified time + # --------------------------------------------------------------------- + + # Map time to indices + idx_start = self.gps2idx(gps_time) - delta_t + idx_end = self.gps2idx(gps_time) + delta_t + + # Select the mask intervals + environment = \ + dict(h1_inj_mask=self.timeline['h1_inj_mask'][idx_start:idx_end], + l1_inj_mask=self.timeline['l1_inj_mask'][idx_start:idx_end], + h1_dq_mask=self.timeline['h1_dq_mask'][idx_start:idx_end], + l1_dq_mask=self.timeline['l1_dq_mask'][idx_start:idx_end]) + + # --------------------------------------------------------------------- + # Data Quality Check + # --------------------------------------------------------------------- + + # Compute the minimum data quality + min_dq = sum([2**i for i in dq_bits]) + + # Perform the DQ check for H1 + environment['h1_dq_mask'] = environment['h1_dq_mask'] > min_dq + if not np.all(environment['h1_dq_mask']): + return False + + # Perform the DQ check for L1 + environment['l1_dq_mask'] = environment['l1_dq_mask'] > min_dq + if not np.all(environment['l1_dq_mask']): + return False + + # --------------------------------------------------------------------- + # Injection Check + # --------------------------------------------------------------------- + + # Define an array of ones that matches the length of the environment. 
+        # This is needed because for a given number N, we can check if the
+        # K-th bit is set by evaluating the expression: N & (1 << K)
+        ones = np.ones(2 * delta_t, dtype=np.int32)
+
+        # For each requested injection bit, check if it is set for the whole
+        # environment (for both H1 and L1)
+        for i in inj_bits:
+
+            # Perform the injection check for H1
+            if not np.all(np.bitwise_and(environment['h1_inj_mask'],
+                                         np.left_shift(ones, i))):
+                return False
+
+            # Perform the injection check for L1
+            if not np.all(np.bitwise_and(environment['l1_inj_mask'],
+                                         np.left_shift(ones, i))):
+                return False
+
+        # If we have not returned False yet, the time must be valid!
+        return True
+
+    # -------------------------------------------------------------------------
+
+    def sample(self,
+               delta_t=16,
+               dq_bits=(0, 1, 2, 3),
+               inj_bits=(0, 1, 2, 4),
+               return_paths=False):
+        """
+        Randomly sample a time from `[gps_start_time, gps_end_time]`
+        which passes the :func:`NoiseTimeline.is_valid()` test.
+
+        Args:
+            delta_t (int): For an explanation, see
+                :func:`NoiseTimeline.is_valid()`.
+            dq_bits (tuple): For an explanation, see
+                :func:`NoiseTimeline.is_valid()`.
+            inj_bits (tuple): For an explanation, see
+                :func:`NoiseTimeline.is_valid()`.
+            return_paths (bool): Whether or not to return the paths to
+                the HDF files containing the `gps_time`.
+
+        Returns:
+            A valid GPS time and optionally a `dict` with the file
+            paths to the HDF files containing that GPS time (keys will
+            correspond to the different detectors).
+        """
+
+        # Keep sampling random times until we find a valid one...
+        while True:
+
+            # Randomly choose a GPS time between the start and end
+            gps_time = self.rng.randint(self.gps_start_time + delta_t,
+                                        self.gps_end_time - delta_t)
+
+            # If it is a valid time, return it
+            if self.is_valid(gps_time=gps_time, delta_t=delta_t,
+                             dq_bits=dq_bits, inj_bits=inj_bits):
+                if return_paths:
+                    return gps_time, self.get_file_paths_for_time(gps_time)
+                else:
+                    return gps_time
+
+    # -------------------------------------------------------------------------
+
+    def get_file_paths_for_time(self, gps_time):
+        """
+        For a given (valid) GPS time, find the two HDF files (for the
+        two detectors H1 and L1) which contain the corresponding strain.
+
+        Args:
+            gps_time (int): A valid GPS time stamp.
+
+        Returns:
+            A dictionary with keys `{'H1', 'L1'}` containing the paths
+            to the HDF files, or None if no such files could be found.
+        """
+
+        # Keep track of the results, i.e., the paths to the HDF files
+        result = dict()
+
+        # Loop over all HDF files to find the ones containing the given time
+        for hdf_file in self.hdf_files:
+
+            # Get the start and end time for the current HDF file
+            start_time = hdf_file['start_time']
+            end_time = start_time + hdf_file['duration']
+
+            # Check if the given GPS time falls into the interval of the
+            # current HDF file, and if so, store the file path for it
+            if start_time < gps_time < end_time:
+                result[hdf_file['detector']] = hdf_file['file_path']
+
+            # If both files were found, we are done!
+            if 'H1' in result.keys() and 'L1' in result.keys():
+                return result
+
+        # If we didn't find both files, return None
+        return None
+
+    # -------------------------------------------------------------------------
+
+    def idx2gps(self, idx):
+        """
+        Map an index to a GPS time by correcting for the start time of
+        the observation run, as determined from the HDF files.
+
+        Args:
+            idx (int): An index of a time series array (covering an
+                observation run).
+
+        Returns:
+            The corresponding GPS time.
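+
+        Example:
+            Assuming the observation run starts at GPS time 1126051217
+            (the start of O1), `idx2gps(0)` returns 1126051217 and
+            `idx2gps(60)` returns 1126051277.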
+ """ + + return idx + self.gps_start_time + + # ------------------------------------------------------------------------- + + def gps2idx(self, gps): + """ + Map an GPS time to an index by correcting for the start time of + the observation run, as determined from the HDF files. + + Args: + gps (int): A GPS time belonging to a point in time between + the start and end of an observation run. + + Returns: + The corresponding time series index. + """ + + return gps - self.gps_start_time + + # ------------------------------------------------------------------------- + + @property + def gps_start_time(self): + """ + The GPS start time of the observation run. + """ + + return self.hdf_files[0]['start_time'] + + # ------------------------------------------------------------------------- + + @property + def gps_end_time(self): + """ + The GPS end time of the observation run. + """ + + return self.hdf_files[-1]['start_time'] + \ + self.hdf_files[-1]['duration'] diff --git a/python3_samples/utils/progressbar.py b/python3_samples/utils/progressbar.py new file mode 100755 index 0000000000000000000000000000000000000000..e7c644ac4b4500cf7ffd05101d6b72fe1248eb90 --- /dev/null +++ b/python3_samples/utils/progressbar.py @@ -0,0 +1,230 @@ +""" +Provide a custom ProgressBar class, which provides a wrapper around +an iterable that automatically produces a progressbar when iterating +over it. +""" + +# ----------------------------------------------------------------------------- +# IMPORTS +# ----------------------------------------------------------------------------- + +import sys +import time +from threading import Event, Thread + + +# ----------------------------------------------------------------------------- +# CLASS DEFINITIONS +# ----------------------------------------------------------------------------- + +class RepeatedTimer: + """ + Wrapper class to repeat the given `func` every `interval` seconds + (asynchronously in the background). + Source: https://stackoverflow.com/a/33054922/4100721. + """ + + def __init__(self, interval, func, *args, **kwargs): + self.interval = interval + self.func = func + self.args = args + self.kwargs = kwargs + self.start = time.time() + self.event = Event() + self.thread = Thread(target=self._target) + self.thread.start() + + def _target(self): + while not self.event.wait(self._time): + self.func(*self.args, **self.kwargs) + + @property + def _time(self): + return self.interval - ((time.time() - self.start) % self.interval) + + def stop(self): + self.event.set() + self.thread.join() + + +# ----------------------------------------------------------------------------- + + +class ProgressBar: + """ + :class:`ProgressBar` objects are a custom way to "decorate" + a given iterable to produce a progress bar when looping over it. + This class allows to also produce some output with the progress + bar, such as information about the element of the iterable that is + currently being processed. + + Args: + iterable (iterable): The iterable to be "decorated" with + a progressbar. + bar_length (int): Length of the bar itself (in characters). + auto_update (bool): Whether or not to automatically write + the updated progressbar to the command line. 
+ """ + + def __init__(self, + iterable, + bar_length=50, + auto_update=False): + + self.iterable = iterable + self.max_value = len(iterable) + self.bar_length = bar_length + self.auto_update = auto_update + + self.start_time = None + self.last_timediff = None + + self.progressbar = self.get_progressbar(-1) + + self.extras_ = [] + self.scheduler = None + + # ------------------------------------------------------------------------- + + def __iter__(self): + + # Start the stop watch as soon as we start iterating + self.start_time = time.time() + + # Initialize index to 0 to ensure it is always defined + index = 0 + + # Start the scheduler that will update the elapsed time every second + def update(): + self.progressbar = self.get_progressbar(index) + self.write(extras=self.extras_) + self.scheduler = RepeatedTimer(1, update) + + # Actually loop over the iterable + for index, value in enumerate(self.iterable): + + # Update the last_timediff, which is used to estimate when we + # will be done + self.last_timediff = self.get_timediff() + + # Update the progressbar string + self.progressbar = self.get_progressbar(index) + + # If are doing auto-updates (i.e. no extras), we can already + # write the progress bar to stdout + if self.auto_update: + self.write() + + # Finally, actually yield the current value of the iterable + yield value + + # Update our progress bar string one last time to indicate we have + # made it to 100% + self.progressbar = self.get_progressbar(self.max_value) + + # Stop our background scheduler + self.scheduler.stop() + + # ------------------------------------------------------------------------- + + def get_timediff(self): + """ + Returns: Time elapsed since progress bar was instantiated. + """ + + if self.start_time is not None: + return time.time() - self.start_time + else: + return None + + # ------------------------------------------------------------------------- + + def get_eta(self, + percent): + """ + Get the estimated time of arrival (ETA) by linear interpolation. + + Args: + percent (float): Current progress in percent. + + Returns: + Estimated time of arrival in seconds. + """ + + if self.last_timediff is not None and percent != 0: + return max(0, self.last_timediff / percent - self.get_timediff()) + else: + return None + + # ------------------------------------------------------------------------- + + def get_progressbar(self, + index): + """ + Construct the progressbar itself (bar, ETA, etc.). + + Args: + index (int): Current index of the iterable; used to compute + the current progress percentage. + + Returns: + A string containing the basic progress bar. + """ + + # Construct the actual progress bar + percent = float(index) / self.max_value + bar = '=' * int(round(percent * self.bar_length)) + spaces = '-' * (self.bar_length - len(bar)) + + # Get the elapsed time as a proper string + elapsed_time = self.get_timediff() + if elapsed_time is None: + elapsed_time = 0 + elapsed_time = '{:.2f}'.format(elapsed_time) + + # Get the expected time of arrival (ETA) as a proper string + eta = self.get_eta(percent) + if eta is None: + eta = '?' 
+ else: + eta = '{:.2f}'.format(eta) + + # Construct the actual progress bar string + out = "[{0}] {1:>3}% ({2:>{3}}/{4:>{3}}) | Elapsed: {5} | ETA: {6}" + progressbar = out.format(bar + spaces, round(percent * 100), + index, len(str(self.max_value)), + self.max_value, elapsed_time, eta) + + return progressbar + + # ------------------------------------------------------------------------- + + def write(self, + clear_line=False, + extras=()): + """ + Construct the progress bar and write it to the command line. + + Args: + clear_line (bool): Whether or not to clear the last line. + extras (list): List of additional outputs (e.g., the file + that is currently being downloaded). + """ + + self.extras_ = extras + + if extras: + for _ in range(len(extras)): + sys.stdout.write('\r\033[K\033[F') + if clear_line: + sys.stdout.write('\r\033[K\033[F') + + # Actually write the finished progress bar to the command line + sys.stdout.write('\r\033[K\033[F') + sys.stdout.write('\r\033[K\033[K') + sys.stdout.write(self.progressbar) + if extras: + sys.stdout.write('\n' + '\n'.join(extras)) + if not clear_line: + sys.stdout.write('\n') + sys.stdout.flush() diff --git a/python3_samples/utils/samplefiles.py b/python3_samples/utils/samplefiles.py new file mode 100755 index 0000000000000000000000000000000000000000..4a6de14b4e1f5b9ce9976a607c947355df18b12b --- /dev/null +++ b/python3_samples/utils/samplefiles.py @@ -0,0 +1,368 @@ +""" +Provide tools for writing and reading the sample HDF files produced by +the sample generation. +""" + +# ----------------------------------------------------------------------------- +# IMPORTS +# ----------------------------------------------------------------------------- + +import numpy as np +import pandas as pd +import h5py + +from six import iteritems +from pprint import pformat +from warnings import warn + + +# ----------------------------------------------------------------------------- +# CLASS DEFINITIONS +# ----------------------------------------------------------------------------- + +class SampleFile: + """ + :class:`SampleFile` objects serve as an abstraction for the result + files of the sample generation. + + Args: + data (dict): A dictionary containing the following keys: + + .. code-block:: python + + {'command_line_arguments', 'static_arguments', + 'injection_samples', 'noise_samples', + 'injection_parameters', 'normalization_parameters'} + + The value for every key must again be a dictionary relating + the names of sample parameters (e.g., 'h1_snr') to a numpy + array containing the values for that parameter. + """ + + def __init__(self, + data=None): + + # Perform sanity checks on data + self.__check_data(data) + + # If we have received data, store it; else initialize an empty dict + if data is not None: + self.data = data + else: + self.data = dict(command_line_arguments=dict(), + static_arguments=dict(), + injection_samples=dict(), + noise_samples=dict(), + injection_parameters=dict(), + normalization_parameters=dict()) + + # ------------------------------------------------------------------------- + + @staticmethod + def __check_data(data): + """ + Run some sanity checks on `data`. Raises an assertion error if + the data fail any of these sanity checks. + + Args: + data (dict): A dictionary as specified in the ``__init__`` + of this class, that is, a dictionary containing the + following keys: + + .. 
code-block:: python + + {'command_line_arguments', 'static_arguments', + 'injection_samples', 'noise_samples', + 'injection_parameters', 'normalization_parameters'} + """ + + assert isinstance(data, dict) or data is None, \ + 'data must be either dict or None!' + + if data is not None: + + assert 'command_line_arguments' in data.keys(), \ + 'data must provide key "command_line_arguments"!' + assert 'static_arguments' in data.keys(), \ + 'data must provide key "static_arguments"!' + assert 'injection_samples' in data.keys(), \ + 'data must provide key "injection_samples"!' + assert 'noise_samples' in data.keys(), \ + 'data must provide key "noise_samples"!' + assert 'injection_parameters' in data.keys(), \ + 'data must provide key "injection_parameters"!' + assert 'normalization_parameters' in data.keys(), \ + 'data must provide key "normalization_parameters"!' + + # ------------------------------------------------------------------------- + + def __repr__(self): + + return pformat(self.data, indent=4) + + # ------------------------------------------------------------------------- + + def __str__(self): + + return pformat(self.data, indent=4) + + # ------------------------------------------------------------------------- + + def __getitem__(self, item): + + return self.data[item] + + # ------------------------------------------------------------------------- + + def __setitem__(self, key, value): + + self.data[key] = value + + # ------------------------------------------------------------------------- + + def read_hdf(self, file_path): + """ + Read in an existing HDF sample file (e.g., to use an instance + of :class:`SampleFile` as a convenience wrapper for accessing + the contents of an HDF samples file). + + Args: + file_path (str): The path to the HDF file to be read into + the :class:`SampleFile` object. 
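+
+        Example:
+            A usage sketch (the file name is the default
+            `output_file_name` from `default.json`):
+
+            .. code-block:: python
+
+                sample_file = SampleFile()
+                sample_file.read_hdf(file_path='./output/default.hdf')
+                df = sample_file.as_dataframe(injection_parameters=True)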
+ """ + + # Clear the existing data + self.data = {} + + with h5py.File(file_path, 'r') as hdf_file: + + # Read in dict with command_line_arguments + self.data['command_line_arguments'] = \ + dict(hdf_file['command_line_arguments'].attrs) + self.data['command_line_arguments'] = \ + {key: value.decode('ascii') for key, value in + iteritems(self.data['command_line_arguments'])} + + # Read in dict with static_arguments + self.data['static_arguments'] = \ + dict(hdf_file['static_arguments'].attrs) + self.data['static_arguments'] = \ + {key: value.decode('ascii') for key, value in + iteritems(self.data['static_arguments'])} + + # Read in group containing injection samples + self.data['injection_samples'] = dict() + for key in ('event_time', 'h1_strain', 'l1_strain'): + try: + self.data['injection_samples'][key] = \ + np.array(hdf_file['injection_samples'][key]) + except TypeError: + self.data['injection_samples'][key] = np.array(None) + + # Read in group containing noise samples + self.data['noise_samples'] = dict() + for key in ('event_time', 'h1_strain', 'l1_strain'): + try: + self.data['noise_samples'][key] = \ + np.array(hdf_file['noise_samples'][key]) + except TypeError: + self.data['noise_samples'][key] = np.array(None) + + # Read in injection parameters + self.data['injection_parameters'] = dict() + for key in hdf_file['/injection_parameters'].keys(): + try: + self.data['injection_parameters'][key] = \ + np.array(hdf_file['injection_parameters'][key]) + except TypeError: + self.data['injection_parameters'][key] = np.array(None) + + # Read in dict with normalization parameters + self.data['normalization_parameters'] = \ + dict(hdf_file['normalization_parameters'].attrs) + self.data['normalization_parameters'] = \ + {key: float(value) for key, value in + iteritems(self.data['normalization_parameters'])} + + # ------------------------------------------------------------------------- + + def to_hdf(self, file_path): + + with h5py.File(file_path, 'w') as hdf_file: + + # Create group for command_line_arguments and save the values of + # the dict as attributes of the group + group = hdf_file.create_group('command_line_arguments') + for key, value in iteritems(self.data['command_line_arguments']): + group.attrs[key] = str(value) + + # Create group for static_arguments and save the values of + # the dict as attributes of the group + group = hdf_file.create_group('static_arguments') + for key, value in iteritems(self.data['static_arguments']): + group.attrs[key] = str(value) + + # Create group for injection_samples and save every item of the + # dict as a new dataset + group = hdf_file.create_group('injection_samples') + for key, value in iteritems(self.data['injection_samples']): + dtype = 'float64' if key == 'event_time' else 'float32' + if value is not None: + group.create_dataset(name=key, + shape=value.shape, + dtype=dtype, + data=value) + else: + group.create_dataset(name=key, + shape=None, + dtype=dtype) + + # Create group for noise_samples and save every item of the + # dict as a new dataset + group = hdf_file.create_group('noise_samples') + for key, value in iteritems(self.data['noise_samples']): + dtype = 'float64' if key == 'event_time' else 'float32' + if value is not None: + group.create_dataset(name=key, + shape=value.shape, + dtype=dtype, + data=value) + else: + group.create_dataset(name=key, + shape=None, + dtype=dtype) + + # Create group for injection_parameters and save every item of the + # dict as a new dataset + group = hdf_file.create_group('injection_parameters') + for key, 
+                if value is not None:
+                    group.create_dataset(name=key,
+                                         shape=value.shape,
+                                         dtype='float64',
+                                         data=value)
+                else:
+                    group.create_dataset(name=key,
+                                         shape=None,
+                                         dtype='float64')
+
+            # Create group for normalization_parameters and save every item
+            # of the dict as a new attribute
+            group = hdf_file.create_group('normalization_parameters')
+            for key, value in iteritems(self.data['normalization_parameters']):
+                group.attrs[key] = float(value)
+
+    # -------------------------------------------------------------------------
+
+    def as_dataframe(self,
+                     injection_parameters=False,
+                     static_arguments=False,
+                     command_line_arguments=False,
+                     split_injections_noise=False):
+        """
+        Return the contents of the :class:`SampleFile` as a ``pandas``
+        data frame.
+
+        Args:
+            injection_parameters (bool): Whether or not to return
+                the `injection_parameters` for every sample.
+            static_arguments (bool): Whether or not to return
+                the `static_arguments` for every sample.
+            command_line_arguments (bool): Whether or not to return
+                the `command_line_arguments` for every sample.
+            split_injections_noise (bool): If this is set to True, two
+                separate data frames will be returned: one for the
+                samples with an injection, and one for those without.
+
+        Returns:
+            One (or two, if `split_injections_noise` is set to `True`)
+            pandas data frame containing the samples stored in the
+            :class:`SampleFile` object.
+        """
+
+        # Create a data frame for the samples containing an injection.
+        # Note: We build the data frames directly from the lists of dicts
+        # (and sort the columns alphabetically), because the old idiom
+        # DataFrame().append(...) was removed in pandas 2.0.
+        injection_samples = []
+        if self.data['injection_samples']['event_time'].shape != ():
+            for i in range(len(self.data['injection_samples']['event_time'])):
+                _ = {k: v[i] for k, v in
+                     iteritems(self.data['injection_samples'])}
+                injection_samples.append(_)
+            df_injection_samples = \
+                pd.DataFrame(injection_samples).sort_index(axis=1)
+        else:
+            df_injection_samples = pd.DataFrame()
+
+        # Create a data frame for the samples not containing an injection
+        noise_samples = []
+        if self.data['noise_samples']['event_time'].shape != ():
+            for i in range(len(self.data['noise_samples']['event_time'])):
+                _ = {k: v[i] for k, v in
+                     iteritems(self.data['noise_samples'])}
+                noise_samples.append(_)
+            df_noise_samples = \
+                pd.DataFrame(noise_samples).sort_index(axis=1)
+        else:
+            df_noise_samples = pd.DataFrame()
+
+        # If requested, create a data frame for the injection parameters and
+        # merge it with the data frame containing the injection samples
+        if injection_parameters:
+            injection_params = []
+
+            # Check if we even have any injection parameters
+            if self.data['injection_parameters']['mass1'].shape != ():
+                for i in range(len(df_injection_samples)):
+                    _ = {k: v[i] for k, v in
+                         iteritems(self.data['injection_parameters'])}
+                    injection_params.append(_)
+                df_injection_params = \
+                    pd.DataFrame(injection_params).sort_index(axis=1)
+            else:
+                df_injection_params = pd.DataFrame()
+
+            df = pd.concat([df_injection_samples, df_injection_params],
+                           axis=1, sort=True)
+
+        else:
+            df = df_injection_samples
+
+        # If requested, add the static_arguments to the data frame
+        # containing the injections, and a smaller subset of the
+        # static_arguments also to the data frame containing the noise
+        # samples (only those arguments that make sense there)
+        if static_arguments:
+            for key, value in iteritems(self.data['static_arguments']):
+                df[key] = value
+                if key in ('random_seed', 'target_sampling_rate',
+                           'bandpass_lower', 'bandpass_upper',
+                           'seconds_before_event',
+                           'seconds_after_event',
+                           'sample_length'):
+                    df_noise_samples[key] = value
+
+        # Merge the data frames for the samples with and without injections
+        # (again via pd.concat(), since DataFrame.append() is gone)
+        df = pd.concat([df, df_noise_samples], ignore_index=True, sort=True)
+
+        # If requested, add the command line arguments that were used in the
+        # creation of the sample file to the combined data frame
+        if command_line_arguments:
+            for key, value in iteritems(self.data['command_line_arguments']):
+                df[key] = value
+
+        # Ensure the `event_time` variable is an integer
+        try:
+            df['event_time'] = df['event_time'].astype(int)
+        except KeyError:
+            warn('\nNo key "event_time": Data frame is probably empty!')
+
+        # Either split into two data frames for injection and noise samples
+        if split_injections_noise:
+            df_injections = df[df.h1_signal.notnull()]
+            df_noise = df[df.h1_signal.isnull()]
+            return df_injections, df_noise
+
+        # Or just return a single data frame containing both types of samples
+        else:
+            return df
diff --git a/python3_samples/utils/samplegeneration.py b/python3_samples/utils/samplegeneration.py
new file mode 100755
index 0000000000000000000000000000000000000000..76be58a1245be67e48e3105c457e8f1dc663de98
--- /dev/null
+++ b/python3_samples/utils/samplegeneration.py
@@ -0,0 +1,288 @@
+"""
+Provide the :func:`generate_sample()` method, which is at the heart of
+the sample generation process.
+"""
+
+# -----------------------------------------------------------------------------
+# IMPORTS
+# -----------------------------------------------------------------------------
+
+from __future__ import print_function
+
+import numpy as np
+
+from lal import LIGOTimeGPS
+from pycbc.psd import interpolate
+from pycbc.psd.analytical import aLIGOZeroDetHighPower
+from pycbc.noise import noise_from_psd
+from pycbc.filter import sigma
+
+from .hdffiles import get_strain_from_hdf_file
+from .waveforms import get_detector_signals, get_waveform
+
+
+# -----------------------------------------------------------------------------
+# FUNCTION DEFINITIONS
+# -----------------------------------------------------------------------------
+
+def generate_sample(static_arguments,
+                    event_tuple,
+                    waveform_params=None):
+    """
+    Generate a single sample (or example) by taking a piece of LIGO
+    background noise (real or synthetic, depending on `event_tuple`),
+    optionally injecting a simulated waveform (depending on
+    `waveform_params`), and post-processing the result (whitening,
+    band-passing).
+
+    Args:
+        static_arguments (dict): A dictionary containing global
+            technical parameters for the sample generation, for example
+            the `target_sampling_rate` of the output.
+        event_tuple (tuple): A tuple `(event_time, file_path)`, which
+            specifies the GPS time at which to make an injection and
+            the path of the HDF file which contains said GPS time.
+            If `file_path` is `None`, synthetic noise will be used
+            instead, and the `event_time` only serves as a seed for
+            the corresponding (random) noise generator.
+        waveform_params (dict): A dictionary containing the randomly
+            sampled parameters that are passed as inputs to the
+            waveform model (e.g., the masses, spins, position, ...).
+
+    Returns:
+        A tuple `(sample, injection_parameters)`, which contains the
+        generated `sample` itself (a dict with keys `{'event_time',
+        'h1_strain', 'l1_strain'}`), and the `injection_parameters`,
+        which are either `None` (in case no injection was made), or a
+        dict containing the `waveform_params` and some additional
+        parameters (e.g., single detector SNRs).
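+
+    Example:
+        A minimal sketch of how this function is typically called; the
+        name `static_args` is a placeholder here for the typecast and
+        amended static arguments dict from the configuration files:
+
+        .. code-block:: python
+
+            # Pure-noise sample from synthetic noise: no HDF file path,
+            # no waveform parameters (event_time only seeds the RNG)
+            sample, injection_parameters = \
+                generate_sample(static_arguments=static_args,
+                                event_tuple=(1234567890, None),
+                                waveform_params=None)
+
+            # injection_parameters is None for noise-only samples, and
+            # sample['h1_strain'] is a float32 numpy array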
+ """ + + # ------------------------------------------------------------------------- + # Define shortcuts for some elements of self.static_arguments + # ------------------------------------------------------------------------- + + # Read out frequency-related arguments + original_sampling_rate = static_arguments['original_sampling_rate'] + target_sampling_rate = static_arguments['target_sampling_rate'] + f_lower = static_arguments['f_lower'] + delta_f = static_arguments['delta_f'] + fd_length = static_arguments['fd_length'] + + # Get the width of the noise sample that we either select from the raw + # HDF files, or generate synthetically + noise_interval_width = static_arguments['noise_interval_width'] + + # Get how many seconds before and after the event time to use + seconds_before_event = static_arguments['seconds_before_event'] + seconds_after_event = static_arguments['seconds_after_event'] + + # Get the event time and the dict containing the HDF file path + event_time, hdf_file_paths = event_tuple + + # ------------------------------------------------------------------------- + # Get the background noise (either from data or synthetically) + # ------------------------------------------------------------------------- + + # If the event_time is None, we generate synthetic noise + if hdf_file_paths is None: + + # Create an artificial PSD for the noise + # TODO: Is this the best choice for this task? + psd = aLIGOZeroDetHighPower(length=fd_length, + delta_f=delta_f, + low_freq_cutoff=f_lower) + + # Actually generate the noise using the PSD and LALSimulation + noise = dict() + for i, det in enumerate(('H1', 'L1')): + + # Compute the length of the noise sample in time steps + noise_length = noise_interval_width * target_sampling_rate + + # Generate the noise for this detector + noise[det] = noise_from_psd(length=noise_length, + delta_t=(1.0 / target_sampling_rate), + psd=psd, + seed=(2 * event_time + i)) + + # Manually fix the noise start time to match the fake event time. + # However, for some reason, the correct setter method seems broken? + start_time = event_time - noise_interval_width / 2 + # noinspection PyProtectedMember + noise[det]._epoch = LIGOTimeGPS(start_time) + + # Otherwise we select the noise from the corresponding HDF file + else: + + kwargs = dict(hdf_file_paths=hdf_file_paths, + gps_time=event_time, + interval_width=noise_interval_width, + original_sampling_rate=original_sampling_rate, + target_sampling_rate=target_sampling_rate, + as_pycbc_timeseries=True) + noise = get_strain_from_hdf_file(**kwargs) + + # ------------------------------------------------------------------------- + # If applicable, make an injection + # ------------------------------------------------------------------------- + + # If no waveform parameters are given, we are not making an injection. 
+ # In this case, there are no detector signals and no injection + # parameters, and the strain is simply equal to the noise + if waveform_params is None: + detector_signals = None + injection_parameters = None + strain = noise + + # Otherwise, we need to simulate a waveform for the given waveform_params + # and add it into the noise to create the strain + else: + + # --------------------------------------------------------------------- + # Simulate the waveform with the given injection parameters + # --------------------------------------------------------------------- + + # Actually simulate the waveform with these parameters + waveform = get_waveform(static_arguments=static_arguments, + waveform_params=waveform_params) + + # Get the detector signals by projecting on the antenna patterns + detector_signals = \ + get_detector_signals(static_arguments=static_arguments, + waveform_params=waveform_params, + event_time=event_time, + waveform=waveform) + + # --------------------------------------------------------------------- + # Add the waveform into the noise as is to calculate the NOMF-SNR + # --------------------------------------------------------------------- + + # Store the dummy strain, the PSDs and the SNRs for the two detectors + strain_ = {} + psds = {} + snrs = {} + + # Calculate these quantities for both detectors + for det in ('H1', 'L1'): + + # Add the simulated waveform into the noise to get the dummy strain + strain_[det] = noise[det].add_into(detector_signals[det]) + + # Estimate the Power Spectral Density from the dummy strain + psds[det] = strain_[det].psd(4) + psds[det] = interpolate(psds[det], delta_f=delta_f) + + # Use the PSD estimate to calculate the optimal matched + # filtering SNR for this injection and this detector + snrs[det] = sigma(htilde=detector_signals[det], + psd=psds[det], + low_frequency_cutoff=f_lower) + + # Calculate the network optimal matched filtering SNR for this + # injection (which we need for scaling to the chosen injection SNR) + nomf_snr = np.sqrt(snrs['H1']**2 + snrs['L1']**2) + + # --------------------------------------------------------------------- + # Add the waveform into the noise with the chosen injection SNR + # --------------------------------------------------------------------- + + # Compute the rescaling factor + injection_snr = waveform_params['injection_snr'] + scale_factor = 1.0 * injection_snr / nomf_snr + + strain = {} + for det in ('H1', 'L1'): + + # Add the simulated waveform into the noise, using a scaling + # factor to ensure that the resulting NOMF-SNR equals the chosen + # injection SNR + strain[det] = noise[det].add_into(scale_factor * + detector_signals[det]) + + # --------------------------------------------------------------------- + # Store some information about the injection we just made + # --------------------------------------------------------------------- + + # Store the information we have computed ourselves + injection_parameters = {'scale_factor': scale_factor, + 'h1_snr': snrs['H1'], + 'l1_snr': snrs['L1']} + + # Also add the waveform parameters we have sampled + for key, value in waveform_params.items(): + injection_parameters[key] = value + + # ------------------------------------------------------------------------- + # Whiten and bandpass the strain (also for noise-only samples) + # ------------------------------------------------------------------------- + + for det in ('H1', 'L1'): + + # Get the whitening parameters + segment_duration = static_arguments['whitening_segment_duration'] + 
max_filter_duration = static_arguments['whitening_max_filter_duration']
+
+        # Whiten the strain (using the built-in whitening of PyCBC)
+        # We don't need to remove the corrupted samples here, because we
+        # crop the strain later on
+        strain[det] = \
+            strain[det].whiten(segment_duration=segment_duration,
+                               max_filter_duration=max_filter_duration,
+                               remove_corrupted=False)
+
+        # Get the limits for the bandpass
+        bandpass_lower = static_arguments['bandpass_lower']
+        bandpass_upper = static_arguments['bandpass_upper']
+
+        # Apply a high-pass to remove everything below `bandpass_lower`;
+        # if bandpass_lower = 0, do not apply any high-pass filter
+        if bandpass_lower != 0:
+            strain[det] = strain[det].highpass_fir(frequency=bandpass_lower,
+                                                   remove_corrupted=False,
+                                                   order=512)
+
+        # Apply a low-pass filter to remove everything above `bandpass_upper`;
+        # if bandpass_upper equals the sampling rate, do not apply any
+        # low-pass filter
+        if bandpass_upper != target_sampling_rate:
+            strain[det] = strain[det].lowpass_fir(frequency=bandpass_upper,
+                                                  remove_corrupted=False,
+                                                  order=512)
+
+    # -------------------------------------------------------------------------
+    # Cut strain (and signal) time series to the pre-specified length
+    # -------------------------------------------------------------------------
+
+    for det in ('H1', 'L1'):
+
+        # Define some shortcuts for slicing
+        a = event_time - seconds_before_event
+        b = event_time + seconds_after_event
+
+        # Cut the strain to the desired length
+        strain[det] = strain[det].time_slice(a, b)
+
+        # If we've made an injection, also cut the simulated signal
+        if waveform_params is not None:
+            detector_signals[det] = detector_signals[det].time_slice(a, b)
+
+    # If we've made an injection, also add the (cut) detector signals to
+    # the injection parameters
+    if waveform_params is not None:
+        injection_parameters['h1_signal'] = \
+            np.array(detector_signals['H1'])
+        injection_parameters['l1_signal'] = \
+            np.array(detector_signals['L1'])
+
+    # -------------------------------------------------------------------------
+    # Collect all available information about this sample and return results
+    # -------------------------------------------------------------------------
+
+    # The whitened strain is numerically on the order of O(1), so we can save
+    # it as a 32-bit float (unlike the original, unwhitened signal, which is
+    # on the order of O(10^{-30}) and thus requires 64-bit floats)
+    sample = {'event_time': event_time,
+              'h1_strain': np.array(strain['H1']).astype(np.float32),
+              'l1_strain': np.array(strain['L1']).astype(np.float32)}
+
+    return sample, injection_parameters
diff --git a/python3_samples/utils/staticargs.py b/python3_samples/utils/staticargs.py
new file mode 100755
index 0000000000000000000000000000000000000000..b77e55ccf73c1e4d53ad56b8a8968c20e2aac602
--- /dev/null
+++ b/python3_samples/utils/staticargs.py
@@ -0,0 +1,96 @@
+"""
+Provide tools that are needed for amending and typecasting the static
+arguments from an `*.ini` configuration file, which controls the
+waveform simulation process.
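+
+Intended call order (a sketch; the variable names are placeholders and
+the config parsing is not shown):
+
+.. code-block:: python
+
+    # The raw [static_args] section is a dict of strings; cast the
+    # values to int/float first, then derive the implicit parameters
+    static_args = typecast_static_args(raw_static_args)
+    static_args = amend_static_args(static_args)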
+""" + +# ----------------------------------------------------------------------------- +# IMPORTS +# ----------------------------------------------------------------------------- + +import copy + + +# ----------------------------------------------------------------------------- +# FUNCTION DEFINITIONS +# ----------------------------------------------------------------------------- + +def amend_static_args(static_args): + """ + Amend the static_args from the `*.ini` configuration file by adding + the parameters that can be computed directly from others (more + intuitive ones). Note that the static_args should have been + properly typecast first; see :func:`typecast_static_args()`. + + Args: + static_args (dict): The static_args dict after it has been + typecast by :func:`typecast_static_args()`. + + Returns: + The amended `static_args`, where implicitly defined variables + have been added. + """ + + # Create a copy of the original static_args + args = copy.deepcopy(static_args) + + # If necessary, compute the sample length + if 'sample_length' not in args.keys(): + args['sample_length'] = \ + args['seconds_before_event'] + args['seconds_after_event'] + + # If necessary, add delta_t = 1 / target_sampling_rate + if 'delta_t' not in args.keys(): + args['delta_t'] = 1.0 / args['target_sampling_rate'] + + # If necessary, add delta_f = 1 / waveform_length + if 'delta_f' not in args.keys(): + args['delta_f'] = 1.0 / args['waveform_length'] + + # If necessary, add td_length = waveform_length * target_sampling_rate + if 'td_length' not in args.keys(): + args['td_length'] = \ + int(args['waveform_length'] * args['target_sampling_rate']) + + # If necessary, add fd_length = td_length / 2 + 1 + if 'fd_length' not in args.keys(): + args['fd_length'] = int(args['td_length'] / 2.0 + 1) + + return args + + +def typecast_static_args(static_args): + """ + Take the `static_args` dictionary as it is read in from the PyCBC + configuration file (i.e., all values are strings) and cast the + values to the correct types (`float` or `int`). + + Args: + static_args (dict): The raw `static_args` dictionary as it is + read from the `*.ini` configuration file. + + Returns: + The `static_args` dictionary with proper types for all values. + """ + + args = copy.deepcopy(static_args) + + # Cast variables to integer that need to be integers + args['bandpass_lower'] = int(args['bandpass_lower']) + args['bandpass_upper'] = int(args['bandpass_upper']) + args['waveform_length'] = int(args['waveform_length']) + args['noise_interval_width'] = int(args['noise_interval_width']) + args['original_sampling_rate'] = int(args['original_sampling_rate']) + args['target_sampling_rate'] = int(args['target_sampling_rate']) + args['whitening_segment_duration'] = \ + float(args['whitening_segment_duration']) + args['whitening_max_filter_duration'] = \ + int(args['whitening_max_filter_duration']) + + # Cast variables to float that need to be floats + args['distance'] = float(args['distance']) + args['f_lower'] = float(args['f_lower']) + args['seconds_before_event'] = float(args['seconds_before_event']) + args['seconds_after_event'] = float(args['seconds_after_event']) + + return args diff --git a/python3_samples/utils/waveforms.py b/python3_samples/utils/waveforms.py new file mode 100755 index 0000000000000000000000000000000000000000..9c82fd4f9c38b7d0daccc98b1e6ff4023041b55f --- /dev/null +++ b/python3_samples/utils/waveforms.py @@ -0,0 +1,312 @@ +""" +Provide methods for generating and processing simulated +gravitational-wave waveforms. 
+""" + +# ----------------------------------------------------------------------------- +# IMPORTS +# ----------------------------------------------------------------------------- + +from __future__ import print_function + +import numpy as np +from scipy.signal.windows import tukey + +from pycbc.distributions import JointDistribution, read_params_from_config, \ + read_constraints_from_config, read_distributions_from_config +from pycbc.transforms import apply_transforms, read_transforms_from_config +from pycbc.workflow import WorkflowConfigParser +from pycbc.waveform import get_td_waveform, get_fd_waveform +from pycbc.detector import Detector +from pycbc.types.timeseries import TimeSeries + + +# ----------------------------------------------------------------------------- +# CLASS DEFINITIONS +# ----------------------------------------------------------------------------- + +class WaveformParameterGenerator(object): + """ + :class:`WaveformParameterGenerator` objects are essentially just a + simple convenience wrapper to construct the joint probability + distribution (and provide a method to draw samples from it) of the + parameters specified by the `[variable_args]` section of an + `*.ini` configuration file and their distributions as defined in + the corresponding `[prior-*]` sections. + + Args: + config_file (str): Path to the `*.ini` configuration file, + which contains the information about the parameters to be + generated and their distributions. + random_seed (int): Seed for the random number generator. + Caveat: We can only set the seed of the global numpy RNG. + """ + + def __init__(self, + config_file, + random_seed): + + # Fix the seed for the random number generator + np.random.seed(random_seed) + + # Read in the configuration file using a WorkflowConfigParser. + # Note that the argument `configFiles` has to be a list here, + # so we need to wrap the `config_file` argument accordingly... + config_file = WorkflowConfigParser(configFiles=[config_file]) + + # Extract variable arguments and constraints + # We don't need the static_args here, hence they do not get amended. + self.var_args, _ = read_params_from_config(config_file) + self.constraints = read_constraints_from_config(config_file) + + # Extract distributions + dist = read_distributions_from_config(config_file) + + # Extract transformations + self.trans = read_transforms_from_config(config_file) + + # Set up a joint distribution to sample from + self.pval = JointDistribution(self.var_args, + *dist, + **{'constraints': self.constraints}) + + # ------------------------------------------------------------------------- + + def draw(self): + """ + Draw a sample from the joint distribution and construct a + dictionary that maps the parameter names to the values + generated for them. + + Returns: + A `dict` containing a the names and values of a set of + randomly sampled waveform parameters (e.g., masses, spins, + position in the sky, ...). + """ + values = apply_transforms(self.pval.rvs(), self.trans)[0] + result = dict(zip(self.var_args, values)) + + return result + + +# ----------------------------------------------------------------------------- +# FUNCTION DEFINITIONS +# ----------------------------------------------------------------------------- + +def fade_on(timeseries, + alpha=0.25): + """ + Take a PyCBC time series and use a one-sided Tukey window to "fade + on" the waveform (to reduce discontinuities in the amplitude). 
+
+    Args:
+        timeseries (pycbc.types.timeseries.TimeSeries): The PyCBC
+            TimeSeries object to be faded on.
+        alpha (float): The alpha parameter for the Tukey window.
+
+    Returns:
+        The `timeseries` which has been faded on.
+    """
+
+    # Save the parameters from the time series we are about to fade on
+    delta_t = timeseries.delta_t
+    epoch = timeseries.start_time
+    duration = timeseries.duration
+    sample_rate = timeseries.sample_rate
+
+    # Create a one-sided Tukey window for the turn on: keep the rising
+    # taper in the first half and set the second half to one
+    window = tukey(M=int(duration * sample_rate), alpha=alpha)
+    window[int(0.5 * len(window)):] = 1
+
+    # Apply the one-sided Tukey window for the fade-on
+    ts = window * np.array(timeseries)
+
+    # Create and return a TimeSeries object again from the resulting array
+    # using the original parameters (delta_t and epoch) of the time series
+    return TimeSeries(initial_array=ts,
+                      delta_t=delta_t,
+                      epoch=epoch)
+
+
+def get_waveform(static_arguments,
+                 waveform_params):
+    """
+    Simulate a waveform (using methods provided by PyCBC / LALSuite)
+    based on the `static_arguments` (which define, e.g., the waveform
+    model to be used) and the `waveform_params`, which specify the
+    physical parameters of the waveform (e.g., the masses and spins).
+
+    .. note::
+        The actual simulation of the waveform is, depending on your
+        choice of the `domain`, performed by the PyCBC methods
+        :func:`get_td_waveform()` and :func:`get_fd_waveform()`,
+        respectively.
+        These take as arguments a combination of the `static_arguments`
+        and the `waveform_params`. A (more) comprehensive explanation of
+        the parameters that are supported by these methods can be found
+        in the `PyCBC documentation <https://pycbc.org/pycbc/latest/html/
+        pycbc.waveform.html#pycbc.waveform.waveform.get_td_waveform>`_.
+        Currently, however, only the following keys are actually passed
+        to the simulation routines:
+
+        .. code-block:: python
+
+            {'approximant', 'coa_phase', 'delta_f', 'delta_t',
+             'distance', 'f_lower', 'inclination', 'mass1', 'mass2',
+             'spin1z', 'spin2z'}
+
+    .. warning::
+        If you want to use a different waveform model or a different
+        parameter space, you may need to edit this function according
+        to your exact needs!
+
+
+    Args:
+        static_arguments (dict): The static arguments (e.g., the
+            waveform approximant and the sampling rate) defined in the
+            `*.ini` configuration file, which specify technical aspects
+            of the simulation process.
+        waveform_params (dict): The physical parameters of the
+            waveform to be simulated, such as the masses or the
+            position in the sky. Usually, these values are sampled
+            using a :class:`WaveformParameterGenerator` instance,
+            which is based on the variable arguments section in the
+            `*.ini` configuration file.
+
+    Returns:
+        A tuple `(h_plus, h_cross)` with the two polarization modes of
+        the simulated waveform, resized to the desired length.
+    """
+
+    # Check if we are using a time domain (TD) or frequency domain (FD)
+    # approximant and retrieve the required parameters for the simulation
+    if static_arguments['domain'] == 'time':
+        simulate_waveform = get_td_waveform
+        length = int(static_arguments['td_length'])
+    elif static_arguments['domain'] == 'frequency':
+        simulate_waveform = get_fd_waveform
+        length = int(static_arguments['fd_length'])
+    else:
+        raise ValueError('Invalid domain! Must be "time" or "frequency"!')
Must be "time" or "frequency"!') + + # Collect all the required parameters for the simulation from the given + # static and variable parameters + simulation_parameters = dict(approximant=static_arguments['approximant'], + coa_phase=waveform_params['coa_phase'], + delta_f=static_arguments['delta_f'], + delta_t=static_arguments['delta_t'], + distance=static_arguments['distance'], + f_lower=static_arguments['f_lower'], + inclination=waveform_params['inclination'], + mass1=waveform_params['mass1'], + mass2=waveform_params['mass2'], + spin1z=waveform_params['spin1z'], + spin2z=waveform_params['spin2z']) + + # Perform the actual simulation with the given parameters + h_plus, h_cross = simulate_waveform(**simulation_parameters) + + # Apply the fade-on filter to them + h_plus = fade_on(h_plus, alpha=static_arguments['tukey_alpha']) + h_cross = fade_on(h_cross, alpha=static_arguments['tukey_alpha']) + + # Resize the simulated waveform to the specified length + h_plus.resize(length) + h_cross.resize(length) + + return h_plus, h_cross + + +# ----------------------------------------------------------------------------- + + +def get_detector_signals(static_arguments, + waveform_params, + event_time, + waveform): + """ + Project the raw `waveform` (i.e., the tuple `(h_plus, h_cross)` + returned by :func:`get_waveform()`) onto the antenna patterns of + the detectors in Hanford and Livingston. This requires the position + of the source in the sky, which is contained in `waveform_params`. + + Args: + static_arguments (dict): The static arguments (e.g., the + waveform approximant and the sampling rate) defined in the + `*.ini` configuration file. + waveform_params (dict): The parameters that were used as inputs + for the waveform simulation, although this method will only + require the following parameters to be present: + + - ``ra`` = Right ascension of the source + - ``dec`` = Declination of the source + - ``polarization`` = Polarization angle of the source + + event_time (int): The GPS time for the event, which, by + convention, is the time at which the simulated signal + reaches its maximum amplitude in the `H1` channel. + waveform (tuple): The pure simulated wavefrom, represented by + a tuple `(h_plus, h_cross)`, which is usually generated + by :func:`get_waveform()`. + + Returns: + A dictionary with keys `{'H1', 'L1'}` that contains the pure + signal as it would be observed at Hanford and Livingston. 
+ """ + + # Retrieve the two polarization modes from the waveform tuple + h_plus, h_cross = waveform + + # Extract the parameters we will need later for the projection + right_ascension = waveform_params['ra'] + declination = waveform_params['dec'] + polarization = waveform_params['polarization'] + + # Store the detector signals we will get through projection + detector_signals = {} + + # Set up detectors + detectors = {'H1': Detector('H1'), 'L1': Detector('L1')} + + # Loop over both detectors and calculate the signal we would see there + for detector_name in ('H1', 'L1'): + + # Set up the detector based on its name + detector = detectors[detector_name] + + # Calculate the antenna pattern for this detector + f_plus, f_cross = \ + detector.antenna_pattern(right_ascension=right_ascension, + declination=declination, + polarization=polarization, + t_gps=100) + + # Calculate the time offset from H1 for this detector + delta_t_h1 = \ + detector.time_delay_from_detector(other_detector=detectors['H1'], + right_ascension=right_ascension, + declination=declination, + t_gps=100) + + # Project the waveform onto the antenna pattern + detector_signal = f_plus * h_plus + f_cross * h_cross + + # Map the signal from geocentric coordinates to the specific + # reference frame of the detector. This depends on whether we have + # simulated the waveform in the time or frequency domain: + if static_arguments['domain'] == 'time': + offset = 100 + delta_t_h1 + detector_signal.start_time + detector_signal = detector_signal.cyclic_time_shift(offset) + detector_signal.start_time = event_time - 100 + elif static_arguments['domain'] == 'frequency': + offset = 100 + delta_t_h1 + detector_signal = detector_signal.cyclic_time_shift(offset) + detector_signal.start_time = event_time - 100 + detector_signal = detector_signal.to_timeseries() + else: + raise ValueError('Invalid domain! Must be "time" or "frequency"!') + + # Store the result + detector_signals[detector_name] = detector_signal + + return detector_signals