diff --git a/docs/file_formats_used_by_pyfstat.md b/docs/file_formats_used_by_pyfstat.md new file mode 100644 index 0000000000000000000000000000000000000000..991a6d728d437de13038b6ecd08cda3a5168f75c --- /dev/null +++ b/docs/file_formats_used_by_pyfstat.md @@ -0,0 +1,59 @@ +# File formats used by PyFstat + +This page documents the various file formats and how they are used. + +## MCMC Searches: + +### Data output +* `outdir/label.log`: On importing `pyfstat`, two logging streams are setup: + one stream which is written to the terminal, and a second which is saved to a +file `outdir/label.log` where the `outdir` and `label` are given when +initialising the search. While the terminal output can be suppressed with the +`-q` flag, the file is always written with a log-level set to `INFO`. This file +is never overwritten, so can be used to search for changes in the setup or old +results. + +* `outdir/label.par`: A parameter file containing the maximum detection stat. +value, and estimates of the best-fit parameters. This can be read in with +`read_par()` and written with `write_par()`. + +* `outdir/label_saved_data.p`: Upon succesful completion of an MCMC search, the +results will be saved to a `python` `pickle` file. This pickle file can +subsequently be read back and contains many useful outputs such as the +`sampler` object, the `lnprobs` and `lnlikes` from the run, and of course the +`chains` themselves. Rerunning a script with different parameters, the pickle +is overwritten once the simulation completes, however, a backup is saved with +an appended `.old` label. + +### Image output + +* `outdir/label_walkers.png`: The position of all temperature 0 walkers + during the burn-in + production stage. In addition, the final panel plots a + histogram of the detection statistic from all temperature 0 walkers; if + a burn-in period is defined this is computed separately and colored red. + +* `outdir/label_init_i_walkers.png`: The same as the `walkers`, but for the + `ith` initialisation stage. + +* `outdir/label_corner.png`: A corner plot of the production samples using + the [corner](https://github.com/dfm/corner.py) package. This file is + generated by `plot_corner()`. + +* `outdir/label_prior_posterior.png`: A plot showing the prior and a KDE of + the posterior, generated by `plot_prior_posterior()` + +## Grid searches + +### Data output +* `outdir/label_grid_FS.txt`: Upon succesful completion of a grid search, the +grid points are saved in plain text format. The order is set by +`get_input_data_array` with an additional column being the output detection +statistic. + +### Image output + +* `outdir/label_1D.png` + +* `outdir/label_2D.png`: A 2D contour plot of the detection statitistic over +the range of parameters; options exist to flatten higher dimension +searches. Generated using `plot_2D()`. diff --git a/pyfstat.py b/pyfstat.py index c3a9adf095a4989a1e1cc50fe104d33334ad2d8b..6c0199d2838fb8282488d93153e132a8f15cb89c 100755 --- a/pyfstat.py +++ b/pyfstat.py @@ -71,7 +71,7 @@ logger.addHandler(stream_handler) def initializer(func): - """ Automatically assigns the parameters to self """ + """ Decorator function to automatically assign the parameters to self """ names, varargs, keywords, defaults = inspect.getargspec(func) @wraps(func) @@ -102,13 +102,13 @@ def read_par(label, outdir): class BaseSearchClass(object): - """ The base search class, provides ephemeris and general utilities """ + """ The base search class, provides general functions """ earth_ephem_default = earth_ephem sun_ephem_default = sun_ephem def add_log_file(self): - ' Log output to a log-file, requires class to have outdir and label ' + """ Log output to a file, requires class to have outdir and label """ logfilename = '{}/{}.log'.format(self.outdir, self.label) fh = logging.FileHandler(logfilename) fh.setLevel(logging.INFO) @@ -118,7 +118,21 @@ class BaseSearchClass(object): logging.getLogger().addHandler(fh) def shift_matrix(self, n, dT): - """ Generate the shift matrix """ + """ Generate the shift matrix + + Parameters + ---------- + n: int + The dimension of the shift-matrix to generate + dT: float + The time delta of the shift matrix + + Returns + ------- + m: array (n, n) + The shift matrix + """ + m = np.zeros((n, n)) factorial = np.math.factorial for i in range(n): @@ -132,7 +146,6 @@ class BaseSearchClass(object): m[i, j] = 2*np.pi*float(dT)**(j-i) / factorial(j-i) else: m[i, j] = float(dT)**(j-i) / factorial(j-i) - return m def shift_coefficients(self, theta, dT): @@ -151,6 +164,7 @@ class BaseSearchClass(object): theta_new: array-like shape (n,) vector of the coefficients as evaluate as the new reference time. """ + n = len(theta) m = self.shift_matrix(n, dT) return np.dot(m, theta) @@ -176,18 +190,17 @@ class BaseSearchClass(object): class ComputeFstat(object): - """ Base class providing interface to lalpulsar.ComputeFstat """ + """ Base class providing interface to `lalpulsar.ComputeFstat` """ earth_ephem_default = earth_ephem sun_ephem_default = sun_ephem @initializer - def __init__(self, tref, sftfilepath=None, - minStartTime=None, maxStartTime=None, - minCoverFreq=None, maxCoverFreq=None, - detector=None, earth_ephem=None, sun_ephem=None, - binary=False, transient=True, BSGL=False, - BSGL_PREFACTOR=1): + def __init__(self, tref, sftfilepath=None, minStartTime=None, + maxStartTime=None, binary=False, transient=True, BSGL=False, + BSGL_PREFACTOR=1, detector=None, minCoverFreq=None, + maxCoverFreq=None, earth_ephem=None, sun_ephem=None, + ): """ Parameters ---------- @@ -195,23 +208,30 @@ class ComputeFstat(object): GPS seconds of the reference time. sftfilepath: str File patern to match SFTs + minStartTime, maxStartTime: float GPStime + Only use SFTs with timestemps starting from (including, excluding) + this epoch + binary: bool + If true, search of binary parameters. + transient: bool + If true, allow for the Fstat to be computed over a transient range. + BSGL: bool + If true, compute the BSGL rather than the twoF value. + BSGL_PREFACTOR: float + If BSGL is True, one can specify a prefactor to multiply the + computed BSGL value by, useful in MCMC searches to amplify the + peaks. + detector: str + Two character reference to the data to use, specify None for no + contraint. minCoverFreq, maxCoverFreq: float The min and max cover frequency passed to CreateFstatInput, if either is None the range of frequencies in the SFT less 1Hz is used. - detector: str - Two character reference to the data to use, specify None for no - contraint. earth_ephem, sun_ephem: str Paths of the two files containing positions of Earth and Sun, respectively at evenly spaced times, as passed to CreateFstatInput. If None defaults defined in BaseSearchClass will be used. - binary: bool - If true, search of binary parameters. - transient: bool - If true, allow for the Fstat to be computed over a transient range. - BSGL: bool - If true, compute the BSGL rather than the twoF value. """ @@ -316,7 +336,6 @@ class ComputeFstat(object): argp=None): """ Returns the twoF fully-coherently at a single point """ - self.PulsarDopplerParams.fkdot = np.array([F0, F1, F2, 0, 0, 0, 0]) self.PulsarDopplerParams.Alpha = Alpha self.PulsarDopplerParams.Delta = Delta @@ -382,10 +401,10 @@ class SemiCoherentGlitchSearch(BaseSearchClass, ComputeFstat): @initializer def __init__(self, label, outdir, tref, tstart, tend, nglitch=0, - sftfilepath=None, theta0_idx=0, BSGL=False, - minCoverFreq=None, maxCoverFreq=None, minStartTime=None, - maxStartTime=None, detector=None, earth_ephem=None, - sun_ephem=None, BSGL_PREFACTOR=1): + sftfilepath=None, theta0_idx=0, BSGL=False, BSGL_PREFACTOR=1, + minStartTime=None, maxStartTime=None, minCoverFreq=None, + maxCoverFreq=None, detector=None, earth_ephem=None, + sun_ephem=None): """ Parameters ---------- @@ -402,17 +421,8 @@ class SemiCoherentGlitchSearch(BaseSearchClass, ComputeFstat): Index (zero-based) of which segment the theta refers to - uyseful if providing a tight prior on theta to allow the signal to jump too theta (and not just from) - minCoverFreq, maxCoverFreq: float - The min and max cover frequency passed to CreateFstatInput, if - either is None the range of frequencies in the SFT less 1Hz is - used. - detector: str - Two character reference to the data to use, specify None for no - contraint. - earth_ephem, sun_ephem: str - Paths of the two files containing positions of Earth and Sun, - respectively at evenly spaced times, as passed to CreateFstatInput. - If None defaults defined in BaseSearchClass will be used. + + For all other parameters, see pyfstat.ComputeFStat. """ self.fs_file_name = "{}/{}_FS.dat".format(self.outdir, self.label) @@ -1462,13 +1472,14 @@ _ sftfilepath: str class GridSearch(BaseSearchClass): """ Gridded search using ComputeFstat """ @initializer - def __init__(self, label, outdir, sftfilepath, F0s=[0], - F1s=[0], F2s=[0], Alphas=[0], Deltas=[0], tref=None, - tstart=None, tend=None, minCoverFreq=None, maxCoverFreq=None, - earth_ephem=None, sun_ephem=None, detector=None, BSGL=False, - BSGL_PREFACTOR=1): + def __init__(self, label, outdir, sftfilepath, F0s=[0], F1s=[0], F2s=[0], + Alphas=[0], Deltas=[0], tref=None, tstart=None, tend=None, + BSGL=False, BSGL_PREFACTOR=1, minCoverFreq=None, + maxCoverFreq=None, earth_ephem=None, sun_ephem=None, + detector=None): """ Parameters + ---------- label, outdir: str A label and directory to read/write data from/to sftfilepath: str @@ -1478,14 +1489,8 @@ class GridSearch(BaseSearchClass): [F0min, F0max, dF0], for a fixed value simply give [F0]. tref, tstart, tend: int GPS seconds of the reference time, start time and end time - minCoverFreq, maxCoverFreq: float - Minimum and maximum instantaneous frequency which will be covered - over the SFT time span as passed to CreateFstatInput - earth_ephem, sun_ephem: str - Paths of the two files containing positions of Earth and Sun, - respectively at evenly spaced times, as passed to CreateFstatInput - If None defaults defined in BaseSearchClass will be used + For all other parameters, see `pyfstat.ComputeFStat` for details """ self.minStartTime = tstart @@ -1606,9 +1611,9 @@ class GridSearch(BaseSearchClass): fig.savefig('{}/{}_1D.png'.format(self.outdir, self.label)) def plot_2D(self, xkey, ykey, ax=None, save=True, vmin=None, vmax=None, - add_mismatch=None, xN=None, yN=None, flat_keys=[], + add_mismatch=None, xN=None, yN=None, flat_keys=[], rel_flat_idxs=[], flatten_method=np.max, - predicted_twoF=None, cm=None): + predicted_twoF=None, cm=None, cbarkwargs={}): """ Plots a 2D grid of 2F values Parameters @@ -1646,7 +1651,8 @@ class GridSearch(BaseSearchClass): cm = plt.cm.viridis pax = ax.pcolormesh(X, Y, Z, cmap=cm, vmin=vmin, vmax=vmax) - plt.colorbar(pax, ax=ax) + cb = plt.colorbar(pax, ax=ax, **cbarkwargs) + cb.set_label('$2\mathcal{F}$') if add_mismatch: self.add_mismatch_to_ax(ax, x, y, xkey, ykey, *add_mismatch) @@ -1683,7 +1689,7 @@ class GridSearch(BaseSearchClass): class GridGlitchSearch(GridSearch): - """ Gridded search using the SemiCoherentGlitchSearch """ + """ Grid search using the SemiCoherentGlitchSearch """ @initializer def __init__(self, label, outdir, sftfilepath=None, F0s=[0], F1s=[0], F2s=[0], delta_F0s=[0], delta_F1s=[0], tglitchs=None, @@ -1693,6 +1699,7 @@ class GridGlitchSearch(GridSearch): BSGL_PREFACTOR=1): """ Parameters + ---------- label, outdir: str A label and directory to read/write data from/to sftfilepath: str @@ -1702,14 +1709,8 @@ class GridGlitchSearch(GridSearch): [F0min, F0max, dF0], for a fixed value simply give [F0]. tref, tstart, tend: int GPS seconds of the reference time, start time and end time - minCoverFreq, maxCoverFreq: float - Minimum and maximum instantaneous frequency which will be covered - over the SFT time span as passed to CreateFstatInput - earth_ephem, sun_ephem: str - Paths of the two files containing positions of Earth and Sun, - respectively at evenly spaced times, as passed to CreateFstatInput - If None defaults defined in BaseSearchClass will be used + For all other parameters, see pyfstat.ComputeFStat. """ if tglitchs is None: self.tglitchs = [self.tend]