diff --git a/Paper/definitions.tex b/Paper/definitions.tex
index 5bfa6393e43bb32b5b2c5375fc252b3fb4a1d5d6..65a3d4d8dda29df0c7b66684b06fcdfa532410da 100644
--- a/Paper/definitions.tex
+++ b/Paper/definitions.tex
@@ -1,3 +1,4 @@
+\newcommand{\fdot}{\dot{f}}
 \newcommand{\F}{{\mathcal{F}}}
 \newcommand{\A}{\boldsymbol{\mathcal{A}}}
 \newcommand{\blambda}{\boldsymbol{\mathbf{\lambda}}}
diff --git a/Paper/fully_coherent_search_using_MCMC_walkers.png b/Paper/fully_coherent_search_using_MCMC_walkers.png
index 589a99d81a18deee317b051c95df1199492729ae..d338b9fcbc9d77a39c38c7ccab75710570e59759 100644
Binary files a/Paper/fully_coherent_search_using_MCMC_walkers.png and b/Paper/fully_coherent_search_using_MCMC_walkers.png differ
diff --git a/Paper/paper_cw_mcmc.tex b/Paper/paper_cw_mcmc.tex
index d32706b7aed68a5fb8b6e391bc39910a89a81163..badc37b74a83ce04281f96fcca5fff6e6f760b56 100644
--- a/Paper/paper_cw_mcmc.tex
+++ b/Paper/paper_cw_mcmc.tex
@@ -50,8 +50,7 @@
 
 \begin{document}
 
-\title{MCMC follow-up methods for continuous gravitational wave candidates including
-glitching and transient waveforms}
+\title{MCMC follow-up methods for continuous gravitational wave candidates}
 
 \author{G. Ashton}
 \email[E-mail: ]{gregory.ashton@ligo.org}
@@ -197,20 +196,20 @@ data contains an additive mixture of noise and a signal $h(t; \A, \blambda)$.
 In order to make a quantitative comparison, we use Bayes theorem in the usual
 way to write the odds as
 \begin{equation}
-O_{\rm S/N} \equiv \frac{P(\Hs| x I)}{P(\Hn| x I)} =
+O_{\rm S/N} \equiv \frac{P(\Hs| x, I)}{P(\Hn| x, I)} =
 \Bsn(x| I) \frac{P(\Hs| I)}{P(\Hn | I)},
 \end{equation}
 where the second factor is the prior odds while the first factor is the
 \emph{Bayes factor}:
 \begin{equation}
-\Bsn(x| I) = \frac{P(x| \Hs I)}{P(x| \Hn I)}.
+\Bsn(x| I) = \frac{P(x| \Hs, I)}{P(x| \Hn, I)}.
 \end{equation}
 Typically, we set the prior odds to unity such that it is the Bayes factor
 which determines our confidence in the signal hypothesis. In this work we will
-therefore discuss the Bayes factor with the impplied assumption this is
+therefore discuss the Bayes factor with the implied assumption that this is
 equivalent to the odds, unless we have a good reason to change the prior odds.
 
-We can rewrite this Bayes factor in terms of the two sets of signal parameters
+We can rewrite the Bayes factor in terms of the two sets of signal parameters
 as
 \begin{equation}
 \Bsn(x| I) = \frac{P(x, \A, \blambda|\Hs, I)}
@@ -277,13 +276,13 @@ where $(h|h)$ is the inner product of the signal with itself (see for example
 the detector and $\mathcal{N}$ is the number of detectors.
 
 
-\subsection{Using the $\F$-statistic to compute a Bayes factor}
-At first, it appeared that the $\F$-statistic was independent of the Bayesian
-framework. However, it was shown by \citet{prix2009} that if we marginalise
-over the four amplitude parameters of Equation~\eqref{eqn_full_bayes}, choosing
-a prior $\Pi_{\rm c}$ such that
+\subsection{Using the $\F$-statistic to compute a Bayes factor}
+At first, it appears that the $\F$-statistic is independent of the Bayesian
+framework, since it was first derived directly from the likelihood.
+However, it was shown by \citet{prix2009} that if we marginalise over the
+four amplitude parameters of Equation~\eqref{eqn_full_bayes}, choosing a
+prior $\Pic$ such that
 \begin{equation}
-P(\A| \Hs, \Pi_{\rm c}, I) \equiv \left\{
+P(\A| \Hs, \Pic, I) \equiv \left\{
 \begin{array}{ll}
 C & \textrm{ for } \ho < \homax \\
 0 & \textrm{ otherwise}
@@ -293,7 +292,7 @@ C & \textrm{ for } \ho < \homax \\
 then the integral, when $\homax \gg 1$, is a Gaussian integral and can be
 computed analytically as
 \begin{align}
-B_{\rm S/N}(x| \Pi_{\rm c}, \blambda) & \equiv
+\Bsn(x| \Pic, \blambda, I) & \equiv
 \int
 \mathcal{L}(x ;\A, \blambda)
 P(\A| \Hs, I) d\A
@@ -310,13 +309,13 @@ fixed Doppler parameters. As such, we can define the Bayes-factor of
 Equation~\eqref{eqn_full_bayes} as
 \begin{equation}
-B_{\rm S/N}(x| \Pi_{\rm c}, I) = \int
-B_{\rm S/N}(x| \Pi_{\rm c}, \blambda) P(\blambda| \Hs, I)
+\Bsn(x| \Pic, I) = \int
+\Bsn(x| \Pic, \blambda, I) P(\blambda| \Hs, I)
 d\blambda,
 \end{equation}
 or neglecting the constants
 \begin{equation}
-B_{\rm S/N}(x| \Pi_{\rm c}, I) \propto \int
+\Bsn(x| \Pic, I) \propto \int
 e^{\F(x| \blambda)} P(\blambda| \Hs, I)
 d\blambda.
 \label{eqn_bayes_over_F}
@@ -327,7 +326,7 @@ there exists a wealth of well-tested tools \citep{lalsuite} capable of
 computing the $\mathcal{F}$-statistic for CW signals, transient-CWs, and CW
 signals from binary systems; these can be leveraged to compute
 Equation~\eqref{eqn_bayes_over_F} or, adding in the constant,
-$B_{\rm S/N}(x| \Pi_{\rm c})$ itself. The disadvantage to this method is that
+$\Bsn(x| \Pic)$ itself. The disadvantage of this method is that
 we are forced to use the prior $\Pic$, which was shown by \citet{prix2009}
 to be unphysical.
@@ -343,14 +342,14 @@ P(\theta| x, \H, I) \propto P(x| \theta, \H, I)P(\theta| \H, I),
 \end{equation}
 is used to evaluate proposed jumps from one point in parameter space to other
 points; jumps which increase this probability are always accepted, while all
 other jumps are accepted with some probability. The
-algorithm, proceeding in this way, is highly effective at resolving peaks in
-the high-dimension parameter spaces.
+algorithm, proceeding in this way, is highly efficient at resolving peaks in
+high-dimensional parameter spaces.
 
 At this point, we note the equivalence of Equation~\eqref{eqn_bayes_for_theta}
 to the integrand of Equation~\eqref{eqn_bayes_over_F}:
 \begin{equation}
-P(\blambda | x, \Pi_{\rm c}, \Hs, I)
-%=B_{\rm S/N}(x| \Pi_{\rm c}, \blambda) P(\blambda| \Hs I).
+P(\blambda | x, \Pic, \Hs, I)
+%=\Bsn(x| \Pic, \blambda) P(\blambda| \Hs I).
 \propto e^{\F(x| \blambda)} P(\blambda| \Hs I),
 \label{eqn_lambda_posterior}
 \end{equation}
@@ -371,47 +370,52 @@ proposal distribution must be `tuned' so that the MCMC sampler efficiently
 walks the parameter space without either jumping too far off the peak, or
 taking such small steps that it takes a long period of time to traverse the
 peak. The \texttt{emcee} sampler addresses this by using an ensemble, a large
-number ${\sim}100$ parallel `walkers', in which the proposal for each walker
-is generated from the current distribution of the other walkers. Moreover, by
-applying an an affine transformation, the efficiency of the algorithm is not
-diminished when the parameter space is highly anisotropic. As such, this
-sampler requires little in the way of tuning: a single proposal scale and the
-Number of steps to take.
-
-Beyond the standard ensemble sampler, we will often use one further
-modification, namely the parallel-tempered ensemble sampler. A parallel
-tempered MCMC simulation, first proposed by \citet{swendsen1986}, runs
-$\Ntemps$ simulations in parallel with the likelihood in the $i^{\rm th}$
-parallel simulation is raied to a power of $1/T_{i}$ (where $T_i$ is referred
-to as the temperature) such that Equation~\eqref{eqn_lambda_posterior} becomes
+number ${\sim}100$ parallel \emph{walkers}, in which the proposal for each
+walker is generated from the current distribution of the other walkers.
+Moreover, by applying an affine transformation, the efficiency of the
+algorithm is not diminished when the parameter space is highly anisotropic.
+As such, this sampler requires little in the way of tuning: a single proposal
+scale and the number of steps to take.
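+
+For reference, the proposal used by \texttt{emcee} is the affine-invariant
+`stretch move' of Goodman \& Weare: a walker $X_k$ is updated with the help
+of another, randomly chosen, walker $X_j$ via
+\begin{equation}
+Y = X_j + z (X_k - X_j),
+\end{equation}
+where $z$ is drawn from a density $g(z) \propto 1/\sqrt{z}$ on $[1/a, a]$;
+the move is accepted with probability $\min(1, z^{N-1} P(Y)/P(X_k))$ in an
+$N$-dimensional parameter space. The scale parameter $a$ (2 by default) is
+the single proposal scale referred to above.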
+
+\subsection{Parallel tempering: sampling multimodal posteriors}
+Beyond the standard ensemble sampler, we will also use one further
+modification, the parallel-tempered ensemble sampler. A parallel-tempered
+MCMC simulation, first proposed by \citet{swendsen1986}, runs $\Ntemps$
+simulations in parallel, with the likelihood in the $i^{\rm th}$ simulation
+raised to a power of $1/T_{i}$, where $T_i$ is referred to as the
+temperature. As such, Equation~\eqref{eqn_lambda_posterior} is written as
 \begin{equation}
-P(\blambda | T_i, x, \Pi_{\rm c}, \Hs, I)
-%=B_{\rm S/N}(x| \Pi_{\rm c}, \blambda)^{T_i} P(\blambda| \Hs I).
+P(\blambda | T_i, x, \Pic, \Hs, I)
+%=\Bsn(x| \Pic, \blambda)^{1/T_i} P(\blambda| \Hs I).
 \propto (e^{\F(x| \blambda)})^{1/T_i} P(\blambda| \Hs I).
 \end{equation}
-Setting $T_0=1$ with $T_i > T_0 \forall i > 1$, such that the lowest
+We set $T_0=1$ with $T_i > T_0 \; \forall \; i \geq 1$, such that the lowest
 temperature recovers Equation~\eqref{eqn_lambda_posterior} while for higher
 temperatures the likelihood is broadened (for a Gaussian likelihood, the
 standard deviation is larger by a factor of $\sqrt{T_i}$). Periodically, the
-different tempereates swap elements. This allows the $T_0$ chain (from which we
-draw samples of the posterior) to efficiently sample from multi-modal
-posteriors. This does however introduce two additional tuning parameters, the
-number and range of the set of temperatures $\{T_i\}$.
+algorithm swaps the positions of the walkers between the different
+temperatures. This allows the $T_0$ chain (from which we draw samples of the
+posterior) to efficiently sample from multi-modal posteriors. This introduces
+two additional tuning parameters, the number and range of the set of
+temperatures $\{T_i\}$; we will discuss their significance when relevant.
 
-\subsection{Parallel tempering}
+\subsection{Parallel tempering: estimating the Bayes factor}
 In addition, parallel tempering offers a robust method to estimate the
 Bayes factor of Equation~\eqref{eqn_bayes_over_F}. If we define
-$\beta\equiv1/T$, the inverse temperature and $Z(\beta)\equiv B_{\rm S/N}(x| \Pi_{\rm
-c}, I)$, then as noted by \citet{goggans2004} for the general case, we may
-write
+$\beta\equiv1/T$, the inverse temperature, and $Z(\beta)\equiv
+\Bsn(x| \Pic, I)$, then, as noted by \citet{goggans2004} for the general
+case, we may write
 \begin{align}
 \frac{1}{Z} \frac{\partial Z}{\partial \beta}=
 \frac{
-\int B_{\rm S/N}(x| \Pi_{\rm c}, \blambda)^{\beta}
-\log(B_{\rm S/N}(x| \Pi_{\rm c}, \blambda))P(\blambda| I)
+\int \Bsn(x| \Pic, \blambda)^{\beta}
+\log(\Bsn(x| \Pic, \blambda))P(\blambda| I)
+d\blambda
 }
 {
-\int B_{\rm S/N}(x| \Pi_{\rm c}, \blambda)^{\beta})P(\blambda| I)
+\int \Bsn(x| \Pic, \blambda)^{\beta} P(\blambda| I)
+d\blambda
 }
 \end{align}
 The right-hand side expresses the average of the log-likelihood at $\beta$. As
@@ -427,7 +431,7 @@ can numerically integrate to get the Bayes factor, i.e.
 \langle \log(\Bsn(x| \Pic, \blambda)) \rangle_{\beta} d\beta.
 \end{align}
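+A minimal sketch of this quadrature, assuming a ladder ordered as
+$1 = \beta_{1} > \beta_{2} > \dots > \beta_{\Ntemps} \approx 0$ and using the
+trapezoid rule (neglecting the residual below $\beta_{\Ntemps}$), is
+\begin{equation}
+\log \Bsn(x| \Pic, I) \approx \sum_{i=1}^{\Ntemps - 1}
+\frac{\beta_{i} - \beta_{i+1}}{2}
+\left[
+\langle \log(\Bsn(x| \Pic, \blambda)) \rangle_{\beta_{i}} +
+\langle \log(\Bsn(x| \Pic, \blambda)) \rangle_{\beta_{i+1}}
+\right].
+\end{equation}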
 In practice, we use a simple numerical quadrature over a finite ladder of
-$\beta_i$ with the smallest chosen such that extending closer to zero does not
+$\beta_i$, with the smallest value chosen such that decreasing it further
+does not
 change the result beyond other numerical uncertainties. Typically, getting
 accurate results for the Bayes factor requires a substantially larger number
 of temperatures than are required for efficiently sampling multi-modal
@@ -437,22 +441,20 @@ temperatures (suitably initialised close to the target peak) when estimating
 the Bayes factor.
 
 \subsection{The topology of the likelihood}
-As discussed, we intend to use the $\F$-statistic as our log-likelihood in the
-MCMC simulations. Before continuing, it is worthwhile to understand the behaviour
-of the log-likelihood. As shown in Equation~\eqref{eqn_twoF_expectation},
-$\widetilde{2\F}$ has a expectation value of 4 (corresponding to the
-4 degrees of freedom of the underlying chi-square distribution) in Gaussian
-noise, but in the presence of a signal larger value are expected proportional
-to the squared SNR.
-
-To illustrate this, let us consider $\widetilde{2\F}$ (the log-likelihood)
-as a function of $f$ (the template frequency) if there exists a signal in the
-data with frequency $f_0$. We will assume that all other Doppler parameters
-are perfectly matched.
-This can be calculated analytically, taking the matched-filtering amplitude
-(Equation~(11) of \citep{prix2005}) with $\Delta\Phi(t) = 2\pi(f - f_0) t$
-the expectation of $\widetilde{2\F}$ as a function of the template frequency
-$f$ is given by
+
+We intend to use the $\F$-statistic as our log-likelihood in MCMC
+simulations, but before continuing, it is worthwhile to acquaint ourselves
+with the typical behaviour of the log-likelihood by considering a specific
+example.
+
+As shown in Equation~\eqref{eqn_twoF_expectation}, the expectation of
+$\widetilde{2\F}$ is 4 in Gaussian noise alone, but proportional to the
+square of the SNR in the presence of a signal. To illustrate this, let us
+consider $\widetilde{2\F}$ as a function of $f$ (the template frequency) if
+there exists a signal in the data with frequency $f_0$. We will assume that
+all other Doppler parameters are perfectly matched. Such an example can be
+calculated analytically: taking the matched-filtering amplitude
+(Equation~(11) of \citet{prix2005}) with $\Delta\Phi(t) = 2\pi(f - f_0) t$,
+the expectation of $\widetilde{2\F}$ as a function of the template frequency
+$f$ is given by
 \begin{equation}
 \textrm{E}[\widetilde{2\F}](f) = 4 +
 (\textrm{E}[\widetilde{2\F_0}] - 4)\,\textrm{sinc}^{2}(\pi(f-f_0)\Tcoh)
@@ -624,6 +626,7 @@ As such, the volume can be decomposed as
 \sqrt{\textrm{det}g^{\rm Sky}}\frac{\Delta\Omega}{2} \times
 \sqrt{\textrm{det}g^{\rm PE}}\prod_{s=0}^{\smax}\Delta f^{(s)} \\
 & = \Vsky \times \Vpe.
+\label{eqn_metric_volume}
 \end{align}
 Moreover, if $\tref$ is in the middle of the observation span, the diagonal
 nature of $g^{\rm PE}$ means that one can further identify
@@ -633,48 +636,90 @@ nature of $g^{\rm PE}$ means that one can further identify
 \end{align}
 This decomposition may be useful in setting up MCMC searches.
 
-\subsection{An example}
-
-In order to familiarise the reader with the features of the search, we will now
-describe a simple directed search (over $f$ and $\dot{f}$) for a strong
-simulated signal in Gaussian noise. The setup of the search consists in defining
-the following
-\begin{itemize}
-\item The prior for each search parameter. Typically, we recomend either a uniform
-prior bounding the area of interest, or a normal distribution centered on the
-target and with some well defined width. In this example we will use a uniform
-prior.
-\item The initialisation of the walkers. If the whole prior volume is to be explored,
-the walkers should be initialised from the prior (i.e. random drawn from the
-prior distributions) as we will do here. However, it is possible that only a
-small region of parameter space requires exploration, therefore we provide
-functionality to initialise the walkers subject to an independent distribution
-if needed.
-\item The number of burn-in and production steps to take. This is a tuning
-parameter of the MCMC algorithm. First we allow the walkers to run for a number
-of `burn-in' steps which are discarded and then a number of `production' steps
-are taken from which one makes estimates of the posterior
-\item The parallel tempering set-up. If used, one must specify the number of
-temperatures and their arrangement. Typically, we use 3 or so temperatures
-with arranged linearly in log-space from some zero to some maximum temperature.
-\end{itemize}
-
-Using these choices, the simulation is run. To illustrate the full MCMC process,
-in Figure~\ref{fig_MCMC_simple_example} we plot the progress of all the individual
-walkers (each represented by an individual line) as a function of the total
-number of steps. The red portion of steps are `burn-in' and hence discarded,
-from this plot we see why: the walkers are initialised from the uniform prior
-and initially spend some time exploring the whole parameter space before congerging.
-The production samples, colored black, are only taken once the sampler has
-converged - these can be used to generate posterior plots.
-
+\subsection{Example: signal in noise}
+
+In order to familiarise the reader with the features of an MCMC search, we
+will now describe a simple directed search (over $f$ and $\dot{f}$) for a
+simulated signal in Gaussian noise. The signal has a frequency of $30$~Hz
+and a spin-down of $-1{\times}10^{-10}$~Hz/s; all other Doppler parameters
+are `known' and so are irrelevant here. Moreover, the signal has an
+amplitude $h_0=10^{-24}$ while the Gaussian noise has
+$\sqrt{\Sn}=10^{-23}$~Hz$^{-1/2}$, such that the signal has a depth of 10.
+
+First, we must define a prior for each search parameter. Typically, we
+recommend either a uniform prior bounding the area of interest, or a normal
+distribution centered on the target with some well-defined width. However,
+to ensure that the MCMC simulation has a reasonable chance of finding the
+peak, one should consider the corresponding metric volume given in
+Equation~\eqref{eqn_metric_volume}. For this example, we will use a uniform
+prior with a frequency range of $\Delta f = 10^{-7}$~Hz and a spin-down
+range of $\Delta \fdot=10^{-13}$~Hz/s, both centered on the simulated
+signal frequency and spin-down rate. We set the reference time to coincide
+with the middle of the data span, so that the metric volume can be
+decomposed into a frequency contribution,
+\begin{align}
+\Vpe^{(0)} = \frac{(\pi\Tcoh\Delta f)^2}{3} \approx 2.46,
+\end{align}
+and a spin-down contribution,
+\begin{align}
+\Vpe^{(1)} = \frac{4(\pi\Delta \fdot)^2\Tcoh^{4}}{45} \approx 48.9,
+\end{align}
+such that $\V\approx120$ (note that $\Vsky$ does not contribute since we do
+not search over the sky parameters).
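+To make the numbers concrete: the data span is $\Tcoh = 100$~days
+${\approx}\,8.64{\times}10^{6}$~s (the value used in the accompanying
+example script), so that $\pi\Tcoh\Delta f \approx 2.71$ and hence
+$\Vpe^{(0)} \approx (2.71)^{2}/3 \approx 2.46$; $\Vpe^{(1)}$ follows in the
+same way.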
+This metric volume indicates that the signal occupies about 1\% of the prior
+volume, so the MCMC search is expected to work. Alternative priors will need
+careful thought about how to translate them into a metric volume: for
+example, for a Gaussian prior one could use the standard deviation as a
+proxy for the allowed search region.
+
+In addition to defining the prior, one must also consider how to
+\emph{initialise} the walkers. If the prior genuinely represents the stated
+prior knowledge, the usual solution is to initialise the walkers from the
+prior: that is, each walker's starting position is drawn from the prior.
+However, instances do occur when one would like to initialise the walkers
+from a different distribution. For example, if one only needs to estimate
+the evidence (given a particular prior), but is aware from previous searches
+that the only significant peak lies in a small area of parameter space, one
+could initialise the walkers in a small cluster close to that area. In this
+example, we initialise the walkers from the prior such that they have the
+chance to explore the entire prior volume.
+
+Having defined the prior, the final setup step is to choose the number of
+walkers and the number of \emph{burn-in} and \emph{production} steps the
+sampler should take; these are tuning parameters of the MCMC algorithm. The
+number of walkers should typically be a few hundred: the greater the number,
+the more samples are taken, resulting in improved posterior estimates. The
+burn-in steps refer to an initial set of steps which are discarded, as they
+are taken whilst the walkers converge. After the walkers have converged, the
+subsequent steps are known as production steps, since they are used to
+produce posterior estimates and calculate the marginal likelihood.
+
+Using these choices, the simulation is run. To illustrate the full MCMC
+process, in Figure~\ref{fig_MCMC_simple_example} we plot the progress of all
+the individual walkers (each represented by an individual line) as a
+function of the total number of steps. The red portion of steps are burn-in
+and hence discarded; from this plot we see why: the walkers are initialised
+from the uniform prior and initially spend some time exploring the whole
+parameter space before converging. The fact that they converge to a single
+unique point is due to the strength of the signal (substantially elevating
+the likelihood above that of Gaussian fluctuations) and the tight prior,
+which was quantified through the metric volume $\V$. The production samples,
+colored black, are only taken once the sampler has converged; these can be
+used to generate posterior plots.
 
 \begin{figure}[htb]
 \centering
 \includegraphics[width=0.5\textwidth]{fully_coherent_search_using_MCMC_walkers}
-\caption{}
-\label{fig:}
+\caption{The progress of the MCMC simulation for a simulated signal in
+Gaussian noise, searching over the frequency and spin-down. The upper two
+panels show the position of all walkers as a function of the number of steps
+for the frequency and spin-down; where they are colored red the samples are
+discarded as burn-in (the first 100 steps), while where they are colored
+black they are used as production samples. The bottom panel shows the
+distribution of $\widetilde{2\F}$ taken from the production samples.}
+\label{fig_MCMC_simple_example}
 \end{figure}
 
+\subsection{Example: noise-only}
+
 \section{Follow-up}
 \label{sec_follow_up}
 
@@ -686,8 +731,8 @@ fully-coherently.
 We begin by rewriting Equation~\eqref{eqn_lambda_posterior}, the posterior
 distribution of the Doppler parameters, with the explicit dependence on the
 coherence time $\Tcoh$:
 \begin{equation}
-P(\blambda | \Tcoh, x, \Pi_{\rm c}, \Hs, I)
-%=B_{\rm S/N}(x| \Tcoh, \Pi_{\rm c}, \blambda) P(\blambda| \Hs I).
+P(\blambda | \Tcoh, x, \Pic, \Hs, I)
+%=\Bsn(x| \Tcoh, \Pic, \blambda) P(\blambda| \Hs I).
 \propto e^{\hat{\F}(x| \Tcoh, \blambda)} P(\blambda| \Hs I).
 \end{equation}
diff --git a/Paper/weak_signal_follow_up_run_setup.tex b/Paper/weak_signal_follow_up_run_setup.tex
index 9039eace0a5e1596093df2d92bf8c9fce2189478..6a77ddc95edd2997fc634a628bd434651bca561a 100644
--- a/Paper/weak_signal_follow_up_run_setup.tex
+++ b/Paper/weak_signal_follow_up_run_setup.tex
@@ -1,9 +1,9 @@
 \begin{tabular}{c|cccccc}
 Stage & $\Nseg$ & $\Tcoh^{\rm days}$ & $\Nsteps$ & $\V$ & $\Vsky$ & $\Vpe$ \\ \hline
-0 & 80 & 1.25 & 100 & 20.0 & 2.0 & 10.0 \\
-1 & 40 & 2.5 & 100 & $2{\times}10^{2}$ & 7.0 & 20.0 \\
+0 & 93 & 1.1 & 100 & 10.0 & 1.0 & 10.0 \\
+1 & 43 & 2.3 & 100 & $1{\times}10^{2}$ & 6.0 & 20.0 \\
 2 & 20 & 5.0 & 100 & $1{\times}10^{3}$ & 30.0 & 50.0 \\
-3 & 10 & 10.0 & 100 & $1{\times}10^{4}$ & $1{\times}10^{2}$ & 90.0 \\
-4 & 5 & 20.0 & 100 & $7{\times}10^{4}$ & $4{\times}10^{2}$ & $2{\times}10^{2}$ \\
+3 & 9 & 11.1 & 100 & $1{\times}10^{4}$ & $1{\times}10^{2}$ & $1{\times}10^{2}$ \\
+4 & 4 & 25.0 & 100 & $1{\times}10^{5}$ & $6{\times}10^{2}$ & $2{\times}10^{2}$ \\
 5 & 1 & 100.0 & 100,100 & $1{\times}10^{6}$ & $1{\times}10^{3}$ & $9{\times}10^{2}$ \\
 \end{tabular}
diff --git a/Paper/weak_signal_follow_up_walkers.png b/Paper/weak_signal_follow_up_walkers.png
index 96097988a61fd8e354d2fbac5665eccbc02ec074..f389425ce0d15083b02cf310a0463e0db2a74cfd 100644
Binary files a/Paper/weak_signal_follow_up_walkers.png and b/Paper/weak_signal_follow_up_walkers.png differ
diff --git a/examples/fully_coherent_search_using_MCMC.py b/examples/fully_coherent_search_using_MCMC.py
index 7e866bc487eda4a4c91108431f5d2e97f7fbc52b..fb4464ec9de22ab5f73f53bf11d5366597c85e54 100644
--- a/examples/fully_coherent_search_using_MCMC.py
+++ b/examples/fully_coherent_search_using_MCMC.py
@@ -1,33 +1,61 @@
-from pyfstat import MCMCSearch
+import pyfstat
+import numpy as np
 
+# Properties of the GW data
+sqrtSX = 1e-23
+tstart = 1000000000
+duration = 100*86400
+tend = tstart + duration
+
+# Properties of the signal
 F0 = 30.0
 F1 = -1e-10
 F2 = 0
 Alpha = 5e-3
 Delta = 6e-2
-tref = 362750407.0
+tref = 0.5*(tstart+tend)
 
-tstart = 1000000000
-duration = 100*86400
-tend = tstart + duration
+# Inject a signal at a sensitivity depth of 10, i.e. h0 = sqrtSX/depth
+depth = 10
+h0 = sqrtSX / depth
+data_label = 'fully_coherent_search_using_MCMC'
+
+data = pyfstat.Writer(
+    label=data_label, outdir='data', tref=tref,
+    tstart=tstart, F0=F0, F1=F1, F2=F2, duration=duration, Alpha=Alpha,
+    Delta=Delta, h0=h0, sqrtSX=sqrtSX)
+data.make_data()
 
+# The predicted twoF, given by lalapps_predictFstat, can be accessed by
+twoF = data.predict_fstat()
+print('Predicted twoF value: {}\n'.format(twoF))
 
-theta_prior = {'F0': {'type': 'unif', 'lower': F0*(1-1e-6), 'upper': F0*(1+1e-6)},
-               'F1': {'type': 'unif', 'lower': F1*(1+1e-2), 'upper': F1*(1-1e-2)},
+# Widths of the uniform priors on F0 and F1
+DeltaF0 = 1e-7
+DeltaF1 = 1e-13
+VF0 = (np.pi * duration * DeltaF0)**2 / 3.0
+VF1 = (np.pi * duration**2 * DeltaF1)**2 * 4/45.
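+# VF0 and VF1 are the frequency and spin-down factors of the metric
+# volume (Vpe^(0) and Vpe^(1) in the paper); their product, here ~120,
+# estimates how many metric-resolution cells the prior box contains.
+# Vsky plays no role since Alpha and Delta are fixed.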
+print('\nV={:1.2e}, VF0={:1.2e}, VF1={:1.2e}\n'.format(VF0*VF1, VF0, VF1))
+
+theta_prior = {'F0': {'type': 'unif',
+                      'lower': F0-DeltaF0/2.,
+                      'upper': F0+DeltaF0/2.},
+               'F1': {'type': 'unif',
+                      'lower': F1-DeltaF1/2.,
+                      'upper': F1+DeltaF1/2.},
                'F2': F2,
                'Alpha': Alpha,
                'Delta': Delta
                }
 
-ntemps = 3
+ntemps = 1
 log10temperature_min = -1
-nwalkers = 100
-nsteps = [1000, 1000]
-
-mcmc = MCMCSearch(label='fully_coherent_search_using_MCMC', outdir='data',
-                  sftfilepath='data/*basic*sft', theta_prior=theta_prior,
-                  tref=tref, tstart=tstart, tend=tend, nsteps=nsteps,
-                  nwalkers=nwalkers, ntemps=ntemps,
-                  log10temperature_min=log10temperature_min)
-mcmc.run()
+nwalkers = 1000
+nsteps = [50, 50]
+
+mcmc = pyfstat.MCMCSearch(
+    label='fully_coherent_search_using_MCMC', outdir='data',
+    sftfilepath='data/*'+data_label+'*sft', theta_prior=theta_prior, tref=tref,
+    minStartTime=tstart, maxStartTime=tend, nsteps=nsteps, nwalkers=nwalkers,
+    ntemps=ntemps, log10temperature_min=log10temperature_min)
+mcmc.run(context='paper', subtractions=[30, -1e-10])
 mcmc.plot_corner(add_prior=True)
 mcmc.print_summary()
diff --git a/pyfstat.py b/pyfstat.py
index c578d44ebf1daaaee597e7990a4ebb1316baac39..918b448bdd1623e5c8d6518a1f2963218c06d0b7 100755
--- a/pyfstat.py
+++ b/pyfstat.py
@@ -1102,6 +1102,7 @@ class MCMCSearch(BaseSearchClass):
                     .format(sampler.tswap_acceptance_fraction))
             fig, axes = self.plot_walkers(sampler, symbols=self.theta_symbols,
                                           **kwargs)
+            fig.tight_layout()
             fig.savefig('{}/{}_init_{}_walkers.png'.format(
                 self.outdir, self.label, j), dpi=200)
 
@@ -1126,6 +1127,7 @@ class MCMCSearch(BaseSearchClass):
         fig, axes = self.plot_walkers(sampler, symbols=self.theta_symbols,
                                       burnin_idx=nburn, **kwargs)
+        fig.tight_layout()
         fig.savefig('{}/{}_walkers.png'.format(self.outdir, self.label),
                     dpi=200)
 
@@ -1370,7 +1372,7 @@ class MCMCSearch(BaseSearchClass):
     def plot_walkers(self, sampler, symbols=None, alpha=0.4, color="k",
                      temp=0, lw=0.1, burnin_idx=None, add_det_stat_burnin=False,
                      fig=None, axes=None, xoffset=0, plot_det_stat=True,
-                     context='classic'):
+                     context='classic', subtractions=None):
         """ Plot all the chains from a sampler """
 
         shape = sampler.chain.shape
@@ -1386,9 +1388,12 @@ class MCMCSearch(BaseSearchClass):
                 "available range").format(temp))
 
         chain = sampler.chain[temp, :, :, :]
+        if subtractions is None:
+            subtractions = [0 for i in range(ndim)]
+
         with plt.style.context((context)):
             if fig is None and axes is None:
-                fig = plt.figure(figsize=(8, 4*ndim))
+                fig = plt.figure(figsize=(3.3, 3.0*ndim))
                 ax = fig.add_subplot(ndim+1, 1, 1)
                 axes = [ax] + [fig.add_subplot(ndim+1, 1, i)
                                for i in range(2, ndim+1)]
@@ -1402,12 +1407,19 @@ class MCMCSearch(BaseSearchClass):
                 cs = chain[:, :, i].T
                 if burnin_idx:
                     axes[i].plot(xoffset+idxs[:burnin_idx],
-                                 cs[:burnin_idx], color="r", alpha=alpha,
+                                 cs[:burnin_idx]-subtractions[i],
+                                 color="r", alpha=alpha,
                                  lw=lw)
-                axes[i].plot(xoffset+idxs[burnin_idx:], cs[burnin_idx:],
+                axes[i].plot(xoffset+idxs[burnin_idx:],
+                             cs[burnin_idx:]-subtractions[i],
                              color="k", alpha=alpha, lw=lw)
                 if symbols:
+                    # if a subtraction is applied, label the axis as
+                    # 'symbol - symbol_0' rather than just 'symbol'
+                    if subtractions[i] == 0:
+                        axes[i].set_ylabel(symbols[i])
+                    else:
+                        axes[i].set_ylabel(
+                            symbols[i]+'$-$'+symbols[i]+'$_0$')
+
             else:
                 axes[0].ticklabel_format(useOffset=False, axis='y')
                 cs = chain[:, :, temp].T
@@ -1445,6 +1457,10 @@ class MCMCSearch(BaseSearchClass):
             Range = abs(maxv-minv)
             axes[-1].set_xlim(minv-0.1*Range, maxv+0.1*Range)
 
+            xfmt = matplotlib.ticker.ScalarFormatter()
+            xfmt.set_powerlimits((-4, 4))
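+            # ScalarFormatter with power limits (-4, 4): tick labels only
+            # switch to scientific notation for values outside roughly
+            # 1e-4 to 1e4, keeping the narrow paper-sized panel readable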
+            axes[-1].xaxis.set_major_formatter(xfmt)
+            axes[-2].set_xlabel(r'$\textrm{Number of steps}$', labelpad=0.1)
 
         return fig, axes
 
@@ -2383,6 +2399,7 @@ class MCMCFollowUpSearch(MCMCSemiCoherentSearch):
                 ax.axvline(nsteps_total, color='k', ls='--')
             nsteps_total += nburn+nprod
 
+        fig.tight_layout()
         fig.savefig('{}/{}_walkers.png'.format(
             self.outdir, self.label), dpi=200)