From f4c5218b8220ecf66f609f582e6e500b88839ffd Mon Sep 17 00:00:00 2001
From: Gregory Ashton <gregory.ashton@ligo.org>
Date: Tue, 22 Nov 2016 17:15:17 +0100
Subject: [PATCH] Various improvements to the paper

---
 Paper/definitions.tex   |   1 +
 Paper/paper_cw_mcmc.tex | 121 +++++++++++++++++++++++++++++-----------
 pyfstat.py              |  41 ++++++++------
 3 files changed, 115 insertions(+), 48 deletions(-)

diff --git a/Paper/definitions.tex b/Paper/definitions.tex
index 65a3d4d..4eb752e 100644
--- a/Paper/definitions.tex
+++ b/Paper/definitions.tex
@@ -9,6 +9,7 @@
 \newcommand{\Nseg}{N_{\rm seg}}
 \newcommand{\Nsteps}{N_{\rm steps}}
 \newcommand{\Ntemps}{N_{\rm temps}}
+\newcommand{\Nstages}{N_{\rm stages}}
 \newcommand{\Nspindown}{N_{\rm spindowns}}
 \renewcommand{\H}{\mathcal{H}}
 \newcommand{\Hs}{\H_{\rm s}}
diff --git a/Paper/paper_cw_mcmc.tex b/Paper/paper_cw_mcmc.tex
index badc37b..2c91873 100644
--- a/Paper/paper_cw_mcmc.tex
+++ b/Paper/paper_cw_mcmc.tex
@@ -739,49 +739,106 @@ P(\blambda | \Tcoh, x, \Pic, \Hs, I)
 Introducing the coherent time $\Tcoh$ as a variable provides an ability to
 adjust the likelihood. Therefore, a natural way to perform a follow-up is to
 start the MCMC simulations with a short coherence time (such that the signal
-peak occupies a substantial fraction of the prior volume). Subsequently,
-incrementally increase this coherence time in a controlled manner, aiming to
-allow the MCMC walkers to converge to the new likelihood before again
-increasing the coherence time. This can be considered analogous to simulated
-annealing (where the likelihood is raised to a power $1/T$ and subseuqntly
-`cooled') with the important difference that the semi-coherent likelihood is
-wider at short coherence times (rather than flatter as in the case of
-high-temperature simulated annealing stages).
-
-To illustrate the utility of this method, in Figure~\ref{fig_follow_up} we show
-the progress of the MCMC sampler during such a follow-up. The data, 100 days
-from a single detector, consists of Gaussian noise with
-$\sqrt{\Sn}=10^{-23}$~Hz$^{-1/2}$ (at the fiducial frequency of the signal) and
-a signal. The signal has an amplitude $h_0=1.4\times10^{25}$ such that the
-signal has a depth of $\sqrt{\Sn}/h_0=70$ in the noise. The search setup is
-outlined in Table~\ref{tab_weak_signal_follow_up}.
+peak occupies a substantial fraction of the prior volume) and then to
+incrementally increase this coherence time in a controlled manner, aiming to
+allow the MCMC walkers to converge to the new likelihood before again
+increasing the coherence time. Ultimately, the coherence time is increased
+until $\Tcoh = \Tspan$. If this is done in $\Nstages$ discrete \emph{stages},
+it introduces a further set of tuning parameters, namely the ladder of
+coherence times $\Tcoh^{i}$, where $i \in [0, \Nstages]$.
+
+In some ways, this bears a resemblance to so-called simulated annealing, a
+method in which the likelihood is raised to a power $1/T$ and subsequently
+`cooled'. The important difference is that the semi-coherent likelihood is
+wider at short coherence times, rather than flatter as in the case of
+high-temperature simulated annealing stages.
+
+Of course, in practice we do not arbitrarily vary $\Tcoh^i$, but rather the
+number of segments at each stage, $\Nseg^{i}\equiv \Tspan/\Tcoh^{i}$.
+Ideally, the ladder of segment numbers should be chosen to ensure that the
+metric volume at the $i^{th}$ stage, $\V_i \equiv \V(\Nseg^i)$, is a constant
+fraction of the volume at adjacent stages. That is, we define
+\begin{equation}
+\mathcal{R} \equiv \frac{\V_{i+1}}{\V_{i}},
+\end{equation}
+where $\mathcal{R} \ge 1$ since $\Tcoh^{i+1} > \Tcoh^{i}$.
 
+Given a fixed prior on the Doppler parameters and a fixed span of data, the
+metric volume $\V_{\Nstages}$ for $\Tcoh^{\Nstages} = \Tspan$ is fixed, or in
+terms of the number of segments, $\Nseg^{\Nstages} = 1$.
+We can therefore define a minimisation problem:
+\begin{align}
+\min_{\Nseg^{i} \in \mathbb{N}}
+\left| \log\mathcal{R} + \log\V(\Nseg^{i}) - \log\V(\Nseg^{i+1})\right|
+\end{align}
+which can be solved numerically to a specified tolerance. Due to the difficulty
+of solving such a minimisation with the integer constraint on $\Nseg^{i}$, we
+simply solve it for a real scalar and then round to the nearest integer. We now
+have a method to generate a ladder of $\Nseg^{i}$ which keeps the ratio of
+volumes between adjacent stages fixed. Starting with $\Nseg^{\Nstages} = 1$, we
+generate $\Nseg^{\Nstages-1}$ such that $\V^{\Nstages-1} < \V^{\Nstages}$ and
+subsequently iterate. Finally, we must define $\V^{\rm min}$ as the stopping
+criterion: a metric volume small enough that the initial stage will find a
+signal. This stopping criterion itself sets $\Nstages$; alternatively, one
+could choose $\Nstages$ directly.
+
+We have now defined a method to automate the task of choosing the ladder of
+coherence times. This requires only two tuning parameters, the ratio between
+stages $\mathcal{R}$ and the minimum metric volume $\V^{\rm min}$.
+
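As a minimal illustrative sketch (not drawn from the pyfstat source), the
ladder generation described above might be implemented as follows; the
function name, the Nseg_max bound and the toy power-law volume in the trailing
comment are assumptions, and the metric-volume function V(Nseg) is assumed to
be supplied by the user:

import numpy as np
from scipy.optimize import minimize_scalar

def generate_Nseg_ladder(V, V_min, R=10.0, Nseg_max=1e4):
    """Generate the ladder of segment numbers (first stage first).

    V : callable, metric volume as a function of a real-valued number of
        segments; its form is problem-specific and assumed given.
    V_min : float, stopping criterion; stages are added until the initial
        stage's metric volume is at most V_min.
    R : float, target ratio of metric volumes between adjacent stages.
    """
    ladder = [1]  # final, fully-coherent stage: Nseg = 1
    while V(ladder[-1]) > V_min:
        Nseg_next = ladder[-1]
        # Solve |log R + log V(Nseg) - log V(Nseg_next)| = 0 over a real
        # scalar, ignoring the integer constraint ...
        res = minimize_scalar(
            lambda n: abs(np.log(R) + np.log(V(n)) - np.log(V(Nseg_next))),
            bounds=(Nseg_next, Nseg_max), method='bounded')
        # ... then round to the nearest integer
        Nseg_new = int(round(res.x))
        if Nseg_new == ladder[-1]:  # guard against rounding stalls
            break
        ladder.append(Nseg_new)
    return ladder[::-1]

# Usage with a toy power-law volume (illustrative scaling only):
#   V = lambda Nseg: 1e8 * Nseg**-3.0
#   generate_Nseg_ladder(V, V_min=100, R=10)
#   -> [190, 88, 41, 19, 9, 4, 2, 1]
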
+\subsection{Example}
+
+We now provide an illustrative example of the follow-up method. We consider a
+search over the sky position and frequency in 100 days of data from a
+single detector, with $\sqrt{\Sn}=10^{-23}$~Hz$^{-1/2}$ (at the fiducial
+frequency of the signal). The simulated signal has an amplitude
+$h_0=1.4\times10^{-25}$, corresponding to a depth of
+$\sqrt{\Sn}/h_0\approx70$ in the noise.
+
+First, we must define the setup for the run. Using $\mathcal{R}=10$ and
+$\V^{\rm min}=100$, our optimisation procedure is run and proposes the setup
+laid out in Table~\ref{tab_weak_signal_follow_up}. In addition, we show the
+number of steps taken at each stage.
 \begin{table}[htb]
-\caption{The search setup used in Figure~\ref{fig_follow_up}}
+\caption{The search setup used in Figure~\ref{fig_follow_up}, generated with
+$\mathcal{R}=10$ and $\V^{\rm min}=100$.}
 \label{tab_weak_signal_follow_up}
 \input{weak_signal_follow_up_run_setup}
 \end{table}
 
+The choice of $\mathcal{R}$ and $\V^{\rm min}$ is a compromise between the
+total computing time and the ability to ensure a candidate will be identified.
+From experimentation, we find that $\V^{\rm min}$ values of 100 or so are
+sufficient to ensure that any peaks are sufficiently broad during the
+initial stage. Values of $\mathcal{R}$ much larger than $10^{3}$ or so were
+found to result in the MCMC simulations `losing' the peaks between stages; we
+conservatively opt for 10 here, but values as large as 100 were also successful.
+
+In Figure~\ref{fig_follow_up} we show the progress of the MCMC sampler during
+the follow-up. As expected from Table~\ref{tab_weak_signal_follow_up}, during
+the initial stage the signal peak is broad with respect to the size of the
+prior volume, so the MCMC simulation quickly converges to it. Subsequently,
+each time the number of segments is reduced, the peak narrows and the walkers
+similarly converge to this new solution. At times the walkers can appear
+inconsistent, but this is due to the changing way that the Gaussian noise
+adds to the signal. Eventually, the walkers all converge to the true signal.
 \begin{figure}[htb]
-\centering
-\includegraphics[width=0.5\textwidth]{weak_signal_follow_up_walkers}
-
+\centering \includegraphics[width=0.5\textwidth]{weak_signal_follow_up_walkers}
 \caption{In the top three panels we show the progress of the 500 parallel
-walkers (each of which is an individual thin line) during the MCMC simulation
-for each of the search parameters, frequency $f$, right-ascension $\alpha$ and
-declination $\delta$. Each vertical dashed line indicates the start of a new
-stage of the search, the parameters for all stages are listed in
-Table~\ref{tab_weak_signal_follow_up}. Samples for use in estimating
-posteriors, or other processes are taken from those samples colored black,
-which we call the production samples.  The period for which the lines are
-coloured red, the samples are discarded either because they are taken from the
-posterior when $\Tcoh < \Tspan$, or they are from the burn-in of the final
-stage. In the final panel we plot the estimated distribution of
-$\widetilde{2\F}$ taken from the production samples.}
-
+walkers (see Figure~\ref{fig_MCMC_simple_example} for a description) during the
+MCMC simulation for each of the search parameters, frequency $f$,
+right-ascension $\alpha$ and declination $\delta$. Each vertical dashed line
+indicates the start of a new stage of the search; the parameters for all stages
+are listed in Table~\ref{tab_weak_signal_follow_up}.}
 \label{fig_follow_up}
 \end{figure}
 
+The key advantage to note here is that all walkers successfully converged to
+the signal peak, which occupies $\approx 10^{-6}$ of the initial volume. While
+it is possible for this to occur during an ordinary MCMC simulation (with
+$\Tcoh$ fixed at $\Tspan$), it would take much longer to converge as the
+chains explore the other `noise peaks' in the data.
+
 \section{Alternative waveform models: transients}
 \label{sec_transients}
 
diff --git a/pyfstat.py b/pyfstat.py
index 918b448..3ff4005 100755
--- a/pyfstat.py
+++ b/pyfstat.py
@@ -1066,7 +1066,7 @@ class MCMCSearch(BaseSearchClass):
             pass
         return sampler
 
-    def run(self, proposal_scale_factor=2, **kwargs):
+    def run(self, proposal_scale_factor=2, create_plots=True, **kwargs):
 
         self.old_data_is_okay_to_use = self.check_old_data_is_okay_to_use()
         if self.old_data_is_okay_to_use is True:
@@ -1100,11 +1100,13 @@ class MCMCSearch(BaseSearchClass):
             if self.ntemps > 1:
                 logging.info("Tswap acceptance fraction: {}"
                              .format(sampler.tswap_acceptance_fraction))
-            fig, axes = self.plot_walkers(sampler, symbols=self.theta_symbols,
-                                          **kwargs)
-            fig.tight_layout()
-            fig.savefig('{}/{}_init_{}_walkers.png'.format(
-                self.outdir, self.label, j), dpi=200)
+            if create_plots:
+                fig, axes = self.plot_walkers(sampler,
+                                              symbols=self.theta_symbols,
+                                              **kwargs)
+                fig.tight_layout()
+                fig.savefig('{}/{}_init_{}_walkers.png'.format(
+                    self.outdir, self.label, j), dpi=200)
 
             p0 = self.get_new_p0(sampler)
             p0 = self.apply_corrections_to_p0(p0)
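A minimal usage sketch of the new create_plots flag; the MCMCSearch
constructor arguments shown here are placeholders rather than the class's
full signature:

# Hypothetical minimal setup; these constructor arguments are placeholders
search = MCMCSearch(label='followup', outdir='data',
                    theta_prior=theta_prior, tref=tref)

# Skip the walker plots entirely, e.g. for non-interactive batch runs; when
# plotting is enabled, extra keyword arguments are forwarded to plot_walkers()
search.run(create_plots=False)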
@@ -1125,11 +1127,12 @@ class MCMCSearch(BaseSearchClass):
             logging.info("Tswap acceptance fraction: {}"
                          .format(sampler.tswap_acceptance_fraction))
 
-        fig, axes = self.plot_walkers(sampler, symbols=self.theta_symbols,
-                                      burnin_idx=nburn, **kwargs)
-        fig.tight_layout()
-        fig.savefig('{}/{}_walkers.png'.format(self.outdir, self.label),
-                    dpi=200)
+        if create_plots:
+            fig, axes = self.plot_walkers(sampler, symbols=self.theta_symbols,
+                                          burnin_idx=nburn, **kwargs)
+            fig.tight_layout()
+            fig.savefig('{}/{}_walkers.png'.format(self.outdir, self.label),
+                        dpi=200)
 
         samples = sampler.chain[0, :, nburn:, :].reshape((-1, self.ndim))
         lnprobs = sampler.lnprobability[0, :, nburn:].reshape((-1))
@@ -1393,7 +1396,7 @@ class MCMCSearch(BaseSearchClass):
 
         with plt.style.context((context)):
             if fig is None and axes is None:
-                fig = plt.figure(figsize=(3.3, 3.0*ndim))
+                fig = plt.figure(figsize=(4, 3.0*ndim))
                 ax = fig.add_subplot(ndim+1, 1, 1)
                 axes = [ax] + [fig.add_subplot(ndim+1, 1, i)
                                for i in range(2, ndim+1)]
@@ -1779,14 +1782,17 @@ class MCMCSearch(BaseSearchClass):
         print('p-value = {}'.format(p_val))
         return p_val
 
-    def compute_evidence(self):
-        """ Computes the evidence/marginal likelihood for the model """
+    def get_evidence(self):
         fburnin = float(self.nsteps[-2])/np.sum(self.nsteps[-2:])
         lnev, lnev_err = self.sampler.thermodynamic_integration_log_evidence(
             fburnin=fburnin)
 
         log10evidence = lnev/np.log(10)
         log10evidence_err = lnev_err/np.log(10)
+        return log10evidence, log10evidence_err
+
+    def compute_evidence_long(self):
+        """ Computes the evidence/marginal likelihood for the model """
         betas = self.betas
         alllnlikes = self.sampler.lnlikelihood[:, :, self.nsteps[-2]:]
         mean_lnlikes = np.mean(np.mean(alllnlikes, axis=1), axis=1)
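A minimal sketch of how the refactored get_evidence might be used to compare
two models; search_a and search_b are hypothetical completed MCMCSearch
instances, and adding the errors in quadrature assumes the two estimates are
independent:

import numpy as np

# Thermodynamic-integration evidence (log10) from each completed run
log10E_a, log10E_a_err = search_a.get_evidence()
log10E_b, log10E_b_err = search_b.get_evidence()

# log10 Bayes factor of model A over model B, with the uncertainties
# combined in quadrature
log10_BF = log10E_a - log10E_b
log10_BF_err = np.sqrt(log10E_a_err**2 + log10E_b_err**2)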
@@ -2399,9 +2405,12 @@ class MCMCFollowUpSearch(MCMCSemiCoherentSearch):
                 ax.axvline(nsteps_total, color='k', ls='--')
             nsteps_total += nburn+nprod
 
+        try:
             fig.tight_layout()
-            fig.savefig('{}/{}_walkers.png'.format(
-                self.outdir, self.label), dpi=200)
+        except ValueError as e:
+            logging.warning('tight_layout() failed with: {}'.format(e))
+        fig.savefig('{}/{}_walkers.png'.format(
+            self.outdir, self.label), dpi=200)
 
         samples = sampler.chain[0, :, nburn:, :].reshape((-1, self.ndim))
         lnprobs = sampler.lnprobability[0, :, nburn:].reshape((-1))
-- 
GitLab