diff --git a/Paper/definitions.tex b/Paper/definitions.tex
index 5bfa6393e43bb32b5b2c5375fc252b3fb4a1d5d6..65a3d4d8dda29df0c7b66684b06fcdfa532410da 100644
--- a/Paper/definitions.tex
+++ b/Paper/definitions.tex
@@ -1,3 +1,4 @@
+\newcommand{\fdot}{\dot{f}}
 \newcommand{\F}{{\mathcal{F}}}
 \newcommand{\A}{\boldsymbol{\mathcal{A}}}
 \newcommand{\blambda}{\boldsymbol{\mathbf{\lambda}}}
diff --git a/Paper/fully_coherent_search_using_MCMC_walkers.png b/Paper/fully_coherent_search_using_MCMC_walkers.png
index 589a99d81a18deee317b051c95df1199492729ae..d338b9fcbc9d77a39c38c7ccab75710570e59759 100644
Binary files a/Paper/fully_coherent_search_using_MCMC_walkers.png and b/Paper/fully_coherent_search_using_MCMC_walkers.png differ
diff --git a/Paper/paper_cw_mcmc.tex b/Paper/paper_cw_mcmc.tex
index d32706b7aed68a5fb8b6e391bc39910a89a81163..badc37b74a83ce04281f96fcca5fff6e6f760b56 100644
--- a/Paper/paper_cw_mcmc.tex
+++ b/Paper/paper_cw_mcmc.tex
@@ -50,8 +50,7 @@
 
 \begin{document}
 
-\title{MCMC follow-up methods for continuous gravitational wave candidates including
-glitching and transient waveforms}
+\title{MCMC follow-up methods for continuous gravitational wave candidates}
 
     \author{G. Ashton}
     \email[E-mail: ]{gregory.ashton@ligo.org}
@@ -197,20 +196,20 @@ data contains an additive mixture of noise and a signal $h(t; \A, \blambda)$.
 In order to make a quantitative comparison, we use Bayes' theorem in the usual
 way to write the odds as
 \begin{equation}
-O_{\rm S/N} \equiv \frac{P(\Hs| x I)}{P(\Hn| x I)} =
+O_{\rm S/N} \equiv \frac{P(\Hs| x, I)}{P(\Hn| x, I)} =
 \Bsn(x| I) \frac{P(\Hs| I)}{P(\Hn | I)},
 \end{equation}
 where the second factor is the prior odds while the first factor is the
 \emph{Bayes factor}:
 \begin{equation}
-\Bsn(x| I) = \frac{P(x| \Hs I)}{P(x| \Hn I)}.
+\Bsn(x| I) = \frac{P(x| \Hs, I)}{P(x| \Hn, I)}.
 \end{equation}
 Typically, we set the prior odds to unity such that it is the Bayes factor
 which determines our confidence in the signal hypothesis. In this work we will
-therefore discuss the Bayes factor with the impplied assumption this is
+therefore discuss the Bayes factor with the implied assumption that it is
 equivalent to the odds, unless we have a good reason to change the prior odds.
 
-We can rewrite this Bayes factor in terms of the two sets of signal parameters
+We can rewrite the Bayes factor in terms of the two sets of signal parameters
 as
 \begin{equation}
 \Bsn(x| I) = \frac{P(x, \A, \blambda|\Hs, I)}
@@ -277,13 +276,13 @@ where $(h|h)$ is the inner product of the signal with itself (see for example
 the detector and $\mathcal{N}$ is the number of detectors.
 
 
-\subsection{Using the $\F$-statistic to compute a Bayes factor}
-At first, it appeared that the $\F$-statistic was independent of the Bayesian
-framework. However, it was shown by \citet{prix2009} that if we marginalise
-over the four amplitude parameters of Equation~\eqref{eqn_full_bayes}, choosing
-a prior $\Pi_{\rm c}$ such that
+\subsection{Using the $\F$-statistic to compute a Bayes factor}
+At first, it appears that the $\F$-statistic is independent of the Bayesian
+framework since it was first derived directly from the likelihood. However, it
+was shown by \citet{prix2009} that if we marginalise over the four amplitude
+parameters of Equation~\eqref{eqn_full_bayes}, choosing a prior $\Pic$ such that
 \begin{equation}
-P(\A| \Hs, \Pi_{\rm c}, I) \equiv \left\{
+P(\A| \Hs, \Pic, I) \equiv \left\{
 \begin{array}{ll}
 C & \textrm{ for } \ho < \homax \\
 0 & \textrm{ otherwise}
@@ -293,7 +292,7 @@ C & \textrm{ for } \ho < \homax \\
 then the integral, when $\homax \gg 1$, is a Gaussian integral and can be
 computed analytically as
 \begin{align}
-B_{\rm S/N}(x| \Pi_{\rm c}, \blambda) & \equiv
+\Bsn(x| \Pic, \blambda, I) & \equiv
 \int
 \mathcal{L}(x; \A, \blambda)
 P(\A| \Hs, \Pic, I) d\A
@@ -310,13 +309,13 @@ fixed Doppler parameters.
 
 As such, we can define the Bayes factor of Equation~\eqref{eqn_full_bayes} as
 \begin{equation}
-B_{\rm S/N}(x| \Pi_{\rm c}, I) = \int
-B_{\rm S/N}(x| \Pi_{\rm c}, \blambda) P(\blambda| \Hs, I)
+\Bsn(x| \Pic, I) = \int
+\Bsn(x| \Pic, \blambda, I) P(\blambda| \Hs, I)
  d\blambda,
 \end{equation}
 or neglecting the constants
 \begin{equation}
-B_{\rm S/N}(x| \Pi_{\rm c}, I) \propto \int
+\Bsn(x| \Pic, I) \propto \int
 e^{\F(x| \blambda)} P(\blambda| \Hs, I)
  d\blambda.
 \label{eqn_bayes_over_F}
@@ -327,7 +326,7 @@ there exists a wealth of well-tested tools \citep{lalsuite} capable of
 computing the $\mathcal{F}$-statistic for CW signals, transient-CWs, and CW
 signals from binary systems; these can be leveraged to compute
 Equation~\eqref{eqn_bayes_over_F} or, adding in the constant,
-$B_{\rm S/N}(x| \Pi_{\rm c})$ itself. The disadvantage to this method is that
+$\Bsn(x| \Pic, I)$ itself. The disadvantage of this method is that
 we are forced to use the prior $\Pic$, which was shown by \citet{prix2009} to
 be unphysical.
 
@@ -343,14 +342,14 @@ P(\theta| x, \H, I) \propto P(x| \theta, \H, I)P(\theta| \H, I),
 \end{equation}
 is used to evaluate proposed jumps from one point in parameter space to
 other points; jumps which increase this probability are accepted with some
 probability. The
-algorithm, proceeding in this way, is highly effective at resolving peaks in
-the high-dimension parameter spaces.
+algorithm, proceeding in this way, is highly efficient at resolving peaks in
+high-dimensional parameter spaces.
 
 At this point, we note the equivalence of Equation~\eqref{eqn_bayes_for_theta}
 to the integrand of Equation~\eqref{eqn_bayes_over_F}:
 \begin{equation}
-P(\blambda | x, \Pi_{\rm c}, \Hs, I)
-%=B_{\rm S/N}(x| \Pi_{\rm c}, \blambda) P(\blambda| \Hs I).
+P(\blambda | x, \Pic, \Hs, I)
+%=\Bsn(x| \Pic, \blambda) P(\blambda| \Hs, I).
 \propto e^{\F(x| \blambda)} P(\blambda| \Hs, I),
 \label{eqn_lambda_posterior}
 \end{equation}
 proposal distribution must be `tuned' so that the MCMC sampler efficiently
 walks the parameter space without either jumping too far off the peak, or
 taking such small steps that it takes a long period of time to traverse the
 peak. The \texttt{emcee} sampler addresses this by using an ensemble, a large
-number ${\sim}100$ parallel `walkers', in which the proposal for each walker
-is generated from the current distribution of the other walkers. Moreover, by
-applying an an affine transformation, the efficiency of the algorithm is not
-diminished when the parameter space is highly anisotropic. As such, this
-sampler requires little in the way of tuning: a single proposal scale and the
-Number of steps to take.
-
-Beyond the standard ensemble sampler, we will often use one further
-modification, namely the parallel-tempered ensemble sampler. A parallel
+number ${\sim}100$ parallel \emph{walkers}, in which the proposal for each
+walker is generated from the current distribution of the other walkers.
+Moreover, by applying an affine transformation, the efficiency of the
+algorithm is not diminished when the parameter space is highly anisotropic. As
+such, this sampler requires little in the way of tuning: a single proposal
+scale and the number of steps to take.
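+(For reference, the default proposal used by \texttt{emcee} is the
+\emph{stretch move}: a walker at position $\blambda_k$ proposes the new
+position $\blambda' = \blambda_j + z(\blambda_k - \blambda_j)$, where
+$\blambda_j$ is another walker chosen at random and $z$ is drawn from a
+density $g(z) \propto 1/\sqrt{z}$ on $[1/a, a]$; the scale $a$, which
+defaults to $a=2$, is the single proposal scale referred to above.)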
+
+\subsection{Parallel tempering: sampling multimodal posteriors}
+Beyond the standard ensemble sampler, we will also use one further
+modification, the parallel-tempered ensemble sampler. A parallel
 tempered MCMC simulation, first proposed by \citet{swendsen1986}, runs
 $\Ntemps$ simulations in parallel, in which the likelihood in the $i^{\rm th}$
-parallel simulation is raied to a power of $1/T_{i}$ (where $T_i$ is referred
-to as the temperature) such that Equation~\eqref{eqn_lambda_posterior} becomes
+parallel simulation is raised to the power $1/T_{i}$, where $T_i$ is referred
+to as the temperature. As such, Equation~\eqref{eqn_lambda_posterior} is
+written as
 \begin{equation}
-P(\blambda | T_i, x, \Pi_{\rm c}, \Hs, I)
-%=B_{\rm S/N}(x| \Pi_{\rm c}, \blambda)^{T_i} P(\blambda| \Hs I).
+P(\blambda | T_i, x, \Pic, \Hs, I)
+%=\Bsn(x| \Pic, \blambda)^{1/T_i} P(\blambda| \Hs, I).
 \propto (e^{\F(x| \blambda)})^{1/T_i} P(\blambda| \Hs, I).
 \end{equation}
-Setting $T_0=1$ with $T_i > T_0 \forall i > 1$, such that the lowest
+We set $T_0=1$ with $T_i > T_0 \; \forall \; i > 0$, such that the lowest
 temperature recovers Equation~\eqref{eqn_lambda_posterior} while for higher
 temperatures the likelihood is broadened (for a Gaussian likelihood, the
 standard deviation is larger by a factor of $\sqrt{T_i}$). Periodically, the
-different tempereates swap elements. This allows the $T_0$ chain (from which we
-draw samples of the posterior) to efficiently sample from multi-modal
-posteriors. This does however introduce two additional tuning parameters, the
-number and range of the set of temperatures $\{T_i\}$.
+algorithm swaps the positions of the walkers between the different
+temperatures. This allows the $T_0$ chain (from which we draw samples of the
+posterior) to efficiently sample from multi-modal posteriors. This introduces
+two additional tuning parameters, the number and range of the set of
+temperatures $\{T_i\}$; we will discuss their significance when relevant.
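+For reference, a proposed swap of the positions $\blambda_i$ and $\blambda_j$
+of two walkers at temperatures $T_i$ and $T_j$ is accepted with the standard
+Metropolis probability
+\begin{equation}
+\min\left(1, \exp\left[\left(\frac{1}{T_i} - \frac{1}{T_j}\right)
+\left(\F(x| \blambda_j) - \F(x| \blambda_i)\right)\right]\right),
+\end{equation}
+which leaves each tempered distribution invariant.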
 
-\subsection{Parallel tempering}
+\subsection{Parallel tempering: estimating the Bayes factor}
 In addition, parallel tempering offers a robust method to estimate the
 Bayes factor of Equation~\eqref{eqn_bayes_over_F}. If we define
-$\beta\equiv1/T$, the inverse temperature and $Z(\beta)\equiv B_{\rm S/N}(x| \Pi_{\rm
+$\beta\equiv1/T$, the inverse temperature, and $Z(\beta)\equiv \Bsn(x| \Pic, I)$,
 then as noted by \citet{goggans2004} for the general case, we may
 write
 \begin{align}
 \frac{1}{Z} \frac{\partial Z}{\partial \beta}=
 \frac{
-\int B_{\rm S/N}(x| \Pi_{\rm c}, \blambda)^{\beta}
-\log(B_{\rm S/N}(x| \Pi_{\rm c}, \blambda))P(\blambda| I)
+\int \Bsn(x| \Pic, \blambda)^{\beta}
+\log(\Bsn(x| \Pic, \blambda))P(\blambda| I)
+d\blambda
 }
 {
-\int B_{\rm S/N}(x| \Pi_{\rm c}, \blambda)^{\beta})P(\blambda| I)
+\int \Bsn(x| \Pic, \blambda)^{\beta} P(\blambda| I)
+d\blambda
 }
 \end{align}
 The right-hand side expresses the average of the log-likelihood at $\beta$. As
@@ -427,7 +431,7 @@ can numerically integrate to get the Bayes factor, i.e.
 \langle \log(\Bsn(x| \Pic, \blambda)) \rangle_{\beta} d\beta.
 \end{align}
 In practice, we use a simple numerical quadrature over a finite ladder of
-$\beta_i$ with the smallest chosen such that extending closer to zero does not
+$\beta_i$, with the smallest chosen such that any smaller value does not
 change the result beyond other numerical uncertainties. Typically, getting
 accurate results for the Bayes factor requires a substantially larger number of
 temperatures than are required for efficiently sampling multi-modal
 posteriors. As such, we use a larger number of temperatures (suitably
 initialised close to the target peak) when estimating the Bayes factor.
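+For example, applying the trapezoid rule over a ladder
+$0 < \beta_{1} < \dots < \beta_{\Ntemps} = 1$ gives
+\begin{align}
+\log \Bsn(x| \Pic, I) \approx \sum_{i=1}^{\Ntemps - 1}
+\frac{\beta_{i+1} - \beta_{i}}{2}
+\left(
+\langle \log(\Bsn(x| \Pic, \blambda)) \rangle_{\beta_{i+1}} +
+\langle \log(\Bsn(x| \Pic, \blambda)) \rangle_{\beta_{i}}
+\right),
+\end{align}
+neglecting the small contribution from $\beta < \beta_{1}$.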
 
 \subsection{The topology of the likelihood}
-As discussed, we intend to use the $\F$-statistic as our log-likelihood in the
-MCMC simulations. Before continuing, it is worthwhile to understand the behaviour
-of the log-likelihood. As shown in Equation~\eqref{eqn_twoF_expectation},
-$\widetilde{2\F}$ has a expectation value of 4 (corresponding to the
-4 degrees of freedom of the underlying chi-square distribution) in Gaussian
-noise, but in the presence of a signal larger value are expected proportional
-to the squared SNR. 
-
-To illustrate this, let us consider $\widetilde{2\F}$ (the log-likelihood)
-as a function of $f$ (the template frequency) if there exists a signal in the
-data with frequency $f_0$. We will assume that all other Doppler parameters
-are perfectly matched.
-This can be calculated analytically, taking the matched-filtering amplitude
-(Equation~(11) of \citep{prix2005}) with $\Delta\Phi(t) = 2\pi(f - f_0) t$
-the expectation of $\widetilde{2\F}$ as a function of the template frequency
-$f$ is given by
+
+We intend to use the $\F$-statistic as our log-likelihood in MCMC simulations,
+but before continuing, it is worthwhile to acquaint ourselves with the typical
+behaviour of the log-likelihood by considering a specific example.
+
+As shown in Equation~\eqref{eqn_twoF_expectation}, the expectation of
+$\widetilde{2\F}$ is 4 in Gaussian noise alone, but proportional to the square
+of the SNR in the presence of a signal. To illustrate this, let us consider
+$\widetilde{2\F}$ as a function of $f$ (the template frequency) if there exists
+a signal in the data with frequency $f_0$. We will assume that all other
+Doppler parameters are perfectly matched. Such an example can be calculated
+analytically: taking the matched-filtering amplitude (Equation~(11) of
+\citealt{prix2005}) with $\Delta\Phi(t) = 2\pi(f - f_0) t$, the expectation of
+$\widetilde{2\F}$ as a function of the template frequency $f$ is given by
 \begin{equation}
 \textrm{E}[\widetilde{2\F}](f) = 4 + 
 (\textrm{E}[\widetilde{2\F_0}] -4)\textrm{sinc}^{2}(\pi(f-f_0)\Tcoh)
@@ -624,6 +626,7 @@ As such, the volume can be decomposed as
 \sqrt{\textrm{det}g^{\rm Sky}}\frac{\Delta\Omega}{2} \times
 \sqrt{\textrm{det}g^{\rm PE}}\prod_{s=0}^{\smax}\Delta f^{(s)} \\
 & = \Vsky \times \Vpe.
+\label{eqn_metric_volume}
 \end{align}
 Moreover, if $\tref$ is in the middle of the observation span, the diagonal
 nature of $g^{\rm PE}$ means that one can further identify
@@ -633,48 +636,90 @@ nature of $g^{\rm PE}$ means that one can further identify
 \end{align}
 This decomposition may be useful in setting up MCMC searches.
 
-\subsection{An example}
-
-In order to familiarise the reader with the features of the search, we will now
-describe a simple directed search (over $f$ and $\dot{f}$) for a strong
-simulated signal in Gaussian noise. The setup of the search consists in defining
-the following
-\begin{itemize}
-\item The prior for each search parameter. Typically, we recomend either a uniform
-prior bounding the area of interest, or a normal distribution centered on the
-target and with some well defined width. In this example we will use a uniform
-prior.
-\item The initialisation of the walkers. If the whole prior volume is to be explored,
-the walkers should be initialised from the prior (i.e. random drawn from the
-prior distributions) as we will do here. However, it is possible that only a
-small region of parameter space requires exploration, therefore we provide
-functionality to initialise the walkers subject to an independent distribution
-if needed.
-\item The number of burn-in and production steps to take. This is a tuning
-parameter of the MCMC algorithm. First we allow the walkers to run for a number
-of `burn-in' steps which are discarded and then a number of `production' steps
-are taken from which one makes estimates of the posterior
-\item The parallel tempering set-up. If used, one must specify the number of
-temperatures and their arrangement. Typically, we use 3 or so temperatures
-with arranged linearly in log-space from some zero to some maximum temperature.
-\end{itemize}
-
-Using these choices, the simulation is run. To illustrate the full MCMC process,
-in Figure~\ref{fig_MCMC_simple_example} we plot the progress of all the individual
-walkers (each represented by an individual line) as a function of the total
-number of steps. The red portion of steps are `burn-in' and hence discarded,
-from this plot we see why: the walkers are initialised from the uniform prior
-and initially spend some time exploring the whole parameter space before congerging.
-The production samples, colored black, are only taken once the sampler has
-converged - these can be used to generate posterior plots.
-
+\subsection{Example: signal in noise}
+
+In order to familiarise the reader with the features of an MCMC search, we will
+now describe a simple directed search (over $f$ and $\dot{f}$) for a simulated
+signal in Gaussian noise. The signal will have a frequency of $30$~Hz and a
+spin-down of $-1{\times}10^{-10}$~Hz/s; all other Doppler parameters are
+`known' and so are irrelevant. Moreover, the signal has an amplitude
+$h_0=10^{-24}$ while the Gaussian noise has
+$\sqrt{\Sn}=10^{-23}$~Hz$^{-1/2}$, such that the signal has a depth of
+$\sqrt{\Sn}/h_0 = 10$.
+
+First, we must define a prior for each search parameter. Typically, we
+recommend either a uniform prior bounding the area of interest, or a normal
+distribution centered on the target and with some well defined width. However,
+to ensure that the MCMC simulation has a reasonable chance at finding a peak,
+one should consider the corresponding metric volume given in
+Equation~\eqref{eqn_metric_volume}. For this example, we will use a uniform
+prior with a frequency range of $\Delta f = 10^{-7}$~Hz and a spin-down range
+of $\Delta \fdot=10^{-13}$~Hz/s, both centered on the simulated signal
+frequency and spin-down rate. We set the reference time to coincide with the
+middle of the data span; therefore the metric volume can be decomposed into a
+frequency contribution and a spin-down contribution: for the frequency,
+\begin{align}
+\Vpe^{(0)} = \frac{(\pi\Tcoh\Delta f)^2}{3} \approx 2.46
+\end{align}
+and for the spin-down,
+\begin{align}
+\Vpe^{(1)} = \frac{4(\pi\Delta \fdot)^2\Tcoh^{4}}{45} \approx 48.9
+\end{align}
+such that $\V\approx120$ (note that $\Vsky$ does not contribute since we do
+not search over the sky parameters). This metric volume indicates that the
+signal will occupy about 1\% of the prior volume; therefore the MCMC search is
+expected to work. Alternative priors will need careful thought about how to
+translate them into a metric volume: for example, using a Gaussian prior, one
+could use the standard deviation as a proxy for the allowed search region.
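+(For reference, these volumes follow from the $100$~day data span of this
+example: with $\Tcoh \approx 8.64{\times}10^{6}$~s we have
+$\pi \Tcoh \Delta f \approx 2.71$, and hence
+$\Vpe^{(0)} \approx 2.71^{2}/3 \approx 2.46$.)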
+
+In addition to defining the prior, one must also consider how to
+\emph{initialise} the walkers. If the prior genuinely represents the stated
+prior knowledge, the usual solution is to initialise the walkers from the
+prior: that is, the starting positions are drawn from the prior. However,
+instances do occur when one would like to initialise the walkers from a
+different distribution. For example, if one only needs to estimate the evidence
+(given a particular prior), but is aware from previous searches that the only
+significant peak lies in a small area of parameter space, one could initialise
+the walkers in a small cluster close to that area. In this example, we
+initialise the walkers from the prior such that they have the chance to explore
+the entire prior volume.
+
+Having defined the prior, the final setup step is to define the number of
+\emph{burn-in} and \emph{production} steps the sampler should take and the
+number of walkers; these are tuning parameters of the MCMC algorithm. The
+number of walkers should typically be a few hundred: the greater the number,
+the more samples will be taken, resulting in improved posterior estimates. The
+burn-in steps refer to an initial set of steps which are discarded as they are
+taken whilst the walkers converge. After the walkers have converged, the
+subsequent steps are known as production steps since they are used to produce
+posterior estimates and calculate the marginal likelihood.
+
+Using these choices, the simulation is run. To illustrate the full MCMC
+process, in Figure~\ref{fig_MCMC_simple_example} we plot the progress of all
+the individual walkers (each represented by an individual line) as a function
+of the total number of steps. The red portion of steps are burn-in and hence
+discarded; from this plot we see why: the walkers are initialised from the
+uniform prior and initially spend some time exploring the whole parameter space
+before converging. The fact that they converge to a single unique point is due
+to the strength of the signal (substantially elevating the likelihood above
+that of Gaussian fluctuations) and the tight prior, which was quantified
+through the metric volume $\V$. The production samples, colored black, are only
+taken once the sampler has converged; these can be used to generate posterior
+plots.
 \begin{figure}[htb]
 \centering
 \includegraphics[width=0.5\textwidth]{fully_coherent_search_using_MCMC_walkers}
-\caption{}
-\label{fig:}
+\caption{The progress of the MCMC simulation for a simulated signal in Gaussian
+noise, searching over the frequency and spin-down. The upper two panels show
+the position of all walkers as a function of the number of steps for the
+frequency and spin-down; samples colored red are discarded as burn-in (the
+first 100 steps), while samples colored black are used as production samples.
+The bottom panel shows the distribution of
+$\widetilde{2\F}$ taken from the production samples.}
+\label{fig_MCMC_simple_example}
 \end{figure}
 
+\subsection{Example: noise-only}
+
 \section{Follow-up}
 \label{sec_follow_up}
 
 fully-coherently. We begin by rewriting Equation~\eqref{eqn_lambda_posterior},
 the posterior distribution of the Doppler parameters, with the explicit
 dependence on the coherence time $\Tcoh$:
 \begin{equation}
-P(\blambda | \Tcoh, x, \Pi_{\rm c}, \Hs, I)
-%=B_{\rm S/N}(x| \Tcoh, \Pi_{\rm c}, \blambda) P(\blambda| \Hs I).
+P(\blambda | \Tcoh, x, \Pic, \Hs, I)
+%=\Bsn(x| \Tcoh, \Pic, \blambda) P(\blambda| \Hs, I).
+\propto e^{\hat{\F}(x| \Tcoh, \blambda)} P(\blambda| \Hs, I).
 \end{equation}
 
diff --git a/Paper/weak_signal_follow_up_run_setup.tex b/Paper/weak_signal_follow_up_run_setup.tex
index 9039eace0a5e1596093df2d92bf8c9fce2189478..6a77ddc95edd2997fc634a628bd434651bca561a 100644
--- a/Paper/weak_signal_follow_up_run_setup.tex
+++ b/Paper/weak_signal_follow_up_run_setup.tex
@@ -1,9 +1,9 @@
 \begin{tabular}{c|cccccc}
 Stage & $\Nseg$ & $\Tcoh$ [days] & $\Nsteps$ & $\V$ & $\Vsky$ & $\Vpe$ \\ \hline
-0 & 80 & 1.25 & 100 & 20.0 & 2.0 & 10.0 \\
-1 & 40 & 2.5 & 100 & $2{\times}10^{2}$ & 7.0 & 20.0 \\
+0 & 93 & 1.1 & 100 & 10.0 & 1.0 & 10.0 \\
+1 & 43 & 2.3 & 100 & $1{\times}10^{2}$ & 6.0 & 20.0 \\
 2 & 20 & 5.0 & 100 & $1{\times}10^{3}$ & 30.0 & 50.0 \\
-3 & 10 & 10.0 & 100 & $1{\times}10^{4}$ & $1{\times}10^{2}$ & 90.0 \\
-4 & 5 & 20.0 & 100 & $7{\times}10^{4}$ & $4{\times}10^{2}$ & $2{\times}10^{2}$ \\
+3 & 9 & 11.1 & 100 & $1{\times}10^{4}$ & $1{\times}10^{2}$ & $1{\times}10^{2}$ \\
+4 & 4 & 25.0 & 100 & $1{\times}10^{5}$ & $6{\times}10^{2}$ & $2{\times}10^{2}$ \\
 5 & 1 & 100.0 & 100,100 & $1{\times}10^{6}$ & $1{\times}10^{3}$ & $9{\times}10^{2}$ \\
 \end{tabular}
diff --git a/Paper/weak_signal_follow_up_walkers.png b/Paper/weak_signal_follow_up_walkers.png
index 96097988a61fd8e354d2fbac5665eccbc02ec074..f389425ce0d15083b02cf310a0463e0db2a74cfd 100644
Binary files a/Paper/weak_signal_follow_up_walkers.png and b/Paper/weak_signal_follow_up_walkers.png differ
diff --git a/examples/fully_coherent_search_using_MCMC.py b/examples/fully_coherent_search_using_MCMC.py
index 7e866bc487eda4a4c91108431f5d2e97f7fbc52b..fb4464ec9de22ab5f73f53bf11d5366597c85e54 100644
--- a/examples/fully_coherent_search_using_MCMC.py
+++ b/examples/fully_coherent_search_using_MCMC.py
@@ -1,33 +1,61 @@
-from pyfstat import MCMCSearch
+import pyfstat
+import numpy as np
 
+# Properties of the GW data
+sqrtSX = 1e-23
+tstart = 1000000000
+duration = 100*86400
+tend = tstart + duration
+
+# Properties of the signal
 F0 = 30.0
 F1 = -1e-10
 F2 = 0
 Alpha = 5e-3
 Delta = 6e-2
-tref = 362750407.0
+tref = 0.5*(tstart + tend)
 
-tstart = 1000000000
-duration = 100*86400
-tend = tstart + duration
+depth = 10
+h0 = sqrtSX / depth
+data_label = 'fully_coherent_search_using_MCMC'
+
+data = pyfstat.Writer(
+    label=data_label, outdir='data', tref=tref,
+    tstart=tstart, F0=F0, F1=F1, F2=F2, duration=duration, Alpha=Alpha,
+    Delta=Delta, h0=h0, sqrtSX=sqrtSX)
+data.make_data()
+
+# The predicted twoF, given by lalapps_predictFstat can be accessed by
+twoF = data.predict_fstat()
+print 'Predicted twoF value: {}\n'.format(twoF)
 
-theta_prior = {'F0': {'type': 'unif', 'lower': F0*(1-1e-6), 'upper': F0*(1+1e-6)},
-               'F1': {'type': 'unif', 'lower': F1*(1+1e-2), 'upper': F1*(1-1e-2)},
+DeltaF0 = 1e-7
+DeltaF1 = 1e-13
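+# Per-parameter metric volumes (c.f. the \Vpe expressions in the paper); their
+# product V = VF0*VF1 is ~120, i.e. the signal occupies roughly 1% of the
+# prior volume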
+VF0 = (np.pi * duration * DeltaF0)**2 / 3.0
+VF1 = (np.pi * duration**2 * DeltaF1)**2 * 4/45.
+print '\nV={:1.2e}, VF0={:1.2e}, VF1={:1.2e}\n'.format(VF0*VF1, VF0, VF1)
+
+theta_prior = {'F0': {'type': 'unif',
+                      'lower': F0-DeltaF0/2.,
+                      'upper': F0+DeltaF0/2.},
+               'F1': {'type': 'unif',
+                      'lower': F1-DeltaF1/2.,
+                      'upper': F1+DeltaF1/2.},
                'F2': F2,
                'Alpha': Alpha,
                'Delta': Delta
                }
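+
+# The MCMC walkers will be initialised by drawing from this prior; if only a
+# small region of parameter space needed exploring, a narrower initialisation
+# distribution could be used instead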
 
-ntemps = 3
+ntemps = 1
 log10temperature_min = -1
-nwalkers = 100
-nsteps = [1000, 1000]
-
-mcmc = MCMCSearch(label='fully_coherent_search_using_MCMC', outdir='data', 
-                  sftfilepath='data/*basic*sft', theta_prior=theta_prior,
-                  tref=tref, tstart=tstart, tend=tend, nsteps=nsteps,
-                  nwalkers=nwalkers, ntemps=ntemps,
-                  log10temperature_min=log10temperature_min)
-mcmc.run()
+nwalkers = 1000
+nsteps = [50, 50]
+
+mcmc = pyfstat.MCMCSearch(
+    label='fully_coherent_search_using_MCMC', outdir='data',
+    sftfilepath='data/*'+data_label+'*sft', theta_prior=theta_prior, tref=tref,
+    minStartTime=tstart, maxStartTime=tend, nsteps=nsteps, nwalkers=nwalkers,
+    ntemps=ntemps, log10temperature_min=log10temperature_min)
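+# 'subtractions' offsets the walker plots by the injected F0 and F1, so the
+# axes show the offset of each walker from the injection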
+mcmc.run(context='paper', subtractions=[F0, F1])
 mcmc.plot_corner(add_prior=True)
 mcmc.print_summary()
diff --git a/pyfstat.py b/pyfstat.py
index c578d44ebf1daaaee597e7990a4ebb1316baac39..918b448bdd1623e5c8d6518a1f2963218c06d0b7 100755
--- a/pyfstat.py
+++ b/pyfstat.py
@@ -1102,6 +1102,7 @@ class MCMCSearch(BaseSearchClass):
                              .format(sampler.tswap_acceptance_fraction))
             fig, axes = self.plot_walkers(sampler, symbols=self.theta_symbols,
                                           **kwargs)
+            fig.tight_layout()
             fig.savefig('{}/{}_init_{}_walkers.png'.format(
                 self.outdir, self.label, j), dpi=200)
 
@@ -1126,6 +1127,7 @@ class MCMCSearch(BaseSearchClass):
 
         fig, axes = self.plot_walkers(sampler, symbols=self.theta_symbols,
                                       burnin_idx=nburn, **kwargs)
+        fig.tight_layout()
         fig.savefig('{}/{}_walkers.png'.format(self.outdir, self.label),
                     dpi=200)
 
@@ -1370,7 +1372,7 @@ class MCMCSearch(BaseSearchClass):
     def plot_walkers(self, sampler, symbols=None, alpha=0.4, color="k", temp=0,
                      lw=0.1, burnin_idx=None, add_det_stat_burnin=False,
                      fig=None, axes=None, xoffset=0, plot_det_stat=True,
-                     context='classic'):
+                     context='classic', subtractions=None):
         """ Plot all the chains from a sampler """
 
         shape = sampler.chain.shape
@@ -1386,9 +1388,12 @@ class MCMCSearch(BaseSearchClass):
                                   "available range").format(temp))
             chain = sampler.chain[temp, :, :, :]
 
+        # 'subtractions' holds per-dimension offsets (e.g. the injected
+        # parameter values) removed from the chains before plotting;
+        # default to no offset in any dimension
+        if subtractions is None:
+            subtractions = [0 for i in range(ndim)]
+
         with plt.style.context((context)):
             if fig is None and axes is None:
-                fig = plt.figure(figsize=(8, 4*ndim))
+                fig = plt.figure(figsize=(3.3, 3.0*ndim))
                 ax = fig.add_subplot(ndim+1, 1, 1)
                 axes = [ax] + [fig.add_subplot(ndim+1, 1, i)
                                for i in range(2, ndim+1)]
@@ -1402,12 +1407,19 @@ class MCMCSearch(BaseSearchClass):
                     cs = chain[:, :, i].T
                     if burnin_idx:
                         axes[i].plot(xoffset+idxs[:burnin_idx],
-                                     cs[:burnin_idx], color="r", alpha=alpha,
+                                     cs[:burnin_idx]-subtractions[i],
+                                     color="r", alpha=alpha,
                                      lw=lw)
-                    axes[i].plot(xoffset+idxs[burnin_idx:], cs[burnin_idx:],
+                    axes[i].plot(xoffset+idxs[burnin_idx:],
+                                 cs[burnin_idx:]-subtractions[i],
                                  color="k", alpha=alpha, lw=lw)
                     if symbols:
-                        axes[i].set_ylabel(symbols[i])
+                        if subtractions[i] == 0:
+                            axes[i].set_ylabel(symbols[i])
+                        else:
+                            axes[i].set_ylabel(
+                                symbols[i]+'$-$'+symbols[i]+'$_0$')
+
             else:
                 axes[0].ticklabel_format(useOffset=False, axis='y')
                 cs = chain[:, :, temp].T
@@ -1445,6 +1457,10 @@ class MCMCSearch(BaseSearchClass):
                     Range = abs(maxv-minv)
                     axes[-1].set_xlim(minv-0.1*Range, maxv+0.1*Range)
 
+                # Use scientific notation on the detection-statistic axis
+                # when tick values fall outside 1e-4 to 1e4
+                xfmt = matplotlib.ticker.ScalarFormatter()
+                xfmt.set_powerlimits((-4, 4))
+                axes[-1].xaxis.set_major_formatter(xfmt)
+
             axes[-2].set_xlabel(r'$\textrm{Number of steps}$', labelpad=0.1)
         return fig, axes
 
@@ -2383,6 +2399,7 @@ class MCMCFollowUpSearch(MCMCSemiCoherentSearch):
                 ax.axvline(nsteps_total, color='k', ls='--')
             nsteps_total += nburn+nprod
 
+            fig.tight_layout()
             fig.savefig('{}/{}_walkers.png'.format(
                 self.outdir, self.label), dpi=200)