Commit 059ddf94 authored by Xisco Jimenez Forteza

update on the emcee sampler. Descriptions added

parent a0744685
%% Cell type:code id: tags:
 
``` python
#pip install ptemcee
#pip install qnm
 
import numpy as np
import corner
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import emcee
import math
import h5py
import inspect
import pandas as pd
import json
import qnm
import random
import ptemcee
from scipy.optimize import minimize
```
 
%% Cell type:code id: tags:
 
``` python
# rootpath: root path to the NR data
# npoints:  number of points used for the sampling
# nmax:     tone index --> nmax = 0 fits the fundamental tone only
# tshift:   time shift after the strain peak at which the fit starts
rootpath = "/Users/xisco"
npoints = 10000
nmax = 0
tshift = 10
```
 
%% Cell type:code id: tags:
 
``` python
# Time of the maximum of the strain amplitude
def FindTmaximum(y):
    # y has columns [t, Re(h), Im(h)]; return the time at which |h|^2 peaks
    absval = y[:,1]*y[:,1] + y[:,2]*y[:,2]
    index = np.argmax(absval)
    timemax = y[index,0]
    return timemax
```
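
%% Cell type:markdown id: tags:

A quick sanity check of `FindTmaximum` on synthetic data (the toy array below is hypothetical and not part of the analysis); it assumes the same column layout `[t, Re(h), Im(h)]` as the SXS `Y_l2_m2.dat` dataset loaded next.

%% Cell type:code id: tags:

``` python
# Hypothetical test array with columns [t, Re(h), Im(h)]: a damped sinusoid peaking at t = 0
t_test = np.linspace(-50.0, 100.0, 3001)
h_test = np.exp(-np.abs(t_test)/20.0)*np.exp(-1j*0.5*t_test)
y_test = np.column_stack([t_test, h_test.real, h_test.imag])
print(FindTmaximum(y_test))  # expected: close to 0
```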
 
%% Cell type:code id: tags:
 
``` python
#This loads the 22 mode data
gw = {}
gw["SXS:BBH:0305"] = h5py.File(rootpath+"/SXS/BBH_SKS_d14.3_q1.22_sA_0_0_0.330_sB_0_0_-0.440/Lev6/rhOverM_Asymptotic_GeometricUnits_CoM.h5", 'r')
gw_sxs_bbh_0305 = gw["SXS:BBH:0305"]["Extrapolated_N2.dir"]["Y_l2_m2.dat"]
```
 
%% Cell type:code id: tags:
 
``` python
# Remember to download metadata.json for simulation SXS:BBH:0305 (Lev6/metadata.json).
# This post-processes the metadata file to read off the final mass and final spin.
metadata = {}
with open(rootpath+"/SXS/BBH_SKS_d14.3_q1.22_sA_0_0_0.330_sB_0_0_-0.440/Lev6/metadata.json") as file:
    metadata["SXS:BBH:0305"] = json.load(file)

af = metadata["SXS:BBH:0305"]['remnant_dimensionless_spin'][-1]  # z-component of the remnant spin
mf = metadata["SXS:BBH:0305"]['remnant_mass']
```
 
%% Cell type:code id: tags:
 
``` python
# times: time axis of the NR data; the fit starts at t0 = tmax + tshift
times = gw_sxs_bbh_0305[:,0]
tmax = FindTmaximum(gw_sxs_bbh_0305)
t0 = tmax + tshift
```
 
%% Cell type:code id: tags:
 
``` python
#Select the data from t0 onwards
position = np.argmax(times >= (t0))
gw_sxs_bbh_0305rd=gw_sxs_bbh_0305[position:-1]
timesrd=gw_sxs_bbh_0305[position:-1][:,0]
```
 
%% Cell type:code id: tags:
 
``` python
# Not needed for the fit below. UIB 2017 fit to the radiated energy,
# E_rad = 1 - final mass (in units of the total mass).
def EradUIB2017(eta, chi1, chi2):

    m1 = 0.5*(1+(1-4*eta)**0.5)
    m2 = 0.5*(1-(1-4*eta)**0.5)
    S = (m1**2*chi1 + m2**2*chi2)/(m1*m1 + m2*m2)

    erad = ((((1-(2*(2)**0.5)/3)*eta + 0.5609904135313374*eta**2 - 0.84667563764404*eta**3
              + 3.145145224278187*eta**4)
             *(1 + S**3*(-0.6320191645391563 + 4.952698546796005*eta - 10.023747993978121*eta**2)
               + S**2*(-0.17762802148331427 + 2.176667900182948*eta**2)
               + S*(-0.13084389181783257 - 1.1387311580238488*eta + 5.49074464410971*eta**2)))
            /(1 + S*(-0.9919475346968611 + 0.367620218664352*eta + 4.274567337924067*eta**2))
            - 0.01978238971523653*S*(1 - 4.91667749015812*eta)*(1-4*eta)**0.5*eta*(chi1-chi2)
            - 0.09803730445895877*(1-4*eta)**0.5*(1 - 3.2283713377939134*eta)*eta**2*(chi1-chi2)
            + 0.01118530335431078*eta**3*(chi1-chi2)**2)
    return erad
```
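
%% Cell type:markdown id: tags:

As an optional cross-check (not needed for the fit), one can compare `1 - EradUIB2017` with the remnant mass read from the metadata. The values of q, chi1 and chi2 below are read off the simulation directory name used above (q = 1.22, chi1 = 0.33, chi2 = -0.44); treat them as assumptions rather than values loaded from a file.

%% Cell type:code id: tags:

``` python
# Optional cross-check: remnant mass from the UIB 2017 energy fit vs. the SXS metadata.
# q, chi1, chi2 are read off the directory name above (assumed, not loaded from file).
q = 1.22
eta = q/(1.0+q)**2          # symmetric mass ratio
chi1, chi2 = 0.33, -0.44
print(1.0 - EradUIB2017(eta, chi1, chi2), mf)
```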
 
%% Cell type:code id: tags:
 
``` python
# Load the nmax+1 QNM frequencies and damping rates of the (l, m) = (2, 2)
# overtones n = 0, ..., nmax from the qnm package.
omegas = []
for i in range(0, nmax+1):
    grav_220 = qnm.modes_cache(s=-2, l=2, m=2, n=i)
    omega = grav_220(a=af)[0]
    omegas.append(omega)
```
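
%% Cell type:markdown id: tags:

The models below turn these dimensionless Kerr QNM frequencies into the oscillation frequencies and damping times actually used in the ringdown templates by rescaling with the remnant mass, as in the following sketch:

%% Cell type:code id: tags:

``` python
# Sketch of the conversion used inside the waveform models below:
# w_n = Re(omega_n)/mf is the oscillation frequency, tau_n = -mf/Im(omega_n) the damping time.
w_n = np.real(omegas)/mf
tau_n = -mf/np.imag(omegas)
print(w_n, tau_n)
```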
 
%% Cell type:code id: tags:
 
``` python
# Deprecated single-tone RD model. Not used anymore.
def model(theta):
    x0, y0, a0, b0 = theta
    w = np.real(omegas)[0]/mf
    tau = -1/np.imag(omegas)[0]*mf

    # -1j to agree with the SXS convention
    return (x0+1j*y0)*np.exp(-(timesrd-timesrd[0])/tau)*(np.cos(w*timesrd)-1j*np.sin(w*timesrd))
```
 
%% Cell type:code id: tags:
 
``` python
# RD model for nmax+1 tones, amplitudes parametrised as (x_n + i*y_n).
# Not used below, but still usable.
def model_dv(theta):
    w = np.real(omegas)/mf
    tau = -1/np.imag(omegas)*mf
    dim = int(len(theta)/4)

    xvars = []
    yvars = []
    avars = []
    bvars = []
    for i in range(0, dim):
        xvars.append(theta[4*i])
        yvars.append(theta[4*i+1])
        avars.append(theta[4*i+2])
        bvars.append(theta[4*i+3])

    # The fundamental (n = 0) tone is fixed to the GR frequency and damping time
    avars[0] = 0
    bvars[0] = 0

    ansatz = 0
    for i in range(0, dim):
        ansatz += (xvars[i]+1j*yvars[i])*np.exp(-(timesrd-timesrd[0])/(tau[i]*(1+bvars[i]))) \
            *(np.cos((1+avars[i])*w[i]*timesrd)-1j*np.sin((1+avars[i])*w[i]*timesrd))
    # -1j to agree with the SXS convention
    return ansatz
```
 
%% Cell type:code id: tags:
 
``` python
# RD model for nmax+1 tones, amplitudes parametrised as x_n*exp(i*y_n). Used below.
def model_dv_af(theta):
    w = np.real(omegas)/mf
    tau = -1/np.imag(omegas)*mf
    dim = int(len(theta)/4)

    xvars = []
    yvars = []
    avars = []
    bvars = []
    for i in range(0, dim):
        xvars.append(theta[4*i])
        yvars.append(theta[4*i+1])
        avars.append(theta[4*i+2])
        bvars.append(theta[4*i+3])

    # The fundamental (n = 0) tone is fixed to the GR frequency and damping time
    avars[0] = 0
    bvars[0] = 0

    ansatz = 0
    for i in range(0, dim):
        ansatz += (xvars[i]*np.exp(1j*yvars[i]))*np.exp(-(timesrd-timesrd[0])/(tau[i]*(1+bvars[i]))) \
            *(np.cos((1+avars[i])*w[i]*timesrd)-1j*np.sin((1+avars[i])*w[i]*timesrd))
    # -1j to agree with the SXS convention
    return ansatz
```
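
%% Cell type:markdown id: tags:

In equation form, the template implemented in `model_dv_af` is (with $t_0 =$ `timesrd[0]`, $\omega_n = \mathrm{Re}(\tilde\omega_n)/M_f$ and $\tau_n = -M_f/\mathrm{Im}(\tilde\omega_n)$ built from the `qnm` frequencies $\tilde\omega_n$ loaded above):

$$
h(t) = \sum_{n=0}^{n_{\rm max}} x_n\, e^{i y_n}\,
\exp\!\left[-\frac{t-t_0}{\tau_n (1+b_n)}\right]
\Big[\cos\big((1+a_n)\,\omega_n t\big) - i \sin\big((1+a_n)\,\omega_n t\big)\Big],
\qquad a_0 = b_0 = 0,
$$

where the $-i$ sign follows the SXS convention and the fractional deviations $a_n$, $b_n$ of the overtone frequencies and damping times are free parameters.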
 
%% Cell type:code id: tags:
 
``` python
# Log-likelihood: a Gaussian log-likelihood built from the squared residuals
# of the real and imaginary parts of the strain.
def log_likelihood(theta):
    modelev = model_dv_af(theta)

    return -np.sum((gw_sxs_bbh_0305rd[:,1] - modelev.real)**2
                   + (gw_sxs_bbh_0305rd[:,2] - modelev.imag)**2)
```
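
%% Cell type:markdown id: tags:

Explicitly, up to a constant normalisation this is a Gaussian log-likelihood with unit noise variance on the real and imaginary parts of the strain:

$$
\ln \mathcal{L}(\theta) = -\sum_k \Big[\big(\mathrm{Re}\,h_k^{\rm NR} - \mathrm{Re}\,h_k^{\rm model}(\theta)\big)^2
+ \big(\mathrm{Im}\,h_k^{\rm NR} - \mathrm{Im}\,h_k^{\rm model}(\theta)\big)^2\Big].
$$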
 
%% Cell type:code id: tags:
 
``` python
# Match-based log-likelihood. Not used at the moment: returns minus the mismatch
# between the model and the NR data.
def log_likelihood_match(theta):

    # model and data
    modelev = model_dv(theta)
    data = gw_sxs_bbh_0305rd[:,1] - 1j*gw_sxs_bbh_0305rd[:,2]

    # norms
    norm1 = np.sum(modelev*np.conj(modelev))
    norm2 = np.sum(data*np.conj(data))

    # mismatch
    overlap = data*np.conj(modelev)
    return -(1-(np.sum(overlap)).real/np.sqrt(norm1*norm2)).real
```
 
%% Cell type:code id: tags:
 
``` python
# Log-prior: flat priors defining the allowed ranges of the variables.
# This version is for the (x_n + i*y_n) parametrisation; the x_n*exp(i*y_n)
# version actually sampled below redefines log_prior accordingly.
def log_prior(theta):
    x_s = theta[0::4]
    y_s = theta[1::4]
    a_s = theta[2::4]
    b_s = theta[3::4]
    if all(-10 <= t <= 10 for t in x_s) and all(-10 <= t <= 10 for t in y_s) \
            and all(-0.4 <= t <= 0.4 for t in a_s) and all(-0.4 <= t <= 0.4 for t in b_s):
        return 0.0
    return -np.inf
```
 
%% Cell type:code id: tags:
 
``` python
# Log-posterior for the residuals-based likelihood.
def log_probability(theta):
    lp = log_prior(theta)
    if not np.isfinite(lp):
        return -np.inf
    return lp + log_likelihood(theta)
```
 
%% Cell type:code id: tags:
 
``` python
# Log-posterior for the mismatch-based likelihood. Not used yet.
def log_probability_match(theta):
    lp = log_prior(theta)
    if not np.isfinite(lp):
        return -np.inf
    return lp + log_likelihood_match(theta)
```
 
%% Cell type:markdown id: tags:
 
Maximum likelihood fitting (used in the same way as in the Mathematica notebook)
 
%% Cell type:code id: tags:
 
``` python
# Provide an initial guess for the 4*(nmax+1) parameters
np.random.seed(42)
nll = lambda *args: -log_likelihood(*args)
initial = np.array([1, 1, 0, 0])
soln = minimize(nll, initial)
print("Maximum likelihood estimates:")
vars_ml = soln.x
print(vars_ml)
```
 
%% Output
 
Maximum likelihood estimates:
[-0.35650079 2.07846455 0. 0. ]
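
%% Cell type:markdown id: tags:

Note that in the $x_n e^{i y_n}$ parametrisation a negative amplitude is degenerate with shifting the phase by $\pi$, so the minimiser may land outside the prior range used for the MCMC below. A small optional remapping (not part of the original notebook) makes the maximum-likelihood point directly comparable to the posterior median:

%% Cell type:code id: tags:

``` python
# Optional remapping (not in the original notebook): (x, y) and (-x, y + pi) give the same
# waveform in the x*exp(i*y) parametrisation, so fold the ML point into x >= 0, 0 <= y < 2*pi.
x0_ml, y0_ml = vars_ml[0], vars_ml[1]
if x0_ml < 0:
    x0_ml, y0_ml = -x0_ml, y0_ml + np.pi
y0_ml = y0_ml % (2*np.pi)
print(x0_ml, y0_ml)
```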
 
%% Cell type:code id: tags:
 
``` python
plt.plot(timesrd, gw_sxs_bbh_0305rd[:,1], "r", alpha=0.3, lw=3, label="NR_re")
modelfit = model_dv_af(vars_ml)
plt.plot(timesrd, modelfit.real,"b", alpha=0.3, lw=3, label="Fit_re")
#plt.plot(x0, np.dot(np.vander(x0, 2), w), "--k", label="LS")
plt.legend(fontsize=14)
plt.xlim(timesrd[0], timesrd[0]+80)
plt.xlabel("t")
plt.ylabel("h");
```
 
%% Output
 
 
%% Cell type:code id: tags:
 
``` python
plt.plot(timesrd, gw_sxs_bbh_0305rd[:,2], "r", alpha=0.3, lw=3, label="NR_im")
modelfit = model_dv_af(vars_ml)
plt.plot(timesrd, modelfit.imag,"b", alpha=0.3, lw=3, label="Fit_im")
#plt.plot(x0, np.dot(np.vander(x0, 2), w), "--k", label="LS")
plt.legend(fontsize=14)
plt.xlim(timesrd[0], timesrd[0]+80)
plt.xlabel("t")
plt.ylabel("h");
```
 
%% Output
 
 
%% Cell type:markdown id: tags:
 
MCMC fitting with emcee
 
%% Cell type:code id: tags:
 
``` python
# Log-prior: flat priors defining the allowed ranges of the variables.
# This version is for the x_n*exp(i*y_n) parametrisation used by model_dv_af.
def log_prior(theta):
    x_s = theta[0::4]
    y_s = theta[1::4]
    a_s = theta[2::4]
    b_s = theta[3::4]
    if all(0 <= t <= 10 for t in x_s) and all(0 <= t <= 2*np.pi for t in y_s) \
            and all(-0.4 <= t <= 0.4 for t in a_s) and all(-0.4 <= t <= 0.4 for t in b_s):
        return 0.0
    return -np.inf
```
 
%% Cell type:code id: tags:
 
``` python
# Define the initial positions of the walkers for the 4*(nmax+1) variables.
nwalkers = 32
ndim = int(4*(nmax+1))

pos = [random.uniform(0.01, 10), random.uniform(0, 2*np.pi),
       random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1)]
pos_aux = list(pos)
# Repeat the same seed for every overtone (one block of 4 parameters per tone)
for i in range(1, nmax+1):
    pos = pos + pos_aux

pos = pos + 1e-3 * np.random.randn(nwalkers, ndim)
```
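
%% Cell type:markdown id: tags:

An optional sanity check (not in the original notebook): every walker should start inside the prior support, otherwise emcee gets stuck on `-inf` log-probabilities.

%% Cell type:code id: tags:

``` python
# Check that all initial walker positions have finite prior probability
assert all(np.isfinite(log_prior(p)) for p in pos)
```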
 
%% Cell type:code id: tags:
 
``` python
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability)
sampler.run_mcmc(pos,npoints,progress=True);
```
 
%% Output
 
100%|██████████| 10000/10000 [00:40<00:00, 245.59it/s]
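
%% Cell type:markdown id: tags:

A rough convergence check, assuming emcee version 3 (which provides `get_autocorr_time`, consistent with the `get_chain` call used below): the chain should be at least a few tens of autocorrelation times long.

%% Cell type:code id: tags:

``` python
# Integrated autocorrelation time per parameter (tol=0 suppresses the "chain too short" error)
tau_ac = sampler.get_autocorr_time(tol=0)
print(tau_ac)
```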
 
%% Cell type:code id: tags:
 
``` python
plt.clf()
fig, axes = plt.subplots(4, 1, sharex=True, figsize=(8, 9))
axes[0].plot(sampler.chain[:, :, 0].T, color="k", alpha=0.4)
axes[0].yaxis.set_major_locator(MaxNLocator(5))
axes[0].set_ylabel("$x0$")
axes[1].plot(sampler.chain[:, :, 1].T, color="k", alpha=0.4)
axes[1].yaxis.set_major_locator(MaxNLocator(5))
axes[1].set_ylabel("$y0$")
axes[2].plot(sampler.chain[:, :, 2].T, color="k", alpha=0.4)
axes[2].yaxis.set_major_locator(MaxNLocator(5))
axes[2].set_ylabel("$a0$")
axes[3].plot(sampler.chain[:, :, 3].T, color="k", alpha=0.4)
axes[3].yaxis.set_major_locator(MaxNLocator(5))
axes[3].set_ylabel("$b0$")
plt.show()
```
 
%% Output
 
 
 
%% Cell type:code id: tags:
 
``` python
label = ['x0', 'y0', 'a0', 'b0']
for i in range(1, nmax+1):
    label = label + ['x'+str(i), 'y'+str(i), 'a'+str(i), 'b'+str(i)]
```
 
%% Cell type:code id: tags:
 
``` python
burnin = 1000
flat_samples = sampler.get_chain(discard=burnin, thin=15, flat=True)
# Note: the median is taken over the full flat chain, without discarding burn-in
median = np.median(sampler.flatchain, axis=0)
print(median)
print(vars_ml)

fig = corner.corner(flat_samples, labels=label, truths=median, quantiles=(0.1, 0.9));
```
 
%% Output
 
[ 3.46122605e-01 5.22266074e+00 -3.14367107e-03 -5.23460769e-03]
[-0.35650079 2.07846455 0. 0. ]
 
 
%% Cell type:code id: tags:
 
``` python
# Adjust this output path to your local checkout of the rdstackingproject repository
fig.savefig(rootpath+'/git/rdstackingproject/plots/fit_chi2_'+str(nmax)+'.pdf')
```
 
%% Cell type:code id: tags:
 
``` python
# Covariance matrix of the posterior samples (to be output/saved as well)
flat_samples_T = flat_samples.T
cov_matr = np.cov(flat_samples_T)
columns = list(range(1, len(flat_samples_T)+1))
print('Covariance matrix')
df = pd.DataFrame(cov_matr, columns=columns)
df
```
 
%% Output
 
Covariance matrix
 
1 2 3 4
0 0.008955 0.000385 0.000456 -0.000365
1 0.000385 0.080420 0.001151 0.000309
2 0.000456 0.001151 0.052796 0.000025
3 -0.000365 0.000309 0.000025 0.053115
 
%% Cell type:code id: tags:
 
``` python
# Correlation matrix of the posterior samples (to be output/saved as well)
corr_matr = np.corrcoef(flat_samples_T)
columns = list(range(1, len(flat_samples_T)+1))
print('Correlation matrix')
df = pd.DataFrame(corr_matr, columns=columns)
df
```
 
%% Output
 
Correlation matrix
 
1 2 3 4
0 1.000000 0.014332 0.020968 -0.016718
1 0.014332 1.000000 0.017659 0.004727
2 0.020968 0.017659 1.000000 0.000470
3 -0.016718 0.004727 0.000470 1.000000
 
%% Cell type:markdown id: tags:
 
With ptemcee (parallel tempering)
 
%% Cell type:code id: tags:
 
``` python
# Parallel-tempered sampling with ptemcee
nwalkers = 32
ntemps = 10
ndim = int(4*(nmax+1))
npoints = 5000

pos = [random.uniform(0., 4), random.uniform(0.01, 2*np.pi),
       random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1)]
for i in range(1, nmax+1):
    pos_aux = [random.uniform(0.1, 3), random.uniform(0.01, 0.02),
               random.uniform(-0.1, 0.1), random.uniform(-0.1, 0.1)]
    pos = pos + pos_aux

pos = pos + 1e-5 * np.random.randn(ntemps, nwalkers, ndim)
sampler = ptemcee.Sampler(nwalkers, ndim, log_likelihood, log_prior, ntemps=ntemps)
sampler.run_mcmc(pos, npoints);
```
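
%% Cell type:markdown id: tags:

With parallel tempering the chain gains a leading temperature index: `sampler.chain` has shape `(ntemps, nwalkers, nsteps, ndim)`, and only the coldest chain (index 0) samples the target posterior; it is the one used in the trace and corner plots below.

%% Cell type:code id: tags:

``` python
# The cold chain (temperature index 0) is the one that samples the posterior
print(sampler.chain.shape)   # (ntemps, nwalkers, npoints, ndim)
```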
 
%% Cell type:code id: tags:
 
``` python
plt.clf()
fig, axes = plt.subplots(4, 1, sharex=True, figsize=(8, 9))
axes[0].plot(sampler.chain[0,:, :, 0].T, color="k", alpha=0.4)
axes[0].yaxis.set_major_locator(MaxNLocator(5))
axes[0].set_ylabel("$x0$")
axes[1].plot(sampler.chain[0,:, :, 1].T, color="k", alpha=0.4)
axes[1].yaxis.set_major_locator(MaxNLocator(5))
axes[1].set_ylabel("$y0$")
axes[2].plot(sampler.chain[0,:, :, 2].T, color="k", alpha=0.4)
axes[2].yaxis.set_major_locator(MaxNLocator(5))
axes[2].set_ylabel("$a0$")
axes[3].plot(sampler.chain[0,:, :, 3].T, color="k", alpha=0.4)
axes[3].yaxis.set_major_locator(MaxNLocator(5))
axes[3].set_ylabel("$b0$")
plt.show()
```
 
%% Output
 
 
 
%% Cell type:code id: tags:
 
``` python
burnin = 500
samples = sampler.chain[0,:, burnin:, :].reshape((-1, ndim))
median=np.median(samples, axis=0)
print(median)
fig = corner.corner(samples, labels=["$x0$", "$y0$", "$a0$", "$b0$"],truths=median,quantiles=(0.1, 0.9))
```
 
%% Output
 
[3.43165737e-01 5.21859473e+00 6.46241690e-04 5.11895187e-04]
 
 
%% Cell type:code id: tags:
 
``` python
```