Debugged the loglikelihood functiona and dataframe

5f7e2428 · Rayne Liu · 11224884 · 5f7e2428 · 5f7e2428 · 5f7e2428
Commit 5f7e2428 authored Aug 18, 2020 by Rayne Liu
--- a/code/RDGW150914_ptemcee1.py
+++ b/code/RDGW150914_ptemcee1.py
@@ -262,16 +262,16 @@ figcorn.savefig(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'n
 #Now export the covariance and correlation matrices, as well as the logevidence
 #The covariant matrix
-samplesT = samples.T
+samplesT = samples_corn.T
 cov_matr=np.cov(samplesT)
 #print('Covariant matrix')
-df1 = pd.DataFrame(cov_matr, index = paramlabels, columns=paramlabels)
+df1 = pd.DataFrame(cov_matr, index = paramlabels_corner, columns=paramlabels_corner)
 df1.to_hdf(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_covariance.h5', key='df')
 #The correlation matrix
 corr_matr=np.corrcoef(samplesT)
 #print('Correlation matrix')
-df2 = pd.DataFrame(corr_matr,index = paramlabels, columns=paramlabels)
+df2 = pd.DataFrame(corr_matr,index = paramlabels_corner, columns=paramlabels_corner)
 df2.to_hdf(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_correlation.h5', key='df')
 #The logevidence (with error) and save it
@@ -280,6 +280,7 @@ df3 = pd.DataFrame(logevidence,index = [r'logevidence', r'error'], columns=[r'va
 df3.to_hdf(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_logevidence.h5', key='df')
 #Now plot alpha_0 on top of alpha_1 - only on top, not stacked, and only valid for vary_fund == True
 if vary_fund == True:
    samplea0 = samplesT[0]

--- a/code/RDGW150914_ptemcee2.py
+++ b/code/RDGW150914_ptemcee2.py
 #!/usr/bin/env python
 # coding: utf-8
-#import warnings
+import warnings
-#warnings.simplefilter("ignore") #Used to suppress the RuntimeWarnings. But if you are changing the code for TESTING, MAKE SURE TO COMMENT IT OUT.
+warnings.simplefilter("ignore") #Used to suppress the RuntimeWarnings. But if you are changing the code for TESTING, MAKE SURE TO COMMENT IT OUT.
 '''
 This script calculates the RDGW150914 constraints with ptemcee - specifically, the n=1 case both varying or not varying the fundamental frequencies. It produces the chain plot, corner plot, parameter constraints, and data plotted with the 1-sigma band. Since we are working specifically for the n=1 case, we also add in the corner plot the combined chain of alpha_0 and alpha_1, in order to demonstrate that the two are indistinguishable with each other.
@@ -35,7 +35,7 @@ from scipy.optimize import minimize
 #tshift: time shift after the strain peak
 #vary_fund: whether you vary the fundamental frequency. Works in the model_dv function.
-rootpath="/work/rayne.liu"#"/Users/RayneLiu" 
+rootpath="/Users/RayneLiu"#"/work/rayne.liu" 
 nmax=1
 tshift=19
 vary_fund = True
@@ -156,7 +156,10 @@ def log_prior(theta):
 # LogLikelihood function. It is just a Gaussian loglikelihood based on computing the residuals^2
 def log_likelihood(theta):
    modelev = model_dv(theta)
-    return  -np.sum((gw_sxs_bbh_0305rd[:,1] - (modelev.real))**2+(gw_sxs_bbh_0305rd[:,2] - (modelev.imag))**2)
+    result = -np.sum((gw_sxs_bbh_0305rd[:,1] - (modelev.real))**2+(gw_sxs_bbh_0305rd[:,2] - (modelev.imag))**2)
+    if np.isnan(result):
+        return -np.inf
+    return result
 # Logposterior distribution for the residuals case.
@@ -261,25 +264,25 @@ figcorn.savefig(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'n
 #Now export the covariance and correlation matrices, as well as the logevidence
 #The covariant matrix
-samplesT = samples.T
+samplesT = samples_corn.T
 cov_matr=np.cov(samplesT)
 #print('Covariant matrix')
-df1 = pd.DataFrame(cov_matr, index = paramlabels, columns=paramlabels)
+df1 = pd.DataFrame(cov_matr, index = paramlabels_corner, columns=paramlabels_corner)
 df1.to_hdf(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_covariance.h5', key='df')
 #The correlation matrix
 corr_matr=np.corrcoef(samplesT)
 #print('Correlation matrix')
-df2 = pd.DataFrame(corr_matr,index = paramlabels, columns=paramlabels)
+df2 = pd.DataFrame(corr_matr,index = paramlabels_corner, columns=paramlabels_corner)
 df2.to_hdf(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_correlation.h5', key='df')
 #The logevidence (with error) and save it
 logevidence = sampler.log_evidence_estimate(fburnin=0.2)
 df3 = pd.DataFrame(logevidence,index = [r'logevidence', r'error'], columns=[r'vary='+str(vary_fund)+', nmax='+str(nmax)+', tshift='+str(tshift)+', '+str(npoints)+'pts'])
 df3.to_hdf(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_logevidence.h5', key='df')
 #Now plot alpha_0 on top of alpha_1 - only on top, not stacked, and only valid for vary_fund == True
 if vary_fund == True:
    samplea0 = samples.T[0]
@@ -319,5 +322,5 @@ plt.ylabel("h")
 figband.savefig(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_band.pdf', format = 'pdf')
-import os
+#import os
-os.system('afplay /System/Library/Sounds/Submarine.aiff')
+#os.system('afplay /System/Library/Sounds/Submarine.aiff')
--- a/code/RDGW150914_ptemcee3.py
+++ b/code/RDGW150914_ptemcee3.py
@@ -42,11 +42,11 @@ tshift=0
 vary_fund = False
 #sampler parameters
-npoints=60000 
+npoints=100 
-nwalkers = 840
+nwalkers = 42
 ntemps=12
 ndim = int(4*(nmax+1))
-burnin = 5000 #How many points do you burn before doing the corner plot. You need to watch the convergence of the chain plot a bit.
+burnin = 20 #How many points do you burn before doing the corner plot. You need to watch the convergence of the chain plot a bit.
            #This is trivial but often forgotten: this cannot be more than npoints! Usually 1/5~1/4 npoints is what I observe.
 numbins = 42 #corner plot parameter - how many bins you want
 datacolor = '#105670' #'#4fa3a7'
@@ -157,7 +157,10 @@ def log_prior(theta):
 # LogLikelihood function. It is just a Gaussian loglikelihood based on computing the residuals^2
 def log_likelihood(theta):
    modelev = model_dv(theta)
-    return  -np.sum((gw_sxs_bbh_0305rd[:,1] - (modelev.real))**2+(gw_sxs_bbh_0305rd[:,2] - (modelev.imag))**2)
+    result = -np.sum((gw_sxs_bbh_0305rd[:,1] - (modelev.real))**2+(gw_sxs_bbh_0305rd[:,2] - (modelev.imag))**2)
+    if np.isnan(result):
+        return -np.inf
+    return result
 # Logposterior distribution for the residuals case.
@@ -233,7 +236,7 @@ pk = samples[np.argmax(lglk)]
 pk_corn = pk if vary_fund == True else np.delete(pk, [0,2])
 #y_0 range needs some messaging to make the plot
 if vary_fund == False:
-    samples_corn.T[-2] = samples_corn.T[-2] - np.pi
+    samples_corn.T[-2] = samples_corn.T[-2] - np.pi #This indeed changes samples_corn itself
    pk[-2] -= np.pi
 #print('pkFalse:')
@@ -269,16 +272,16 @@ figcorn.savefig(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'n
 #Now export the covariance and correlation matrices, as well as the logevidence
 #The covariant matrix
-samplesT = samples.T
+samplesT = samples_corn.T
 cov_matr=np.cov(samplesT)
 #print('Covariant matrix')
-df1 = pd.DataFrame(cov_matr, index = paramlabels, columns=paramlabels)
+df1 = pd.DataFrame(cov_matr, index = paramlabels_corner, columns=paramlabels_corner)
 df1.to_hdf(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_covariance.h5', key='df')
 #The correlation matrix
 corr_matr=np.corrcoef(samplesT)
 #print('Correlation matrix')
-df2 = pd.DataFrame(corr_matr,index = paramlabels, columns=paramlabels)
+df2 = pd.DataFrame(corr_matr,index = paramlabels_corner, columns=paramlabels_corner)
 df2.to_hdf(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_correlation.h5', key='df')
 #The logevidence (with error) and save it

--- a/code/RDGW150914_ptemcee4.py
+++ b/code/RDGW150914_ptemcee4.py
@@ -42,8 +42,8 @@ tshift=19
 vary_fund = False
 #sampler parameters
-npoints=1002 
+npoints=102 
-nwalkers = 840
+nwalkers = 42
 ntemps=12
 ndim = int(4*(nmax+1))
 burnin = 20 #How many points do you burn before doing the corner plot. You need to watch the convergence of the chain plot a bit.
@@ -157,7 +157,10 @@ def log_prior(theta):
 # LogLikelihood function. It is just a Gaussian loglikelihood based on computing the residuals^2
 def log_likelihood(theta):
    modelev = model_dv(theta)
-    return  -np.sum((gw_sxs_bbh_0305rd[:,1] - (modelev.real))**2+(gw_sxs_bbh_0305rd[:,2] - (modelev.imag))**2)
+    result = -np.sum((gw_sxs_bbh_0305rd[:,1] - (modelev.real))**2+(gw_sxs_bbh_0305rd[:,2] - (modelev.imag))**2)
+    if np.isnan(result):
+        return -np.inf
+    return result
 # Logposterior distribution for the residuals case.
@@ -268,16 +271,16 @@ figcorn.savefig(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'n
 #Now export the covariance and correlation matrices, as well as the logevidence
 #The covariant matrix
-samplesT = samples.T
+samplesT = samples_corn.T
 cov_matr=np.cov(samplesT)
 #print('Covariant matrix')
-df1 = pd.DataFrame(cov_matr, index = paramlabels, columns=paramlabels)
+df1 = pd.DataFrame(cov_matr, index = paramlabels_corner, columns=paramlabels_corner)
 df1.to_hdf(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_covariance.h5', key='df')
 #The correlation matrix
 corr_matr=np.corrcoef(samplesT)
 #print('Correlation matrix')
-df2 = pd.DataFrame(corr_matr,index = paramlabels, columns=paramlabels)
+df2 = pd.DataFrame(corr_matr,index = paramlabels_corner, columns=paramlabels_corner)
 df2.to_hdf(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_correlation.h5', key='df')
 #The logevidence (with error) and save it
@@ -286,6 +289,7 @@ df3 = pd.DataFrame(logevidence,index = [r'logevidence', r'error'], columns=[r'va
 df3.to_hdf(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_logevidence.h5', key='df')
 #Now plot alpha_0 on top of alpha_1 - only on top, not stacked, and only valid for vary_fund == True
 if vary_fund == True:
    samplea0 = samples.T[0]