Configuration for testing multicore

adbf84e1 · Rayne Liu · 82ecaab0 · adbf84e1
Commit adbf84e1 authored Sep 28, 2020 by Rayne Liu
--- a/code/RDGW150914_Phoenix_n1t0T.py
+++ b/code/RDGW150914_Phoenix_n1t0T.py
+#!/usr/bin/env python
+# coding: utf-8
+import warnings
+warnings.simplefilter("ignore") #Used to suppress the RuntimeWarnings. But if you are changing the code for TESTING, MAKE SURE TO COMMENT IT OUT.
+
+'''
+Rising from the ashes.
+
+--- R^a_{yne} L^i_u, 08/22/2020
+'''
+
+import numpy as np
+import corner
+import matplotlib.pyplot as plt
+from matplotlib.ticker import MaxNLocator
+from matplotlib import rc
+#plt.rcParams['font.family'] = 'DejaVu Sans'
+#rc('text', usetex=True)
+plt.rcParams.update({'font.size': 16.5})
+
+import ptemcee
+from pycbc.pool import choose_pool
+import h5py
+import inspect
+import pandas as pd
+import json
+import qnm
+import random
+
+#Remember to change the following global variables
+#rootpath: root path to nr data
+#npoints: number of points you re using for your sampling
+#nmax: tone index --> nmax = 0 if fitting the fundamental tone
+#tshift: time shift after the strain peak
+#vary_fund: whether you vary the fundamental frequency. Works in the model_dv function.
+
+rootpath= "/work/rayne.liu"#"/Users/RayneLiu"
+nmax=1
+tshift=0
+vary_fund = True
+
+#sampler parameters
+npoints = 4000
+nwalkers = 4000
+ntemps=16
+dim = nmax+1
+ndim = 4*dim
+burnin = 2000  #How many points do you burn before doing the corner plot. You need to watch the convergence of the chain plot a bit.
+            #This is trivial but often forgotten: this cannot be more than npoints! Usually 1/5~1/4 npoints is what I observe.
+numbins = 42 #corner plot parameter - how many bins you want
+datacolor = '#105670' #'#4fa3a7'
+pkcolor = '#f2c977' #'#ffb45f'
+mediancolor = '#f7695c' #'#9b2814'
+
+#Import data and necessary functions
+
+#TimeOfMaximum
+def FindTmaximum(y):
+    #Determines the maximum absolute value of the complex waveform
+    absval = y[:,1]*y[:,1]+y[:,2]*y[:,2]
+    vmax=np.max(absval)
+    index = np.argmax(absval == vmax)
+    timemax=gw_sxs_bbh_0305[index,0]
+    return timemax
+
+
+
+
+#This loads the 22 mode data
+gw = {}
+gw["SXS:BBH:0305"] = h5py.File(rootpath+"/git/rdstackingproject/SXS/BBH_SKS_d14.3_q1.22_sA_0_0_0.330_sB_0_0_-0.440/Lev6/rhOverM_Asymptotic_GeometricUnits_CoM.h5", 'r')
+gw_sxs_bbh_0305 = gw["SXS:BBH:0305"]["Extrapolated_N2.dir"]["Y_l2_m2.dat"]
+
+# Remember to download metadata.json from the simulation with number: 0305. Download Lev6/metadata.json
+# This postprocesses the metadata file to find the final mass and final spin
+metadata = {}
+with open(rootpath+"/git/rdstackingproject/SXS/BBH_SKS_d14.3_q1.22_sA_0_0_0.330_sB_0_0_-0.440/Lev6/metadata.json") as file:
+    metadata["SXS:BBH:0305"] = json.load(file)
+
+af = metadata["SXS:BBH:0305"]['remnant_dimensionless_spin'][-1]
+mf = metadata["SXS:BBH:0305"]['remnant_mass']
+
+
+
+#times --> x axis of your data
+times = gw_sxs_bbh_0305[:,0]
+tmax=FindTmaximum(gw_sxs_bbh_0305)
+t0=tmax +tshift
+
+#Select the data from t0 onwards
+position = np.argmax(times >= (t0))
+gw_sxs_bbh_0305rd=gw_sxs_bbh_0305[position:-1]
+timesrd=gw_sxs_bbh_0305[position:-1][:,0]
+#print(timesrd[0])
+#print(t0) (This checks that timesrd[0] is indeed t0)
+timespan = timesrd - t0
+
+# Depending on nmax, you load nmax number of freqs. and damping times from the qnm package
+omegas = [qnm.modes_cache(s=-2,l=2,m=2,n=i)(a=af)[0] for i in range (0,dim)]
+w = (np.real(omegas))/mf
+tau=-1/(np.imag(omegas))*mf
+
+
+
+#Fitting
+#RD model for nmax tones. Amplitudes are in (xn*Exp[i yn]) version. Used here.
+def model_dv(theta):
+    #x0, y0= theta
+    #Your nmax might not align with the dim of theta. Better check it here.
+    assert int(len(theta)/4) == dim, 'Please recheck your n and parameters'
+    
+    avars = theta[ : (dim)]
+    bvars = theta[(dim) : 2*(dim)]
+    xvars = theta[2*(dim) : 3*(dim)]
+    yvars = theta[3*(dim) : ]
+    
+    if vary_fund == False:
+        avars[0]=0
+        bvars[0]=0
+        
+    ansatz = 0
+    for i in range (0,dim):
+        #bvars[1]=0
+        #avars[1]=0
+        ansatz += (xvars[i]*np.exp(1j*yvars[i]))*np.exp(-timespan/(tau[i]*(1+bvars[i]))) * (np.cos((1+avars[i])*w[i]*timespan)-1j*np.sin((1+avars[i])*w[i]*timespan))
+    # -1j to agree with SXS convention
+    return ansatz
+
+# Logprior distribution. It defines the allowed range my variables can vary over. 
+#It works for the (xn*Exp[iyn]) version. 
+def log_prior(theta): 
+    #Warning: we are specifically working with nmax=1 so here individual prior to the parameters are manually adjusted. This does not apply to all other nmax's.
+    avars = theta[ : (dim)]
+    bvars = theta[(dim) : 2*(dim)]
+    xvars = theta[2*(dim) : 3*(dim)]
+    yvars = theta[3*(dim) : ]
+    
+    if nmax == 0:
+        if all([0 <= tshift <= 5, vary_fund == True, -0.45 <= avars[0] <= 0.05, -0.95 <= bvars[0] <= 3.0, 0 <= xvars[0] <= 3.0, -np.pi <= yvars[0] <= np.pi]):        
+            return 0.0
+        elif all([tshift == 19, vary_fund == True, -3.0 <= avars[0] <= 3.0, -2.0 <= bvars[0] <= 5.0, 0 <= xvars[0] <= 1.0, 0 <= yvars[0] <= 2*np.pi]):        
+            return 0.0
+        if all([0 <= tshift <= 5, vary_fund == False, -1.0 <= avars[0] <= 1.0, -1.0 <= bvars[0] <= 1.0, 0 <= xvars[0] <= 3.0, -np.pi <= yvars[0] <= np.pi]):        
+            return 0.0
+        if all([tshift == 19, vary_fund == False, -1.0 <= avars[0] <= 1.0, -1.0 <= bvars[0] <= 1.0, 0 <= xvars[0] <= 3.0, 0 <= yvars[0] <= 2*np.pi]):        
+            return 0.0
+        
+    elif nmax == 1:
+        if all([0 <= tshift <= 5, vary_fund == True, -3.0 <= avars[0] <= 3.0, -3.0 <= avars[1] <= 3.0, -2.0 <= bvars[0] <= 12.0, -4.0 <= bvars[1] <= 30.0, 0 <= xvars[0] <= 1.6, 0 <= xvars[1] <= 1.4, -np.pi <= yvars[0] <= np.pi, -np.pi <= yvars[1] <= np.pi]):        
+            return 0.0
+        elif all([tshift == 19, vary_fund == True, -10.0 <= avars[0] <= 10.0, -10.0 <= avars[1] <= 10.0, -20.0 <= bvars[0] <= 30.0, -25.0 <= bvars[1] <= 30.0, 0 <= xvars[0] <= 0.6, 0 <= xvars[1] <= 0.9, 0 <= yvars[0] <= 2*np.pi, -np.pi <= yvars[1] <= np.pi]):
+            return 0.0
+
+        elif all([0 <= tshift <= 5, vary_fund == False, -10.0 <= avars[0] <= 10.0, -1.5 <= avars[1] <= 1.5, -9.0 <= bvars[0] <= 9.0, -6.0 <= bvars[1] <= 20.0, 0 <= xvars[0] <= 2.4, 0 <= xvars[1] <= 2.5, -np.pi <= yvars[0] <= np.pi, -np.pi <= yvars[1] <= np.pi]):
+            return 0.0
+        elif all([tshift == 19, vary_fund == False, -10.0 <= avars[0] <= 10.0, -8.0 <= avars[1] <= 8.0, -9.0 <= bvars[0] <= 9.0, -10.0 <= bvars[1] <= 12.0, 0 <= xvars[0] <= 0.6, 0 <= xvars[1] <= 0.7, 0 <= yvars[0] <= 2*np.pi, 0 <= yvars[1] <= 2* np.pi]):
+            return 0.0
+
+    return -np.inf
+
+
+# LogLikelihood function. It is just a Gaussian loglikelihood based on computing the residuals^2
+def log_likelihood(theta):
+    modelev = model_dv(theta)
+    result = -np.sum((gw_sxs_bbh_0305rd[:,1] - (modelev.real))**2+(gw_sxs_bbh_0305rd[:,2] - (modelev.imag))**2)
+    if np.isnan(result):
+        return -np.inf
+    return result
+
+
+# Logposterior distribution for the residuals case.
+# The evidence is just a normalization factor
+def log_probability(theta):
+    lp = log_prior(theta)
+    if not np.isfinite(lp):
+        return -np.inf
+    return lp + log_likelihood(theta)
+
+
+
+#Fit with ptemcee
+#Set the number of cores of your processors
+pool = choose_pool(16)
+pool.size = 16
+#vary_param = float(vary_fund)
+np.random.seed(42)
+pos = np.array([[random.uniform(-0.1,0.01), random.uniform(-0.1,0.1), random.uniform(0.0,0.5), random.uniform(0, 0.1)]])
+#print(pos)
+for i in range (1,dim):
+    pos_aux = np.array([[random.uniform(-0.1,0.01), random.uniform(-0.1,0.1), random.uniform(0.3,0.4), random.uniform(0.01, 0.02)]])
+    pos = np.concatenate((pos, pos_aux), axis = 0)
+    
+pos = pos.T.flatten()
+pos = list(pos)
+pos += 1e-5 * np.random.randn(ntemps, nwalkers, ndim)
+#print(pos)
+sampler = ptemcee.Sampler(nwalkers, ndim, log_likelihood, log_prior, ntemps=ntemps, pool=pool)
+sampler.run_mcmc(pos,npoints)
+
+
+
+
+#Define labels and start plotting
+paramlabels_a = [r'$\alpha_'+str(i)+'$' for i in range (dim)]
+paramlabels_b = [r'$\beta_'+str(i)+'$' for i in range (dim)]
+paramlabels_x = [r'$x_'+str(i)+'$' for i in range (dim)]
+paramlabels_y = [r'$y_'+str(i)+'$' for i in range (dim)] #if vary_fund == True else ['$y_'+str(i)+'-\pi$' for i in range (dim)]
+paramlabels = paramlabels_a + paramlabels_b + paramlabels_x + paramlabels_y
+#Need to delete alpha_0 and alpha_1 for the corner plot
+paramlabels_corner = paramlabels_a + paramlabels_b + paramlabels_x + paramlabels_y
+if vary_fund == False:
+    del paramlabels_corner[0]
+    del paramlabels_corner[nmax]
+
+
+
+#Chain plot
+fig, axes = plt.subplots(ndim, 1, sharex=True, figsize=(12, 9*(dim)))
+for i in range(ndim):
+    axes[i].plot(sampler.chain[0,:, :, i].T, color="k", alpha=0.4, rasterized=True)
+    axes[i].yaxis.set_major_locator(MaxNLocator(5))
+    axes[i].set_ylabel(paramlabels[i])
+axes[-1].set_xlabel('Iterations')
+#plt.show()
+fig.savefig(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_chainplot.png', format = 'png', bbox_inches = 'tight')
+
+
+
+#Burn samples, calculate peak likelihood value (not necessarily so in atlas) and make corner plot
+samples = sampler.chain[0,:, burnin:, :].reshape((-1, ndim))
+#samples for corner plot
+samples_corn = samples if vary_fund == True else np.delete(samples, np.s_[0,dim], 1)
+df0 = pd.DataFrame(samples_corn, columns=paramlabels_corner)
+df0.to_csv(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_chain.csv', index = False)
+
+
+#print('Values with peak likelihood:')
+lglk = np.array([log_likelihood(samples[i]) for i in range(len(samples))])
+pk = samples[np.argmax(lglk)]
+#print('pk:')
+#print(pk)
+pk_corn = pk if vary_fund == True else np.delete(pk, [0,dim])
+#y_0 range needs some messaging to make the plot. But in order to make the whole picture consistent, better change the range of y_1 too.
+#if vary_fund == False:
+#    samples_corn.T[-dim:] -= np.pi #This indeed changes samples_corn itself
+#    pk[-dim:] -= np.pi
+
+#print('pkFalse:')
+#print(pk)
+    
+#print(pk) 
+#Now calculate median (50-percentile) value
+median = np.median(samples_corn, axis=0)
+#print(samples)
+#print(samples_corn)
+
+figcorn = corner.corner(samples_corn, bins = numbins, hist_bin_factor = 5, color = datacolor, truths=pk_corn, truth_color = pkcolor, plot_contours = True, labels = paramlabels_corner, quantiles=(0.05, 0.16, 0.5, 0.84, 0.95), levels=[1-np.exp(-0.5), 1-np.exp(-1.64 ** 2/2)], show_titles=True)
+
+
+#Extract the axes in order to add more important line plots
+naxes = len(pk_corn)
+axes = np.array(figcorn.axes).reshape((naxes, naxes))
+
+# Loop over the diagonal
+for i in range(naxes):
+    ax = axes[i, i]
+    ax.axvline(median[i], color=mediancolor)
+
+# Loop over the histograms
+for yi in range(naxes):
+    for xi in range(yi):
+        ax = axes[yi, xi]
+        ax.axvline(median[xi], color=mediancolor)
+        ax.axhline(median[yi], color=mediancolor)
+        ax.plot(median[xi], median[yi], color = mediancolor, marker = 's')
+figcorn.savefig(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_corner.pdf', format = 'pdf')
+
+
+#Now export the covariance and correlation matrices, as well as the logevidence and autocorrelation time
+#The covariant matrix
+samplesT = samples_corn.T
+cov_matr=np.cov(samplesT)
+#print('Covariant matrix')
+df1 = pd.DataFrame(cov_matr, index = paramlabels_corner, columns=paramlabels_corner)
+df1.to_csv(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_covariance.csv')
+
+#The correlation matrix
+corr_matr=np.corrcoef(samplesT)
+#print('Correlation matrix')
+df2 = pd.DataFrame(corr_matr,index = paramlabels_corner, columns=paramlabels_corner)
+df2.to_csv(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_correlation.csv')
+
+#The logevidence (with error) and save it
+logevidence = sampler.log_evidence_estimate(fburnin=0.2)
+df3 = pd.DataFrame(logevidence,index = [r'logevidence', r'error'], columns=[r'vary='+str(vary_fund)+', nmax='+str(nmax)+', tshift='+str(tshift)+', '+str(npoints)+'pts'])
+df3.to_csv(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_logevidence.csv')
+
+#The autocorrelation time, this time I don't intend to delete alpha0 and beta0, just keep the nan's there since we don't really use this data, only look at it. It's good to keep it for sanity check's sake.
+atcrr = sampler.get_autocorr_time()[0]
+df4 = pd.DataFrame(atcrr,index = paramlabels, columns=[r'Autocorrelation time, vary='+str(vary_fund)+', nmax='+str(nmax)+', tshift='+str(tshift)+', '+str(npoints)+'pts'])
+df4.to_csv(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_autocrrtime.csv')  
+
+
+#Now plot alpha_0 on top of alpha_1 - only on top, not stacked, and only valid for vary_fund == True
+if nmax == 1 and vary_fund == True:
+    samplea0 = samplesT[0]
+    samplea1 = samplesT[1]
+    fighist1 = plt.figure(figsize = (8, 6))
+    n0, bins0, patches0 = plt.hist(samplea0, bins = numbins * 6, alpha = 0.5, label = r'$\alpha_0$')
+    n1, bins1, patches1 = plt.hist(samplea1, bins = numbins * 10, alpha = 0.5, label = r'$\alpha_1$')
+    #n01, bins01, patches01 = plt.hist([samplea0, samplea1], numbins * 10, rwidth = 3, alpha = 0.5, label = [r'$\alpha_0$', r'$\alpha_1$'])
+    plt.legend()
+    fighist1.savefig(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_histtop.pdf', format = 'pdf')
+
+    #This plot is stacked
+    fighist1 = plt.figure(figsize = (8, 6))
+    sn01, sbins01, spatches01 = plt.hist([samplea0, samplea1], numbins * 10, rwidth = 1, stacked = True, alpha = 0.5, label = [r'$\alpha_0$', r'$\alpha_1$'])
+    plt.legend()
+    fighist1.savefig(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_histstack.pdf', format = 'pdf')
+
+
+    
+#Now plot the NR data against the mcmc fit data, together with the 1-sigma varying error data
+onesig_bounds = np.array([np.percentile(samples[:, i], [16, 84]) for i in range(len(samples[0]))]).T
+modelfitpk = model_dv(pk)
+figband = plt.figure(figsize = (12, 9))
+#Plot the 1-sigma_percentile
+for j in range(len(samples)):
+    sample = samples[j]
+    if np.all(onesig_bounds[0] <= sample) and np.all(sample <= onesig_bounds[1]):
+        plt.plot(timesrd, model_dv(sample).real, "#79CAF2", alpha=0.3)
+    
+plt.plot(timesrd, gw_sxs_bbh_0305rd[:,1], "k", alpha=0.7, lw=2, label=r'NR_re')
+plt.plot(timesrd, modelfitpk.real, "r", alpha=0.7, lw=2, label=r'FitMCmax_re')
+plt.title(r'Comparison of the MC fit data and the $1-\sigma$ error band')
+plt.legend()
+plt.xlim(timesrd[0], timesrd[0]+80)
+plt.xlabel("t")
+plt.ylabel("h")
+
+figband.savefig(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_band.png', format = 'png', dpi = 384, bbox_inches = 'tight')
+#import os
+#os.system('afplay /System/Library/Sounds/Submarine.aiff')