RDGW150914_Phoenix_n0t0T.py
    #!/usr/bin/env python
    # coding: utf-8
    import warnings
    warnings.simplefilter("ignore") #Suppresses RuntimeWarnings. If you are changing the code for TESTING, MAKE SURE TO COMMENT THIS OUT.
    
    '''
    Rising from the ashes.
    
    --- R^a_{yne} L^i_u, 08/22/2020
    '''
    
    import numpy as np
    import corner
    import matplotlib.pyplot as plt
    from matplotlib.ticker import MaxNLocator
    from matplotlib import rc
    #plt.rcParams['font.family'] = 'DejaVu Sans'
    #rc('text', usetex=True)
    plt.rcParams.update({'font.size': 16.5})
    
    import ptemcee
    from pycbc.pool import choose_pool
    import h5py
    import inspect
    import pandas as pd
    import json
    import qnm
    import random
    
    #Remember to change the following global variables
    #rootpath: root path to the NR data
    #npoints: number of points you're using for your sampling
    #nmax: tone index --> nmax = 0 if fitting only the fundamental tone
    #tshift: time shift after the strain peak
    #vary_fund: whether you vary the fundamental frequency. Used in the model_dv function.
    
    rootpath= "/work/rayne.liu"#"/Users/RayneLiu"
    nmax=0
    tshift=0
    vary_fund = False
    
    #sampler parameters
    npoints = 240
    nwalkers = 120
    ntemps=16
    dim = nmax+1
    ndim = 4*dim
    burnin = 120  #How many points to discard before making the corner plot. Watch the convergence of the chain plot to choose this.
                  #Trivial but often forgotten: burnin cannot exceed npoints! Usually 1/5~1/4 of npoints is what I observe.
    numbins = 42 #corner plot parameter - how many bins you want
    datacolor = '#105670' #'#4fa3a7'
    pkcolor = '#f2c977' #'#ffb45f'
    mediancolor = '#f7695c' #'#9b2814'
    
    #Import data and necessary functions
    
    #TimeOfMaximum
    def FindTmaximum(y):
        #Returns the time at which the complex strain reaches its maximum amplitude
        absval = y[:,1]*y[:,1]+y[:,2]*y[:,2]
        index = np.argmax(absval)
        timemax = y[index,0]
        return timemax
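    #Minimal check sketch (toy data, not from the original run): a two-sample waveform
    #whose second row has the larger amplitude should peak at t = 1.0.
    #assert FindTmaximum(np.array([[0.0, 0.1, 0.0], [1.0, 1.0, 0.0]])) == 1.0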
    
    
    
    
    #This loads the 22 mode data
    gw = {}
    gw["SXS:BBH:0305"] = h5py.File(rootpath+"/git/rdstackingproject/SXS/BBH_SKS_d14.3_q1.22_sA_0_0_0.330_sB_0_0_-0.440/Lev6/rhOverM_Asymptotic_GeometricUnits_CoM.h5", 'r')
    gw_sxs_bbh_0305 = gw["SXS:BBH:0305"]["Extrapolated_N2.dir"]["Y_l2_m2.dat"]
    
    # Remember to download Lev6/metadata.json for the simulation SXS:BBH:0305
    # This postprocesses the metadata file to find the final mass and final spin
    metadata = {}
    with open(rootpath+"/git/rdstackingproject/SXS/BBH_SKS_d14.3_q1.22_sA_0_0_0.330_sB_0_0_-0.440/Lev6/metadata.json") as file:
        metadata["SXS:BBH:0305"] = json.load(file)
    
    af = metadata["SXS:BBH:0305"]['remnant_dimensionless_spin'][-1]
    mf = metadata["SXS:BBH:0305"]['remnant_mass']
    
    
    
    #times --> x axis of your data
    times = gw_sxs_bbh_0305[:,0]
    tmax=FindTmaximum(gw_sxs_bbh_0305)
    t0=tmax +tshift
    
    #Select the data from t0 onwards
    position = np.argmax(times >= (t0))
    gw_sxs_bbh_0305rd=gw_sxs_bbh_0305[position:-1]
    timesrd=gw_sxs_bbh_0305[position:-1][:,0]
    #print(timesrd[0])
    #print(t0) (This checks that timesrd[0] is indeed t0)
    timespan = timesrd - t0
    
    # Depending on nmax, load the first nmax+1 (= dim) frequencies and damping times from the qnm package
    omegas = [qnm.modes_cache(s=-2,l=2,m=2,n=i)(a=af)[0] for i in range (0,dim)]
    w = (np.real(omegas))/mf
    tau=-1/(np.imag(omegas))*mf
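    #Unit note: the qnm package returns the dimensionless Kerr frequency M_f*omega_n,
    #so dividing the real part by mf (and multiplying the damping time by mf) converts
    #to the simulation's total-mass units. Quick eyeball check against Kerr QNM tables:
    #print('w =', w, ', tau =', tau)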
    
    
    
    #Fitting
    #RD model for nmax tones. Amplitudes are in the (xn*Exp[i yn]) form, used here:
    #h(t) = sum_n  x_n e^{i y_n} e^{-t/((1+b_n) tau_n)} e^{-i (1+a_n) w_n t}
    def model_dv(theta):
        #Your nmax might not align with the dim of theta. Better check it here.
        assert int(len(theta)/4) == dim, 'Please recheck your n and parameters'

        #Copy the first two slices so that zeroing a_0/b_0 below cannot mutate the
        #walker positions in place (plain numpy slices are views)
        avars = np.array(theta[ : (dim)])
        bvars = np.array(theta[(dim) : 2*(dim)])
        xvars = theta[2*(dim) : 3*(dim)]
        yvars = theta[3*(dim) : ]

        if vary_fund == False:
            avars[0]=0
            bvars[0]=0

        ansatz = 0
        for i in range (0,dim):
            ansatz += (xvars[i]*np.exp(1j*yvars[i]))*np.exp(-timespan/(tau[i]*(1+bvars[i]))) * (np.cos((1+avars[i])*w[i]*timespan)-1j*np.sin((1+avars[i])*w[i]*timespan))
        # -1j to agree with the SXS convention
        return ansatz
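    #Optional self-test (a sketch, not part of the original pipeline): with all deviations
    #and phases zero and x_0 = 1, the ansatz must reduce to a single damped sinusoid at
    #the fundamental Kerr frequency, for any nmax.
    def _check_model():
        theta0 = np.zeros(ndim)
        theta0[2*dim] = 1.0  #x_0 = 1; every alpha, beta, y and higher-tone x stays 0
        expected = np.exp(-timespan/tau[0]) * np.exp(-1j*w[0]*timespan)
        assert np.allclose(model_dv(theta0), expected), 'model_dv sanity check failed'
    _check_model()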
    
    # Logprior distribution. It defines the allowed ranges the variables can vary over.
    #It works for the (xn*Exp[i yn]) version.
    def log_prior(theta):
        #Warning: the ranges below are manually tuned for specific (nmax, tshift, vary_fund) combinations; they do not carry over to other nmax's.
        avars = theta[ : (dim)]
        bvars = theta[(dim) : 2*(dim)]
        xvars = theta[2*(dim) : 3*(dim)]
        yvars = theta[3*(dim) : ]
        
        if nmax == 0:
            if all([0 <= tshift <= 5, vary_fund == True, -0.45 <= avars[0] <= 0.05, -0.95 <= bvars[0] <= 3.0, 0 <= xvars[0] <= 3.0, -np.pi <= yvars[0] <= np.pi]):        
                return 0.0
            elif all([tshift == 19, vary_fund == True, -3.0 <= avars[0] <= 3.0, -2.0 <= bvars[0] <= 5.0, 0 <= xvars[0] <= 1.0, 0 <= yvars[0] <= 2*np.pi]):        
                return 0.0
            elif all([0 <= tshift <= 5, vary_fund == False, -1.0 <= avars[0] <= 1.0, -1.0 <= bvars[0] <= 1.0, 0 <= xvars[0] <= 3.0, -np.pi <= yvars[0] <= np.pi]):
                return 0.0
            elif all([tshift == 19, vary_fund == False, -1.0 <= avars[0] <= 1.0, -1.0 <= bvars[0] <= 1.0, 0 <= xvars[0] <= 3.0, 0 <= yvars[0] <= 2*np.pi]):
                return 0.0
            
        elif nmax == 1:
            if all([0 <= tshift <= 5, vary_fund == True, -3.0 <= avars[0] <= 3.0, -3.0 <= avars[1] <= 3.0, -2.0 <= bvars[0] <= 12.0, -4.0 <= bvars[1] <= 30.0, 0 <= xvars[0] <= 1.6, 0 <= xvars[1] <= 1.4, -np.pi <= yvars[0] <= np.pi, -np.pi <= yvars[1] <= np.pi]):        
                return 0.0
            elif all([tshift == 19, vary_fund == True, -10.0 <= avars[0] <= 10.0, -10.0 <= avars[1] <= 10.0, -20.0 <= bvars[0] <= 30.0, -25.0 <= bvars[1] <= 30.0, 0 <= xvars[0] <= 0.6, 0 <= xvars[1] <= 0.9, 0 <= yvars[0] <= 2*np.pi, -np.pi <= yvars[1] <= np.pi]):
                return 0.0
    
            elif all([0 <= tshift <= 5, vary_fund == False, -10.0 <= avars[0] <= 10.0, -1.5 <= avars[1] <= 1.5, -9.0 <= bvars[0] <= 9.0, -6.0 <= bvars[1] <= 20.0, 0 <= xvars[0] <= 2.4, 0 <= xvars[1] <= 2.5, -np.pi <= yvars[0] <= np.pi, -np.pi <= yvars[1] <= np.pi]):
                return 0.0
            elif all([tshift == 19, vary_fund == False, -10.0 <= avars[0] <= 10.0, -8.0 <= avars[1] <= 8.0, -9.0 <= bvars[0] <= 9.0, -10.0 <= bvars[1] <= 12.0, 0 <= xvars[0] <= 0.6, 0 <= xvars[1] <= 0.7, 0 <= yvars[0] <= 2*np.pi, 0 <= yvars[1] <= 2* np.pi]):
                return 0.0
    
        return -np.inf
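    #A possible refactor (sketch, not used anywhere below): every branch above is an
    #axis-aligned box, so a single helper with per-case (lo, hi) tables would keep the
    #ranges in one place.
    #def in_box(theta, lo, hi):
    #    return 0.0 if np.all((np.asarray(lo) <= theta) & (theta <= np.asarray(hi))) else -np.inf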
    
    
    # LogLikelihood function: an unnormalized Gaussian log-likelihood built from the squared residuals
    def log_likelihood(theta):
        modelev = model_dv(theta)
        result = -np.sum((gw_sxs_bbh_0305rd[:,1] - (modelev.real))**2+(gw_sxs_bbh_0305rd[:,2] - (modelev.imag))**2)
        if np.isnan(result):
            return -np.inf
        return result
    
    
    # Logposterior distribution for the residuals case.
    # The evidence is just a normalization factor. (ptemcee below takes the
    # log-likelihood and log-prior separately, so this function is kept for reference.)
    def log_probability(theta):
        lp = log_prior(theta)
        if not np.isfinite(lp):
            return -np.inf
        return lp + log_likelihood(theta)
    
    
    
    #Fit with ptemcee
    #Set the number of worker processes in the pool
    pool = choose_pool(16)
    pool.size = 16
    #vary_param = float(vary_fund)
    np.random.seed(42)
    pos = np.array([[random.uniform(-0.1,0.01), random.uniform(-0.1,0.1), random.uniform(0.0,0.5), random.uniform(0, 0.1)]])
    #print(pos)
    for i in range (1,dim):
        pos_aux = np.array([[random.uniform(-0.1,0.01), random.uniform(-0.1,0.1), random.uniform(0.3,0.4), random.uniform(0.01, 0.02)]])
        pos = np.concatenate((pos, pos_aux), axis = 0)
        
    pos = pos.T.flatten()
    #Broadcast the single starting point into an (ntemps, nwalkers, ndim) Gaussian ball
    pos = pos + 1e-5 * np.random.randn(ntemps, nwalkers, ndim)
    #print(pos)
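    #Defensive check (a sketch, not in the original pipeline): ptemcee quietly wastes
    #iterations on walkers that start where log_prior = -inf, so count any that fall
    #outside the prior box before sampling.
    n_bad = sum(not np.isfinite(log_prior(p)) for p in pos.reshape(-1, ndim))
    if n_bad > 0:
        print('Warning: %d of %d walkers start outside the prior support' % (n_bad, ntemps*nwalkers))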
    sampler = ptemcee.Sampler(nwalkers, ndim, log_likelihood, log_prior, ntemps=ntemps, pool=pool)
    sampler.run_mcmc(pos,npoints)
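    #Optional (a sketch): persist the raw zero-temperature chain right away so the plots
    #below can be regenerated without re-running the sampler; the .npy filename here is
    #hypothetical.
    #np.save(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_rawchain.npy', sampler.chain[0])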
    
    
    
    
    #Define labels and start plotting
    paramlabels_a = [r'$\alpha_'+str(i)+'$' for i in range (dim)]
    paramlabels_b = [r'$\beta_'+str(i)+'$' for i in range (dim)]
    paramlabels_x = [r'$x_'+str(i)+'$' for i in range (dim)]
    paramlabels_y = [r'$y_'+str(i)+'$' for i in range (dim)] #if vary_fund == True else ['$y_'+str(i)+'-\pi$' for i in range (dim)]
    paramlabels = paramlabels_a + paramlabels_b + paramlabels_x + paramlabels_y
    #Need to delete alpha_0 and beta_0 for the corner plot when the fundamental is fixed
    paramlabels_corner = paramlabels_a + paramlabels_b + paramlabels_x + paramlabels_y
    if vary_fund == False:
        del paramlabels_corner[0]     #removes alpha_0
        del paramlabels_corner[nmax]  #after the first deletion, index nmax is beta_0
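    #Consistency check (sketch): the two deletions above must mirror the column deletion
    #np.delete(samples, np.s_[0, dim], 1) applied to the samples themselves further down.
    if vary_fund == False:
        assert len(paramlabels_corner) == ndim - 2, 'corner labels out of sync with deleted columns'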
    
    
    
    #Chain plot
    fig, axes = plt.subplots(ndim, 1, sharex=True, figsize=(12, 9*(dim)))
    for i in range(ndim):
        axes[i].plot(sampler.chain[0,:, :, i].T, color="k", alpha=0.4, rasterized=True)
        axes[i].yaxis.set_major_locator(MaxNLocator(5))
        axes[i].set_ylabel(paramlabels[i])
    axes[-1].set_xlabel('Iterations')
    #plt.show()
    fig.savefig(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_chainplot.png', format = 'png', bbox_inches = 'tight')
    
    
    
    #Burn in the samples, find the sample with peak likelihood (the maximum over the chain, not necessarily the global maximum), and make the corner plot
    samples = sampler.chain[0,:, burnin:, :].reshape((-1, ndim))
    #samples for corner plot
    samples_corn = samples if vary_fund == True else np.delete(samples, np.s_[0,dim], 1)
    df0 = pd.DataFrame(samples_corn, columns=paramlabels_corner)
    df0.to_csv(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_chain.csv', index = False)
    
    
    #print('Values with peak likelihood:')
    lglk = np.array([log_likelihood(samples[i]) for i in range(len(samples))])
    pk = samples[np.argmax(lglk)]
    #print('pk:')
    #print(pk)
    pk_corn = pk if vary_fund == True else np.delete(pk, [0,dim])
    #The y_0 range needs some massaging to make the plot. But to keep the whole picture consistent, better change the range of y_1 too.
    #if vary_fund == False:
    #    samples_corn.T[-dim:] -= np.pi #This indeed changes samples_corn itself
    #    pk[-dim:] -= np.pi
    
    #print('pkFalse:')
    #print(pk)
        
    #print(pk) 
    #Now calculate median (50-percentile) value
    median = np.median(samples_corn, axis=0)
    #print(samples)
    #print(samples_corn)
    
    figcorn = corner.corner(samples_corn, bins = numbins, hist_bin_factor = 5, color = datacolor, truths=pk_corn, truth_color = pkcolor, plot_contours = True, labels = paramlabels_corner, quantiles=(0.05, 0.16, 0.5, 0.84, 0.95), levels=[1-np.exp(-0.5), 1-np.exp(-1.64 ** 2/2)], show_titles=True)
    
    
    #Extract the axes in order to add more important line plots
    naxes = len(pk_corn)
    axes = np.array(figcorn.axes).reshape((naxes, naxes))
    
    # Loop over the diagonal
    for i in range(naxes):
        ax = axes[i, i]
        ax.axvline(median[i], color=mediancolor)
    
    # Loop over the off-diagonal 2D panels
    for yi in range(naxes):
        for xi in range(yi):
            ax = axes[yi, xi]
            ax.axvline(median[xi], color=mediancolor)
            ax.axhline(median[yi], color=mediancolor)
            ax.plot(median[xi], median[yi], color = mediancolor, marker = 's')
    figcorn.savefig(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_corner.pdf', format = 'pdf')
    
    
    #Now export the covariance and correlation matrices, as well as the log-evidence and autocorrelation time
    #The covariance matrix
    samplesT = samples_corn.T
    cov_matr=np.cov(samplesT)
    #print('Covariance matrix')
    df1 = pd.DataFrame(cov_matr, index = paramlabels_corner, columns=paramlabels_corner)
    df1.to_csv(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_covariance.csv')
    
    #The correlation matrix
    corr_matr=np.corrcoef(samplesT)
    #print('Correlation matrix')
    df2 = pd.DataFrame(corr_matr,index = paramlabels_corner, columns=paramlabels_corner)
    df2.to_csv(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_correlation.csv')
    
    #The log-evidence (with its error estimate), saved to disk
    logevidence = sampler.log_evidence_estimate(fburnin=0.2)
    df3 = pd.DataFrame(logevidence,index = [r'logevidence', r'error'], columns=[r'vary='+str(vary_fund)+', nmax='+str(nmax)+', tshift='+str(tshift)+', '+str(npoints)+'pts'])
    df3.to_csv(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_logevidence.csv')
    
    #The autocorrelation time. This time alpha_0 and beta_0 are not deleted; their NaN entries are kept, since this output is only inspected as a sanity check.
    atcrr = sampler.get_autocorr_time()[0]
    df4 = pd.DataFrame(atcrr,index = paramlabels, columns=[r'Autocorrelation time, vary='+str(vary_fund)+', nmax='+str(nmax)+', tshift='+str(tshift)+', '+str(npoints)+'pts'])
    df4.to_csv(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_autocrrtime.csv')  
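    #Rough effective-sample-size estimate (a sketch; np.nanmean skips the frozen
    #parameters whose autocorrelation time is NaN when vary_fund == False):
    #print('approx ESS:', (npoints - burnin) * nwalkers / np.nanmean(atcrr))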
    
    
    #Now plot alpha_0 on top of alpha_1 - overlaid, not stacked; only valid for vary_fund == True
    if nmax == 1 and vary_fund == True:
        samplea0 = samplesT[0]
        samplea1 = samplesT[1]
        fighist1 = plt.figure(figsize = (8, 6))
        n0, bins0, patches0 = plt.hist(samplea0, bins = numbins * 6, alpha = 0.5, label = r'$\alpha_0$')
        n1, bins1, patches1 = plt.hist(samplea1, bins = numbins * 10, alpha = 0.5, label = r'$\alpha_1$')
        #n01, bins01, patches01 = plt.hist([samplea0, samplea1], numbins * 10, rwidth = 3, alpha = 0.5, label = [r'$\alpha_0$', r'$\alpha_1$'])
        plt.legend()
        fighist1.savefig(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_histtop.pdf', format = 'pdf')
    
        #This plot is stacked
        fighist1 = plt.figure(figsize = (8, 6))
        sn01, sbins01, spatches01 = plt.hist([samplea0, samplea1], numbins * 10, rwidth = 1, stacked = True, alpha = 0.5, label = [r'$\alpha_0$', r'$\alpha_1$'])
        plt.legend()
        fighist1.savefig(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_histstack.pdf', format = 'pdf')
    
    
        
    #Now plot the NR data against the MCMC fit, together with an approximate 1-sigma error band
    onesig_bounds = np.array([np.percentile(samples[:, i], [16, 84]) for i in range(len(samples[0]))]).T
    modelfitpk = model_dv(pk)
    figband = plt.figure(figsize = (12, 9))
    #Plot the samples whose every parameter lies within its marginal 16th-84th percentile range
    for j in range(len(samples)):
        sample = samples[j]
        if np.all(onesig_bounds[0] <= sample) and np.all(sample <= onesig_bounds[1]):
            plt.plot(timesrd, model_dv(sample).real, "#79CAF2", alpha=0.3)
        
    plt.plot(timesrd, gw_sxs_bbh_0305rd[:,1], "k", alpha=0.7, lw=2, label=r'NR_re')
    plt.plot(timesrd, modelfitpk.real, "r", alpha=0.7, lw=2, label=r'FitMCmax_re')
    plt.title(r'Comparison of the MC fit data and the $1\sigma$ error band')
    plt.legend()
    plt.xlim(timesrd[0], timesrd[0]+80)
    plt.xlabel("t")
    plt.ylabel("h")
    
    figband.savefig(rootpath+'/git/rdstackingproject/plotsmc/vary'+str(vary_fund)+'nmax='+str(nmax)+'_tshift='+str(tshift)+'_'+str(npoints)+'pt_band.png', format = 'png', dpi = 384, bbox_inches = 'tight')
    #import os
    #os.system('afplay /System/Library/Sounds/Submarine.aiff')