diff --git a/getData.py b/getData.py
new file mode 100644
index 0000000000000000000000000000000000000000..6bc3c3e47797b41a4cd54fb5343a092331ea3e13
--- /dev/null
+++ b/getData.py
@@ -0,0 +1,302 @@
+from __future__ import division
+import argparse
+import datetime
+import generalio as g
+import os
+
+def makeparser():
+    parser=argparse.ArgumentParser()
+    g.register_custom_types(parser)
+    parser.add_argument("--start",type='date_time',help="Start date in format \"YYYY-MM-DD\" (default: none, in which case only --end is processed).",default="")
+    parser.add_argument("--end",type='date_time',help="End date in format \"YYYY-MM-DD\" (default is today).",default=datetime.date.today().strftime('%Y-%m-%d'))
+    parser.add_argument("--inputfolder",help="Parent folder where SFTs are located. If none, folders will be found automatically from the channel subsystem, using defaults for LHO.",default="")
+    parser.add_argument("--outputfolder",help="Parent folder where output data will be stored (include the trailing slash).")
+    parser.add_argument("--channellistfile",help="Channel list source (path to a newline-delimited text file).")
+    parser.add_argument("--channels",nargs="+",help="Additional channels (user specified).")
+    parser.add_argument("--name",help="Prefix for output files.")
+    parser.add_argument("--fmin",help="Frequency minimum (default 0).",type=float,default=0)
+    parser.add_argument("--fmax",help="Frequency maximum (default 2000).",type=float,default=2000)
+    parser.add_argument("--cumulative",type='bool',help="Compute cumulative plots (default False; requires --start).",default=False)
+    parser.add_argument("--checkonly",help="Don't compute anything, just print which days have data.",type='bool',default=False)
+    parser.add_argument("--useSFTs",help="Require data from SFTs rather than pre-averaged files.",type='bool',default=False)
+    parser.add_argument("--coherence",help="Query coherence spectra from STAMP-PEM.",type='bool',default=False)
+    parser.add_argument("--cohonly",help="Query coherence spectra from STAMP-PEM and nothing else.",type='bool',default=False)
+    parser.add_argument("--IFO",help="H1 or L1.",default="H1")
+    parser.add_argument("--STAMPPEMconfigfile",help="Configuration file for use with STAMP-PEM query; default /home/meyers/coherence_comb_tracking_setup/config_files/H1.ini",default="/home/meyers/coherence_comb_tracking_setup/config_files/H1.ini")
+
+    return parser
+
+def fscan_datefolder_from_inputfolder(inputfolder,date):
+    try:
+        datefolders=[path for path in os.listdir(inputfolder) if date.strftime('%Y_%m_%d') in path]
+        if len(datefolders)==1:
+            return datefolders[0]
+        elif len(datefolders)==0:
+            print("Error: Could not find a folder for the requested date ({}) within {}".format(date.strftime('%Y-%m-%d'),inputfolder))
+        else:
+            print("Error: too many matching folders found for the requested date ({}) within {}.".format(date.strftime('%Y-%m-%d'),inputfolder))
+    except OSError:
+        # The input folder itself could not be listed; fall through and return None
+        # so that the caller skips this date.
+        pass
+
+def inputfolder_from_channel(channel,IFO):
+    # Get the subsystem
+    subsystem=channel.split(":")[1].split("-")[0]
+    # Get the corresponding Fscan subfolder for that subsystem
+    subdir=[line.split(" ")[1][:-1] for line in open("fscanSubsystemPaths.config",'r') if line.split(" ")[0]==subsystem][0]
+    inputfolder="/home/pulsar/public_html/fscan/{0}_DUAL_ARM/{1}/{1}/".format(IFO,subdir)
+    return inputfolder
+
+def get_datefolder(inputfolder,channel,date,IFO):
+    if inputfolder=="":
+        inputfolder=inputfolder_from_channel(channel,IFO)
+    return inputfolder+fscan_datefolder_from_inputfolder(inputfolder,date)
+
+def main():
+    '''Collect daily Fscan spectra (and, optionally, STAMP-PEM coherence spectra) for a list of channels and save them as HDF5 files.'''
+    args=g.customparse_args(makeparser())
+    import sys
+    import traceback
+    import subprocess
+    import numpy as np
+    import glob
+    import hdf5io as h5
+    import cStringIO
+    from stamp_pem.query_online import get_channel_online_data
+    devnull = open(os.devnull, 'w')
+    cumuloverwrite=False
+
+    # If no start date is supplied, set up the calculation for only 1 day.
+    if args.start=="":
+        dates=[args.end]
+        ndates=1
+
+    # Otherwise, set up the calculation for a range of dates.
+    else:
+        ndates=(args.end-args.start).days+1
+        dates=[args.start + datetime.timedelta(n) for n in range(ndates)]
+
+    # Loop through channels.
+    # (args.chlist is expected to be assembled by generalio.customparse_args
+    # from --channellistfile and --channels.)
+    for ch in args.chlist:
+
+        # Set up variables to hold "old weights" for the cumulative sum.
+        oldcumulwtsname = ""
+        cumulwtsname = ""
+
+        # Print the channel name, nicely formatted.
+        div="="*len(ch)
+        print("\n"+div+"\n"+ch+"\n"+div)
+
+        # Loop through the date range.
+        for i in range(ndates):
+            # If the previous iteration wrote a cumulative weights file, remember it
+            # as the "old" file to accumulate onto.
+            if os.path.isfile(cumulwtsname):
+                oldcumulwtsname=str(cumulwtsname)
+
+            date=dates[i]
+            print("\nStarting on date {0}".format(date))
+            print("Old cumulative weights file: {}".format(oldcumulwtsname))
+            try:
+                datefolder=get_datefolder(args.inputfolder,ch,date,args.IFO)
+            except:
+                # No matching date folder could be found; move on to the next date.
+                continue
+            chfolder=datefolder+"/"+"_".join(ch.split(":"))+"/"
+
+            # Create the output names, prefixes, and folder.
+            outputfolder=args.outputfolder+args.name+"_"+date.strftime('%Y-%m-%d')+"_fscans/"
+            if not os.path.isdir(outputfolder):
+                os.mkdir(outputfolder)
+            outprefix=args.name+"_"+ch+"_"+date.strftime('%Y-%m-%d')
+            datname=outputfolder+"dat_{0}.hdf5".format(ch)
+            wtsname=outputfolder+"wts_{0}.hdf5".format(ch)
+            outfpath=outputfolder+outprefix+"_PWA.txt"
+
+            # If the user has specified that cumulative plots should be produced, also create these names:
+            if args.cumulative:
+                cumuloutputfolder=args.outputfolder+args.name+"_"+date.strftime('%Y-%m-%d')+"_fscan_cumulsince_"+args.start.strftime('%Y-%m-%d')
+                cumulwtsname=cumuloutputfolder+"/wts_{0}.hdf5".format(ch)
+                cumuldatname=cumuloutputfolder+"/dat_{0}.hdf5".format(ch)
+                if not os.path.isdir(cumuloutputfolder):
+                    os.mkdir(cumuloutputfolder)
+
+            # If the user has specified that coherence plots should be produced, also create these names:
+            if args.coherence or args.cohonly:
+                cohoutputfolder=args.outputfolder+args.name+"_"+date.strftime('%Y-%m-%d')+"_coherence/"
+                cohdatname=cohoutputfolder+"/dat_{0}.hdf5".format(ch)
+                if not os.path.isdir(cohoutputfolder):
+                    os.mkdir(cohoutputfolder)
+                print(os.path.isfile(cohdatname))
+
+            print("Finished setting up names.")
+
+#            print(args.cumulative,os.path.isfile(cumuldatname),os.path.isfile(cumulwtsname),os.path.isfile(wtsname))
+#            print(cumuldatname,cumulwtsname,wtsname)
+
+            # Decide whether everything requested for this channel and date already exists.
+            stop=False
+
+            # If we want cumulative data, but both cumulative outputs exist already:
+            if args.cumulative and os.path.isfile(cumuldatname) and os.path.isfile(cumulwtsname):
+                stop=True
+                print("All cumulative files exist already; skipping.")
+
+            # If we want cumulative data, but the weights file exists and it's the first day:
+            if args.cumulative and oldcumulwtsname=="" and os.path.isfile(cumulwtsname):
+                stop=True
+                print("Weights file exists; this is the first day; skipping.")
+
+            # If we want regular data and nothing else (no coherence, no cumulative):
+            if (not args.cumulative) and (not args.coherence) and (not args.cohonly) and os.path.isfile(datname):
+                stop=True
+                print("Dat file already exists; skipping.")
+
+            # If we want just coherence data, nothing else:
+            if args.cohonly and os.path.isfile(cohdatname):
+                stop=True
+                print("Coherence file already exists; skipping.")
+            # If we want coherence data and regular data:
+            if args.coherence and (not args.cumulative) and os.path.isfile(datname) and os.path.isfile(cohdatname):
+                stop=True
+                print("Data file and coherence file already exist; skipping.")
+
+            if stop:
+                print("Files for this channel and date already exist, skipping.")
+                continue
+
+            # Determine whether we need to calculate data from SFTs.
+            if (args.cumulative and (not os.path.isfile(wtsname))) or args.useSFTs:
+                SFTfolder=chfolder+"/sfts/tmp/"
+
+                # If there are no SFTs in the folder, alert the user and skip to the next day.
+                if len([path for path in os.listdir(SFTfolder) if path[-4:]==".sft"])==0:
+                    print("Found the relevant folder at {}, but no SFT files.".format(SFTfolder))
+                    continue
+
+                # If the user is only checking for data availability, report that files are available and skip to the next day.
+                if args.checkonly:
+                    print("Found SFT files for this channel and date.")
+                    print(chfolder)
+                    continue
+
+                # Time to run spec_avg_long and obtain the data from it.
+                print("Running spec_avg_long on {0}...".format(SFTfolder))
+                cmd=['/home/stephen.trembath/LALSUITE_SRCDIR/lalsuite/lalapps/src/pulsar/fscan/spec_avg_long',
+                     '--SFTs',SFTfolder+"*.sft",'--startGPS','1000000000','--endGPS','2000000000',
+                     '--fMin',str(args.fmin),'--fMax',str(args.fmax),
+                     '--freqRes','0.10','--timeBaseline','1800','--IFO',args.IFO,'--outputBname',outprefix]
+                print(" ".join(cmd))
+                subprocess.call(cmd,cwd=outputfolder,stdout=devnull,stderr=devnull)
+                print("Done with spec_avg_long.")
+                print("Loading data from PWA file at {}...".format(outfpath))
+                freq,tavgwt,sumwt = np.loadtxt(outfpath, comments='#', usecols=(0,1,2), unpack=True)
+                # Form the weighted-average spectrum, scaled by the 1800-s time baseline passed to spec_avg_long.
+                spec=tavgwt/sumwt*1800
+
+                # Then clean up the intermediate spec_avg_long outputs.
+                for suffix in ["","_timestamps",".txt","_PWA.txt","_date"]:
+                    tmpfname=outputfolder+outprefix+suffix
+                    if os.path.isfile(tmpfname):
+                        print("Removing {}".format(tmpfname))
+                        subprocess.call(["rm",tmpfname])
+
+                # And check the data for errors, skipping this date if any are found.
+                if np.isinf(sumwt[0]):
+                    print("This data appears to contain errors! Skipping this date.")
+                    continue
+
+            # If a cumulative output is requested but can be calculated from an existing weights file:
+            elif args.cumulative and os.path.isfile(wtsname):
+                print("Daily weights file already exists at {}.".format(wtsname))
+                print("Loading data from weights file...")
+                freq,tavgwt,sumwt=h5.load_spectrum_wts(wtsname)
+
+            # If we're not looking for cumulative output and the user didn't ask for SFTs or coherence only, it's fine to load from averaged files.
+            elif (not args.cumulative) and (not args.useSFTs) and (not args.cohonly):
+                print("Loading from time-averaged files...")
+                timeavgfiles=glob.glob(chfolder+"spec_*timeaverage")
+                if len(timeavgfiles)>0:
+                    print("Found time-averaged spectrum files in {}".format(chfolder))
+                    if args.checkonly:
+                        continue
+                    # Sort the per-band files by their minimum frequency and keep only bands at or below fmax.
+                    bandmins=np.array([float(x.split("spec_")[1].split("_")[0]) for x in timeavgfiles])
+                    ordargs=np.argsort(bandmins)
+                    timeavgfiles=np.array(timeavgfiles)[ordargs]
+                    bandmins=bandmins[ordargs]
+                    timeavgfiles=timeavgfiles[np.where(bandmins<=args.fmax)[0]]
+                    print("Loading {} spectrum files from {}...".format(len(timeavgfiles),chfolder))
+                    freq=[]
+                    valavg=[]
+                    for timeavgfile in timeavgfiles:
+                        freqi,valavgi = np.loadtxt(timeavgfile, comments='#', usecols=(0,1), unpack=True)
+                        freq.append(freqi)
+                        valavg.append(valavgi)
+                    spec=np.hstack(valavg)
+                    freq=np.hstack(freq)
+
+            else:
+                print("Error: no calculation being done... may be an arguments issue?")
+
+            # Query and save coherence data if requested and not already present.
+            if (args.coherence or args.cohonly) and (not os.path.isfile(cohdatname)):
+                cohbegin=date.strftime('%Y-%m-%d')
+                cohend=(date+datetime.timedelta(1)).strftime('%Y-%m-%d')
+                print("Querying coherence for {} to {}".format(cohbegin,cohend))
+                # Temporarily silence stdout while querying STAMP-PEM.
+                save_stdout = sys.stdout
+                sys.stdout = cStringIO.StringIO()
+                data = get_channel_online_data(ch,cohbegin,cohend,format='seglist',config_file=args.STAMPPEMconfigfile)
+                sys.stdout = save_stdout
+                if len(data)>0:
+                    # Combine all returned segments into a single coherence spectrum.
+                    combined_data = data[0]
+                    for j in range(1,len(data)):
+                        combined_data.update(data[j])
+                    N=combined_data.N
+                    vData=np.array(combined_data.get_coh().value,dtype=float)
+                    # Convert the coherence to -log10 of the false-alarm probability (1-coh)^(N-1).
+                    FAR=(1-vData)**(N-1)
+                    vData=-1*np.log10(FAR)
+                    vFreq=np.array(combined_data.get_coh().frequencies,dtype=float)
+                    h5.save_spectrum(cohdatname,vData,vFreq,fcutoff=args.fmax)
+                    print("Saved coherence spectrum at {}".format(cohdatname))
+                else:
+                    print("No coherence data found for this date.")
+
+            # Now save the files for whichever data exists.
+            # (The NameError guards skip saving when no spectrum was computed above.)
+            try:
+                if not os.path.isfile(wtsname):
+                    h5.save_spectrum_wts(wtsname,tavgwt,sumwt,freq)
+                    print("Saved daily weights .hdf5 file at {}...".format(wtsname))
+            except NameError:
+                pass
+
+            try:
+                if not os.path.isfile(datname):
+                    h5.save_spectrum(datname,spec,freq,fcutoff=args.fmax)
+                    print("Saved daily spectrum .hdf5 file at {}...".format(datname))
+            except NameError:
+                pass
+
+            if args.cumulative:
+                if oldcumulwtsname=="":
+                    # No prior cumulative file: start the cumulative sum from today's weights.
+                    if (not os.path.isfile(cumulwtsname)) or cumuloverwrite:
+                        try:
+                            h5.save_spectrum_wts(cumulwtsname,tavgwt,sumwt,freq)
+                            print("No prior cumulative weights file found; assuming this is day 1")
+                            print("Saved cumulative weights file at {}...".format(cumulwtsname))
+                            # oldcumulwtsname=str(cumulwtsname)
+                            if not cumuloverwrite:
+                                cumuloverwrite=True
+                                print("(Switching to overwrite mode due to new cumulative file being written.)")
+                        except NameError:
+                            pass
+                else:
+                    # Accumulate today's weights onto the running cumulative sum.
+                    oldfreq,oldtavgwt,oldsumwt=h5.load_spectrum_wts(oldcumulwtsname)
+                    print("[Prior cumulative weights file loaded from {}]".format(oldcumulwtsname))
+                    if (not os.path.isfile(cumuldatname)) or (not os.path.isfile(cumulwtsname)) or cumuloverwrite:
+                        h5.save_spectrum(cumuldatname,(oldtavgwt+tavgwt)/(oldsumwt+sumwt)*1800,freq,fcutoff=args.fmax)
+                        print("Saved cumulative data file at {}".format(cumuldatname))
+                        h5.save_spectrum_wts(cumulwtsname,oldtavgwt+tavgwt,oldsumwt+sumwt,freq)
+                        # oldcumulwtsname=str(cumulwtsname)
+                        print("Saved cumulative weights file at {}".format(cumulwtsname))
+                        if not cumuloverwrite:
+                            cumuloverwrite=True
+                            print("(Switching to overwrite mode due to new cumulative file being written.)")
+
+if __name__ == '__main__':
+    main()
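For reference, a hypothetical invocation of the script added above, based only on the argparse options it defines. The output path and channel name are placeholders, and it assumes that the custom 'date_time' and 'bool' types registered by generalio accept plain "YYYY-MM-DD" and "True"/"False" strings:

    python getData.py --start 2019-01-01 --end 2019-01-07 \
        --outputfolder /path/to/output/ --name myrun \
        --channels H1:PEM-EXAMPLE_CHANNEL_DQ --cumulative True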