Commit f5058d00 authored by Ansel Neunzert

Combined multiple ways to load data into a single script. After testing, this will replace chdata and chdataFscan and also load coherence data from STAMP-PEM
parent e12a4fff
from __future__ import division
import argparse
import datetime
import generalio as g
import os
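# Build the command-line argument parser (custom date/bool argument types are registered via generalio).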
def makeparser():
    parser=argparse.ArgumentParser()
    g.register_custom_types(parser)
    parser.add_argument("--start",type='date_time',help="Start date for plots",default="")
    parser.add_argument("--end",type='date_time',help="End date in format \"YYYY-MM-DD\" (default is today).",default=datetime.date.today().strftime('%Y-%m-%d'))
    parser.add_argument("--inputfolder",help="Parent folder where SFTs are located. If none, folders will be found automatically from the channel subsystem, using defaults for LHO.",default="")
    parser.add_argument("--outputfolder",help="Parent folder where output data will be stored")
    parser.add_argument("--channellistfile",help="Channel list source (path to a newline-delimited text file)")
    parser.add_argument("--channels",nargs="+",help="More channels (user specified)")
    parser.add_argument("--name",help="Prefix for output files")
    parser.add_argument("--fmin",help="Frequency minimum (default 0)",type=float,default=0)
    parser.add_argument("--fmax",help="Frequency maximum (default 2000)",type=float,default=2000)
    parser.add_argument("--cumulative",type='bool',help="Compute cumulative plots (default=False)",default=False)
    parser.add_argument("--checkonly",help="Don't compute anything, just print which days have data.",type='bool',default=False)
    parser.add_argument("--useSFTs",help="Require data from SFTs and not averaged files",type='bool',default=False)
    parser.add_argument("--coherence",help="Query coherence spectra from STAMP-PEM",type='bool',default=False)
    parser.add_argument("--cohonly",help="Query coherence spectra from STAMP-PEM and nothing else.",type='bool',default=False)
    parser.add_argument("--IFO",help="H1 or L1",default="H1")
    parser.add_argument("--STAMPPEMconfigfile",help="Configuration file for use with STAMP-PEM query; default /home/meyers/coherence_comb_tracking_setup/config_files/H1.ini",default="/home/meyers/coherence_comb_tracking_setup/config_files/H1.ini")
    return parser
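# Find the folder under inputfolder whose name contains the requested date (YYYY_MM_DD).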
def fscan_datefolder_from_inputfolder(inputfolder,date):
    try:
        datefolders=[path for path in os.listdir(inputfolder) if date.strftime('%Y_%m_%d') in path]
        if len(datefolders)==1:
            return datefolders[0]
        elif len(datefolders)==0:
            print("Error: Could not find a folder for the requested date ({}) within {}".format(date.strftime('%Y-%m-%d'),inputfolder))
        else:
            print("Error: too many matching folders found for the requested date ({}) within {}.".format(date.strftime('%Y-%m-%d'),inputfolder))
    except: pass
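# Map a channel to its parent Fscan folder using the subsystem lookup in fscanSubsystemPaths.config.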
def inputfolder_from_channel(channel,IFO):
    # Get the subsystem
    subsystem=channel.split(":")[1].split("-")[0]
    # Get the corresponding Fscan subfolder for that subsystem
    subdir=[line.split(" ")[1][:-1] for line in open("fscanSubsystemPaths.config",'r') if line.split(" ")[0]==subsystem][0]
    inputfolder="/home/pulsar/public_html/fscan/{0}_DUAL_ARM/{1}/{1}/".format(IFO,subdir)
    return inputfolder
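# Build the full path to the per-date Fscan folder, deriving the parent folder from the channel if none was given.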
def get_datefolder(inputfolder,channel,date,IFO):
    if inputfolder=="":
        inputfolder=inputfolder_from_channel(channel,IFO)
    return inputfolder+fscan_datefolder_from_inputfolder(inputfolder,date)
def main():
    '''Load daily spectra for each requested channel and date, either from SFTs (via spec_avg_long) or from existing time-averaged Fscan files, and optionally query coherence spectra from STAMP-PEM.'''
    args=g.customparse_args(makeparser())
    import sys
    import traceback
    import subprocess
    import numpy as np
    import glob
    import hdf5io as h5
    import cStringIO
    from stamp_pem.query_online import get_channel_online_data
    devnull = open(os.devnull, 'w')
    cumuloverwrite=False
    # If no start date is supplied, set up calculation for only 1 day.
    if args.start=="":
        dates=[args.end]
        ndates=1
    # Otherwise, set up calculation for a range of dates.
    else:
        ndates=(args.end-args.start).days+1
        dates=[args.start + datetime.timedelta(n) for n in range(ndates)]
    # Loop through channels
    for ch in args.chlist:
        # Set up variables to hold "old weights" for the cumulative sum.
        oldcumulwtsname = ""
        cumulwtsname = ""
        # Print the channel name, nicely formatted.
        div="="*len(ch)
        print("\n"+div+"\n"+ch+"\n"+div)
        # Loop through date range
        for i in range(ndates):
            try:
                if os.path.isfile(cumulwtsname):
                    oldcumulwtsname=str(cumulwtsname)
            except NameError:
                pass
            date=dates[i]
            print("\nStarting on date {0}".format(date))
            print("Old cumulative weights file: {}".format(oldcumulwtsname))
            try:
                datefolder=get_datefolder(args.inputfolder,ch,date,args.IFO)
            except:
                continue
            chfolder=datefolder+"/"+"_".join(ch.split(":"))+"/"
            # Create a whole bunch of output names, prefixes, and a folder
            outputfolder=args.outputfolder+args.name+"_"+date.strftime('%Y-%m-%d')+"_fscans/"
            if not os.path.isdir(outputfolder):
                os.mkdir(outputfolder)
            outprefix=args.name+"_"+ch+"_"+date.strftime('%Y-%m-%d')
            datname=(outputfolder+"dat_{0}.hdf5".format(ch))
            wtsname=(outputfolder+"wts_{0}.hdf5".format(ch))
            outfpath=outputfolder+outprefix+"_PWA.txt"
            # If the user has specified that cumulative plots should be produced, also create these names:
            if args.cumulative:
                cumuloutputfolder=args.outputfolder+args.name+"_"+date.strftime('%Y-%m-%d')+"_fscan_cumulsince_"+args.start.strftime('%Y-%m-%d')
                cumulwtsname=cumuloutputfolder+"/wts_{0}.hdf5".format(ch)
                cumuldatname=cumuloutputfolder+"/dat_{0}.hdf5".format(ch)
                if not os.path.isdir(cumuloutputfolder):
                    os.mkdir(cumuloutputfolder)
            # If user has specified that coherence plots should be produced, also create these names:
            if args.coherence or args.cohonly:
                cohoutputfolder=args.outputfolder+args.name+"_"+date.strftime('%Y-%m-%d')+"_coherence/"
                cohdatname=cohoutputfolder+"/dat_{0}.hdf5".format(ch)
                if not os.path.isdir(cohoutputfolder):
                    os.mkdir(cohoutputfolder)
                print(os.path.isfile(cohdatname))
            print("Finished setting up names.")
            # print(args.cumulative,os.path.isfile(cumuldatname),os.path.isfile(cumulwtsname),os.path.isfile(wtsname))
            # print(cumuldatname,cumulwtsname,wtsname)
            stop=False
            # If we want cumulative data, but both cumulative outputs exist already:
            if args.cumulative and os.path.isfile(cumuldatname) and os.path.isfile(cumulwtsname):
                stop=True
                print("All cumulative files exist already; skipping.")
            # If we want cumulative data, but the weights file exists and it's the first day:
            if args.cumulative and oldcumulwtsname=="" and os.path.isfile(cumulwtsname):
                stop=True
                print("Weights file exists; this is the first day; skipping.")
            # If we want regular data and nothing else (no coherence, no cumulative):
            if (not args.cumulative) and (not args.coherence) and (not args.cohonly) and os.path.isfile(datname):
                stop=True
                print("Dat file already exists; skipping.")
            # If we want just coherence data, nothing else:
            if (args.cohonly) and (os.path.isfile(cohdatname)):
                stop=True
                print("Coherence file already exists; skipping.")
            # If we want coherence data and regular data:
            if args.coherence and (not args.cumulative) and os.path.isfile(datname) and os.path.isfile(cohdatname):
                stop=True
                print("Data file and coherence file already exist; skipping.")
            if stop==True:
                print("Files for this channel and date already exist, skipping.")
                continue
            # Determine whether we need to calculate data from SFTs.
            if ((args.cumulative) and (not os.path.isfile(wtsname))) or args.useSFTs:
                SFTfolder=chfolder+"/sfts/tmp/"
                # if there are no SFTs in the folder, alert the user and skip to the next day
                if len([path for path in os.listdir(SFTfolder) if path[-4:]==".sft"])==0:
                    print("Found the relevant folder at {}, but no SFT files.".format(SFTfolder))
                    continue
                # If the user is only checking for data availability, print that files are available and skip to the next day.
                if args.checkonly:
                    print("Found SFT files for this channel and date.")
                    print(chfolder)
                    continue
                # Time to run spec_avg_long and obtain the data from it
                print("Running spec_avg_long on {0}...".format(SFTfolder))
                cmd=['/home/stephen.trembath/LALSUITE_SRCDIR/lalsuite/lalapps/src/pulsar/fscan/spec_avg_long','--SFTs',
                     SFTfolder+"*.sft",'--startGPS','1000000000','--endGPS', '2000000000','--fMin',str(args.fmin),'--fMax', str(args.fmax),
                     '--freqRes','0.10','--timeBaseline', '1800','--IFO',args.IFO,'--outputBname',outprefix]
                print(" ".join(cmd))
                subprocess.call(cmd,cwd=outputfolder,stdout=devnull,stderr=devnull)
                print("Done with spec_avg_long.")
                print("Loading data from PWA file at {}...".format(outfpath))
                freq,tavgwt,sumwt = np.loadtxt(outfpath, comments='#', usecols=(0,1,2), unpack=True)
                spec=tavgwt/sumwt*1800
                # Then clean up
                for suffix in ["","_timestamps",".txt","_PWA.txt","_date"]:
                    tmpfname=outputfolder+outprefix+suffix
                    if os.path.isfile(tmpfname):
                        print("Removing {}".format(tmpfname))
                        subprocess.call(["rm",tmpfname])
                # And check the data for errors, skipping if any are found
                if np.isinf(sumwt[0]):
                    print("This data appears to contain errors! Skipping this date.")
                    continue
            # If a cumulative output is requested, but can be calculated from a weights file:
            elif args.cumulative and os.path.isfile(wtsname):
                print("Daily weights file already exists at {}.".format(wtsname))
                print("Loading data from weights file...")
                freq,tavgwt,sumwt=h5.load_spectrum_wts(wtsname)
            # If we're not looking for cumulative output and the user didn't specify to use SFTs or deal with coherence only, it's fine to load from averaged files.
            elif (not args.cumulative) and (not args.useSFTs) and (not args.cohonly):
                print("Loading from time-averaged files...")
                timeavgfiles=glob.glob(chfolder+"spec_*timeaverage")
                if len(timeavgfiles)>0:
                    print("Found time-averaged spectrum files in {}".format(chfolder))
                    if args.checkonly:
                        continue
                    bandmins=np.array([float(x.split("spec_")[1].split("_")[0]) for x in timeavgfiles])
                    ordargs=np.argsort(bandmins)
                    timeavgfiles=np.array(timeavgfiles)[ordargs]
                    bandmins=bandmins[ordargs]
                    timeavgfiles=timeavgfiles[np.where(bandmins<=args.fmax)[0]]
                    print("Loading {} spectrum files from {}...".format(len(timeavgfiles),chfolder))
                    freq=[]
                    valavg=[]
                    for timeavgfile in timeavgfiles:
                        freqi,valavgi = np.loadtxt(timeavgfile, comments='#', usecols=(0,1), unpack=True)
                        freq.append(freqi)
                        valavg.append(valavgi)
                    spec=np.hstack(valavg)
                    freq=np.hstack(freq)
            else:
                print("Error: no calculation being done... may be an arguments issue?")
            if (args.coherence or args.cohonly) and (not os.path.isfile(cohdatname)):
                cohbegin=date.strftime('%Y-%m-%d')
                cohend=(date+datetime.timedelta(1)).strftime('%Y-%m-%d')
                print("Querying coherence for {} to {}".format(cohbegin,cohend))
                save_stdout = sys.stdout
                sys.stdout = cStringIO.StringIO()
                data = get_channel_online_data(ch,cohbegin,cohend,format='seglist', config_file=args.STAMPPEMconfigfile)
                sys.stdout = save_stdout
                if len(data)>0:
                    combined_data = data[0]
                    for j in range(1,len(data)):
                        combined_data.update(data[j])
                    N=combined_data.N
                    vData=np.array(combined_data.get_coh().value,dtype=float)
                    FAR=(1-vData)**(N-1)
                    vData=-1*np.log10(FAR)
                    vFreq=np.array(combined_data.get_coh().frequencies,dtype=float)
                    h5.save_spectrum(cohdatname,vData,vFreq,fcutoff=args.fmax)
                    print("Saved coherence spectrum at {}".format(cohdatname))
                else:
                    print("No coherence data found for this date.")
            # Now save the files for whichever data exists
            try:
                if (not os.path.isfile(wtsname)):
                    h5.save_spectrum_wts(wtsname,tavgwt,sumwt,freq)
                    print("Saved daily weights .hdf5 file at {}...".format(wtsname))
            except NameError:
                pass
            try:
                if (not os.path.isfile(datname)):
                    h5.save_spectrum(datname,spec,freq,fcutoff=args.fmax)
                    print("Saved daily spectrum .hdf5 file at {}...".format(datname))
            except NameError:
                pass
            if args.cumulative:
                if oldcumulwtsname=="":
                    if (not os.path.isfile(cumulwtsname)) or cumuloverwrite==True:
                        try:
                            h5.save_spectrum_wts(cumulwtsname,tavgwt,sumwt,freq)
                            print("No prior cumulative weights file found; assuming this is day 1")
                            print("Saved cumulative weights file at {}...".format(cumulwtsname))
                            # oldcumulwtsname=str(cumulwtsname)
                            if cumuloverwrite==False:
                                cumuloverwrite=True
                                print("(Switching to overwrite mode due to new cumulative file being written.)")
                        except NameError:
                            pass
                else:
                    oldfreq,oldtavgwt,oldsumwt=h5.load_spectrum_wts(oldcumulwtsname)
                    print("[Prior cumulative weights file loaded from {}]".format(oldcumulwtsname))
                    if (not os.path.isfile(cumuldatname)) or (not os.path.isfile(cumulwtsname)) or cumuloverwrite==True:
                        h5.save_spectrum(cumuldatname,(oldtavgwt+tavgwt)/(oldsumwt+sumwt)*1800,freq,fcutoff=args.fmax)
                        print("Saved cumulative data file at {}".format(cumuldatname))
                        h5.save_spectrum_wts(cumulwtsname,oldtavgwt+tavgwt,oldsumwt+sumwt,freq)
                        # oldcumulwtsname=str(cumulwtsname)
                        print("Saved cumulative weights file at {}".format(cumulwtsname))
                        if cumuloverwrite==False:
                            cumuloverwrite=True
                            print("(Switching to overwrite mode due to new cumulative file being written.)")
if __name__ == '__main__':
    main()