Select Git revision
pulsatingscience.cpp
readFrames.py 7.68 KiB
#!/usr/bin/python
# Grant David Meadors
# 02012-08-14 (JD 2456154)
# g m e a d o r s @ u m i c h . e d u
# readFrames
# Based on code by Gregory Mendell
import re, numpy
from pylal.Fr import frgetvect1d
def readFrames(fileList, chanName, startGPSTime, duration, fileListIsInMemory=None, startIndex=None):
    """Read a stretch of one channel from a sequence of frame files.

    Python port of the Matlab readFrames by Gregory Mendell.

    Usage:
        data, lastIndex, errCode, sRate, times = readFrames(
            fileList, chanName, startGPSTime, duration,
            fileListIsInMemory, startIndex)

    Examples:
        1. Read 60 s of H1:LSC-DARM_ERR from the files listed in a lal
           cache file (e.g., as returned by ligo_data_find):

               x = readFrames('myLALCacheFile.txt', 'H1:LSC-DARM_ERR', 940000000, 60)

        2. Read 60 s of H1:LSC-DARM_ERR when myFileList is a list of
           filenames held in memory:

               x = readFrames(myFileList, 'H1:LSC-DARM_ERR', 940000000, 60, 1)

        3. Step through a long in-memory list 60 s at a time, resuming
           each call where the previous one stopped:

               myFileList = ['/path/filename1', '/path/filename2', ...]
               myChannel = 'H1:LSC-DARM_ERR'
               startGPSTime = 940000000
               endGPSTime = 940000000 + 86400
               duration = 60
               startIndex = 1
               while startGPSTime < endGPSTime:
                   x = readFrames(myFileList, myChannel, startGPSTime, duration, 1, startIndex)
                   lastIndex = x[1]
                   # Do something with x...
                   # Update for the next call in the loop:
                   startGPSTime = startGPSTime + duration
                   startIndex = lastIndex

    Inputs:
        fileList: Name of a lal-cache style file listing the frame files
            (the frame location is the fifth whitespace-separated column),
            or, when fileListIsInMemory > 0, a list of filenames.
        chanName: Name of the channel to read from the frames.
        startGPSTime: GPS time of the first sample to return.
        duration: Duration in seconds to return.
        fileListIsInMemory: Set to 1 if fileList is already a list of
            filenames rather than the name of a cache file (optional,
            default 0).
        startIndex: 1-based index in the list of files at which to start
            (optional, default 1; values below 1 are treated as 1).

    Entries may be plain paths or contain a 'file://localhost' prefix;
    everything after that prefix is used as the path.  Each file name must
    contain '-<GPS start>-<duration>.' so its time span can be parsed.

    Outputs (returned as a list, in this order):
        data: The channel data, concatenated over all files read.
        lastIndex: 1-based index of the last file examined, suitable as
            startIndex for the next call.  Equals startIndex if no file was
            examined.
        errCode: 0 means no error; 1 means frgetvect1d raised KeyError for
            at least one file; 2 means no data was found; 3 means less than
            the requested duration was found.  Codes 2 and 3 take
            precedence over 1.
        sRate: Sample rate, estimated as round(len(data) / duration);
            0 when no data was found.
        times: Sample times in seconds, where 0 corresponds to
            startGPSTime; empty when sRate is 0.

    Raises:
        ValueError: if a file name does not contain a parsable
            '-<GPS start>-<duration>.' section.
    """
    fileLocalHostStr = 'file://localhost'
    fileLocalHostStrLen = len(fileLocalHostStr)
    # Compiled once, outside the loop; raw string so \d is a regex escape.
    gpsPattern = re.compile(r'-(?P<GPS>\d+)-(?P<DUR>\d+)\.')

    # Set default values:
    if fileListIsInMemory is None:
        fileListIsInMemory = 0
    if startIndex is None:
        startIndex = 1
    # Indices are 1-based.  A value below 1 would turn the slice below into
    # a negative index and silently select files from the END of the list.
    startIndex = max(startIndex, 1)

    errCode = 0
    endGPSTime = startGPSTime + duration
    durationFound = 0
    chunks = []

    # Read in the fileList or use the in-memory list directly.
    if fileListIsInMemory > 0:
        listOfFiles = fileList
    else:
        # The context manager closes the cache file even if parsing fails;
        # blank lines (e.g. a trailing newline) are skipped.
        with open(fileList) as fileListObject:
            listOfFiles = [line.split()[4] for line in fileListObject if line.strip()]

    # Main loop over the files.  lastIndex starts at startIndex so it is
    # defined even when there is nothing to iterate over.
    lastIndex = startIndex
    for offset, entry in enumerate(listOfFiles[startIndex - 1:]):
        # Convert the offset within the slice back to a 1-based index into
        # the full list, so the caller can resume from it.
        lastIndex = startIndex + offset

        # Keep everything past file://localhost, if present.
        thisLine = str(entry)
        thisPos = thisLine.find(fileLocalHostStr)
        if thisPos > -1:
            thisFile = thisLine[thisPos + fileLocalHostStrLen:]
        else:
            thisFile = thisLine

        # Parse out the GPS start time and duration of this file.
        regExpOut = gpsPattern.search(thisFile)
        if regExpOut is None:
            raise ValueError('Cannot parse GPS start time and duration from frame file name: ' + thisFile)
        thisStartTime = int(regExpOut.group('GPS'))
        thisEndTime = thisStartTime + int(regExpOut.group('DUR'))

        if thisEndTime <= startGPSTime:
            # This file ends before the start of the data we want.
            continue
        if thisStartTime >= endGPSTime:
            # This file starts after the end of the data we want.
            break

        # This file contains some of the data we want; read the overlap.
        gpsStart = max(startGPSTime, thisStartTime)
        gpsEnd = min(endGPSTime, thisEndTime)
        dur = gpsEnd - gpsStart
        try:
            thisData = frgetvect1d(thisFile, chanName, gpsStart, dur)[0]
        except KeyError:
            errCode = 1
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('Error reading data from ' + str(thisFile))
        else:
            chunks.append(thisData)
            durationFound = durationFound + dur

        if thisEndTime >= endGPSTime:
            # This file reaches the end of the data we want.
            break

    # Concatenate once; the leading empty float array keeps the same dtype
    # promotion as growing an initially empty numpy.array([]).
    data = numpy.concatenate([numpy.array([])] + chunks)
    nSamples = len(data)

    if nSamples == 0:
        print('No data found')
        errCode = 2
    elif durationFound < duration:
        print('Some data is missing')
        errCode = 3

    # Round to the nearest integer rate.  float() forces true division so
    # the rounding also works with integer arguments under Python 2.
    if nSamples > 0 and duration > 0:
        sRate = int(numpy.floor(float(nSamples) / duration + 0.5))
    else:
        sRate = 0

    # Avoid dividing by zero when no rate could be determined, so that the
    # error code is actually returned to the caller.
    if sRate > 0:
        times = numpy.arange(nSamples) * (1.0 / sRate)
    else:
        times = numpy.array([])

    return [data, lastIndex, errCode, sRate, times]
# For testing demonstrations:
#output = readFrames('/home/pulsar/feedforward/2012/08/14/AMPS/cache/fileList-DARM-953164815-953165875.txt', 'H1:LDAS-STRAIN', 953164815, 129)
#exampleListOfFiles = ['H H1_LDAS_C02_L2 953164800 128 file://localhost/data/node191/frames/S6/LDAShoftC02/LHO/H-H1_LDAS_C02_L2-9531/H-H1_LDAS_C02_L2-953164800-128.gwf', 'H H1_LDAS_C02_L2 953164928 128 file://localhost/data/node191/frames/S6/LDAShoftC02/LHO/H-H1_LDAS_C02_L2-9531/H-H1_LDAS_C02_L2-953164928-128.gwf']
#output = readFrames(exampleListOfFiles, 'H1:LDAS-STRAIN', 953164815, 129,1)
#print output