Package wsatools :: Module ProvenanceUtils
[hide private]

Source Code for Module wsatools.ProvenanceUtils

  1  #! /usr/bin/env python 
  2  #------------------------------------------------------------------------------ 
  3  # $Id: ProvenanceUtils.py 9966 2013-07-26 14:21:53Z NicholasCross $ 
  4  """ 
  5     Utilities for calculating properties from file provenance. 
  6   
  7     @author: N.J.G. Cross 
  8     @org:    WFAU, IfA, University of Edinburgh 
  9  """ 
 10  #------------------------------------------------------------------------------ 
 11  from __future__      import division, print_function 
 12  from future_builtins import map 
 13   
 14  from   collections   import namedtuple 
 15  import os 
 16   
 17  import wsatools.DbConnect.DbConstants   as dbc 
 18  from   wsatools.DbConnect.DbSession import Join 
 19  import wsatools.FitsUtils               as fits 
 20  from   wsatools.Logger              import Logger 
 21  from   wsatools.SystemConstants     import DepCodes 
 22  import math 
 23  #------------------------------------------------------------------------------ 
 24  #: Details of a component file in a provenance list. 
 25  ProvFile = namedtuple("ProvFile", "name multiframeID type") 
 26   
 27  #------------------------------------------------------------------------------ 
 28   
class Provenance(object):
    """
    Contains all relevant provenance information of a file.

    Unless C{haveFrameInfo} is set, construction queries the archive for the
    frame details of every 'normal' (or 'conf') component frame of the file;
    the accessor methods then summarise those details.

    """
    archive = None          #: Connection to database to query.
    combFileName = None     #: Path to top-level file with provenance info.
    components = None       #: Optional component list given to constructor.
    fileFrameType = None    #: Frame type of the top-level file.
    frameType = None        #: Base component frame type.
    isPawPrint = False      #: Is the top-level file a paw-print?
    normalFrameInfo = None  #: Frame info of 'normal' (or 'conf') components.
    normalList = None       #: (multiframeID, fileName) of each such frame.
    sysc = None             #: Initialised SystemConstants for this archive.

    _frameInfoOfType = None #: Dictionary of frame info referenced by type.
    _provList = None        #: Complete list of component files (ProvFile).
    _queryGroupSize = 100   #: Maximum number of files per frame info query.

    #--------------------------------------------------------------------------

    def __init__(self, archive, filePath, components=None, haveFrameInfo=False):
        """
        Queries archive to determine full provenance info of given file.

        @param archive:       Connection to database to query.
        @type  archive:       DbSession
        @param filePath:      Full path to file whose provenance is to be
                              determined.
        @type  filePath:      str
        @param components:    Optional list of component details; each entry
                              must provide imageName and multiframeID. If
                              given, it is used in place of the provenance
                              look-up of the top-level file.
        @type  components:    list
        @param haveFrameInfo: If True, only the complete provenance list is
                              determined; normalFrameInfo and normalList are
                              left unset.
        @type  haveFrameInfo: bool

        """
        self.fileFrameType = fits.determineFrameType(filePath)
        self._frameInfoOfType = {}
        self.archive = archive
        self.sysc = archive.sysc
        self.combFileName = filePath
        self.components = components
        fileName = os.path.basename(filePath)
        isConf = self.sysc.confSuffix in fileName
        self.frameType = 'stackconf' if isConf else 'stack'
        self.isPawPrint = (self.sysc.stackSuffix in fileName
                           and self.sysc.tileSuffix not in fileName
                           and self.sysc.mosaicSuffix not in fileName)

        # @TODO: Not sure this is the best solution to the problem.
        if not haveFrameInfo:
            self.normalFrameInfo = \
                self._queryTypeFrameInfo('conf' if isConf else 'normal')
            self.normalList = [(fInfo.multiframeID, fInfo.fileName)
                               for fInfo in self.normalFrameInfo]
        else:
            self._provList = getWholeProv(self.archive, self.combFileName,
                                          self.components)

    #--------------------------------------------------------------------------

    def getPrevLayer(self, combiframeID=None):
        """
        Get the top level provenance files.

        The constructor's component list is used if one was given. Otherwise
        the archive Provenance table is queried and, if the file is not found
        in the archive, the provenance is read from the file itself.

        @param combiframeID: multiframeID of main file.
        @type  combiframeID: int

        @return: Details of each component file in the layer below this file.
        @rtype:  list(ProvFile)

        """
        if self.components:
            return [ProvFile(comp.imageName, comp.multiframeID,
                             fits.determineFrameType(comp.imageName))
                    for comp in self.components]

        # TODO: This is not quite correct - if there is a mixture of
        #       files on dev disks and main disks
        combiframeID = combiframeID or self.archive.query("multiframeID",
            fromStr="Multiframe",
            whereStr="fileName LIKE '%%%s'" % self.combFileName,
            firstOnly=True)

        if combiframeID:
            components = self.archive.query(
                selectStr="fileName, Multiframe.multiframeID, frameType",
                fromStr=Join(["Provenance", "Multiframe"], "multiframeID"),
                whereStr="combiframeID=%s" % combiframeID)

            return [ProvFile(fits.stripServer(comp.fileName),
                             comp.multiframeID,
                             comp.frameType) for comp in components]

        return getFileProv(self.archive, self.combFileName)

    #--------------------------------------------------------------------------

    def _queryTypeFrameInfo(self, finalFrameType):
        """
        Queries the archive for the frame details of every file of the given
        frame type in the complete provenance list. Results are cached per
        frame type, so the archive is queried once only for each type.

        @param finalFrameType: Frame type of the provenance files wanted.
        @type  finalFrameType: str

        @return: Frame info rows, with archive-specific attributes.
        @rtype:  list

        """
        if finalFrameType in self._frameInfoOfType:
            return self._frameInfoOfType[finalFrameType]

        if not self._provList:
            self._provList = getWholeProv(self.archive, self.combFileName,
                                          self.components)

        frameInfo = self._frameInfoOfType[finalFrameType] = []

        # The provenance list is dealt round-robin into groups of at most
        # _queryGroupSize files, so that no single SQL IN-list is too long.
        nFiles = len(self._provList)
        nGroups = (nFiles + self._queryGroupSize - 1) // self._queryGroupSize
        for grpNo in range(nGroups):
            typeMfIDs = ','.join(str(frame.multiframeID)
                                 for frame in self._provList[grpNo::nGroups]
                                 if frame.type == finalFrameType)

            if not typeMfIDs:
                # Nothing of this type in this group. Must not reset the
                # list here: earlier groups may already have contributed.
                continue

            if self.sysc.isVSA():
                frameInfo += self.archive.query(
                    selectStr="Multiframe.multiframeID, mjdObs, detDit, detNdit, "
                              "origfile, obsID, njitter, dateObs, sadtSurveyID, "
                              "fileName",
                    fromStr=Join(["Multiframe", "MultiframeEsoKeys"],
                                 "multiframeID"),
                    whereStr="Multiframe.multiframeID IN (%s)" % typeMfIDs)

            elif self.sysc.isWSA():
                frameInfo += self.archive.query(
                    selectStr="multiframeID, mjdObs, expTime, msbID, njitter"
                              ", nustep, dateObs, fileName",
                    fromStr="Multiframe",
                    whereStr="multiframeID IN (%s)" % typeMfIDs)

            elif self.sysc.isOSA():
                # @TODO: Are these correct?
                # NOTE(review): the second Join entry is a single string
                # naming two tables - confirm that Join copes with this.
                frameInfo += self.archive.query(
                    selectStr="Multiframe.multiframeID, mjdObs, expTime, max(nCombine) as nCombine, "
                              "origfile, obsID, dateObs, sadtSurveyID, "
                              "fileName",
                    fromStr=Join(["Multiframe", "MultiframeEsoKeys, MultiframeDetector"],
                                 "multiframeID"),
                    whereStr="Multiframe.multiframeID IN (%s)" % typeMfIDs,
                    groupBy="Multiframe.multiframeID, mjdObs, expTime,"
                            "origfile, obsID, dateObs, sadtSurveyID, fileName")

        return frameInfo

    #--------------------------------------------------------------------------

    def getRegionName(self):
        """
        @return: The sadtSurveyID shared by the component frames, ignoring
                 default values. None if the archive is not the VSA or if
                 the frames do not share exactly one value.
        @rtype:  str

        """
        # @TODO: May have to do for OSA
        if not self.sysc.isVSA():
            return

        goodNames = set(fInfo.sadtSurveyID for fInfo in self.normalFrameInfo
                        if fInfo.sadtSurveyID != dbc.charDefault())

        if len(goodNames) == 1:
            return goodNames.pop()

    #--------------------------------------------------------------------------

    def getSumTotalExpTime(self, multiframeID=None):
        """
        Sums the exposure time over the components of a file that is not a
        paw-print. If frame info is available the sum is over the 'normal'
        frames; otherwise it is over the layer below this file, taking the
        value from the archive or, failing that, from the FITS headers.

        @param multiframeID: multiframeID of main file.
        @type  multiframeID: int

        @return: Summed exposure time; None for a paw-print.
        @rtype:  float

        """
        #@TODO: do we want max or avg?
        if self.isPawPrint:
            return

        if self.normalList:
            # NOTE(review): no value is defined for any other archive type.
            if self.sysc.isVSA():
                totExpTime = sum(nfi.detDit * nfi.detNdit
                                 for nfi in self.normalFrameInfo)
            elif self.sysc.isWSA() or self.sysc.isOSA():
                totExpTime = sum(nfi.expTime for nfi in self.normalFrameInfo)

            return totExpTime

        totExpTime = 0
        for filePath, mfID, _fType in self.getPrevLayer(multiframeID):
            if mfID > 0: # Is in archive, so obtain from there
                totExpTime += self.archive.queryAttrMax("totalExpTime",
                    table="MultiframeDetector",
                    where="multiframeID=%s" % mfID) or 0
            else:
                tExpTime = 0
                # @FIXME: This for-loop needs a break/else - if last extension
                #         is the preferred one then reverse the loop.
                for hdu in fits.open(filePath):
                    if 'TEXPTIME' in hdu.header:
                        tExpTime = max(hdu.header['TEXPTIME'], tExpTime)

                totExpTime += tExpTime

        return totExpTime

    #--------------------------------------------------------------------------

    def getTotalExpTime(self, multiframeID=None):
        """
        Sums, per extension, the archived totalExpTime of the files in the
        layer below a paw-print.

        @param multiframeID: multiframeID of main file.
        @type  multiframeID: int

        @return: Summed totalExpTime referenced by extNum; None if the file
                 is not a paw-print.
        @rtype:  dict

        """
        if not self.isPawPrint:
            return

        mfIDs = [mfID for _filePath, mfID, _fType
                 in self.getPrevLayer(multiframeID)]

        return dict(self.archive.query("extNum,SUM(totalExpTime)",
            "MultiframeDetector", "multiframeID in (%s) group by extNum" %
            ','.join(map(str, mfIDs))))

    #--------------------------------------------------------------------------

    def getMjdObs(self):
        """
        @return: Earliest mjdObs of the 'normal' component frames; None if
                 there is no frame info.
        @rtype:  float

        """
        if self.normalFrameInfo:
            return min(nfi.mjdObs for nfi in self.normalFrameInfo)

    #--------------------------------------------------------------------------

    def calcMeanMjdObs(self, extNum, normalList, depInfoDict, frameInfoDict):
        """
        Calculates the mean mid-exposure MJD of the given frames for one
        extension of a paw-print, using supplied rather than queried details.

        @param extNum:        Extension number of the detector.
        @type  extNum:        int
        @param normalList:    Pairs whose first element is the multiframeID
                              of a component frame.
        @type  normalList:    list(tuple)
        @param depInfoDict:   Deprecation code referenced by
                              (multiframeID, extNum).
        @type  depInfoDict:   dict
        @param frameInfoDict: Frame info referenced by multiframeID.
        @type  frameInfoDict: dict

        @return: Mean MJD, or the default real value if the file is not a
                 paw-print, lacks this extension or has no usable frame.
        @rtype:  float

        """
        # extNum = [2, 17]; FITS CAMNUM = [1, 16]
        if not (self.isPawPrint
                and extNum in fits.getExtList(self.combFileName)):

            return dbc.realDefault()

        nonDeps = [DepCodes.nonDep, DepCodes.deepStackOnly,
                   DepCodes.intermitDet]

        meanMjd = 0
        weight = 0
        for mfID, _mjdObs in normalList:
            if depInfoDict[(mfID, extNum)] in nonDeps:
                fInfo = frameInfoDict[mfID]
                # NOTE(review): 'detNDit' is spelt 'detNdit' everywhere else
                # in this class - confirm what the caller's frame info uses.
                meanMjd += fInfo.mjdObs + \
                    (0.5 * fInfo.detNDit * fInfo.detDit / (24 * 3600))
                weight += 1

        if weight > 0:
            return meanMjd / weight

        return dbc.realDefault()

    #--------------------------------------------------------------------------

    def getMeanMjdObs(self, extNum):
        """
        Calculates the mean mid-exposure MJD of the 'normal' component frames
        for one extension of a paw-print, using deprecation codes queried
        from the archive.

        @param extNum: Extension number of the detector.
        @type  extNum: int

        @return: Mean MJD, or the default real value if the file is not a
                 paw-print, lacks this extension or has no usable frame.
        @rtype:  float

        """
        if not (self.isPawPrint
                and extNum in fits.getExtList(self.combFileName)):
            return dbc.realDefault()

        normalMfIDs = [mfID for mfID, _fName in (self.normalList or [])]
        if not normalMfIDs:
            # An empty IN-list is not valid SQL, and there is nothing to mean
            return dbc.realDefault()

        depInfoDict = dict(self.archive.query(
            selectStr="multiframeID,deprecated",
            fromStr="MultiframeDetector",
            whereStr="multiframeID in (%s) and extNum=%s"
                     % (','.join(map(str, normalMfIDs)), extNum)))

        isVSA = self.sysc.isVSA()
        isWSA = self.sysc.isWSA()
        meanMjd = 0
        weight = 0
        for fInfo in self.normalFrameInfo:
            depCode = depInfoDict[fInfo.multiframeID]
            if isVSA and depCode in [0, 50, 51]:
                meanMjd += fInfo.mjdObs + \
                    (0.5 * fInfo.detNdit * fInfo.detDit / (24 * 3600))
                weight += 1
            elif isWSA and depCode in [0]:
                meanMjd += fInfo.mjdObs + (0.5 * fInfo.expTime / (24 * 3600))
                weight += 1

        # No frame qualifies if all are deprecated or the archive is neither
        # the VSA nor the WSA; avoid dividing by zero in that case.
        if weight > 0:
            return meanMjd / weight

        return dbc.realDefault()

    #--------------------------------------------------------------------------

    def getDateObs(self):
        """
        @return: Earliest dateObs of the 'normal' component frames; None if
                 there is no frame info.

        """
        if self.normalFrameInfo:
            return min(nfi.dateObs for nfi in self.normalFrameInfo)

    #--------------------------------------------------------------------------

    def getMjdEnd(self):
        """
        @return: Latest value of mjdObs plus exposure length (converted from
                 seconds to days) over the 'normal' component frames; None if
                 there is no frame info.
        @rtype:  float

        """
        if self.normalFrameInfo:
            if self.sysc.isVSA():
                return max(nfi.mjdObs + (nfi.detDit * nfi.detNdit / (24 * 3600))
                           for nfi in self.normalFrameInfo)

            elif self.sysc.isWSA() or self.sysc.isOSA():
                return max(nfi.mjdObs + (nfi.expTime / (24 * 3600))
                           for nfi in self.normalFrameInfo)

    #--------------------------------------------------------------------------

    def getObsIDList(self):
        """
        @return: Distinct obsID (VSA) or msbID (WSA) values of the 'normal'
                 component frames; None for any other archive.
        @rtype:  set

        """
        if self.sysc.isVSA():
            return set(fInfo.obsID for fInfo in self.normalFrameInfo)

        elif self.sysc.isWSA():
            return set(fInfo.msbID for fInfo in self.normalFrameInfo)

    #--------------------------------------------------------------------------

    def getNoRawFrames(self):
        """
        @return: Number of 'normal' component frames.
        @rtype:  int

        """
        # @TODO: Do this per extension
        return len(self.normalList)

    #--------------------------------------------------------------------------

    def getOrigFileNameList(self):
        """
        @return: The origfile value (VSA) or archive file name (WSA) of each
                 'normal' component frame; None for any other archive.
        @rtype:  list(str)

        """
        if self.sysc.isVSA():
            return [fInfo.origfile for fInfo in self.normalFrameInfo]

        elif self.sysc.isWSA():
            return [fName for _mfID, fName in self.normalList]

    #--------------------------------------------------------------------------

    def getNEpochs(self):
        """
        @return: Number of provenance files of the base component frame type.
        @rtype:  int

        """
        return len(self._queryTypeFrameInfo(self.frameType))

    #--------------------------------------------------------------------------

    def isSameJitterPattern(self):
        """
        @return: True if the file itself is of the base component frame type,
                 or if its components of that type all have the same njitter
                 value.
        @rtype:  bool

        """
        if self.fileFrameType == self.frameType:
            return True

        stackFrameInfo = self._queryTypeFrameInfo(self.frameType)
        jitters = set(frame.njitter for frame in stackFrameInfo)

        return len(jitters) == 1

#------------------------------------------------------------------------------
def getWholeProv(db, fileName, components=None):
    """
    Works out whole provenance for file. Starting from the file (or from the
    supplied components), the provenance of each layer of files is looked up
    in turn until a layer consists only of files of the final frame type,
    which is 'conf' if 'conf' occurs in the file name and 'normal' otherwise.

    @param db:         Connection to database to query.
    @type  db:         DbSession
    @param fileName:   Path to the file whose provenance is wanted.
    @type  fileName:   str
    @param components: Optional list of component details to start from in
                       place of the file; each entry must provide imageName
                       and multiframeID.
    @type  components: list

    @return: Details of the files in every layer visited, except for any
             entry whose name equals fileName.
    @rtype:  list(ProvFile)

    """
    finalFrameType = 'conf' if 'conf' in fileName else 'normal'
    frameType = fits.determineFrameType(fileName)
    if components:
        inputList = [ProvFile(comp.imageName, comp.multiframeID,
                              fits.determineFrameType(comp.imageName))
                     for comp in components]
    else:
        inputList = [ProvFile(fileName, dbc.intDefault(), frameType)]

    fullList = list(inputList)
    isComplete = False
    while not isComplete:
        isComplete = True
        outputList = []
        for filePath, mfID, fType in inputList:
            if fType == finalFrameType:
                continue  # Bottom of the tree; nothing further to look up

            isComplete = False

            # Only ask the archive when the answer is needed: a positive
            # multiframeID already shows that the file is in the archive.
            isInArchive = mfID > 0 \
                or db.queryEntriesExist("Multiframe",
                                        "fileName LIKE '%%%s'" % filePath)
            if isInArchive:
                whereStr = ("mf2.multiframeID=%d" % mfID if mfID > 0 else
                            "mf2.fileName LIKE '%%%s'" % filePath)
                results = db.query(
                    selectStr="mf1.fileName, mf1.multiframeID, mf1.frameType",
                    fromStr="Multiframe AS mf1, Provenance AS p"
                            ", Multiframe AS mf2",
                    whereStr=whereStr
                             + " AND mf2.multiframeID=p.combiframeID"
                               " AND mf1.multiframeID=p.multiframeID")

                # @TODO: Stripping is not required for VSA
                #        & the WSA will be fixed
                outputList += [ProvFile(fits.stripServer(comp.fileName),
                                        comp.multiframeID, comp.frameType)
                               for comp in results]
            else:
                outputList += getFileProv(db, filePath)

        fullList += outputList
        inputList = outputList

    # remove initial file from list
    return [provFile for provFile in fullList if provFile.name != fileName]

#------------------------------------------------------------------------------
420 421 #------------------------------------------------------------------------------ 422
def getFileProv(db, filePath):
    """
    Reads the provenance of a file from the PROV keywords in its own FITS
    headers (PROV0000 excepted). Components of a file with 'tile' in its
    frame type are located on disk; components of any other file are looked
    up in the archive Multiframe table.

    @param db:       Connection to database to query.
    @type  db:       DbSession
    @param filePath: Path to the FITS file to read.
    @type  filePath: str

    @return: Details of each distinct component file named in the headers.
    @rtype:  list(ProvFile)

    """
    provNames = set()
    for hdu in fits.open(filePath):
        header = hdu.header
        for key in header:
            if 'PROV' in key and key != 'PROV0000':
                # Keep the name up to its first mefType, then restore mefType
                provNames.add(
                    header[key].split(db.sysc.mefType)[0] + db.sysc.mefType)

    # @TODO: if not realrun, some components may be on main disks and in archive already
    # If disks down
    if 'tile' in fits.determineFrameType(filePath):
        # @TODO: frameType incorrect - for layer above
        located = []
        for provName in provNames:
            diskPath = fits.findFileOnDisks(db.sysc, provName,
                                            isDev=not db.isRealRun)
            located.append((provName, diskPath))

        for provName, diskPath in located:
            if not diskPath:
                Logger.addMessage("Cannot find %s on disk" % provName)

        # NOTE(review): files that were not found are still passed on here.
        provFiles = []
        for _provName, diskPath in located:
            provFiles.append(ProvFile(diskPath, dbc.intDefault(),
                                      fits.determineFrameType(diskPath)))

        return provFiles

    # @TODO: Has more to be done?
    # @NOTE: Doesn't work in the case of components that are previous deeps - deprecated with 255...
    whereTemplate = ("fileName like '%%%s' and (%s or (frameType like "
                     "'deep%%stack' and deprecated=255))")
    rows = []
    for provName in provNames:
        rows.append(db.query("fileName as name,multiframeID,frameType",
                             "Multiframe",
                             whereTemplate
                             % (provName, DepCodes.selectNonDeprecated),
                             firstOnly=True))

    # NOTE(review): presumably every name matches a row - confirm what
    # query() returns with firstOnly=True when there is no match.
    return [ProvFile(fName, mfID, fType) for fName, mfID, fType in rows]

#------------------------------------------------------------------------------
455 456 #------------------------------------------------------------------------------ 457