| Home | Trees | Indices | Help | 
 | 
|---|
|  | 
  1  #! /usr/bin/env python 
  2  #------------------------------------------------------------------------------ 
  3  # $Id: ProvenanceUtils.py 9966 2013-07-26 14:21:53Z NicholasCross $ 
  4  """ 
  5     Utilities for calculating properties from file provenance. 
  6   
  7     @author: N.J.G. Cross 
  8     @org:    WFAU, IfA, University of Edinburgh 
  9  """ 
 10  #------------------------------------------------------------------------------ 
 11  from __future__      import division, print_function 
 12  from future_builtins import map 
 13   
 14  from   collections   import namedtuple 
 15  import os 
 16   
 17  import wsatools.DbConnect.DbConstants   as dbc 
 18  from   wsatools.DbConnect.DbSession import Join 
 19  import wsatools.FitsUtils               as fits 
 20  from   wsatools.Logger              import Logger 
 21  from   wsatools.SystemConstants     import DepCodes 
 22  import math 
 23  #------------------------------------------------------------------------------ 
 24  #: Details of a component file in a provenance list. 
 25  ProvFile = namedtuple("ProvFile", "name multiframeID type") 
 26   
 27  #------------------------------------------------------------------------------ 
 28   
 30      """ Contains all relevant provenance information of a file. 
 31      """ 
 32      archive = None          #: Connection to database to query. 
 33      combFileName = None     #: Path to top-level file with provenance info. 
 34      frameType = None        #: Base component frame type. 
 35      isPawPrint = False      #: Is the top-level file a paw-print? 
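 36      normalFrameInfo = None  #: Frame info queried for the 'normal' (or 'conf') type components.
 37      normalList = None       #: List of (multiframeID, fileName) pairs taken from normalFrameInfo.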
 38      sysc = None             #: Initialised SystemConstants for this archive. 
 39   
 40      _frameInfoOfType = None  #: Dictionary of frame info referenced by type. 
 41      _provList = None         #: Complete list of component files (ProvFile). 
 42   
 43      #-------------------------------------------------------------------------- 
 44   
 46          """ 
 47          Queries archive to determine full provenance info of given file. 
 48   
 49          @param archive:  Connection to database to query. 
 50          @type  archive:  DbSession 
 51          @param filePath: Full path to the file whose provenance is to be determined.
 52          @type  filePath: str 
 53          @param haveFrameInfo: If False, query the base component frame info now; otherwise only build the whole provenance list.
 54          @type  haveFrameInfo: bool 
 55   
 56          """ 
 57          self.fileFrameType = fits.determineFrameType(filePath) 
 58          self._frameInfoOfType = {} 
 59          self.archive = archive 
 60          self.sysc = archive.sysc 
 61          self.combFileName = filePath 
 62          self.components = components 
 63          fileName = os.path.basename(filePath) 
 64          isConf = self.sysc.confSuffix in fileName 
 65          self.frameType = 'stackconf' if isConf else 'stack' 
 66          self.isPawPrint = (self.sysc.stackSuffix in fileName 
 67                             and self.sysc.tileSuffix not in fileName 
 68                             and self.sysc.mosaicSuffix not in fileName) 
 69   
 70          # @TODO: Not sure this is the best solution to the problem. 
 71          if not haveFrameInfo: 
 72              self.normalFrameInfo = \ 
 73                      self._queryTypeFrameInfo('conf' if isConf else 'normal') 
 74              self.normalList = [(fInfo.multiframeID, fInfo.fileName) 
 75                                 for fInfo in self.normalFrameInfo] 
 76          else: 
 77              self._provList = getWholeProv(self.archive, self.combFileName, 
 78                                            self.components) 
 79   
 80      #-------------------------------------------------------------------------- 
 81   
 83          """ 
 84          Get the top level provenance files. 
 85   
 86          @param combiframeID: multiframeID of main file. 
 87          @type  combiframeID: int 
 88   
 89          """ 
 90          if self.components: 
 91              return [ProvFile(comp.imageName, comp.multiframeID, 
 92                              fits.determineFrameType(comp.imageName)) 
 93                              for comp in self.components] 
 94          # TODO: This is not quite correct - if there is a mixture of 
 95          # files on dev disks and main disks 
 96          combiframeID = combiframeID or self.archive.query("multiframeID", 
 97              fromStr="Multiframe", 
 98             whereStr="fileName LIKE '%%%s'" % self.combFileName, 
 99            firstOnly=True) 
100   
101          if combiframeID: 
102              components = self.archive.query( 
103                selectStr="fileName, Multiframe.multiframeID, frameType", 
104                  fromStr=Join(["Provenance", "Multiframe"], "multiframeID"), 
105                 whereStr="combiframeID=%s" % combiframeID) 
106   
107              return [ProvFile(fits.stripServer(comp.fileName), 
108                               comp.multiframeID, 
109                               comp.frameType) for comp in components] 
110   
111          return getFileProv(self.archive, self.combFileName) 
112   
113      #-------------------------------------------------------------------------- 
114   
116          """ 
117          """ 
118          if finalFrameType in self._frameInfoOfType: 
119              return self._frameInfoOfType[finalFrameType] 
120   
121          if not self._provList: 
122              self._provList = getWholeProv(self.archive, self.combFileName, 
123                                            self.components) 
124          # @FIXME: If there are too many frames (> 100), split SQL selections into parts.
125          # Why? 
126          nGroups = int(math.ceil(len(self._provList) / 100)) 
127          self._frameInfoOfType[finalFrameType] = [] 
128          for grpNo in range(nGroups): 
129              provList = [pv for ii, pv in enumerate(self._provList) 
130                          if ii % nGroups == grpNo] 
131   
132              typeMfIDs = ','.join(str(frame.multiframeID) for frame in provList 
133                                   if frame.type == finalFrameType) 
134   
135              if not typeMfIDs: 
136                  self._frameInfoOfType[finalFrameType] = [] 
137                  #Logger.addMessage("<WARNING> No provenance found for " 
138                  #                  + self.combFileName, alwaysLog=False) 
139   
140              elif self.sysc.isVSA(): 
141                  self._frameInfoOfType[finalFrameType] += self.archive.query( 
142                    selectStr="Multiframe.multiframeID, mjdObs, detDit, detNdit, " 
143                              "origfile, obsID, njitter, dateObs, sadtSurveyID, " 
144                              "fileName", 
145                      fromStr=Join(["Multiframe", "MultiframeEsoKeys"], 
146                                   "multiframeID"), 
147                     whereStr="Multiframe.multiframeID IN (%s)" % typeMfIDs) 
148   
149              elif self.sysc.isWSA(): 
150   
151                  self._frameInfoOfType[finalFrameType] += self.archive.query( 
152                    selectStr="multiframeID, mjdObs, expTime, msbID, njitter" 
153                              ", nustep, dateObs, fileName", 
154                      fromStr="Multiframe", 
155                     whereStr="multiframeID IN (%s)" % typeMfIDs) 
156              elif self.sysc.isOSA(): 
157                  # @TODO: Are these correct? 
158                  self._frameInfoOfType[finalFrameType] += self.archive.query( 
159                    selectStr="Multiframe.multiframeID, mjdObs, expTime, max(nCombine) as nCombine, " 
160                              "origfile, obsID, dateObs, sadtSurveyID, " 
161                              "fileName", 
162                      fromStr=Join(["Multiframe", "MultiframeEsoKeys, MultiframeDetector"], 
163                                   "multiframeID"), 
164                     whereStr="Multiframe.multiframeID IN (%s)" % typeMfIDs, 
165                      groupBy="Multiframe.multiframeID, mjdObs, expTime," 
166                              "origfile, obsID, dateObs, sadtSurveyID, fileName") 
167   
168          return self._frameInfoOfType[finalFrameType] 
169   
170      #-------------------------------------------------------------------------- 
171   
173          """ 
174          """ 
175          # @TODO: May have to do for OSA 
176          if not self.sysc.isVSA(): 
177              return 
178   
179          goodNames = set(fInfo.sadtSurveyID for fInfo in self.normalFrameInfo 
180                          if fInfo.sadtSurveyID != dbc.charDefault()) 
181   
182          if len(goodNames) is 1: 
183              return goodNames.pop() 
184   
185      #-------------------------------------------------------------------------- 
186   
188          """ 
189          """ 
190          #@TODO: do we want max or avg? 
191          if self.isPawPrint: 
192              return 
193   
194          if self.normalList: 
195              if self.sysc.isVSA(): 
196                  totExpTime = sum([(nfi.detDit * nfi.detNdit) for nfi in self.normalFrameInfo]) 
197              if self.sysc.isWSA(): 
198                  totExpTime = sum([(nfi.expTime) for nfi in self.normalFrameInfo]) 
199              if self.sysc.isOSA(): 
200                  totExpTime = sum([(nfi.expTime) for nfi in self.normalFrameInfo]) 
201          else: 
202              components = self.getPrevLayer(multiframeID) 
203              totExpTime = 0 
204              for filePath, mfID, _fType in components: 
205                  if mfID > 0:  # Is in archive, so obtain from there 
206                      totExpTime += self.archive.queryAttrMax("totalExpTime", 
207                        table="MultiframeDetector", 
208                        where="multiframeID=%s" % mfID) or 0 
209                  else: 
210                      tExpTime = 0 
211                      # @FIXME: This for-loop needs a break/else - if last extension 
212                      #         is the preferred one then reverse the loop. 
213                      for hdu in fits.open(filePath): 
214                          if 'TEXPTIME' in hdu.header: 
215                              tExpTime = max(hdu.header['TEXPTIME'], tExpTime) 
216   
217                      totExpTime += tExpTime 
218   
219          return totExpTime 
220   
221      #-------------------------------------------------------------------------- 
222   
224          """ 
225          """ 
226          if not self.isPawPrint: 
227              return 
228          components = self.getPrevLayer(multiframeID) 
229   
230          mfIDs = [mfID for _filePath, mfID, _fType in components] 
231          return dict(self.archive.query("extNum,SUM(totalExpTime)", 
232              "MultiframeDetector", "multiframeID in (%s) group by extNum" % 
233              ','.join(map(str, mfIDs)))) 
234   
235      #-------------------------------------------------------------------------- 
236   
237   
238   
240          """ 
241          """ 
242          if self.normalFrameInfo: 
243              return min([nfi.mjdObs for nfi in self.normalFrameInfo]) 
244   
245      #-------------------------------------------------------------------------- 
246   
248          """ 
249          """ 
250          # extNum = [2, 17]; FITS CAMNUM = [1, 16] 
251          if not (self.isPawPrint 
252                  and extNum in fits.getExtList(self.combFileName)): 
253   
254              return dbc.realDefault() 
255   
256          nonDeps = [DepCodes.nonDep, DepCodes.deepStackOnly, 
257                     DepCodes.intermitDet] 
258   
259          meanMjd = 0 
260          weight = 0 
261          for mfID, _mjdObs in normalList: 
262              if depInfoDict[(mfID, extNum)] in nonDeps: 
263                  meanMjd += frameInfoDict[mfID].mjdObs + \ 
264                             (0.5 * frameInfoDict[mfID].detNDit * 
265                              frameInfoDict[mfID].detDit / (24 * 3600)) 
266                  weight += 1 
267   
268          if weight > 0: 
269              return meanMjd / weight 
270   
271          return dbc.realDefault() 
272   
273      #-------------------------------------------------------------------------- 
274   
276          """ 
277          """ 
278          normalMfIDs = [mfID for mfID, _fName in self.normalList] 
279          if not (self.isPawPrint 
280                  and extNum in fits.getExtList(self.combFileName)): 
281              return dbc.realDefault() 
282   
283          depInfoDict = dict(self.archive.query( 
284              selectStr="multiframeID,deprecated", 
285              fromStr="MultiframeDetector", 
286              whereStr="multiframeID in (%s) and extNum=%s" 
287              % (','.join(map(str, normalMfIDs)), extNum))) 
288   
289          meanMjd = 0 
290          weight = 0 
291          for fInfo in self.normalFrameInfo: 
292              if depInfoDict[fInfo.multiframeID] in [0, 50, 51] and self.sysc.isVSA(): 
293                  meanMjd += fInfo.mjdObs + (0.5 * fInfo.detNdit * fInfo.detDit / (24 * 3600)) 
294                  weight += 1 
295              elif depInfoDict[fInfo.multiframeID] in [0] and self.sysc.isWSA(): 
296                  meanMjd += fInfo.mjdObs + (0.5 * fInfo.expTime / (24 * 3600)) 
297                  weight += 1 
298   
299          return meanMjd / weight 
300   
301      #-------------------------------------------------------------------------- 
302   
304          """ 
305          """ 
306          if self.normalFrameInfo: 
307              return min([nfi.dateObs for nfi in self.normalFrameInfo]) 
308   
309      #-------------------------------------------------------------------------- 
310   
312          """ 
313          """ 
314          if self.normalFrameInfo: 
315              if self.sysc.isVSA(): 
316                  return max([nfi.mjdObs + (nfi.detDit * nfi.detNdit / (24 * 3600)) 
317                   for nfi in self.normalFrameInfo]) 
318   
319              elif self.sysc.isWSA() or self.sysc.isOSA(): 
320                  return max([nfi.mjdObs + (nfi.expTime / (24 * 3600)) 
321                             for nfi in self.normalFrameInfo]) 
322   
323      #-------------------------------------------------------------------------- 
324   
326          """ 
327          """ 
328          if self.sysc.isVSA(): 
329              return set(fInfo.obsID for fInfo in self.normalFrameInfo) 
330   
331          elif self.sysc.isWSA(): 
332              return set(fInfo.msbID for fInfo in self.normalFrameInfo) 
333   
334      #-------------------------------------------------------------------------- 
335   
341   
342      #-------------------------------------------------------------------------- 
343   
345          """ 
346          """ 
347          if self.sysc.isVSA(): 
348              return [fInfo.origfile for fInfo in self.normalFrameInfo] 
349   
350          elif self.sysc.isWSA(): 
351              return [fName for _mfiD, fName in self.normalList] 
352   
353      #-------------------------------------------------------------------------- 
354   
359   
360      #-------------------------------------------------------------------------- 
361   
363          """ 
364          """ 
365          if self.fileFrameType == self.frameType: 
366              return True 
367          stackFrameInfo = self._queryTypeFrameInfo(self.frameType) 
368          jitters = set(frame.njitter for frame in stackFrameInfo) 
369          return len(jitters) is 1 
370   
371  #------------------------------------------------------------------------------ 
372   
374      """ Works out whole provenance for file. 
375      """ 
376      finalFrameType = 'conf' if 'conf' in fileName else 'normal' 
377      frameType = fits.determineFrameType(fileName) 
378      if components: 
379          inputList = [ProvFile(comp.imageName, comp.multiframeID, 
380                                fits.determineFrameType(comp.imageName)) 
381                       for comp in components] 
382      else: 
383          inputList = [ProvFile(fileName, dbc.intDefault(), frameType)] 
384      fullList = inputList 
385      isComplete = False 
386      while not isComplete: 
387          isComplete = True 
388          outputList = [] 
389          for filePath, mfID, fType in inputList: 
390              isInArchive = mfID > 0 \ 
391                or db.queryEntriesExist("Multiframe", 
392                                        "fileName LIKE '%%%s'" % filePath) 
393              if fType != finalFrameType: 
394                  isComplete = False 
395                  if isInArchive: 
396                      whereStr = ("mf2.multiframeID=%d" % mfID if mfID > 0 else 
397                                "mf2.fileName LIKE '%%%s'" % filePath) 
398                      results = db.query( 
399                        selectStr="mf1.fileName, mf1.multiframeID, mf1.frameType", 
400                          fromStr="Multiframe AS mf1, Provenance AS p" 
401                                  ", Multiframe AS mf2", 
402                          whereStr=whereStr 
403                                  + " AND mf2.multiframeID=p.combiframeID" 
404                                   " AND mf1.multiframeID=p.multiframeID") 
405   
406                      # @TODO: Stripping is not required for VSA 
407                      #        & the WSA will be fixed 
408                      components = [ProvFile(fits.stripServer(comp.fileName), 
409                                             comp.multiframeID, comp.frameType) 
410                                    for comp in results] 
411                  else: 
412                      components = getFileProv(db, filePath) 
413   
414                  outputList += components 
415          fullList += outputList 
416          inputList = outputList 
417   
418      # remove initial file from list 
419      return [provFile for provFile in fullList if provFile.name != fileName] 
420   
421  #------------------------------------------------------------------------------ 
422   
424      """ 
425      """ 
426      provSet = set() 
427      for hdu in fits.open(filePath): 
428          provSet.update( 
429            hdu.header[key].split(db.sysc.mefType)[0] + db.sysc.mefType 
430            for key in hdu.header if 'PROV' in key and key != 'PROV0000') 
431      frameType = fits.determineFrameType(filePath) 
432   
433      # @TODO: If this is not a real run, some components may already be on the main disks and in the archive.
434      # If disks down  
435      if 'tile' in frameType: 
436          # @TODO: frameType incorrect - for layer above 
437          files = [(provName, fits.findFileOnDisks(db.sysc, provName, isDev=not db.isRealRun)) 
438           for provName in provSet] 
439          for pName, fileName in files: 
440              if not fileName: 
441                  Logger.addMessage("Cannot find %s on disk" % pName) 
442   
443          return [ProvFile(fileName, dbc.intDefault(), fits.determineFrameType(fileName)) 
444                  for _pName, fileName in files] 
445      else: 
446          # @TODO: Has more to be done? 
447          # @NOTE: Doesn't work in the case of components that are previous deeps - deprecated with 255... 
448          files = [db.query("fileName as name,multiframeID,frameType", "Multiframe", 
449                          "fileName like '%%%s' and (%s or (frameType like " 
450                          "'deep%%stack' and deprecated=255))" 
451                          % (provName, DepCodes.selectNonDeprecated), 
452                          firstOnly=True) 
453                  for provName in provSet] 
454          return [ProvFile(fName, mfID, fType) for fName, mfID, fType in files] 
455   
456  #------------------------------------------------------------------------------ 
457   
| Home | Trees | Indices | Help | 
 | 
|---|
| Generated by Epydoc 3.0.1 on Mon Sep 8 15:47:17 2014 | http://epydoc.sourceforge.net |