| Home | Trees | Indices | Help |
|
|---|
|
|
1 #! /usr/bin/env python
2 #------------------------------------------------------------------------------
3 # $Id: ProvenanceUtils.py 9966 2013-07-26 14:21:53Z NicholasCross $
4 """
5 Utilities for calculating properties from file provenance.
6
7 @author: N.J.G. Cross
8 @org: WFAU, IfA, University of Edinburgh
9 """
10 #------------------------------------------------------------------------------
11 from __future__ import division, print_function
12 from future_builtins import map
13
14 from collections import namedtuple
15 import os
16
17 import wsatools.DbConnect.DbConstants as dbc
18 from wsatools.DbConnect.DbSession import Join
19 import wsatools.FitsUtils as fits
20 from wsatools.Logger import Logger
21 from wsatools.SystemConstants import DepCodes
22 import math
23 #------------------------------------------------------------------------------
#: Record describing one component file in a provenance list.
ProvFile = namedtuple("ProvFile", ["name", "multiframeID", "type"])
26
27 #------------------------------------------------------------------------------
28
""" Contains all relevant provenance information of a file.
"""
archive = None  #: Connection to database to query.
combFileName = None  #: Path to top-level file with provenance info.
frameType = None  #: Base component frame type.
isPawPrint = False  #: Is the top-level file a paw-print?
#: Frame info rows of the base ('conf' or 'normal') component frames, as
#: returned by _queryTypeFrameInfo(); only filled in by the constructor
#: when it is called with a false haveFrameInfo.
normalFrameInfo = None
#: List of (multiframeID, fileName) pairs taken from normalFrameInfo.
normalList = None
sysc = None  #: Initialised SystemConstants for this archive.

_frameInfoOfType = None  #: Dictionary of frame info referenced by type.
_provList = None  #: Complete list of component files (ProvFile).
42
43 #--------------------------------------------------------------------------
44
"""
Sets up the provenance details of the given file, querying the archive
where required.

@param archive: Connection to database to query.
@type  archive: DbSession
@param filePath: Full path to the file whose provenance is to be
                 determined.
@type  filePath: str
@param haveFrameInfo: If False, the frame info of the base ('conf' or
                      'normal') components is queried straight away;
                      if True, only the whole provenance list is built.
@type  haveFrameInfo: bool
@param components: Pre-selected component files, handed on to
                   getWholeProv(); presumably objects with imageName
                   and multiframeID attributes - TODO confirm.

"""
self.fileFrameType = fits.determineFrameType(filePath)
self._frameInfoOfType = {}
self.archive = archive
self.sysc = archive.sysc
self.combFileName = filePath
self.components = components

baseName = os.path.basename(filePath)
hasConfSuffix = self.sysc.confSuffix in baseName
if hasConfSuffix:
    self.frameType = 'stackconf'
else:
    self.frameType = 'stack'

# A paw-print is a stack that is neither a tile nor a mosaic.
self.isPawPrint = (self.sysc.stackSuffix in baseName
                   and not (self.sysc.tileSuffix in baseName
                            or self.sysc.mosaicSuffix in baseName))

# @TODO: Not sure this is the best solution to the problem.
if haveFrameInfo:
    self._provList = getWholeProv(self.archive, self.combFileName,
                                  self.components)
else:
    baseType = 'conf' if hasConfSuffix else 'normal'
    self.normalFrameInfo = self._queryTypeFrameInfo(baseType)
    self.normalList = [(frame.multiframeID, frame.fileName)
                       for frame in self.normalFrameInfo]

79
80 #--------------------------------------------------------------------------
81
83 """
84 Get the top level provenance files.
85
86 @param combiframeID: multiframeID of main file.
87 @type combiframeID: int
88
89 """
90 if self.components:
91 return [ProvFile(comp.imageName, comp.multiframeID,
92 fits.determineFrameType(comp.imageName))
93 for comp in self.components]
94 # TODO: This is not quite correct - if there is a mixture of
95 # files on dev disks and main disks
96 combiframeID = combiframeID or self.archive.query("multiframeID",
97 fromStr="Multiframe",
98 whereStr="fileName LIKE '%%%s'" % self.combFileName,
99 firstOnly=True)
100
101 if combiframeID:
102 components = self.archive.query(
103 selectStr="fileName, Multiframe.multiframeID, frameType",
104 fromStr=Join(["Provenance", "Multiframe"], "multiframeID"),
105 whereStr="combiframeID=%s" % combiframeID)
106
107 return [ProvFile(fits.stripServer(comp.fileName),
108 comp.multiframeID,
109 comp.frameType) for comp in components]
110
111 return getFileProv(self.archive, self.combFileName)
112
113 #--------------------------------------------------------------------------
114
116 """
117 """
118 if finalFrameType in self._frameInfoOfType:
119 return self._frameInfoOfType[finalFrameType]
120
121 if not self._provList:
122 self._provList = getWholeProv(self.archive, self.combFileName,
123 self.components)
124 # @FIXME: if too many frames > 100 split SQL selections into parts.
125 # Why?
126 nGroups = int(math.ceil(len(self._provList) / 100))
127 self._frameInfoOfType[finalFrameType] = []
128 for grpNo in range(nGroups):
129 provList = [pv for ii, pv in enumerate(self._provList)
130 if ii % nGroups == grpNo]
131
132 typeMfIDs = ','.join(str(frame.multiframeID) for frame in provList
133 if frame.type == finalFrameType)
134
135 if not typeMfIDs:
136 self._frameInfoOfType[finalFrameType] = []
137 #Logger.addMessage("<WARNING> No provenance found for "
138 # + self.combFileName, alwaysLog=False)
139
140 elif self.sysc.isVSA():
141 self._frameInfoOfType[finalFrameType] += self.archive.query(
142 selectStr="Multiframe.multiframeID, mjdObs, detDit, detNdit, "
143 "origfile, obsID, njitter, dateObs, sadtSurveyID, "
144 "fileName",
145 fromStr=Join(["Multiframe", "MultiframeEsoKeys"],
146 "multiframeID"),
147 whereStr="Multiframe.multiframeID IN (%s)" % typeMfIDs)
148
149 elif self.sysc.isWSA():
150
151 self._frameInfoOfType[finalFrameType] += self.archive.query(
152 selectStr="multiframeID, mjdObs, expTime, msbID, njitter"
153 ", nustep, dateObs, fileName",
154 fromStr="Multiframe",
155 whereStr="multiframeID IN (%s)" % typeMfIDs)
156 elif self.sysc.isOSA():
157 # @TODO: Are these correct?
158 self._frameInfoOfType[finalFrameType] += self.archive.query(
159 selectStr="Multiframe.multiframeID, mjdObs, expTime, max(nCombine) as nCombine, "
160 "origfile, obsID, dateObs, sadtSurveyID, "
161 "fileName",
162 fromStr=Join(["Multiframe", "MultiframeEsoKeys, MultiframeDetector"],
163 "multiframeID"),
164 whereStr="Multiframe.multiframeID IN (%s)" % typeMfIDs,
165 groupBy="Multiframe.multiframeID, mjdObs, expTime,"
166 "origfile, obsID, dateObs, sadtSurveyID, fileName")
167
168 return self._frameInfoOfType[finalFrameType]
169
170 #--------------------------------------------------------------------------
171
173 """
174 """
175 # @TODO: May have to do for OSA
176 if not self.sysc.isVSA():
177 return
178
179 goodNames = set(fInfo.sadtSurveyID for fInfo in self.normalFrameInfo
180 if fInfo.sadtSurveyID != dbc.charDefault())
181
182 if len(goodNames) is 1:
183 return goodNames.pop()
184
185 #--------------------------------------------------------------------------
186
188 """
189 """
190 #@TODO: do we want max or avg?
191 if self.isPawPrint:
192 return
193
194 if self.normalList:
195 if self.sysc.isVSA():
196 totExpTime = sum([(nfi.detDit * nfi.detNdit) for nfi in self.normalFrameInfo])
197 if self.sysc.isWSA():
198 totExpTime = sum([(nfi.expTime) for nfi in self.normalFrameInfo])
199 if self.sysc.isOSA():
200 totExpTime = sum([(nfi.expTime) for nfi in self.normalFrameInfo])
201 else:
202 components = self.getPrevLayer(multiframeID)
203 totExpTime = 0
204 for filePath, mfID, _fType in components:
205 if mfID > 0: # Is in archive, so obtain from there
206 totExpTime += self.archive.queryAttrMax("totalExpTime",
207 table="MultiframeDetector",
208 where="multiframeID=%s" % mfID) or 0
209 else:
210 tExpTime = 0
211 # @FIXME: This for-loop needs a break/else - if last extension
212 # is the preferred one then reverse the loop.
213 for hdu in fits.open(filePath):
214 if 'TEXPTIME' in hdu.header:
215 tExpTime = max(hdu.header['TEXPTIME'], tExpTime)
216
217 totExpTime += tExpTime
218
219 return totExpTime
220
221 #--------------------------------------------------------------------------
222
224 """
225 """
226 if not self.isPawPrint:
227 return
228 components = self.getPrevLayer(multiframeID)
229
230 mfIDs = [mfID for _filePath, mfID, _fType in components]
231 return dict(self.archive.query("extNum,SUM(totalExpTime)",
232 "MultiframeDetector", "multiframeID in (%s) group by extNum" %
233 ','.join(map(str, mfIDs))))
234
235 #--------------------------------------------------------------------------
236
237
238
240 """
241 """
242 if self.normalFrameInfo:
243 return min([nfi.mjdObs for nfi in self.normalFrameInfo])
244
245 #--------------------------------------------------------------------------
246
248 """
249 """
250 # extNum = [2, 17]; FITS CAMNUM = [1, 16]
251 if not (self.isPawPrint
252 and extNum in fits.getExtList(self.combFileName)):
253
254 return dbc.realDefault()
255
256 nonDeps = [DepCodes.nonDep, DepCodes.deepStackOnly,
257 DepCodes.intermitDet]
258
259 meanMjd = 0
260 weight = 0
261 for mfID, _mjdObs in normalList:
262 if depInfoDict[(mfID, extNum)] in nonDeps:
263 meanMjd += frameInfoDict[mfID].mjdObs + \
264 (0.5 * frameInfoDict[mfID].detNDit *
265 frameInfoDict[mfID].detDit / (24 * 3600))
266 weight += 1
267
268 if weight > 0:
269 return meanMjd / weight
270
271 return dbc.realDefault()
272
273 #--------------------------------------------------------------------------
274
276 """
277 """
278 normalMfIDs = [mfID for mfID, _fName in self.normalList]
279 if not (self.isPawPrint
280 and extNum in fits.getExtList(self.combFileName)):
281 return dbc.realDefault()
282
283 depInfoDict = dict(self.archive.query(
284 selectStr="multiframeID,deprecated",
285 fromStr="MultiframeDetector",
286 whereStr="multiframeID in (%s) and extNum=%s"
287 % (','.join(map(str, normalMfIDs)), extNum)))
288
289 meanMjd = 0
290 weight = 0
291 for fInfo in self.normalFrameInfo:
292 if depInfoDict[fInfo.multiframeID] in [0, 50, 51] and self.sysc.isVSA():
293 meanMjd += fInfo.mjdObs + (0.5 * fInfo.detNdit * fInfo.detDit / (24 * 3600))
294 weight += 1
295 elif depInfoDict[fInfo.multiframeID] in [0] and self.sysc.isWSA():
296 meanMjd += fInfo.mjdObs + (0.5 * fInfo.expTime / (24 * 3600))
297 weight += 1
298
299 return meanMjd / weight
300
301 #--------------------------------------------------------------------------
302
304 """
305 """
306 if self.normalFrameInfo:
307 return min([nfi.dateObs for nfi in self.normalFrameInfo])
308
309 #--------------------------------------------------------------------------
310
312 """
313 """
314 if self.normalFrameInfo:
315 if self.sysc.isVSA():
316 return max([nfi.mjdObs + (nfi.detDit * nfi.detNdit / (24 * 3600))
317 for nfi in self.normalFrameInfo])
318
319 elif self.sysc.isWSA() or self.sysc.isOSA():
320 return max([nfi.mjdObs + (nfi.expTime / (24 * 3600))
321 for nfi in self.normalFrameInfo])
322
323 #--------------------------------------------------------------------------
324
326 """
327 """
328 if self.sysc.isVSA():
329 return set(fInfo.obsID for fInfo in self.normalFrameInfo)
330
331 elif self.sysc.isWSA():
332 return set(fInfo.msbID for fInfo in self.normalFrameInfo)
333
334 #--------------------------------------------------------------------------
335
341
342 #--------------------------------------------------------------------------
343
345 """
346 """
347 if self.sysc.isVSA():
348 return [fInfo.origfile for fInfo in self.normalFrameInfo]
349
350 elif self.sysc.isWSA():
351 return [fName for _mfiD, fName in self.normalList]
352
353 #--------------------------------------------------------------------------
354
359
360 #--------------------------------------------------------------------------
361
363 """
364 """
365 if self.fileFrameType == self.frameType:
366 return True
367 stackFrameInfo = self._queryTypeFrameInfo(self.frameType)
368 jitters = set(frame.njitter for frame in stackFrameInfo)
369 return len(jitters) is 1
370
371 #------------------------------------------------------------------------------
372
""" Works out whole provenance for file.

    Starting from the given file (or from the given components), every
    file that is not yet of the final frame type is replaced by its own
    components, one layer at a time, until a layer holds no file of any
    other type.

    @param db: Connection to database to query.
    @param fileName: Path to the file; the final frame type is 'conf' if
                     'conf' occurs in it and 'normal' otherwise.
    @param components: Optional starting components; objects with
                       imageName and multiframeID attributes.

    @return: Every component file found, leaving out entries that are
             named fileName.
    @rtype:  list(ProvFile)
"""
# NOTE(review): the indentation of this body was lost in the listing it was
# recovered from; the nesting of the loop below is a reconstruction -
# confirm it against the original file.
finalFrameType = 'conf' if 'conf' in fileName else 'normal'
frameType = fits.determineFrameType(fileName)
if components:
    inputList = [ProvFile(comp.imageName, comp.multiframeID,
                          fits.determineFrameType(comp.imageName))
                 for comp in components]
else:
    # The file itself is the only starting point; its ID is not known.
    inputList = [ProvFile(fileName, dbc.intDefault(), frameType)]
fullList = inputList
isComplete = False
while not isComplete:
    isComplete = True
    outputList = []  # Next layer down: components of this layer's files
    for filePath, mfID, fType in inputList:
        isInArchive = mfID > 0 \
          or db.queryEntriesExist("Multiframe",
                                  "fileName LIKE '%%%s'" % filePath)
        if fType != finalFrameType:
            # Still above the final type, so another pass is needed.
            isComplete = False
            if isInArchive:
                # Select by ID if known, otherwise by file name.
                whereStr = ("mf2.multiframeID=%d" % mfID if mfID > 0 else
                            "mf2.fileName LIKE '%%%s'" % filePath)
                # mf2 is the combined frame, mf1 each of its components.
                results = db.query(
                  selectStr="mf1.fileName, mf1.multiframeID, mf1.frameType",
                  fromStr="Multiframe AS mf1, Provenance AS p"
                          ", Multiframe AS mf2",
                  whereStr=whereStr
                          + " AND mf2.multiframeID=p.combiframeID"
                            " AND mf1.multiframeID=p.multiframeID")

                # @TODO: Stripping is not required for VSA
                #        & the WSA will be fixed
                components = [ProvFile(fits.stripServer(comp.fileName),
                                       comp.multiframeID, comp.frameType)
                              for comp in results]
            else:
                # Not in the archive: use getFileProv() on the file.
                components = getFileProv(db, filePath)

            outputList += components
    fullList += outputList
    inputList = outputList

# remove initial file from list
return [provFile for provFile in fullList if provFile.name != fileName]

420
421 #------------------------------------------------------------------------------
422
"""
Works out the components of a file from the PROV keywords in its FITS
headers.

@param db: Connection to database to query.
@param filePath: Path to the FITS file to read.

@return: One entry per distinct provenance name found in the headers.
@rtype:  list(ProvFile)

"""
# NOTE(review): the indentation of this body was lost in the listing it was
# recovered from; the nesting below is a reconstruction - confirm it
# against the original file.
provSet = set()
for hdu in fits.open(filePath):
    # Keep the text in front of the first MEF type string and put that
    # string back on the end; PROV0000 is left out.
    provSet.update(
      hdu.header[key].split(db.sysc.mefType)[0] + db.sysc.mefType
      for key in hdu.header if 'PROV' in key and key != 'PROV0000')
frameType = fits.determineFrameType(filePath)

# @TODO: if not realrun, some components may be on main disks and in archive already
# If disks down
if 'tile' in frameType:
    # @TODO: frameType incorrect - for layer above
    # Components of a tile are looked for on disk, not in the archive.
    files = [(provName, fits.findFileOnDisks(db.sysc, provName, isDev=not db.isRealRun))
             for provName in provSet]
    for pName, fileName in files:
        if not fileName:
            Logger.addMessage("Cannot find %s on disk" % pName)

    # NOTE(review): a file that was not found is only logged above; its
    # empty fileName is still passed to determineFrameType() here -
    # confirm that this is handled.
    return [ProvFile(fileName, dbc.intDefault(), fits.determineFrameType(fileName))
            for _pName, fileName in files]
else:
    # @TODO: Has more to be done?
    # @NOTE: Doesn't work in the case of components that are previous deeps - deprecated with 255...
    # NOTE(review): each result is unpacked into three values below -
    # confirm what query() returns with firstOnly=True when no row matches.
    files = [db.query("fileName as name,multiframeID,frameType", "Multiframe",
                      "fileName like '%%%s' and (%s or (frameType like "
                      "'deep%%stack' and deprecated=255))"
                      % (provName, DepCodes.selectNonDeprecated),
                      firstOnly=True)
             for provName in provSet]
    return [ProvFile(fName, mfID, fType) for fName, mfID, fType in files]

455
456 #------------------------------------------------------------------------------
457
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Mon Sep 8 15:47:17 2014 | http://epydoc.sourceforge.net |