Package wsatools :: Module ExternalProductConverter
[hide private]

Source Code for Module wsatools.ExternalProductConverter

  1  #! /usr/bin/env python 
  2  #------------------------------------------------------------------------------ 
  3  #$Id: ExternalProductConverter.py 9680 2013-01-25 12:37:14Z NicholasCross $ 
  4  """ 
  5     Edits a provenance file produced by Nottingham to include all the 
  6     information needed for CU13. 
  7   
  8     @author: N.J. Cross 
  9     @org:    WFAU, IfA, University of Edinburgh 
 10   
 11     @newfield contributors: Contributors, Contributors (Alphabetical Order) 
 12     @contributors: R.S. Collins 
 13  """ 
 14  #------------------------------------------------------------------------------ 
 15  from   collections import defaultdict 
 16  import os 
 17  from   os import path 
 18  import sys 
 19  import vo.table as vot 
 20  import math 
 21  from   operator    import itemgetter 
 22  import shutil 
 23   
 24  import wsatools.DataFactory             as df 
 25  from   wsatools.DbConnect.DbSession import Join 
 26  import wsatools.DbConnect.DbConstants   as dbc 
 27  import wsatools.FitsUtils               as fits 
 28  from   wsatools.Logger              import Logger 
 29  import wsatools.Utilities               as utils 
 30  import wsatools.Astrometry              as astro 
 31  import wsatools.Statistics              as stats 
 32  #------------------------------------------------------------------------------ 
 33   
class ExternalProduct(object):
    """ Prepares a provenance list based on the Nottingham list or
        Swarp XML VO Table.
        @TODO: May have to update for OSA mosaics
    """
    # Log message announcing the header edits applied to each mosaic file
    comment = "Updating mosaic file provenance"
    #releaseNum = '8' #: Mosaic release version number
    # NOTE(review): releaseNum above is commented out and __init__ does not
    # set it, yet queryExtProdData() reads self.releaseNum -- confirm where
    # (if anywhere) that attribute is assigned.
    # Character that separates the filter name inside mosaic file names,
    # keyed by programme acronym (see getProductList()).
    splitCharDict = {'VIDEO':'_', 'UDS':'-'}
    # True when the external provenance file is a Swarp XML VO Table
    # (set per-mosaic in prepareProvList()).
    isVoTable = False
    #--------------------------------------------------------------------------
45 - def __init__(self, archive, externalProdData, progTable, dateRange):
46 self.archive = archive 47 self.sysc = archive.sysc 48 self.extD = externalProdData 49 self.directory = path.join(self.sysc.swarpDir, self.extD.directory) 50 self.programmeID = externalProdData.programmeID 51 self.programme = progTable 52 self.dateRange = dateRange
53 54 #-------------------------------------------------------------------------- 55
    def getProductList(self, Mosaic):
        """ Create product list.

            Determines which part of each mosaic file name holds the filter
            name, reads WCS/astrometry from each mosaic to derive pointing,
            extent and position angle, groups mosaics by pointing, and
            returns one Mosaic tuple per product. Also populates
            self.productDict, self.pointingDict, self.posData,
            self.prodLinks and self.filtData for later stages.

            @param Mosaic: Named-tuple-like constructor with signature
                           (productID, fieldID, filterID, nuStep, pixelSize,
                            ra, dec, raExtent, decExtent, posAngle).

            @return: List of Mosaic products.
        """

        splitChar = self.splitCharDict[self.programme.getAcronym().upper()]

        Logger.addMessage("Querying database for programme info")
        # Get details from ExternalProduct table

        Logger.addMessage("Preparing mosaics and provenance lists for CU13 in "
                          + self.directory)

        filterTable = df.Table("Filter", self.archive)

        possFilterNames = self.archive.query("shortName", "Filter")
        wcsAttr = ["CRPIX1", "CRPIX2", "CRVAL1", "CRVAL2", "CD1_1", "CD1_2",
                   "CD2_1", "CD2_2", "PV2_1", "PV2_2", "PV2_3", "PV2_4",
                   "PV2_5"]

        # Vote on which splitChar-separated token of the file name is the
        # filter name, across all mosaics.
        filtIndices = []

        for imagePath in self.findMosaics():
            # Get data from header...
            # Get ra,dec,pixelScale,raExt,decExt,posAngle
            parts = os.path.basename(imagePath).split('.')[0].split(splitChar)
            filtIndices += [ii for ii, part in enumerate(parts)
                            if part in possFilterNames]
        # NOTE(review): sorted() is ascending, so [0] below selects the index
        # with the FEWEST occurrences unless stats.listCounts returns counts
        # negated/inverted -- confirm the most-common index is intended.
        bestFiltIndexCount = sorted(stats.listCounts(filtIndices, 1), key=itemgetter(1))
        if bestFiltIndexCount:
            bestFiltIndex = bestFiltIndexCount[0][0]
        # NOTE(review): if no token matched a filter name, bestFiltIndex is
        # never bound and the loop below raises NameError.

        products = []
        for imagePath in self.findMosaics():
            filterName = os.path.basename(imagePath).split(splitChar)[bestFiltIndex].split('.', 1)[0]
            filterTable.setCurRow(shortName=filterName)
            filterID = filterTable.getAttr("filterID")
            weightImagePath = fits.getSwarpConfMap(imagePath)
            self.updateFileFilterName((imagePath, weightImagePath), filterName)
            pixelScale = fits.getPixelScale(imagePath)
            # Number of microstep positions: 1 unless WSA, where it is
            # derived from instrument vs mosaic pixel scale.
            nuS = 1
            if self.sysc.isWSA():
                nuS = int((round(self.sysc.instrumentPixScale / pixelScale)) ** 2)

            projection = 'TAN'

            # Read in astrometry, use astro path
            # NOTE(review): if several HDUs have NAXIS == 2 the values from
            # the LAST such HDU win.
            hdulist = fits.open(imagePath)
            for hdu in hdulist:
                if hdu.header['NAXIS'] == 2:
                    naxis1 = hdu.header['NAXIS1']
                    naxis2 = hdu.header['NAXIS2']
                    wcs = []
                    for attr in wcsAttr:
                        if attr in hdu.header:
                            wcs.append(hdu.header[attr])
                        else:
                            wcs.append(dbc.realDefault())

            hdulist.close()
            # Centre of the image in pixel coordinates (FITS 1-based).
            pixels = [(0.5 * (1 + naxis1), 0.5 * (1 + naxis2))]
            pointingInfo = astro.convPixelToSky(pixels, wcs=tuple(wcs),
                                                projection=projection)
            centre = pointingInfo[0]
            Logger.addMessage("Mosaic %s has pointing %s %s and filter %s" %
                              (imagePath, centre[0], centre[1], filterID))
            # Position angle from the pixel offset of a point 0.01 deg north.
            skyPos2 = (centre[0], centre[1] + 0.01)
            skyPosList = [centre, skyPos2]
            pixels = astro.convSkyToPixel(skyPosList, wcs=wcs,
                                          projection=projection)
            posAngle = math.degrees(math.atan2(pixels[1][1] - pixels[0][1],
                                               pixels[1][0] - pixels[0][0]))
            # Extents in degrees (pixelScale assumed arcsec/pixel).
            raExt = naxis1 * pixelScale / 3600.
            decExt = naxis2 * pixelScale / 3600.
            products.append((imagePath, math.radians(centre[0]),
                             math.radians(centre[1]),
                             filterID, raExt, decExt, pixelScale, posAngle, nuS))

        # Group files by pointing
        currCentres = []
        if products: currCentres = [(products[0][1], products[0][2])]
        extProducts = []
        for prod in products:
            match = False
            for centre in currCentres:
                # NOTE(review): no break after a match -- a product within
                # tolerance of several centres is appended once per centre.
                if (math.degrees(astro.angularSep(centre, (prod[1], prod[2])))
                    < self.sysc.maxRaDecExtentStackDeg):
                    extProducts.append((prod, centre))
                    match = True
            if not match:
                currCentres.append((prod[1], prod[2]))
                extProducts.append((prod, currCentres[-1]))
        mosaicGroups = []
        for centre in currCentres:
            group = []
            for prod, pos in extProducts:
                if centre[0] == pos[0] and centre[1] == pos[1]:
                    group.append(prod)
            # Order each pointing's mosaics by filterID.
            ordGrp = sorted(group, key=itemgetter(3))
            mosaicGroups.append((centre, ordGrp))

        #Check that there are no crazies -- duplicate filters at same pointing
        prodID = 1
        productList = []
        self.productDict = {}
        self.pointingDict = {}
        allFilters = set()
        self.posData = []
        self.prodLinks = []
        for fieldID, (centre, group) in enumerate(mosaicGroups):
            self.posData.append((fieldID, math.degrees(centre[0]),
                                 math.degrees(centre[1]), dbc.realDefault()))
            filters = [filterID for _pFiles, _ra, _dec, filterID, _raExt,
                       _decExt, _pSc, _pAng, _nuS in group]

            if len(set(filters)) < len(filters):
                Logger.addMessage("<WARNING> Duplicate filters in pointing: "
                    "%s %s" % (math.degrees(centre[0]), math.degrees(centre[1])))

            for iPath, _ra, _dec, filterID, raExt, decExt, pSc, pAng, nuS in group:
                allFilters.add(filterID)
                # The bare string below is a no-op reminder of the Mosaic
                # constructor's field order.
                "productID, fieldID, filterID, nuStep, pixelSize, ra, dec, " \
                "raExtent, decExtent, posAngle"
                productList.append(Mosaic(prodID, fieldID, filterID, nuS, pSc,
                                          math.degrees(centre[0]),
                                          math.degrees(centre[1]),
                                          raExt, decExt, pAng))
                self.prodLinks.append((prodID, dbc.intDefault(),
                                       "mosaic", "mosaic"))
                self.productDict[iPath] = prodID
                self.pointingDict[prodID] = (fieldID, centre, filterID)
                prodID += 1

        # (filterID, nPass, isDeep) -- isDeep filled in by prepareProvList().
        self.filtData = [(filterID, 1, 0) for filterID in sorted(allFilters)]

        return productList

    #--------------------------------------------------------------------------
194 - def getFiltData(self):
195 """ 196 """ 197 return self.filtData
198 199 #-------------------------------------------------------------------------- 200
201 - def getPosData(self):
202 """ 203 """ 204 return self.posData
205 206 #-------------------------------------------------------------------------- 207 212 213 #-------------------------------------------------------------------------- 214
215 - def prepareProvList(self):
216 """ Update the mosaic files and prepare a provenance list of CU13. 217 """ 218 219 # TODO --- do for each pointing, filter... 220 221 provList = [] 222 isFilterDeepDict = dict((filterID, 0) 223 for filterID, _nP, _isS in self.filtData) 224 for imagePath in self.findMosaics(): 225 productID = self.productDict[imagePath] 226 fieldID, _centre, filterID = self.pointingDict[productID] 227 # Parse external provenance list 228 self.isVoTable = fits.isSwarpProvVO(imagePath) 229 self.provType = ('.xml' if self.isVoTable else '.lst') 230 extProvFileName = (imagePath.split('.', 1)[0] + self.provType) 231 self.fixVoTableVersion(extProvFileName) 232 self.imageName = os.path.basename(imagePath) 233 extOfFile, isReprocessedFile = self.parseProvList(extProvFileName) 234 235 # Query database for files with this filterID 236 archiveData = self.queryMetadata(filterID) 237 238 # Prepare provenance list 239 if len(extOfFile) > 1: 240 isFilterDeepDict[filterID] = 1 241 for fileName in sorted(extOfFile): 242 try: 243 possMatches = archiveData[fileName] 244 except KeyError: 245 Logger.addMessage("<WARNING> %s is not in the database" 246 % fileName) 247 else: 248 # Find best file: most recent version 249 # Or undeprecated version 250 if len(possMatches) == 1: 251 arcd = possMatches[0] 252 else: 253 pArcd = [match for match in possMatches if match.deprecated < 128] 254 if len(pArcd) == 1: 255 arcd = pArcd[0] 256 elif len(pArcd) == 0: 257 arcd = max(possMatches, key=getVersionNum) 258 else: 259 arcd = max(pArcd, key=getVersionNum) 260 Logger.addMessage("Too many good matches, one should be deprecated") 261 print(possMatches) 262 exit() 263 # Get rid of normal space in numberRange 264 eNums = utils.numberRange(extOfFile[fileName], sep=",") 265 dateEnd = formatDate(arcd.dateEnd if self.sysc.isWSA() else 266 arcd.dateObs) 267 268 entry = [fieldID, filterID, arcd.fileName, 269 formatDate(arcd.utDate), formatDate(arcd.dateObs), 270 dateEnd, arcd.mjdObs, arcd.raBase, arcd.decBase, 271 
arcd.deprecated, eNums, arcd.tExpTime, 272 int(isReprocessedFile[fileName])] 273 274 provList.append(' '.join(map(str, entry)) + '\n') 275 276 # Save completed provenance list 277 outputFileName = path.join(self.directory, 'prov%s.dat' % 278 self.extD.directory) 279 280 Logger.addMessage("Writing provenance list to " + outputFileName) 281 if not self.archive.isTrialRun: 282 file(outputFileName, 'w').writelines(provList) 283 else: 284 sys.stdout.writelines(provList) 285 self.filtData = [(filterID, nPass, isFilterDeepDict[filterID]) 286 for filterID, nPass, _isS in self.filtData] 287 288 if max(isS for _filterID, _nPass, isS in self.filtData) > 0: 289 isDeep = True 290 291 return isDeep
292 293 #-------------------------------------------------------------------------- 294
    def renameFiles(self):
        """ Copy mosaic, weight, provenance and catalogue files to their
            archive file names (<acronym>_<dir>_DR<rel>_<prodID>...), stamp
            an ORIGNAME keyword into each FITS header, and record the file
            name root in the ExternalProduct table.

            NOTE(review): despite the name and log messages, files are
            COPIED, not renamed (see the @TODO below).
        """
        # Rename files.
        fileNameRoot = '%s_%s_DR%s_' % (self.programme.getAcronym(),
                                        self.extD.directory, self.extD.releaseNum)

        # SExtractor configuration: first .inpar or .sex file found, if any.
        inparPath = [os.path.join(self.directory, fName)
                     for fName in os.listdir(self.directory)
                     if (fName.endswith('.inpar') or fName.endswith('.sex'))]
        if inparPath:
            inparPath = inparPath[0]
        else:
            inparPath = None
        renameList = []
        isCat = True

        for imagePath in self.findMosaics():
            prodID = self.productDict[imagePath]

            newImagePath = os.path.join(self.directory, fileNameRoot + '%s%s' %
                                        (prodID, self.sysc.mefType))
            renameList.append((imagePath, newImagePath))
            weightPath = fits.getSwarpConfMap(imagePath)
            newWeightPath = os.path.join(self.directory, fileNameRoot + '%s%s%s' %
                                         (prodID, self.sysc.confSuffix,
                                          self.sysc.mefType))
            renameList.append((weightPath, newWeightPath))
            provPath = (imagePath.split('.', 1)[0] + self.provType)
            newProvPath = os.path.join(self.directory, fileNameRoot + '%s%s' %
                                       (prodID, self.provType))
            renameList.append((provPath, newProvPath))
            catPath = fits.getSwarpCat(imagePath)
            # others (cat,inpar)
            if catPath:
                newCatPath = os.path.join(self.directory, fileNameRoot + '%s%s' %
                                          (prodID, '.cat'))
                renameList.append((catPath, newCatPath))
            else:
                isCat = False
        if not isCat and not inparPath:
            # Use default UDS style
            inparPath = self.getDefaultInparFile()
        # @TODO: should have inparFile - for SExtractor information
        if inparPath:
            newInparPath = \
                os.path.join(self.directory, fileNameRoot[:-1] + '_detCat.inpar')
            if not os.path.exists(newInparPath):
                shutil.copyfile(inparPath, newInparPath)
        else:
            Logger.addMessage("No SExtractor .inpar file: no details will go into headers")

        for oldFile, newFile in renameList:
            Logger.addMessage("Renaming %s to %s" % (oldFile, newFile))

            # Add header attribute
            if self.sysc.mefType in oldFile:
                fitsFile = fits.open(oldFile, 'update')
                # NOTE(review): Header.update(key, value, comment) is the
                # old PyFITS API -- confirm the installed fits wrapper still
                # supports it.
                fitsFile[0].header.update('ORIGNAME', oldFile,
                                          'Original name of file from %s at %s'
                                          % (self.extD.creator, self.extD.institution))

                fitsFile.close()

            # @TODO: COPY NOT RENAME
            if not os.path.exists(newFile):
                shutil.copyfile(oldFile, newFile)
            #os.rename(oldFile,newFile)

        self.archive.update("ExternalProduct",
                            "fileNameRoot='%s'" % fileNameRoot,
                            where="programmeID=%d and productType='%s' and "
                            "releaseNum=%d" %
                            (self.programmeID, self.extD.productType,
                             self.extD.releaseNum))

    #--------------------------------------------------------------------------
368 369 #-------------------------------------------------------------------------- 370
    def fixVoTableVersion(self, fileName):
        """ Adds version into VOTABLE markup and fixes centre data.

            Rewrites the file in place only when a change was made.
        """
        lines = file(fileName, 'r').readlines()
        newlines = []
        diff = 0
        for ii, line in enumerate(lines):
            if line.startswith("<VOTABLE"):
                if " version=" not in line:
                    # version number missing: recover it from the namespace
                    # URL. NOTE(review): line[:-2] assumes the line ends
                    # exactly with '>\n' -- confirm for all producers.
                    versionNo = line.split('http://www.ivoa.net/xml/VOTable/v')[1].split('"')[0]
                    if versionNo:
                        diff += 1
                        line = line[:-2] + ' version="%s">\n' % versionNo
            if "<PARAM name" in line and "Center" in line and "Center_Type" not in line:
                # Sort out gaps: collapse padded whitespace in the Center
                # PARAM value down to a single-space "RA Dec" pair.
                if 'value=" ' in line:
                    centreVals = line.split('value="')[1].split('" unit')[0].split()
                    begLine = line.split('value="')[0]
                    endLine = line.split('value="')[1].split('" unit')[1]
                    line = begLine + 'value="%s %s" unit' % tuple(centreVals) + endLine
                    diff += 1
            newlines.append(line)
        if diff:
            file(fileName, "w").writelines(newlines)
396
397 - def getDefaultInparFile(self):
398 """ 399 """ 400 return os.path.join(self.sysc.sExtractorInputsPath(), 401 self.sysc.defaultSexInparFile)
402 403 #-------------------------------------------------------------------------- 404
405 - def queryExtProdData(self):
406 """ Get details from ExternalProduct table. 407 """ 408 return self.archive.query("*", 409 fromStr="ExternalProduct", 410 whereStr="programmeID=%s AND releaseNum=%s AND productType='mosaic'" 411 % (self.programmeID, self.releaseNum), 412 firstOnly=True)
413 414 #-------------------------------------------------------------------------- 415
416 - def findMosaics(self):
417 """ Find mosaics. 418 """ 419 Logger.addMessage("Finding mosaics") 420 images = [] 421 for fileName in os.listdir(self.directory): 422 if self.sysc.catType in fileName and 'weight' not in fileName \ 423 and '_p' not in fileName and '_conf' not in fileName \ 424 and 'correction' not in fileName and 'bin' not in fileName: 425 images.append(path.join(self.directory, fileName)) 426 427 return images
428 429 #-------------------------------------------------------------------------- 430
431 - def updateFileFilterName(self, files, filterName):
432 """ Update the filter names in the FITS files. 433 """ 434 mode = "readonly" if self.archive.isTrialRun else "update" 435 for imageFileName in files: 436 Logger.addMessage("Updating header of " 437 + path.basename(imageFileName)) 438 imageFile = fits.open(imageFileName, mode) 439 imageFile[0].header.update("FILTER", filterName, "Filter name") 440 imageFile.close()
441 442 #-------------------------------------------------------------------------- 443
    def parseProvList(self, extProvFileName):
        """ Parse external provenance list.

            Handles both Swarp XML VO Tables (self.isVoTable True) and plain
            text lists, one input file per line.

            @return: (extOfFile, isReprocessedFile): the set of extension
                     numbers and a reprocessed flag, each keyed by input
                     file name.
        """
        Logger.addMessage("Parsing external provenance list: "
                          + path.basename(extProvFileName))

        extOfFile = defaultdict(set)
        isReprocessedFile = defaultdict(bool)

        if self.isVoTable:
            files = None
            fieldCoords = None
            votable = vot.parse(extProvFileName)
            for resource in votable.resources:
                # Only the resource matching this mosaic's image name.
                if containsSameParts(resource.name, self.imageName):
                    for res in resource.resources:
                        if res.name == 'MetaData':
                            for table in res.tables:
                                if table.name == 'Input_Image_Data':
                                    data = table.array
                                    files = data.field('Image_Name')
                                    fieldCoords = data.field('Field_Coordinates')
                    extOfFile, isReprocessedFile = self.getExtNums(files, fieldCoords)

        else:
            for fileName in file(extProvFileName):
                # NOTE(review): the extension number is assumed to be the
                # single final character before the first '.' -- this breaks
                # for extension numbers above 9.
                extNum = path.basename(fileName).split('.')[0][-1:]
                parts = path.basename(fileName).split('_' + extNum)
                actFileName = parts[0] + parts[1].split()[0]
                if path.splitext(actFileName)[1] != self.sysc.mefType:
                    actFileName = path.splitext(actFileName)[0] + self.sysc.mefType

                extOfFile[actFileName].add(int(extNum))
                # Reprocessed if any occurrence of this file came from a
                # "_rep" directory.
                isReprocessedFile[actFileName] = isReprocessedFile[actFileName] \
                    or "_rep" in path.dirname(fileName)

        return extOfFile, isReprocessedFile

    #--------------------------------------------------------------------------
481 482 #-------------------------------------------------------------------------- 483
484 - def getExtNums(self, files, fieldCoords):
485 """ 486 """ 487 tolerance = self.sysc.maxRaDecExtentStackDeg * 60. 488 archFiles = self.archive.query( 489 selectStr="m.multiframeID,fileName,deprecated", 490 fromStr="Multiframe as m,ProgrammeFrame as p", 491 whereStr="programmeID=%d and p.multiframeID=m.multiframeID and " 492 "frameType like '%%stack'" % self.programmeID) 493 archFNameSet = set(os.path.basename(file.fileName) for file in archFiles) 494 mfIDLookUp = dict((fileName, file.multiframeID) 495 for fileName in archFNameSet for file in archFiles 496 if os.path.basename(file.fileName) == fileName) 497 extOfFile = defaultdict(set) 498 isReprocessedFile = defaultdict(bool) 499 500 for ii, swarpPath in enumerate(files): 501 # Remove 'u' 502 casuName = os.path.basename(swarpPath)[1:] 503 # find most recent 504 if casuName not in extOfFile: 505 extOfFile[casuName] = set() 506 isReprocessedFile[casuName] = False 507 if casuName in mfIDLookUp: 508 mfID = mfIDLookUp[casuName] 509 extNum = self.archive.query( 510 "extNum", "CurrentAstrometry", 511 "multiframeID=%d and dbo.fGreatCircleDist(centralRa," 512 "centralDec,%s,%s)<%s" % (mfID, fieldCoords[ii][0], 513 fieldCoords[ii][1], tolerance), 514 firstOnly=True, 515 default=dbc.tinyIntDefault()) 516 extOfFile[casuName].add(extNum) 517 518 return extOfFile, isReprocessedFile
519 520 #-------------------------------------------------------------------------- 521
    def queryMetadata(self, filterID):
        """ Query database for files with this filterID.

            Selects every non-deep stack frame of this programme in the
            given filter, ordered by MJD.

            @return: Lists of metadata rows keyed by base file name.
                     NOTE(review): this is a defaultdict(list) -- looking up
                     a missing file name silently yields [] (and inserts the
                     key) rather than raising KeyError; callers must not
                     rely on KeyError for "not in database" detection.
            @rtype:  defaultdict(list)
        """
        Logger.addMessage("Querying database metadata for filterID %s"
                          % filterID)

        archiveData = defaultdict(list)
        # Total exposure time formula differs between VSA and WSA schemas.
        tExpTime = ('njitter * nustep * detDit * detNdit' if self.sysc.isVSA()
                    else 'njitter * nustep * expTime')
        joinTables = (["Multiframe", "ProgrammeFrame", "MultiframeEsoKeys"]
                      if self.sysc.isVSA() else ["Multiframe", "ProgrammeFrame"])
        for entry in self.archive.query(
            selectStr="fileName, utDate, dateObs, "
                + ("dateEnd, " if self.sysc.isWSA() else "")
                + "mjdObs, raBase, decBase, filterID, deprecated, "
                "(%s) AS tExpTime" % tExpTime,
            fromStr=Join(joinTables, "multiframeID"),
            whereStr="programmeID=%s AND filterID=%s"
                % (self.programmeID, filterID)
                + " AND frameType LIKE '%%stack'"
                " AND frameType NOT LIKE '%%deep%%'"
            , orderBy="mjdObs"):

            archiveData[path.basename(entry.fileName)].append(entry)
        return archiveData

    #------------------------------------------------------------------------------
547 548 #------------------------------------------------------------------------------ 549
def formatDate(date):
    """ Return str(date) with every space replaced by 'Z', giving a
        whitespace-free date-time string for the provenance list.
    """
    return "Z".join(str(date).split(" "))
552 553 #------------------------------------------------------------------------------ 554
def getVersionNum(x):
    """ Extract the processing version from the name of the directory
        containing x.fileName: the part after the final "_v".
    """
    versionDir = path.basename(path.dirname(x.fileName))
    return versionDir.split("_v")[-1]
557 558 #------------------------------------------------------------------------------ 559
def getSetOfParts(name, splitNameList):
    """ Split name successively on every separator in splitNameList and
        return the set of all resulting fragments.
    """
    fragments = set([name])
    for separator in splitNameList:
        fragments = set(piece for fragment in fragments
                        for piece in fragment.split(separator))

    return fragments
571 572 #------------------------------------------------------------------------------ 573
def containsSameParts(filePath1, filePath2):
    """ True when both paths decompose into exactly the same set of
        fragments after splitting on '_' and '.'.
    """
    separators = ['_', '.']

    # Mutual-empty-difference is simply set equality.
    return getSetOfParts(filePath1, separators) == \
        getSetOfParts(filePath2, separators)
582 583 #------------------------------------------------------------------------------ 584