Package wsatools :: Module FitsUtils
[hide private]

Source Code for Module wsatools.FitsUtils

   1  #------------------------------------------------------------------------------ 
   2  # $Id: FitsUtils.py 10237 2014-03-06 14:26:56Z EckhardSutorius $ 
   3  """ 
   4     Tools to work with the VDFS FITS file collection. 
   5   
   6     @author: E. Sutorius, N.J.G. Cross 
   7     @org:    WFAU, IfA, University of Edinburgh 
   8   
   9     @newfield contributors: Contributors, Contributors (Alphabetical Order) 
  10     @contributors: I.A. Bond, R.S. Collins 
  11   
  12     @requires: PyFITS 
  13  """ 
  14  #------------------------------------------------------------------------------ 
  15  from __future__ import division, print_function 
  16   
  17  from   collections import defaultdict, namedtuple 
  18  import dircache 
  19  from   glob        import iglob 
  20  import math 
  21  import numpy 
  22  from   operator    import itemgetter 
  23  import os 
  24  import pyfits 
  25  import re 
  26  import time 
  27   
  28  import wsatools.DbConnect.DbConstants as dbc 
  29  import wsatools.ExternalProcess       as extp 
  30  from   wsatools.File              import File 
  31  from   wsatools.Logger            import Logger 
  32  from   wsatools.SystemConstants   import SystemConstants 
  33  import wsatools.Utilities             as utils 
  34  #------------------------------------------------------------------------------ 
  35  # Module variables 
  36   
_blockLen = 2880  #: Maximum length of a FITS header block in bytes
#------------------------------------------------------------------------------
# Module types

#: The format of tuples giving file lists.
#  Fields: image - path to the image file; conf - path to its confidence
#  map; cat - path to its catalogue file.
FileList = namedtuple('FileList', 'image conf cat')
  43   
  44  #------------------------------------------------------------------------------ 
  45   
class FitsList(object):
    """ Generate lists for FITS directories.
    """
    # @TODO: Default value for sysc is for backwards compatibility only.
    def __init__(self, sysc=SystemConstants("WSA"), prefix=''):
        # Archive-specific constants (paths, suffixes, observation calendar).
        self.sysc = sysc
        # Optional prefix prepended to the names of the list files written
        # by writeFileLists().
        self.prefix = prefix

    #--------------------------------------------------------------------------

    def createFitsDateDict(self, disklist=None,
                           ingestDirectory=SystemConstants.fitsDir,
                           trafoLog=None, beginDateStr='19990101',
                           endDateStr='99991231', versionStr=None,
                           forceLists=False):
        """
        Create a dictionary listing all dates for every fits directory
        in disklist.  Results are stored in self.fitsDateDict (directory
        path -> list of date directory names) and its inversion
        self.invFitsDateDict.

        @param beginDateStr: First date of fits date directories.
        @type beginDateStr: str
        @param disklist: List of all disks having the ingest directory.
        @type disklist: list[str]
        @param endDateStr: Last date of fits date directories.
        @type endDateStr: str
        @param forceLists: Return a dictionary of lists.
        @type forceLists: bool
        @param ingestDirectory: The directory containing the fits date
                                directories.
        @type ingestDirectory: str
        @param trafoLog: The transfer logfile name.
        @type trafoLog: str
        @param versionStr: The version of the date directories.
        @type versionStr: str

        """
        disklist = disklist or self.sysc.availableRaidFileSystem()
        if type(ingestDirectory) != type([]):
            ingestDirectory = [ingestDirectory]

        self.fitsDateDict = defaultdict(list)
        if trafoLog:
            # Take the date directories straight from the transfer log
            # instead of scanning the disks.
            trafoLogDict = self.createTrafoLogDict(trafoLog)
            if trafoLogDict:
                for direc in list(trafoLogDict.pop("xferDirs")):
                    trafoDir, datumVersStr = os.path.split(direc)
                    self.fitsDateDict[trafoDir].append(datumVersStr)
        else:
            if versionStr:
                versions = [versionStr]
            else:
                # No version given: accept any known version number.
                allVersions = set()
                if "products" in ingestDirectory[0]:
                    allVersions.update(map(str, range(1, 100)))
                for x in self.sysc.obsCal.versNums.values():
                    allVersions.update(x)

                versions = sorted(allVersions)

            for direc in disklist:
                for subDir in ingestDirectory:
                    fdir = os.path.join(direc, subDir)
                    if os.path.exists(fdir):
                        listdir = []
                        for elem in dircache.listdir(fdir):
                            # Date dirs look like "20YYMMDD_vN"; names with
                            # an alphabetic extension ("*_v.comment") are
                            # test directories and are skipped.
                            if (elem[:2] == "20" and '_v' in elem
                                and not elem.partition('.')[2].isalpha()):
                                tmpDate, tmpVers = elem.partition('_v')[::2]
                                if beginDateStr <= tmpDate <= endDateStr \
                                   and tmpVers in versions:
                                    listdir.append(elem)
                        if listdir:
                            self.fitsDateDict[fdir] = listdir

        self.invFitsDateDict = utils.invertDict(self.fitsDateDict, forceLists)

    #--------------------------------------------------------------------------

    def getVersion(self, datumVersStr):
        """
        Get the version number from the date directory name, exclude
        test directories (ending in *_v.comment).

        @param datumVersStr: The date directory name.
        @type datumVersStr: str

        @return: Version number ('-1' for test directories).
        @rtype: str

        """
        versStr = '-1'
        if not datumVersStr.partition('.')[2].isalpha():
            # NOTE(review): str.rpartition never raises ValueError, so this
            # try/except looks like dead code kept for safety.
            try:
                versStr = datumVersStr.rpartition("_v")[2]
            except ValueError:
                pass

        return versStr

    #--------------------------------------------------------------------------

    def createTrafoLogDict(self, logFileName):
        """
        Read the transfer log and create a date sorted dict.

        @param logFileName: The transfer log file.
        @type logFileName: str

        @return: Dictionary containing all information of the transfered
                 files.  Keys are date directory names plus the metadata
                 keys "startDate", "endDate", "versionStr" and "xferDirs".
        @rtype: dict

        """
        trafoLogDict = defaultdict(list)
        xferDirs = set()
        logFile = File(logFileName)
        logFile.ropen()
        allLines = logFile.readlines(commentChar='#')
        logFile.close()
        for line in allLines:
            # Each log line is a transferred file path; group by its
            # date directory.
            trafoDir, datumVersStr = os.path.split(os.path.dirname(line))
            trafoLogDict[datumVersStr].append(line)
            xferDirs.add(os.path.join(trafoDir, datumVersStr))
        keys = sorted(trafoLogDict.keys())
        if keys:
            trafoLogDict["startDate"] = int(keys[0].split("_v")[0])
            trafoLogDict["versionStr"] = keys[0].split("_v")[1]
            trafoLogDict["endDate"] = int(keys[-1].split("_v")[0])
        trafoLogDict["xferDirs"] = list(xferDirs)
        return trafoLogDict

    #--------------------------------------------------------------------------

    def allFiles(self, pattern, searchPath, pathSep=os.pathsep):
        """
        Get all file names matching the given string pattern.

        @note: Despite using os.listdir() glob is still as fast as
               dircache.listdir() having done tests of my own to prove it.
        """
        for path in searchPath.split(pathSep):
            for match in iglob(os.path.join(path, pattern)):
                yield match

    #--------------------------------------------------------------------------

    def getFileCount(self, pattern, searchPath, pathSep=os.pathsep):
        """Get the number of files matching the given string pattern.
        """
        return len(list(self.allFiles(pattern, searchPath, pathSep)))

    #--------------------------------------------------------------------------

    def countFileTypes(self, fileList):
        """
        Count the files in the given list grouped by file type.

        @param fileList: List of FITS files.
        @type fileList: list(str)

        @return: number of pixel and catalogue files (excluding 'e*'),
                 number of pixel files (excluding 'e*'),
                 number of catalogue files (excluding 'e*'),
                 number of CASU tile pixel files
        @rtype: int, int, int, int

        """
        # Prefixes/suffixes come from the system constants; \b anchors
        # them to word boundaries in the file name.
        filecrit_casupre = re.compile(r'\b' + self.sysc.casuPrefix)
        filecrit_wfaupre = re.compile(r'\b' + self.sysc.wfauPrefix)
        filecrit_pix = re.compile(self.sysc.mefType + r'\b')
        filecrit_cat = \
            re.compile(self.sysc.catSuffix + self.sysc.catType + r'\b')

        filecrit_tile = re.compile(self.sysc.tileSuffix)
        fileNum = 0
        catNum = 0
        pixNum = 0
        tileNum = 0
        for item in fileList:
            fileName = os.path.basename(item)
            if (filecrit_pix.search(fileName) or
                filecrit_cat.search(fileName)) and \
                not filecrit_wfaupre.search(fileName):
                fileNum += 1
            if (not filecrit_wfaupre.search(fileName)
                and filecrit_pix.search(fileName)):
                pixNum += 1
            if (filecrit_casupre.search(fileName)
                and filecrit_cat.search(fileName)):
                catNum += 1
            if (filecrit_casupre.search(fileName)
                and filecrit_tile.search(fileName)
                and filecrit_pix.search(fileName)):
                tileNum += 1

        return fileNum, pixNum, catNum, tileNum

    #--------------------------------------------------------------------------

    # @TODO: This should use the system constant default dates.
    def createDailyFileLists(self, workDir, startDate=10000101,
                             endDate=99991231, versionStr='1', trafoLog=None):
        """
        List all files in one date directory into one filelist.

        Without a trafoLog this walks self.fitsDateDict, so
        createFitsDateDict() must have been called first.

        @param workDir: Working directory path.
        @type workDir: str
        @param startDate: First date to process.
        @type startDate: int
        @param endDate: Last date to process.
        @type endDate: int
        @param versionStr: Version number of the data.
        @type versionStr: str
        @param trafoLog: Logfile containing files to be ingested.
        @type trafoLog: str
        """
        self.dailyFileListDict = defaultdict(list)
        if trafoLog:
            trafoLogDict = self.createTrafoLogDict(trafoLog)
            for datumStr in trafoLogDict:
                # Skip the metadata keys added by createTrafoLogDict().
                if datumStr not in ["versionStr", "xferDirs",
                                    "endDate", "startDate"]:
                    fitsList = trafoLogDict[datumStr]
                    self.writeFileLists(workDir, datumStr, fitsList)
        else:
            for direc in self.fitsDateDict:
                for datumVersStr in self.fitsDateDict[direc]:
                    date = int(datumVersStr[:datumVersStr.find('_')])
                    versStr = self.getVersion(datumVersStr)
                    if startDate <= date <= endDate and versionStr == versStr:
                        path = os.path.join(direc, datumVersStr)
                        fitsList = [os.path.join(direc, datumVersStr, item)
                                    for item in dircache.listdir(path)]

                        self.writeFileLists(workDir, datumVersStr, fitsList)

    #--------------------------------------------------------------------------

    def writeFileLists(self, outDir, datumStr, fitsList):
        """
        Write filelists into a working directory for further processing.
        Pixel/catalogue files go into the "filelist" file, everything else
        into the "otherlist" file.

        @param outDir: Working directory path.
        @type outDir: str
        @param datumStr: The date directory name.
        @type datumStr: str
        @param fitsList: List of FITS files.
        @type fitsList: list(str)

        """
        listFileName = \
            os.path.join(outDir, "%sfilelist%s.log" % (self.prefix, datumStr))
        otherFilesName = \
            os.path.join(outDir, "%sotherlist%s.log" % (self.prefix, datumStr))

        self.dailyFileListDict[datumStr].append(listFileName)

        filecrit2 = re.compile(self.sysc.mefType + r'\b')
        filecrit3 = re.compile(self.sysc.catSuffix + self.sysc.catType + r'\b')

        # NOTE: file() is the Python 2 built-in (this module is Python 2).
        with file(listFileName, 'w') as listFile, \
             file(otherFilesName, 'w') as otherFile:

            for filePath in fitsList:
                if (filecrit2.search(os.path.basename(filePath))
                    or filecrit3.search(os.path.basename(filePath))):
                    listFile.write(filePath + '\n')
                else:
                    otherFile.write(filePath + '\n')
312 313 #------------------------------------------------------------------------------ 314
def checksum(fileList):
    """
    Update checksum of given FITS files.

    @note: Parallel tests prove to be as slow as series due to I/O limit.
    """
    uncompressFits(fileList)
    command = ["fitscheck", "-f", "-w"]

    # Files were just uncompressed, so drop a stale '.fz' suffix whenever
    # the compressed path no longer exists.
    paths = []
    for fileName in fileList:
        if not os.path.exists(fileName):
            fileName = fileName.replace('.fz', '')
        paths.append(fileName)

    for path in paths:
        dirPath, baseName = os.path.split(path)
        try:
            extp.run(command + [baseName], cwd=dirPath or '.')
        except (OSError, extp.Error) as error:
            Logger.addExceptionWarning(error)

    compressFits(paths)
333 334 #------------------------------------------------------------------------------ 335
def combHeaders(priHeader, secHeader):
    """
    Combines the header keywords from a primary header and a secondary,
    skipping the structural FITS keywords. Useful for converting stacks to
    mosaics.

    @param priHeader: Primary header to update.
    @type priHeader: pyfits.Header
    @param secHeader: Secondary header from which to obtain data.
    @type secHeader: pyfits.Header

    """
    # Structural keywords must never be copied between headers.
    structuralKeys = set(["NAXIS", "PCOUNT", "GCOUNT", "BITPIX", "SIMPLE",
                          "EXTEND", "NAXIS1", "NAXIS2", "XTENSION"])

    for card in secHeader.ascard:
        if card.key in structuralKeys:
            continue
        priHeader.update(card.key, card.value, card.comment)
353 354 #------------------------------------------------------------------------------ 355
def compressFits(fileList):
    """
    Rice-compress the given FITS files in place.

    @param fileList: A list of FITS files.
    @type fileList: list(str)

    """
    _fpackList(fileList, True)
365 366 #------------------------------------------------------------------------------ 367
def createFileListDict(progTable, fileTag=None, workingDir=None, isList=False):
    """
    Creates a dictionary of full paths to the files that list the newly
    created file products to be ingested into the given database for the given
    programme, referenced by product type.

    @param progTable: Programme table for the ingest database with the current
                      row set to that of the desired programme if this is a
                      list of files for only one programme.
    @type progTable: DataFactory.ProgrammeTable
    @param fileTag: Optionally, additionally tag the file name with this
                    tag, otherwise the programme acronym is used.
    @type fileTag: str
    @param workingDir: Optionally specify directory path to create the file
                       lists.
    @type workingDir: str
    @param isList: If True, use the programme's source product types and a
                   '_ld' list-driven marker in the file name.
    @type isList: bool

    @return: Full path to the file list of each product type, referenced by
             product type.
    @rtype: dict(str: str)

    """
    db = progTable._db
    dbName = db.database
    sysc = db.sysc
    if not workingDir:
        if db.isRealRun:
            workingDir = sysc.ingestListsPath(db.sharePath())
        else:
            workingDir = sysc.testOutputPath(sysc.productLogDir)

    utils.ensureDirExist(workingDir)

    prodTypes = (progTable.getAttr('sourceProdType') if isList
                 else sysc.productTypes)

    fileListForType = {}
    for prodType in prodTypes:
        listName = '%s_%s_%s_%s%s.list' % (
            dbName, fileTag or progTable.getAcronym().lower(),
            prodType.lower(), time.strftime('%Y%m%d_%H%M'),
            '_ld' if isList else '')
        fileListForType[prodType] = os.path.join(workingDir, listName)

    return fileListForType
413 414 #------------------------------------------------------------------------------ 415
def createSuffix(combiFrameType):
    """
    Converts a combined frame type to a complete file name suffix.

    @param combiFrameType: Database frame type of file.
    @type combiFrameType: str

    @return: The complete file name suffix.
    @rtype: str

    """
    suffixOf = SystemConstants.frameTypeToSuffix()
    sufList = [suffixOf[frameType] for frameType in suffixOf
               if frameType in combiFrameType]

    # Suffix order in file names is fixed by the archive convention.
    return ''.join(utils.arbSort(sufList, SystemConstants.fileNameSuffices()))
432 433 #------------------------------------------------------------------------------ 434
def determineFrameType(filePathName):
    """
    Determines a file's frame type from the file name.

    @param filePathName: Path to FITS image file.
    @type filePathName: str

    @return: The database frame type.
    @rtype: str

    """
    sufOfFrame = SystemConstants.frameTypeToSuffix()
    fileName = os.path.basename(filePathName)
    types = [fType for fType, suffix in sufOfFrame.items()
             if suffix in fileName]

    # Drop the generic 'normal' type when a more specific type also matched.
    # Guarding on membership fixes the ValueError the old code raised when
    # 'normal' was not among the matches at all.
    if types != ['normal'] and 'normal' in types:
        types.remove('normal')

    return ''.join(utils.arbSort(types, SystemConstants.frameTypes))
454 455 #------------------------------------------------------------------------------ 456
def findFileOnDisks(sysc, filePart, isDev):
    """
    Locate the given file name on the archive disks and return its full
    path, or None when it is not found.
    """
    isProduct = sysc.deepSuffix in filePart
    isDev = isProduct and isDev
    if isProduct:
        if sysc.mosaicSuffix in filePart:
            prodType = 'mosaic'
        elif sysc.tileSuffix in filePart:
            prodType = 'tile'
        else:
            prodType = 'stack'
        subDir = os.path.join('products', prodType + 's')
    else:
        subDir = os.path.join('ingest', 'fits')

    disks = ([sysc.testOutputPath()] if isDev
             else sysc.availableRaidFileSystem())
    for disk in disks:
        candidate = os.path.join(disk, subDir, filePart)
        if os.path.exists(candidate):
            return candidate
473 474 #------------------------------------------------------------------------------ 475 476 # @TODO: sysc argument should be mandatory; def. is for backwards compat.
def findProduct(progID, releaseNum, productID, prodType, prodInfo,
                filterTable, isDev=False, sysc=SystemConstants("WSA"),
                directory=None):
    """
    Finds all the available stacks that cover the mosaic in question.

    @param progID: Programme ID.
    @type progID: int
    @param releaseNum: Release number of stack.
    @type releaseNum: int
    @param productID: ID of product
    @type productID: int
    @param prodType: Type of product ('stack', 'tile' or 'mosaic')
    @type prodType: str
    @param prodInfo: Details of the product
    @type prodInfo: Namedtuple (or dict)
    @param filterTable: Table object containing attributes of filter table.
    @type filterTable: DataFactory.Table
    @param isDev: If True, just check development directory else check all
                  disks.
    @type isDev: bool
    @param sysc: Initialised SystemConstants object.
    @type sysc: SystemConstants
    @param directory: Optional single directory to search instead of the
                      archive disks.
    @type directory: str

    @return: The best matching product file path, or None if nothing
             matches or the product is unknown.
    @rtype: str

    """
    # @TODO: These system constants should be replaced by a dictionary type
    dirDict = {'stack': sysc.stackDir, 'mosaic': sysc.mosaicDir}
    if sysc.isVSA():
        dirDict['tile'] = sysc.tileDir

    fileNameParts = ["%05d%05d" % (progID, productID)]
    fileNameParts += [sysc.deepSuffix, sysc.stackSuffix, sysc.mefType]
    notFileNameParts = \
        [sysc.confSuffix, sysc.catType, sysc.catSuffix, sysc.filtSuffix,
         sysc.filtPrefix, sysc.listSuffix]

    if prodType == 'mosaic':
        fileNameParts.append(sysc.mosaicSuffix)
    else:
        # Membership tests below make a duplicated entry harmless, so a
        # single append is enough.
        notFileNameParts.append(sysc.mosaicSuffix)

    if prodType == 'tile':
        fileNameParts.append(sysc.tileSuffix)

    # Find list of product candidates on all disks (or in the given dir).
    products = []
    if directory:
        for fileName in os.listdir(directory):
            filePathName = os.path.join(directory, fileName)
            # @TODO: if pawprint, make sure do not match to tile or mosaic
            if (all(part in filePathName for part in fileNameParts)
                    and all(part not in filePathName
                            for part in notFileNameParts)):
                products.append(filePathName)
    else:
        for disk in (sysc.availableRaidFileSystem() if not isDev else
                     [sysc.testOutputPath()]):

            productDir = os.path.join(disk, dirDict[prodType])
            if not os.path.exists(productDir):
                continue

            # Check date sub-directories of the requested release only.
            subDirs = [subDir for subDir in os.listdir(productDir)
                       if subDir.split('_')[0].isdigit()
                       and subDir.startswith('20')
                       and subDir.endswith("_v%d" % releaseNum)]

            for subDir in subDirs:
                for fileName in os.listdir(os.path.join(productDir, subDir)):
                    filePathName = os.path.join(productDir, subDir, fileName)
                    # @TODO: if pawprint, make sure do not match to tile or
                    #        mosaic
                    if (all(part in filePathName for part in fileNameParts)
                            and all(part not in filePathName
                                    for part in notFileNameParts)):
                        products.append(filePathName)

    prodList = [pi for pi in prodInfo if pi.productID == productID]
    if not prodList:
        # Fixes the old @FIXME: pInfo was left unset here, causing a
        # NameError further down.  Warn and bail out instead.
        Logger.addMessage("<WARNING> No information for product %d in "
                          "Required%s" % (productID, prodType.title()))
        return None

    pInfo = prodList[0]

    # Read header information of every candidate and keep those whose
    # filter, NUSTEP and pointing match the required product.
    goodProducts = []
    for filePathName in products:
        fitsFile = open(filePathName)
        extNo = 0 if not (sysc.tileSuffix in filePathName
                          and len(fitsFile) > 1) else 1
        fitsKeys = {}
        testKeys = [key.replace("HIERARCH ", "")
                    for key in sysc.pointingKeys]
        testKeys.extend(['TELESCOP', 'INSTRUME', 'NUSTEP'])

        for key in testKeys:
            try:
                fitsKeys[key] = fitsFile[extNo].header.get(key)
            except Exception:  # narrowed from a bare except
                fitsKeys[key] = None
            if fitsKeys[key] is None:
                Logger.addMessage(
                    "<Warning> No %s FITS key in %s" % (key, filePathName))
        fitsFile.close()

        if None in fitsKeys.values():
            continue

        try:
            fID = filterTable.getAttr('filterID',
                                      shortName=fitsKeys[testKeys[2]])
        except ValueError:
            fID = None
            for name in filterTable.getAll('name'):
                if fitsKeys[testKeys[2]].lower() in name.lower():
                    fID = filterTable.getAttr('filterID', name=name)
                    break
        if not fID:
            if fitsKeys[testKeys[2]] == 'BGamma':
                fID = 7

        # RABASE is given in hours; convert to degrees.
        ra = (15 * fitsKeys[testKeys[0]]
              if testKeys[0] == "RABASE" else fitsKeys[testKeys[0]])

        dec = fitsKeys[testKeys[1]]
        deltaPos = (max(sysc.fieldOfViewTile) / 2
                    if prodType == 'tile' else sysc.maxRaDecExtentStackDeg)

        raMin = ra - deltaPos
        raMax = ra + deltaPos
        decMin = dec - deltaPos
        decMax = dec + deltaPos
        if (fID == pInfo.filterID
                and int(fitsKeys['NUSTEP']) == pInfo.nustep
                and raMin < pInfo.ra < raMax
                and decMin < pInfo.dec < decMax):
            goodProducts.append(filePathName)

    if goodProducts:
        # NOTE(review): key=itemgetter(0) sorts paths by their first
        # character only (stable sort) -- kept as-is; verify intent.
        goodProducts.sort(key=itemgetter(0))
        return goodProducts[-1]
626 627 #------------------------------------------------------------------------------ 628
def fixExponentBug(header):
    """
    Repairs the CC_PRES keyword values that prevent FITS files from being
    written, by re-assigning the value (which normalises its formatting).

    @param header: A FITS file header.
    @type header: pyfits.Header

    @return: 1 if change made else 0
    @rtype: int

    """
    value = header.get('CC_PRES')
    if value is None:
        return 0

    header['CC_PRES'] = value
    return 1
647 648 #------------------------------------------------------------------------------ 649
def formatDateTime(dateTime, isDiffSec=True):
    """
    Formats a DateTime string.

    @param dateTime: DateTime object to format.
    @type dateTime: mx.DateTime
    @param isDiffSec: If True, append milliseconds to the time stamp.
    @type isDiffSec: bool

    @return: Correctly formatted date time string.
    @rtype: str

    """
    wholeSec = int(dateTime.second)
    stamp = '%4u-%02u-%02uT%02u:%02u:%02u' % (
        dateTime.year, dateTime.month, dateTime.day,
        dateTime.hour, dateTime.minute, wholeSec)

    if isDiffSec:
        milliSec = int(1000 * (dateTime.second - wholeSec))
        stamp += '.%03u' % milliSec

    return stamp
671 672 #------------------------------------------------------------------------------ 673
def _fpackList(fileList, compress=True):
    """
    Execute fpack/funpack on every file in given list. Used by public
    functions, compressFits() and uncompressFits().

    @param fileList: A list of FITS files.
    @type fileList: list(str)
    @param compress: If True, compress the FITS file, otherwise uncompress.
    @type compress: bool

    @note: Parallel tests prove to be as slow as series due to I/O limit.

    """
    cmd = (["fpack", "-r", "-Y"] if compress else ["funpack"]) + ["-F"]
    for filePath in fileList:
        # If given a '.fz' path that no longer exists but its uncompressed
        # counterpart does, operate on the uncompressed file instead.
        if (filePath.endswith('.fz') and not os.path.exists(filePath)
                and os.path.exists(filePath.replace('.fz', ''))):
            filePath = filePath.replace('.fz', '')

        fileDirPath, fileName = os.path.split(filePath)
        if (compress and not isCompressed(filePath)
                or not compress and isCompressed(filePath)):
            try:
                extp.run(cmd + [fileName], cwd=fileDirPath or '.')
            except (OSError, extp.Error) as error:
                Logger.addExceptionWarning(error)
        elif not compress and filePath.endswith('.fz'):
            # Already uncompressed: chmod the uncompressed file name.
            filePath = filePath.split('.fz')[0]

        # fpack does not honour original file permissions.
        # 0o644 replaces the Python-2-only literal 0644 (same value, also
        # valid syntax on Python 2.6+).
        os.chmod(filePath, 0o644)  # rw for user (scos), r for others
704 705 #------------------------------------------------------------------------------ 706
def getAllPaths(disks, fitsDirs=[SystemConstants.fitsDir]):
    """
    Find all paths with the given FITS sub-directory name if the given list of
    disks, where FITS files are stored.

    @param disks: The sequence of disks to be searched, e.g.
                  L{SystemConstants.availableRaidFileSystem()}.
    @type disks: sequence(str)
    @param fitsDirs: A list of directories,
                     e.g. L{SystemConstants.downloadPathList()}.
    @type fitsDirs: list(str)

    @return: A list of pairs of FITS sub-directory path with a list of the
             date sub-directories in that path (e.g.
             [('/disk01/wsa/fits/', ['20050101', '20060202'])]).
    @rtype: list(tuple(str, list(str)))

    """
    paths = []
    for disk in disks:
        for fitsDir in fitsDirs:
            dirPath = os.path.join(disk, fitsDir)
            if not os.path.exists(dirPath):
                continue
            # Keep only versioned date directories, e.g. "20050101_v1".
            dateDirs = [entry for entry in dircache.listdir(dirPath)
                        if entry[:8].isdigit() and '_v' in entry
                        and '.' not in entry]
            if dateDirs:
                paths.append((dirPath, dateDirs))

    return paths
736 737 #------------------------------------------------------------------------------ 738
def getCatalogue(filePath):
    """
    @return: The full path to the catalogue file for the corresponding image
             file path.
    @rtype: str

    """
    catType = SystemConstants.catSuffix + SystemConstants.catType
    return filePath.replace(SystemConstants.mefType, catType)
748 749 #------------------------------------------------------------------------------ 750
def getConfMap(filePath):
    """
    @return: The full path to the confidence image map for the corresponding
             image file path.
    @rtype: str

    """
    confType = SystemConstants.confSuffix + SystemConstants.mefType
    return filePath.replace(SystemConstants.mefType, confType)
760 761 #------------------------------------------------------------------------------ 762
def getDate(filePath):
    """
    @return: The date string encoded in the file's parent directory name
             (the part before the '_v' version marker).
    @rtype: str

    """
    dateDir = os.path.basename(os.path.dirname(filePath))
    return dateDir.partition('_v')[0]
770 771 #------------------------------------------------------------------------------ 772
def getExtList(filePath):
    """
    @return: The list of extensions.
    @rtype: list(int)

    """
    baseName = os.path.basename(filePath)
    if (SystemConstants.tileSuffix in baseName
            or SystemConstants.mosaicSuffix in baseName):
        # Tiles and mosaics always carry their data in extension 2.
        return [2]

    # Detector number keywords for the supported cameras.
    extList = []
    for hdu in open(filePath)[1:]:
        for key in ("CAMNUM", "ESO DET CHIP NO"):
            try:
                extList.append(hdu.header[key] + 1)
            except KeyError:
                pass

    return utils.orderedSet(extList)
794 795 #------------------------------------------------------------------------------ 796
def getFileList(filePath):
    """
    @return: List of image, confidence map and catalogue file paths for given
             image file path.
    @rtype: FileList

    """
    confMap = getConfMap(filePath)
    catalogue = getCatalogue(filePath)
    return FileList(image=filePath, conf=confMap, cat=catalogue)
805 806 #------------------------------------------------------------------------------ 807
def getFilteredImageName(filePath, directory=None):
    """
    @return: The full path to the filtered image for the corresponding
             image file path.
    @rtype: str

    """
    baseName = os.path.basename(filePath)
    if SystemConstants.tileSuffix in filePath:
        # Tiles: append the filter suffix before the file type.
        filtName = (baseName.split('.')[0] + SystemConstants.filtSuffix
                    + SystemConstants.mefType)
    else:
        # Everything else: prepend the filter prefix.
        filtName = SystemConstants.filtPrefix + baseName

    return os.path.join(directory or os.path.dirname(filePath), filtName)
821 822 #------------------------------------------------------------------------------ 823
def getImageName(catPath):
    """
    @return: The full path to image file for the corresponding catalogue file
             path.
    @rtype: str

    """
    catType = SystemConstants.catSuffix + SystemConstants.catType
    return catPath.replace(catType, SystemConstants.mefType)
833 834 #------------------------------------------------------------------------------ 835
def getListDrivenCat(outputDir, filePath):
    """
    @return: The full path to the list-driven catalogue file for the
             corresponding image file path, placed in the given output
             directory.
    @rtype: str

    """
    ldPath = filePath.replace(
        SystemConstants.mefType,
        SystemConstants.listSuffix + SystemConstants.catType)

    return os.path.join(outputDir, os.path.basename(ldPath))
846 847 #------------------------------------------------------------------------------ 848
def getPixelScale(fileNamePath, extNum=None):
    """ @return: The pixel size in arcseconds of the given file.
        @rtype: float
    """
    hduList = open(fileNamePath)
    if extNum and extNum <= len(hduList):  # Selected header only
        hdus = hduList[extNum - 1:extNum]
    elif len(hduList) > 1:                 # Skip primary header in MEFs
        hdus = hduList[1:]
    else:
        hdus = hduList

    # Mean geometric pixel scale over all selected extensions, derived
    # from the WCS CD matrix.
    scales = []
    for hdu in hdus:
        header = hdu.header
        rowNorm = header['CD1_1'] ** 2 + header['CD1_2'] ** 2
        colNorm = header['CD2_1'] ** 2 + header['CD2_2'] ** 2
        scales.append(math.sqrt(math.sqrt(rowNorm * colNorm)))

    return 3600 * numpy.mean(scales)
869 870 #------------------------------------------------------------------------------ 871
def getProductID(filePathName):
    """ @return: The productID and product type of the file
        @rtype: tuple(int,str)
    """
    # The product ID is the last 5 digits of the second underscore field.
    baseName = os.path.basename(filePathName)
    productID = int(baseName.split('_')[1][-5:])

    if SystemConstants.mosaicSuffix in filePathName:
        return productID, 'mosaic'
    if SystemConstants.tileSuffix in filePathName:
        return productID, 'tile'

    return productID, 'stack'
886 887 #------------------------------------------------------------------------------ 888
def getSwarpCat(filePath):
    """
    @return: Returns the path to the corresponding catalogue of the given SWARP
             image, if the catalogue exists, else None.
    @rtype: str

    """
    stem = '.'.join(filePath.split('.')[:-1])
    candidate = stem + '.cat'
    if os.path.exists(candidate):
        return candidate
899 900 #------------------------------------------------------------------------------ 901
def getSwarpConfMap(filePath):
    """
    @return: Returns the path to the corresponding confidence map of the given
             SWARP image, if the confidence map exists, else None.
    @rtype: str

    """
    # Try the archive confidence-map name first, then SWARP's own
    # '.weight' naming convention.
    weightMap = filePath.replace(SystemConstants.mefType,
                                 '.weight' + SystemConstants.mefType)
    for candidate in (getConfMap(filePath), weightMap):
        if os.path.exists(candidate):
            return candidate
914 915 #------------------------------------------------------------------------------ 916
def isCompressed(filePathName):
    """
    Checks file to see if it is already Rice (fpack) compressed, i.e. any
    extension carries the ZIMAGE tile-compression keyword.

    @param filePathName: Full path to FITS file.
    @type filePathName: str

    @return: True if any extension is tile-compressed.
    @rtype: bool

    """
    # Open the file once (the old code opened it twice and never closed it).
    hduList = open(filePathName)
    try:
        if len(hduList) == 1:
            return False

        return any('ZIMAGE' in hdu.header for hdu in hduList[1:])
    finally:
        hduList.close()
927 928 #------------------------------------------------------------------------------ 929
def isSwarpProvVO(filePath):
    """
    @return: Checks the type of Provenance file that comes with Swarp mosaics:
             True for '.xml', False for '.lst', None when neither exists.
    @rtype: bool

    """
    stem = filePath.split('.')[0]
    for provType in ('.xml', '.lst'):
        if os.path.exists(stem + provType):
            return provType == '.xml'
940 941 #------------------------------------------------------------------------------ 942
def maxHdrBlockKeys():
    """ @return: Maximum number of keys in a FITS header block.
        @rtype: int
    """
    # A 2880-byte block holds whole 80-character cards: 2880 // 80 == 36.
    cardsPerBlock = _blockLen // pyfits.Card.length
    return cardsPerBlock
948 949 #------------------------------------------------------------------------------ 950
def maxHdrEntryLen():
    """ @return: Maximum length of a FITS header entry (minus keyword length).
        @rtype: int
    """
    # FITS keywords occupy the first 8 characters of an 80-character card.
    # There's room for 72 but old versions of GAIA can only see 71 characters,
    # hence the extra character subtracted here.
    keywordLen = 8
    return pyfits.Card.length - (keywordLen + 1)
958 959 #------------------------------------------------------------------------------ 960
def open(filePath, mode="readonly", doNotScale=True, **kwds):
    """
    Wrapper to pyfits.open() function to handle change of interface in versions
    subsequent to 2.3.1dev650 and the change in default behaviour since PyFITS
    version 1.4.1.

    @param filePath:   Full path to FITS file to open.
    @type  filePath:   str
    @param mode:       File access mode, e.g. "readonly", "update" (see
                       pyfits.open() for complete list).
    @type  mode:       str
    @param doNotScale: If True, image data is not scaled using BSCALE/BZERO
                       values when read.
    @type  doNotScale: bool
    @param kwds:       Any additional keyword arguments supported by
                       pyfits.open().
    @type  kwds:       dict

    @return: PyFITS HDUList object for the open file.
    @rtype:  pyfits.HDUList

    """
    # PyFITS 1.x predates the compression/scaling keyword arguments.
    isLegacyVersion = pyfits.__version__.startswith("1.")
    if isLegacyVersion:
        return pyfits.open(filePath, mode, **kwds)

    return pyfits.open(filePath, mode,
                       disable_image_compression=True,
                       do_not_scale_image_data=doNotScale, **kwds)
988 989 #------------------------------------------------------------------------------ 990
def parseHistory(card):
    """ Temporary wrapper function to handle transition to pyfits 3.
    """
    # PyFITS 2.x returns HISTORY entries as plain strings; later versions
    # return Card objects whose text lives in the .value attribute.
    if pyfits.__version__.startswith("2."):
        return card

    return card.value
995 996 #------------------------------------------------------------------------------ 997
def prepHeaders(filePathName, confPathName, components, expTimes=[],
                dfsIDString=None, bScale=None):
    """
    Prepares the headers of FITS files to be ingested into the archive, with
    the correct provenance etc. This ensures that the FITS files produced by
    WFAU with CASU software tools, e.g. deep stacks, mosaics, difference images
    etc. all contain the correct header information prior to ingest.

    @param filePathName: Full path to FITS file that requires its headers to be
                         prepared for ingest.
    @type  filePathName: str
    @param confPathName: Full path to the corresponding confidence map.
    @type  confPathName: str
    @param components:   A list containing the full paths to every component
                         FITS file that went into the FITS file to be updated,
                         i.e. the provenance information.
    @type  components:   list(FileList)
    @param expTimes:     List of total exposure times for each extension.
    @type  expTimes:     list(float)
    @param dfsIDString:  Optional project identifier written to the PROJECT
                         keyword.
    @type  dfsIDString:  str
    @param bScale:       If image has floating point data scaled to integers
                         then supply the bScale values for each extension.
    @type  bScale:       list(float)

    """
    if not filePathName.endswith(SystemConstants.mefType):
        raise Exception("Filename has wrong type")

    # List of keywords that shouldn't be in the primary header or
    # are unnecessary in stacks and cause problems.
    pHeaderBadKeys = ['RQ_MINSB', 'RQ_MAXSB', 'RQ_MNSEE', 'RQ_MXSEE',
                      'RQ_MINCL', 'RQ_MAXCL', 'RQ_MNTAU', 'RQ_MXTAU',
                      'RQ_MINMN', 'RQ_MAXMN']

    updateList = [filePathName]
    if confPathName:
        updateList.append(confPathName)

    # Tiles (but not mosaics) also carry a filtered-image companion file.
    if (SystemConstants.tileSuffix in filePathName
        and SystemConstants.mosaicSuffix not in filePathName):

        updateList.append(getFilteredImageName(filePathName))

    for imgPathName in updateList:
        isConfImage = (imgPathName == confPathName)
        modTime = time.strptime(time.ctime(os.stat(imgPathName).st_mtime))
        hduList = open(imgPathName, "update", doNotScale=False)
        # NOTE: was "len(hduList) is 1" / "ii is not 0" below - identity
        # comparison with int literals is implementation-dependent; use
        # equality instead.
        isSingFits = len(hduList) == 1
        isTile = (SystemConstants.tileSuffix in imgPathName and
                  SystemConstants.mosaicSuffix not in imgPathName)

        nExts = 1 if isSingFits else len(hduList) - 1
        # @TODO: expTimes should be a defaultdict
        if len(expTimes) != nExts:
            # Was "* (len(hduList) - 1)", which is zero for a single-HDU file
            # and caused an IndexError at the TEXPTIME update below; size the
            # default list by the actual number of extensions instead.
            expTimes = [dbc.realDefault()] * nExts

        # Update primary header (for multi-HDU tiles the primary header of
        # interest is in extension 1).
        pHduIdx = 1 if (isTile and not isSingFits) else 0
        pHeader = hduList[pHduIdx].header
        sysc = SystemConstants(pHeader.get("INSTRUME"))
        pHeader.update("DATE",
                       time.strftime("%Y-%m-%dT%H:%M:%S", modTime),
                       "file creation date (YYYY-MM-DDThh:mm:ss UT)",
                       before='TELESCOP')

        timeKey, mfidKey = sysc.wfauFitsKeys()
        pHeader.update(timeKey, "", sysc.loadDatabase + " time stamp")
        pHeader.update(mfidKey, 0, sysc.loadDatabase + " MultiframeID")

        if sysc.deepSuffix in filePathName:
            # Product ID and release number are encoded in the file/dir names.
            pHeader.update("PRODID",
                int(os.path.basename(filePathName).split('_')[1][-5:]),
                "Product ID of deep stack or mosaic")

            pHeader.update("RELNUM", int(
                os.path.basename(os.path.dirname(filePathName)).split('_v')[1]),
                "Release number of deep stack or mosaic")

        if dfsIDString:
            pHeader.update("PROJECT", dfsIDString, "Project String")

        #@TODO: Not if it is a tile.
        for key in ['MAGZPT', 'MAGZRR', 'EXTINCT', 'HISTORY']:
            del pHeader[key]

        if sysc.deepSuffix in imgPathName:
            for key in pHeaderBadKeys:
                del pHeader[key]

        fixExponentBug(pHeader)

        # Update image extensions
        for ii, hdu in enumerate(hduList):

            # Can't use hduList[1:] else save won't work!
            if isSingFits or ii != 0:
                extNum = ii if not isSingFits else ii + 1

                #### Prepare history ####

                # Keep a copy of the file's original HISTORY
                historyEntries = hdu.header.get_history()

                # Remove old history
                del hdu.header['HISTORY']

                # Reappend the last HISTORY line created by the CASU tool that
                # produced this file. Test for CASU line.
                if len(historyEntries) > 1 and \
                    parseHistory(historyEntries[-2]).split()[0].isdigit():

                    hdu.header.add_history('%s at %s' % (
                        ' '.join(parseHistory(historyEntries[-1]).split()[1:-3]),
                        parseHistory(historyEntries[-2])))  # Timestamp

                #### Update other keywords ####

                if bScale is not None and not isConfImage:
                    index = extNum - 2 if not isTile else 0
                    hdu.scale('int32', bscale=bScale[index])

                hdu.header.update('NIGHTZPT', dbc.realDefault(),
                                  'Average photometric ZP (mags) for night')

                hdu.header.update('NIGHTZRR', dbc.realDefault(),
                                  'Photometric ZP sigma for night (mags)')

                hdu.header.update('NIGHTNUM', dbc.intDefault(),
                                  'Number of ZP used in band for night')

                hdu.header.update('TEXPTIME', expTimes[extNum - 1],
                                  'Total exposure time in this detector (s)')

                if not isConfImage:
                    hdu.header.update('CIR_CPM',
                        os.path.basename(confPathName) + '[%s]' % extNum,
                        'Confidence Map')

                #### Update provenance keywords ####

                hdu.header.update('PROV0000', os.path.basename(imgPathName),
                                  'Output file')

                # Delete the pre-existing provenance keys
                for key, _value in hdu.header.items():
                    if key.startswith('PROV') and not key.endswith('0000'):
                        del hdu.header[key]

                # Create a new set of provenance keys
                for provID, component in enumerate(components):
                    # @TODO: should we add in list of good extensions?
                    filePath = (component.conf if isConfImage else
                                component.image)

                    if filePath.endswith(sysc.mefType + "[%s]" % extNum):
                        filePath = filePath.replace("[%s]" % extNum, '')

                    if filePath.endswith(sysc.mefType):
                        # Reduce to just dir/filename.fit format
                        filePath = os.path.join(
                            os.path.basename(os.path.dirname(filePath)),
                            os.path.basename(filePath))

                    hdu.header.update('PROV%04d' % (provID + 1), filePath,
                                      'Card # %s' % (provID + 1),
                                      after='PROV%04d' % provID)
        checkForUnicodeError(hduList, imgPathName)
        hduList.close()
1165 1166 #------------------------------------------------------------------------------ 1167
def checkForUnicodeError(hduList, fileName):
    """ Tests for unicode error
    """
    # Flushing forces pyfits to serialise the headers; a UnicodeDecodeError
    # here means a header card contains non-ASCII bytes, so report which
    # file is at fault rather than propagating the raw decode error.
    try:
        hduList.flush()
    except UnicodeDecodeError:
        message = "Unicode error in file %s" % fileName
        raise Exception(message)
1175 1176 #------------------------------------------------------------------------------ 1177
def removeDuplicateKeywords(filePathName):
    """
    Removes duplicate keywords from given FITS file header.

    @param filePathName: Full path to FITS file.
    @type  filePathName: str

    """
    fitsFile = open(filePathName, 'update')

    for hdu in fitsFile:
        # HISTORY/COMMENT may legitimately repeat, so exclude them.
        dupes = utils.getDuplicates(hdu.header.ascard.keys())
        dupes -= set(['HISTORY', 'COMMENT'])
        if not dupes:
            continue

        Logger.addMessage("Removing duplicated keywords...",
                          alwaysLog=False)

        for key in dupes:
            cards = [card for card in hdu.header.ascard
                     if card.key == key]

            # Warn when the duplicated cards disagree on the value; only the
            # first card's value survives.
            uniqueValues = set(card.value for card in cards)
            if len(uniqueValues) > 1:
                Logger.addMessage("<WARNING> There are multiple values of "
                    "keyword %s in %s:\n%s"
                    % (key, filePathName, tuple(uniqueValues)), alwaysLog=False)

            # Remove all values
            del hdu.header[key]
            # Add one back in
            hdu.header.update(key, cards[0].value, cards[0].comment)
            Logger.addMessage("Removed all but one version of keyword %s"
                              % key, alwaysLog=False)

    checkForUnicodeError(fitsFile, filePathName)
    fitsFile.close()
1214 1215 #------------------------------------------------------------------------------ 1216
def stripServer(filePathName):
    """ Strip server name from file path.
    """
    # Keep everything after the last colon; paths without a "server:" prefix
    # are returned unchanged.
    return filePathName.rpartition(':')[2]
1221 1222 #------------------------------------------------------------------------------ 1223
def uncompressFits(fileList):
    """
    Rice uncompress FITS files.

    @param fileList: A list of FITS files.
    @type  fileList: list(str)

    """
    # Delegates to the module's fpack driver with compression disabled,
    # i.e. runs funpack over every file in the list.
    _fpackList(fileList, compress=False)
1233 1234 #------------------------------------------------------------------------------ 1235
def updateFitsStamp(fileName, timeStamp):
    """
    Update the FITS file timestamp accordingly.

    @param fileName:  FITS file to be updated.
    @type  fileName:  str
    @param timeStamp: WFAU timestamp (datedir+filetimestamp); the first six
                      characters are skipped, the rest is parsed as
                      yymmddHHMMSS.
    @type  timeStamp: long
    """
    # Parse the file-timestamp portion and convert to seconds since epoch.
    parsedTime = time.strptime(timeStamp[6:], '%y%m%d%H%M%S')
    newModTime = time.mktime(parsedTime)
    # Access time becomes "now"; modification time becomes the WFAU stamp.
    os.utime(fileName, (time.time(), newModTime))
1247 1248 #------------------------------------------------------------------------------ 1249
def writeToFitsHdu(fitsFile, hduno, key, value, descr, redoing=False,
                   verify='exception'):
    """
    Write a FITS key, its value and comment into the HDU of a given file.

    @param fitsFile: The open FITS file to be altered.
    @type  fitsFile: pyfits.HDUList
    @param hduno:    Tuple of HDU numbers.
    @type  hduno:    tuple(int)
    @param key:      FITS key to be added.
    @type  key:      str
    @param value:    The value of the FITS key.
    @type  value:    str
    @param descr:    Description of the FITS key.
    @type  descr:    str
    @param redoing:  Write mfid and timestamp anyways.
    @type  redoing:  bool
    @param verify:   Output verification mode passed to HDUList.flush().
    @type  verify:   str

    @return: Status (number of keyword writes performed, plus any
             exponent-bug fixes applied; 0 = nothing written)
    @rtype:  int

    """
    # Locate the header carrying INSTRUME: primary HDU if present there,
    # otherwise the first extension.
    hdrNum = 0 if fitsFile[0].header.has_key("INSTRUME") else 1
    sysc = SystemConstants(fitsFile[hdrNum].header.get("INSTRUME"))
    timeKey, mfidKey = sysc.wfauFitsKeys()
    status = 0
    for i in hduno:
        hdr = fitsFile[i].header
        if not redoing:
            # Only fill in the WFAU timestamp/MultiframeID keys if they are
            # missing or still hold their unset defaults ("" / 0); an
            # already-stamped header is left untouched.
            if key == timeKey \
              and (not hdr.has_key(key) or hdr[key].strip() == ""):
                hdr.update(key, value, descr)
                status += 1
            elif key == mfidKey and (not hdr.has_key(key) or hdr[key] == 0):
                hdr.update(key, value, descr)
                status += 1
        else:
            # redoing=True forces the write regardless of existing values.
            hdr.update(key, value, descr)
            status += 1

        # fix CASUs e-bug
        status += fixExponentBug(hdr)
    fitsFile.flush(output_verify=verify)

    return status
1295 1296 #------------------------------------------------------------------------------ 1297