Package helpers :: Module CheckCatalogues
[hide private]

Source Code for Module helpers.CheckCatalogues

  1  #! /usr/bin/env python 
  2  #------------------------------------------------------------------------------ 
  3  #$Id: CheckCatalogues.py 8284 2011-05-26 11:00:27Z EckhardSutorius $ 
  4  """ 
  5     Checks for uniqueness of the primary key values in the catalogue FITS files. 
  6   
  7     @author: E. Sutorius 
  8     @org:    WFAU, IfA, University of Edinburgh 
  9  """ 
 10  #------------------------------------------------------------------------------ 
 11  from   collections import defaultdict 
 12  import inspect 
 13  import mx.DateTime     as mxTime 
 14  import os 
 15  import pyfits 
 16  import sys 
 17   
 18  from   wsatools.CLI                    import CLI 
 19  from   wsatools.DbConnect.DbSession    import DbSession, Join 
 20  from   wsatools.File                   import File 
 21  import wsatools.FitsUtils                  as fits 
 22  from   wsatools.DbConnect.IngCuSession import IngCuSession 
 23  from   wsatools.Logger                 import Logger 
 24  from   wsatools.SystemConstants        import SystemConstants 
 25  import wsatools.Utilities                  as utils 
 26   
 27  #------------------------------------------------------------------------------ 
class CheckCatalogues(IngCuSession):
    """Checks catalogue files for errors in primary key values.

    For every observation date in the requested range the catalogue FITS
    files returned by the Multiframe query are opened, and the
    'Sequence_number' column of every extension is checked for duplicate
    and for non-integer values. Problems are reported through the Logger.
    """
    # Set from the command line ('verbose' option); when true the name of
    # every catalogue is printed before it is checked.
    isVerbose = False
    #--------------------------------------------------------------------------

    def __init__(self,
                 curator=CLI.getOptDef("curator"),
                 database=DbSession.database,
                 beginDate=CLI.getOptDef("begin"),
                 endDate=CLI.getOptDef("end"),
                 versionStr=CLI.getOptDef("version"),
                 programmeList=CLI.getOptDef("programmes"),
                 outPath=CLI.getOptDef("outpath"),
                 isTrialRun=DbSession.isTrialRun,
                 comment=CLI.getArgDef("comment")):
        """
        Sets up the session and collects, per observation date, the
        pixel/catalogue file paths that L{_onRun} will check.

        @param beginDate:     First date to process, eg. 20050101.
        @type  beginDate:     str
        @param comment:       Descriptive comment as to why curation task is
                              being performed.
        @type  comment:       str
        @param curator:       Name of curator.
        @type  curator:       str
        @param database:      Name of the database to connect to.
        @type  database:      str
        @param endDate:       Last date to process, eg. 20050131.
        @type  endDate:       str
        @param isTrialRun:    Trial-run flag, handed on unchanged to the
                              IngCuSession constructor.
        @type  isTrialRun:    bool
        @param outPath:       Directory where data is written to.
        @type  outPath:       str
        @param programmeList: Only process data for given programmes (accepts
                              keywords 'all', 'ns' (non-survey),
                              'ukidss' (all 5 main surveys)).
        @type  programmeList: list(str)
        @param versionStr:    Version number of the data.
        @type  versionStr:    str

        """
        super(CheckCatalogues, self).__init__(cuNum=-9999,
                                              curator=curator,
                                              comment=comment,
                                              reqWorkDir=False,
                                              keepWorkDir=False,
                                              database=database,
                                              autoCommit=False,
                                              isTrialRun=isTrialRun)

        beginDate, endDate = \
            self.sysc.obsCal.getDatesFromInput(beginDate, endDate)

        typeTranslation = {"curator":str,
                           "database":str,
                           "beginDate":str,
                           "endDate":str,
                           "versionStr":str,
                           "outPath":str,
                           "programmeList":list,
                           "isTrialRun":bool,
                           "comment":str}

        # NOTE: locals() is handed over here, so no further local variables
        # must be introduced above this call.
        super(CheckCatalogues, self).attributesFromArguments(
            inspect.getargspec(CheckCatalogues.__init__)[0], locals(),
            types=typeTranslation)

        # No version given: fall back to the highest version known for
        # either end of the date range.
        if not self.versionStr:
            self.versionStr = str(max(
                self.sysc.obsCal.maxVersOfDate(self.beginDate),
                self.sysc.obsCal.maxVersOfDate(self.endDate)))

        self.dateList = self.sysc.obsCal.getObsDates(
            self.beginDate, self.endDate, self.versionStr)

        self.pixCatTuples = defaultdict(list)
        self._connectToDb()
        try:
            # get programme IDs
            self._createProgrammeTranslation()
            self.progIdList = [str(self._progIDofName[x])
                               for x in self.programmeList]

            # get filenames/catnames from DB
            Logger.addMessage(
                "Getting catalogues from %s" % self.sysc.loadDatabase)
            for dateVersStr in self.dateList:
                self.pixCatTuples[dateVersStr] = \
                    self.getFileCatPaths(dateVersStr)
        finally:
            # Release the connection even if a lookup or query failed.
            self._disconnectFromDb()

    #--------------------------------------------------------------------------

    def _onRun(self):
        """
        Checks the 'Sequence_number' column of every catalogue extension.

        A catalogue that cannot be opened or read is reported as an error
        and skipped; the remaining catalogues are still checked. At the end
        the log is dumped to 'CCs_<timestamp>.log' in self.outPath.
        """
        for dateVersStr in self.dateList:
            Logger.addMessage("::::%s::::" % dateVersStr)
            for _pixName, catName in sorted(self.pixCatTuples[dateVersStr]):
                # Reset per file, so that a failing fits.open() can neither
                # leave the name unbound nor leave the previous file's
                # (already closed) HDU list to be closed again.
                hduList = None
                try:
                    if CheckCatalogues.isVerbose:
                        print("Checking %s ..." % catName)
                    hduList = fits.open(catName)
                    # Skip the primary HDU; extensions are reported 1-based.
                    for hduNum, hdu in enumerate(hduList[1:]):
                        seqNums = list(hdu.data.field('Sequence_number'))
                        if len(seqNums) != len(set(seqNums)):
                            Logger.addMessage(
                                "<ERROR> Sequence number not unique "
                                "in %s in ext %s" % (catName, hduNum + 1))
                        if seqNums != [int(num) for num in seqNums]:
                            Logger.addMessage(
                                "<ERROR> Sequence numbers not all integers "
                                "in %s in ext %s" % (catName, hduNum + 1))
                except Exception as error:
                    # Best effort: report which catalogue failed and carry
                    # on with the next one.
                    Logger.addMessage(
                        "<ERROR> Could not check %s: %s" % (catName, error))
                finally:
                    if hduList is not None:
                        hduList.close()

        Logger.addMessage("finished.")
        logName = "CCs_%s.log" % utils.makeTimeStamp().replace(' ', '_')
        Logger.dump(open(os.path.join(self.outPath, logName), 'w'))

    #--------------------------------------------------------------------------

    def getFileCatPaths(self, dateVersStr):
        """
        Get all science pix and cat file paths for one observation date.

        @param dateVersStr: Date/version string matched (SQL 'like') against
                            the dateVersStr column.
        @type  dateVersStr: str

        @return: Result of the archive query selecting Multiframe.fileName
                 and Multiframe.catName for the programmes in
                 self.progIdList; empty catalogues and 'NONE' entries are
                 excluded.
        """
        predicates = [
            "programmeID in (%s)" % ','.join(self.progIdList),
            "catName not like '%empty_catalogue.fits'",
            "catName not like 'NONE'",
            "dateVersStr like '%s'" % dateVersStr]

        return self.archive.query(
            selectStr="Multiframe.fileName, Multiframe.catName",
            fromStr=Join(["Multiframe", "FlatFileLookUp", "ProgrammeFrame"],
                         ["multiframeID"]),
            whereStr=" AND ".join(predicates))

#------------------------------------------------------------------------------
# Script entry point.

# Only run the check when executed as a script, so that the module can also
# be imported.
if __name__ == "__main__":
    # Default comment plus the options specific to this script
    CLI.progArgs["comment"] = "Checking Catalogue FITS files"
    extraOptions = [
        CLI.Option('b', "begin",
                   "first date to process, eg. 20050101",
                   "DATE", str(IngCuSession.beginDateDef)),
        CLI.Option('e', "end",
                   "last date to process, eg. 20050131",
                   "DATE", str(IngCuSession.endDateDef)),
        CLI.Option('o', "outpath",
                   "directory where the output data is written to",
                   "PATH", './'),
        CLI.Option('v', "version",
                   "version number of the data",
                   "STR", ''),
        CLI.Option('P', "programmes", "only process data for given "
                   "programmes (accepts keywords 'all', 'ns' (non-survey), "
                   "'ukidss' (all 5 main surveys)); one programme prefixed "
                   "with 'x-' excludes this programme.", "LIST", 'all'),
        CLI.Option('V', 'verbose',
                   'Log all information'),
    ]
    CLI.progOpts += extraOptions

    cli = CLI(CheckCatalogues, "$Revision: 8284 $")
    Logger.addMessage(cli.getProgDetails())
    CheckCatalogues.isVerbose = cli.getOpt('verbose')

    # Same values, in the same order, as the positional constructor
    # arguments; spelled out by name for readability.
    checker = CheckCatalogues(curator=cli.getOpt("curator"),
                              database=cli.getArg("database"),
                              beginDate=cli.getOpt("begin"),
                              endDate=cli.getOpt("end"),
                              versionStr=cli.getOpt("version"),
                              programmeList=cli.getOpt("programmes"),
                              outPath=cli.getOpt("outpath"),
                              isTrialRun=cli.getOpt("test"),
                              comment=cli.getArg("comment"))
    checker.run()