Package helpers :: Module CheckFitsFiles
[hide private]

Source Code for Module helpers.CheckFitsFiles

  1  #! /usr/bin/env python 
  2  #------------------------------------------------------------------------------ 
  3  #$Id: CheckFitsFiles.py 10045 2013-09-06 12:51:49Z EckhardSutorius $ 
  4  """ 
  5     Reads MultiframeIDs and Timestamps from all FITS files for a given date 
  6     and reports missing values. 
  7   
  8     @author: E. Sutorius 
  9     @org:    WFAU, IfA, University of Edinburgh 
 10   
 11     @newfield contributors: Contributors, Contributors (Alphabetical Order) 
 12     @contributors: R.S. Collins 
 13  """ 
 14  #------------------------------------------------------------------------------ 
 15  import dircache 
 16  import os 
 17  import time 
 18  import re 
 19   
 20  from   wsatools.CLI                 import CLI 
 21  from   wsatools.DbConnect.DbSession import DbSession 
 22  import wsatools.FitsUtils               as fits 
 23  from   wsatools.SystemConstants     import SystemConstants 
 24  #------------------------------------------------------------------------------ 
 25   
class CheckFitsFiles(object):
    """ Checks FITS files for missing multiframe IDs or Timestamps.

        The files of every date/version directory inside the configured date
        range are inspected for the header keywords C{<loadDatabase>_MFID}
        and C{<loadDatabase>_TIME}. Optionally the header values are compared
        with the C{Multiframe} table of the archive database.
    """
    #--------------------------------------------------------------------------
    # Define public member variable default values (access as obj.varName)
    # these need to be set from command-line options

    archive = None             #: Database connection.
    sysc = SystemConstants("WSA")
    # Re-use the single SystemConstants instance for the date defaults.
    beginDate = sysc.obsCal.getFirstLight("%Y%m%d")  #: Start date.
    endDate = sysc.obsCal.getToday()                 #: End date.
    versionStr = '1'           #: File version numbers to check.
    pattern = r".*\.fit\Z"     #: regex pattern.
    verbose = 0                #: verbosity level.
    deleteKeys = False         #: delete FITS keys in wrong header.

    #--------------------------------------------------------------------------

    def run(self):
        """ Runs the FITS file check.

            Prints one line per problematic file. File names with a zero
            multiframe ID or timestamp are written to C{zeroIDs.list}, and
            names whose header values differ from the database to
            C{diffIDs.list}, both in the current working directory.
        """
        fileList, mfidDict = self._collectFiles()

        if self.verbose > 0:
            print("%r" % self.pattern)
            print("Checking: %s" % ', '.join(fileList))

        if self.deleteKeys:
            self._deleteTileKeys(fileList)

        zeroIDs = []
        diffIDs = []
        missingFiles = []
        for fileName in fileList:
            # Timestamp derived from the directory date and the file's
            # modification time; only used for reporting.
            dirName = os.path.split(os.path.dirname(fileName))[1][2:8]
            modtime = time.localtime(os.stat(fileName).st_mtime)
            fileTimestamp = dirName + time.strftime("%y%m%d%H%M%S", modtime)

            try:
                mfId, hdrTimestamp = self._readHeaderIds(fileName)
            except KeyError as error:
                print("%s \n%s" % (fileName, error))
                continue

            try:
                hdrTimestamp = int(hdrTimestamp)
            except ValueError:
                hdrTimestamp = 0

            if mfId == 0 or hdrTimestamp == 0:
                # Report the header timestamp next to the file-derived one.
                print('%s : %s, %s (file: %s)' % (fileName, mfId,
                                                  hdrTimestamp, fileTimestamp))
                zeroIDs.append(fileName + '\n')

            if self.archive:
                try:
                    dbMfId, dbTimestamp = mfidDict[fileName]
                except KeyError:
                    missingFiles.append(fileName)
                    print(fileName + " not in DB!")
                else:
                    if dbMfId != mfId or hdrTimestamp != dbTimestamp:
                        print('%s : %s, %s / DB: %s %s)' % (fileName, mfId,
                              hdrTimestamp, dbMfId, dbTimestamp))
                        diffIDs.append(fileName + '\n')

        if zeroIDs:
            with open('zeroIDs.list', 'w') as listFile:
                listFile.writelines(zeroIDs)
            print("Files with no MfID are in ./zeroIDs.list")
        if diffIDs:
            with open('diffIDs.list', 'w') as listFile:
                listFile.writelines(diffIDs)
            print("Files with different file/DB MfID are in ./diffIDs.list")

        # Files absent from the database have already been reported above,
        # so they must not be summarised as OK.
        if not (zeroIDs or diffIDs or missingFiles):
            print("Files are OK!")

    #--------------------------------------------------------------------------

    def _collectFiles(self):
        """ Finds the files to check and, if a database connection is set,
            the corresponding database entries.

            @return: List of full paths of all files matching the regex
                     pattern in the selected date/version directories, and a
                     dictionary of database values as returned by
                     L{_readmfids} (empty without a database connection).
            @rtype:  tuple(list(str), dict)
        """
        # create a dictionary containing all dates
        fitsList = fits.FitsList(self.sysc)
        fitsList.createFitsDateDict()
        fileList = []
        mfidDict = {}
        rePattern = re.compile(self.pattern)
        for direc in fitsList.fitsDateDict:
            for dateVersStr in fitsList.fitsDateDict[direc]:
                dateStr = dateVersStr.partition('_')[0]
                versStr = fitsList.getVersion(dateVersStr)
                if not (self.beginDate <= dateStr <= self.endDate
                        and self.versionStr == versStr):
                    continue

                dirPath = os.path.join(direc, dateVersStr)
                # Sorted listing, as the deprecated dircache.listdir gave.
                fileList.extend(os.path.join(dirPath, item)
                                for item in sorted(os.listdir(dirPath))
                                if rePattern.search(item))

                if self.archive:
                    mfidDict.update(self._readmfids(dateVersStr))

        return fileList, mfidDict

    #--------------------------------------------------------------------------

    def _deleteTileKeys(self, fileList):
        """ Deletes the _MFID and _TIME keywords from the primary header of
            every file with "_tl" in its path, then restores the file's
            original modification time.

            @param fileList: Full paths of the files to consider.
            @type  fileList: list(str)
        """
        keys = ["%s_MFID" % self.sysc.loadDatabase,
                "%s_TIME" % self.sysc.loadDatabase]
        for fileName in fileList:
            if "_tl" not in fileName:
                continue

            # os.utime needs the numeric modification time, not a struct_time
            mtime = os.stat(fileName).st_mtime
            print("deleting... %s" % fileName)
            fptr = fits.open(fileName, "update")
            try:
                phdr = fptr[0].header
                # Delete each key on its own so that a missing _MFID does
                # not leave the _TIME key behind.
                for key in keys:
                    try:
                        del phdr[key]
                    except KeyError as error:
                        print("DEL: %s %s" % (fileName, error))
                fptr.flush()
            finally:
                fptr.close()

            os.utime(fileName, (time.time(), mtime))

    #--------------------------------------------------------------------------

    def _readHeaderIds(self, fileName):
        """ Reads the multiframe ID and timestamp keywords from a FITS file.
            Files with "_tl" in their path are read from extension 1, all
            others from the primary header.

            @param fileName: Full path of the FITS file.
            @type  fileName: str

            @return: Values of the _MFID and _TIME header keywords.
            @rtype:  tuple

            @raise KeyError: If one of the keywords is missing.
        """
        extNum = (1 if "_tl" in fileName else 0)
        hduList = fits.open(fileName)
        try:
            phdr = hduList[extNum].header
            return (phdr["%s_MFID" % self.sysc.loadDatabase],
                    phdr["%s_TIME" % self.sysc.loadDatabase])
        finally:
            # Always release the file handle, also when a keyword is missing
            hduList.close()

    #--------------------------------------------------------------------------

    def _readmfids(self, dateVersStr):
        """ Read fileName, multiframeIDs from the database.

            @param dateVersStr: Date/version directory name; rows whose
                                fileName contains it are selected.
            @type  dateVersStr: str

            @return: Dictionary of (multiframeID, fileTimeStamp) tuples
                     referenced by file name, where the file name is the
                     part of the database value after its last ':'.
            @rtype:  dict
        """
        mfidlist = self.archive.query(
            selectStr="fileName, multiframeID, fileTimeStamp",
            fromStr="Multiframe",
            whereStr="fileName like '%%%s%%'" % dateVersStr)

        return dict((row[0].rpartition(':')[2], (row[1], row[2]))
                    for row in mfidlist)

#------------------------------------------------------------------------------
# Entry point for command-line invocation

if __name__ == '__main__':
    # Command-line options specific to this script
    extraOpts = [
        CLI.Option("b", "begin",
                   "first date to process, e.g. 2004-04-01, or 20040401, or "
                   "first semester to process e.g. 05A_SV",
                   "DATE", isValOK=CLI.isDateOK),
        CLI.Option("e", "end",
                   "last date to process, e.g. 2006-07-31, or 20060731, or "
                   "last semester to process e.g. 07A",
                   "DATE", isValOK=CLI.isDateOK),
        CLI.Option("v", "version", "version number of the data",
                   "STR", CheckFitsFiles.versionStr),
        CLI.Option("c", "checkdb",
                   "checks mfID and timestamp against values in DB"),
        CLI.Option("D", "delete",
                   "delete _MFID/_TIME in wrong extension for tiles"),
        CLI.Option("R", "regex",
                   r"filename RegEx pattern: '*'=>'.*', "
                   r"begin;end=>'\A';'\Z', '.'=>'\.'",
                   "STR", CheckFitsFiles.pattern),
        CLI.Option("V", "verbose", "print out more info",
                   "INT", CheckFitsFiles.verbose),
    ]
    CLI.progOpts += extraOpts

    cli = CLI(CheckFitsFiles, "$Revision: 10045 $")
    task = CheckFitsFiles()

    # A database session is only opened when the DB check is requested
    if cli.getOpt("checkdb"):
        task.archive = DbSession(cli=cli)

    # The system constants are selected by the last dotted component of
    # the "database" argument
    dbName = cli.getArg("database").split('.')[-1]
    task.sysc = SystemConstants(dbName)
    obsCal = SystemConstants(dbName).obsCal

    try:
        task.beginDate, task.endDate = obsCal.getDatesFromInput(
            cli.getOpt("begin"), cli.getOpt("end"))
    except Exception as error:
        raise SystemExit("Illegal Option: " + str(error))

    # Copy the remaining option values onto the task, then run it
    task.pattern = cli.getOpt("regex")
    task.versionStr = cli.getOpt("version")
    task.verbose = cli.getOpt("verbose")
    task.deleteKeys = cli.getOpt("delete")
    task.run()

#------------------------------------------------------------------------------
# Change log:
#
# 30-Oct-2007, ETWS: first version;
# 14-Feb-2008,  RSC: Updated for new DbSession i/f and greatly simplified and
#                    enhanced.