Package invocations :: Package monitoring :: Module FindBrokenJpegs
[hide private]

Source Code for Module invocations.monitoring.FindBrokenJpegs

  1  #!/usr/bin/env python 
  2  #------------------------------------------------------------------------------ 
  3  #$Id: FindBrokenJpegs.py 8858 2011-12-06 12:41:25Z EckhardSutorius $ 
  4  """ 
  5     Find JPGs that are not correctly created. 
  6   
  7     @author: E. Sutorius 
  8     @org:    WFAU, IfA, University of Edinburgh 
  9  """ 
 10  #------------------------------------------------------------------------------ 
 11  from   numpy       import arange 
 12  from   collections import defaultdict 
 13  import inspect 
 14  import os 
 15   
 16  from   wsatools.CLI                    import CLI 
 17  from   wsatools.DbConnect.DbSession    import DbSession, Join 
 18  from   wsatools.File                   import File 
 19  from   wsatools.FitsUtils              import FitsList 
 20  from   wsatools.DbConnect.IngCuSession import IngCuSession 
 21  from   wsatools.Logger                 import Logger 
 22  #------------------------------------------------------------------------------ 
 23   
class FindBrokenJpegs(IngCuSession):
    """
    Find JPGs that are not correctly created.

    JPG files below a minimum size are collected from the compressed-image
    directories. Frames that the database lists with an image byte count
    between 1 and 500000 are then dropped from that collection; the
    remaining frames are written to a listing file and, on request, their
    JPGs are deleted.
    """
    #--------------------------------------------------------------------------
    # Define class constants (access as FindBrokenJpegs.varName)
    minFileSizeDef = 100000

    #--------------------------------------------------------------------------

    def __init__(self,
                 curator=CLI.getOptDef("curator"),
                 database=DbSession.database,
                 minFileSize=CLI.getOptDef("minsize"),
                 beginDate=CLI.getOptDef("begin"),
                 endDate=CLI.getOptDef("end"),
                 versionStr=CLI.getOptDef("version"),
                 diskList=CLI.getOptDef("disklist"),
                 deleteJpegs=CLI.getOptDef("remove"),
                 cu2Listing=CLI.getOptDef("cu2jpeg"),
                 isTrialRun=DbSession.isTrialRun,
                 comment=CLI.getArgDef("comment")):
        """
        @param beginDate:   First date (eg. '20050101') or semester
                            (eg. '05A') to process.
        @type  beginDate:   str
        @param comment:     Descriptive comment as to why curation task is
                            being performed.
        @type  comment:     str
        @param cu2Listing:  Create CU2-readable output file.
        @type  cu2Listing:  bool
        @param curator:     Curator name, passed on to the parent session.
        @type  curator:     str
        @param database:    Database specification, passed on to the parent
                            session.
        @type  database:    str
        @param deleteJpegs: If True, delete JPGs with file sizes smaller
                            than minFileSize.
        @type  deleteJpegs: bool
        @param diskList:    List of RAID disk paths including the
                            archive name, eg. '/disk01/wsa'.
        @type  diskList:    list(str)
        @param endDate:     Last date (eg. '20050131') or semester
                            (eg. '08A') to process.
        @type  endDate:     str
        @param isTrialRun:  If True, do not perform database modifications.
        @type  isTrialRun:  bool
        @param minFileSize: File size below which reprocessing is needed.
        @type  minFileSize: int
        @param versionStr:  Version number of the data or 'all' or 'last'.
        @type  versionStr:  str

        """
        typeTranslation = {"curator":str,
                           "database":str,
                           "minFileSize":int,
                           "beginDate":str,
                           "endDate":str,
                           "versionStr":str,
                           "diskList":list,
                           "deleteJpegs":bool,
                           "cu2Listing":bool,
                           "isTrialRun":bool,
                           "comment":str}

        # Copy every constructor argument onto self, converted to the types
        # given above.
        super(FindBrokenJpegs, self).attributesFromArguments(
            inspect.getargspec(FindBrokenJpegs.__init__)[0], locals(),
            types=typeTranslation)

        # Initialize parent class
        super(FindBrokenJpegs, self).__init__(cuNum=0,
                                              curator=self.curator,
                                              comment=self.comment,
                                              reqWorkDir=False,
                                              database=self.database,
                                              autoCommit=False,
                                              isTrialRun=self.isTrialRun)

        # NOTE(review): maxExtNum is not defined in this class; presumably it
        # is inherited from IngCuSession -- confirm.
        self.maxExtNum = FindBrokenJpegs.maxExtNum[self.sysc.loadDatabase]
        self.obsCal = self.sysc.obsCal
        # NOTE(review): this replaces any diskList passed by the caller;
        # confirm that ignoring the argument is intended.
        self.diskList = self.sysc.availableRaidFileSystem()

    #--------------------------------------------------------------------------

    def _onRun(self):
        """ Search every requested semester and version for broken JPGs,
            write one listing file per semester/version and, if requested,
            delete the JPGs found.
        """
        self.getSearchDates()
        for semester in self.semList:
            semBeginDate, semEndDate = \
                self.obsCal.getDates(semester, "%Y%m%d")
            # Clip the requested date range to the current semester.
            beginDate = max(self.beginDate, semBeginDate)
            endDate = min(self.endDate, semEndDate)

            if self.versionStr.replace('.', '').isdigit():
                versList = [self.versionStr]
            elif self.versionStr == "all":
                versList = list(self.obsCal.versNums[semester])
            else:
                versList = [self.obsCal.maxVers(semester)]

            for versionStr in versList:
                if self.cu2Listing:
                    self.fitsDirs = FitsList(self.sysc, prefix="fbj_")
                    self.fitsDirs.createFitsDateDict(beginDateStr=beginDate,
                                                     endDateStr=endDate,
                                                     versionStr=versionStr)

                # versionStr is a string when given on the command line, so
                # it has to be formatted with %s (%d raises TypeError).
                Logger.addMessage("Searching in %s: %s - %s v%s" %
                                  (semester, beginDate, endDate, versionStr))
                firstDateVersStr = '%s_v%s' % (beginDate, versionStr)
                lastDateVersStr = '%s_v%s' % (endDate, versionStr)

                # create the dictionary listing all dates for every jpeg dir.
                self.jpegDirs = FitsList(self.sysc, prefix='jpg')
                self.jpegDirs.createFitsDateDict(
                    ingestDirectory=self.sysc.compressImDir,
                    beginDateStr=beginDate, endDateStr=endDate,
                    versionStr=versionStr, forceLists=True)
                infix = ("Cu2" if self.cu2Listing else "")
                outFile = File("brokenJpg%sList_%s_%s_%s_v%s.data" % (
                    infix, semester, beginDate, endDate, versionStr))

                # search for JPGs datewise
                shadyJpegs = defaultdict(list)
                for dateVersStr in self.jpegDirs.invFitsDateDict:
                    shadyJpegs.update(self.searchJpegs(dateVersStr))

                # get byte counts for fits files
                pcountDict = self.getPcounts(
                    firstDateVersStr, lastDateVersStr, versionStr)

                # Undersized JPGs whose frame is listed by getPcounts() are
                # not reported; everything else is considered broken.
                brokenJpegs = dict(
                    (jpgShort, jpgList)
                    for jpgShort, jpgList in shadyJpegs.items()
                    if jpgShort not in pcountDict)

                outFile.wopen()
                try:
                    for jpgShort in sorted(brokenJpegs):
                        if self.cu2Listing:
                            outFile.writetheline(os.path.join(
                                self.fitsDirs.invFitsDateDict[
                                    jpgShort.partition('/')[0]],
                                jpgShort + ".fit"))
                        else:
                            # The programme depends on the frame only, so
                            # query it once instead of once per extension.
                            self._connectToDb()
                            try:
                                progName = self.getProgName(jpgShort)
                            finally:
                                self._disconnectFromDb()
                            for jpgName, jpgSize in brokenJpegs[jpgShort]:
                                outFile.writetheline("%s, %d (%s)" % (
                                    jpgName, jpgSize, progName))
                finally:
                    # Close the listing even if a lookup above failed.
                    outFile.close()

                if self.deleteJpegs:
                    self.removeJpgs(brokenJpegs)

                Logger.addMessage("Found %d broken JPGs in version %s." % (
                    len(brokenJpegs), versionStr))

    #--------------------------------------------------------------------------

    def getProgName(self, fileNamePart):
        """
        Look up the programme identifier of the multiframe(s) whose file
        name contains the given string. In a trial run the query is only
        printed.

        @param fileNamePart: Part of the FITS file name to match.
        @type  fileNamePart: str

        @return: Upper-cased dfsIDString of the first match with any
                 'U/UKIDSS/' prefix removed; an empty string in a trial
                 run or if nothing matches.
        @rtype:  str

        """
        selectStr = "distinct dfsIDString"
        fromStr = "Multiframe, ProgrammeFrame, Programme"
        whereStr = " AND ".join([
            "Multiframe.multiframeID=ProgrammeFrame.multiframeID",
            "ProgrammeFrame.programmeID=Programme.programmeID",
            "fileName like '%%%s%%'" % fileNamePart])
        if not self.isTrialRun:
            results = self.archive.query(selectStr, fromStr, whereStr)
            # A frame that has not been ingested yields no rows; report an
            # empty programme name instead of failing with an IndexError.
            progName = (results[0] if results else '')
        else:
            # Single-argument print calls give the same output as the
            # former Python 2 print statements (note the double space).
            print("SELECT  %s" % (selectStr,))
            print("FROM  %s" % (fromStr,))
            print("WHERE  %s" % (whereStr,))
            progName = ''

        return progName.upper().replace('U/UKIDSS/', '')

    #--------------------------------------------------------------------------

    def removeJpgs(self, brokenJpegs):
        """
        Delete the JPGs of all given frames. In a trial run the affected
        files are only logged.

        @param brokenJpegs: Dictionary of lists of (JPG path, file size),
                            keyed by the shortened frame name.
        @type  brokenJpegs: dict(str: list(tuple(str, int)))

        """
        for jpgShort in sorted(brokenJpegs):
            if self.isTrialRun:
                for jpgName, jpgSize in brokenJpegs[jpgShort]:
                    Logger.addMessage(
                        "Will delete %s: %d" % (jpgName, jpgSize))
            else:
                Logger.addMessage("Deleting %s_[1-%s].jpg"
                                  % (jpgShort, self.sysc.maxHDUs-1))

                # All extensions of a frame share the path in front of the
                # trailing '_<extNum>', so derive it once from the first
                # listed JPG. Every extension is removed, not just the
                # undersized ones.
                jpgStem = brokenJpegs[jpgShort][0][0].rpartition('_')[0]
                for extNum in range(1, self.sysc.maxHDUs):
                    jpgName = "%s_%d.jpg" % (jpgStem, extNum)
                    if os.path.exists(jpgName):
                        os.remove(jpgName)

    #--------------------------------------------------------------------------

    def searchJpegs(self, dateVersStr):
        """
        Collect all JPGs of one date/version directory that are smaller
        than self.minFileSize. Files with 'dark' or '/e20' in their path
        are skipped.

        @param dateVersStr: Name of the date/version directory.
        @type  dateVersStr: str

        @return: Dictionary of lists of (JPG path, file size), keyed by
                 the sub-directory/file root without the trailing
                 '_<suffix>'.
        @rtype:  defaultdict(str: list(tuple(str, int)))

        """
        brokenDict = defaultdict(list)
        for jpgDir in self.jpegDirs.invFitsDateDict[dateVersStr]:
            jpegList = self.jpegDirs.allFiles(
                '*' + self.sysc.jpgType, os.path.join(jpgDir, dateVersStr))
            for fileName in jpegList:
                if 'dark' in fileName or "/e20" in fileName:
                    continue
                # stat the file once and reuse the size for test and result
                fileSize = os.stat(fileName).st_size
                if fileSize < self.minFileSize:
                    jpgFile = File(fileName)
                    jpgShort = os.path.join(
                        jpgFile.subdir, jpgFile.root).rpartition('_')[0]
                    brokenDict[jpgShort].append((fileName, fileSize))
        return brokenDict

    #--------------------------------------------------------------------------

    def getPcounts(self, startDateStr, endDateStr, versionStr):
        """
        Query the database for non-dark files in the given date/version
        range that have an imagePcount between 1 and 500000. In a trial
        run the query is only printed.

        @param startDateStr: First date/version string, eg. '20050101_v1'.
        @type  startDateStr: str
        @param endDateStr:   Last date/version string, eg. '20050131_v1'.
        @type  endDateStr:   str
        @param versionStr:   Version the dateVersStr has to end with.
        @type  versionStr:   str

        @return: Dictionary of imagePcount values keyed by sub-directory/
                 file root of the file; empty in a trial run.
        @rtype:  defaultdict(str: int)

        """
        selectStr = "fileName, imagePcount"
        fromStr = Join(["FlatFileLookup", "MultiframeDetector"],
                       ["multiframeID"])
        whereStr = " AND ".join([
            "fileName NOT LIKE '%dark%'",
            "imagePcount between 1 and 500000",
            "dateVersStr BETWEEN '%s' AND '%s'" % (startDateStr, endDateStr),
            "dateVersStr LIKE '%%v%s'" % (versionStr)])

        pcountDict = defaultdict(dict)
        if not self.isTrialRun:
            self._connectToDb()
            try:
                pcountList = self.archive.query(selectStr, fromStr, whereStr)
            finally:
                # Do not leave the connection open if the query fails.
                self._disconnectFromDb()
            for fileName, imagePcount in pcountList:
                fitsFile = File(fileName)
                pcountDict[os.path.join(fitsFile.subdir, fitsFile.root)] = \
                    imagePcount
        else:
            # Single-argument print calls give the same output as the
            # former Python 2 print statements (note the double space).
            print("SELECT  %s" % (selectStr,))
            print("FROM  %s" % (fromStr.fromStr,))
            print("WHERE %s AND %s" % (fromStr.whereStr, whereStr))
        return pcountDict

    #--------------------------------------------------------------------------

    def getSearchDates(self):
        """
        Work out the list of semesters to search (self.semList) from
        self.beginDate and self.endDate. A value of at least 8 characters
        is treated as a date, anything shorter as a semester; a semester is
        replaced by its first (beginDate) or last (endDate) date.
        """
        if len(self.beginDate) >= 8:
            beginSemester = self.obsCal.checkDate(self.beginDate)
        else:
            beginSemester = self.obsCal.formatSemester(self.beginDate)
            self.beginDate = self.obsCal.getDates(beginSemester, "%Y%m%d")[0]

        if len(self.endDate) >= 8:
            endSemester = self.obsCal.checkDate(self.endDate)
        else:
            endSemester = self.obsCal.formatSemester(self.endDate)
            self.endDate = self.obsCal.getDates(endSemester, "%Y%m%d")[1]

        self.semList = [sem for sem in self.obsCal.getSemList()
                        if beginSemester <= sem <= endSemester]
#------------------------------------------------------------------------------
#
# Entry point for FindBrokenJpegs

if __name__ == '__main__':
    # Register the default comment and the script-specific options with
    # the command-line interface.
    CLI.progArgs["comment"] = "Searching for broken JPGs."
    extraOptions = [
        CLI.Option('k', "disklist",
                   "list of RAID disk paths including the archive name, "
                   "eg. '/disk01/wsa,/disk02/wsa'.",
                   "LIST"),
        CLI.Option('b', "begin",
                   "first date/semester to process",
                   "DATE", "05A", isValOK=CLI.isDateOK),
        CLI.Option('e', "end",
                   "last date/semester to process",
                   "DATE", "08A", isValOK=CLI.isDateOK),
        CLI.Option('v', "version",
                   "version number of the data or 'all', 'last'",
                   "STR", 'all'),
        CLI.Option('s', "minsize",
                   "minimum file size below which reprocessing is needed.",
                   "INT", FindBrokenJpegs.minFileSizeDef),
        CLI.Option('J', "cu2jpeg",
                   "create outfile as input file for CU2."),
        CLI.Option('R', "remove",
                   "delete found JPGs."),
    ]
    CLI.progOpts += extraOptions

    cli = CLI(FindBrokenJpegs, "$Revision: 8858 $")
    Logger.addMessage(cli.getProgDetails())

    # Hand the parsed options to the finder by name; the evaluation order
    # matches the constructor's parameter order.
    finder = FindBrokenJpegs(curator=cli.getOpt("curator"),
                             database=cli.getArg("database"),
                             minFileSize=cli.getOpt("minsize"),
                             beginDate=cli.getOpt("begin"),
                             endDate=cli.getOpt("end"),
                             versionStr=cli.getOpt("version"),
                             diskList=cli.getOpt("disklist"),
                             deleteJpegs=cli.getOpt("remove"),
                             cu2Listing=cli.getOpt("cu2jpeg"),
                             isTrialRun=cli.getOpt("test"),
                             comment=cli.getArg("comment"))
    finder.run()