Package invocations :: Package monitoring :: Module CreateMfIdJpegList
[hide private]

Source Code for Module invocations.monitoring.CreateMfIdJpegList

  1  #! /usr/bin/env python 
  2  #------------------------------------------------------------------------------ 
  3  #$Id: CreateMfIdJpegList.py 8553 2011-08-05 11:42:18Z EckhardSutorius $ 
  4  """ 
  5     Reads multiframeIDs from the database and writes a list of 
  6     'multiframeID,extension number,compressed image file'. 
  7   
  8     @author: E. Sutorius 
  9     @org:    WFAU, IfA, University of Edinburgh 
 10  """ 
 11  #------------------------------------------------------------------------------ 
 12  from   collections import defaultdict 
 13  import dircache 
 14  import mx.DateTime     as mxTime 
 15  import os 
 16   
 17  from   wsatools.CLI                 import CLI 
 18  from   wsatools.DbConnect.DbSession import DbSession 
 19  from   wsatools.File                import File 
 20  import wsatools.FitsUtils               as fits 
 21  from   wsatools.Logger              import Logger 
 22  from   wsatools.SystemConstants     import SystemConstants 
 23  import wsatools.Utilities               as utils 
#------------------------------------------------------------------------------

# NOTE: the class docstring doubles as the program description handed to CLI
# by the script entry point of this module, so its wording is runtime text.
class CreateMfIdJpegList(object):
    """Reads multiframeIDs from the database and compiles file lists
       depending on existing JPGs.
    """
    def __init__(self,
                 database=DbSession.database,
                 checkDB=CLI.getOptDef("check"),
                 outPath=CLI.getOptDef("outpath"),
                 subDir=CLI.getOptDef("subdir"),
                 jpgPath=CLI.getOptDef("jpgpath"),
                 raidDisks=CLI.getOptDef("raiddisk"),
                 checkDeprecated=CLI.getOptDef("deprecated"),
                 beginDate=CLI.getArgDef("startdate"),
                 endDate=CLI.getArgDef("enddate"),
                 versionStr=CLI.getArgDef("version"),
                 inFileName=CLI.getArgDef("datafile")):
        """
        @param beginDate: First date to process, eg. 20050101.
        @type  beginDate: int
        @param checkDB: Check the DB for missing entries of compFile.
        @type  checkDB: bool
        @param checkDeprecated: Check the DB for deprecated entries of
                                compFile.
        @type  checkDeprecated: bool
        @param database: Name of the database to connect to. Only the part
                         after the last '.' is used for SystemConstants and
                         for output file names.
        @type  database: str
        @param endDate: Last date to process, eg. 20050131.
        @type  endDate: int
        @param inFileName: File containing multiframeID info. If empty, the
                           multiframeIDs are read from the database.
        @type  inFileName: str
        @param jpgPath: Directory where 'products/jpgs' will reside.
        @type  jpgPath: str
        @param outPath: Path to directory for produced files
        @type  outPath: str
        @param raidDisks: Disks to search if not in standard search path;
                          either a comma-separated string (as delivered by
                          the command line) or a sequence of disk paths.
        @type  raidDisks: str or list
        @param subDir: Fits subdirectory, eg. 'products/stacks'
        @type  subDir: str
        @param versionStr: Version number of the data.
        @type  versionStr: str

        """
        self.database = database
        self.sysc = SystemConstants(self.database.rpartition('.')[2])

        # The observation calendar turns the input into the date strings
        # that are used below with the "%Y%m%d" format.
        beginDate, endDate = \
            self.sysc.obsCal.getDatesFromInput(beginDate, endDate)

        self.startDateStr = beginDate
        self.endDateStr = endDate
        self.versionStr = versionStr
        self.startMJD = int(mxTime.strptime(self.startDateStr, "%Y%m%d").mjd)
        self.endMJD = int(mxTime.strptime(self.endDateStr, "%Y%m%d").mjd)
        self.inFileName = (inFileName or None)
        self.checkDB = checkDB
        self.checkDeprecated = checkDeprecated
        self.outPath = outPath
        self.subDir = subDir
        self.jpgPath = jpgPath

        if not raidDisks:
            self.raidDisks = self.sysc.availableRaidFileSystem()
        elif hasattr(raidDisks, "split"):
            # command-line form: "disk1,disk2,..."
            self.raidDisks = raidDisks.split(',')
        else:
            self.raidDisks = list(raidDisks)

        # Both are set by run(); defined here so the attributes always exist.
        self.archive = None
        self.jpegDateDict = None

    #--------------------------------------------------------------------------

    def run(self):
        """ Compare the FITS files on disk with the existing JPEGs and the
            database, and write the resulting lists into self.outPath:

              - duplicatedjpgs<dates>.log:     JPEGs found in more than one
                                               directory
              - nojpgindb<suffix>.list:        result of checkForJpgs() (only
                                               if one of the check options is
                                               set)
              - filesindb<suffix>.list:        files known to the database
                                               (only if no input file given)
              - [depr]jpgs<suffix>.log:        'multiframeID,extension
                                               number,compressed image file'
              - notavailjpgs<suffix>.log:      FITS files without a JPEG
              - mfidnotavailfiles<suffix>.log: files without multiframeID

            A file is only written if there is something to put into it.
        """
        procfiles = {}
        # a set: membership is tested once per FITS file
        notavailfiles = set()
        dupliDict = defaultdict(set)
        dbName = self.database.rpartition('.')[2]
        outFileTimeStamp = ''.join(["_", self.startDateStr,
                                    "_", self.endDateStr,
                                    "_", self.versionStr])

        outFileSuffix = ''.join(["_", dbName, outFileTimeStamp])

        # list of dirs in which to search for fits files
        if self.subDir:
            fitsDirs = [self.subDir]
        else:
            fitsDirs = [self.sysc.fitsDir, self.sysc.stackDir,
                        self.sysc.mosaicDir, self.sysc.diffDir]

        fitsList = fits.FitsList(self.sysc, prefix="CMJL_")
        fitsList.createFitsDateDict(disklist=self.raidDisks,
                                    ingestDirectory=fitsDirs,
                                    beginDateStr=self.startDateStr,
                                    endDateStr=self.endDateStr,
                                    versionStr=self.versionStr)
        self.jpegDateDict = self.createJpegDict()

        catTag = self.sysc.catSuffix + self.sysc.catType
        mefType = self.sysc.mefType
        for fitsDir in fitsList.fitsDateDict:
            print(' '.join(["reading ", fitsDir]))
            for datum in fitsList.fitsDateDict[fitsDir]:
                dateStr, versStr = datum.partition('_v')[::2]
                if not (self.startDateStr <= dateStr <= self.endDateStr
                        and self.versionStr == versStr):
                    continue

                print(' '.join(["processing ", datum]))
                # FITS files of this date without the catalogue files.
                # dircache hands out its cached list, which must be treated
                # as read-only, so filter into a new list.
                fitsListing = [
                    elem for elem
                    in dircache.listdir(os.path.join(fitsDir, datum))
                    if catTag not in elem and mefType in elem]

                # JPEGs of this date, keyed by file name up to the last '_'.
                # NOTE(review): the dict is reset for every date, so the
                # files of a date without any jpg dir are all reported as
                # having no jpg -- confirm that this is the intended report.
                jpegDict = defaultdict(list)
                if datum in self.jpegDateDict:
                    for jpgDir in self.jpegDateDict[datum]:
                        filesPath = os.path.join(jpgDir, datum)
                        for elem in dircache.listdir(filesPath):
                            name = elem[:elem.rfind("_")]
                            jpegDict[name].append(
                                os.path.join(filesPath, elem))
                            dupliDict[os.path.join(datum, elem)].add(jpgDir)

                # check with fits list
                for item in fitsListing:
                    name = item[:item.rfind(mefType)]
                    filePath = os.path.join(fitsDir, datum, item)
                    if name in jpegDict:
                        procfiles[filePath] = jpegDict[name]
                    else:
                        notavailfiles.add(filePath)

        # check for duplicate files
        self._reportDuplicates(dupliDict, outFileTimeStamp)

        # check the database for missing jpgs
        self.archive = DbSession(self.database)

        checkDbForJpgs = self.checkDB or self.checkDeprecated
        missingJpgsDict = {}
        if checkDbForJpgs:
            missingJpgsDict = self.checkForJpgs()
            infoTxt = ("%d files without jpegs in '%s.Multiframe' "
                       "from %s to %s for version %s"
                       % (len(missingJpgsDict), dbName, self.startDateStr,
                          self.endDateStr, self.versionStr))
            if missingJpgsDict:
                # write paths of files in the database into a file
                jpgNotinDbFile = File(os.path.join(
                    self.outPath, "nojpgindb%s.list" % outFileSuffix))
                self.write2file(jpgNotinDbFile, missingJpgsDict)
                infoTxt += " written to %s" % jpgNotinDbFile.name
            print(infoTxt)
            # from here on only the multiframeID of the first entry is needed
            missingJpgsDict = dict(
                (name, entries[0][0])
                for name, entries in missingJpgsDict.items())

        # make the mfid dictionary
        if self.inFileName:
            mfidDict = self.readMfidsFromFile()
        else:
            mfidDict = self.getMfIDs()
            infoTxt = ''.join([
                str(len(mfidDict)), " files in '",
                dbName, ".Multiframe' from ",
                self.startDateStr, " to ", self.endDateStr,
                " for version ", self.versionStr])
            if mfidDict:
                # write paths of files in the database into a file
                indbFile = File(os.path.join(
                    self.outPath, "filesindb%s.list" % outFileSuffix))
                self.write2file(indbFile, mfidDict)
                infoTxt += " written to %s" % indbFile.name
            print(infoTxt)

        mfidnotavailfiles = []
        # processed files
        if procfiles:
            prefix = ("depr" if self.jpgPath and "deprecated" in self.jpgPath
                      else '')
            procFile = File(os.path.join(
                self.outPath, ''.join([prefix, "jpgs", outFileSuffix, ".log"])))
            mfidnotavailfiles = self.updateProcessedData(
                procfiles,
                (missingJpgsDict if checkDbForJpgs else mfidDict),
                procFile)

        if notavailfiles:
            notAvailJpgFile = File(os.path.join(
                self.outPath, "notavailjpgs%s.log" % outFileSuffix))

            print(' '.join([str(len(notavailfiles)),
                            "files with no available jpg are in",
                            notAvailJpgFile.name]))
            notAvailJpgFile.wopen()
            try:
                for elem in sorted(notavailfiles):
                    notAvailJpgFile.writetheline(elem)
                    if elem not in mfidDict:
                        mfidnotavailfiles.append(elem)
            finally:
                notAvailJpgFile.close()

        if mfidnotavailfiles:
            notAvailMfidFile = File(os.path.join(
                self.outPath, "mfidnotavailfiles%s.log" % outFileSuffix))

            print(' '.join([str(len(mfidnotavailfiles)),
                            "files with no entry in the DB are in ",
                            notAvailMfidFile.name]))
            notAvailMfidFile.wopen()
            try:
                # NOTE(review): the other lists are written entry by entry
                # with writetheline(); confirm that File.writelines()
                # terminates each entry in the same way.
                notAvailMfidFile.writelines(sorted(mfidnotavailfiles))
            finally:
                notAvailMfidFile.close()

    #--------------------------------------------------------------------------

    def _reportDuplicates(self, dupliDict, outFileTimeStamp):
        """ Print and log all JPEGs that exist in more than one directory.

            @param dupliDict: Maps 'date dir/file name' to the set of jpg
                              directories in which that file was found.
            @type  dupliDict: dict(str: set(str))
            @param outFileTimeStamp: Suffix for the name of the log file.
            @type  outFileTimeStamp: str
        """
        duplicated = sorted(name for name in dupliDict
                            if len(dupliDict[name]) > 1)
        if not duplicated:
            return

        print("The following files exist in more than one directory:")
        dupliFile = File(os.path.join(
            self.outPath, "duplicatedjpgs%s.log" % outFileTimeStamp))
        dupliFile.wopen()
        try:
            for name in duplicated:
                print("")
                print(":: %s ::" % name)
                for path in sorted(dupliDict[name]):
                    jpgName = os.path.join(path, name)
                    # the modification time tells the copies apart
                    mtime = mxTime.DateTimeFromTicks(
                        os.path.getmtime(jpgName))
                    dateStr = mtime.date + " %02d:%02d:%02d" % \
                              (mtime.hour, mtime.minute, mtime.second)
                    print(' '.join([dateStr, jpgName]))
                    dupliFile.writetheline('%s, %s' % (dateStr, jpgName))
        finally:
            dupliFile.close()

    #--------------------------------------------------------------------------

    @staticmethod
    def updateJpegDict(jpegDict, jpgDir):
        """ Update the jpgDict with files from the given directory.

            Every entry of jpgDir that starts with '20', contains '_v' and
            has no purely alphabetic part after its first '.' is taken as a
            date directory.

            @param jpegDict: Dictionary of date dir names and the list of
                             directories containing them; updated in place.
            @type  jpegDict: defaultdict(list)
            @param jpgDir: Directory to be searched for date directories.
            @type  jpgDir: str

            @return: The updated jpegDict.
            @rtype:  defaultdict(list)
        """
        for entry in dircache.listdir(jpgDir):
            isDateDir = entry.startswith('20') and '_v' in entry \
                        and not entry.partition('.')[2].isalpha()
            if isDateDir:
                jpegDict[entry].append(jpgDir)
        return jpegDict

    #--------------------------------------------------------------------------

    def createJpegDict(self):
        """ Create a dictionary of compressed image dates and their dirs.

            Where the jpg directories are searched depends on self.jpgPath:
              - not set:               the compressed image directory of the
                                       system constants on every RAID disk;
              - contains 'disk':       the path itself is the jpg directory;
              - contains 'deprecated': see _deprecatedJpgDirs();
              - otherwise:             jpgPath is a sub-path on every RAID
                                       disk.
            Directories that do not exist are skipped.

            @return: Dictionary of date dir names and the list of jpg
                     directories containing them.
            @rtype:  defaultdict(list)
        """
        jdd = defaultdict(list)
        if self.jpgPath and "disk" in self.jpgPath:
            return self.updateJpegDict(jdd, self.jpgPath)

        for direc in self.raidDisks:
            if not self.jpgPath:
                jpgDirList = [os.path.join(direc, self.sysc.compressImDir)]
            elif "deprecated" in self.jpgPath:
                jpgDirList = self._deprecatedJpgDirs(direc)
            else:
                jpgDirList = [os.path.join(direc, self.jpgPath)]
            for jpgDir in jpgDirList:
                if os.path.exists(jpgDir):
                    jdd = self.updateJpegDict(jdd, jpgDir)
        return jdd

    #--------------------------------------------------------------------------

    @staticmethod
    def _deprecatedJpgDirs(disk):
        """ List the directories inside every 'products' sub-directory of
            the given disk whose name contains 'deprecated'.

            The full paths are returned, since bare directory names would be
            resolved against the current working directory.
            NOTE(review): assumes the layout
            <disk>/products/<*deprecated*>/<jpg dir>/<date dir> -- confirm.

            @param disk: Path of the RAID disk.
            @type  disk: str

            @return: List of full paths of the directories found.
            @rtype:  list(str)
        """
        productsDir = os.path.join(disk, "products")
        if not os.path.isdir(productsDir):
            return []

        jpgDirList = []
        for entry in sorted(os.listdir(productsDir)):
            deprDir = os.path.join(productsDir, entry)
            if "deprecated" in entry and os.path.isdir(deprDir):
                for subEntry in sorted(os.listdir(deprDir)):
                    subDir = os.path.join(deprDir, subEntry)
                    if os.path.isdir(subDir):
                        jpgDirList.append(subDir)
        return jpgDirList

    #--------------------------------------------------------------------------

    def _dateVersStr(self, mjd):
        """ Return the quoted dateVersStr, eg. "'20050101_v1'", for the given
            MJD and the version of this run, as used in the SQL queries.

            @param mjd: Modified Julian Date.
            @type  mjd: int

            @return: Quoted date-version string.
            @rtype:  str
        """
        date = mxTime.DateTimeFromMJD(mjd)
        return "'%04d%02d%02d_v%s'" % (
            date.year, date.month, date.day, self.versionStr)

    #--------------------------------------------------------------------------

    def checkForJpgs(self):
        """ Read fileName, multiframeID and compFile from the database
            (MultiframeDetector joined with FlatFileLookUp) for every day of
            the date range. Selected are the rows whose compFile does not
            contain 'disk' or, if checkDeprecated is set, does not contain
            the deprecated compressed image directory.

            @return: Dictionary of file names (part after the last ':') and
                     their list of (multiframeID, compFile) tuples.
            @rtype:  defaultdict(list)
        """
        # the compFile constraint is the same for every date
        if self.checkDeprecated:
            compCheckStr = "compFile NOT LIKE '%%%s%%' AND" % \
                           self.sysc.deprecatedComprImDir
        else:
            compCheckStr = "compFile NOT LIKE '%disk%' AND"

        dbJpgsDict = defaultdict(list)
        for mjd in xrange(self.startMJD, self.endMJD+1):
            mfidlist = self.archive.query(
                "F.fileName, F.multiframeID, MFD.compFile",
                fromStr="MultiframeDetector as MFD, FlatFileLookUp as F",
                whereStr="F.multiframeID=MFD.multiframeID AND "
                "%s dateVersStr=%s" % (compCheckStr, self._dateVersStr(mjd)))
            for fileName, mfid, compFile in mfidlist:
                dbJpgsDict[fileName.rpartition(':')[2]].append(
                    (mfid, compFile.rpartition(':')[2]))
        return dbJpgsDict

    #--------------------------------------------------------------------------

    def getMfIDs(self):
        """ Read filename,multiframeIDs from the given database for every
            day of the date range.

            @return: Dictionary of file names (part after the last ':') and
                     their multiframeID.
            @rtype:  dict
        """
        mfidList = []
        for mjd in xrange(self.startMJD, self.endMJD+1):
            mfidList += self.archive.query(
                "fileName, multiframeID", "FlatFileLookup",
                "dateVersStr=" + self._dateVersStr(mjd))

        return dict((name.rpartition(':')[2], mfid) for name, mfid in mfidList)

    #--------------------------------------------------------------------------

    def readMfidsFromFile(self):
        """ Read filename,multiframeIDs from a file. Only lines containing a
            ':' are used; each of them must consist of exactly two
            whitespace separated entries, the file name and the multiframeID.

            @return: Dictionary of file names (part after the last ':') and
                     their multiframeID, as read (ie. as string).
            @rtype:  dict
        """
        mfidList = [line.split() for line in utils.ParsedFile(self.inFileName)
                    if ':' in line]

        return dict((name.rpartition(':')[2], mfid) for name, mfid in mfidList)

    #--------------------------------------------------------------------------

    @staticmethod
    def updateProcessedData(jpgedFiles, aDict, outFile):
        """ Write 'multiframeID,extension number,compressed image file' for
            every JPEG of every file that has a multiframeID. The extension
            number is the part of the JPEG name between the last '_' and the
            last '.'. Files are written in sorted order.

            @param jpgedFiles: Dictionary of FITS file paths and the list of
                               their JPEGs.
            @type  jpgedFiles: dict(str: list(str))
            @param aDict: Dictionary of FITS file paths and multiframeIDs.
            @type  aDict: dict
            @param outFile: File to write the data to.
            @type  outFile: File

            @return: List of files in jpgedFiles without multiframeID.
            @rtype:  list(str)
        """
        noMfIDFiles = []
        prficounter = 0
        outFile.wopen()
        try:
            for entry in sorted(jpgedFiles):
                if entry not in aDict:
                    noMfIDFiles.append(entry)
                    continue
                mfid = str(aDict[entry])
                for imag in jpgedFiles[entry]:
                    extNum = imag[imag.rfind('_')+1:imag.rfind('.')]
                    outFile.writetheline(','.join([mfid, extNum, imag]))
                prficounter += 1
        finally:
            outFile.close()

        print(' '.join([str(prficounter),
                        "files' jpgs and their mfID written to",
                        outFile.name]))

        return noMfIDFiles

    #--------------------------------------------------------------------------

    @staticmethod
    def write2file(dbFile, jpgDict):
        """ Write one line 'key, repr(value)' for every entry of the
            dictionary into the file, sorted by key.

            @param dbFile: File to write the data to.
            @type  dbFile: File object
            @param jpgDict: Dictionary with file names as keys.
            @type  jpgDict: dict
        """
        dbFile.wopen()
        try:
            for key in sorted(jpgDict):
                dbFile.writetheline(key+", "+repr(jpgDict[key]))
        finally:
            dbFile.close()

#------------------------------------------------------------------------------
# Entry point for script.

# Allow module to be imported as well as executed from the command line
if __name__ == "__main__":

    # Define command-line interface settings for CreateMfIdJpegList
    CLI.progArgs += [
        CLI.Argument("startdate", "05A", isValOK=CLI.isDateOK),
        CLI.Argument("enddate", "05A", isValOK=CLI.isDateOK),
        CLI.Argument("version", '1'),
        CLI.Argument("datafile", None, isOptional=True)
        ]
    CLI.progOpts += [
        CLI.Option('c', 'check',
                   "checks the DB for missing entries of compFile"),
        CLI.Option('d', 'deprecated',
                   "checks the DB for deprecated entries of compFile"),
        CLI.Option('o', 'outpath',
                   "new destination for produced files",
                   "DIR", os.curdir),
        CLI.Option('s', 'subdir',
                   "subdirectory containing FITS date directories",
                   "DIR", SystemConstants().fitsDir),
        CLI.Option('j', 'jpgpath',
                   "path to non-standard jpg location",
                   "DIR", None),
        CLI.Option('r', 'raiddisk',
                   "list of disk if not in standard search path",
                   "DIR", None)
        ]
    cli = CLI(CreateMfIdJpegList.__name__, "$Revision: 8553 $",
              CreateMfIdJpegList.__doc__)
    Logger.isVerbose = False
    Logger.addMessage(cli.getProgDetails())

    # Use a separate name for the instance, so that the class itself is not
    # shadowed; the keywords tie every CLI value to its parameter.
    jpegLister = CreateMfIdJpegList(
        database=cli.getArg("database"),
        checkDB=cli.getOpt("check"),
        outPath=cli.getOpt("outpath"),
        subDir=cli.getOpt("subdir"),
        jpgPath=cli.getOpt("jpgpath"),
        raidDisks=cli.getOpt("raiddisk"),
        checkDeprecated=cli.getOpt("deprecated"),
        beginDate=cli.getArg("startdate"),
        endDate=cli.getArg("enddate"),
        versionStr=cli.getArg("version"),
        inFileName=cli.getArg("datafile"))

    jpegLister.run()

#------------------------------------------------------------------------------
# Change log:
#
# 17-Nov-2005, ETWS: first version
# 18-Jan-2006, ETWS: fixed bug, included output of a list of not
#                    created jpegs
# 19-Jan-2006, ETWS: fixed indentation error, added output of a list
#                    of files existing in the database and their mfID;
#                    checking the actually processed dir as well
#  8-Feb-2006, ETWS: extended the searched fits directories list
# 28-Feb-2006, ETWS: included function to check for missing jpegs in the DB
#  6-Mar-2006, ETWS: fixed output
#  7-Jun-2006,  RSC: Replaced utils.sortList() with fast, new Python 2.4 method
# 10-Jul-2006,  RSC: Replaced string module functions with str() equivalents as
#                    the string module will be obsoleted in Python 3.0
# 22-Feb-2007,  RSC: Updated to reflect move of loadServerHost() constant from
#                    DbConstants to SystemConstants
# 01-May-2007, ETWS: Updated for include into cu0
# 09-May-2007, ETWS: Fixed missing spaces in output.
# 23-Nov-2007, ETWS: Added diffDir to list of searched fits dirs.
# 28-Nov-2007, ETWS: Reduced queries to date range;
#                    fixed UTDate range problem.
#  8-Jan-2008, ETWS: Restricted the RAID disk list to availableRaidFileSystem.
# 16-Apr-2008, ETWS: Added DB specifications to input and version info
#                    to output.
# 11-Aug-2008, ETWS: Included more flexibility to find deprecated JPEGs.
# 26-Feb-2008, ETWS: Included detection of duplicated JPGs.