1   
  2   
  3   
  4  """ 
  5     Reads multiframeIDs from the database and writes a list of 
  6     'multiframeID,extension number,compressed image file'. 
  7   
  8     @author: E. Sutorius 
  9     @org:    WFAU, IfA, University of Edinburgh 
 10  """ 
 11   
 12  from   collections import defaultdict 
 13  import dircache 
 14  import mx.DateTime     as mxTime 
 15  import os 
 16   
 17  from   wsatools.CLI                 import CLI 
 18  from   wsatools.DbConnect.DbSession import DbSession 
 19  from   wsatools.File                import File 
 20  import wsatools.FitsUtils               as fits 
 21  from   wsatools.Logger              import Logger 
 22  from   wsatools.SystemConstants     import SystemConstants 
 23  import wsatools.Utilities               as utils 
 27      """Reads multiframeIDs from the database and compiles file lists 
 28         depending on existing JPGs. 
 29      """ 
 30 -    def __init__(self, 
 31                   database=DbSession.database, 
 32                   checkDB=CLI.getOptDef("check"), 
 33                   outPath=CLI.getOptDef("outpath"), 
 34                   subDir=CLI.getOptDef("subdir"), 
 35                   jpgPath=CLI.getOptDef("jpgpath"), 
 36                   raidDisks=CLI.getOptDef("raiddisk"), 
 37                   checkDeprecated=CLI.getOptDef("deprecated"), 
 38                   beginDate=CLI.getArgDef("startdate"), 
 39                   endDate=CLI.getArgDef("enddate"), 
 40                   versionStr=CLI.getArgDef("version"), 
 41                   inFileName=CLI.getArgDef("datafile")): 
  42          """ 
 43          @param beginDate:   First date to process, eg. 20050101. 
 44          @type  beginDate:   int 
 45          @param checkDB:     Check the DB for missing entries of compFile. 
 46          @type  checkDB:     bool 
 47          @param checkDeprecated: Check the DB for deprecated entries of compFile. 
 48          @type  checkDeprecated: bool 
 49          @param database:    Name of the database to connect to. 
 50          @type  database:    str 
 51          @param endDate:     Last date to process, eg. 20050131. 
 52          @type  endDate:     int 
 53          @param inFileName:  File containing multiframeID info. 
 54          @type  inFileName:  str 
 55          @param jpgPath:     Directory where 'products/jpgs' will reside. 
 56          @type  jpgPath:     str 
 57          @param outPath:     Path to directory for produced files 
 58          @type  outPath:     str 
 59          @param raidDisks:   List of disks if not in standard search path 
 60          @type  raidDisks:   list 
 61          @param subDir:      Fits subdirectory, eg. 'products/stacks' 
 62          @type  subDir:      str 
 63          @param versionStr:  Version number of the data. 
 64          @type  versionStr:  str 
 65   
 66          """ 
 67          self.database = database 
 68          self.sysc = SystemConstants(self.database.rpartition('.')[2]) 
 69          beginDate, endDate = \ 
 70            self.sysc.obsCal.getDatesFromInput(beginDate, endDate) 
 71   
 72          self.startDateStr = beginDate 
 73          self.endDateStr = endDate 
 74          self.versionStr = versionStr 
 75          self.startMJD = int(mxTime.strptime(self.startDateStr, "%Y%m%d").mjd) 
 76          self.endMJD = int(mxTime.strptime(self.endDateStr, "%Y%m%d").mjd) 
 77          self.inFileName = (inFileName or None) 
 78          self.checkDB = checkDB 
 79          self.checkDeprecated = checkDeprecated 
 80          self.outPath = outPath 
 81          self.subDir = subDir 
 82          if raidDisks: 
 83              self.raidDisks = raidDisks.split(',') 
 84          else: 
 85              self.raidDisks = self.sysc.availableRaidFileSystem() 
 86          self.jpgPath = jpgPath 
  87   
 88       
 89   
 91          """ 
 92          """ 
 93          procfiles = {} 
 94          notavailfiles = [] 
 95          dupliDict = defaultdict(set) 
 96          outFileTimeStamp = ''.join(["_", self.startDateStr, 
 97                                      "_", self.endDateStr, 
 98                                      "_", self.versionStr]) 
 99   
100          outFileSuffix = ''.join(["_", self.database.rpartition('.')[2], 
101                                   outFileTimeStamp]) 
102   
103           
104          if self.subDir: 
105              fitsDirs = [self.subDir] 
106          else: 
107              fitsDirs = [self.sysc.fitsDir, self.sysc.stackDir, 
108                          self.sysc.mosaicDir, self.sysc.diffDir] 
109   
110          fitsList = fits.FitsList(self.sysc, prefix="CMJL_") 
111          fitsList.createFitsDateDict(disklist=self.raidDisks, 
112                                      ingestDirectory=fitsDirs, 
113                                      beginDateStr=self.startDateStr, 
114                                      endDateStr=self.endDateStr, 
115                                      versionStr=self.versionStr) 
116          self.jpegDateDict = self.createJpegDict() 
117   
118          for fitsDir in fitsList.fitsDateDict: 
119              print "reading ", fitsDir 
120              for datum in fitsList.fitsDateDict[fitsDir]: 
121                   
122                  dateStr, versStr = datum.partition('_v')[::2] 
123                  if (self.startDateStr <= dateStr <= self.endDateStr) \ 
124                         and self.versionStr == versStr: 
125                      print "processing ", datum 
126                      fitsListing = dircache.listdir(os.path.join(fitsDir, datum)) 
127   
128                       
129                       
130                      tmpList = fitsListing[:] 
131                      for elem in tmpList: 
132                          if self.sysc.catSuffix+self.sysc.catType in elem \ 
133                                 or self.sysc.mefType not in elem: 
134                              fitsListing.remove(elem) 
135   
136                       
137                      if datum in self.jpegDateDict: 
138                           
139                          jpegDict = defaultdict(list) 
140                          for jpgDir in self.jpegDateDict[datum]: 
141                              filesPath = os.path.join(jpgDir, datum) 
142                              for elem in dircache.listdir(filesPath): 
143                                   
144                                  name = elem[:elem.rfind("_")] 
145                                  jpegDict[name].append( 
146                                      os.path.join(filesPath, elem)) 
147                                  dupliDict[os.path.join(datum, elem)].add( 
148                                      jpgDir) 
149   
150                           
151                          for item in fitsListing: 
152                              name = item[:item.rfind(self.sysc.mefType)] 
153                              filePath = os.path.join(fitsDir, datum, item) 
154                               
155                              if name in jpegDict: 
156                                  procfiles[filePath] = jpegDict[name] 
157                              elif filePath not in notavailfiles: 
158                                  notavailfiles.append(filePath) 
159   
160           
161          duplicates = False 
162          for name in sorted(dupliDict): 
163              if len(dupliDict[name]) > 1: 
164                  duplicates = True 
165                  break 
166          if duplicates: 
167              print "The following files exist in more than one directory:" 
168              dupliFile = File(os.path.join( 
169                  self.outPath, "duplicatedjpgs%s.log" % outFileTimeStamp)) 
170              dupliFile.wopen() 
171              for name in sorted(dupliDict): 
172                  if len(dupliDict[name]) > 1: 
173                      print 
174                      print ":: %s ::" % name 
175                      for path in sorted(dupliDict[name]): 
176                          jpgName = os.path.join(path, name) 
177                          mtime = mxTime.DateTimeFromTicks( 
178                              os.path.getmtime(jpgName)) 
179                          dateStr = mtime.date + " %02d:%02d:%02d" % \ 
180                                    (mtime.hour, mtime.minute, mtime.second) 
181                          print dateStr, jpgName 
182                          dupliFile.writetheline('%s, %s' % (dateStr, jpgName)) 
183              dupliFile.close() 
184   
185           
186          self.archive = DbSession(self.database) 
187   
188          missingJpgsDict = {} 
189          if self.checkDB or self.checkDeprecated: 
190              missingJpgsDict = self.checkForJpgs() 
191              infoTxt = ' '.join([ 
192                  str(len(missingJpgsDict)), "files without jpegs in '", 
193                  self.database.rpartition('.')[2], ".Multiframe' from", 
194                  self.startDateStr, "to", self.endDateStr, 
195                  "for version", self.versionStr]) 
196              if missingJpgsDict: 
197                   
198                  jpgNotinDbFile = File(os.path.join( 
199                      self.outPath, "nojpgindb%s.list" % outFileSuffix)) 
200                  self.write2file(jpgNotinDbFile, missingJpgsDict) 
201                  infoTxt += " written to %s" % jpgNotinDbFile.name 
202              print infoTxt 
203              for entry in missingJpgsDict: 
204                  missingJpgsDict[entry] = missingJpgsDict[entry][0][0] 
205   
206           
207          if not self.inFileName: 
208              mfidDict = self.getMfIDs() 
209              infoTxt = ''.join([ 
210                  str(len(mfidDict)), " files in '", 
211                  self.database.rpartition('.')[2], ".Multiframe' from ", 
212                  self.startDateStr, " to ", self.endDateStr, 
213                  " for version ", self.versionStr]) 
214              if mfidDict: 
215                   
216                  indbFile = File(os.path.join( 
217                      self.outPath, "filesindb%s.list" % outFileSuffix)) 
218                  self.write2file(indbFile, mfidDict) 
219                  infoTxt += " written to %s" % indbFile.name 
220              print infoTxt 
221          else: 
222              mfidDict = self.readMfidsFromFile() 
223   
224          mfidnotavailfiles = [] 
225           
226          if procfiles: 
227              prefix = ("depr" if self.jpgPath and "deprecated" in self.jpgPath 
228                        else '') 
229              procFile = File(os.path.join( 
230                  self.outPath, ''.join([prefix, "jpgs", outFileSuffix, ".log"]))) 
231              mfidnotavailfiles = self.updateProcessedData( 
232                  procfiles, (missingJpgsDict if self.checkDB \ 
233                              or self.checkDeprecated else mfidDict), 
234                  procFile) 
235   
236          if notavailfiles: 
237              notAvailJpgFile = File(os.path.join( 
238                  self.outPath, "notavailjpgs%s.log" % outFileSuffix)) 
239   
240              print ' '.join([str(len(notavailfiles)), 
241                              "files with no available jpg are in", 
242                              notAvailJpgFile.name]) 
243              notAvailJpgFile.wopen() 
244              for elem in sorted(notavailfiles): 
245                  notAvailJpgFile.writetheline(elem) 
246                  if elem not in mfidDict: 
247                      mfidnotavailfiles.append(elem) 
248              notAvailJpgFile.close() 
249   
250          if mfidnotavailfiles: 
251              notAvailMfidFile = File(os.path.join( 
252                  self.outPath, "mfidnotavailfiles%s.log" % outFileSuffix)) 
253   
254              print ' '.join([str(len(mfidnotavailfiles)), 
255                              "files with no entry in the DB are in ", 
256                              notAvailMfidFile.name]) 
257              notAvailMfidFile.wopen() 
258              notAvailMfidFile.writelines(sorted(mfidnotavailfiles)) 
259              notAvailMfidFile.close() 
 260   
261       
262   
263      @staticmethod 
265          """ Update the jpgDict with files from the given directory. 
266          """ 
267          jpgDateList = [x for x in dircache.listdir(jpgDir) 
268                         if x.startswith('20') and '_v' in x and \ 
269                         not x.partition('.')[2].isalpha()] 
270          for jpgDate in jpgDateList: 
271              jpegDict[jpgDate].append(jpgDir) 
272          return jpegDict 
 273   
274       
275   
277          """ Create a dictionary of compressed image dates and their dirs. 
278          """ 
279          jdd = defaultdict(list) 
280          if self.jpgPath: 
281              if "disk" in self.jpgPath: 
282                  jdd = self.updateJpegDict(jdd, self.jpgPath) 
283              else: 
284                  for direc in self.raidDisks: 
285                      jpgDirList = [] 
286                      if "deprecated" in self.jpgPath: 
287                          for x in os.listdir(os.path.join(direc, "products")): 
288                              if "deprecated" in x: 
289                                  jpgDirList = os.listdir( 
290                                      os.path.join(direc, "products", x)) 
291                      else: 
292                          jpgDirList = [os.path.join(direc, self.jpgPath)] 
293                      for jpgDir in jpgDirList: 
294                          if os.path.exists(jpgDir): 
295                              jdd = self.updateJpegDict(jdd, jpgDir) 
296          else: 
297              for direc in self.raidDisks: 
298                  jpgDir = os.path.join(direc, self.sysc.compressImDir) 
299                  jdd = self.updateJpegDict(jdd, jpgDir) 
300          return jdd 
 301   
302       
303   
305          """ Read filename,multiframeIDs from the given database, where 
306              no compName is given. 
307          """ 
308          dbJpgsDict = defaultdict(list) 
309          for mjd in xrange(self.startMJD, self.endMJD+1): 
310              date = mxTime.DateTimeFromMJD(mjd) 
311              dateVersStr = "'%04d%02d%02d_v%s'" % ( 
312                  date.year, date.month, date.day, self.versionStr) 
313              if not self.checkDeprecated: 
314                  compCheckStr = "compFile NOT LIKE '%disk%' AND" 
315              else: 
316                  compCheckStr = "compFile NOT LIKE '%%%s%%' AND" % \ 
317                                 self.sysc.deprecatedComprImDir 
318              mfidlist = self.archive.query( 
319                  "F.fileName, F.multiframeID, MFD.compFile", 
320                  fromStr="MultiframeDetector as MFD, FlatFileLookUp as F", 
321                  whereStr="F.multiframeID=MFD.multiframeID AND " 
322                           "%s dateVersStr=%s" % (compCheckStr, dateVersStr)) 
323              for entry in mfidlist: 
324                  dbJpgsDict[entry[0].rpartition(':')[2]].append( 
325                      (entry[1], entry[2].rpartition(':')[2])) 
326          return dbJpgsDict 
 327   
328       
329   
331          """ Read filename,multiframeIDs from the given database. 
332          """ 
333          mfidList = [] 
334          for mjd in xrange(self.startMJD, self.endMJD+1): 
335              date = mxTime.DateTimeFromMJD(mjd) 
336              dateVersStr = "'%04d%02d%02d_v%s'" % ( 
337                  date.year, date.month, date.day, self.versionStr) 
338              mfidList += self.archive.query( 
339                  "fileName, multiframeID", "FlatFileLookup", 
340                  "dateVersStr=" + dateVersStr) 
341   
342          return dict((name.rpartition(':')[2], mfid) for name, mfid in mfidList) 
 343   
344       
345   
347          """ Read filename,multiframeIDs from a file. 
348          """ 
349          mfidList = [line.split() for line in utils.ParsedFile(self.inFileName) 
350                      if ':' in line] 
351   
352          return dict((name.rpartition(':')[2], mfid) for name, mfid in mfidList) 
 353   
354       
355   
356      @staticmethod 
358          """Write data into file. 
359          """ 
360          noMfIDFiles = [] 
361          outFile.wopen() 
362          prficounter = 0 
363          for entry in jpgedFiles: 
364              if entry in aDict: 
365                  mfid = aDict[entry] 
366                  for imag in jpgedFiles[entry]: 
367                      x = ','.join([str(mfid), 
368                                    imag[imag.rfind('_')+1:imag.rfind('.')], 
369                                    imag]) 
370                      outFile.writetheline(x) 
371                      prficounter += 1 
372              else: 
373                  noMfIDFiles.append(entry) 
374          outFile.close() 
375   
376          print ' '.join([str(prficounter), 
377                          "files' jpgs and their mfID written to", 
378                          outFile.name]) 
379   
380          return noMfIDFiles 
 381   
382       
383   
384      @staticmethod 
386          """type dbFile:  File object 
387          """ 
388          dbFile.wopen() 
389          for key in sorted(jpgDict): 
390              dbFile.writetheline(key+", "+repr(jpgDict[key])) 
391          dbFile.close() 
  392   
393   
394   
395   
396   
if __name__ == "__main__":

    # Command-line interface: positional arguments and options on top of
    # the ones CLI already provides.
    CLI.progArgs += [
        CLI.Argument("startdate", "05A", isValOK=CLI.isDateOK),
        CLI.Argument("enddate", "05A", isValOK=CLI.isDateOK),
        CLI.Argument("version", '1'),
        CLI.Argument("datafile", None, isOptional=True)
        ]
    CLI.progOpts += [
        CLI.Option('c', 'check',
                   "checks the DB for missing entries of compFile"),
        CLI.Option('d', 'deprecated',
                   "checks the DB for deprecated entries of compFile"),
        CLI.Option('o', 'outpath',
                   "new destination for produced files",
                   "DIR", os.curdir),
        CLI.Option('s', 'subdir',
                   "subdirectory containing FITS date directories",
                   "DIR", SystemConstants().fitsDir),
        CLI.Option('j', 'jpgpath',
                   "path to non-standard jpg location",
                   "DIR", None),
        CLI.Option('r', 'raiddisk',
                   "list of disk if not in standard search path",
                   "DIR", None)
        ]
    cli = CLI(CreateMfIdJpegList.__name__, "$Revision: 8553 $",
              CreateMfIdJpegList.__doc__)
    Logger.isVerbose = False
    Logger.addMessage(cli.getProgDetails())

    # The instance gets its own name so the class is not shadowed; keywords
    # keep the eleven settings from being mixed up.
    jpegLister = CreateMfIdJpegList(
        database=cli.getArg("database"),
        checkDB=cli.getOpt("check"),
        outPath=cli.getOpt("outpath"),
        subDir=cli.getOpt("subdir"),
        jpgPath=cli.getOpt("jpgpath"),
        raidDisks=cli.getOpt("raiddisk"),
        checkDeprecated=cli.getOpt("deprecated"),
        beginDate=cli.getArg("startdate"),
        endDate=cli.getArg("enddate"),
        versionStr=cli.getArg("version"),
        inFileName=cli.getArg("datafile"))

    jpegLister.run()
443   
444   
445   
446   
447   
448   
449   
450   
451   
452   
453   
454   
455   
456   
457   
458   
459   
460   
461   
462   
463   
464   
465   
466   
467   
468   
469   
470   
471