Package invocations :: Package cu2 :: Module checkcu2

Source code for module invocations.cu2.checkcu2

  1  #! /usr/bin/env python 
  2  #------------------------------------------------------------------------------ 
  3  #$Id: checkcu2.py 6323 2009-11-09 22:39:17Z RossCollins $ 
  4  """ 
  5     Checks if files are processed by CU2. Gives back a list of files and a list 
  6     of completely non-processed directories. 
  7   
  8     @author: E. Sutorius 
  9     @org:    WFAU, IfA, University of Edinburgh 
 10  """ 
 11  #------------------------------------------------------------------------------ 
 12  import dircache as dc 
 13  import getopt 
 14  import os 
 15  import sys 
 16   
 17  import wsatools.CSV             as csv 
 18  from   wsatools.FitsUtils   import FitsList 
 19  from   wsatools.Logger      import Logger 
 20  import wsatools.SystemConstants as sysc 
 21  import wsatools.Utilities       as utils 
 22  #------------------------------------------------------------------------------ 
 23   
def usage():
    """Print command-line usage information for checkcu2.py to stdout."""
    # Fixes vs. the previous text:
    #  * the synopsis now lists -o/--outpath, which main() accepts via
    #    getopt ("k:d:o:Mr" / "outpath=") but which was missing here;
    #  * the 'directory' line had an unterminated quote and referred to a
    #    nonexistent fin_dir(); the actual default is sysc.fitsDir().
    # Single-argument print(...) prints identically on Python 2 and 3.
    print("usage: checkcu2.py [-k/--disk disklist] [-d/--dir directory] [-o/--outpath outpath] [-M/--mosaic] [-r/--redo] startdate enddate version\n")
    print("disklist = list of disks, eg. '/disk01/wsa,/disk02/wsa'; default is availableRaidFileSystem().")
    print("directory = subdirectory, eg. 'products/stacks'; default is fitsDir().")
    print("outpath = path where products/jpgs is created.")
    print("mosaic = forces making mosaics, may break if size is bigger than 2 GB.")
    print("redo = re-do the jpegs.")
    print("startdate is the start date of directories to check.")
    print("enddate is the end date of directories to check.")
    print("version is the version number of the data dir.")
34 35 #------------------------------------------------------------------------------ 36
def main(argv):
    """Check if files are processed by CU2.

       Scans the date-versioned FITS directories on the given disks and
       compares each multi-extension FITS file against the compressed-image
       (jpg) directories, then writes log files listing unprocessed files,
       unprocessed directories, processed files and duplicate jpgs.

       startdate is the start date of directories to check.
       enddate is the end date of directories to check.
       version is the version number of the data dir.
       [disklist] is a csv list of disks to be processed
       [directory] is the subdirectory, eg. 'products/stacks'
       [outpath] path where products/jpgs is created
    """

    # read the arguments
    # NOTE: parses sys.argv[1:] directly; the argv parameter is only echoed
    # in the error paths below.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "k:d:o:Mr",
                                   ["disk=", "dir=", "outpath=",
                                    "mosaic", "redo"])
    except getopt.GetoptError:
        # print help information and exit:
        print argv
        usage()
        raise SystemExit

    # Defaults: all available RAID disks, the standard FITS ingest dir,
    # jpgs located per-disk, no forced mosaics, no re-processing.
    disklist = sysc.availableRaidFileSystem()
    directory = sysc.fitsDir()
    jpgPath = None
    forceMosaic = False
    reDo = False

    for o, a in opts:
        if o in ("-k", "--disk"):
            disklist = a.split(',')
        if o in ("-d","--dir"):
            directory = a
        if o in ("-o", "--outpath"):
            jpgPath = a
        if o in ("-M","--mosaic"):
            forceMosaic = True
        if o in ("-r","--redo"):
            reDo = True

    # Exactly three positional arguments are required:
    # startdate, enddate, version.
    if len(args) != 3:
        print argv
        usage()
        raise SystemExit

    # Dates are compared numerically (e.g. 20051117 <= date <= 20060130).
    searchcritlow = int(args[0])
    searchcrithigh = int(args[1])
    versionnr = int(args[2])

    # Test directories are served from the development RAID file system.
    disklist, subDir = ((disklist, "") if not directory.startswith("test") else
                        (sysc.developRaidFileSystem(), "test"))

    # Log files produced below are written into the CU2 invocation dir.
    cu2Path = os.path.join(sysc.curationCodePath(), "invocations/cu2")

    # create a dictionary listing all dates for every fits dir.
    fitsDirs = FitsList(prefix="cu2_")
    fitsDirs.createFitsDateDict(disklist, directory)
    fitsdatedict = fitsDirs.fitsDateDict

    # create a dictionary listing all jpg dirs for every date.
    # Keys are date-version strings; values are lists of jpg dirs, since
    # one date may be spread over several disks.
    jpegdatedict = {}
    jpgDirList = []
    if jpgPath:
        jpgDirList = [os.path.join(jpgPath, sysc.compressImDir())]
    else:
        for direc in sysc.availableRaidFileSystem():
            jpgDirList.append(os.path.join(direc, subDir,
                                           sysc.compressImDir()))

    for jpgDir in jpgDirList:
        utils.ensureDirExist(jpgDir)
        jpgdatelist = dc.listdir(jpgDir)
        for jpgdate in jpgdatelist:
            jpegdatedict.setdefault(jpgdate, []).append(jpgDir)

    # go datewise through all fits dirs depending on the search criteria
    notprocdirs =[]
    notprocfiles = []
    procfiles = []
    catlist = []
    duplicatelist = []
    Logger.addMessage("%50s %6s %8s" %("date", "fits", "jpg/4"))
    for direc in fitsdatedict.iterkeys():
        for datumVersStr in fitsdatedict[direc]:
            # datumVersStr looks like "<date>_v<version>"; split the date off
            # at the first underscore. -- TODO confirm exact format upstream.
            date = int(datumVersStr[:datumVersStr.find('_')])
            vers = fitsDirs.getVersion(datumVersStr)
            if (searchcritlow <= date <= searchcrithigh) and versionnr == vers:
                fitslist = dc.listdir(os.path.join(direc, datumVersStr))

                # create a list of catalogue files, exclude these files
                # from the fitslist
                # (iterate over a copy so removal is safe; anything that is
                # neither a catalogue nor a MEF file is also dropped)
                tmpl = fitslist[:]
                for elem in tmpl:
                    if elem.find(sysc.catSuffix() + sysc.catType()) >= 0:
                        catlist.append(os.path.join(direc ,datumVersStr, elem))
                        fitslist.remove(elem)
                    elif not elem.endswith(sysc.mefType()):
                        fitslist.remove(elem)

                #check if there is a jpg dir for this date
                if jpegdatedict.has_key(datumVersStr):
                    # set up a name dict for jpgs
                    # maps jpg base name (before the last '_') -> count of
                    # jpgs produced for that FITS file
                    jpegdict = {}
                    for jpgdir in jpegdatedict[datumVersStr]:
                        jpeglist = dc.listdir(os.path.join(jpgdir,
                                                           datumVersStr))

                        # print out how much fits files there are per date
                        # and how much jpgs (divided by 4 (# of jpgs per fits))
                        Logger.addMessage("%50s %6s %8s" %
                                          (os.path.join(jpgdir,
                                                        datumVersStr),
                                           len(fitslist),
                                           len(jpeglist)/4.0))
                        for elem in jpeglist:
                            name = elem[:elem.rfind("_")]
                            if jpegdict.has_key(name):
                                jpegdict[name] += 1
                            else:
                                jpegdict[name] = 1

                    # check with fits list
                    for item in fitslist:
                        name = item[:item.rfind(".fit")]
                        filepath = os.path.join(direc ,datumVersStr, item)
                        # fpnum > 0 means this path was already queued for
                        # reprocessing in an earlier iteration.
                        fpnum = notprocfiles.count(filepath)
                        if reDo:
                            # -r/--redo: queue everything regardless of state
                            notprocfiles.append(filepath)
                        # is large mosaic file
                        elif sysc.mosaicSuffix() in name and \
                             name.split('_')[2] == "dp":
                            # deep mosaics are skipped unless -M/--mosaic
                            if forceMosaic:
                                notprocfiles.append(filepath)
                            else:
                                pass
                        # is not processed yet
                        elif not(jpegdict.has_key(name)) and \
                             name.find("_cat")<0 and fpnum == 0:
                            notprocfiles.append(filepath)
                        # less than 4 extensions processed
                        elif jpegdict.has_key(name) and \
                             jpegdict[name] < 4 and fpnum == 0:
                            notprocfiles.append(filepath)
                        elif jpegdict.has_key(name) and \
                             jpegdict[name] > 4:
                            # more than 4 jpgs for one FITS file: duplicates
                            duplicatelist.append(
                                (jpegdatedict[datumVersStr], name,
                                 jpegdict[name]))

                        # processed
                        else:
                            procfiles.append(filepath)
                            # was queued earlier but is now complete: un-queue
                            if fpnum > 0:
                                notprocfiles.remove(filepath)
                else:
                    # no jpg dir at all for this date: whole dir unprocessed
                    notprocdirs.append(os.path.join(direc, datumVersStr))
                    # print out how much fits files there are per date
                    Logger.addMessage("%50s %6s %8s" %
                                      (os.path.join(direc, datumVersStr),
                                       len(fitslist), 0))

    # write not yet processed files to a log (can be used as
    # input for CU2)
    # NOTE(review): the log names use datumVersStr left over from the last
    # loop iteration -- confirm that is intended when multiple dates match.
    if notprocfiles:
        filePath = \
            os.path.join(cu2Path, "reprocfiles_%s.log"%datumVersStr)
        Logger.addMessage("%s files to be processed are in: %s" %
                          (len(notprocfiles), filePath))
        open(filePath,'w').writelines(f+'\n' for f in notprocfiles)

    # unprocessed dirs
    if notprocdirs:
        filePath = \
            os.path.join(cu2Path, "notprocdirs_%s.log"% datumVersStr)
        Logger.addMessage("%s dirs to be processed are in: %s" %
                          (len(notprocdirs), filePath))
        open(filePath,'w').writelines(f+'\n' for f in notprocdirs)

    # processed files
    if procfiles:
        filePath = \
            os.path.join(cu2Path, "procfiles_%s.log" % datumVersStr)
        Logger.addMessage("%s files processed are in: %s" %
                          (len(procfiles), filePath))
        open(filePath,'w').writelines(f + '\n' for f in procfiles)

    Logger.addMessage("%s catalogue files" % len(catlist))

    # duplicate files
    if duplicatelist:
        log_time = utils.makeTimeStamp()
        log_time = log_time[:log_time.find('.')].replace(' ','_')
        dfname = "duplicatejpgs_"+log_time+".log"
        Logger.addMessage("%s duplicate jpgs are in: %s" %
                          (len(duplicatelist), dfname))
        csv.File(dfname, 'w').writelines(duplicatelist)
#------------------------------------------------------------------------------
#
if __name__ == '__main__':
    # Entry point: pass the full argv list so the error paths in main()
    # can echo it before printing usage.
    main(sys.argv)

#------------------------------------------------------------------------------
# Change log:
#
# 13-Jul-2005, ETWS: Original version.
#                    Included partly processed files.
#  8-Aug-2005, ETWS: Writing unprocessed dirs also into a file.
# 16-Aug-2005, ETWS: Write a list of processed files as well.
#  3-Oct-2005, ETWS: Included versioning of data dirs.
# 17-Nov-2005, ETWS: Taking care of multiple directories for one date;
#                    included parameter 'disklist'.
# 18-Jan-2006, ETWS: fixed bug, fixed wrong indentation
# 30-Jan-2006, ETWS: Included optional 'disk' and 'directory' parameters
#                    to list other dirs than 'ingest/fits'
# 28-Apr-2006, ETWS: Included check for test directories
# 10-May-2006, ETWS: Included check for duplicate jpgs
# 30-May-2006, ETWS: Added timestamp to duplicatejpgs log
#  5-Jul-2006, RSC:  Upgraded to use new CSV.py module.
# 01-May-2007, ETWS: Updated for use with CU0
# 10-Jul-2007, ETWS: Updated for use with test cases.
# 22-Nov-2007, ETWS: Excluded large deep mosaic fits files from list.
# 29-Jan-2008, ETWS: Included possibility to force mosaic jpeg creation
#                    and to re-process existing jpegs.
# 30-Apr-2008, ETWS: Replaced massStorageRaidFileSystem with
#                    availableRaidFileSystem.
# 30-Jul-2008, ETWS: Enabled possibility to be run on multiple processors.