Package invocations :: Package cu2 :: Module checkcu2

Source code for module invocations.cu2.checkcu2

  1  #! /usr/bin/env python 
  2  #------------------------------------------------------------------------------ 
  3  #$Id: checkcu2.py 6323 2009-11-09 22:39:17Z RossCollins $ 
  4  """ 
  5     Checks if files are processed by CU2. Gives back a list of files and a list 
  6     of completely non-processed directories. 
  7   
  8     @author: E. Sutorius 
  9     @org:    WFAU, IfA, University of Edinburgh 
 10  """ 
 11  #------------------------------------------------------------------------------ 
 12  import dircache as dc 
 13  import getopt 
 14  import os 
 15  import sys 
 16   
 17  import wsatools.CSV             as csv 
 18  from   wsatools.FitsUtils   import FitsList 
 19  from   wsatools.Logger      import Logger 
 20  import wsatools.SystemConstants as sysc 
 21  import wsatools.Utilities       as utils 
 22  #------------------------------------------------------------------------------ 
 23   
def usage():
    """Print command-line usage information for checkcu2.py to stdout."""
    # Fixes vs. the previous text:
    #  * the synopsis now lists -o/--outpath, which main() accepts via
    #    getopt ("k:d:o:Mr" / "outpath=") but which was missing here;
    #  * the 'directory' line had an unterminated quote and referred to a
    #    nonexistent fin_dir(); the actual default is sysc.fitsDir().
    # Single-argument print(...) prints identically on Python 2 and 3.
    print("usage: checkcu2.py [-k/--disk disklist] [-d/--dir directory] [-o/--outpath outpath] [-M/--mosaic] [-r/--redo] startdate enddate version\n")
    print("disklist = list of disks, eg. '/disk01/wsa,/disk02/wsa'; default is availableRaidFileSystem().")
    print("directory = subdirectory, eg. 'products/stacks'; default is fitsDir().")
    print("outpath = path where products/jpgs is created.")
    print("mosaic = forces making mosaics, may break if size is bigger than 2 GB.")
    print("redo = re-do the jpegs.")
    print("startdate is the start date of directories to check.")
    print("enddate is the end date of directories to check.")
    print("version is the version number of the data dir.")
34 35 #------------------------------------------------------------------------------ 36
def main(argv):
    """Check if files are processed by CU2.

       Scans the date-versioned FITS directories on the given disks and
       compares each multi-extension FITS file against the compressed-image
       (jpg) directories, then writes log files listing unprocessed files,
       unprocessed directories, processed files and duplicate jpgs.

       startdate is the start date of directories to check.
       enddate is the end date of directories to check.
       version is the version number of the data dir.
       [disklist] is a csv list of disks to be processed
       [directory] is the subdirectory, eg. 'products/stacks'
       [outpath] path where products/jpgs is created
    """

    # read the arguments
    # NOTE: parses sys.argv[1:] directly; the argv parameter is only echoed
    # in the error paths below.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "k:d:o:Mr",
                                   ["disk=", "dir=", "outpath=",
                                    "mosaic", "redo"])
    except getopt.GetoptError:
        # print help information and exit:
        print argv
        usage()
        raise SystemExit

    # Defaults: all available RAID disks, the standard FITS ingest dir,
    # jpgs located per-disk, no forced mosaics, no re-processing.
    disklist = sysc.availableRaidFileSystem()
    directory = sysc.fitsDir()
    jpgPath = None
    forceMosaic = False
    reDo = False

    for o, a in opts:
        if o in ("-k", "--disk"):
            disklist = a.split(',')
        if o in ("-d","--dir"):
            directory = a
        if o in ("-o", "--outpath"):
            jpgPath = a
        if o in ("-M","--mosaic"):
            forceMosaic = True
        if o in ("-r","--redo"):
            reDo = True

    # Exactly three positional arguments are required:
    # startdate, enddate, version.
    if len(args) != 3:
        print argv
        usage()
        raise SystemExit

    # Dates are compared numerically (e.g. 20051117 <= date <= 20060130).
    searchcritlow = int(args[0])
    searchcrithigh = int(args[1])
    versionnr = int(args[2])

    # Test directories are served from the development RAID file system.
    disklist, subDir = ((disklist, "") if not directory.startswith("test") else
                        (sysc.developRaidFileSystem(), "test"))

    # Log files produced below are written into the CU2 invocation dir.
    cu2Path = os.path.join(sysc.curationCodePath(), "invocations/cu2")

    # create a dictionary listing all dates for every fits dir.
    fitsDirs = FitsList(prefix="cu2_")
    fitsDirs.createFitsDateDict(disklist, directory)
    fitsdatedict = fitsDirs.fitsDateDict

    # create a dictionary listing all jpg dirs for every date.
    # Keys are date-version strings; values are lists of jpg dirs, since
    # one date may be spread over several disks.
    jpegdatedict = {}
    jpgDirList = []
    if jpgPath:
        jpgDirList = [os.path.join(jpgPath, sysc.compressImDir())]
    else:
        for direc in sysc.availableRaidFileSystem():
            jpgDirList.append(os.path.join(direc, subDir,
                                           sysc.compressImDir()))

    for jpgDir in jpgDirList:
        utils.ensureDirExist(jpgDir)
        jpgdatelist = dc.listdir(jpgDir)
        for jpgdate in jpgdatelist:
            jpegdatedict.setdefault(jpgdate, []).append(jpgDir)

    # go datewise through all fits dirs depending on the search criteria
    notprocdirs =[]
    notprocfiles = []
    procfiles = []
    catlist = []
    duplicatelist = []
    Logger.addMessage("%50s %6s %8s" %("date", "fits", "jpg/4"))
    for direc in fitsdatedict.iterkeys():
        for datumVersStr in fitsdatedict[direc]:
            # datumVersStr looks like "<date>_v<version>"; split the date off
            # at the first underscore. -- TODO confirm exact format upstream.
            date = int(datumVersStr[:datumVersStr.find('_')])
            vers = fitsDirs.getVersion(datumVersStr)
            if (searchcritlow <= date <= searchcrithigh) and versionnr == vers:
                fitslist = dc.listdir(os.path.join(direc, datumVersStr))

                # create a list of catalogue files, exclude these files
                # from the fitslist
                # (iterate over a copy so removal is safe; anything that is
                # neither a catalogue nor a MEF file is also dropped)
                tmpl = fitslist[:]
                for elem in tmpl:
                    if elem.find(sysc.catSuffix() + sysc.catType()) >= 0:
                        catlist.append(os.path.join(direc ,datumVersStr, elem))
                        fitslist.remove(elem)
                    elif not elem.endswith(sysc.mefType()):
                        fitslist.remove(elem)

                #check if there is a jpg dir for this date
                if jpegdatedict.has_key(datumVersStr):
                    # set up a name dict for jpgs
                    # maps jpg base name (before the last '_') -> count of
                    # jpgs produced for that FITS file
                    jpegdict = {}
                    for jpgdir in jpegdatedict[datumVersStr]:
                        jpeglist = dc.listdir(os.path.join(jpgdir,
                                                           datumVersStr))

                        # print out how much fits files there are per date
                        # and how much jpgs (divided by 4 (# of jpgs per fits))
                        Logger.addMessage("%50s %6s %8s" %
                                          (os.path.join(jpgdir,
                                                        datumVersStr),
                                           len(fitslist),
                                           len(jpeglist)/4.0))
                        for elem in jpeglist:
                            name = elem[:elem.rfind("_")]
                            if jpegdict.has_key(name):
                                jpegdict[name] += 1
                            else:
                                jpegdict[name] = 1

                    # check with fits list
                    for item in fitslist:
                        name = item[:item.rfind(".fit")]
                        filepath = os.path.join(direc ,datumVersStr, item)
                        # fpnum > 0 means this path was already queued for
                        # reprocessing in an earlier iteration.
                        fpnum = notprocfiles.count(filepath)
                        if reDo:
                            # -r/--redo: queue everything regardless of state
                            notprocfiles.append(filepath)
                        # is large mosaic file
                        elif sysc.mosaicSuffix() in name and \
                             name.split('_')[2] == "dp":
                            # deep mosaics are skipped unless -M/--mosaic
                            if forceMosaic:
                                notprocfiles.append(filepath)
                            else:
                                pass
                        # is not processed yet
                        elif not(jpegdict.has_key(name)) and \
                             name.find("_cat")<0 and fpnum == 0:
                            notprocfiles.append(filepath)
                        # less than 4 extensions processed
                        elif jpegdict.has_key(name) and \
                             jpegdict[name] < 4 and fpnum == 0:
                            notprocfiles.append(filepath)
                        elif jpegdict.has_key(name) and \
                             jpegdict[name] > 4:
                            # more than 4 jpgs for one FITS file: duplicates
                            duplicatelist.append(
                                (jpegdatedict[datumVersStr], name,
                                 jpegdict[name]))

                        # processed
                        else:
                            procfiles.append(filepath)
                            # was queued earlier but is now complete: un-queue
                            if fpnum > 0:
                                notprocfiles.remove(filepath)
                else:
                    # no jpg dir at all for this date: whole dir unprocessed
                    notprocdirs.append(os.path.join(direc, datumVersStr))
                    # print out how much fits files there are per date
                    Logger.addMessage("%50s %6s %8s" %
                                      (os.path.join(direc, datumVersStr),
                                       len(fitslist), 0))

    # write not yet processed files to a log (can be used as
    # input for CU2)
    # NOTE(review): the log names use datumVersStr left over from the last
    # loop iteration -- confirm that is intended when multiple dates match.
    if notprocfiles:
        filePath = \
            os.path.join(cu2Path, "reprocfiles_%s.log"%datumVersStr)
        Logger.addMessage("%s files to be processed are in: %s" %
                          (len(notprocfiles), filePath))
        open(filePath,'w').writelines(f+'\n' for f in notprocfiles)

    # unprocessed dirs
    if notprocdirs:
        filePath = \
            os.path.join(cu2Path, "notprocdirs_%s.log"% datumVersStr)
        Logger.addMessage("%s dirs to be processed are in: %s" %
                          (len(notprocdirs), filePath))
        open(filePath,'w').writelines(f+'\n' for f in notprocdirs)

    # processed files
    if procfiles:
        filePath = \
            os.path.join(cu2Path, "procfiles_%s.log" % datumVersStr)
        Logger.addMessage("%s files processed are in: %s" %
                          (len(procfiles), filePath))
        open(filePath,'w').writelines(f + '\n' for f in procfiles)

    Logger.addMessage("%s catalogue files" % len(catlist))

    # duplicate files
    if duplicatelist:
        log_time = utils.makeTimeStamp()
        log_time = log_time[:log_time.find('.')].replace(' ','_')
        dfname = "duplicatejpgs_"+log_time+".log"
        Logger.addMessage("%s duplicate jpgs are in: %s" %
                          (len(duplicatelist), dfname))
        csv.File(dfname, 'w').writelines(duplicatelist)
#------------------------------------------------------------------------------
#
if __name__ == '__main__':
    # Entry point: pass the full argv list so the error paths in main()
    # can echo it before printing usage.
    main(sys.argv)

#------------------------------------------------------------------------------
# Change log:
#
# 13-Jul-2005, ETWS: Original version.
#                    Included partly processed files.
#  8-Aug-2005, ETWS: Writing unprocessed dirs also into a file.
# 16-Aug-2005, ETWS: Write a list of processed files as well.
#  3-Oct-2005, ETWS: Included versioning of data dirs.
# 17-Nov-2005, ETWS: Taking care of multiple directories for one date;
#                    included parameter 'disklist'.
# 18-Jan-2006, ETWS: fixed bug, fixed wrong indentation
# 30-Jan-2006, ETWS: Included optional 'disk' and 'directory' parameters
#                    to list other dirs than 'ingest/fits'
# 28-Apr-2006, ETWS: Included check for test directories
# 10-May-2006, ETWS: Included check for duplicate jpgs
# 30-May-2006, ETWS: Added timestamp to duplicatejpgs log
#  5-Jul-2006, RSC:  Upgraded to use new CSV.py module.
# 01-May-2007, ETWS: Updated for use with CU0
# 10-Jul-2007, ETWS: Updated for use with test cases.
# 22-Nov-2007, ETWS: Excluded large deep mosaic fits files from list.
# 29-Jan-2008, ETWS: Included possibility to force mosaic jpeg creation
#                    and to re-process existing jpegs.
# 30-Apr-2008, ETWS: Replaced massStorageRaidFileSystem with
#                    availableRaidFileSystem.
# 30-Jul-2008, ETWS: Enabled possibility to be run on multiple processors.