helpers.TestProductionFrames

Source Code for Module helpers.TestProductionFrames

1 #! /usr/bin/env python 2 #------------------------------------------------------------------------------ 3 #$Id: TestProductionFrames.py 7245 2010-07-20 12:52:03Z RossCollins $ 4 """ 5 Tests production frame IDs of all FITS files against entries in the 6 database. 7 8 @author: E. Sutorius 9 @org: WFAU, IfA, University of Edinburgh 10 """ 11 #------------------------------------------------------------------------------ 12 from __future__ import division, print_function 13 14 from collections import defaultdict 15 import dircache 16 import os 17 import time 18 19 from wsatools.CLI import CLI 20 import wsatools.CSV as csv 21 import wsatools.DbConnect.DbConstants as dbc 22 from wsatools.DbConnect.DbSession import DbSession 23 import wsatools.FitsUtils as fits 24 import wsatools.SystemConstants as sysc 25 import wsatools.Utilities as utils 26 #------------------------------------------------------------------------------ 27

def _frameEntries(rows):
    """
    Converts Multiframe query rows into (key, ID tuple) pairs.

    @param rows: Query result rows whose first element is the file name and
                 whose remaining elements are the frame IDs.
    @type  rows: sequence(sequence)

    @return: List of ("<date dir>/<file name>", (file name without the host
             prefix, ID, ID, ...)) pairs, in row order.
    @rtype:  list(tuple)

    """
    entries = []
    for row in rows:
        fileName = row[0]
        # The key is <date dir>/<file name>.
        key = os.path.join(os.path.basename(os.path.dirname(fileName)),
                           os.path.basename(fileName))
        # First the full file name without the host, then all the IDs.
        entries.append((key, (fileName.rpartition(':')[2],) + tuple(row[1:])))

    return entries


def checkforprodf(cli, xdates):
    """
    Queries the Multiframe table for the frame IDs of every file in the
    requested date range and version, and of every additional date directory
    that is either listed in xdates or referenced by the file names found.

    If any rows are found, the sorted (key, IDs) pairs are also written to
    "prodframeindb_<database>_<beginDate>_<endDate>_<version>.list" in the
    directory given by the "proddir" option.

    @param cli:    Command-line interface object supplying the "beginDate",
                   "endDate", "version" and "database" arguments and the
                   "proddir" option.
    @type  cli:    CLI
    @param xdates: Names of date directories ("<date>_v<version>") outside
                   the requested range that must be queried as well. This
                   list is not modified.
    @type  xdates: list(str)

    @return: Dictionary mapping "<date dir>/<file name>" to the tuple
             (file name without host, multiframeID, darkID, confID, flatID,
             frinID, skyID, maskID).
    @rtype:  dict(str: tuple)

    """
    db = DbSession(cli=cli)
    versionnr = cli.getArg("version")
    beginDate = int(cli.getArg("beginDate"))
    endDate = int(cli.getArg("endDate"))
    # Time stamp limits: the date followed by twelve digits, century dropped.
    bd = str(beginDate * 1000000000000 - 1)[2:]
    ed = str((endDate + 1) * 1000000000000)[2:]

    # get file and id list from db for given dates
    attrs = "fileName,multiframeID,darkID,confID,flatID,frinID,skyID,maskID"
    mfidlist = db.query(attrs, "Multiframe",
      whereStr="fileTimeStamp > '%s' AND fileTimeStamp <'%s'" % (bd, ed)
               + " AND fileName LIKE '%_v" + versionnr + "/%'")

    # build the mfdict dictionary
    mfl = _frameEntries(mfidlist)
    mfdict = dict(mfl)

    # find other dates used from the given dates; work on a copy so that the
    # caller's list is left untouched
    xd = list(xdates)
    for row in mfidlist:
        name = os.path.basename(row[0])
        if name.startswith(sysc.casuPrefix()):
            continue
        dateStart = name.find('20')
        dateEnd = name.find('_', dateStart)
        if dateEnd >= 0:
            date = name[dateStart:dateEnd]
            if not (beginDate <= int(date) <= endDate):
                xd.append(date + "_v" + versionnr)

    # query db for production files of those dates
    for date in sorted(set(xd)):
        rows = db.query(attrs, "Multiframe",
          whereStr="fileName LIKE '%" + date
                   + "%' AND fileName NOT LIKE '%/w20%'")

        # append new results to existing mf list and dict
        mfld = _frameEntries(rows)
        mfl += mfld
        mfdict.update(dict(mfld))

    if mfl:
        # write paths of files in the database into a file; the name uses the
        # database name, as the script section does for its own output files
        # (the DbSession object itself cannot be concatenated into a string)
        dbName = cli.getArg("database").split('.')[-1]
        filename = os.path.join(cli.getOpt("proddir"),
          "prodframeindb" + "_" + dbName + "_" + str(beginDate) + "_"
          + str(endDate) + "_" + versionnr + ".list")

        with open(filename, 'w') as listFile:
            listFile.writelines("%s, %r\n" % entry for entry in sorted(mfl))

    return mfdict

101 102 #------------------------------------------------------------------------------ 103

def getprodfits(beginDate, endDate, versionnr):
    """
    Reads the production file names from the headers of all non-catalogue
    files in every date directory that lies within the given date range and
    has the given version.

    @param beginDate: First date (YYYYMMDD) to include.
    @type  beginDate: int
    @param endDate:   Last date (YYYYMMDD) to include.
    @type  endDate:   int
    @param versionnr: Version string that must follow "_v" in the directory
                      name.
    @type  versionnr: str

    @return: Dictionary keyed by date, each value being a dictionary that
             maps "<date dir>/<file name>" to the list returned by readhdu()
             for that file.
    @rtype:  defaultdict(int: dict(str: list))

    """
    # list of dirs in which to search for fits files
    disks = sysc.availableRaidFileSystem()
    fitsDirs = [sysc.fitsDir(), sysc.stackDir(), sysc.mosaicDir()]
    fitsdict = defaultdict(dict)
    for dirname, dateDirs in fits.getAllPaths(disks, fitsDirs):
        print("reading " + dirname)
        for datum in dateDirs:
            # Directory names have the form <date>_v<version>; anything else
            # cannot belong to the requested range and is skipped.
            dateStr, sep, rest = datum.partition('_')
            if not sep or not dateStr.isdigit():
                continue

            date = int(dateStr)
            vers = rest[1:]  # drop the character following the underscore
            if not (beginDate <= date <= endDate and versionnr == vers):
                continue

            print("processing ", datum)
            dirPath = os.path.join(dirname, datum)

            # create a list of fits files for the given date, excluding the
            # catalogue files
            print(" processing files in ", datum)
            d1 = time.time()
            filedict = {}
            for fileName in os.listdir(dirPath):
                if "_cat.fits" not in fileName:
                    fileshortpath = os.path.join(datum, fileName)
                    filepath = os.path.join(dirPath, fileName)
                    filedict[fileshortpath] = readhdu(filepath)

            fitsdict[date].update(filedict)
            d2 = time.time()
            # elapsed time in minutes
            print((d2 - d1) / 60)

    return fitsdict

134 135 #------------------------------------------------------------------------------ 136

def writeprodfits(fitsdict, proddir):
    """
    Writes one "fitsprod_<key>.list" file per key of the given dictionary
    into the given directory. Every line has the form
    "<file key>, <repr of value>", with lines sorted by file key.

    @param fitsdict: Dictionary keyed by date, each value being a dictionary
                     of file key to list of header values.
    @type  fitsdict: dict(int: dict(str: list))
    @param proddir:  Existing directory in which the files are written.
    @type  proddir:  str

    """
    for date, entries in fitsdict.items():
        listPath = os.path.join(proddir, "fitsprod_%s.list" % date)
        # The with-block guarantees that the file is flushed and closed.
        with open(listPath, 'w') as listFile:
            listFile.writelines("%s, %r\n" % entry
                                for entry in sorted(entries.items()))

142 143 #------------------------------------------------------------------------------ 144

def readidfile(dbfilesfn):
    """
    Reads a list file whose lines have the form
    "<file key>, ('<file name>', <ID>, <ID>, ...)" back into a dictionary.

    @param dbfilesfn: Path to the list file.
    @type  dbfilesfn: str

    @return: Dictionary mapping the file key to the tuple
             (file name, ID, ID, ...), with every ID converted to long.
    @rtype:  dict(str: tuple)

    """
    mfdict = {}
    with open(dbfilesfn) as idFile:
        for line in idFile:
            line = line.replace('\n', '')
            if not line.strip():
                # blank lines carry no entry
                continue

            filename, idlist = line.split(',', 1)
            ids_csv = idlist.replace('(', '').replace(')', '')
            fields = [dequotify(x.strip()) for x in ids_csv.split(',')]
            # The first field is the file name, all others are numeric IDs.
            mfdict[filename] = \
              (fields[0],) + tuple(long(i) for i in fields[1:])

    return mfdict

157 158 #------------------------------------------------------------------------------ 159

def readfitsfile(fitsfilesfn):
    """
    Reads a list file whose lines have the form
    "<file key>, ['<file name>', <mfID>, '<name>', ...]" back into a
    dictionary.

    @param fitsfilesfn: Path to the list file.
    @type  fitsfilesfn: str

    @return: Dictionary mapping the file key to the list
             [file name, mfID, name, ...], where the mfID is converted to
             long and all other elements stay strings.
    @rtype:  dict(str: list)

    """
    mfdict = {}
    with open(fitsfilesfn) as fitsListFile:
        for line in fitsListFile:
            line = line.replace('\n', '')
            if not line.strip():
                # blank lines carry no entry
                continue

            filename, idlist = line.split(',', 1)
            ids_csv = idlist.replace('[', '').replace(']', '')
            mfl = [dequotify(x.strip()) for x in ids_csv.split(',')]
            # Only the second field is numeric.
            mfl[1] = long(mfl[1])
            mfdict[filename] = mfl

    return mfdict

170 171 #------------------------------------------------------------------------------ 172

def readhdu(files):
    """
    Reads the multiframe ID and the names of the production files from the
    headers of the given FITS file.

    @param files: Path to the FITS file.
    @type  files: str

    @return: List of the given path, the value of the WSA_MFID keyword of
             the first HDU as a long, and one file name for each of the
             keywords DARKCOR, CIR_CPM, FLATCOR, FRINGCOR, SKYSUB and
             CIR_OPM of the second HDU. A missing keyword gives the
             character default value.
    @rtype:  list

    """
    # Keywords in the order darkid, confid, flatid, frinid, skyid, maskid;
    # the multiframeid is read separately from the first HDU.
    calibKeys = ("DARKCOR", "CIR_CPM", "FLATCOR", "FRINGCOR", "SKYSUB",
                 "CIR_OPM")
    hduList = fits.open(files)
    record = [files, long(hduList[0].header.get("WSA_MFID"))]
    noValue = dbc.charDefault()
    for key in calibKeys:
        descr = hduList[1].header.get(key, noValue)
        if descr != noValue:
            # Keep the word that ends in ".fit", dropping the text around it.
            nameEnd = descr.find(".fit") + 4
            nameStart = descr.rfind(' ', 0, nameEnd - 4) + 1
            record.append(descr[nameStart:nameEnd])
        else:
            record.append(noValue)

    return record

190 191 #------------------------------------------------------------------------------ 192

def dequotify(text):
    """
    Removes the pair of quotes enclosing the given text, if there is one.
    Single quotes are checked first, then double quotes, so a text quoted
    with both loses both pairs. Quote characters inside the text are kept.

    @param text: Text that may be enclosed in quotes.
    @type  text: str

    @return: The text without its enclosing quotes.
    @rtype:  str

    """
    for quote in ("'", '"'):
        if text.startswith(quote) and text.endswith(quote):
            # Slice rather than replace(): only the outer pair is removed.
            text = text[1:-1]

    return text

#------------------------------------------------------------------------------
# Entry point for script.

# Allow module to be imported as well as executed from the command line
if __name__ == "__main__":
    # Define command-line interface options
    isDateOK = lambda x: x.isdigit() and len(x) == 8
    CLI.progArgs += [
        CLI.Argument("beginDate", "20051212", isValOK=isDateOK),
        CLI.Argument("endDate", "20060102", isValOK=isDateOK),
        CLI.Argument("version", "1", isValOK=lambda x:
                     x.replace('.', '').isdigit() and x > '0')]

    pairDesc = "containing pairs of filename,(mfID,darkID,confID,flatID,"\
               "frinID,skyID,maskID)"

    CLI.progOpts.remove("test")
    CLI.progOpts += [
        CLI.Option('f', "file_prefix",
          "prefix of files created from fits files and in the given time range, "
          + pairDesc, "PREFIX"),
        CLI.Option('i', "db_file",
          "file created from the DB, " + pairDesc, "FILE"),
        CLI.Option('o', "outdir",
          "dir where missing files lists are written to",
          "DIR", "prodoutput"),
        CLI.Option('p', "proddir",
          "dir where the db files are written to/read from",
          "DIR", "productionlists")]

    cli = CLI("TestProductionFrame", "$Revision: 7245 $", __doc__)
    beginDate = int(cli.getArg("beginDate"))
    endDate = int(cli.getArg("endDate"))
    versionnr = cli.getArg("version")
    outdir = cli.getOpt("outdir")
    utils.ensureDirExist(outdir)
    proddir = cli.getOpt("proddir")
    utils.ensureDirExist(proddir)

    # get prod filenames from fitsfiles
    fitsdict = {}
    if not cli.getOpt("file_prefix"):
        tmpfitsdict = getprodfits(beginDate, endDate, versionnr)
        writeprodfits(tmpfitsdict, proddir)
        for key in tmpfitsdict:
            fitsdict.update(tmpfitsdict[key])
    else:
        # read them from previously written <prefix>_<date>.list files
        fitsfilename = cli.getOpt("file_prefix")
        for fff in sorted(os.listdir(proddir)):
            # Only files that start with the prefix, end in ".list" and
            # carry a numeric date in between can be list files of this
            # script; anything else in the directory is ignored.
            if not (fff.startswith(fitsfilename) and fff.endswith(".list")):
                continue
            dateStr = fff[len(fitsfilename) + 1:fff.rfind(".list")]
            if not dateStr.isdigit():
                continue
            if beginDate <= int(dateStr) <= endDate:
                fitsdict.update(readfitsfile(os.path.join(proddir, fff)))

    # get external dates from prod filenames not covered by given range
    xdt = set()
    for key in fitsdict:
        for pfn in fitsdict[key][2:]:
            if pfn == dbc.charDefault() \
              or pfn.startswith(sysc.casuPrefix()):
                continue
            dateStart = pfn.find('20')
            dateEnd = pfn.find('_', dateStart)
            if dateEnd >= 0:
                xdt.add(pfn[dateStart:dateEnd])

    xdates = [xd + "_v" + versionnr for xd in sorted(xdt)
              if not (beginDate <= int(xd) <= endDate)]

    # make the mfid dictionary from the DB or from a file
    if not cli.getOpt("db_file"):
        mfiddict = checkforprodf(cli, xdates)
    else:
        mfiddict = readidfile(os.path.join(proddir, cli.getOpt("db_file")))

    missinglist = []
    fitsiddict = {}
    for key in fitsdict:
        # is key in DB?
        try:
            idtuple = mfiddict[key]
        except KeyError:
            msg = "WARNING! File not in DB: " + key
            missinglist.append(msg)
            print(msg)
        else:
            # is the mfID identical?
            if fitsdict[key][1] != idtuple[1]:
                missinglist.append("WARNING! mfID wrong: " + key + " Fits: "
                                   + str(fitsdict[key][1]) + " DB: "
                                   + str(idtuple[1]))
                print("WARNING! mfID wrong: ", key, "Fits:", fitsdict[key][1],
                      " DB:", idtuple[1])

            # take filename, mfID from file
            newentry = [fitsdict[key][0], fitsdict[key][1]]
            for xfn in fitsdict[key][2:]:
                # resolve other IDs
                if xfn == dbc.charDefault():
                    newentry.append(dbc.intDefault())
                    continue

                pfn = os.path.join(os.path.dirname(key), xfn)
                # is ID in this date dir or is it external?
                try:
                    newentry.append(mfiddict[pfn][1])
                except KeyError:
                    dateStart = xfn.find('20')
                    dateEnd = xfn.find('_', dateStart)
                    if dateEnd >= 0:
                        date = xfn[dateStart:dateEnd]
                        xentry = os.path.join(date + "_v" + versionnr, xfn)
                        try:
                            newentry.append(mfiddict[xentry][1])
                        except KeyError:
                            msg = "WARNING! external file not found: "\
                                  + xentry
                            missinglist.append(msg)
                            print(msg)
                            newentry.append(dbc.intDefault())
                    else:
                        msg = "WARNING! file not found: " + xfn
                        missinglist.append(msg)
                        print(msg)
                        newentry.append(dbc.intDefault())

            fitsiddict[key] = newentry

    # entries whose IDs resolved from the FITS headers differ from the DB
    dbchange = [fitsiddict[entry] for entry in fitsiddict
                if fitsiddict[entry] != list(mfiddict[entry])]

    db = cli.getArg("database").split('.')[-1]
    if dbchange:
        missingfn = "wrongprod" + "_" + db + "_" + str(beginDate) + "_" + \
                    str(endDate) + "_" + versionnr + ".list"
        csv.File(os.path.join(outdir, missingfn), 'w').writelines(dbchange)

    if missinglist:
        missingfn = "missing_files" + "_" + db + "_" + str(beginDate) + "_"\
                    + str(endDate) + "_" + versionnr + ".list"

        # The with-block guarantees that the file is flushed and closed.
        with open(os.path.join(outdir, missingfn), 'w') as missingFile:
            missingFile.writelines(entry + '\n' for entry in missinglist)

#------------------------------------------------------------------------------
# Change log:
#
# 11-Apr-2006, ETWS: Original version.
#  2-May-2006,  RSC: Renamed checkDirExist() to ensureDirExist()
#  3-May-2006, ETWS: Included input/output dirs, moved createFitsDateDict
#                    to Utilities.py
#  7-Jun-2006,  RSC: Replaced utils.sortList() with fast, new Python 2.4 method
#  5-Jul-2006,  RSC: Upgraded to use new CSV.py module.
# 10-Jul-2006,  RSC: Replaced string module functions with str() equivalents as
#                    the string module will be obsoleted in Python 3.0
#  6-Dec-2006,  RSC: Fixed filename pattern matching bug (only affects 05A v2).
# 22-Feb-2007,  RSC: Updated to reflect move of loadServerHost() constant from
#                    DbConstants to SystemConstants