Package invocations :: Package monitoring :: Module TestDetection
[hide private]

Source Code for Module invocations.monitoring.TestDetection

  1  #! /usr/bin/env python 
  2  #------------------------------------------------------------------------------ 
  3  #$Id: TestDetection.py 7245 2010-07-20 12:52:03Z RossCollins $ 
  4  """ 
  5     Test detection table entries against FITS cat files. 
  6   
  7     @author: E. Sutorius 
  8     @org:    WFAU, IfA, University of Edinburgh 
  9  """ 
 10  #------------------------------------------------------------------------------ 
 11  from   collections import defaultdict 
 12  import dircache 
 13  import getopt 
 14  import os 
 15  import sys 
 16   
 17  from   wsatools.DataFactory         import ProgrammeTable 
 18  import wsatools.DbConnect.DbConstants   as dbc 
 19  from   wsatools.DbConnect.DbSession import DbSession 
 20  import wsatools.FitsUtils               as fits 
 21  import wsatools.SystemConstants         as sysc 
 22  import wsatools.Utilities               as utils 
 23  #------------------------------------------------------------------------------ 
 24   
def usage():
    """ Print the command-line synopsis of TestDetection.py to stdout.
    """
    # Adjacent string literals are joined verbatim, so each fragment except
    # the last carries its own trailing space to keep the options apart.
    print("Usage: TestDetection.py [-d/--dbfile DblistPrefix] "
          "[-e/--exclude exclProgList] [-f/--fitsfile FitslistPrefix] "
          "[-c/--catdir catsdir] [-o/--outdir outdir] [-p/--progs progList] "
          "startdate enddate version")

#------------------------------------------------------------------------------

def checkforcats(archive, startdate, enddate, verNum, proglist, excludelist):
    """ Get the cat sizes from the DB.

        @param archive:     database session; its query() method is called
                            once per detection table.
        @param startdate:   first date (integer) of the range to check.
        @param enddate:     last date (integer) of the range to check.
        @param verNum:      version number matched against "_v<verNum>/" in
                            the file name.
        @param proglist:    programme names to restrict the check to; an
                            empty list means no restriction.
        @param excludelist: programme names to leave out; the entry "ns"
                            additionally drops every table whose name is not
                            purely alphabetic.
        @return: dictionary keyed by table name; each value maps
                 "<dateDir>/<file root>" to
                 [filePath, n, n, n, n] where slot (extnum - 1) holds
                 MAX(seqnum) of that extension and unset slots are -1.
    """
    # Time stamp bounds: the date is scaled by 10^12 and the two leading
    # digits are dropped.  Plain integer literals promote to long on their
    # own, so no "L" suffix is required.
    bd = str(startdate * 1000000000000 - 1)[2:]
    ed = str((enddate + 1) * 1000000000000)[2:]

    tables = set(ProgrammeTable(archive).getDetectionTable())
    tables.discard(dbc.charDefault() + "Raw")

    if "ns" in excludelist:
        tables -= set(table for table in tables if not table.isalpha())

    tables -= set(entry + "DetectionRaw" for entry in excludelist)
    # Only narrow the selection when programmes were actually requested;
    # intersecting with an empty list would silently discard every table.
    if proglist:
        tables.intersection_update(prog + "DetectionRaw" for prog in proglist)

    # get file and catsize list from db for given dates
    print("Checking detections in:")
    dbdets = {}
    for count, table in enumerate(sorted(tables)):
        print("%s/%s: %s..." % (count+1, len(tables), table))
        dbdets[table] = archive.query(
          selectStr="filename, extnum-1, MAX(seqnum), d.multiframeid",
          fromStr=table + " AS d, Multiframe AS m",
          whereStr="d.multiframeid = m.multiframeid AND "
                   "fileTimeStamp > '%s' AND fileTimeStamp <'%s'" % (bd, ed) +
                   " AND fileName like '%_v" + str(verNum) + "/%' "
                   "GROUP BY d.multiframeid,extnum,fileName")

    detections = {}
    for table in dbdets:
        detdict = {}
        for fileName, extNum, maxSeqNum, _mfID in sorted(dbdets[table]):
            # drop anything up to the first ':' in the stored file name
            filePath = fileName.split(':', 1)[-1]
            filePathName, fileshortname = os.path.split(filePath)
            fileshort = os.path.join(os.path.basename(filePathName),
                                     os.path.splitext(fileshortname)[0])
            if fileshort not in detdict:
                detdict[fileshort] = [filePath, -1, -1, -1, -1]
            detdict[fileshort][extNum] = maxSeqNum
        detections[table] = detdict

    return detections

#------------------------------------------------------------------------------

def compare(dictdb, dictfits):
    """ Compare the cat files in the DB with the cat FITS files.

        @param dictdb:   maps a file key to [filePath, size, ...] as obtained
                         from the database.
        @param dictfits: maps a file key to [filePath, size, ...,
                         (acronym, project)] as obtained from the FITS files.
        @return: (errorlist, nodbcat): the warning texts (each one is also
                 printed), and the (filePath, project) pairs of FITS files
                 that are missing from dictdb and carry the acronym "none".
    """
    errorlist = []
    nodbcat = []

    def report(errtext):
        # every warning goes to stdout and into the returned list
        print(errtext)
        errorlist.append(errtext)

    for keyfits, fitscatsize in dictfits.items():
        if keyfits not in dictdb:
            progInfo = fitscatsize[-1]
            if progInfo[0] == "none":
                nodbcat.append((fitscatsize[0], progInfo[1]))
            else:
                report("WARNING! File not in DB: " + fitscatsize[0] +
                       "\t Programme: " + progInfo[1])
            continue

        dbcatsize = dictdb[keyfits]

        # compare filenames
        fname = os.path.splitext(fitscatsize[0])[0]
        dbname = os.path.splitext(dbcatsize[0])[0] + sysc.catSuffix()
        if fname != dbname:
            report("WARNING! Filename wrong: " + keyfits +
                   "\t Fits: " + fitscatsize[0] +
                   " DB: " + dbcatsize[0])

        if len(fitscatsize) != len(dbcatsize):
            report("WARNING! Filelength wrong: " + keyfits +
                   "\t Fits: " + str(len(fitscatsize)-2) +
                   " DB: " + str(len(dbcatsize)-2))

        # compare sizes; never index past the end of a short entry
        nslots = min(5, len(fitscatsize), len(dbcatsize))
        for i in range(1, nslots):
            if fitscatsize[i] == dbcatsize[i]:
                continue
            # an unset DB slot (-1) is equivalent to an empty FITS table
            if dbcatsize[i] == -1 and fitscatsize[i] == 0:
                continue
            report("WARNING! File size wrong: " + keyfits +
                   "\t Fits: " + repr(fitscatsize[1:5]) +
                   " DB: " + repr(dbcatsize[1:5]))
            break

    return errorlist, nodbcat

#------------------------------------------------------------------------------

def getcats(beginDate, endDate, verNum, progTable):
    """ Get cat sizes from the FITS files.

        @param beginDate: first date of the range (integer or digit string).
        @param endDate:   last date of the range (integer or digit string).
        @param verNum:    version; only "<date>_v<verNum>" directories are
                          read.
        @param progTable: programme table object handed on to readhdu().
        @return: defaultdict keyed by date string; each value maps
                 "<dateDir>/<file root>" to the list returned by readhdu().
    """
    # The script passes the dates as integers while directory names are
    # strings; compare numerically so that the range test can succeed.
    firstDate, lastDate = int(beginDate), int(endDate)
    version = str(verNum)

    fitsDict = defaultdict(dict)
    for dirName, dateDirs in fits.getAllPaths(sysc.availableRaidFileSystem()):
        print("reading " + dirName)
        for dateDir in dateDirs:
            dateStr, sep, versStr = dateDir.partition('_v')
            # ignore directories that are not named <date>_v<version>
            if not sep or not dateStr.isdigit():
                continue
            if versStr != version \
              or not firstDate <= int(dateStr) <= lastDate:
                continue

            # collect the catalogue files for the given date
            print(" processing files in " + dateDir)
            dirPath = os.path.join(dirName, dateDir)
            for fileName in sorted(os.listdir(dirPath)):
                if sysc.mefType() in fileName \
                  and not fileName.endswith(sysc.mefType()):
                    root = os.path.join(dateDir,
                                        fileName.split(sysc.catSuffix())[0])
                    filePath = os.path.join(dirPath, fileName)
                    fitsDict[dateStr][root] = readhdu(filePath, progTable)

    return fitsDict

#------------------------------------------------------------------------------

def writecats(fitsDict, outdir):
    """ Write the cat FITS file sizes into list files.

        One file "catsize_<key>.list" is written to outdir per key of
        fitsDict, holding one "<entry>, <repr of details>" line per entry.

        @param fitsDict: maps a key (date) to a dictionary of
                         entry -> details list.
        @param outdir:   existing directory to write the list files to.
    """
    for key in fitsDict:
        entries = fitsDict[key]
        fitsListPath = os.path.join(outdir, "catsize_%s.list" % key)
        # the context manager closes (and flushes) the file deterministically;
        # sorting makes the output reproducible between runs
        with open(fitsListPath, 'w') as listFile:
            listFile.writelines("%s, %r\n" % (entry, entries[entry])
                                for entry in sorted(entries))

#------------------------------------------------------------------------------

def _unwrap(elems, opener, closer):
    """ Strip the opener/closer characters (and any quotes they exposed) from
        the elements of elems that carry them, in place, and return the
        (start, end) indices of those elements.
    """
    start = end = 0
    for idx, elem in enumerate(elems):
        if opener in elem:
            start = idx
        if closer in elem:
            end = idx
    elems[start] = dequotify(elems[start].replace(opener, ''))
    elems[end] = dequotify(elems[end].replace(closer, ''))
    return start, end


def readlistfile(listName):
    """ Read FITS file sizes from a list file.

        Each line has the form "<key>, [<path>, <int>, ...]" where the list
        may end with a "(<acronym>, <project>)" tuple.

        @param listName: path of the list file.
        @return: dictionary mapping each key to
                 [path, int, ..., (acronym, project)]; when a line holds no
                 tuple, both tuple members are the third '_'-separated field
                 of the file's base name (or "" if there is none).
    """
    # Use the base name only, so that underscores in the directory part do
    # not shift the programme field.
    baseName = os.path.basename(listName)
    programme = (baseName.split('_')[2] if baseName.count('_') > 2 else "")

    catdict = {}
    with open(listName) as listFile:
        for line in listFile:
            line = line.replace('\n', '')
            if not line.strip():
                continue  # tolerate blank lines
            filename, linelist = line.split(',', 1)
            lineelems = [dequotify(x.strip()) for x in linelist.split(',')]

            first, last = _unwrap(lineelems, '[', ']')
            cat_csv = lineelems[first:last+1]

            if any('(' in x for x in cat_csv):
                tplStart, tplEnd = _unwrap(cat_csv, '(', ')')
                progtpl = tuple(cat_csv[tplStart:tplEnd+1])
            else:
                progtpl = (programme, programme)
                tplStart = len(cat_csv)

            # everything between the path and the tuple is a size
            sizes = [int(x) for x in cat_csv[1:tplStart]]
            catdict[filename] = [cat_csv[0]] + sizes + [progtpl]

    return catdict

#------------------------------------------------------------------------------

def readhdu(filePath, progTable):
    """ Extract details from catalogue FITS header.

        @param filePath:  path of the catalogue FITS file.
        @param progTable: programme table object; setProgID() and
                          getAcronym() are called on it.
        @return: [filePath, NAXIS2 of each extension HDU ...,
                 (acronym, project)], acronym and project in lower case.
    """
    catDetails = [filePath]
    hdu = None
    for hdu in fits.open(filePath)[1:]:
        catDetails.append(hdu.header.get("NAXIS2", 0))

    # The project is read from the last extension header; a file without any
    # extension falls back to the default instead of failing on an unbound
    # loop variable.
    if hdu is None:
        project = dbc.charDefault().lower()
    else:
        project = hdu.header.get("PROJECT", dbc.charDefault()).lower()

    if project.startswith('u/ukidss/'):
        # keep three characters after the last '/'
        project = project[:project.rfind('/')+4]

    # NOTE(review): the programme lookup is attempted for every project, not
    # only for UKIDSS ones -- confirm against the original indentation.
    try:
        progTable.setProgID(project.replace('u/ukidss/', ''))
    except SystemExit:
        acronym = dbc.charDefault()
    else:
        acronym = progTable.getAcronym()

    if project.startswith("u/ec/"):
        project = "u/ec/"
        acronym = "comm"

    return catDetails + [(acronym.lower(), project)]

#------------------------------------------------------------------------------

def dequotify(text):
    """ Remove one enclosing pair of single quotes and then one enclosing
        pair of double quotes from text, if present.

        Only the enclosing pair is dropped; quote characters inside the text
        are preserved.

        @param text: string to strip.
        @return: the string without its enclosing quotes.
    """
    for quote in ("'", '"'):
        if text.startswith(quote) and text.endswith(quote):
            text = text[1:-1]
    return text
#------------------------------------------------------------------------------
# Entry point for script.

# Allow module to be imported as well as executed from the command line
if __name__ == "__main__":
    # read the arguments
    try:
        # Both spellings of the catalogue-list directory option are accepted:
        # "--catdir" is the documented one, "--catsdir" the one that used to
        # be registered here.
        opts, args = getopt.getopt(sys.argv[1:], "d:e:f:hc:o:p:",
            ["dbfile=", "exclude=", "fitsfile=", "help", "catdir=",
             "catsdir=", "outdir=", "progs="])
        if len(args) < 3 or len(args) > 4:
            # GetoptError needs a message; raising the bare class fails with
            # a TypeError instead of reaching the handler below.
            raise getopt.GetoptError("expected: startdate enddate version")
    except getopt.GetoptError:
        # print help information and exit:
        print(sys.argv)
        usage()
        sys.exit(2)

    checkdb = True
    outdir = os.curdir
    catsdir = "catlists"
    readfitsdir = True
    fitsfilename = ""
    detfilename = ""
    excludeProgs = ["test"]
    progList = []
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        if o in ("-d", "--dbfile"):
            checkdb = False
            detfilename = a
        if o in ("-e", "--exclude"):
            excludeProgs.extend(a.split(','))
        if o in ("-f", "--fitsfile"):
            readfitsdir = False
            fitsfilename = a
        if o in ("-c", "--catdir", "--catsdir"):
            catsdir = a
        if o in ("-o", "--outdir"):
            outdir = a
        if o in ("-p", "--progs"):
            progList.extend(a.split(','))

    utils.ensureDirExist(outdir)
    utils.ensureDirExist(catsdir)

    # just get the data from the fits files
    nocheck = detfilename.lower() == "none"
    searchcritlow = int(args[0])
    searchcrithigh = int(args[1])
    verNum = args[2]

    archive = DbSession()
    progTable = ProgrammeTable(archive)

    # get prod filenames from fitsfiles
    fitsDict = {}
    if readfitsdir:
        # search the FITS directories and keep a copy of what was found
        tmpfitsDict = getcats(searchcritlow, searchcrithigh, verNum, progTable)
        writecats(tmpfitsDict, catsdir)
        for key in tmpfitsDict:
            fitsDict.update(tmpfitsDict[key])
    else:
        for entry in sorted(os.listdir(catsdir)):
            if fitsfilename not in entry:
                continue
            date = os.path.splitext(entry.replace(fitsfilename, ''))[0]
            # the date bounds are integers, so the date taken from the file
            # name has to be converted before it can be compared
            try:
                listDate = int(date.strip('_'))
            except ValueError:
                continue
            if searchcritlow <= listDate <= searchcrithigh:
                fitsDict.update(readlistfile(os.path.join(catsdir, entry)))

    if nocheck:
        print("No comparison done!")
        sys.exit()

    # make the size dictionary from the DB
    dbDict = {}
    if checkdb:
        detDict = checkforcats(archive, searchcritlow, searchcrithigh, verNum,
                               progList, excludeProgs)
        for table in detDict:
            if not detDict[table]:
                continue
            prog = table.replace("DetectionRaw", '')
            dbListPath = os.path.join(catsdir,
                "detections_%s_%s_%s_%s_%s.list" % (archive.database,
                    prog, searchcritlow, searchcrithigh, verNum))
            with open(dbListPath, 'w') as dbListFile:
                dbListFile.writelines("%s, %r\n" %
                    (entry, detDict[table][entry]) for entry in detDict[table])
            # compare() expects one flat {file key: details} dictionary whose
            # entries end with a programme tuple, i.e. the shape readlistfile()
            # produces from the list file just written; the per-table
            # dictionary itself can never match a FITS file key.
            for entry in detDict[table]:
                dbDict[entry] = list(detDict[table][entry]) + [(prog, prog)]
    else:
        for entry in sorted(os.listdir(catsdir)):
            if detfilename not in entry:
                continue
            # detections_<db>_<prog>_<begin>_<end>_<version>.list
            fields = entry.split('_')
            try:
                filebeg = int(fields[3])
                fileend = int(fields[4])
            except (IndexError, ValueError):
                continue  # not a detection list file
            if searchcritlow <= fileend and filebeg <= searchcrithigh:
                filePath = os.path.join(catsdir, entry)
                dbDict.update(readlistfile(filePath))

    # compare fits vs. db
    errl, ndbc = compare(dbDict, fitsDict)
    if errl:
        errfn = os.path.join(outdir, "detectionerrors_%s_%s_%s_%s.errlog" %
            (archive.database, searchcritlow, searchcrithigh, verNum))
        with open(errfn, 'w') as errFile:
            errFile.writelines(entry + '\n' for entry in errl)

        # list the image/catalogue file pairs that are missing from the DB
        ncfn = os.path.join(outdir, "catnotindb_%s_%s_%s_%s.list" %
            (archive.database, searchcritlow, searchcrithigh, verNum))
        ncfl = []
        for entry in errl:
            if "WARNING! File not in DB:" in entry:
                cn = entry[entry.find('/'):entry.find('.fits')+5]
                fn = cn.replace("_cat.fits", ".fit")
                ncfl.append(fn + "\n" + cn + "\n")

        with open(ncfn, 'w') as notInDbFile:
            notInDbFile.writelines(ncfl)

    if ndbc:
        ncfn = os.path.join(outdir, "catprognotindb_%s_%s_%s_%s.list" %
            (archive.database, searchcritlow, searchcrithigh, verNum))

        with open(ncfn, 'w') as noProgFile:
            noProgFile.writelines("%s \t %s\n" % entry for entry in ndbc)

#------------------------------------------------------------------------------
# Change log:
#
# 11-May-2006, ETWS: Original version.
# 15-May-2006,  RSC: Renamed utils.checkDirExist() to ensureDirExist().
# 10-Jul-2006,  RSC: Replaced string module functions with str() equivalents as
#                    the string module will be obsoleted in Python 3.0
#  6-Dec-2006,  RSC: Fixed filename pattern matching bug (only affects 05A v2).
# 22-Feb-2007,  RSC: Updated to reflect move of loadServerHost() constant from
#                    DbConstants to SystemConstants