"""
Reads multiframeIDs from the database and writes a list of
'multiframeID,extension number,compressed image file'.

@author: E. Sutorius
@org: WFAU, IfA, University of Edinburgh
"""

from collections import defaultdict
import dircache
import mx.DateTime as mxTime
import os

from wsatools.CLI import CLI
from wsatools.DbConnect.DbSession import DbSession
from wsatools.File import File
import wsatools.FitsUtils as fits
from wsatools.Logger import Logger
from wsatools.SystemConstants import SystemConstants
import wsatools.Utilities as utils
class CreateMfIdJpegList(object):
    """Reads multiframeIDs from the database and compiles file lists
    depending on existing JPGs.
    """
    # NOTE: the class docstring doubles as the program description that the
    # script entry point hands to CLI, so keep it short and user-facing.

    def __init__(self,
                 database=DbSession.database,
                 checkDB=CLI.getOptDef("check"),
                 outPath=CLI.getOptDef("outpath"),
                 subDir=CLI.getOptDef("subdir"),
                 jpgPath=CLI.getOptDef("jpgpath"),
                 raidDisks=CLI.getOptDef("raiddisk"),
                 checkDeprecated=CLI.getOptDef("deprecated"),
                 beginDate=CLI.getArgDef("startdate"),
                 endDate=CLI.getArgDef("enddate"),
                 versionStr=CLI.getArgDef("version"),
                 inFileName=CLI.getArgDef("datafile")):
        """
        Store the run configuration. The database connection itself is
        only opened in run().

        @param beginDate: First date to process, eg. 20050101.
        @type  beginDate: int
        @param checkDB: Check the DB for missing entries of compFile.
        @type  checkDB: bool
        @param checkDeprecated: Check the DB for deprecated entries of
                                compFile.
        @type  checkDeprecated: bool
        @param database: Name of the database to connect to.
        @type  database: str
        @param endDate: Last date to process, eg. 20050131.
        @type  endDate: int
        @param inFileName: File containing multiframeID info.
        @type  inFileName: str
        @param jpgPath: Directory where 'products/jpgs' will reside.
        @type  jpgPath: str
        @param outPath: Path to directory for produced files
        @type  outPath: str
        @param raidDisks: Disks to search if not in standard search path,
                          either as comma-separated string or as list.
        @type  raidDisks: str or list
        @param subDir: Fits subdirectory, eg. 'products/stacks'
        @type  subDir: str
        @param versionStr: Version number of the data.
        @type  versionStr: str

        """
        self.database = database
        # Only the part after the last '.' of the database name is used.
        self.sysc = SystemConstants(self.database.rpartition('.')[2])
        beginDate, endDate = \
            self.sysc.obsCal.getDatesFromInput(beginDate, endDate)

        self.startDateStr = beginDate
        self.endDateStr = endDate
        self.versionStr = versionStr
        self.startMJD = int(mxTime.strptime(self.startDateStr, "%Y%m%d").mjd)
        self.endMJD = int(mxTime.strptime(self.endDateStr, "%Y%m%d").mjd)
        self.inFileName = (inFileName or None)
        self.checkDB = checkDB
        self.checkDeprecated = checkDeprecated
        self.outPath = outPath
        self.subDir = subDir
        if not raidDisks:
            self.raidDisks = self.sysc.availableRaidFileSystem()
        elif hasattr(raidDisks, "split"):
            # Command-line form: "disk1,disk2,..."
            self.raidDisks = raidDisks.split(',')
        else:
            self.raidDisks = list(raidDisks)
        self.jpgPath = jpgPath

    #--------------------------------------------------------------------------

    def run(self):
        """
        Compile and write the multiframeID/JPEG file lists.

        Matches the FITS files found on disk against the available JPEGs,
        gets the multiframeIDs (from the database or, if given, from
        inFileName) and writes into outPath, each only if non-empty:
         - duplicatedjpgs_*.log: JPEGs existing in more than one directory
         - nojpgindb_*.list: result of checkForJpgs(), only if checkDB or
           checkDeprecated is set
         - filesindb_*.list: result of getMfIDs(), only without inFileName
         - [depr]jpgs_*.log: 'multiframeID,extension number,JPEG file'
         - notavailjpgs_*.log: FITS files without matching JPEG
         - mfidnotavailfiles_*.log: files without known multiframeID
        """
        dbName = self.database.rpartition('.')[2]
        outFileTimeStamp = ''.join(["_", self.startDateStr,
                                    "_", self.endDateStr,
                                    "_", self.versionStr])
        outFileSuffix = ''.join(["_", dbName, outFileTimeStamp])

        procfiles, notavailfiles, dupliDict = self._matchFitsToJpegs()
        self._reportDuplicates(dupliDict, outFileTimeStamp)

        # get info from the DB
        self.archive = DbSession(self.database)
        useMissing = self.checkDB or self.checkDeprecated

        missingJpgsDict = {}
        if useMissing:
            missingJpgsDict = self.checkForJpgs()
            infoTxt = ' '.join([
                str(len(missingJpgsDict)), "files without jpegs in '",
                dbName, ".Multiframe' from",
                self.startDateStr, "to", self.endDateStr,
                "for version", self.versionStr])
            if missingJpgsDict:
                jpgNotinDbFile = File(os.path.join(
                    self.outPath, "nojpgindb%s.list" % outFileSuffix))
                self.write2file(jpgNotinDbFile, missingJpgsDict)
                infoTxt += " written to %s" % jpgNotinDbFile.name
            print(infoTxt)
            # From here on only the multiframeID of the first
            # (multiframeID, compFile) entry of each file is needed.
            missingJpgsDict = dict(
                (fileName, entries[0][0])
                for fileName, entries in missingJpgsDict.items())

        if self.inFileName:
            mfidDict = self.readMfidsFromFile()
        else:
            mfidDict = self.getMfIDs()
            infoTxt = ''.join([
                str(len(mfidDict)), " files in '",
                dbName, ".Multiframe' from ",
                self.startDateStr, " to ", self.endDateStr,
                " for version ", self.versionStr])
            if mfidDict:
                indbFile = File(os.path.join(
                    self.outPath, "filesindb%s.list" % outFileSuffix))
                self.write2file(indbFile, mfidDict)
                infoTxt += " written to %s" % indbFile.name
            print(infoTxt)

        mfidnotavailfiles = []

        if procfiles:
            prefix = ("depr" if self.jpgPath and "deprecated" in self.jpgPath
                      else '')
            procFile = File(os.path.join(
                self.outPath,
                ''.join([prefix, "jpgs", outFileSuffix, ".log"])))
            mfidnotavailfiles = self.updateProcessedData(
                procfiles, (missingJpgsDict if useMissing else mfidDict),
                procFile)

        if notavailfiles:
            notAvailJpgFile = File(os.path.join(
                self.outPath, "notavailjpgs%s.log" % outFileSuffix))
            print(' '.join([str(len(notavailfiles)),
                            "files with no available jpg are in",
                            notAvailJpgFile.name]))
            notAvailJpgFile.wopen()
            try:
                for elem in sorted(notavailfiles):
                    notAvailJpgFile.writetheline(elem)
                    if elem not in mfidDict:
                        mfidnotavailfiles.append(elem)
            finally:
                notAvailJpgFile.close()

        if mfidnotavailfiles:
            notAvailMfidFile = File(os.path.join(
                self.outPath, "mfidnotavailfiles%s.log" % outFileSuffix))
            print(' '.join([str(len(mfidnotavailfiles)),
                            "files with no entry in the DB are in ",
                            notAvailMfidFile.name]))
            notAvailMfidFile.wopen()
            try:
                notAvailMfidFile.writelines(sorted(mfidnotavailfiles))
            finally:
                notAvailMfidFile.close()

    #--------------------------------------------------------------------------

    def _matchFitsToJpegs(self):
        """
        Match the FITS files on disk against the available JPEGs.

        Also sets self.jpegDateDict (see createJpegDict()).

        @return: Dictionary {FITS file path: list of its JPEG paths}, set
                 of FITS file paths without JPEG, and dictionary
                 {date dir/JPEG name: set of JPEG dirs containing it}.
        @rtype:  tuple(dict, set, defaultdict(set))
        """
        procfiles = {}
        notavailfiles = set()
        dupliDict = defaultdict(set)

        if self.subDir:
            fitsDirs = [self.subDir]
        else:
            fitsDirs = [self.sysc.fitsDir, self.sysc.stackDir,
                        self.sysc.mosaicDir, self.sysc.diffDir]

        fitsList = fits.FitsList(self.sysc, prefix="CMJL_")
        fitsList.createFitsDateDict(disklist=self.raidDisks,
                                    ingestDirectory=fitsDirs,
                                    beginDateStr=self.startDateStr,
                                    endDateStr=self.endDateStr,
                                    versionStr=self.versionStr)
        self.jpegDateDict = self.createJpegDict()

        catTag = self.sysc.catSuffix + self.sysc.catType
        # The JPEGs of a date dir do not depend on the FITS dir, so they are
        # listed only once per date dir.
        jpegsOfDate = {}
        for fitsDir in fitsList.fitsDateDict:
            print("reading  %s" % fitsDir)
            for datum in fitsList.fitsDateDict[fitsDir]:
                dateStr, versStr = datum.partition('_v')[::2]
                if not (self.startDateStr <= dateStr <= self.endDateStr
                        and self.versionStr == versStr):
                    continue

                print("processing  %s" % datum)
                # NOTE(review): FITS files of date dirs that have no JPEG
                # dir at all are skipped, ie. they are not reported as
                # lacking a JPEG -- confirm that this is intended.
                if datum not in self.jpegDateDict:
                    continue

                # Only MEF image files, no catalogues. A new list is built
                # instead of editing the directory listing in place.
                fitsListing = [
                    entry
                    for entry in os.listdir(os.path.join(fitsDir, datum))
                    if self.sysc.mefType in entry and catTag not in entry]

                if datum not in jpegsOfDate:
                    jpegsOfDate[datum] = self._collectJpegs(datum, dupliDict)
                jpegDict = jpegsOfDate[datum]

                for item in fitsListing:
                    name = item[:item.rfind(self.sysc.mefType)]
                    filePath = os.path.join(fitsDir, datum, item)
                    if name in jpegDict:
                        procfiles[filePath] = jpegDict[name]
                    else:
                        notavailfiles.add(filePath)

        return procfiles, notavailfiles, dupliDict

    #--------------------------------------------------------------------------

    def _collectJpegs(self, datum, dupliDict):
        """
        List the JPEGs of one date dir in all its JPEG directories.

        @param datum: Name of the date dir, a key of self.jpegDateDict.
        @type  datum: str
        @param dupliDict: Updated in place: for every JPEG the JPEG dir is
                          added to the set stored under 'datum/JPEG name'.
        @type  dupliDict: defaultdict(set)

        @return: Dictionary {JPEG name up to its last '_': list of paths}.
        @rtype:  defaultdict(list)
        """
        jpegDict = defaultdict(list)
        for jpgDir in self.jpegDateDict[datum]:
            filesPath = os.path.join(jpgDir, datum)
            for elem in sorted(os.listdir(filesPath)):
                # Everything after the last '_' is cut off, so that all
                # JPEGs of one FITS file end up under the same key.
                name = elem[:elem.rfind("_")]
                jpegDict[name].append(os.path.join(filesPath, elem))
                dupliDict[os.path.join(datum, elem)].add(jpgDir)
        return jpegDict

    #--------------------------------------------------------------------------

    def _reportDuplicates(self, dupliDict, outFileTimeStamp):
        """
        Print and log all JPEGs that exist in more than one directory.

        Nothing is printed or written if there are no such JPEGs.

        @param dupliDict: Dictionary {date dir/JPEG name: set of JPEG dirs}.
        @type  dupliDict: dict(str: set)
        @param outFileTimeStamp: Suffix for the name of the log file.
        @type  outFileTimeStamp: str
        """
        duplicated = sorted(name for name in dupliDict
                            if len(dupliDict[name]) > 1)
        if not duplicated:
            return

        print("The following files exist in more than one directory:")
        dupliFile = File(os.path.join(
            self.outPath, "duplicatedjpgs%s.log" % outFileTimeStamp))
        dupliFile.wopen()
        try:
            for name in duplicated:
                print("")
                print(":: %s ::" % name)
                for path in sorted(dupliDict[name]):
                    jpgName = os.path.join(path, name)
                    # modification time helps to decide which copy to keep
                    mtime = mxTime.DateTimeFromTicks(
                        os.path.getmtime(jpgName))
                    dateStr = mtime.date + " %02d:%02d:%02d" % \
                        (mtime.hour, mtime.minute, mtime.second)
                    print("%s %s" % (dateStr, jpgName))
                    dupliFile.writetheline('%s, %s' % (dateStr, jpgName))
        finally:
            dupliFile.close()

    #--------------------------------------------------------------------------

    @staticmethod
    def updateJpegDict(jpegDict, jpgDir):
        """
        Update the jpgDict with files from the given directory.

        Only entries of jpgDir starting with '20' and containing '_v' are
        used; entries whose part after the first '.' is purely alphabetic
        are left out.

        @param jpegDict: Dictionary {date dir name: list of JPEG dirs},
                         updated in place.
        @type  jpegDict: defaultdict(list)
        @param jpgDir: Directory containing the date dirs.
        @type  jpgDir: str

        @return: The updated jpegDict.
        @rtype:  defaultdict(list)
        """
        for entry in sorted(os.listdir(jpgDir)):
            if entry.startswith('20') and '_v' in entry \
              and not entry.partition('.')[2].isalpha():
                jpegDict[entry].append(jpgDir)
        return jpegDict

    #--------------------------------------------------------------------------

    def createJpegDict(self):
        """
        Create a dictionary of compressed image dates and their dirs.

        Without jpgPath the sysc.compressImDir of every RAID disk is used.
        A jpgPath containing "disk" is used as is. Any other jpgPath is
        searched for on every RAID disk; if it contains "deprecated" the
        sub-directories of all 'products/*deprecated*' dirs are used
        instead.

        @return: Dictionary {date dir name: list of JPEG dirs}.
        @rtype:  defaultdict(list)
        """
        jdd = defaultdict(list)
        if not self.jpgPath:
            for direc in self.raidDisks:
                jpgDir = os.path.join(direc, self.sysc.compressImDir)
                jdd = self.updateJpegDict(jdd, jpgDir)
            return jdd

        if "disk" in self.jpgPath:
            return self.updateJpegDict(jdd, self.jpgPath)

        for direc in self.raidDisks:
            if "deprecated" in self.jpgPath:
                productsDir = os.path.join(direc, "products")
                jpgDirList = []
                for entry in os.listdir(productsDir):
                    if "deprecated" in entry:
                        deprDir = os.path.join(productsDir, entry)
                        # os.listdir() gives bare names only, so the full
                        # path has to be put together again; the dirs of
                        # all deprecated dirs are collected.
                        jpgDirList.extend(
                            os.path.join(deprDir, sub)
                            for sub in os.listdir(deprDir)
                            if os.path.isdir(os.path.join(deprDir, sub)))
            else:
                jpgDirList = [os.path.join(direc, self.jpgPath)]

            for jpgDir in jpgDirList:
                if os.path.exists(jpgDir):
                    jdd = self.updateJpegDict(jdd, jpgDir)
        return jdd

    #--------------------------------------------------------------------------

    def checkForJpgs(self):
        """
        Read fileName, multiframeID and compFile from the database for
        every date between startMJD and endMJD, where compFile does not
        contain 'disk' or, if checkDeprecated is set, where compFile does
        not contain sysc.deprecatedComprImDir.

        Needs self.archive, which is set in run().

        @return: Dictionary {file name after its last ':':
                 list of (multiframeID, compFile after its last ':')}.
        @rtype:  defaultdict(list)
        """
        # The condition on compFile is the same for all dates.
        if self.checkDeprecated:
            compCheckStr = "compFile NOT LIKE '%%%s%%' AND" % \
                self.sysc.deprecatedComprImDir
        else:
            compCheckStr = "compFile NOT LIKE '%disk%' AND"

        dbJpgsDict = defaultdict(list)
        for mjd in range(self.startMJD, self.endMJD + 1):
            date = mxTime.DateTimeFromMJD(mjd)
            dateVersStr = "'%04d%02d%02d_v%s'" % (
                date.year, date.month, date.day, self.versionStr)
            mfidlist = self.archive.query(
                "F.fileName, F.multiframeID, MFD.compFile",
                fromStr="MultiframeDetector as MFD, FlatFileLookUp as F",
                whereStr="F.multiframeID=MFD.multiframeID AND "
                         "%s dateVersStr=%s" % (compCheckStr, dateVersStr))
            for entry in mfidlist:
                dbJpgsDict[entry[0].rpartition(':')[2]].append(
                    (entry[1], entry[2].rpartition(':')[2]))
        return dbJpgsDict

    #--------------------------------------------------------------------------

    def getMfIDs(self):
        """
        Read filename,multiframeIDs from the given database for every date
        between startMJD and endMJD.

        Needs self.archive, which is set in run().

        @return: Dictionary {file name after its last ':': multiframeID}.
        @rtype:  dict
        """
        mfidList = []
        for mjd in range(self.startMJD, self.endMJD + 1):
            date = mxTime.DateTimeFromMJD(mjd)
            dateVersStr = "'%04d%02d%02d_v%s'" % (
                date.year, date.month, date.day, self.versionStr)
            mfidList += self.archive.query(
                "fileName, multiframeID", "FlatFileLookup",
                "dateVersStr=" + dateVersStr)

        return dict((name.rpartition(':')[2], mfid)
                    for name, mfid in mfidList)

    #--------------------------------------------------------------------------

    def readMfidsFromFile(self):
        """
        Read filename,multiframeIDs from the file inFileName.

        Only lines containing a ':' are used; each of them has to consist
        of exactly two whitespace separated values, the file name and the
        multiframeID. The multiframeIDs are kept as strings.

        @return: Dictionary {file name after its last ':': multiframeID}.
        @rtype:  dict
        """
        mfidList = [line.split()
                    for line in utils.ParsedFile(self.inFileName)
                    if ':' in line]

        return dict((name.rpartition(':')[2], mfid)
                    for name, mfid in mfidList)

    #--------------------------------------------------------------------------

    @staticmethod
    def updateProcessedData(jpgedFiles, aDict, outFile):
        """
        Write data into file.

        For every file in jpgedFiles that is also in aDict one line
        'multiframeID,extension number,compressed image file' per JPEG is
        written, where the extension number is the part of the JPEG name
        between its last '_' and its last '.'.

        @param jpgedFiles: Dictionary {file name: list of its JPEGs}.
        @type  jpgedFiles: dict(str: list(str))
        @param aDict: Dictionary {file name: multiframeID}.
        @type  aDict: dict
        @param outFile: File to write to, it is opened and closed here.
        @type  outFile: File

        @return: Files of jpgedFiles that are not in aDict.
        @rtype:  list(str)
        """
        noMfIDFiles = []
        prficounter = 0
        outFile.wopen()
        try:
            for entry in jpgedFiles:
                if entry not in aDict:
                    noMfIDFiles.append(entry)
                    continue

                mfid = str(aDict[entry])
                for imag in jpgedFiles[entry]:
                    extNum = imag[imag.rfind('_') + 1:imag.rfind('.')]
                    outFile.writetheline(','.join([mfid, extNum, imag]))
                # counts files, not JPEGs
                prficounter += 1
        finally:
            outFile.close()

        print(' '.join([str(prficounter),
                        "files' jpgs and their mfID written to",
                        outFile.name]))

        return noMfIDFiles

    #--------------------------------------------------------------------------

    @staticmethod
    def write2file(dbFile, jpgDict):
        """
        Write the dictionary as lines of 'key, repr(value)', sorted by key.

        @param dbFile: File to write to, it is opened and closed here.
        @type  dbFile: File
        @param jpgDict: Dictionary to write, the keys have to be strings.
        @type  jpgDict: dict
        """
        dbFile.wopen()
        try:
            for key in sorted(jpgDict):
                dbFile.writetheline(key + ", " + repr(jpgDict[key]))
        finally:
            dbFile.close()
#------------------------------------------------------------------------------
# Entry point for script

if __name__ == "__main__":
    # Define the command-line interface; this has to happen before the CLI
    # object is created.
    CLI.progArgs += [
        CLI.Argument("startdate", "05A", isValOK=CLI.isDateOK),
        CLI.Argument("enddate", "05A", isValOK=CLI.isDateOK),
        CLI.Argument("version", '1'),
        CLI.Argument("datafile", None, isOptional=True)
        ]
    CLI.progOpts += [
        CLI.Option('c', 'check',
                   "checks the DB for missing entries of compFile"),
        CLI.Option('d', 'deprecated',
                   "checks the DB for deprecated entries of compFile"),
        CLI.Option('o', 'outpath',
                   "new destination for produced files",
                   "DIR", os.curdir),
        CLI.Option('s', 'subdir',
                   "subdirectory containing FITS date directories",
                   "DIR", SystemConstants().fitsDir),
        CLI.Option('j', 'jpgpath',
                   "path to non-standard jpg location",
                   "DIR", None),
        CLI.Option('r', 'raiddisk',
                   "list of disk if not in standard search path",
                   "DIR", None)
        ]
    cli = CLI(CreateMfIdJpegList.__name__, "$Revision: 8553 $",
              CreateMfIdJpegList.__doc__)
    Logger.isVerbose = False
    Logger.addMessage(cli.getProgDetails())

    # The instance gets its own name, so that the class itself stays
    # available under CreateMfIdJpegList.
    jpegLister = CreateMfIdJpegList(
        database=cli.getArg("database"),
        checkDB=cli.getOpt("check"),
        outPath=cli.getOpt("outpath"),
        subDir=cli.getOpt("subdir"),
        jpgPath=cli.getOpt("jpgpath"),
        raidDisks=cli.getOpt("raiddisk"),
        checkDeprecated=cli.getOpt("deprecated"),
        beginDate=cli.getArg("startdate"),
        endDate=cli.getArg("enddate"),
        versionStr=cli.getArg("version"),
        inFileName=cli.getArg("datafile"))

    jpegLister.run()