1
2
3
4 """
5 Find JPGs that are not correctly created.
6
7 @author: E. Sutorius
8 @org: WFAU, IfA, University of Edinburgh
9 """
10
11 from numpy import arange
12 from collections import defaultdict
13 import inspect
14 import os
15
16 from wsatools.CLI import CLI
17 from wsatools.DbConnect.DbSession import DbSession, Join
18 from wsatools.File import File
19 from wsatools.FitsUtils import FitsList
20 from wsatools.DbConnect.IngCuSession import IngCuSession
21 from wsatools.Logger import Logger
22
23
25 """
26 Find JPGs that are not correctly created.
27 """
28
29
30 minFileSizeDef = 100000
31
32
33
def __init__(self,
             curator=CLI.getOptDef("curator"),
             database=DbSession.database,
             minFileSize=CLI.getOptDef("minsize"),
             beginDate=CLI.getOptDef("begin"),
             endDate=CLI.getOptDef("end"),
             versionStr=CLI.getOptDef("version"),
             diskList=CLI.getOptDef("disklist"),
             deleteJpegs=CLI.getOptDef("remove"),
             cu2Listing=CLI.getOptDef("cu2jpeg"),
             isTrialRun=DbSession.isTrialRun,
             comment=CLI.getArgDef("comment")):
    """
    Store the search options as attributes and initialise the parent
    curation session.

    @param curator:     Passed on to the parent class initialiser.
    @type  curator:     str
    @param database:    Passed on to the parent class initialiser.
    @type  database:    str
    @param minFileSize: File size below which reprocessing is needed.
    @type  minFileSize: int
    @param beginDate:   First date/semester to process, eg. 20050101.
    @type  beginDate:   str
    @param endDate:     Last date/semester to process, eg. 20050131.
    @type  endDate:     str
    @param versionStr:  Version number of the data or 'all' or 'last'.
    @type  versionStr:  str
    @param diskList:    List of RAID disk paths including the
                        archive name, eg. '/disk01/wsa'. If empty, the
                        available RAID file systems are used.
    @type  diskList:    list(str)
    @param deleteJpegs: If True, delete JPGs with file sizes smaller
                        than minFileSize.
    @type  deleteJpegs: bool
    @param cu2Listing:  Create CU2-readable output file.
    @type  cu2Listing:  bool
    @param isTrialRun:  If True, do not perform database modifications.
    @type  isTrialRun:  bool
    @param comment:     Descriptive comment as to why curation task is
                        being performed.
    @type  comment:     str

    """
    typeTranslation = {"curator": str,
                       "database": str,
                       "minFileSize": int,
                       "beginDate": str,
                       "endDate": str,
                       "versionStr": str,
                       "diskList": list,
                       "deleteJpegs": bool,
                       "cu2Listing": bool,
                       "isTrialRun": bool,
                       "comment": str}

    # Copy every constructor argument onto self, converted to the type
    # given above. No extra locals may be created before this call, since
    # locals() is handed over as the value source.
    super(FindBrokenJpegs, self).attributesFromArguments(
        inspect.getargspec(FindBrokenJpegs.__init__)[0], locals(),
        types=typeTranslation)

    super(FindBrokenJpegs, self).__init__(cuNum=0,
                                          curator=self.curator,
                                          comment=self.comment,
                                          reqWorkDir=False,
                                          database=self.database,
                                          autoCommit=False,
                                          isTrialRun=self.isTrialRun)

    self.maxExtNum = FindBrokenJpegs.maxExtNum[self.sysc.loadDatabase]
    self.obsCal = self.sysc.obsCal
    # Honour a disk list supplied by the caller; previously it was always
    # overwritten here, which made the 'disklist' option a no-op.
    if not self.diskList:
        self.diskList = self.sysc.availableRaidFileSystem()
98
99
100
def run(self):
    """
    Search every semester and data version in the requested date range
    for broken JPGs, write them to a list file and optionally delete them.
    """
    self.getSearchDates()
    for semester in self.semList:
        semBeginDate, semEndDate = \
            self.obsCal.getDates(semester, "%Y%m%d")
        # Clip the requested range to the semester boundaries.
        beginDate = max(self.beginDate, semBeginDate)
        endDate = min(self.endDate, semEndDate)

        if self.versionStr.replace('.', '').isdigit():
            versList = [self.versionStr]
        elif self.versionStr == "all":
            versList = list(self.obsCal.versNums[semester])
        else:
            versList = [self.obsCal.maxVers(semester)]

        for versionStr in versList:
            self._findBrokenInVersion(
                semester, beginDate, endDate, versionStr)


def _findBrokenInVersion(self, semester, beginDate, endDate, versionStr):
    """
    Find, list and optionally delete the broken JPGs of one data version
    within one semester.

    @param semester:   Semester being searched (used in messages and in
                       the output file name).
    @param beginDate:  First date to search.
    @param endDate:    Last date to search.
    @param versionStr: Data version to search.
    """
    if self.cu2Listing:
        self.fitsDirs = FitsList(self.sysc, prefix="fbj_")
        self.fitsDirs.createFitsDateDict(beginDateStr=beginDate,
                                         endDateStr=endDate,
                                         versionStr=versionStr)

    # '%s' rather than '%d': a version given on the command line is a
    # string, for which '%d' raises TypeError.
    Logger.addMessage("Searching in %s: %s - %s v%s" %
                      (semester, beginDate, endDate, versionStr))
    firstDateVersStr = '%s_v%s' % (beginDate, versionStr)
    lastDateVersStr = '%s_v%s' % (endDate, versionStr)

    self.jpegDirs = FitsList(self.sysc, prefix='jpg')
    self.jpegDirs.createFitsDateDict(
        ingestDirectory=self.sysc.compressImDir,
        beginDateStr=beginDate, endDateStr=endDate,
        versionStr=versionStr, forceLists=True)
    infix = ("Cu2" if self.cu2Listing else "")
    outFile = File("brokenJpg%sList_%s_%s_%s_v%s.data" % (
        infix, semester, beginDate, endDate, versionStr))

    # Candidates found on disk, keyed by short file name.
    shadyJpegs = {}
    for dateVersStr in self.jpegDirs.invFitsDateDict:
        shadyJpegs.update(self.searchJpegs(dateVersStr))

    pcountDict = self.getPcounts(
        firstDateVersStr, lastDateVersStr, versionStr)

    # A candidate counts as broken unless getPcounts() returned its key.
    brokenJpegs = dict(
        (key, shadyJpegs[key])
        for key in set(shadyJpegs).difference(pcountDict))

    self._writeBrokenList(outFile, brokenJpegs)

    if self.deleteJpegs:
        self.removeJpgs(brokenJpegs)

    Logger.addMessage("Found %d broken JPGs in version %s." % (
        len(brokenJpegs), versionStr))


def _writeBrokenList(self, outFile, brokenJpegs):
    """
    Write the broken JPGs to outFile, either as FITS paths (CU2 listing)
    or as 'name, size (programme)' lines.

    @param outFile:     File object to write the listing to.
    @param brokenJpegs: Mapping of short file name to a list of
                        (jpgName, jpgSize) pairs.
    """
    outFile.wopen()
    try:
        for jpgShort in sorted(brokenJpegs):
            if self.cu2Listing:
                outFile.writetheline(os.path.join(
                    self.fitsDirs.invFitsDateDict[
                        jpgShort.partition('/')[0]], jpgShort + ".fit"))
                continue

            self._connectToDb()
            try:
                # The programme depends only on jpgShort, so look it up
                # once per key instead of once per JPG file.
                progName = self.getProgName(jpgShort)
                for jpgName, jpgSize in brokenJpegs[jpgShort]:
                    outFile.writetheline("%s, %d (%s)" % (
                        jpgName, jpgSize, progName))
            finally:
                self._disconnectFromDb()
    finally:
        # Always release the file, even if a lookup or write failed.
        outFile.close()
173
174
175
def getProgName(self, fileNamePart):
    """
    Look up the programme identifier of the frames whose file name
    contains the given fragment.

    @param fileNamePart: Fragment matched against fileName with SQL LIKE.
    @type  fileNamePart: str

    @return: Upper-cased dfsIDString of the first match with any
             'U/UKIDSS/' prefix removed; an empty string in a trial run
             or when nothing matches.
    @rtype:  str
    """
    selectStr = "distinct dfsIDString"
    fromStr = "Multiframe, ProgrammeFrame, Programme"
    whereStr = " AND ".join([
        "Multiframe.multiframeID=ProgrammeFrame.multiframeID",
        "ProgrammeFrame.programmeID=Programme.programmeID",
        "fileName like '%%%s%%'" % fileNamePart])

    if self.isTrialRun:
        # Trial run: show the query instead of executing it.
        print("SELECT  %s" % selectStr)
        print("FROM  %s" % fromStr)
        print("WHERE  %s" % whereStr)
        return ''

    results = self.archive.query(selectStr, fromStr, whereStr)
    # An unmatched file name yields no rows; do not index an empty result.
    progName = results[0] if results else ''
    return (progName or '').upper().replace('U/UKIDSS/', '')
192
193
194
def removeJpgs(self, brokenJpegs):
    """
    Delete the JPG files of every broken entry; in a trial run only log
    what would be deleted.

    @param brokenJpegs: Mapping of short file name to a list of
                        (jpgName, jpgSize) pairs.
    @type  brokenJpegs: dict(str: list(tuple))
    """
    import errno

    for jpgShort in sorted(brokenJpegs):
        jpgEntries = brokenJpegs[jpgShort]
        if self.isTrialRun:
            for jpgName, jpgSize in jpgEntries:
                Logger.addMessage(
                    "Will delete %s: %d" % (jpgName, jpgSize))
            continue

        if not jpgEntries:
            # No file path is known for this key, so nothing to delete.
            continue

        Logger.addMessage("Deleting %s_[1-%s].jpg"
                          % (jpgShort, self.sysc.maxHDUs - 1))

        # All extensions share the path up to the final '_<extNum>.jpg'.
        pathStem = jpgEntries[0][0].rpartition('_')[0]
        for extNum in range(1, self.sysc.maxHDUs):
            try:
                os.remove("%s_%d.jpg" % (pathStem, extNum))
            except OSError as error:
                # A missing extension file is expected; anything else
                # (e.g. permissions) is a real error.
                if error.errno != errno.ENOENT:
                    raise
210
211
212
228
229
230
def getPcounts(self, startDateStr, endDateStr, versionStr):
    """
    Query the image pcount of all non-dark frames in a date/version range
    whose imagePcount lies between 1 and 500000.

    @param startDateStr: First dateVersStr of the range (inclusive).
    @type  startDateStr: str
    @param endDateStr:   Last dateVersStr of the range (inclusive).
    @type  endDateStr:   str
    @param versionStr:   Version that dateVersStr must end with.
    @type  versionStr:   str

    @return: Mapping of '<subdir>/<root>' of each file name to its
             imagePcount; empty in a trial run.
    @rtype:  defaultdict
    """
    selectStr = "fileName, imagePcount"
    fromStr = Join(["FlatFileLookup", "MultiframeDetector"],
                   ["multiframeID"])
    whereStr = " AND ".join([
        "fileName NOT LIKE '%dark%'",
        "imagePcount between 1 and 500000",
        "dateVersStr BETWEEN '%s' AND '%s'" % (startDateStr, endDateStr),
        "dateVersStr LIKE '%%v%s'" % (versionStr)])

    pcountDict = defaultdict(dict)
    if self.isTrialRun:
        # Trial run: show the query instead of executing it.
        print("SELECT  %s" % selectStr)
        print("FROM  %s" % fromStr.fromStr)
        print("WHERE %s AND %s" % (fromStr.whereStr, whereStr))
        return pcountDict

    self._connectToDb()
    try:
        pcountList = self.archive.query(selectStr, fromStr, whereStr)
    finally:
        # Release the connection even if the query fails.
        self._disconnectFromDb()

    for fileName, imagePcount in pcountList:
        fitsFile = File(fileName)
        pcountDict[os.path.join(fitsFile.subdir, fitsFile.root)] = \
            imagePcount
    return pcountDict
256
257
258
275
276
277
278
279
280
281
if __name__ == '__main__':
    # Script entry point: declare the command-line interface, then run
    # the search with the parsed values.
    CLI.progArgs["comment"] = "Searching for broken JPGs."

    extraOptions = [
        CLI.Option('k', "disklist",
                   "list of RAID disk paths including the archive name, "
                   "eg. '/disk01/wsa,/disk02/wsa'.",
                   "LIST"),
        CLI.Option('b', "begin",
                   "first date/semester to process",
                   "DATE", "05A", isValOK=CLI.isDateOK),
        CLI.Option('e', "end",
                   "last date/semester to process",
                   "DATE", "08A", isValOK=CLI.isDateOK),
        CLI.Option('v', "version",
                   "version number of the data or 'all', 'last'",
                   "STR", 'all'),
        CLI.Option('s', "minsize",
                   "minimum file size below which reprocessing is needed.",
                   "INT", FindBrokenJpegs.minFileSizeDef),
        CLI.Option('J', "cu2jpeg",
                   "create outfile as input file for CU2."),
        CLI.Option('R', "remove",
                   "delete found JPGs."),
    ]
    CLI.progOpts += extraOptions

    cli = CLI(FindBrokenJpegs, "$Revision: 8858 $")
    Logger.addMessage(cli.getProgDetails())

    # Keyword arguments make the option-to-parameter mapping explicit.
    FindBrokenJpegs(
        curator=cli.getOpt("curator"),
        database=cli.getArg("database"),
        minFileSize=cli.getOpt("minsize"),
        beginDate=cli.getOpt("begin"),
        endDate=cli.getOpt("end"),
        versionStr=cli.getOpt("version"),
        diskList=cli.getOpt("disklist"),
        deleteJpegs=cli.getOpt("remove"),
        cu2Listing=cli.getOpt("cu2jpeg"),
        isTrialRun=cli.getOpt("test"),
        comment=cli.getArg("comment"),
    ).run()
322