1
2
3
4 """
5 Checks if files are processed by CU2. Gives back a list of files and a list
6 of completely non-processed directories.
7
8 @author: E. Sutorius
9 @org: WFAU, IfA, University of Edinburgh
10 """
11
12 import dircache as dc
13 import getopt
14 import os
15 import sys
16
17 import wsatools.CSV as csv
18 from wsatools.FitsUtils import FitsList
19 from wsatools.Logger import Logger
20 import wsatools.SystemConstants as sysc
21 import wsatools.Utilities as utils
22
23
def usage():
    """Print the command-line synopsis and a description of each argument.

    Takes no arguments and returns None; the text is written to standard
    output, one line per entry, with a blank line after the synopsis.
    """
    # Each entry is printed with the single-argument print(...) form, which
    # produces the same output as a Python 2 print statement and also parses
    # under Python 3.
    helpLines = (
        # The synopsis lists every option accepted by the getopt call in
        # main(), including -o/--outpath.
        "usage: checkcu2.py [-k/--disk disklist] [-d/--dir directory] "
        "[-o/--outpath outpath] [-M/--mosaic] [-r/--redo] "
        "startdate enddate version\n",
        "disklist = list of disks, eg. '/disk01/wsa,/disk02/wsa'; "
        "default is availableRaidFileSystem().",
        # main() takes its default directory from sysc.fitsDir().
        "directory = subdirectory, eg. 'products/stacks'; "
        "default is fitsDir().",
        "outpath = path where products/jpgs is created.",
        "mosaic = forces making mosaics, may break if size is bigger "
        "than 2 GB.",
        "redo = re-do the jpegs.",
        "startdate is the start date of directories to check.",
        "enddate is the end date of directories to check.",
        "version is the version number of the data dir.",
    )
    for line in helpLines:
        print(line)
34
35
36 # NOTE(review): the "def" line of this function and all indentation are missing from this listing.
38 """Check if files are processed by CU2.
39
40 startdate is the start date of directories to check (read with int()).
41 enddate is the end date of directories to check (read with int()).
42 version is the version number of the data dir (read with int()).
43 [disklist] is a csv list of disks to be processed (-k/--disk)
44 [directory] is the subdirectory, eg. 'products/stacks' (-d/--dir)
45 [outpath] path where products/jpgs is created (-o/--outpath)
46 """
47 # NOTE(review): comments below describe individual statements only; the
48 # nesting of loops and branches must be confirmed against the original file.
49 try:
50 opts, args = getopt.getopt(sys.argv[1:], "k:d:o:Mr",
51 ["disk=", "dir=", "outpath=",
52 "mosaic", "redo"])
53 except getopt.GetoptError:
54 # Unrecognised option: echo argv, print the usage text and stop.
55 print argv
56 usage()
57 raise SystemExit
58 # Defaults; each one can be replaced by the matching option below.
59 disklist = sysc.availableRaidFileSystem()
60 directory = sysc.fitsDir()
61 jpgPath = None
62 forceMosaic = False
63 reDo = False
64 # Apply the options that were given on the command line.
65 for o, a in opts:
66 if o in ("-k", "--disk"):
67 disklist = a.split(',')
68 if o in ("-d","--dir"):
69 directory = a
70 if o in ("-o", "--outpath"):
71 jpgPath = a
72 if o in ("-M","--mosaic"):
73 forceMosaic = True
74 if o in ("-r","--redo"):
75 reDo = True
76 # Exactly three positional arguments are required.
77 if len(args) != 3:
78 print argv
79 usage()
80 raise SystemExit
81 # Positional arguments, in order: startdate, enddate, version.
82 searchcritlow = int(args[0])
83 searchcrithigh = int(args[1])
84 versionnr = int(args[2])
85 # A directory starting with "test" replaces disklist by sysc.developRaidFileSystem() and sets subDir to "test".
86 disklist, subDir = ((disklist, "") if not directory.startswith("test") else
87 (sysc.developRaidFileSystem(), "test"))
88 # Directory into which the reprocfiles/notprocdirs/procfiles logs are written.
89 cu2Path = os.path.join(sysc.curationCodePath(), "invocations/cu2")
90
91 # fitsdatedict is used below as {directory: names of date-version sub-directories}.
92 fitsDirs = FitsList(prefix="cu2_")
93 fitsDirs.createFitsDateDict(disklist, directory)
94 fitsdatedict = fitsDirs.fitsDateDict
95 # JPEG directories: below outpath if given, otherwise below every entry of
96 # sysc.availableRaidFileSystem(); jpegdatedict maps each entry found in them to the directories holding it.
97 jpegdatedict = {}
98 jpgDirList = []
99 if jpgPath:
100 jpgDirList = [os.path.join(jpgPath, sysc.compressImDir())]
101 else:
102 for direc in sysc.availableRaidFileSystem():
103 jpgDirList.append(os.path.join(direc, subDir,
104 sysc.compressImDir()))
105
106 for jpgDir in jpgDirList:
107 utils.ensureDirExist(jpgDir)
108 jpgdatelist = dc.listdir(jpgDir)
109 for jpgdate in jpgdatelist:
110 jpegdatedict.setdefault(jpgdate, []).append(jpgDir)
111 # Result lists filled during the scan, then the header row of the log table
112 # (the "jpg/4" column is the JPEG count divided by 4.0).
113 notprocdirs =[]
114 notprocfiles = []
115 procfiles = []
116 catlist = []
117 duplicatelist = []
118 Logger.addMessage("%50s %6s %8s" %("date", "fits", "jpg/4"))
119 for direc in fitsdatedict.iterkeys():
120 for datumVersStr in fitsdatedict[direc]:
121 date = int(datumVersStr[:datumVersStr.find('_')])
122 vers = fitsDirs.getVersion(datumVersStr)
123 if (searchcritlow <= date <= searchcrithigh) and versionnr == vers:
124 fitslist = dc.listdir(os.path.join(direc, datumVersStr))
125 # Iterate over a copy (tmpl) so entries can be removed from fitslist: catalogue files are noted in
126 # catlist and removed, as is every entry not ending in sysc.mefType().
127 # NOTE(review): fitslist is the list returned by dc.listdir; the dircache docs say to treat it as read-only.
128 tmpl = fitslist[:]
129 for elem in tmpl:
130 if elem.find(sysc.catSuffix() + sysc.catType()) >= 0:
131 catlist.append(os.path.join(direc ,datumVersStr, elem))
132 fitslist.remove(elem)
133 elif not elem.endswith(sysc.mefType()):
134 fitslist.remove(elem)
135
136 # Only date-version names that also occur in a JPEG directory are compared file by file.
137 if jpegdatedict.has_key(datumVersStr):
138 # jpegdict counts the JPEG files per name, the name being the file name up to its last "_".
139 jpegdict = {}
140 for jpgdir in jpegdatedict[datumVersStr]:
141 jpeglist = dc.listdir(os.path.join(jpgdir,
142 datumVersStr))
143
144
145 # Log the JPEG directory, the number of FITS files kept and a quarter of the JPEG count.
146 Logger.addMessage("%50s %6s %8s" %
147 (os.path.join(jpgdir,
148 datumVersStr),
149 len(fitslist),
150 len(jpeglist)/4.0))
151 for elem in jpeglist:
152 name = elem[:elem.rfind("_")]
153 if jpegdict.has_key(name):
154 jpegdict[name] += 1
155 else:
156 jpegdict[name] = 1
157 # Sort every remaining FITS file into notprocfiles, duplicatelist or procfiles;
158 # fpnum is the number of times its path is already present in notprocfiles.
159 for item in fitslist:
160 name = item[:item.rfind(".fit")]
161 filepath = os.path.join(direc ,datumVersStr, item)
162 fpnum = notprocfiles.count(filepath)
163 if reDo:
164 notprocfiles.append(filepath)
165 # Names containing sysc.mosaicSuffix() with "dp" as third "_"-separated field are queued only if forceMosaic is set.
166 elif sysc.mosaicSuffix() in name and \
167 name.split('_')[2] == "dp":
168 if forceMosaic:
169 notprocfiles.append(filepath)
170 else:
171 pass
172 # No JPEG under this name, the name contains no "_cat", and the path is not queued yet.
173 elif not(jpegdict.has_key(name)) and \
174 name.find("_cat")<0 and fpnum == 0:
175 notprocfiles.append(filepath)
176 # Fewer than 4 JPEGs (and not queued yet) queues the file; more than 4 records a duplicate entry.
177 elif jpegdict.has_key(name) and \
178 jpegdict[name] < 4 and fpnum == 0:
179 notprocfiles.append(filepath)
180 elif jpegdict.has_key(name) and \
181 jpegdict[name] > 4:
182 duplicatelist.append(
183 (jpegdatedict[datumVersStr], name,
184 jpegdict[name]))
185
186 # Every other file is recorded as processed.
187 else:
188 procfiles.append(filepath)
189 if fpnum > 0:
190 notprocfiles.remove(filepath)
191 else:
192 notprocdirs.append(os.path.join(direc, datumVersStr))
193 # Log the directory with a JPEG count of 0.
194 Logger.addMessage("%50s %6s %8s" %
195 (os.path.join(direc, datumVersStr),
196 len(fitslist), 0))
197 # NOTE(review): the files opened below are never closed explicitly.
198 # Each non-empty list is written, one path per line, to a log file in cu2Path;
199 # the file names carry the current value of datumVersStr.
200 if notprocfiles:
201 filePath = \
202 os.path.join(cu2Path, "reprocfiles_%s.log"%datumVersStr)
203 Logger.addMessage("%s files to be processed are in: %s" %
204 (len(notprocfiles), filePath))
205 open(filePath,'w').writelines(f+'\n' for f in notprocfiles)
206
207 # Directories recorded in notprocdirs.
208 if notprocdirs:
209 filePath = \
210 os.path.join(cu2Path, "notprocdirs_%s.log"% datumVersStr)
211 Logger.addMessage("%s dirs to be processed are in: %s" %
212 (len(notprocdirs), filePath))
213 open(filePath,'w').writelines(f+'\n' for f in notprocdirs)
214
215 # Files recorded as processed.
216 if procfiles:
217 filePath = \
218 os.path.join(cu2Path, "procfiles_%s.log" % datumVersStr)
219 Logger.addMessage("%s files processed are in: %s" %
220 (len(procfiles), filePath))
221 open(filePath,'w').writelines(f + '\n' for f in procfiles)
222
223 Logger.addMessage("%s catalogue files" % len(catlist))
224 # Duplicates are written through wsatools.CSV to a file whose name has no directory part; the time stamp
225 # in the name is cut at its first "." and has spaces replaced by "_".
226 if duplicatelist:
227 log_time = utils.makeTimeStamp()
228 log_time = log_time[:log_time.find('.')].replace(' ','_')
229 dfname = "duplicatejpgs_"+log_time+".log"
230 Logger.addMessage("%s duplicate jpgs are in: %s" %
231 (len(duplicatelist), dfname))
232 csv.File(dfname, 'w').writelines(duplicatelist)
233
234
235 # Script entry point: call main() with the full argument vector when this file is executed directly.
236 if __name__ == '__main__':
237 main(sys.argv)
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264