Package helpers :: Module UpdateJpegs
[hide private]

Source Code for Module helpers.UpdateJpegs

  1  #! /usr/bin/env python 
  2  #------------------------------------------------------------------------------ 
  3  # $Id: UpdateJpegs.py 9133 2012-04-04 16:54:42Z EckhardSutorius $ 
  4  """ 
  5     Update JPEG names in the database given a file list. File contains a list of 
  6     databases and a list of logs containing multiframeIDs along with extension 
  7     UIDs and JpegFilenames. The list of databases should have the format 
  8     "dbpub::database1,database2,...,databaseN" for DBs on the public server, and 
  9     "dbload::.." on the load server. 
 10   
 11     Example for a list file: 
 12   
 13     # begin example.list 
 14     dbload::WSA 
 15     dbpub::UKIDSSR2,U05A100 
 16     jpgs_WSA_20050401_20050430.log 
 17     jpgs_WSA_20050501_20050531.log 
 18     jpgs_WSA_20050601_20050630.log 
 19     # end example.list 
 20   
 21     @author: N.C. Hambly 
 22     @org:    WFAU, IfA, University of Edinburgh 
 23   
 24  """ 
 25  #------------------------------------------------------------------------------ 
 26  from   collections import defaultdict 
 27  import os 
 28  import time 
 29   
 30  from   wsatools.CLI                 import CLI 
 31  from   wsatools.DbConnect.DbSession import DbSession, Join 
 32  from   wsatools.File                import File 
 33  from   wsatools.Logger              import Logger 
 34  import wsatools.DbConnect.CommonQueries as queries 
 35  from   wsatools.SystemConstants     import SystemConstants 
#------------------------------------------------------------------------------

class UpdateJpegs(object):
    """Update JPEG names in the database given a file list.
    """
    # Database name prefixes for which updates are written to an SQL script
    # file instead of being applied through the database connection.
    _RELEASE_DB_PREFIXES = ("UKIDSS", "WORLD", "TRANSIT")

    def __init__(self,
                 archive=CLI.getArgDef("archive"),
                 database=DbSession.server+'.'+DbSession.database,
                 userName=DbSession.userName,
                 fromJpegList=CLI.getOptDef("isjpglist"),
                 multiDBs=CLI.getOptDef("multidbs"),
                 isTrialRun=DbSession.isTrialRun,
                 updateFileName=CLI.getArgDef("datafile")):
        """
        @param archive:        Name of the data archive; passed on to
                               SystemConstants.
        @type  archive:        str
        @param database:       Name of the database to connect to, in the
                               form "server.database".
        @type  database:       str
        @param userName:       User name for the database connections.
        @type  userName:       str
        @param fromJpegList:   Input file lists multiframeIDs and JPG filenames.
        @type  fromJpegList:   bool
        @param multiDBs:       Update multiple DBs on 'dbload' or 'dbpub'.
        @type  multiDBs:       str
        @param isTrialRun:     If True, do not perform database modifications.
        @type  isTrialRun:     bool
        @param updateFileName: File containing a list of DBs and JPGlist files.
        @type  updateFileName: str

        """
        # database options
        self.isTrialRun = isTrialRun
        self.userName = userName
        self.database = database
        self.dataArchive = archive
        self.sysc = SystemConstants(self.dataArchive)

        self.dbSynonyms = {"dbpub": self.sysc.publicServers,
                           "dbload": [self.sysc.loadServer]}

        self.multiDBs = multiDBs
        # don't use multiDBs when running on a public server
        # NOTE(review): the former test compared the last character of the
        # server name against the list ['12'], which can never match, so
        # multiDBs was always discarded. It now checks for a server name
        # ending in '1' or '2' -- confirm this is the intended server set.
        serverName = self.database.partition('.')[0]
        if not serverName.endswith(('1', '2')):
            self.multiDBs = None

        # is file mfid-jpeg list?
        self.fromJpegList = fromJpegList

        # filename
        self.updateFileName = updateFileName

    #--------------------------------------------------------------------------

    def run(self):
        """
        Update the compressed image names in every selected database.

        Builds the server/database selection and the list of log files
        (from the command-line options or from the list file), compares the
        log entries with each database and updates the entries that differ.
        Finally the input file is renamed with an "updated_" prefix.

        """
        # Prepare list of available databases from user-supplied options
        self.logFiles = []
        self.dataBases = defaultdict(list)
        if self.fromJpegList:
            if self.multiDBs:
                serverSyn, dbs = self.multiDBs.split("::")
                for server in self.dbSynonyms[serverSyn]:
                    self.dataBases[server] = dbs.split(',')
            else:
                server, db = self.database.split('.')
                self.dataBases[server] = [db]
            self.logFiles.append(self.updateFileName)
        else:
            self.readFile()

        self.getAvailDBs()

        # create a master mfid dict from all log files
        masterMfidDict, dateList = self.createMasterDict()

        for server in sorted(self.dataBases):
            for db in sorted(self.dataBases[server]):
                archive = DbSession(server+'.'+db, autoCommit=True,
                                    isTrialRun=self.isTrialRun,
                                    userName=self.userName)

                # get a dict of entries from the DB
                mfidDict = self.getMultiframeIDs(archive, dateList)

                # build a list of data to update: only multiframes known to
                # both the logs and the DB, whose DB entry differs
                dbMfidList = []
                commonMfIDs = set(masterMfidDict).intersection(mfidDict)
                for mfID in sorted(commonMfIDs):
                    for extNum, jpgName in masterMfidDict[mfID]:
                        # the DB extNum is the log value plus one and the DB
                        # name carries the pixel server host name prefix
                        dbEntry = (extNum+1,
                                   self.sysc.pixelServerHostName+jpgName)
                        if dbEntry not in mfidDict[mfID]:
                            dbMfidList.append((mfID, extNum, jpgName))

                if dbMfidList:
                    Logger.addMessage("Updating %s" % db)
                    # Write to file if database is a release database
                    updateFile = None
                    if self._isReleaseDB(archive):
                        path = os.path.join(
                            "SqlUpdates", "JPG_PubDB_%s.%s_%s.sql"
                            % (archive.server, archive.database,
                               time.strftime('%Y%m%d_%H%M')))
                        updateFile = File(archive.sharePath(path))
                        updateFile.aopen()
                    try:
                        self.updateCompressedImageNames(archive, dbMfidList,
                                                        updateFile)
                    finally:
                        # close the script file even if the update failed
                        if updateFile is not None:
                            updateFile.close()
                else:
                    Logger.addMessage("%s is up-to-date." % db)
                # close db connection
                del archive

            if not self.dataBases[server]:
                Logger.addMessage("<WARNING> No database found on %s" % server)
        try:
            if "updated" not in self.updateFileName \
              and "deprjpgs" not in self.updateFileName:
                os.rename(self.updateFileName, os.path.join(
                    os.path.dirname(self.updateFileName),
                    "updated_%s" % os.path.basename(self.updateFileName)))
        except OSError:
            Logger.addMessage("<WARNING> %s not found!" % self.updateFileName)

    #--------------------------------------------------------------------------

    def createMasterDict(self):
        """
        Create a master dict of compressed file info per multiframe IDs
        from all log files in self.logFiles. Each log line has the form
        "multiframeID,extNum,jpgName"; blank lines and lines starting with
        '#' are skipped.

        @return: Dictionary holding the extNum and jpg name tuple for each
                 mfID, and the list of distinct names of the directories
                 containing the JPEG files.
        @rtype:  tuple(dict(int:list(tuple(int, str))), list(str))
        """
        mmd = defaultdict(list)
        dates = set()
        Logger.addMessage("Creating master MfID dictionary")
        for fileName in self.logFiles:
            print(" from  %s" % (fileName,))
            logFile = File(fileName)
            logFile.ropen()
            try:
                inpData = logFile.readlines(strip=True)
            finally:
                logFile.close()
            for line in inpData:
                if not line.strip() or line.startswith('#'):
                    continue
                mfStr, extNumStr, jpgName = line.split(',')
                # name of the directory that contains the JPEG file
                dateVersStr = os.path.basename(os.path.dirname(jpgName))
                dates.add(dateVersStr)
                mmd[int(mfStr)].append((int(extNumStr), jpgName))
        return mmd, list(dates)

    #--------------------------------------------------------------------------

    @staticmethod
    def getMultiframeIDs(archive, dateList):
        """
        Get a list of all MultiframeIDs, extension numbers and jpgnames from
        the given DB for files whose name contains one of the given strings.

        @param archive:  Connection to database to query.
        @type  archive:  DbSession
        @param dateList: Sub-strings to match against the fileName column;
                         one query is issued per entry.
        @type  dateList: list(str)
        @return: Dictionary giving extNum,compFileName tuples for every MfID.
        @rtype:  dict(int:list(tuple(int, str)))

        """
        attrs = "Multiframe.multiframeID, extNum, compFile"
        tables = Join(["Multiframe", "MultiframeDetector"], ["multiframeID"])

        mfd = defaultdict(list)
        for dateVersStr in dateList:
            where = "fileName LIKE '%" + dateVersStr + "%'"
            for mfID, extNum, compFile in archive.query(attrs, tables, where):
                mfd[mfID].append((extNum, compFile))

        return mfd

    #--------------------------------------------------------------------------

    def getAvailDBs(self):
        """
        Get all or selected DBs from given server list. Replaces, in place,
        every entry of self.dataBases by the list of requested databases
        that exist on that server (or by all eligible databases on the
        server if "all" was requested). Nothing is returned.

        """
        isVSA = self.sysc.isVSA()
        for server in self.dataBases:
            availDBs = queries.getAllDBs(server)
            if isVSA:
                availDBs = [db for db in availDBs
                            if not db.startswith(("UKIDSS", "TRANSIT",
                                                  "WORLD"))]
            else:
                availDBs = [db for db in availDBs if not db.startswith("V")]

            if "all" in self.dataBases[server]:
                self.dataBases[server] = availDBs
            else:
                self.dataBases[server] = list(
                    set(availDBs).intersection(self.dataBases[server]))

    #--------------------------------------------------------------------------

    def readFile(self):
        """
        Read databases and lists from a file. Lines of the form
        "dbload::db1,db2" or "dbpub::db1,db2" select databases and are stored
        in self.dataBases; every other non-blank, non-comment line is taken
        as a log file name and appended to self.logFiles. If no database
        line is present, the load database on the load server is used.
        Nothing is returned.

        """
        dbListFile = File(self.updateFileName)
        dbListFile.ropen()
        try:
            inputList = dbListFile.readlines(strip=True)
        finally:
            dbListFile.close()

        dbsFound = False
        for line in inputList:
            if not line.strip() or line.startswith('#'):
                continue
            if line.startswith("db") and "::" in line:
                dbsFound = True
                serverSyn, dbs = line.split("::")
                for server in self.dbSynonyms[serverSyn]:
                    self.dataBases[server] = set(dbs.split(','))
            else:
                self.logFiles.append(line)

        if not dbsFound:
            Logger.addMessage("No databases found in file, using %s on %s" % (
                self.sysc.loadDatabase, self.sysc.loadServer))
            self.dataBases[self.sysc.loadServer] = \
                set([self.sysc.loadDatabase])

    #--------------------------------------------------------------------------

    def updateCompressedImageNames(self, archive, jpgList, updateFile):
        """
        Update quick-look compressed image filenames for given list. This is
        used to update the relevant column after quick-look (JPEG) images have
        been made.

        @param archive:    Connection to database to query.
        @type  archive:    DbSession
        @param jpgList:    List of modification data, as tuples of
                           (multiframeID, extNum, compFile).
        @type  jpgList:    list
        @param updateFile: Write to this file for read only DBs.
        @type  updateFile: File obj

        """
        numRows = 0
        hostName = self.sysc.pixelServerHostName
        writeToFile = self._isReleaseDB(archive)
        for mfID, extNum, compFile in jpgList:
            if not compFile.startswith(hostName):
                compFile = hostName + compFile
            updSql = "compFile='%s'" % compFile
            updWhere = "multiframeID=%s AND extNum=%s" % (mfID, extNum+1)
            # Write to file on dbShare for public DBs
            if writeToFile:
                numRows += 1
                updateFile.writetheline(
                    "UPDATE MultiframeDetector SET %s WHERE %s" % (
                    updSql, updWhere))
            else:
                numRows += archive.update(
                    "MultiframeDetector", updSql, updWhere)

        Logger.addMessage("%s fileNames updated in MultiframeDetector." %
                          numRows)

    #--------------------------------------------------------------------------

    def _isReleaseDB(self, archive):
        """
        Check whether updates for the given connection's database have to be
        written to an SQL script file.

        @param archive: Connection to the database.
        @type  archive: DbSession
        @return: True if the upper-cased database name starts with one of
                 the release prefixes.
        @rtype:  bool

        """
        return archive.database.upper().startswith(self._RELEASE_DB_PREFIXES)

#------------------------------------------------------------------------------
# Entry point for script.

# Allow module to be imported as well as executed from the command line
if __name__ == "__main__":

    # Define command-line interface settings for UpdateJpegs
    CLI.progArgs.remove("database")
    CLI.progArgs += [
        CLI.Argument("archive", "WSA",
                     isValOK=lambda x: x.lower() in ['wsa','vsa']),
        CLI.Argument("datafile", "FILE/U")
        ]
    CLI.progOpts += [
        CLI.Option('d', 'database',
                   "update individual database",
                   "NAME", isValOK=lambda x: x.count('.') <= 1),
        CLI.Option('j', 'isjpglist',
                   "input file lists multiframeIDs and JPG filenames"),
        CLI.Option('M', 'multidbs',
                   "'server::database' where server can be 'dbload' or 'dbpub' "
                   "and database a comma-seperated list of DBs or 'all'",
                   "LIST", isValOK=lambda x: "::" in x),
        ]
    cli = CLI(UpdateJpegs, "$Revision: 9133 $")
    Logger.isVerbose = False
    Logger.addMessage(cli.getProgDetails())

    if cli.getArg("datafile") == 'U':
        # 'U' as the data file argument requests the usage text.
        # Single-argument print(...) behaves identically as a Python 2
        # print statement and as the Python 3 print function.
        print("usage: UpdateJpegs.py [-M server::database] -j/--jpg archive jpglistfile")
        print(" or: UpdateJpegs.py archive filename")
        print("If a list file 'filename' is given, it should look like this example:")
        print("# begin example.list")
        print("dbload::WSA")
        print("dbpub::UKIDSSR2,U05A100")
        print("jpgs_WSA_20050401_20050430.log")
        print("jpgs_WSA_20050501_20050531.log")
        print("jpgs_WSA_20050601_20050630.log")
        print("# end example.list\n")
        print("If 'database' is 'all' the script looks for all available DBs on")
        print("the public server.")
    else:
        # Use a separate name for the instance so that the class name
        # UpdateJpegs is not rebound at module level.
        updater = UpdateJpegs(
            cli.getArg("archive"),
            cli.getOpt("database"),
            cli.getOpt("user"),
            cli.getOpt("isjpglist"),
            cli.getOpt("multidbs"),
            cli.getOpt("test"),
            cli.getArg("datafile"))
        updater.run()

#------------------------------------------------------------------------------
# Change log:
#
# 20-Jul-2005, NCH: First Version
# 16-Nov-2005, NCH: Tarted up a bit to make it easier to use; made it
#                   more flexible to be able to update online DBs on the
#                   public DB server as well as the offline load server.
#  7-Dec-2005, ETWS: Rewritten to read in a list of databases and files;
#                    included example list file.
# 19-Jan-2006, ETWS: Included commandline input of db and jpg listing file
# 16-May-2006, ETWS: Refactored to run faster on public server DBs
# 26-Jul-2006, ETWS: Refactored to run on multiple public servers
# 31-Jul-2006, ETWS: Changed path to load and public server hosts variables
# 18-Oct-2006, JB: Someone else's edit to protect getAllDBs
# 21-Nov-2006, ETWS: Changed to have write access on DB
# 01-May-2007, ETWS: Updated for include into cu0
# 12-Sep-2007, ETWS: Included testRun possibility.
# 28-Nov-2007, ETWS: Reduced queries to date range.
#  9-Jan-2008, ETWS: Fixed bug in the multiframeID query.
# 16-Apr-2008, ETWS: Switched to SqlWrappers.getAllDBs.