1   
  2   
  3   
  4  """ 
  5     Checks the size of non survey detection tables against the WSA. 
  6   
  7     @author: E.T.W. Sutorius 
  8     @org:    WFAU, IfA, University of Edinburgh 
  9   
 10     @newfield contributors: Contributors, Contributors (Alphabetical Order) 
 11     @contributors: R.S. Collins 
 12  """ 
 13   
 14  from   collections import defaultdict 
 15  from   operator    import itemgetter 
 16  import os 
 17   
 18  from   wsatools.CLI                 import CLI 
 19  import wsatools.CSV                     as csv 
 20  from   wsatools.DataFactory         import ProgrammeTable 
 21  import wsatools.DbConnect.CommonQueries as queries 
 22  import wsatools.DbConnect.DbConstants   as dbc 
 23  from   wsatools.DbConnect.DbSession import DbSession 
 24  from   wsatools.Logger              import Logger 
 25  import wsatools.SystemConstants         as sysc 
 26  import wsatools.Utilities               as utils 
 27   
 28   
 29   
 30   
 31  if __name__ == "__main__": 
 32      CLI.progArgs.remove("database") 
 33      CLI.progOpts.remove("test") 
 34      CLI.progOpts.remove("user") 
 35      CLI.progOpts += [ 
 36        CLI.Option('d', "db_list", 
 37          "a list of databases which should be checked (all by default)", 
 38          "LIST"), 
 39        CLI.Option('f', "from_table", 
 40          "if different from *Detection (must be the same for all queried DBs)", 
 41          "NAME"), 
 42        CLI.Option('o', "out_dir", 
 43          "where the log is written to", 
 44          "DIR", "/disk01/wsa/nonSurvey/"), 
 45        CLI.Option('p', "prog_list", 
 46          "a list of standard programmes (all by default, use 'none' for none)", 
 47          "LIST"), 
 48        CLI.Option('w', "where", 
 49          "additional where clause for counting entries", 
 50          "SQL")] 
 51   
 52      cli = CLI("CheckReleasedNonSurvey", "$Revision: 6773 $", __doc__) 
 53      Logger.isVerbose = False 
 54      Logger.addMessage(cli.getProgDetails()) 
 55   
 56       
 57      hostsOfDb = defaultdict(list) 
 58      for server in sysc.publicServerHosts(): 
 59          for database in queries.getAllDBs(server): 
 60              hostsOfDb[database].append(server) 
 61   
 62       
 63      allDbs = set(hostsOfDb.keys() if not cli.getOpt("db_list") else 
 64                   csv.values(cli.getOpt("db_list").upper())) 
 65   
 66       
 67      loadDb = DbSession() 
 68      progTable = ProgrammeTable(loadDb) 
 69   
 70       
 71      ukidssDbs = set(db for db in allDbs if db.startswith(("UKIDSS", "WORLD"))) 
 72      excludeDbs = ("WFCAM", "VESPA", "VSA", "TRANSIT") 
 73       
 74      relNSDbs = [(dbName, progTable.getProgIDfromName(dbName.split('v')[0])) 
 75                  for dbName in allDbs - ukidssDbs 
 76                   if not dbName.startswith(excludeDbs)] 
 77   
 78      unknownDbs = [dbName for dbName, progID in relNSDbs if progID is None] 
 79      if unknownDbs: 
 80          raise SystemExit( 
 81            "<ERROR> Unrecognised non-survey databases: %s" % unknownDbs) 
 82   
 83      progIDs = [] 
 84      if not cli.getOpt("prog_list"): 
 85          progIDs = [progID for progID in progTable.getProgIDList() 
 86                             if 100 < progID < 106] 
 87      elif cli.getOpt("prog_list").upper() != "NONE": 
 88          progIDs = [progTable.getProgIDfromName(prog) 
 89                     for prog in csv.values(cli.getOpt("prog_list"))] 
 90   
 91      specificFrom = cli.getOpt("from_table") 
 92      whereStr = "deprecated=0" 
 93      if cli.getOpt("where"): 
 94          whereStr += " AND " + cli.getOpt("where") 
 95   
 96      detCount = defaultdict(dict) 
 97   
 98      Logger.addMessage("Stage 1 of 3: Getting row counts from load server...") 
 99      allProgIDs = progIDs + map(itemgetter(1), relNSDbs) 
100      for num, progID in enumerate(allProgIDs): 
101          pName = progTable.getAcronym(progID).upper() 
102          detTable = progTable.getDetectionTable(programmeID=progID) 
103          Logger.addMessage("%s/%s: %s" % (num+1, len(allProgIDs), pName)) 
104          detCount[loadDb.server][pName] = \ 
105            loadDb.queryNumRows(specificFrom or detTable, whereStr) 
106   
107      Logger.addMessage("Stage 2 of 3: " 
108        "Getting non-survey row counts from public servers...") 
109      for num, (dbName, progID) in enumerate(relNSDbs): 
110          pName = progTable.getAcronym(progID).upper() 
111          detTable = progTable.getAttr("detectionTable", programmeID=progID) 
112          Logger.addMessage("%s/%s: %s" % (num+1, len(relNSDbs), pName)) 
113   
114          for dbServer in hostsOfDb[dbName]: 
115              detCount[dbServer][pName] = \ 
116                DbSession(dbServer+'.'+dbName, 
117                          userName=dbc.loadServerRwUsername())\ 
118                .queryNumRows(specificFrom or detTable, whereStr) 
119   
120      Logger.addMessage("Stage 3 of 3: " 
121        "Getting survey row counts from public servers...") 
122      for num, progID in enumerate(progIDs): 
123          pName = progTable.getAcronym(progID).upper() 
124          detTable = progTable.getAttr("detectionTable", programmeID=progID) 
125          Logger.addMessage("%s/%s: %s" % (num+1, len(progIDs), pName)) 
126   
127          for dbName in ukidssDbs: 
128              for dbServer in hostsOfDb[dbName]: 
129                  detCount[dbServer][dbName+'.'+pName] = \ 
130                    DbSession(dbServer+'.'+dbName, 
131                              userName=dbc.loadServerRwUsername())\ 
132                    .queryNumRows(specificFrom or detTable, whereStr) 
133   
134      log = "" 
135      for dbServer in sysc.publicServerHosts(): 
136          if dbServer in detCount: 
137              log += "\n%20s:\n" % dbServer.title() 
138              log += "\n%20s %14s %14s\n" % ("DB","WSAsize","DBsize") 
139              log += "%20s %14s %14s\n" % ("--","-------","------") 
140              for db in sorted(detCount[dbServer]): 
141                  log += "%20s %14d %14d\n" % \ 
142                    (db, detCount[loadDb.server][db.split('.')[-1]], 
143                     detCount[dbServer][db]) 
144      print log 
145      logFileName = "checkReleasedNonSurvey%s.log" % utils.makeMssqlTimeStamp() 
146      utils.ensureDirExist(cli.getOpt("out_dir")) 
147      open(os.path.join(cli.getOpt("out_dir"), logFileName), 'w').write(log) 
148   
149   
150   
151   
152   
153   
154   
155   
156   
157   
158   
159   
160   
161   
162   
163