Package invocations :: Package cu4 :: Module CreatePixCatList
[hide private]

Source Code for Module invocations.cu4.CreatePixCatList

  1  #! /usr/bin/env python 
  2  #------------------------------------------------------------------------------ 
  3  #$Id: CreatePixCatList.py 7012 2010-05-27 19:08:14Z RossCollins $ 
  4  """ 
  5     Creates a list of filename, catname from the database for use by CU4. 
  6   
  7     @author: E. Sutorius 
  8     @org:    WFAU, IfA, University of Edinburgh 
  9   
 10     @newfield contributors: Contributors, Contributors (Alphabetical Order) 
 11     @contributors: R.S. Collins 
 12   
 13     @todo: Make the programme list both database and user driven, obviating the 
 14            need for a cu4programmes.list file. 
 15  """ 
 16  #------------------------------------------------------------------------------ 
 17  import os 
 18   
 19  from   wsatools.CLI                 import CLI 
 20  import wsatools.CSV                     as csv 
 21  import wsatools.DataFactory             as df 
 22  import wsatools.DbConnect.DbConstants   as dbc 
 23  from   wsatools.DbConnect.DbSession import DbSession, Join, SelectSQL 
 24  from   wsatools.Logger              import ForLoopMonitor, Logger 
 25  import wsatools.Utilities               as utils 
 26  #------------------------------------------------------------------------------ 
 27  # Entry point for script. 
 28   
 29  # Allow module to be imported as well as executed from the command line 
 30  if __name__ == '__main__': 
 31      timestamp = utils.makeTimeStamp().replace(' ', '_') 
 32   
 33      # Define additional command-line interface options 
 34      CLI.progOpts.remove('test') 
 35      CLI.progOpts.remove('user') 
 36      CLI.progOpts += [ 
 37        CLI.Option('a', 'all', 
 38          "create a single list for all programmes (catlist_all_%s.list)" 
 39          % timestamp), 
 40        CLI.Option('b', 'begin', 
 41          "first date to include, e.g. 2004-04-01, or 20040401, or first " 
 42          "semester to include e.g. 05A_SV", 
 43          "DATE", isValOK=CLI.isDateOK), 
 44        CLI.Option('e', 'end', 
 45          "last date to include, e.g. 2006-07-31, or 20060731, or last " 
 46          "semester to include e.g. 07A", 
 47          "DATE", isValOK=CLI.isDateOK), 
 48        CLI.Option('f', 'fromfile', 
 49          "read programmeID from 'cu4programmes.list' (default: query DB)."), 
 50        CLI.Option('m', 'missing_only', 
 51          "only list files where the catalogues are not ingested yet."), 
 52        CLI.Option('P', "programmes", "only process data for given " 
 53                   "programmes (accepts keywords 'all', 'ns' (non-survey), " 
 54                   "'ukidss' (all 5 main surveys).)", "LIST", 'all'), 
 55        CLI.Option('v', 'version', 
 56          "only select files of this processing revision number", "NUMBER", 
 57          isValOK=lambda x: x.isdigit() and int(x) > 0)] 
 58   
 59      cli = CLI("CreatePixCatList", '$Revision: 7012 $', __doc__) 
 60      Logger.addMessage(cli.getProgDetails()) 
 61   
 62      readProgIDFile = cli.getOpt('fromfile') 
 63      createSingleFile = cli.getOpt('all') 
 64      onlyMissing = cli.getOpt('missing_only') 
 65      versionNum = int(cli.getOpt('version')) if cli.getOpt('version') else 1 
 66      programmeList = [prog.lower() 
 67                       for prog in cli.getOpt('programmes').split(',')] 
 68   
 69      db = DbSession(cli.getArg('database')) 
 70      progTable = df.ProgrammeTable(db) 
 71      if readProgIDFile: 
 72          progIDList = [int(progID) 
 73                        for name, progID in csv.File("cu4programmes.list")] 
 74      else: 
 75          progIDList = progTable.getProgIDList() 
 76   
 77      # create progID list of processed programmes 
 78      if 'all' in programmeList: 
 79          progList = progIDList 
 80      elif 'ukidss' in programmeList: 
 81          progList = [pid for pid in progIDList if 10 < pid <= 105] 
 82      elif 'ns' in  programmeList or 'non-survey' in programmeList: 
 83          progList = [pid for pid in progIDList if pid > 10000] 
 84      else: 
 85          progList = [] 
 86          for entry in programmeList: 
 87              if str(entry).isdigit(): 
 88                  progList.append(int(entry)) 
 89              else: 
 90                  progList.append(progTable.getProgIDfromName( 
 91                      entry.replace('/',''))) 
 92   
 93      try: 
 94          beginDate, endDate = \ 
 95            db.sysc.obsCal.dateRange(cli.getOpt("begin"), cli.getOpt("end")) 
 96   
 97      except Exception as error: 
 98          raise SystemExit("Illegal Option: " + str(error)) 
 99   
100      beginDate = int(beginDate.date.replace('-', '')) 
101      endDate = int(endDate.date.replace('-', '')) 
102   
103      Logger.addMessage("Creating lists for %s programmes" % len(progList)) 
104      progress = ForLoopMonitor(progList) 
105      outlist = [] 
106      for progID in progList: 
107          progTable.setCurRow(programmeID=progID) 
108          if dbc.charDefault() not in progTable.getDetectionTable(): 
109              columns = "fileName, catName" 
110              tables = Join(["Multiframe", "ProgrammeFrame"], ["multiframeID"]) 
111              whereStr = ' AND '.join( 
112                  ["programmeID=%s" % progID,"deprecated=0", 
113                   "catName != %r" % dbc.charDefault(), 
114                   "catName NOT LIKE '%%empty%%'"]) 
115              if onlyMissing: 
116                  whereStr += " AND Multiframe.multiframeID NOT IN (%s)" % \ 
117                      SelectSQL("multiframeID", progTable.getDetectionTable(), 
118                                groupBy="multiframeID") 
119              Logger.addMessage( 
120                  "Getting filenames for programme: %s (%s) " % ( 
121                  progTable.getAcronym(), progID)) 
122          else: 
123              Logger.addMessage( 
124                  "%s (%s) is a new programme without a detection table yet" 
125                  % (progID, progTable.getAttr('dfsIDString'))) 
126   
127          #create list of files for given dates and version 
128          filelog = [] 
129          for fileName, catName in db.query(columns, tables, whereStr): 
130              datedir = os.path.basename(os.path.dirname(catName)) 
131              date, version = map(int, datedir.split('_v')) 
132   
133              if (beginDate <= date <= endDate) and (not versionNum \ 
134                or version == versionNum): 
135                  filelog.append(fileName.split(':')[-1] + '\n') 
136                  filelog.append(catName.split(':')[-1] + '\n') 
137   
138          # write to log file 
139          if filelog: 
140              outprefix = "NI_" if onlyMissing else "" 
141              version = ("_v%s" % versionNum) if versionNum else "" 
142              fileName = "catlist_%s%s_%s_%s%s.log" % ( 
143                outprefix, progID, beginDate, endDate, version) 
144   
145              file(fileName, 'w').writelines(filelog) 
146   
147          if createSingleFile: 
148              outlist += filelog 
149   
150          progress.testForOutput() 
151   
152      if outlist: 
153          outprefix = "NI_" if onlyMissing else "" 
154          version = ("_v%s" % versionNum) if versionNum else "" 
155          fileName = "catlist_%sall_%s_%s%s_%s.log" % ( 
156            outprefix, beginDate, endDate, version, timestamp) 
157   
158          Logger.addMessage("Writing to %s..." % fileName) 
159          file(fileName, 'w').writelines(outlist) 
160   
161  #------------------------------------------------------------------------------ 
162  # Change log: 
163  # 
164  # 17-Jan-2006, ETWS: first version 
165  # 14-Feb-2006, ETWS: included option to write output into one big list; 
166  #                    included option to retrieve only catalogue files 
167  #                    which are not ingested yet 
168  # 22-Feb-2007,  RSC: Updated to reflect move of loadServerHost() constant from 
169  #                    DbConstants to SystemConstants 
170  # 30-Jan-2008, ETWS: Only write to files if there is data. 
171  #  6-Mar-2008,  RSC: Updated to use DbSession and CLI. 
172  # 31-Oct-2008, ETWS: Bug fix. 
173