Package helpers :: Module BinaryToCsv
[hide private]

Source Code for Module helpers.BinaryToCsv

  1  #! /usr/bin/env python 
  2  #------------------------------------------------------------------------------ 
  3  #$Id: BinaryToCsv.py 10057 2013-09-09 15:58:07Z RossCollins $ 
  4  """ 
  5     Converts a binary ingest file to human readable csv format. 
  6   
  7     @author: R.S. Collins 
  8     @org:    WFAU, IfA, University of Edinburgh 
  9  """ 
 10  #------------------------------------------------------------------------------ 
 11  import os 
 12  import struct 
 13   
 14  from   wsatools.CLI                 import CLI 
 15  import wsatools.CSV                     as csv 
 16  from   wsatools.DbConnect.DbSession import DbSession 
 17  from   wsatools.Logger              import Logger, ForLoopMonitor 
 18  #------------------------------------------------------------------------------ 
 19  CLI.progArgs += [CLI.Argument('input', 'ingest.dat', isValOK=os.path.exists), 
 20                   CLI.Argument('db_table', 'dxsDetection')] 
 21  CLI.progOpts += [CLI.Option('o', 'output', 'Alternative output file name', 
 22                              'NAME', 'ingest.csv'), 
 23                   CLI.Option('b', 'begin', 'Initial row to read', 'NUMBER', '0'), 
 24                   CLI.Option('e', 'end', 'Final row to read', 'NUMBER', '0'), 
 25                   CLI.Option('n', 'noheader', "Don't print a header line with column names")] 
 26  #------------------------------------------------------------------------------ 
 27  # Entry point for script. 
 28   
 29  # Allow module to be imported as well as executed from the command line 
 30  if __name__ == '__main__': 
 31      CLI.progOpts.remove('test') 
 32      CLI.progOpts.remove('user') 
 33      cli = CLI("BinaryToCsv", "$Revision: 10057 $", __doc__) 
 34      Logger.setEchoOn() 
 35      Logger.addMessage(cli.getProgDetails()) 
 36   
 37      # Query database for table information 
 38      db = DbSession(database=cli.getArg('database')) 
 39      dataTypes = db.queryDataTypes(cli.getArg('db_table')) 
 40      rowSize = db.queryRowSize(cli.getArg('db_table')) 
 41      colNames = db.queryColumnNames(cli.getArg('db_table')) 
 42      noHeader = cli.getOpt('noheader') 
 43      del db 
 44      if not rowSize: 
 45          Logger.addMessage("<ERROR> Table %s not found in database %s" % 
 46            (cli.getArg('db_table'), cli.getArg('database'))) 
 47          exit() 
 48   
 49      # Construct a table row data type string for struct 
 50      sqlToStruct = {'real': 'f', 'float': 'd', 'tinyint': 'B', 'smallint': 'h', 
 51                     'int': 'i', 'bigint': 'q'} 
 52      rowTypes = '<' + ''.join(sqlToStruct[dataTypes[name]] for name in colNames) 
 53   
 54      # Make sure input file is not corrupt 
 55      numRows = os.path.getsize(cli.getArg('input')) / float(rowSize) 
 56      if numRows - int(numRows) != 0.0: 
 57          raise SystemExit, cli.getArg('input') + " is corrupt!! " + \ 
 58                            "File ends with an incomplete row." 
 59      numRows = int(numRows) 
 60      startRow = int(cli.getOpt('begin')) 
 61      endRow = int(cli.getOpt('end')) 
 62   
 63      if endRow < 0: 
 64          endRow += numRows 
 65      if startRow < 0: 
 66          startRow += numRows 
 67   
 68      if cli.getOpt('end') is not cli.getOptDef('end'): 
 69          numRows = int(endRow) - int(startRow) 
 70      elif cli.getOpt('begin') is not cli.getOptDef('begin'): 
 71          numRows -= int(startRow) 
 72   
 73      # Open input and output files 
 74      binaryFile = file(cli.getArg('input'), 'rb') 
 75      if cli.getOpt('output') is cli.getOptDef('output'): 
 76          outputFileName = cli.getArg('input') + '.csv' 
 77      else: 
 78          outputFileName = cli.getOpt('output') 
 79      Logger.addMessage("Writing " + str(numRows) + " rows to file " + 
 80                        os.path.join(os.getcwd(), outputFileName)) 
 81      outFile = csv.File(outputFileName, 'w') 
 82      if not noHeader: 
 83          outFile.write(colNames) # Header row 
 84   
 85      # Convert 
 86      binaryFile.seek(int(startRow) * rowSize) 
 87      progress = ForLoopMonitor(range(numRows)) 
 88      rowNum = startRow 
 89      while True: 
 90          rowBinary = binaryFile.read(rowSize) 
 91          rowNum += 1 
 92          if not rowBinary: 
 93              break 
 94          outFile.write(struct.unpack(rowTypes, rowBinary)) 
 95          outFile.flush() 
 96          progress.testForOutput() 
 97          if rowNum == endRow: 
 98              break 
 99   
100      Logger.addMessage("done!") 
101   
102      binaryFile.close() 
103      outFile.close() 
104   
105  #------------------------------------------------------------------------------ 
106  # Change log: 
107  # 
108  # 12-Sep-2006,  RSC: Original version. 
109  # 07-Feb-2007, ETWS: Updated for usage with negative begin/end values 
110