1
2
3
4 """
5 Converts a binary ingest file to human readable csv format.
6
7 @author: R.S. Collins
8 @org: WFAU, IfA, University of Edinburgh
9 """
10
11 import os
12 import struct
13
14 from wsatools.CLI import CLI
15 import wsatools.CSV as csv
16 from wsatools.DbConnect.DbSession import DbSession
17 from wsatools.Logger import Logger, ForLoopMonitor
18
# Positional arguments, registered in command-line order. Each Argument is
# given a name and a second string (presumably an example/default value --
# confirm against wsatools.CLI); the input path must exist on disk.
CLI.progArgs += [
    CLI.Argument('input', 'ingest.dat', isValOK=os.path.exists),
]
CLI.progArgs += [
    CLI.Argument('db_table', 'dxsDetection'),
]

# Options controlling the output file and the range of rows converted.
CLI.progOpts += [
    CLI.Option('o', 'output', 'Alternative output file name', 'NAME',
               'ingest.csv'),
]
CLI.progOpts += [
    CLI.Option('b', 'begin', 'Initial row to read', 'NUMBER', '0'),
]
CLI.progOpts += [
    CLI.Option('e', 'end', 'Final row to read', 'NUMBER', '0'),
]
# Flag option: takes no value.
CLI.progOpts += [
    CLI.Option('n', 'noheader',
               "Don't print a header line with column names"),
]
26
27
28
29
def _resolveRowRange(totalRows, begin, end):
    """
    Converts the begin/end command-line values into a concrete row range.

    @param totalRows: Number of complete rows held in the binary file.
    @param begin:     Zero-based index of the first row to read; a negative
                      value counts back from the end of the file.
    @param end:       Index one past the last row to read; a negative value
                      counts back from the end of the file and 0 means "read
                      to the end of the file".

    @return: Tuple of (index of first row to read, number of rows to read).
    @raise ValueError: If the range starts before the first row of the file or
                       ends before it starts.
    """
    firstRow = begin
    if firstRow < 0:
        firstRow += totalRows

    if end == 0:
        lastRow = totalRows
    else:
        lastRow = end
        if lastRow < 0:
            lastRow += totalRows

    if firstRow < 0 or lastRow < firstRow:
        raise ValueError("Invalid row range: begin=%s, end=%s, rows in file=%s"
                         % (begin, end, totalRows))

    # A range running past the end of the file is truncated rather than
    # rejected, so that the reported row count matches what is written.
    lastRow = min(lastRow, totalRows)
    firstRow = min(firstRow, lastRow)

    return firstRow, lastRow - firstRow


if __name__ == '__main__':
    # Drop the 'test' and 'user' options from the inherited option list
    # before the command line is parsed.
    CLI.progOpts.remove('test')
    CLI.progOpts.remove('user')
    cli = CLI("BinaryToCsv", "$Revision: 10057 $", __doc__)
    Logger.setEchoOn()
    Logger.addMessage(cli.getProgDetails())

    inputPath = cli.getArg('input')
    tableName = cli.getArg('db_table')
    noHeader = cli.getOpt('noheader')

    # Look up the layout of the table that the binary file was written for.
    # The 'database' argument is not declared in this script -- presumably
    # it is one of the standard wsatools.CLI arguments.
    db = DbSession(database=cli.getArg('database'))
    dataTypes = db.queryDataTypes(tableName)
    rowSize = db.queryRowSize(tableName)
    colNames = db.queryColumnNames(tableName)
    del db
    if not rowSize:
        Logger.addMessage("<ERROR> Table %s not found in database %s" %
                          (tableName, cli.getArg('database')))
        # Non-zero status so that calling scripts can detect the failure.
        raise SystemExit(1)

    # Translate the SQL column types into a little-endian, unpadded struct
    # format describing one row of the binary file.
    sqlToStruct = {'real': 'f', 'float': 'd', 'tinyint': 'B', 'smallint': 'h',
                   'int': 'i', 'bigint': 'q'}
    rowFormat = struct.Struct(
        '<' + ''.join(sqlToStruct[dataTypes[name]] for name in colNames))
    if rowFormat.size != rowSize:
        # Unpacking requires exactly rowFormat.size bytes, so a mismatch would
        # otherwise fail with a struct.error on the very first row.
        raise SystemExit("<ERROR> Row size of table %s is %s bytes but its "
                         "columns unpack to %s bytes"
                         % (tableName, rowSize, rowFormat.size))

    # The file must consist of a whole number of rows.
    fileSize = os.path.getsize(inputPath)
    if fileSize % rowSize:
        raise SystemExit(inputPath + " is corrupt!! " +
                         "File ends with an incomplete row.")
    totalRows = fileSize // rowSize

    begin = int(cli.getOpt('begin'))
    end = int(cli.getOpt('end'))
    try:
        startRow, numRows = _resolveRowRange(totalRows, begin, end)
    except ValueError:
        raise SystemExit("<ERROR> Cannot read rows %s to %s from %s, which "
                         "contains %s rows"
                         % (begin, end, inputPath, totalRows))

    # NOTE(review): identity (rather than equality) is presumably how an
    # option left at its default is told apart from one given explicitly --
    # confirm against wsatools.CLI before changing this comparison.
    if cli.getOpt('output') is cli.getOptDef('output'):
        outputFileName = inputPath + '.csv'
    else:
        outputFileName = cli.getOpt('output')
    Logger.addMessage("Writing " + str(numRows) + " rows to file " +
                      os.path.join(os.getcwd(), outputFileName))

    # try/finally guarantees that both files are closed even if unpacking or
    # writing a row fails part way through.
    binaryFile = open(inputPath, 'rb')
    try:
        outFile = csv.File(outputFileName, 'w')
        try:
            if not noHeader:
                outFile.write(colNames)

            binaryFile.seek(startRow * rowSize)
            progress = ForLoopMonitor(range(numRows))
            rowsLeft = numRows
            while rowsLeft > 0:
                rowBinary = binaryFile.read(rowSize)
                if not rowBinary:
                    # File is shorter than when it was measured; stop early.
                    break
                outFile.write(rowFormat.unpack(rowBinary))
                outFile.flush()
                progress.testForOutput()
                rowsLeft -= 1
        finally:
            outFile.close()
    finally:
        binaryFile.close()

    Logger.addMessage("done!")
104
105
106
107
108
109
110