1
2
3 """
4 Reading/writing of comma-separated value strings/files. An enhanced
5 interface to the Python standard library csv module that supports its
6 dialect system for altering the expected CSV format, e.g. the delimiter.
7
8 Usage
9 =====
10
11 Import as::
12
13 import wsatools.CSV as csv
14
15 Read/Write CSV Files
16 --------------------
17
18 To write the results of a database query to a CSV file::
19
20 results = db.query("sourceID, ra, dec", "lasSource")
21 csv.File("sources.csv", 'w').writelines(results)
22
23 To read a file using the standard dialect::
24
25 for sourceID, Ra, Dec in csv.File("sources.csv"):
26 if float(Dec) > 0.0:
27 print sourceID
28 break
29
30 To alter the dialect based on the contents of a file::
31
32 csv.File.autoSetDialect("strange.csv")
33 rows = csv.File("strange.csv").readlines()
34
35 To manually change the dialect to, say, make the delimiter a single colon
36 character, and to ignore comment lines that start with a double-hyphen::
37
38 from csv import excel
39 class ColonDialect(excel):
40 delimiter = ':'
41 csv.File.setDialect(ColonDialect)
42 csv.File.commentMarker = '--'
43 rows = csv.File("coloned.csv").readlines()
44
45 Convert between sequences and CSV strings
46 -----------------------------------------
47
48 To convert a CSV string to a list::
49
50 elements = list(csv.values('1.0, 2.0, 3.0'))
51
52 To convert a list to a CSV string::
53
54 print csv.join([1.0, 2.0, 3.0])
55
56 To change dialects::
57
58 import csv as pycsv
59 pycsv.register_dialect('strange', csv.sniffDialect('strange.csv'))
60 elements = list(csv.values('1.0-2.0-3.0', dialect='strange'))
61
62 or::
63
64 pycsv.register_dialect('colon', ColonDialect)
65 elements = list(csv.values('1.0:2.0:3.0', dialect='colon'))
66
67 @todo: Support dialects on write. Needs a substantial change to the current
68 interface.
69
70 @author: R.S. Collins
71 @org: WFAU, IfA, University of Edinburgh
72 """
73
74 from collections import namedtuple
75 import csv
76 import os
77
78 from wsatools.Utilities import ParsedFile
79
80
81 -class File(ParsedFile):
82 """ Read and write CSV files. Ignores blank and comment lines. """
83
84 _dialect = 'excel'
85
86
87
89 """
90 Use sniffDialect() function to automatically set the correct dialect
91 of the CSV file to be read.
92
93 @param filePathName: Full path to the CSV file.
94 @type filePathName: str
95 @param numLinesSniffed: Number of lines to inspect.
96 @type numLinesSniffed: int
97
98 """
99 File.setDialect(sniffDialect(filePathName, numLinesSniffed))
100
101 autoSetDialect = staticmethod(autoSetDialect)
102
103
104
106 """
107 Set the current dialect to the one described in the supplied dialect
108 object.
109
110 @param dialect: A class derived from the csv.Dialect class describing
111 the current dialect.
112 @type dialect: classobj
113
114 """
115 csv.register_dialect(dialect.__name__, dialect)
116 File._dialect = dialect.__name__
117
118 setDialect = staticmethod(setDialect)
119
120
121
123 """ @return: Every value in the next line of the file.
124 @rtype: tuple(str)
125 """
126 return values(super(File, self).readline(), self._dialect)
127
128
129
def write(self, values, inQuotes=False):
    """
    Write one line of comma-separated values to the file.

    The values are first converted to a single CSV string by the
    module-level join() function; a newline is then added and the result is
    handed to the parent class's write().

    @param values:   A sequence of string-castable values making up the line.
    @type  values:   sequence
    @param inQuotes: If True, encapsulate strings in quotes.
    @type  inQuotes: bool

    """
    # NB: the "values" argument shadows the module-level values() function
    # inside this method only.
    csvLine = join(values, inQuotes)
    super(File, self).write(csvLine + '\n')
142
143
144
146 """
147 Write a sequence of lines, each containing a sequence of values, to the
148 file in the comma-separated values format.
149
150 @param sequence: A sequence of lines, each of which is a sequence of
151 string-castable objects, one for every field.
152 @type sequence: sequence(sequence)
153 @param inQuotes: If True, encapsulate strings in quotes.
154 @type inQuotes: bool
155
156 """
157 super(File, self).writelines(join(values, inQuotes) + '\n'
158 for values in sequence)
159
160
161
163 """
164 Reads CSV files that have a header line (e.g. the output of pySQL),
165 returning each row as a namedtuple associating column names to each value
166 (e.g. the output of DbFile("outgest.csv").readlines() should be in the form
167 that is identical to the output resulting from a DbSession().query() call).
168
169 """
170
171 Columns = None
172
173
174
176 """ Reads first line of the file to define namedtuple of columns.
177 """
178 super(DbFile, self).__init__(filePathName)
179 colNames = ' '.join(super(DbFile, self).readline())
180 fileName = os.path.splitext(os.path.basename(filePathName))[0]
181 self.Columns = namedtuple(fileName, colNames)
182
183
184
186 """ Converts line to namedtuple.
187 """
188 row = super(DbFile, self).readline()
189 try:
190 return self.Columns(*row)
191
192 except TypeError as error:
193 if "takes exactly" in str(error):
194 return row
195
196
197
def join(sequence, inQuotes=True, sep=',', enableMixWarning=True):
    """
    Convert a given sequence into a string of separated values.

    With inQuotes set, every value is rendered with repr(), so strings are
    wrapped in quotes; otherwise every value is rendered with str().

    @param sequence: An iterable of string-castable objects. One-shot
                     iterators and generators are accepted as well as lists
                     and tuples.
    @type  sequence: sequence
    @param inQuotes: If True, encapsulate strings in quotes.
    @type  inQuotes: bool
    @param sep:      The separator string to use.
    @type  sep:      str
    @param enableMixWarning: If True and inQuotes is True then warn (on
                             standard output) if any string value contains
                             both single and double quote characters.
    @type  enableMixWarning: bool

    @return: A string of separated values from the sequence.
    @rtype:  str

    """
    # Materialise the input once. The mixed-quote check below walks every
    # value before the join does, which would otherwise exhaust a one-shot
    # iterator and make the function return an empty string.
    items = list(sequence)

    if not inQuotes:
        return sep.join(str(item) for item in items)

    if enableMixWarning:
        for item in items:
            if isinstance(item, str) and '"' in item and "'" in item:
                # A single parenthesised argument prints the same text
                # whether print is a statement or a function.
                print("%s %s" % ("<Warning> Mixed quotes in string:", item))

    return sep.join(repr(item) for item in items)
224
225
226
228 """
229 Inspect a CSV file to determine its dialect, returning a csv.Dialect
230 object.
231
232 @param filePathName: Full path to the CSV file.
233 @type filePathName: str
234 @param numLinesSniffed: Number of lines to inspect.
235 @type numLinesSniffed: int
236
237 @return: A class derived from csv.Dialect describing the file's dialect.
238 @rtype: classobj
239
240 """
241 csvFile = ParsedFile(filePathName)
242 lines = []
243 for _i in xrange(numLinesSniffed):
244 line = csvFile.readline()
245 if not line:
246 break
247 lines.append(line)
248 csvFile.close()
249 sniffer = csv.Sniffer()
250 if len(lines) > 1 and sniffer.has_header('\n'.join(lines)):
251 lines.pop(0)
252 return sniffer.sniff('\n'.join(lines))
253
254
255
def values(csvStr, dialect='excel'):
    """
    Extract the values from a string containing one line of comma-separated
    values. Leading whitespace is stripped from every value, so that
    '1.0, 2.0' yields ('1.0', '2.0').

    @param csvStr:  A line of comma-separated values (with or without a
                    trailing newline).
    @type  csvStr:  str
    @param dialect: Name of the registered csv dialect to use.
    @type  dialect: str

    @return: Every value in the line.
    @rtype:  tuple(str)

    """
    # csv.reader takes an iterable of lines, so the single line is wrapped in
    # a list. The builtin next() replaces the iterator's .next() method,
    # which only exists on Python 2.
    row = next(csv.reader([csvStr], dialect))
    return tuple(field.lstrip() for field in row)
273
274
275
276
277
278
279
280
281