wsatools.CSV

1 #------------------------------------------------------------------------------ 2 #$Id: CSV.py 10018 2013-08-22 13:45:52Z RossCollins $ 3 """ 4 Reading/writing of comma-separated value strings/files. An enhanced 5 interface to the Python standard library csv module, and supports its 6 dialect system to alter the expected standard CSV format, e.g. delimiters. 7 8 Usage 9 ===== 10 11 Import as:: 12 13 import wsatools.CSV as csv 14 15 Read/Write CSV Files 16 -------------------- 17 18 To write the results of a database query to a CSV file:: 19 20 results = db.query("sourceID, ra, dec", "lasSource") 21 csv.File("sources.csv", 'w').writelines(results) 22 23 To read a file using the standard dialect:: 24 25 for sourceID, Ra, Dec in csv.File("sources.csv"): 26 if float(Dec) > 0.0: 27 print sourceID 28 break 29 30 To alter the dialect based on the contents of a file:: 31 32 csv.File.autoSetDialect("strange.csv") 33 rows = csv.File("strange.csv").readlines() 34 35 To manually change the dialect to, say, make the delimiter a single colon 36 character, and to ignore comment lines that start with a double-hyphen:: 37 38 from csv import excel 39 class ColonDialect(excel): 40 delimiter = ':' 41 csv.File.setDialect(ColonDialect) 42 csv.File.commentMarker = '--' 43 rows = csv.File("coloned.csv").readlines() 44 45 Convert between sequences and CSV strings 46 ----------------------------------------- 47 48 To convert a CSV string to a list:: 49 50 elements = list(csv.values('1.0, 2.0, 3.0')) 51 52 To convert a list to a CSV string:: 53 54 print csv.join([1.0, 2.0, 3.0]) 55 56 To change dialects:: 57 58 import csv as pycsv 59 pycsv.register_dialect('strange', csv.sniffDialect('strange.csv')) 60 elements = list(csv.values('1.0-2.0-3.0', dialect='strange')) 61 62 or:: 63 64 pycsv.register_dialect('colon', ColonDialect) 65 elements = list(csv.values('1.0:2.0:3.0', dialect='colon')) 66 67 @todo: Support dialects on write. Needs a substantial change to the current 68 interface. 
69 70 @author: R.S. Collins 71 @org: WFAU, IfA, University of Edinburgh 72 """ 73 #------------------------------------------------------------------------------ 74 from collections import namedtuple 75 import csv 76 import os 77 78 from wsatools.Utilities import ParsedFile 79 #------------------------------------------------------------------------------ 80

class File(ParsedFile):
    """
    CSV file reader/writer layered on top of ParsedFile. Ignores blank and
    comment lines. Lines read are split into values with the currently
    selected csv dialect; lines written are built with the module-level
    join() function.

    """
    #: Name of the registered csv dialect used for reading. It is stored on
    #: the class (see setDialect()), so a change affects every File object.
    _dialect = 'excel'

    #--------------------------------------------------------------------------

    @staticmethod
    def autoSetDialect(filePathName, numLinesSniffed=10):
        """
        Detect the dialect of the given CSV file with sniffDialect() and make
        it the dialect used for all subsequent reads.

        @param filePathName:    Full path to the CSV file.
        @type  filePathName:    str
        @param numLinesSniffed: Number of lines to inspect.
        @type  numLinesSniffed: int

        """
        detected = sniffDialect(filePathName, numLinesSniffed)
        File.setDialect(detected)

    #--------------------------------------------------------------------------

    @staticmethod
    def setDialect(dialect):
        """
        Register the supplied dialect class with the csv module, under the
        class's own name, and select it as the dialect used for reading.

        @param dialect: A class derived from the csv.Dialect class describing
                        the dialect to use.
        @type  dialect: classobj

        """
        dialectName = dialect.__name__
        csv.register_dialect(dialectName, dialect)
        # Assigned on the class, not an instance, so the choice is global.
        File._dialect = dialectName

    #--------------------------------------------------------------------------

    def readline(self):
        """ @return: Every value in the next line of the file.
            @rtype:  tuple(str)
        """
        rawLine = super(File, self).readline()
        return values(rawLine, self._dialect)

    #--------------------------------------------------------------------------

    def write(self, values, inQuotes=False):
        """
        Append one line to the file, made of the given values separated by
        commas and terminated by a newline.

        @param values:   A sequence of string-castable values.
        @type  values:   sequence
        @param inQuotes: If True, encapsulate strings in quotes.
        @type  inQuotes: bool

        """
        csvLine = join(values, inQuotes) + '\n'
        super(File, self).write(csvLine)

    #--------------------------------------------------------------------------

    def writelines(self, sequence, inQuotes=False):
        """
        Append several lines to the file, one per item of the given sequence,
        each formatted as comma-separated values.

        @param sequence: A sequence with one entry per line, where each entry
                         is itself a sequence of string-castable objects.
        @type  sequence: sequence(sequence)
        @param inQuotes: If True, encapsulate strings in quotes.
        @type  inQuotes: bool

        """
        # Kept lazy so that rows are formatted only as they are consumed.
        csvLines = (join(row, inQuotes) + '\n' for row in sequence)
        super(File, self).writelines(csvLines)

159 160 #------------------------------------------------------------------------------ 161

class DbFile(File):
    """
    Reads CSV files that have a header line (e.g. the output of pySQL),
    returning each row as a namedtuple associating column names to each value
    (e.g. the output of DbFile("outgest.csv").readlines() should be in the form
    that is identical to the output resulting from a DbSession().query() call).

    """
    #--------------------------------------------------------------------------
    Columns = None #: :type Columns: namedtuple

    #--------------------------------------------------------------------------

    def __init__(self, filePathName):
        """
        Opens the file and reads its first line to define the namedtuple of
        columns. The namedtuple type is named after the file (base name
        without extension) and has one field per header value.

        @param filePathName: Full path to the CSV file.
        @type  filePathName: str

        """
        super(DbFile, self).__init__(filePathName)
        # Deliberately calls File.readline(), not self.readline(): the header
        # must come back as a plain tuple because Columns is not defined yet.
        colNames = ' '.join(super(DbFile, self).readline())
        fileName = os.path.splitext(os.path.basename(filePathName))[0]
        self.Columns = namedtuple(fileName, colNames)

    #--------------------------------------------------------------------------

    def readline(self):
        """
        Reads the next line and converts it to a Columns namedtuple.

        @return: The row as a namedtuple, or the plain tuple of values if the
                 number of values differs from the number of columns (e.g. the
                 empty tuple produced by an empty line string).
        @rtype:  namedtuple or tuple(str)

        """
        row = super(DbFile, self).readline()
        # Compare lengths explicitly rather than inspecting the text of the
        # TypeError raised by the namedtuple constructor: the wording of that
        # message differs between Python versions, and an unrecognised
        # message used to make this method silently return None.
        if len(row) != len(self.Columns._fields):
            return row

        return self.Columns(*row)

195 196 #------------------------------------------------------------------------------ 197

def join(sequence, inQuotes=True, sep=',', enableMixWarning=True):
    """
    Convert a given sequence into a string of comma-separated values.

    @param sequence: A sequence (or any iterable, including one-shot
                     iterators) of string-castable objects.
    @type  sequence: sequence
    @param inQuotes: If True, each value is formatted with repr(), so strings
                     are encapsulated in quotes; otherwise str() is used.
    @type  inQuotes: bool
    @param sep:      The separator string to use.
    @type  sep:      str
    @param enableMixWarning: If True and inQuotes is True then warn if any
                             values contain mixed quotes.
    @type  enableMixWarning: bool

    @return: A string of comma-separated values from the list.
    @rtype:  str

    """
    if not inQuotes:
        return sep.join(str(value) for value in sequence)

    # Materialise the input once: it is walked twice below, which would
    # otherwise exhaust a generator during the warning pass and produce an
    # empty result string.
    items = list(sequence)
    if enableMixWarning:
        for value in items:
            if isinstance(value, str) and '"' in value and "'" in value:
                # Single pre-formatted argument, so the output is the same
                # under both the Python 2 statement and Python 3 function.
                print("<Warning> Mixed quotes in string: %s" % (value,))

    return sep.join(repr(value) for value in items)

224 225 #------------------------------------------------------------------------------ 226

def sniffDialect(filePathName, numLinesSniffed=10):
    """
    Inspect a CSV file to determine its dialect, returning a csv.Dialect
    object. Up to numLinesSniffed lines are read through ParsedFile; if the
    csv sniffer judges the first of them to be a header line, it is left out
    of the sample used to deduce the dialect.

    @param filePathName:    Full path to the CSV file.
    @type  filePathName:    str
    @param numLinesSniffed: Number of lines to inspect.
    @type  numLinesSniffed: int

    @return: A class derived from csv.Dialect describing the file's dialect.
    @rtype:  classobj

    """
    lines = []
    csvFile = ParsedFile(filePathName)
    try:
        for _i in range(numLinesSniffed):
            line = csvFile.readline()
            if not line:
                # Nothing more to read before reaching the requested count.
                break
            lines.append(line)
    finally:
        # Release the file handle even if reading fails part-way through.
        csvFile.close()

    sniffer = csv.Sniffer()
    # A lone line is never treated as a header, otherwise no sample is left.
    if len(lines) > 1 and sniffer.has_header('\n'.join(lines)):
        lines.pop(0)

    return sniffer.sniff('\n'.join(lines))

253 254 #------------------------------------------------------------------------------ 255

def values(csvStr, dialect='excel'):
    """
    Extract the values from a string containing one line of comma-separated
    values.

    @param csvStr:  A line of comma-separated values (with or without newlines).
    @type  csvStr:  str
    @param dialect: Name of the registered dialect to use.
    @type  dialect: str

    @return: Every value in the line; the empty tuple for an empty string.
    @rtype:  tuple(str)

    """
    # The builtin next() is used instead of the reader's .next() method, which
    # only exists on Python 2 iterators.
    row = next(csv.reader([csvStr], dialect))

    # Leading white-space in each string removed to handle a, b as well as a,b
    return tuple(datumString.lstrip() for datumString in row)

#------------------------------------------------------------------------------
# Change log:
#
#  5-Jul-2006, RSC: Original version. Functionality moved from Utilities.py
# 23-Nov-2006, RSC: Replaced .split(',') design with Python csv module.
# 10-Jul-2007, RSC: Provided support for dialects on reading CSV files, as
#                   well as ignoring blank and comment lines.

Source Code for Module wsatools.CSV