Package wsatools :: Module CSV
[hide private]

Source Code for Module wsatools.CSV

  1  #------------------------------------------------------------------------------ 
  2  #$Id: CSV.py 10018 2013-08-22 13:45:52Z RossCollins $ 
  3  """ 
  4     Reading/writing of comma-separated value strings/files. An enhanced 
  5     interface to the Python standard library csv module that supports its 
  6     dialect system to alter the expected standard CSV format, e.g. delimiters. 
  7   
  8     Usage 
  9     ===== 
 10   
 11     Import as:: 
 12   
 13         import wsatools.CSV as csv 
 14   
 15     Read/Write CSV Files 
 16     -------------------- 
 17   
 18     To write the results of a database query to a CSV file:: 
 19   
 20         results = db.query("sourceID, ra, dec", "lasSource") 
 21         csv.File("sources.csv", 'w').writelines(results) 
 22   
 23     To read a file using the standard dialect:: 
 24   
 25         for sourceID, Ra, Dec in csv.File("sources.csv"): 
 26             if float(Dec) > 0.0: 
 27                 print sourceID 
 28                 break 
 29   
 30     To alter the dialect based on the contents of a file:: 
 31   
 32         csv.File.autoSetDialect("strange.csv") 
 33         rows = csv.File("strange.csv").readlines() 
 34   
 35     To manually change the dialect to, say, make the delimiter a single colon 
 36     character, and to ignore comment lines that start with a double-hyphen:: 
 37   
 38         from csv import excel 
 39         class ColonDialect(excel): 
 40             delimiter = ':' 
 41         csv.File.setDialect(ColonDialect) 
 42         csv.File.commentMarker = '--' 
 43         rows = csv.File("coloned.csv").readlines() 
 44   
 45     Convert between sequences and CSV strings 
 46     ----------------------------------------- 
 47   
 48     To convert a CSV string to a list:: 
 49   
 50         elements = list(csv.values('1.0, 2.0, 3.0')) 
 51   
 52     To convert a list to a CSV string:: 
 53   
 54         print csv.join([1.0, 2.0, 3.0]) 
 55   
 56     To change dialects:: 
 57   
 58         import csv as pycsv 
 59         pycsv.register_dialect('strange', csv.sniffDialect('strange.csv')) 
 60         elements = list(csv.values('1.0-2.0-3.0', dialect='strange')) 
 61   
 62     or:: 
 63   
 64         pycsv.register_dialect('colon', ColonDialect) 
 65         elements = list(csv.values('1.0:2.0:3.0', dialect='colon')) 
 66   
 67     @todo: Support dialects on write. Needs a substantial change to the current 
 68            interface. 
 69   
 70     @author: R.S. Collins 
 71     @org:    WFAU, IfA, University of Edinburgh 
 72  """ 
 73  #------------------------------------------------------------------------------ 
 74  from   collections import namedtuple 
 75  import csv 
 76  import os 
 77   
 78  from wsatools.Utilities import ParsedFile 
 79  #------------------------------------------------------------------------------ 
 80   
class File(ParsedFile):
    """ Read and write CSV files. Ignores blank and comment lines. """

    #: Name of the registered csv dialect used when reading. Stored on the
    #: class, so changing it affects every File object.
    _dialect = 'excel'

    #--------------------------------------------------------------------------

    @staticmethod
    def autoSetDialect(filePathName, numLinesSniffed=10):
        """
        Sniff the dialect of the given CSV file with sniffDialect() and make
        it the dialect used for subsequent reads.

        @param filePathName:    Full path to the CSV file.
        @type  filePathName:    str
        @param numLinesSniffed: Number of lines to inspect.
        @type  numLinesSniffed: int

        """
        sniffed = sniffDialect(filePathName, numLinesSniffed)
        File.setDialect(sniffed)

    #--------------------------------------------------------------------------

    @staticmethod
    def setDialect(dialect):
        """
        Register the supplied dialect with the csv module under its own
        class name and select it as the current dialect.

        @param dialect: A class derived from the csv.Dialect class describing
                        the current dialect.
        @type  dialect: classobj

        """
        dialectName = dialect.__name__
        csv.register_dialect(dialectName, dialect)
        File._dialect = dialectName

    #--------------------------------------------------------------------------

    def readline(self):
        """ @return: Every value in the next line of the file.
            @rtype:  tuple(str)
        """
        rawLine = super(File, self).readline()
        return values(rawLine, self._dialect)

    #--------------------------------------------------------------------------

    def write(self, values, inQuotes=False):
        """
        Write one line of comma-separated values, terminated by a newline.

        @param values:   A sequence of string-castable values.
        @type  values:   sequence
        @param inQuotes: If True, encapsulate strings in quotes.
        @type  inQuotes: bool

        """
        csvLine = join(values, inQuotes) + '\n'
        super(File, self).write(csvLine)

    #--------------------------------------------------------------------------

    def writelines(self, sequence, inQuotes=False):
        """
        Write several lines of comma-separated values, one for each item of
        the supplied sequence.

        @param sequence: A sequence with one entry per line, each entry being
                         a sequence of string-castable objects, one per value.
        @type  sequence: sequence(sequence)
        @param inQuotes: If True, encapsulate strings in quotes.
        @type  inQuotes: bool

        """
        # Lazily formatted, so rows are only converted as they are written.
        csvLines = (join(row, inQuotes) + '\n' for row in sequence)
        super(File, self).writelines(csvLines)
159 160 #------------------------------------------------------------------------------ 161
class DbFile(File):
    """
    Reads CSV files that have a header line (e.g. the output of pySQL),
    returning each row as a namedtuple associating column names to each value
    (e.g. the output of DbFile("outgest.csv").readlines() is intended to be in
    the same form as the output resulting from a DbSession().query() call).

    """
    #--------------------------------------------------------------------------
    Columns = None #: :type Columns: namedtuple

    #--------------------------------------------------------------------------

    def __init__(self, filePathName):
        """
        Reads first line of the file to define namedtuple of columns.

        The namedtuple type is named after the file (base name without its
        extension) and its fields are the values of the header line.

        @param filePathName: Full path to the CSV file.
        @type  filePathName: str

        """
        super(DbFile, self).__init__(filePathName)
        # Call the parent readline() explicitly: our own readline() needs
        # self.Columns, which does not exist until the header has been read.
        colNames = ' '.join(super(DbFile, self).readline())
        fileName = os.path.splitext(os.path.basename(filePathName))[0]
        self.Columns = namedtuple(fileName, colNames)

    #--------------------------------------------------------------------------

    def readline(self):
        """
        Converts line to namedtuple.

        @return: The next row as a Columns namedtuple. If the number of values
                 does not match the number of columns in the header (e.g. the
                 empty row produced at the end of the file), the plain tuple
                 of values is returned instead.
        @rtype:  namedtuple or tuple(str)

        """
        row = super(DbFile, self).readline()
        # Compare field counts directly instead of matching the wording of
        # the TypeError raised by the namedtuple constructor: that wording
        # differs between Python versions, and any non-matching message
        # previously caused a silent return of None.
        if len(row) == len(self.Columns._fields):
            return self.Columns(*row)

        return row
195 196 #------------------------------------------------------------------------------ 197
def join(sequence, inQuotes=True, sep=',', enableMixWarning=True):
    """
    Convert a given sequence into a string of comma-separated values.

    With inQuotes set, every value is formatted with repr(), so strings are
    wrapped in quotes; otherwise every value is formatted with str().

    @param sequence: A sequence (or any iterable, including one-shot
                     iterators) of string-castable objects.
    @type  sequence: sequence
    @param inQuotes: If True, encapsulate strings in quotes.
    @type  inQuotes: bool
    @param sep:      The separator string to use.
    @type  sep:      str
    @param enableMixWarning: If True and inQuotes is True then warn if any
                             values contain mixed quotes.
    @type  enableMixWarning: bool

    @return: A string of comma-separated values from the sequence.
    @rtype:  str

    """
    if not inQuotes:
        return sep.join(str(value) for value in sequence)

    # Materialise once: the warning pass below would otherwise exhaust a
    # generator/iterator, leaving nothing for the final join.
    items = list(sequence)
    if enableMixWarning:
        for value in items:
            if isinstance(value, str) and '"' in value and "'" in value:
                # Single-argument call form prints identically on Python 2
                # and Python 3.
                print("<Warning> Mixed quotes in string: %s" % value)

    return sep.join(repr(value) for value in items)
224 225 #------------------------------------------------------------------------------ 226
def sniffDialect(filePathName, numLinesSniffed=10):
    """
    Inspect a CSV file to determine its dialect, returning a csv.Dialect
    object.

    Up to numLinesSniffed lines are read through ParsedFile and handed to
    csv.Sniffer. If the sniffer reports that the sample starts with a header
    line, that line is left out of the sample used to deduce the dialect.

    @param filePathName:    Full path to the CSV file.
    @type  filePathName:    str
    @param numLinesSniffed: Number of lines to inspect.
    @type  numLinesSniffed: int

    @return: A class derived from csv.Dialect describing the file's dialect.
    @rtype:  classobj

    """
    csvFile = ParsedFile(filePathName)
    lines = []
    try:
        while len(lines) < numLinesSniffed:
            line = csvFile.readline()
            if not line:
                break  # No more lines to read.
            lines.append(line)
    finally:
        # Always release the file, even if reading fails part-way through.
        csvFile.close()

    sniffer = csv.Sniffer()
    sample = '\n'.join(lines)
    if len(lines) > 1 and sniffer.has_header(sample):
        lines.pop(0)
        sample = '\n'.join(lines)

    return sniffer.sniff(sample)
253 254 #------------------------------------------------------------------------------ 255
def values(csvStr, dialect='excel'):
    """
    Extract the values from a string containing one line of comma-separated
    values.

    @param csvStr:  A line of comma-separated values (with or without
                    newlines).
    @type  csvStr:  str
    @param dialect: Name of the registered dialect to use.
    @type  dialect: str

    @return: Every value in the line; an empty tuple for an empty string.
    @rtype:  tuple(str)

    """
    # The builtin next() is used rather than the reader's .next() method,
    # which only exists on Python 2 iterators.
    row = next(csv.reader([csvStr], dialect))
    # Leading white-space in each string removed to handle a, b as well as a,b
    return tuple(datumString.lstrip() for datumString in row)
273 274 #------------------------------------------------------------------------------ 275 # Change log: 276 # 277 # 5-Jul-2006, RSC: Original version. Functionality moved from Utilities.py 278 # 23-Nov-2006, RSC: Replaced .split(',') design with Python csv module. 279 # 10-Jul-2007, RSC: Provided support for dialects on reading CSV files, as 280 # well as ignoring blank and comment lines. 281