1
2
3 """
4 General utility functions. Mostly concerning manipulation of Python objects,
5 and the file system.
6
7 @author: I.A. Bond
8 @org: WFAU, IfA, University of Edinburgh
9
10 @newfield contributors: Contributors, Contributors (Alphabetical Order)
11 @contributors: R.S. Collins, N.J.G. Cross, N.C. Hambly, E. Sutorius
12 """
13
14 from __future__ import division, print_function
15 from future_builtins import map, zip
16
17 from bisect import bisect_left, insort_left
18 from collections import MutableMapping
19 from contextlib import contextmanager
20 from itertools import groupby
21 import math
22 import mx.DateTime
23 import os
24 import re
25 import signal
26 from subprocess import Popen, PIPE
30 """
31 Behaves like a file object, except that when iterating over file lines only
32 non-blank, non-comment lines are returned and any EOL characters are
33 removed together with trailing white-space. Use this class just like the
34 file class when reading files::
35
36 from wsatools.Utilities import ParsedFile
37 for line in ParsedFile("myfile.txt"):
38 print(line)
39
40 """
41 commentMarker = '#'
42 stripEOLComments = False
43
44
45
47 """
48 @return: The next line without EOL characters and trailing whitespace
49 that isn't a blank and isn't a comment.
50 @rtype: str
51
52 @note: Unfortunately, I have reimplemented the base-class design for
53 next() because the base-class does not call this readline()
54 method. Could have chosen to call the base-class next() method
55 and then parsed in the exact same way as readline() is parsed
56 (save for the EOF return condition that is shouldn't be in the
57 next() implementation). However, this way should be more
58 efficient and maintainable.
59 """
60 line = self.readline()
61 if not line:
62 raise StopIteration
63
64 return line
65
66
67
91
92
93
95 """
96 @return: A list of all lines in the file without EOL characters and
97 trailing whitespace that aren't blank and aren't a comment.
98 @rtype: list(str)
99
100 @note: Unfortunately, I have reimplemented the base-class design for
101 readlines() because the base-class does not call this readline()
102 method. Could have chosen to call the base-class readlines()
103 method and then parsed in the same way as readline(), but this
104 way should be more efficient and maintainable.
105 """
106 lines = []
107 while True:
108 line = self.readline()
109 if not line:
110 break
111 else:
112 lines.append(line)
113
114 return lines
115
119 """
120 Private helper function used by the WordWrapper class.
121
122 @return: User's default wrap width read from the preferences file.
123 @rtype: int
124
125 """
126 userWidthFilePathName = os.path.join(os.getenv('HOME'), '.wrap_width')
127 wrapWidthDefault = 0
128 try:
129 return int(file(userWidthFilePathName).read().strip())
130 except:
131 file(userWidthFilePathName, 'w').write(str(wrapWidthDefault))
132 return wrapWidthDefault
133
137 """
138 Formats long strings so that they neatly fit within a certain width without
139 words being split across lines. This is a singleton class, invoke as::
140
141 from wsatools.Utilities import WordWrapper
142 for line in lines:
143 print(WordWrapper.wrap(line))
144
145 @todo: Instead of not wrapping pre-formatted lines, handle the case of a
146 new-line character when calculating the overlap.
147
148 """
149
150 indent = 0
151
152 wrapWidth = _getUserWidth()
153
154 _wrappedLines = []
155
156
157
162
163 resetWrapWidth = staticmethod(resetWrapWidth)
164
165
166
168 """
169 Returns supplied line with neatly placed linebreaks to wrap the line
170 within a certain width. If the line already contains line breaks or is
171 within the width specified by L{WordWrapper.wrapWidth} then it remains
172 unaffected. If a L{WordWrapper.indent} value is supplied then it is
173 assumed that the first character in the line is located at the column
174 number equal to the L{WordWrapper.indent} value. Wrapped lines will
175 then have a gap after each line-break to maintain the indentation.
176
177 @param line: The line to be wrapped.
178 @type line: str
179
180 @return: Wrapped line.
181 @rtype: str
182
183 """
184
185 if not WordWrapper.wrapWidth or '\n' in line:
186 return line
187
188 WordWrapper._wrappedLines = []
189 return WordWrapper._wrapLine(line)
190
191 wrap = staticmethod(wrap)
192
193
194
196 """
197 Recursive calls to this function with the next line of text to be
198 wrapped appends lines to L{WordWrapper._wrappedLines} truncated to the
199 width specified by L{WordWrapper.wrapWidth}. The remaining text is used
200 as the input to the function until there is no more overrun, and the
201 fully wrapped text is returned.
202
203 @param line: Full line of text that has not yet been wrapped.
204 @type line: str
205
206 @return: Fully wrapped text.
207 @rtype: str
208
209 """
210 overRun = len(line) - WordWrapper.wrapWidth + WordWrapper.indent
211 if overRun <= 0:
212 WordWrapper._wrappedLines.append(line)
213 return ('\n' + WordWrapper.indent * ' '
214 ).join(WordWrapper._wrappedLines)
215
216 overRunWords = []
217 words = line.split()
218 while words and overRun > 0:
219 overRun -= 1 + len(words[-1])
220 overRunWords.insert(0, words.pop())
221
222
223 if not words:
224 words.append(overRunWords.pop(0))
225
226 WordWrapper._wrappedLines.append(' '.join(words))
227
228 return WordWrapper._wrapLine(' '.join(overRunWords))
229
230 _wrapLine = staticmethod(_wrapLine)
231
232
233
234 -def arbSort(unsortedList, kwdList, key=lambda x: x, isFullKeySet=True):
235 """
236 Arbitrarily sorts a list of tuples of form (keyword, value) by the order
237 defined in the sequence of specified keywords. Example:
238
239 >>> arbSort([7, 3, 8, 0, 1], [8, 3], isFullKeySet=False)
240 [8, 3, 7, 0, 1]
241
242 @param unsortedList: Unsorted list of scalars or sequences.
243 @type unsortedList: list
244 @param kwdList: List of keywords specifying the required sort order,
245 need to be of same type/value as entries in of the
246 sort key element of the given unsorted list.
247 @type kwdList: list
248 @param key: Function to fetch the sort key element from the given
249 unsorted list, e.g. operator.itemgetter(0).
250 @type key: function
251 @param isFullKeySet: If True, expect kwdList to contain the complete set
252 of keys in the unsortedList, otherwise leave entries
253 for non-specified keys at the end of the sorted list,
254 sorted by non-specified key in original key order.
255 @type isFullKeySet: bool
256
257 @return: The sorted list.
258 @rtype: list
259
260 @todo: See if defining my own compare function that calls index() is
261 faster/simpler than the Decorate-Sort-Undecorate method used here.
262 """
263 idxList = []
264 for entry in unsortedList:
265 try:
266 idxList.append(kwdList.index(key(entry)))
267 except ValueError:
268 if isFullKeySet:
269 msg = ("Utilities.arbSort() error: kwdList is not complete, "
270 + "an entry for '%s' does not exist!")
271 missingKey = key(entry)
272 if type(missingKey) is tuple:
273 missingKey = list(missingKey)
274 if type(missingKey) is list:
275 msg += " You may have forgotten to supply a sort key."
276 raise ValueError(msg % missingKey)
277 else:
278 kwdList.append(key(entry))
279 idxList.append(kwdList.index(key(entry)))
280
281 return [entry for _idx, entry in sorted(zip(idxList, unsortedList))]
282
286 """
287 If the supplied directory does not exist then create it.
288
289 @param aDir: Full path to the directory.
290 @type aDir: str
291
292 """
293 if not os.path.exists(aDir):
294 os.umask(0000)
295 os.makedirs(aDir, mode=0775)
296
300 """
301 Given a human readable compact number range string, expand it to a complete
302 sequence of numbers in a CSV string. Example:
303
304 >>> expandNumberRange(numberRange([1, 2, 3, 5, 6, 7]))
305 '1,2,3,5,6,7'
306 >>> expandNumberRange('1,2')
307 '1,2'
308 >>> expandNumberRange('0')
309 '0'
310
311 @param numberRange: A human readable compact number range string
312 @type numberRange: str
313
314 @return: A complete sequence of numbers in a CSV string.
315 @rtype: str
316
317 """
318 numStep = (10 if useTens else 1)
319 numbers = []
320 for part in numberRange.split(','):
321 part = part.strip()
322 if '-' not in part:
323 numbers.append(part)
324 else:
325 begin, end = part.split('-')
326 for number in xrange(int(begin), int(end) + numStep, numStep):
327 numbers.append(str(number))
328
329 return ','.join(numbers)
330
334 """
335 Gobble all entries in the given column of a space separated text file into
336 a list. Lines that begin with the hash mark are treated as comments and are
337 ignored. Example:
338
339 >>> column = extractColumn("/disk47/sys/test/Utilities/test.cat", 6)
340 >>> list(column)[:2]
341 ['14.5723', '15.1406']
342
343 @param filePathName: Path to space separated text file to be read.
344 @type filePathName: str
345 @param colNum: Column number of text to extract.
346 @type colNum: int
347
348 @return: A generator for all text in specified column of the file.
349 @rtype: generator(str)
350
351 @todo: Replace with extractColumns()? Could leave this method here for
352 speed and simplicity. Though normally we want more than one column
353 anyway! So, may just extractColumns(file, 3)[0] isn't so bad.
354 """
355 return (line.split()[colNum] for line in ParsedFile(filePathName))
356
357
358
359 -def extractColumns(filePathName, columnList=None, numRows=None, dataType=str):
360 """
361 Extracts from the given file the data in the given list of columns as a
362 list of strings for every column. Example:
363
364 >>> data = extractColumns("/disk47/sys/test/Utilities/test.cat",
365 ... columnList=[6, 7], dataType=float)
366 >>> print(data[0][0], data[0][1], data[1][0], data[1][1])
367 14.5723 15.1406 0.0033 0.005
368
369 @param filePathName: Full path to the file to read.
370 @type filePathName: str
371 @param columnList: Optional list of indices for the columns to be read
372 (with the first column at index 0), otherwise all
373 columns are read.
374 @type columnList: list(int)
375 @param numRows: Optionally supply number of rows to read, otherwise
376 all rows are read.
377 @type numRows: int
378 @param dataType: Optionally convert entries from string to this Python
379 type.
380 @type dataType: type
381
382 @return: A list of column data, with each column represented by a list of
383 strings.
384 @rtype: list(list(dataType))
385
386 @todo: Possibly alter to make use of the CSV module's abilities to handle
387 different dialects? Would simplify code a bit, and make more useful.
388 """
389 if columnList:
390 data = [[] for _col in columnList]
391
392 for rowNum, line in enumerate(ParsedFile(filePathName)):
393 if numRows and rowNum == numRows:
394 break
395
396 entries = line.split()
397 if not columnList:
398 columnList = range(len(entries))
399 data = [[] for _col in columnList]
400
401 for column, colIdx in zip(data, columnList):
402 column.append(entries[colIdx])
403
404 return (data if dataType is str else
405 [list(map(dataType, column)) for column in data])
406
410 """
411 Gets the available disk space for supplied list of disks.
412
413 @param disks: Sequence of disk paths (e.g.
414 L{SystemConstants.massStorageRaidFileSystem()}).
415 @type disks: sequence(str) or generator(str)
416
417 @return: A dictionary of sizes of the form (total, free) for each disk.
418 @rtype: dict(str:list(int, int))
419
420 """
421 cmd = ["df", "-P", "-k"]
422 spacePerDisk = {}
423 for disk in disks:
424 try:
425 p = Popen(cmd + [disk], stdout=PIPE)
426 resline = p.communicate()[0].strip().split('\n')[-1]
427 _a, _size, used, free, _p, _direc = resline.split()
428 if "disk38" in disk:
429 free = 0
430 spacePerDisk[disk] = [int(used) + int(free), int(free)]
431 except OSError:
432 pass
433 return spacePerDisk
434
438 """
439 Returns the set of items for which the groupBy method returns the same item
440 more than once. By default, this will simply return just the values in the
441 list that are duplicated. Removed groupBy key option, because it's better
442 to pre-process the iterable once prior to passing to this function and
443 sorting (in this usage of groupby, in other usages it's useful). Examples:
444
445 >>> getDuplicates([1, 2, 1, 0])
446 set([1])
447 >>> getDuplicates(x[0] for x in [(1,2), (3,4), (1,4)])
448 set([1])
449
450 @param anIterable: Any sequence or generator that can be iterated over.
451 @type anIterable: sequence or generator
452
453 @rtype: set
454
455 """
456
457 return set(key for key, group in groupby(sorted(anIterable))
458 if moreThanOneIn(group))
459
463 """
464 Returns a list of all indices where a value occurs in the given list.
465 Example:
466
467 >>> getListIndices(["bob", "steve", "bob"], "bob")
468 [0, 2]
469
470 @param aList: List to search for occurrences of value.
471 @type aList: list
472 @param value: Value to find in list, that may occur multiple times.
473 @type value: object
474
475 @rtype: list(int)
476
477 @note: Can probably avoid the need to use this function by employing a
478 slightly different algorithm design, e.g. use a dictionary.
479
480 """
481 indicesList = []
482 try:
483 indicesList.append(aList.index(value))
484 while True:
485 indicesList.append(aList.index(value, indicesList[-1] + 1))
486 except ValueError:
487 return indicesList
488
489
490
491 -def getNextDisk(sysc, spacePerDisk=None, byPercentFree=False, preAllocMem=0):
492 """
493 Gets the next available disk which is less than 99% full.
494
495 @param sysc: An initialised SystemConstants object.
496 @type sysc: SystemConstants
497 @param spacePerDisk: A dictionary of available disks and their size,
498 obtained automatically by L{getDiskSpace()} if not
499 supplied here.
500 @type spacePerDisk: dict(str:list(int, int))
501 @param byPercentFree: If True, choose next disk based on percentage free
502 space available rather than absolute free space.
503 @type byPercentFree: bool
504 @param preAllocMem: Pre allocated memory in GB.
505 @type preAllocMem: int
506
507 @return: Path to the next free disk.
508 @rtype: str
509
510 @todo: Instead of taking a SystemConstants object, why not make this a
511 method of SystemConstants?
512 """
513 leaveSpace = sysc.leaveSpaceOnMSRFS(byPercentFree)
514 if not spacePerDisk:
515 spacePerDisk = getDiskSpace(sysc.availableRaidFileSystem())
516
517 disks = sorted(disk for disk in spacePerDisk
518 if disk not in sysc.criticalDisks
519 and disk not in sysc.developDisks)
520 for disk in disks:
521 totalSpace, freeSpace = spacePerDisk[disk]
522 if not byPercentFree:
523 totalSpace = 1
524
525 if (freeSpace - leaveSpace[disk] * totalSpace
526 > preAllocMem * sysc.one_megabyte):
527
528 return disk
529
530 raise MemoryError("There is no space left on the RAID array.")
531
535 """
536 Calculates the number of items expressed by a human-readable string of
537 number ranges.
538
539 >>> getNumberItems(numberRange(range(10)))
540 10
541
542 @param numberRange: List of numbers expressed as a human-readable string of
543 number ranges, as returned by Utilities.numberRange().
544 @type numberRange: str
545
546 @return: Number of items expressed in the given list.
547 @rtype: int
548
549 @note: This isn't a sensible way of doing things, as numberRange() is
550 designed only for the purpose of printing human readable strings. It
551 shouldn't be used as a data container for processing.
552
553 """
554 return len(expandNumberRange(numberRange, useTens=useTens).split(','))
555
559 """
560 @return: Amount of available memory in kilobytes.
561 @rtype: int
562
563 """
564 return int(open("/proc/meminfo").readline().split()[1])
565
569 """
570 Taking an ordered list of counts of a particular key, e.g. the results of
571 an SQL "SELECT key, count(*) ... GROUP BY key ORDER BY key", this function
572 returns a list of key ranges that contain up to the specified group size of
573 counts. Example:
574
575 >>> groupByCounts([(10001, 5), (10002, 3), (10003, 12), (10004, 6)],
576 ... groupSize=10)
577 [(10001, 10002), (10003, 10003), (10004, 10004)]
578
579 @param keyCounts: List of keys and their respective counts.
580 @type keyCounts: list(tuple(int, int))
581 @param groupSize: Total number of counts in which to group keys by.
582 @type groupSize: int
583
584 @return: List of ranges in the form (min, max) of values of the keys, where
585 between these values the total counts are less than or equal to
586 the specified group size.
587 @rtype: list(tuple(int, int))
588
589 """
590 keyRanges = []
591 if sum(count for key, count in keyCounts) > groupSize:
592 keyMin = keyMax = keyCounts[0][0]
593 chunkCount = 0
594 for key, count in keyCounts:
595 chunkCount += count
596 if chunkCount > groupSize:
597 keyRanges.append((keyMin, keyMax))
598 keyMin = key
599 chunkCount = count
600 keyMax = key
601 keyRanges.append((keyMin, keyMax))
602
603 return keyRanges
604
605
606
607 -def joinDict(aDict, joinStr=" = ", sepStr=", "):
608 """
609 Like string.join, but operates on the contents of a dictionary instead of a
610 list. Joins dictionary keyword and value pairs into the string:
611 str(keyword) + joinStr + str(value) + sepStr etc. Example:
612
613 >>> joinDict(dict(a=1, b=2))
614 'a = 1, b = 2'
615
616 @param aDict: Dictionary to process into a string.
617 @type aDict: dict
618 @param joinStr: String to insert between keywords and values.
619 @type joinStr: str
620 @param sepStr: String to insert between dictionary elements.
621 @type sepStr: str
622
623 @return: A string representing the contents of the dictionary.
624 @rtype: str
625
626 """
627 return sepStr.join([joinStr.join((str(key), str(value)))
628 for key, value in aDict.iteritems()])
629
630
631
632 -def joinNested(aSeq, joinStr=", ", subJoinStr=None, seqIndex=None):
633 """
634 Like string.join, but can handle nested (or un-nested) sequences of string-
635 castable objects. Example:
636
637 >>> joinNested([['a', 0], ['b', 1]])
638 'a, 0, b, 1'
639
640 @param aSeq: A (nested) sequence string-castable objects.
641 @type aSeq: list or tuple
642 @param joinStr: String to insert between elements of the main sequence.
643 @type joinStr: str
644 @param subJoinStr: String to insert between elements of the nested sequence
645 (defaults to the same as joinStr).
646 @type subJoinStr: str
647 @param seqIndex: If specified, only the elements at this index in the
648 nested sequences are included.
649 @type seqIndex: int
650
651 @return: A string containing all of the elements of the (nested) sequence.
652 @rtype: str
653
654 """
655
656 if not aSeq:
657 return ''
658 if not (isinstance(aSeq, list) or isinstance(aSeq, tuple)):
659 try:
660 aSeq.__getattribute__('next')
661 except AttributeError:
662 return str(aSeq)
663 else:
664 aSeq = list(aSeq)
665
666
667 if isinstance(aSeq[0], list) or isinstance(aSeq[0], tuple):
668
669 if seqIndex:
670 strSeq = [str(element[seqIndex]) for element in aSeq]
671 else:
672 if subJoinStr is None:
673 subJoinStr = joinStr
674 strSeq = [joinNested(nestSeq, joinStr=subJoinStr)
675 for nestSeq in aSeq]
676 else:
677
678 strSeq = map(str, aSeq)
679
680
681 return joinStr.join(strSeq)
682
683
684
685 -def invertDict(aDict, forceReturnList=False):
686 """
687 Inverts the dictionary in such a way that if the input dictionary's values
688 are lists, each item of this list will become a key with the input
689 dictionary's key as value. If several input dictionary's keys exist for one
690 input dictionary's value the inverted dict's values will be lists. Example:
691
692 >>> invertDict(dict(males=["bob", "chris"], females=["jane", "chris"]))
693 {'chris': ['males', 'females'], 'jane': ['females'], 'bob': ['males']}
694
695 @param aDict: Dictionary to invert.
696 @type aDict: dict
697 @param forceReturnList: If True, return dictionary where values are always
698 lists.
699 @type forceReturnList: bool
700
701 @return: Inverted dictionary.
702 @rtype: dict
703
704 """
705 retDict = {}
706 try:
707 if not forceReturnList and len(set(aDict.itervalues())) == len(aDict):
708
709 return dict((value, key) for key, value in aDict.iteritems())
710 else:
711
712 for key, value in aDict.iteritems():
713 retDict.setdefault(value, []).append(key)
714
715 except TypeError:
716 hasUniqueValues = not forceReturnList \
717 and (len(set(unpackList(aDict.itervalues()))) ==
718 len(list(unpackList(aDict.itervalues()))))
719
720 for key, aList in aDict.iteritems():
721 for value in aList:
722 if hasUniqueValues:
723 retDict[value] = key
724 else:
725 retDict.setdefault(value, []).append(key)
726
727 return retDict
728
732 """
733 Returns an archive date/time data type, defaulting to the current time if
734 no input argument is given. This defines the archive date/time data type,
735 and is presently set to the mx.DateTime defined type. This function defines
736 the time system for the archive (which is UTC).
737
738 @param time: If given, specify a time in the format:
739 "2005-01-29 23:59:59.99".
740 @type time: str
741
742 @return: A date/time in the archive defined type.
743 @rtype: mx.DateTime
744
745 """
746 if time:
747 return mx.DateTime.DateTimeFrom(time)
748 else:
749 return mx.DateTime.utc()
750
754 """
755 Creates a timestamp using makeTimeStamp and formats appropriately for use
756 in ingest strings for Microsoft SQL Server (and handles a bug in the
757 datetime object creation).
758
759 @return: A UTC date/time stamp string formatted for MS SQL Server.
760 @rtype: str
761
762 """
763 return makeTimeStamp().replace(':60', ':59').replace(' ', 'T', 1).rstrip()
764
768 """
769 @return: An archive time stamp as a string, as opposed to the internal date
770 time type.
771 @rtype: str
772
773 """
774 return str(makeDateTime())
775
779 """
780 Generator test function is equivalent to memory hog C{len(list(values)) > 1}
781 or C{sum(1 for _ in values) > 1} but doesn't iterate through all items.
782 In fact, memory is rarely an issue this the iterable here won't large
783 compared to the full program usage, but even the first form is slow.
784 Example:
785
786 >>> moreThanOneIn(x for x in [])
787 False
788 >>> moreThanOneIn(x for x in ['g'])
789 False
790 >>> moreThanOneIn(x for x in [0.1 ,0.2])
791 True
792 >>> moreThanOneIn(x for x in [1, 2, 3])
793 True
794
795 @param values: A generator to be evaluated.
796 @type values: generator
797
798 @return: True if values contains more than one item.
799 @rtype: bool
800
801 """
802 return all((next(values, None) is not None, next(values, None) is not None))
803
807 """
808 Performs multiple string substitutions on the given string. Example:
809
810 >>> multiSub("UKIRT and the WSA", [("WSA", "VSA"), ("UKIRT", "VISTA")])
811 'VISTA and the VSA'
812
813 @param text: String containing text to be substituted.
814 @type text: str
815 @param subs: List of text marker and substitution value pairs.
816 @type subs: list(tuple(str, str))
817
818 @return: The original text string, with all marker values replaced by given
819 substitution values.
820 @rtype: str
821
822 """
823 for marker, value in subs:
824 text = text.replace(marker, value)
825 return text
826
831 """ Disables keyboard interrupts whilst in this context.
832 """
833 signal.signal(signal.SIGINT, signal.SIG_IGN)
834 yield
835 signal.signal(signal.SIGINT, signal.SIG_DFL)
836
837
838
839 -def npop(aList, nMax=2, mode="topbot"):
840 """
841 Divide aList in nMax sublists by populating it with items subsequently
842 taken from the top and the bottom of list. Example:
843
844 >>> npop([1, 2, 3, 4, 5])
845 [[1, 5], [2, 4], [3]]
846 >>> npop([1, 2, 3, 4, 5, 6], nMax=3)
847 [[1, 6, 2, 5], [3, 4]]
848
849 @param aList: The list (of file names).
850 @type aList: list
851 @param nMax: Maximal number of sublists. Rounded up to next even number.
852 @type nMax: int
853 @param mode: Mode of picking items from the list: 'asc', 'desc', or
854 subsequently from the top and bottom.
855 @type mode: str
856
857 @return: List of sublists.
858 @rtype: list(list)
859
860 """
861 nPairs = range((nMax + 1) // 2)
862 newList = []
863 while aList:
864 subList = []
865 try:
866 for _n in nPairs:
867 subList.append(aList.pop() if mode == "desc" else aList.pop(0))
868 subList.append(aList.pop(0) if mode == "asc" else aList.pop())
869
870 except IndexError:
871 pass
872
873 newList.append(subList)
874
875 return newList
876
877
878
879 -def numberRange(numbers, sep=", ", useTens=False):
880 """
881 Given a sequence of integers it returns that sequence as a string
882 representation of an ordered range of unique numbers. Example:
883
884 >>> numberRange([1, 2, 3, 5, 6, 7])
885 '1-3, 5-7'
886 >>> numberRange([1, 3, 5, 7])
887 '1, 3, 5, 7'
888 >>> numberRange([])
889 'None'
890
891 @param numbers: Any sequence of integers.
892 @type numbers: sequence(int)
893 @param sep: Marker string to separate individual numbers.
894 @type sep: str
895
896 @return: A string representation of the range of the unique set of ordered
897 integers.
898 @rtype: str
899
900 """
901 if not numbers:
902 return "None"
903
904 numStep = (10 if useTens else 1)
905 if useTens:
906
907
908 theNums = {'1':[], '2':[], 'x': []}
909 newNums = []
910 for i in numbers:
911 if i % 10 in (1, 2):
912 theNums[str(i % 10)].append(i)
913 else:
914 theNums['x'].append(i)
915 for a in sorted(theNums):
916 newNums.extend(sorted(set(theNums[a])))
917 numbers = newNums
918 else:
919 numbers = sorted(set(numbers))
920 firstNum = numbers.pop(0)
921 rangeStr = str(firstNum)
922 nextNum = firstNum + numStep
923 while numbers:
924 curNum = numbers.pop(0)
925 if curNum == nextNum:
926 nextNum += numStep
927 if not numbers:
928 rangeStr += "-%s" % curNum
929 elif nextNum - firstNum == numStep:
930 rangeStr += "%s%s" % (sep, curNum)
931 firstNum = curNum
932 nextNum = firstNum + numStep
933 else:
934 rangeStr += "-%s%s%s" % (nextNum - numStep, sep, curNum)
935 firstNum = curNum
936 nextNum = firstNum + numStep
937
938 return rangeStr
939
943 """
944 Returns a list of the given sequence in the original order, but with
945 duplicates removed. Example:
946
947 >>> orderedSet([6, 4, 7, 4, 9, 1, 7])
948 [6, 4, 7, 9, 1]
949
950 @param seq: Any ordered sequence containing duplicate values.
951 @type seq: sequence(X)
952 @param excludeList: A list of items to exclude from the returned list.
953 @type excludeList: list(X)
954
955 @return: Ordered list without duplicates.
956 @rtype: list(X)
957
958 """
959 seen = set(excludeList or [])
960 return [x for x in seq if x not in seen and not seen.add(x)]
961
965 """
966 Parses a string value and converts to float. NaNs and failures all return
967 None. Example:
968
969 >>> parseFloat('3.1')
970 3.1
971
972 @param value: String containing just a floating-point value to parse.
973 @type value: str
974
975 @return: Floating-point value of parsed string or None if fails.
976 @rtype: float
977 """
978 try:
979 value = float(value)
980 except ValueError as error:
981 if value == "---":
982 value = None
983 elif value.count('.') > 1:
984 value = float(value.rsplit('.')[0])
985 else:
986 raise error
987
988 if math.isnan(value) or math.isinf(value):
989 value = None
990
991 return value
992
993
994
995 -def splitList(longList, chunkSize=2, noSingles=False):
996 """
997 Splits a list into a list of equal sized chunks. If list is not equally
998 divisable then the last chunk just contains the remaining number of
999 elements. Example:
1000
1001 >>> list(splitList([1, 2, 3, 4]))
1002 [[1, 2], [3, 4]]
1003 >>> list(splitList([1, 2, 3, 4, 5, 6, 7], 3))
1004 [[1, 2, 3], [4, 5, 6], [7]]
1005 >>> list(splitList([1, 2, 3, 4, 5, 6, 7], 3, noSingles=True))
1006 [[1, 2, 3], [4, 5, 6, 7]]
1007 >>> list(splitList([1]))
1008 [[1]]
1009 >>> list(splitList([1], noSingles=True))
1010 []
1011 >>> list(splitList([]))
1012 []
1013
1014 @param longList: The long list that needs to be split into a list of
1015 smaller sized chunks.
1016 @type longList: list(X)
1017 @param chunkSize: Number of elements per sub-list to divide the long list.
1018 @type chunkSize: int
1019 @param noSingles: If True, ensure no single item lists are created.
1020 @type noSingles: bool
1021
1022 @return: A generator for sub-lists containing the original list sub-divided
1023 into chunks.
1024 @rtype: generator(list(X))
1025
1026 """
1027 fixSingle = noSingles and len(longList) % chunkSize is 1
1028 numIdx = (len(longList) - 1 if fixSingle else len(longList)) - 1
1029 lastIdx = numIdx - numIdx % chunkSize
1030 indices = xrange(0, numIdx + 1, chunkSize)
1031
1032 return (longList[i:(i + chunkSize if i != lastIdx else len(longList))]
1033 for i in indices)
1034
1038 """
1039 Given a list of lists, return a single sequence containing all of the
1040 elements of the combined list, as a generator. It also handles the more
1041 general case of a sequence of sequences, unlike sum(combinedList, []),
1042 which is the equivalent of the standard case. Example:
1043
1044 >>> ', '.join(unpackList([['a', 'b'], ['c', 'd']]))
1045 'a, b, c, d'
1046 >>> list(unpackList(splitList([1, 2, 3, 4])))
1047 [1, 2, 3, 4]
1048
1049 @param combinedList: A list of lists or a generator for lists.
1050 @type combinedList: list(list(X)) or generator(list(X))
1051
1052 @return: A generator for a sequence of just the elements of the original
1053 nested list.
1054 @rtype: generator(X)
1055
1056 """
1057 return (element for subList in combinedList for element in subList)
1058
1062 "Sort strings naturally."
1063 return sorted(strings, key=naturalSortKey)
1064
1066 import re
1067 return [int(t) if t.isdigit() else t for t in re.split(r'(\d+)', key)]
1068
1069
1070
1071
1072 -class Ratings(dict, MutableMapping):
1073 """
1074 Ratings is mostly like a dictionary, with extra features: the value
1075 corresponding to each key is the 'score' for that key, and all keys are
1076 ranked in terms their scores. Values must be comparable; keys, as well as
1077 being hashable, must be comparable if any two keys may ever have the same
1078 corresponding value (i.e., may be "tied" on score). All mapping-like
1079 behavior is just as you would expect, such as:
1080
1081 >>> r = Ratings({"bob": 30, "john": 20})
1082 >>> r.update({"paul": 20, "tom": 10})
1083 >>> r.keys()
1084 ['tom', 'john', 'paul', 'bob']
1085 >>> len(r)
1086 4
1087 >>> "paul" in r
1088 True
1089 >>> [r[key] for key in ["bob", "paul", "john", "tom"]]
1090 [30, 20, 20, 10]
1091 >>> r.get("nobody"), r.get("nobody", 0)
1092 (None, 0)
1093
1094 In addition to the mapping interface, we offer rating-specific methods.
1095 r.rating(key) returns the ranking of a 'key' in the ratings, with a ranking
1096 of 0 meaning the lowest score (when two keys have equal scores, the keys
1097 themselves are compared, to 'break the tie', and the lesser key gets a
1098 lower ranking):
1099
1100 >>> [r.rating(key) for key in ["bob", "paul", "john", "tom"]]
1101 [3, 2, 1, 0]
1102
1103 getValueByRating(ranking) and getKeyByRating(ranking) return the score and
1104 key, respectively, for a given ranking index:
1105
1106 >>> [r.getValueByRating(rating) for rating in range(4)]
1107 [10, 20, 20, 30]
1108 >>> [r.getKeyByRating(rating) for rating in range(4)]
1109 ['tom', 'john', 'paul', 'bob']
1110
1111 An important feature is that the keys() method returns keys in ascending
1112 order of ranking, and all other related methods return lists or iterators
1113 fully consistent with this ordering:
1114
1115 >>> [key for key in r]
1116 ['tom', 'john', 'paul', 'bob']
1117 >>> r.values()
1118 [10, 20, 20, 30]
1119 >>> r.items()
1120 [('tom', 10), ('john', 20), ('paul', 20), ('bob', 30)]
1121
1122 An instance can be modified (adding, changing and deleting key-score
1123 correspondences), and every method of that instance reflects the instance's
1124 current state at all times:
1125
1126 >>> r["tom"] = 100
1127 >>> r.items()
1128 [('john', 20), ('paul', 20), ('bob', 30), ('tom', 100)]
1129
1130 """
1131 _rating = []
1132 """ The crucial auxiliary data structure. A list of all (value, key) pairs,
1133 kept in "natural"ly-sorted order.
1134 """
1135
1136
1138 """ This class gets instantiated just like 'dict'.
1139 """
1140 dict.__init__(self, *args, **kwds)
1141 self._rating = sorted((v, k) for k, v in dict.iteritems(self))
1142
1143
1144
1146 """ Provide an identical but independent copy.
1147 """
1148 return Ratings(self)
1149
1150
1151
1153 """ Besides delegating to dict, we maintain self._rating.
1154 """
1155 if k in self:
1156 del self._rating[self.rating(k)]
1157 dict.__setitem__(self, k, v)
1158 insort_left(self._rating, (v, k))
1159
1160
1161
1163 """ Besides delegating to dict, we maintain self._rating.
1164 """
1165 del self._rating[self.rating(k)]
1166 dict.__delitem__(self, k)
1167
1168
1169
1170
1171
1172 __len__ = dict.__len__
1173 __contains__ = dict.__contains__
1174 has_key = __contains__
1175 update = MutableMapping.update
1176
1177
1178
1179
1180
1182 for _v, k in self._rating:
1183 yield k
1184
1185 iterkeys = __iter__
1186
1189
1191 for v, k in self._rating:
1192 yield k, v
1193
1196
1198 for _, v in self.iteritems():
1199 yield v
1200
1203
1204
1205
1206
1208 item = self[key], key
1209 i = bisect_left(self._rating, item)
1210 if item == self._rating[i]:
1211 return i
1212 raise LookupError("item not found in rating")
1213
1216
1219
1221 results = {}
1222 for item in self.values():
1223 results.setdefault(item, 0)
1224 results[item] += 1
1225 return results
1226
1227
1228
1229 if __name__ == "__main__":
1230 import doctest
1231 doctest.testmod()
1232
1233
1234