wsatools.Utilities

1 #------------------------------------------------------------------------------ 2 #$Id: Utilities.py 10144 2013-11-28 12:34:16Z EckhardSutorius $ 3 """ 4 General utility functions. Mostly concerning manipulation of Python objects, 5 and the file system. 6 7 @author: I.A. Bond 8 @org: WFAU, IfA, University of Edinburgh 9 10 @newfield contributors: Contributors, Contributors (Alphabetical Order) 11 @contributors: R.S. Collins, N.J.G. Cross, N.C. Hambly, E. Sutorius 12 """ 13 #------------------------------------------------------------------------------ 14 from __future__ import division, print_function 15 from future_builtins import map, zip 16 17 from bisect import bisect_left, insort_left # For Ratings class 18 from collections import MutableMapping # For Ratings class 19 from contextlib import contextmanager 20 from itertools import groupby 21 import math 22 import mx.DateTime 23 import os 24 import re 25 import signal 26 from subprocess import Popen, PIPE

27 #------------------------------------------------------------------------------ 28 29 -class ParsedFile(file):

30 """ 31 Behaves like a file object, except that when iterating over file lines only 32 non-blank, non-comment lines are returned and any EOL characters are 33 removed together with trailing white-space. Use this class just like the 34 file class when reading files:: 35 36 from wsatools.Utilities import ParsedFile 37 for line in ParsedFile("myfile.txt"): 38 print(line) 39 40 """ 41 commentMarker = '#' #: Comment marker string. 42 stripEOLComments = False #: Strip comments from the end of lines too? 43 44 #-------------------------------------------------------------------------- 45

46 - def next(self):

47 """ 48 @return: The next line without EOL characters and trailing whitespace 49 that isn't a blank and isn't a comment. 50 @rtype: str 51 52 @note: Unfortunately, I have reimplemented the base-class design for 53 next() because the base-class does not call this readline() 54 method. Could have chosen to call the base-class next() method 55 and then parsed in the exact same way as readline() is parsed 56 (save for the EOF return condition that is shouldn't be in the 57 next() implementation). However, this way should be more 58 efficient and maintainable. 59 """ 60 line = self.readline() 61 if not line: 62 raise StopIteration 63 64 return line

65 66 #-------------------------------------------------------------------------- 67

68 - def readline(self):

69 """ 70 @return: The next line without EOL characters and trailing whitespace 71 that isn't a blank and isn't a comment. At EOF a blank string 72 is returned. 73 @rtype: str 74 """ 75 line = super(ParsedFile, self).readline() 76 77 # EOF 78 if not line: 79 return line 80 81 # Parse line 82 if not line.startswith(ParsedFile.commentMarker): 83 if ParsedFile.stripEOLComments: 84 line = line.split(ParsedFile.commentMarker)[0] 85 86 if line.strip() != '': 87 return line.rstrip() 88 89 # Skip line - recursive, so needs to be last condition 90 return ParsedFile.readline(self)

91 92 #-------------------------------------------------------------------------- 93

94 - def readlines(self):

95 """ 96 @return: A list of all lines in the file without EOL characters and 97 trailing whitespace that aren't blank and aren't a comment. 98 @rtype: list(str) 99 100 @note: Unfortunately, I have reimplemented the base-class design for 101 readlines() because the base-class does not call this readline() 102 method. Could have chosen to call the base-class readlines() 103 method and then parsed in the same way as readline(), but this 104 way should be more efficient and maintainable. 105 """ 106 lines = [] 107 while True: 108 line = self.readline() 109 if not line: 110 break 111 else: 112 lines.append(line) 113 114 return lines

115

116 #------------------------------------------------------------------------------ 117 118 -def _getUserWidth():

119 """ 120 Private helper function used by the WordWrapper class. 121 122 @return: User's default wrap width read from the preferences file. 123 @rtype: int 124 125 """ 126 userWidthFilePathName = os.path.join(os.getenv('HOME'), '.wrap_width') 127 wrapWidthDefault = 0 128 try: 129 return int(file(userWidthFilePathName).read().strip()) 130 except: 131 file(userWidthFilePathName, 'w').write(str(wrapWidthDefault)) 132 return wrapWidthDefault

133

134 #------------------------------------------------------------------------------ 135 136 -class WordWrapper(object):

137 """ 138 Formats long strings so that they neatly fit within a certain width without 139 words being split across lines. This is a singleton class, invoke as:: 140 141 from wsatools.Utilities import WordWrapper 142 for line in lines: 143 print(WordWrapper.wrap(line)) 144 145 @todo: Instead of not wrapping pre-formatted lines, handle the case of a 146 new-line character when calculating the overlap. 147 148 """ 149 #: Assume lines are indented by this many character columns. 150 indent = 0 151 #: Required width of wrapped lines in number of character columns. 152 wrapWidth = _getUserWidth() 153 #: List where fully wrapped lines are collated. 154 _wrappedLines = [] 155 156 #-------------------------------------------------------------------------- 157

158 - def resetWrapWidth():

159 """ Resets the wrap width to the user default value 160 """ 161 WordWrapper.wrapWidth = _getUserWidth()

162 163 resetWrapWidth = staticmethod(resetWrapWidth) 164 165 #-------------------------------------------------------------------------- 166

167 - def wrap(line):

168 """ 169 Returns supplied line with neatly placed linebreaks to wrap the line 170 within a certain width. If the line already contains line breaks or is 171 within the width specified by L{WordWrapper.wrapWidth} then it remains 172 unaffected. If a L{WordWrapper.indent} value is supplied then it is 173 assumed that the first character in the line is located at the column 174 number equal to the L{WordWrapper.indent} value. Wrapped lines will 175 then have a gap after each line-break to maintain the indentation. 176 177 @param line: The line to be wrapped. 178 @type line: str 179 180 @return: Wrapped line. 181 @rtype: str 182 183 """ 184 # Don't wrap preformatted lines 185 if not WordWrapper.wrapWidth or '\n' in line: 186 return line 187 188 WordWrapper._wrappedLines = [] 189 return WordWrapper._wrapLine(line)

190 191 wrap = staticmethod(wrap) 192 193 #-------------------------------------------------------------------------- 194

195 - def _wrapLine(line):

196 """ 197 Recursive calls to this function with the next line of text to be 198 wrapped appends lines to L{WordWrapper._wrappedLines} truncated to the 199 width specified by L{WordWrapper.wrapWidth}. The remaining text is used 200 as the input to the function until there is no more overrun, and the 201 fully wrapped text is returned. 202 203 @param line: Full line of text that has not yet been wrapped. 204 @type line: str 205 206 @return: Fully wrapped text. 207 @rtype: str 208 209 """ 210 overRun = len(line) - WordWrapper.wrapWidth + WordWrapper.indent 211 if overRun <= 0: 212 WordWrapper._wrappedLines.append(line) 213 return ('\n' + WordWrapper.indent * ' ' 214 ).join(WordWrapper._wrappedLines) 215 216 overRunWords = [] 217 words = line.split() 218 while words and overRun > 0: 219 overRun -= 1 + len(words[-1]) 220 overRunWords.insert(0, words.pop()) 221 222 # Handle the scenario where there's a single long word causing overrun 223 if not words: 224 words.append(overRunWords.pop(0)) 225 226 WordWrapper._wrappedLines.append(' '.join(words)) 227 228 return WordWrapper._wrapLine(' '.join(overRunWords))

229 230 _wrapLine = staticmethod(_wrapLine)

231

232 #------------------------------------------------------------------------------ 233 234 -def arbSort(unsortedList, kwdList, key=lambda x: x, isFullKeySet=True):

235 """ 236 Arbitrarily sorts a list of tuples of form (keyword, value) by the order 237 defined in the sequence of specified keywords. Example: 238 239 >>> arbSort([7, 3, 8, 0, 1], [8, 3], isFullKeySet=False) 240 [8, 3, 7, 0, 1] 241 242 @param unsortedList: Unsorted list of scalars or sequences. 243 @type unsortedList: list 244 @param kwdList: List of keywords specifying the required sort order, 245 need to be of same type/value as entries in of the 246 sort key element of the given unsorted list. 247 @type kwdList: list 248 @param key: Function to fetch the sort key element from the given 249 unsorted list, e.g. operator.itemgetter(0). 250 @type key: function 251 @param isFullKeySet: If True, expect kwdList to contain the complete set 252 of keys in the unsortedList, otherwise leave entries 253 for non-specified keys at the end of the sorted list, 254 sorted by non-specified key in original key order. 255 @type isFullKeySet: bool 256 257 @return: The sorted list. 258 @rtype: list 259 260 @todo: See if defining my own compare function that calls index() is 261 faster/simpler than the Decorate-Sort-Undecorate method used here. 262 """ 263 idxList = [] 264 for entry in unsortedList: 265 try: 266 idxList.append(kwdList.index(key(entry))) 267 except ValueError: 268 if isFullKeySet: 269 msg = ("Utilities.arbSort() error: kwdList is not complete, " 270 + "an entry for '%s' does not exist!") 271 missingKey = key(entry) 272 if type(missingKey) is tuple: 273 missingKey = list(missingKey) 274 if type(missingKey) is list: 275 msg += " You may have forgotten to supply a sort key." 276 raise ValueError(msg % missingKey) 277 else: 278 kwdList.append(key(entry)) 279 idxList.append(kwdList.index(key(entry))) 280 281 return [entry for _idx, entry in sorted(zip(idxList, unsortedList))]

282

283 #------------------------------------------------------------------------------ 284 285 -def ensureDirExist(aDir):

286 """ 287 If the supplied directory does not exist then create it. 288 289 @param aDir: Full path to the directory. 290 @type aDir: str 291 292 """ 293 if not os.path.exists(aDir): 294 os.umask(0000) 295 os.makedirs(aDir, mode=0775)

296

297 #------------------------------------------------------------------------------ 298 299 -def expandNumberRange(numberRange, useTens=False):

300 """ 301 Given a human readable compact number range string, expand it to a complete 302 sequence of numbers in a CSV string. Example: 303 304 >>> expandNumberRange(numberRange([1, 2, 3, 5, 6, 7])) 305 '1,2,3,5,6,7' 306 >>> expandNumberRange('1,2') 307 '1,2' 308 >>> expandNumberRange('0') 309 '0' 310 311 @param numberRange: A human readable compact number range string 312 @type numberRange: str 313 314 @return: A complete sequence of numbers in a CSV string. 315 @rtype: str 316 317 """ 318 numStep = (10 if useTens else 1) 319 numbers = [] 320 for part in numberRange.split(','): 321 part = part.strip() 322 if '-' not in part: 323 numbers.append(part) 324 else: 325 begin, end = part.split('-') 326 for number in xrange(int(begin), int(end) + numStep, numStep): 327 numbers.append(str(number)) 328 329 return ','.join(numbers)

330

331 #------------------------------------------------------------------------------ 332 333 -def extractColumn(filePathName, colNum):

334 """ 335 Gobble all entries in the given column of a space separated text file into 336 a list. Lines that begin with the hash mark are treated as comments and are 337 ignored. Example: 338 339 >>> column = extractColumn("/disk47/sys/test/Utilities/test.cat", 6) 340 >>> list(column)[:2] 341 ['14.5723', '15.1406'] 342 343 @param filePathName: Path to space separated text file to be read. 344 @type filePathName: str 345 @param colNum: Column number of text to extract. 346 @type colNum: int 347 348 @return: A generator for all text in specified column of the file. 349 @rtype: generator(str) 350 351 @todo: Replace with extractColumns()? Could leave this method here for 352 speed and simplicity. Though normally we want more than one column 353 anyway! So, may just extractColumns(file, 3)[0] isn't so bad. 354 """ 355 return (line.split()[colNum] for line in ParsedFile(filePathName))

356

357 #------------------------------------------------------------------------------ 358 359 -def extractColumns(filePathName, columnList=None, numRows=None, dataType=str):

360 """ 361 Extracts from the given file the data in the given list of columns as a 362 list of strings for every column. Example: 363 364 >>> data = extractColumns("/disk47/sys/test/Utilities/test.cat", 365 ... columnList=[6, 7], dataType=float) 366 >>> print(data[0][0], data[0][1], data[1][0], data[1][1]) 367 14.5723 15.1406 0.0033 0.005 368 369 @param filePathName: Full path to the file to read. 370 @type filePathName: str 371 @param columnList: Optional list of indices for the columns to be read 372 (with the first column at index 0), otherwise all 373 columns are read. 374 @type columnList: list(int) 375 @param numRows: Optionally supply number of rows to read, otherwise 376 all rows are read. 377 @type numRows: int 378 @param dataType: Optionally convert entries from string to this Python 379 type. 380 @type dataType: type 381 382 @return: A list of column data, with each column represented by a list of 383 strings. 384 @rtype: list(list(dataType)) 385 386 @todo: Possibly alter to make use of the CSV module's abilities to handle 387 different dialects? Would simplify code a bit, and make more useful. 388 """ 389 if columnList: 390 data = [[] for _col in columnList] 391 392 for rowNum, line in enumerate(ParsedFile(filePathName)): 393 if numRows and rowNum == numRows: 394 break 395 396 entries = line.split() 397 if not columnList: 398 columnList = range(len(entries)) 399 data = [[] for _col in columnList] 400 401 for column, colIdx in zip(data, columnList): 402 column.append(entries[colIdx]) 403 404 return (data if dataType is str else 405 [list(map(dataType, column)) for column in data])

406

407 #------------------------------------------------------------------------------ 408 409 -def getDiskSpace(disks):

410 """ 411 Gets the available disk space for supplied list of disks. 412 413 @param disks: Sequence of disk paths (e.g. 414 L{SystemConstants.massStorageRaidFileSystem()}). 415 @type disks: sequence(str) or generator(str) 416 417 @return: A dictionary of sizes of the form (total, free) for each disk. 418 @rtype: dict(str:list(int, int)) 419 420 """ 421 cmd = ["df", "-P", "-k"] 422 spacePerDisk = {} 423 for disk in disks: 424 try: 425 p = Popen(cmd + [disk], stdout=PIPE) 426 resline = p.communicate()[0].strip().split('\n')[-1] 427 _a, _size, used, free, _p, _direc = resline.split() 428 if "disk38" in disk: 429 free = 0 430 spacePerDisk[disk] = [int(used) + int(free), int(free)] 431 except OSError: 432 pass 433 return spacePerDisk

434

435 #------------------------------------------------------------------------------ 436 437 -def getDuplicates(anIterable):

438 """ 439 Returns the set of items for which the groupBy method returns the same item 440 more than once. By default, this will simply return just the values in the 441 list that are duplicated. Removed groupBy key option, because it's better 442 to pre-process the iterable once prior to passing to this function and 443 sorting (in this usage of groupby, in other usages it's useful). Examples: 444 445 >>> getDuplicates([1, 2, 1, 0]) 446 set([1]) 447 >>> getDuplicates(x[0] for x in [(1,2), (3,4), (1,4)]) 448 set([1]) 449 450 @param anIterable: Any sequence or generator that can be iterated over. 451 @type anIterable: sequence or generator 452 453 @rtype: set 454 455 """ 456 # Counter uses more memory unnecessary for this task & is sometimes slower 457 return set(key for key, group in groupby(sorted(anIterable)) 458 if moreThanOneIn(group))

459

460 #------------------------------------------------------------------------------ 461 462 -def getListIndices(aList, value):

463 """ 464 Returns a list of all indices where a value occurs in the given list. 465 Example: 466 467 >>> getListIndices(["bob", "steve", "bob"], "bob") 468 [0, 2] 469 470 @param aList: List to search for occurrences of value. 471 @type aList: list 472 @param value: Value to find in list, that may occur multiple times. 473 @type value: object 474 475 @rtype: list(int) 476 477 @note: Can probably avoid the need to use this function by employing a 478 slightly different algorithm design, e.g. use a dictionary. 479 480 """ 481 indicesList = [] 482 try: 483 indicesList.append(aList.index(value)) 484 while True: 485 indicesList.append(aList.index(value, indicesList[-1] + 1)) 486 except ValueError: 487 return indicesList

488

489 #------------------------------------------------------------------------------ 490 491 -def getNextDisk(sysc, spacePerDisk=None, byPercentFree=False, preAllocMem=0):

492 """ 493 Gets the next available disk which is less than 99% full. 494 495 @param sysc: An initialised SystemConstants object. 496 @type sysc: SystemConstants 497 @param spacePerDisk: A dictionary of available disks and their size, 498 obtained automatically by L{getDiskSpace()} if not 499 supplied here. 500 @type spacePerDisk: dict(str:list(int, int)) 501 @param byPercentFree: If True, choose next disk based on percentage free 502 space available rather than absolute free space. 503 @type byPercentFree: bool 504 @param preAllocMem: Pre allocated memory in GB. 505 @type preAllocMem: int 506 507 @return: Path to the next free disk. 508 @rtype: str 509 510 @todo: Instead of taking a SystemConstants object, why not make this a 511 method of SystemConstants? 512 """ 513 leaveSpace = sysc.leaveSpaceOnMSRFS(byPercentFree) 514 if not spacePerDisk: 515 spacePerDisk = getDiskSpace(sysc.availableRaidFileSystem()) 516 517 disks = sorted(disk for disk in spacePerDisk 518 if disk not in sysc.criticalDisks 519 and disk not in sysc.developDisks) 520 for disk in disks: 521 totalSpace, freeSpace = spacePerDisk[disk] 522 if not byPercentFree: 523 totalSpace = 1 524 525 if (freeSpace - leaveSpace[disk] * totalSpace 526 > preAllocMem * sysc.one_megabyte): 527 528 return disk 529 530 raise MemoryError("There is no space left on the RAID array.")

531

532 #------------------------------------------------------------------------------ 533 534 -def getNumberItems(numberRange, useTens=False):

535 """ 536 Calculates the number of items expressed by a human-readable string of 537 number ranges. 538 539 >>> getNumberItems(numberRange(range(10))) 540 10 541 542 @param numberRange: List of numbers expressed as a human-readable string of 543 number ranges, as returned by Utilities.numberRange(). 544 @type numberRange: str 545 546 @return: Number of items expressed in the given list. 547 @rtype: int 548 549 @note: This isn't a sensible way of doing things, as numberRange() is 550 designed only for the purpose of printing human readable strings. It 551 shouldn't be used as a data container for processing. 552 553 """ 554 return len(expandNumberRange(numberRange, useTens=useTens).split(','))

555

556 #------------------------------------------------------------------------------ 557 558 -def getSystemMemoryKb():

559 """ 560 @return: Amount of available memory in kilobytes. 561 @rtype: int 562 563 """ 564 return int(open("/proc/meminfo").readline().split()[1])

565

566 #------------------------------------------------------------------------------ 567 568 -def groupByCounts(keyCounts, groupSize):

569 """ 570 Taking an ordered list of counts of a particular key, e.g. the results of 571 an SQL "SELECT key, count(*) ... GROUP BY key ORDER BY key", this function 572 returns a list of key ranges that contain up to the specified group size of 573 counts. Example: 574 575 >>> groupByCounts([(10001, 5), (10002, 3), (10003, 12), (10004, 6)], 576 ... groupSize=10) 577 [(10001, 10002), (10003, 10003), (10004, 10004)] 578 579 @param keyCounts: List of keys and their respective counts. 580 @type keyCounts: list(tuple(int, int)) 581 @param groupSize: Total number of counts in which to group keys by. 582 @type groupSize: int 583 584 @return: List of ranges in the form (min, max) of values of the keys, where 585 between these values the total counts are less than or equal to 586 the specified group size. 587 @rtype: list(tuple(int, int)) 588 589 """ 590 keyRanges = [] 591 if sum(count for key, count in keyCounts) > groupSize: 592 keyMin = keyMax = keyCounts[0][0] 593 chunkCount = 0 594 for key, count in keyCounts: 595 chunkCount += count 596 if chunkCount > groupSize: 597 keyRanges.append((keyMin, keyMax)) 598 keyMin = key 599 chunkCount = count 600 keyMax = key 601 keyRanges.append((keyMin, keyMax)) 602 603 return keyRanges

604

605 #------------------------------------------------------------------------------ 606 607 -def joinDict(aDict, joinStr=" = ", sepStr=", "):

608 """ 609 Like string.join, but operates on the contents of a dictionary instead of a 610 list. Joins dictionary keyword and value pairs into the string: 611 str(keyword) + joinStr + str(value) + sepStr etc. Example: 612 613 >>> joinDict(dict(a=1, b=2)) 614 'a = 1, b = 2' 615 616 @param aDict: Dictionary to process into a string. 617 @type aDict: dict 618 @param joinStr: String to insert between keywords and values. 619 @type joinStr: str 620 @param sepStr: String to insert between dictionary elements. 621 @type sepStr: str 622 623 @return: A string representing the contents of the dictionary. 624 @rtype: str 625 626 """ 627 return sepStr.join([joinStr.join((str(key), str(value))) 628 for key, value in aDict.iteritems()])

629

630 #------------------------------------------------------------------------------ 631 632 -def joinNested(aSeq, joinStr=", ", subJoinStr=None, seqIndex=None):

633 """ 634 Like string.join, but can handle nested (or un-nested) sequences of string- 635 castable objects. Example: 636 637 >>> joinNested([['a', 0], ['b', 1]]) 638 'a, 0, b, 1' 639 640 @param aSeq: A (nested) sequence string-castable objects. 641 @type aSeq: list or tuple 642 @param joinStr: String to insert between elements of the main sequence. 643 @type joinStr: str 644 @param subJoinStr: String to insert between elements of the nested sequence 645 (defaults to the same as joinStr). 646 @type subJoinStr: str 647 @param seqIndex: If specified, only the elements at this index in the 648 nested sequences are included. 649 @type seqIndex: int 650 651 @return: A string containing all of the elements of the (nested) sequence. 652 @rtype: str 653 654 """ 655 # Sanity checks 656 if not aSeq: 657 return '' 658 if not (isinstance(aSeq, list) or isinstance(aSeq, tuple)): 659 try: 660 aSeq.__getattribute__('next') 661 except AttributeError: 662 return str(aSeq) 663 else: 664 aSeq = list(aSeq) 665 666 # Prepare the sequence of strings for string.join 667 if isinstance(aSeq[0], list) or isinstance(aSeq[0], tuple): 668 # Nested sequence case - works recursively! 669 if seqIndex: 670 strSeq = [str(element[seqIndex]) for element in aSeq] 671 else: 672 if subJoinStr is None: 673 subJoinStr = joinStr 674 strSeq = [joinNested(nestSeq, joinStr=subJoinStr) 675 for nestSeq in aSeq] 676 else: 677 # Single sequence of, hopefully, string-castable objects 678 strSeq = map(str, aSeq) 679 680 # Return the joined sequence of strings 681 return joinStr.join(strSeq)

682

683 #------------------------------------------------------------------------------ 684 685 -def invertDict(aDict, forceReturnList=False):

686 """ 687 Inverts the dictionary in such a way that if the input dictionary's values 688 are lists, each item of this list will become a key with the input 689 dictionary's key as value. If several input dictionary's keys exist for one 690 input dictionary's value the inverted dict's values will be lists. Example: 691 692 >>> invertDict(dict(males=["bob", "chris"], females=["jane", "chris"])) 693 {'chris': ['males', 'females'], 'jane': ['females'], 'bob': ['males']} 694 695 @param aDict: Dictionary to invert. 696 @type aDict: dict 697 @param forceReturnList: If True, return dictionary where values are always 698 lists. 699 @type forceReturnList: bool 700 701 @return: Inverted dictionary. 702 @rtype: dict 703 704 """ 705 retDict = {} 706 try: 707 if not forceReturnList and len(set(aDict.itervalues())) == len(aDict): 708 # All values are unique 709 return dict((value, key) for key, value in aDict.iteritems()) 710 else: 711 # Some values repeated, therefore store them in a list. 712 for key, value in aDict.iteritems(): 713 retDict.setdefault(value, []).append(key) 714 715 except TypeError: # Values are lists 716 hasUniqueValues = not forceReturnList \ 717 and (len(set(unpackList(aDict.itervalues()))) == 718 len(list(unpackList(aDict.itervalues())))) 719 720 for key, aList in aDict.iteritems(): 721 for value in aList: 722 if hasUniqueValues: 723 retDict[value] = key 724 else: 725 retDict.setdefault(value, []).append(key) 726 727 return retDict

728

729 #------------------------------------------------------------------------------ 730 731 -def makeDateTime(time=None):

732 """ 733 Returns an archive date/time data type, defaulting to the current time if 734 no input argument is given. This defines the archive date/time data type, 735 and is presently set to the mx.DateTime defined type. This function defines 736 the time system for the archive (which is UTC). 737 738 @param time: If given, specify a time in the format: 739 "2005-01-29 23:59:59.99". 740 @type time: str 741 742 @return: A date/time in the archive defined type. 743 @rtype: mx.DateTime 744 745 """ 746 if time: 747 return mx.DateTime.DateTimeFrom(time) 748 else: 749 return mx.DateTime.utc()

750

751 #------------------------------------------------------------------------------ 752 753 -def makeMssqlTimeStamp():

754 """ 755 Creates a timestamp using makeTimeStamp and formats appropriately for use 756 in ingest strings for Microsoft SQL Server (and handles a bug in the 757 datetime object creation). 758 759 @return: A UTC date/time stamp string formatted for MS SQL Server. 760 @rtype: str 761 762 """ 763 return makeTimeStamp().replace(':60', ':59').replace(' ', 'T', 1).rstrip()

764

765 #------------------------------------------------------------------------------ 766 767 -def makeTimeStamp():

768 """ 769 @return: An archive time stamp as a string, as opposed to the internal date 770 time type. 771 @rtype: str 772 773 """ 774 return str(makeDateTime())

775

776 #------------------------------------------------------------------------------ 777 778 -def moreThanOneIn(values):

779 """ 780 Generator test function is equivalent to memory hog C{len(list(values)) > 1} 781 or C{sum(1 for _ in values) > 1} but doesn't iterate through all items. 782 In fact, memory is rarely an issue this the iterable here won't large 783 compared to the full program usage, but even the first form is slow. 784 Example: 785 786 >>> moreThanOneIn(x for x in []) 787 False 788 >>> moreThanOneIn(x for x in ['g']) 789 False 790 >>> moreThanOneIn(x for x in [0.1 ,0.2]) 791 True 792 >>> moreThanOneIn(x for x in [1, 2, 3]) 793 True 794 795 @param values: A generator to be evaluated. 796 @type values: generator 797 798 @return: True if values contains more than one item. 799 @rtype: bool 800 801 """ 802 return all((next(values, None) is not None, next(values, None) is not None))

803

804 #------------------------------------------------------------------------------ 805 806 -def multiSub(text, subs):

807 """ 808 Performs multiple string substitutions on the given string. Example: 809 810 >>> multiSub("UKIRT and the WSA", [("WSA", "VSA"), ("UKIRT", "VISTA")]) 811 'VISTA and the VSA' 812 813 @param text: String containing text to be substituted. 814 @type text: str 815 @param subs: List of text marker and substitution value pairs. 816 @type subs: list(tuple(str, str)) 817 818 @return: The original text string, with all marker values replaced by given 819 substitution values. 820 @rtype: str 821 822 """ 823 for marker, value in subs: 824 text = text.replace(marker, value) 825 return text

826

827 #------------------------------------------------------------------------------ 828 829 @contextmanager 830 -def noInterrupt():

831 """ Disables keyboard interrupts whilst in this context. 832 """ 833 signal.signal(signal.SIGINT, signal.SIG_IGN) 834 yield 835 signal.signal(signal.SIGINT, signal.SIG_DFL)

836

837 #------------------------------------------------------------------------------ 838 839 -def npop(aList, nMax=2, mode="topbot"):

840 """ 841 Divide aList in nMax sublists by populating it with items subsequently 842 taken from the top and the bottom of list. Example: 843 844 >>> npop([1, 2, 3, 4, 5]) 845 [[1, 5], [2, 4], [3]] 846 >>> npop([1, 2, 3, 4, 5, 6], nMax=3) 847 [[1, 6, 2, 5], [3, 4]] 848 849 @param aList: The list (of file names). 850 @type aList: list 851 @param nMax: Maximal number of sublists. Rounded up to next even number. 852 @type nMax: int 853 @param mode: Mode of picking items from the list: 'asc', 'desc', or 854 subsequently from the top and bottom. 855 @type mode: str 856 857 @return: List of sublists. 858 @rtype: list(list) 859 860 """ 861 nPairs = range((nMax + 1) // 2) 862 newList = [] 863 while aList: 864 subList = [] 865 try: 866 for _n in nPairs: 867 subList.append(aList.pop() if mode == "desc" else aList.pop(0)) 868 subList.append(aList.pop(0) if mode == "asc" else aList.pop()) 869 870 except IndexError: 871 pass # Incomplete sublist 872 873 newList.append(subList) 874 875 return newList

876

877 #------------------------------------------------------------------------------ 878 879 -def numberRange(numbers, sep=", ", useTens=False):

880 """ 881 Given a sequence of integers it returns that sequence as a string 882 representation of an ordered range of unique numbers. Example: 883 884 >>> numberRange([1, 2, 3, 5, 6, 7]) 885 '1-3, 5-7' 886 >>> numberRange([1, 3, 5, 7]) 887 '1, 3, 5, 7' 888 >>> numberRange([]) 889 'None' 890 891 @param numbers: Any sequence of integers. 892 @type numbers: sequence(int) 893 @param sep: Marker string to separate individual numbers. 894 @type sep: str 895 896 @return: A string representation of the range of the unique set of ordered 897 integers. 898 @rtype: str 899 900 """ 901 if not numbers: 902 return "None" 903 904 numStep = (10 if useTens else 1) 905 if useTens: 906 # if sorting is based on the tens, ie for the VSA/VVV, make sure VSA 907 # and VVV are sorted separately. 908 theNums = {'1':[], '2':[], 'x': []} 909 newNums = [] 910 for i in numbers: 911 if i % 10 in (1, 2): 912 theNums[str(i % 10)].append(i) 913 else: 914 theNums['x'].append(i) 915 for a in sorted(theNums): 916 newNums.extend(sorted(set(theNums[a]))) 917 numbers = newNums 918 else: 919 numbers = sorted(set(numbers)) 920 firstNum = numbers.pop(0) 921 rangeStr = str(firstNum) 922 nextNum = firstNum + numStep 923 while numbers: 924 curNum = numbers.pop(0) 925 if curNum == nextNum: 926 nextNum += numStep 927 if not numbers: 928 rangeStr += "-%s" % curNum 929 elif nextNum - firstNum == numStep: 930 rangeStr += "%s%s" % (sep, curNum) 931 firstNum = curNum 932 nextNum = firstNum + numStep 933 else: 934 rangeStr += "-%s%s%s" % (nextNum - numStep, sep, curNum) 935 firstNum = curNum 936 nextNum = firstNum + numStep 937 938 return rangeStr

939

940 #------------------------------------------------------------------------------ 941 942 -def orderedSet(seq, excludeList=None):

943 """ 944 Returns a list of the given sequence in the original order, but with 945 duplicates removed. Example: 946 947 >>> orderedSet([6, 4, 7, 4, 9, 1, 7]) 948 [6, 4, 7, 9, 1] 949 950 @param seq: Any ordered sequence containing duplicate values. 951 @type seq: sequence(X) 952 @param excludeList: A list of items to exclude from the returned list. 953 @type excludeList: list(X) 954 955 @return: Ordered list without duplicates. 956 @rtype: list(X) 957 958 """ 959 seen = set(excludeList or []) 960 return [x for x in seq if x not in seen and not seen.add(x)]

961

962 #------------------------------------------------------------------------------ 963 964 -def parseFloat(value):

965 """ 966 Parses a string value and converts to float. NaNs and failures all return 967 None. Example: 968 969 >>> parseFloat('3.1') 970 3.1 971 972 @param value: String containing just a floating-point value to parse. 973 @type value: str 974 975 @return: Floating-point value of parsed string or None if fails. 976 @rtype: float 977 """ 978 try: 979 value = float(value) 980 except ValueError as error: 981 if value == "---": 982 value = None 983 elif value.count('.') > 1: 984 value = float(value.rsplit('.')[0]) 985 else: 986 raise error 987 988 if math.isnan(value) or math.isinf(value): 989 value = None 990 991 return value

992

993 #------------------------------------------------------------------------------ 994 995 -def splitList(longList, chunkSize=2, noSingles=False):

996 """ 997 Splits a list into a list of equal sized chunks. If list is not equally 998 divisable then the last chunk just contains the remaining number of 999 elements. Example: 1000 1001 >>> list(splitList([1, 2, 3, 4])) 1002 [[1, 2], [3, 4]] 1003 >>> list(splitList([1, 2, 3, 4, 5, 6, 7], 3)) 1004 [[1, 2, 3], [4, 5, 6], [7]] 1005 >>> list(splitList([1, 2, 3, 4, 5, 6, 7], 3, noSingles=True)) 1006 [[1, 2, 3], [4, 5, 6, 7]] 1007 >>> list(splitList([1])) 1008 [[1]] 1009 >>> list(splitList([1], noSingles=True)) 1010 [] 1011 >>> list(splitList([])) 1012 [] 1013 1014 @param longList: The long list that needs to be split into a list of 1015 smaller sized chunks. 1016 @type longList: list(X) 1017 @param chunkSize: Number of elements per sub-list to divide the long list. 1018 @type chunkSize: int 1019 @param noSingles: If True, ensure no single item lists are created. 1020 @type noSingles: bool 1021 1022 @return: A generator for sub-lists containing the original list sub-divided 1023 into chunks. 1024 @rtype: generator(list(X)) 1025 1026 """ 1027 fixSingle = noSingles and len(longList) % chunkSize is 1 1028 numIdx = (len(longList) - 1 if fixSingle else len(longList)) - 1 1029 lastIdx = numIdx - numIdx % chunkSize 1030 indices = xrange(0, numIdx + 1, chunkSize) 1031 1032 return (longList[i:(i + chunkSize if i != lastIdx else len(longList))] 1033 for i in indices)

1034

1035 #------------------------------------------------------------------------------ 1036 1037 -def unpackList(combinedList):

1038 """ 1039 Given a list of lists, return a single sequence containing all of the 1040 elements of the combined list, as a generator. It also handles the more 1041 general case of a sequence of sequences, unlike sum(combinedList, []), 1042 which is the equivalent of the standard case. Example: 1043 1044 >>> ', '.join(unpackList([['a', 'b'], ['c', 'd']])) 1045 'a, b, c, d' 1046 >>> list(unpackList(splitList([1, 2, 3, 4]))) 1047 [1, 2, 3, 4] 1048 1049 @param combinedList: A list of lists or a generator for lists. 1050 @type combinedList: list(list(X)) or generator(list(X)) 1051 1052 @return: A generator for a sequence of just the elements of the original 1053 nested list. 1054 @rtype: generator(X) 1055 1056 """ 1057 return (element for subList in combinedList for element in subList)

1058

1059 #------------------------------------------------------------------------------ 1060 1061 -def naturalSorted(strings):

1062 "Sort strings naturally." 1063 return sorted(strings, key=naturalSortKey) 1064

1065 -def naturalSortKey(key):

1066 import re 1067 return [int(t) if t.isdigit() else t for t in re.split(r'(\d+)', key)]

1068

1069 #------------------------------------------------------------------------------ 1070 # @@TODO: Finishing tidying up and make more Python 2.6 pythonic. 1071 1072 -class Ratings(dict, MutableMapping):

1073 """ 1074 Ratings is mostly like a dictionary, with extra features: the value 1075 corresponding to each key is the 'score' for that key, and all keys are 1076 ranked in terms their scores. Values must be comparable; keys, as well as 1077 being hashable, must be comparable if any two keys may ever have the same 1078 corresponding value (i.e., may be "tied" on score). All mapping-like 1079 behavior is just as you would expect, such as: 1080 1081 >>> r = Ratings({"bob": 30, "john": 20}) 1082 >>> r.update({"paul": 20, "tom": 10}) 1083 >>> r.keys() 1084 ['tom', 'john', 'paul', 'bob'] 1085 >>> len(r) 1086 4 1087 >>> "paul" in r 1088 True 1089 >>> [r[key] for key in ["bob", "paul", "john", "tom"]] 1090 [30, 20, 20, 10] 1091 >>> r.get("nobody"), r.get("nobody", 0) 1092 (None, 0) 1093 1094 In addition to the mapping interface, we offer rating-specific methods. 1095 r.rating(key) returns the ranking of a 'key' in the ratings, with a ranking 1096 of 0 meaning the lowest score (when two keys have equal scores, the keys 1097 themselves are compared, to 'break the tie', and the lesser key gets a 1098 lower ranking): 1099 1100 >>> [r.rating(key) for key in ["bob", "paul", "john", "tom"]] 1101 [3, 2, 1, 0] 1102 1103 getValueByRating(ranking) and getKeyByRating(ranking) return the score and 1104 key, respectively, for a given ranking index: 1105 1106 >>> [r.getValueByRating(rating) for rating in range(4)] 1107 [10, 20, 20, 30] 1108 >>> [r.getKeyByRating(rating) for rating in range(4)] 1109 ['tom', 'john', 'paul', 'bob'] 1110 1111 An important feature is that the keys() method returns keys in ascending 1112 order of ranking, and all other related methods return lists or iterators 1113 fully consistent with this ordering: 1114 1115 >>> [key for key in r] 1116 ['tom', 'john', 'paul', 'bob'] 1117 >>> r.values() 1118 [10, 20, 20, 30] 1119 >>> r.items() 1120 [('tom', 10), ('john', 20), ('paul', 20), ('bob', 30)] 1121 1122 An instance can be modified (adding, changing and deleting key-score 1123 correspondences), and every method of that instance reflects the instance's 1124 current state at all times: 1125 1126 >>> r["tom"] = 100 1127 >>> r.items() 1128 [('john', 20), ('paul', 20), ('bob', 30), ('tom', 100)] 1129 1130 """ 1131 _rating = [] 1132 """ The crucial auxiliary data structure. A list of all (value, key) pairs, 1133 kept in "natural"ly-sorted order. 1134 """ 1135 #-------------------------------------------------------------------------- 1136

1137 - def __init__(self, *args, **kwds):

1138 """ This class gets instantiated just like 'dict'. 1139 """ 1140 dict.__init__(self, *args, **kwds) 1141 self._rating = sorted((v, k) for k, v in dict.iteritems(self))

1142 1143 #-------------------------------------------------------------------------- 1144

1145 - def copy(self):

1146 """ Provide an identical but independent copy. 1147 """ 1148 return Ratings(self)

1149 1150 #-------------------------------------------------------------------------- 1151

1152 - def __setitem__(self, k, v):

1153 """ Besides delegating to dict, we maintain self._rating. 1154 """ 1155 if k in self: 1156 del self._rating[self.rating(k)] 1157 dict.__setitem__(self, k, v) 1158 insort_left(self._rating, (v, k))

1159 1160 #-------------------------------------------------------------------------- 1161

1162 - def __delitem__(self, k):

1163 """ Besides delegating to dict, we maintain self._rating. 1164 """ 1165 del self._rating[self.rating(k)] 1166 dict.__delitem__(self, k)

1167 1168 #-------------------------------------------------------------------------- 1169 # Delegate some methods to dict explicitly to avoid getting 1170 # MutableMapping's slower (though correct) implementations instead 1171 1172 __len__ = dict.__len__ 1173 __contains__ = dict.__contains__ 1174 has_key = __contains__ 1175 update = MutableMapping.update 1176 1177 #-------------------------------------------------------------------------- 1178 # The key semantic connection between self._rating and the order of 1179 # self.keys() 1180

1181 - def __iter__(self):

1182 for _v, k in self._rating: 1183 yield k

1184 1185 iterkeys = __iter__ 1186

1187 - def keys(self):

1188 return list(self)

1189

1190 - def iteritems(self):

1191 for v, k in self._rating: 1192 yield k, v

1193

1194 - def items(self):

1195 return list(self.iteritems())

1196

1197 - def itervalues(self):

1198 for _, v in self.iteritems(): 1199 yield v

1200

1201 - def values(self):

1202 return [v for _, v in self.iteritems()]

1203 1204 #-------------------------------------------------------------------------- 1205 # Ratings-specific methods 1206

1207 - def rating(self, key):

1208 item = self[key], key 1209 i = bisect_left(self._rating, item) 1210 if item == self._rating[i]: 1211 return i 1212 raise LookupError("item not found in rating")

1213

1214 - def getValueByRating(self, rating):

1215 return self._rating[rating][0]

1216

1217 - def getKeyByRating(self, rating):

1218 return self._rating[rating][1]

1219

1220 - def getCountsPerRating(self):

1221 results = {} 1222 for item in self.values(): 1223 results.setdefault(item, 0) 1224 results[item] += 1 1225 return results

1226 1227 #------------------------------------------------------------------------------ 1228 1229 if __name__ == "__main__": 1230 import doctest 1231 doctest.testmod() 1232 1233 #------------------------------------------------------------------------------ 1234

Source Code for Module wsatools.Utilities