# Source Code for Module wsatools.Statistics

```  1  #------------------------------------------------------------------------------
2  #\$Id: Statistics.py 9998 2013-08-09 08:44:31Z NicholasCross \$
3  """
4     Statistics package for photometry.
5
6     @author: N.J.G. Cross
7     @org:    WFAU, IfA, University of Edinburgh
8
9     @requires: Numpy, math, Scientific
10  """
11  #------------------------------------------------------------------------------
12  import math
13  import numpy
14  from   Scientific.Functions import LeastSquares as LSQ
15
16  import wsatools.DbConnect.DbConstants as dbc
17  #------------------------------------------------------------------------------
18
19 -def biweight(xx, cc):
20      """Calculates the biweight - Beers et al. 1990.
21      cc is the tuning constant -- 6.0 (approx 4sigma) is good for the location.
22      """
23      mui = biweightmui(xx, cc)
24      med = numpy.median(xx)
25      test = numpy.multiply(numpy.less(numpy.abs(mui), 1.), 1.)
26      denom = numpy.sum(numpy.multiply(numpy.multiply(numpy.subtract(xx, med), numpy.power(numpy.subtract(1., numpy.power(mui, 2.)), 2.)), test))
27      numer = numpy.sum(numpy.multiply(numpy.power(numpy.subtract(1., numpy.power(mui, 2.)), 2.), test))
28      if abs(numer) > 0:
29          rat = denom / numer
30          return med + rat
31      else:
32          return dbc.realDefault()
33
34  #------------------------------------------------------------------------------
35
36 -def biweightmui(xx, cc):
37      """ Calculates the biweight mui - Beers et al. 1990.
38      """
41      return numpy.divide(numpy.subtract(xx, med), cc * MD)
42
43  #------------------------------------------------------------------------------
44
45 -def biweightScale(xx, cc):
46      """Calculates the biweight scale - Beers et al. 1990.
47      cc is the tuning constant -- 9.0 is good for the scale.
48      """
49      mui = biweightmui(xx, cc)
50      med = numpy.median(xx)
51      test = numpy.multiply(numpy.less(numpy.abs(mui), 1.), 1.)
52      denom = numpy.sum(numpy.multiply(numpy.multiply(numpy.power(numpy.subtract(xx, med), 2.), numpy.power(numpy.subtract(1., numpy.power(mui, 2.)), 4.)), test))
53      numer = numpy.sum(numpy.multiply(numpy.multiply(numpy.subtract(1., numpy.power(mui, 2.)), numpy.subtract(1., numpy.multiply(5., numpy.power(mui, 2.)))), test))
54      if abs(numer) > 0.:
55          rat = numpy.sqrt(denom) / abs(numer)
56          return numpy.sqrt(1.0 * numpy(xx).size) * rat
57      else:
58          return dbc.realDefault()
59
60  #------------------------------------------------------------------------------
61
62 -def calcMean(xx):
63      """This calculates the mean
64      """
65      if xx.shape[0] > 1:
66          mean = numpy.array(numpy.mean(xx, axis=0))
67      else:
68          mean = dbc.realDefault()
69      return mean
70
71  #------------------------------------------------------------------------------
72
74      """
75      """
76      if xx.shape[0] > 1:
78      else:
79          med, mad = dbc.realDefault(), dbc.realDefault()
81
82  #------------------------------------------------------------------------------
83
84 -def calcSd(xx):
85      if xx.shape[0] > 1:
86          # @TODO: add in numpy.where
87          sd = numpy.multiply(numpy.std(xx, axis=0), numpy.sqrt(numpy.divide(\
88              xx.shape[-1], numpy.subtract(xx.shape[-1], 1.))))
89      else:
90          sd = dbc.realDefault()
91      return sd
92
93  #------------------------------------------------------------------------------
94
95 -def calcSkew(xx):
96      if xx.shape[0] > 2:
97          denom = numpy.multiply(numpy.multiply(
98              numpy.subtract(xx.shape[-1], 1.),
99              numpy.subtract(xx.shape[-1], 2.)), numpy.power(calcSd(xx), 3.))
100
101          skew = numpy.where(denom > 0,
102                  numpy.divide(numpy.multiply(numpy.power(xx.shape[-1], 2.),
103                                              centralMoment(xx, 3)), denom),
104                                              dbc.realDefault())
105      else:
106          skew = dbc.realDefault()
107      return skew
108
109  #------------------------------------------------------------------------------
110
111 -def centralMoment(data, N):
112      data = numpy.array(data)
113      if data.shape[0] > 0:
114          return numpy.mean(numpy.power(numpy.subtract(data, numpy.mean(data,
115                                                                       axis=0)),
116                                        float(N)), axis=0)
117      else:
118          return dbc.realDefault()
119
120  #------------------------------------------------------------------------------
121
122 -def chiSq(mag, rms, model, nParam):
123      """This returns the reduced chiSquared and number of degrees of freedom.
124      """
126      chisq = numpy.sum(numpy.power(numpy.divide(numpy.subtract(mag, model), rms), 2.))
127      ndof = len(mag) - nParam
128      return chisq / float(ndof), ndof
129
130  #------------------------------------------------------------------------------
131
132 -def chiSquare(model, parameters, data):
133      chisq = 0.
134      for point in data:
135          sigma = 1
136          if len(point) == 3:
137              sigma = point[2]
138          f = model(parameters, point[0])
139          chisq = chisq + ((f - point[1]) / sigma) ** 2
140      return chisq
141
142  #------------------------------------------------------------------------------
143
144 -def clippedHistogram(xx, clip=0, Niter=10, imean=dbc.realDefault(),
145                       isd=dbc.realDefault()):
146      """
147      Calculates mean and standard deviation of clipped distribution.
148
149      """
150      xx = numpy.array(xx)
151      if xx.size < 2:
152          return dbc.realDefault(), dbc.realDefault()
153
154      if clip == 0:
155          mean = calcMean(xx)
156          sd = calcSd(xx)
157      else:
158          test = 0
159          if imean == dbc.realDefault() and isd == dbc.realDefault():
160              mean = calcMean(xx)
161              sd = calcSd(xx)
162          else:
163              mean, sd = imean, isd
164
165          it = 0
166          while (test == 0) and it < Niter:
167              meanold, sdold = mean, sd
168              minV = mean - clip * sd
169              maxV = mean + clip * sd
174              if mean == meanold and sd == sdold:
175                  test = 1
176              it += 1
177      del xx
178      if mean == 0. and sd == 0.:
179          mean, sd = dbc.realDefault(), dbc.realDefault()
180
181      return mean, sd
182
183  #------------------------------------------------------------------------------
184
186      """
187      Calculates mean,median,standard deviation and median absolute deviation of
188      a clipped distribution. The distribution is clipped by a certain number of
189      median absolute deviations.
190
191      @param xx: The input list or array of values.
192      @type  xx: list
193      @param clip: The number of 'standard deviations for clipping'
194      @type  clip: float
195      @param Niter: The number iterations when clipping
196      @type  Niter: int
197      @param imed: starting value for the median BEST NOT TO USE
198      @type  imed: float
199      @param imad: starting value for the MAD BEST NOT TO USE
201      @param retValues: string describing which values to return
202      @type  retValues: str
203
204      @return: tuple of statistical values described in retValues
205      @rtype: tuple
206
207      """
208      xx = numpy.array(xx)
209      dimen = len(xx.shape)
210      if clip != 0. and dimen > 1:
211          print ("The array dimension is too large for clipping. Either do ",
212                 "not clip or use a 1-d array")
213          return -1
214      clip = clip * 1.48
215      # select data
216      if not minSd:
217          minSd = getMinSd(xx)
218      if clip == 0.:
219          if xx.size > 0:
221          else:
222              if dimen == 1:
223                  med, mad = dbc.realDefault(), dbc.realDefault()
224              else:
225                  med = numpy.multiply(numpy.ones, (numpy.shape(xx)[:-1],
226                                                 numpy.float32),
227                                     dbc.realDefault())
229                                                 numpy.float32),
230                                     dbc.realDefault())
232      else:
233          test = 0
234
235          if imed == dbc.realDefault() and imad == dbc.realDefault() and xx.size > 0:
237          else:
238              if dimen == 1:
240              else:
241                  med = numpy.multiply(numpy.ones, (numpy.shape(xx)[:-1],
242                                                 numpy.float32),
243                                     imed)
245                                                 numpy.float32),
247          it = 0
248          while (test == 0) and it < Niter:
250              minV = med - (clip * mad)
251              maxV = med + (clip * mad)
252              # Make sure all values are >0 apart from those outside range = 0.
253              # then remove all
257              else:
258                  med, mad = dbc.realDefault(), dbc.realDefault()
259                  test = 1
261                  test = 1
262
263              it += 1
264      # calculate final attributes
265      med = numpy.where(numpy.logical_and(numpy.equal(med, 0.),
267                      dbc.realDefault(), med)
273                      dbc.realDefault())
274      returnValueList = retValues.split(',')
275      possValueList = ["median", "med", "mean", "mad", "psd", "stdev", "tot", "skew", "min", "max"]
276      valList = []
277      for val in returnValueList:
278          if val.lower() not in possValueList:
279              print ("<Warning>Return value %s not recognised. Will return"
280                     " default value" % val)
281              valList.append(dbc.realDefault())
282          elif val.lower() == 'median' or val.lower() == 'med':
283              valList.append(med)
284          elif val.lower() == 'mean':
288          elif val.lower() == 'psd':
289              valList.append(psd)
290          elif val.lower() == 'tot':
291              if dimen > 1:
293              else:
295          elif val.lower() == 'stdev':
297          elif val.lower() == 'skew':
299          elif val.lower() == 'max':
301          elif val.lower() == 'min':
303
304      return tuple(valList)
305
306  #------------------------------------------------------------------------------
307
308 -def firstMomentDist(x, y):
309      """This calculates the mean value of x - sum(xy)/sum(y)
310      """
311      numer, denom = 0., 0.
312      for i in range(len(x)):
313          numer += float(y[i])
314          denom += float(x[i]) * float(y[i])
315      if numer != 0.:
316          return (denom / numer)
317      else:
318          print "Zero Division Error"
319          return dbc.realDefault()
320
321  #------------------------------------------------------------------------------
322
323 -def gammln(betaArray):
324      """ Calculates log(Gamma(beta)).
325      Lanczos approximation. Numerical Recipes. |e|<2e-10.
326      This works for any complex argument with a nonnegative real part
327
328      @param betaArray: Array of input values for which the gamma function.
329      @type  betaArray: numpy array
330      @return: log(Gamma(beta).
331      @rtype: numpy array
332      """
333      coeffs = [76.18009172947146, -86.5053203941677, 24.01409824083091,
334              - 1.231739572450155, 0.1208650973866179e-2, -0.5395239284953e-5]
335      xx, yy = betaArray, betaArray
337      gamma = numpy.subtract(numpy.multiply(numpy.add(xx, 0.5), numpy.log(numpy.where(zz > 0, zz, 1.))), zz)
338      summation = numpy.multiply(1.000000000190015, numpy.ones([betaArray.size], numpy.float64))
339      for coeff in coeffs:
341          summation = numpy.where(numpy.greater(yy, 0.),
342                      numpy.add(summation, numpy.divide(coeff, numpy.where(numpy.not_equal(yy, 0.), yy, 1.))), 0.)
343
344      return numpy.where(numpy.logical_and(numpy.greater(xx, 0.), numpy.greater(summation, 0.)),
346              0.)
347
348  #------------------------------------------------------------------------------
349
350 -def getMinSd(xx):
351      """ Calculates minimum standard deviation for a distribution.
352      This is to avoid zero values in the calculation of the median
353      absolute deviation. This looks to see if the data is binned/rounded
354      and if not, it will calculate a minimum based on double precision values
355
356      @param xx: array of input values
357      @type  xx: numpy array
358      @return: minimum standard deviation
359      @rtype: float
360      """
361      if xx.size == 0:
362          return -1
363      if xx.size == 1:
364          return 0
365      if numpy.array(xx).dtype == int:
366          sf = 0
367      else:
368          sf = 0
369          test = False
370          distVals = list(numpy.unique(numpy.abs(xx)))
371          absMin = distVals[len(distVals) - 1] / 9.22e18
372          if absMin == 0.:
373              absMin = 1. / 9.22e18
374          posSFvals = []
375          ratios = []
376          while not test:
377              maxerr = numpy.max(numpy.abs(numpy.subtract(xx, numpy.round(xx, sf))))
378              minerr = numpy.min(numpy.abs(numpy.subtract(xx, numpy.round(xx, sf))))
379              if maxerr > 0:
380                  rat = 10 ** (-sf) / maxerr
381              else:
382                  rat = 10.
383              if ((10 ** (-sf)) < absMin):
384                  posSFvals.append((sf, rat))
385                  ratios.append(rat)
386                  test = True
387              elif (maxerr < 10 ** (-(sf + 1)) and minerr < 1.0E-323):
388                  posSFvals.append((sf, rat))
389                  ratios.append(rat)
390              sf += 1
391          best = max(ratios)
392          for tsf, rat in posSFvals:
393              if best == rat:
394                  sf = tsf
395      #else:
396      #    print "Not integer or floating point type"
397      #    return -1
398
399      vals = list(numpy.unique(numpy.round(xx, sf)))
400      valsCopy = vals
401      endTerm = valsCopy.pop(len(vals) - 1)
402      valsCopy.insert(0, endTerm)
403      deltas = numpy.unique(numpy.abs(numpy.subtract(valsCopy, vals)))
404      if deltas[0] > 0.:
405          minVal = deltas[0]
406      elif len(deltas) > 1:
407          minVal = deltas[1]
408      else:
409          minVal = 10 ** (-sf)
410      return minVal #/math.sqrt(float(len(xx)-1))
411
412  #------------------------------------------------------------------------------
413
414 -def listCounts(inputList, minCount=None):
415      """
416      This returns the sorted unique values in a list of objects and the number
417      of times this value is listed. Also selects objects with a minimum count.
418
419      @param inputList: The input list of values (each value can be
420                        any python object).
421      @type  inputList: list
422      @param minCount: The minimum number of occurances of a value for it to
423                       be returned
424      @type  minCount: int
425
426      @return: List of unique values and the counts for these values.
427      @rtype:  list
428
429      """
430      uniqueValues = sorted(set(inputList))
431      intgArray = numpy.array([uniqueValues.index(item) for item in inputList])
432      counts = numpy.bincount(intgArray)
433      if not minCount:
434          return zip(uniqueValues, counts)
435      else:
436          return [(value, count) for value, count in zip(uniqueValues, counts)
437                                  if count >= minCount]
438
439  #------------------------------------------------------------------------------
440
442      """Median Absolute Deviation
443      """
444      med = numpy.median(xx, 0)
445      absdev = numpy.abs(numpy.subtract(xx, med))
447      if not minSd:
448          minSd = getMinSd(xx)
450                                           (minSd / 1.48)))
452
453  #------------------------------------------------------------------------------
454
456      xxN = numpy.subtract(xx, (minV - 1.))
457      xx_mask = numpy.sort(numpy.multiply(xxN, numpy.logical_and(numpy.greater(xx, minV), numpy.less(xx, maxV))))
458      xxN = []
459      nmin = xx.size - int(numpy.sum(numpy.sum(numpy.logical_and(numpy.greater(xx, minV), numpy.less(xx, maxV)).astype(numpy.float32))))
460      nmax = xx.size
463
464  #------------------------------------------------------------------------------
465
466 -def polynomialModel(param, magArray):
467      """Uses numpy to calculate polynomial model
468      """
469      outArray = numpy.zeros([magArray.size], numpy.float64)
470      for index, par in enumerate(param):
472                                                                 index), par))
473      return outArray
474
475  #------------------------------------------------------------------------------
476
477 -def probChisq(chiSqArray, ndofArray, nFinalPoints=10000, maxValueY=5.):
478      """Calculates the probability that the REDUCED chi-squared of a model
479      is less than a particular value. This is calculated by integrating the
480      area under the chi-squared curve up to this value. Inputs an array of
481      REDUCED chi-squared values and an equivalent array of numbers of
482      degrees-of-freedom for these values.
483
484      @param chiSqArray: array of reduced chisq values
485      @type  chiSqArray: numpy array
486      @param ndofArray: array of number of degree of freedom values
487      @type  ndofArray: numpy array
488      @param nFinalPoints: number of points in lookup table for integration
489      @type  nFinalPoints: int
490      @param maxValueY: maximum value to integrate to. The p(ChiSq)~1 here.
491      @type  maxValueY: float
492      @return: array of probabilities
493      @rtype: numpy array
494      """
495
496      #@@TODO: Large values of ndof - get infinities. Can approx by gaussian.
497      #        Need to work out when this is necessary.
498      # first calculate lookup tables in memory
499      # @FIXME: small values of ndof - rounding errors
500      nSteps = 2 * (nFinalPoints + 1)
501      uniqueNdof = numpy.unique(ndofArray)
502      dy = maxValueY / nSteps
503      yVector = numpy.multiply(numpy.arange(1, nSteps + 1), dy)
504      yArray = numpy.multiply(numpy.transpose(numpy.multiply(
505              numpy.ones([uniqueNdof.size, nSteps], numpy.float64), yVector)),
506              uniqueNdof)
507      uniqueNdofMsk = numpy.where(numpy.greater(uniqueNdof, 0.), uniqueNdof, 1)
508      unNdofd2 = numpy.multiply(uniqueNdofMsk, 0.5)
509      constantTermArray = numpy.where(numpy.greater(uniqueNdof, 0.),
510                          numpy.multiply(numpy.power(2, unNdofd2), numpy.exp(gammln(unNdofd2))),
511                          0.)
512      diffArray = numpy.where(numpy.greater(uniqueNdof, 0.),
513                  numpy.multiply(numpy.divide(numpy.multiply(numpy.power(
514                  yArray, numpy.multiply(numpy.ones([nSteps, unNdofd2.size],
515                  numpy.float64), numpy.subtract(unNdofd2, 1.))), numpy.exp(
516                  numpy.multiply(yArray, -0.5))), constantTermArray), uniqueNdofMsk), 0.)
517      areaVector = numpy.zeros([uniqueNdof.size], numpy.float64)
518      # Use simpsons rule.
519      probArray = numpy.zeros([(nFinalPoints), uniqueNdof.size], numpy.float64)
520      pointDiff = 2 * dy
521      finalYvector = numpy.zeros([nFinalPoints], numpy.float64)
522      for point in range(nFinalPoints):
524              diffArray[2 * point + 2]), numpy.multiply(diffArray[2 * point + 1], 4.))
525          areaVector = numpy.add(areaVector, numpy.multiply(simpsonRuleSum, (pointDiff / 6.)))
526          finalYvector[point] = yVector[2 * point + 1]
527          probArray[point] = areaVector
528
529      # now find best indices to look up probability. interpolate between points
530      #finalYvector=numpy.multiply(numpy.arange(1,nFinalPoints+1),pointDiff)
531      minYV = min(finalYvector)
532      index = numpy.where(numpy.greater(pointDiff, 0.), numpy.divide(numpy.add(chiSqArray, minYV), pointDiff), 0)
533      index = numpy.where(numpy.less_equal(index, (finalYvector.size - 1)), index, (finalYvector.size - 1))
534      upperIndex = numpy.minimum(numpy.ceil(index).astype(numpy.int32), (finalYvector.size - 1))
535      lowerIndex = numpy.subtract(upperIndex, 1)
536      # indexArray for ndof ndofArray -- uniqueNdof
537      ndofIndexArray = numpy.zeros([ndofArray.size], numpy.int32)
538      for ii, ndof in enumerate(list(uniqueNdof)):
539          if ndof > 0:
541                  ndofArray, ndof).astype(numpy.int32), ii))
543              numpy.subtract(index, lowerIndex)), probArray[lowerIndex, ndofIndexArray]),
544              numpy.multiply(numpy.subtract(1, numpy.subtract(upperIndex, index)),
545                             probArray[upperIndex, ndofIndexArray])))
546      prob = numpy.maximum(numpy.minimum(prob, 1.), 0.)
547      return numpy.where(numpy.greater(ndofArray, 0), prob, dbc.realDefault())
548  #------------------------------------------------------------------------------
549
550 -def probGauss(sigmaArray, nFinalPoints=10000, maxValueY=5.):
551      """Calculates the probability that the abs(sigma) of a standard normal
552      distribution
553      is less than a particular value. This is calculated by integrating the
554      area under the gaussian [N(0,1)] curve up to this value. Inputs an array
555      of standard absolute sigma.
556
557      @param sigmaArray: array of abs(sigma) from a N(0,1) gaussian
558      @type  sigmaArray: numpy array
559      @param nFinalPoints: number of points in lookup table for integration
560      @type  nFinalPoints: int
561      @param maxValueY: maximum value to integrate to. The p(ChiSq)~1 here.
562      @type  maxValueY: float
563      @return: array of probabilities
564      @rtype: numpy array
565      """
566
567      # first calculate lookup table in memory
568
569      nSteps = 2 * (nFinalPoints + 1)
570      dy = maxValueY / nSteps
571      yVector = numpy.multiply(numpy.arange(1, nSteps + 1), dy)
572      constantTerm = 2. / math.sqrt(2 * math.pi)
573      diffArray = numpy.multiply(numpy.exp(numpy.multiply(numpy.multiply(yVector, yVector), -0.5)), constantTerm)
574      areaVector = numpy.zeros([1], numpy.float64)
575      # Use simpsons rule.
576      probArray = numpy.zeros([nFinalPoints], numpy.float64)
577      pointDiff = 2 * dy
578      finalYvector = numpy.zeros([nFinalPoints], numpy.float64)
579      for point in range(nFinalPoints):
580          simpsonRuleSum = numpy.add(numpy.add(diffArray[2 * point], diffArray[2 * point + 2]), numpy.multiply(diffArray[2 * point + 1], 4.))
581          areaVector = numpy.add(areaVector, numpy.multiply(simpsonRuleSum, (pointDiff / 6.)))
582          finalYvector[point] = yVector[2 * point + 1]
583          probArray[point] = areaVector
584      # now find best indices to look up probability. interpolate between points
585
586      minYV = min(finalYvector)
587      # @TODO: add in numpy.where
588      index = numpy.divide(numpy.add(sigmaArray, minYV), pointDiff)
589      upperIndex = numpy.minimum(numpy.ceil(index).astype(numpy.int32), (finalYvector.size - 1))
590      lowerIndex = numpy.subtract(upperIndex, 1)
591      return numpy.add(numpy.multiply(numpy.subtract(1, numpy.subtract(index, lowerIndex)), probArray[lowerIndex]), numpy.multiply(numpy.subtract(1, numpy.subtract(upperIndex, index)), probArray[upperIndex]))
592
593  #------------------------------------------------------------------------------
594
595 -def Pearson(X, Y):
596      """Calculates the Pearson Correlation Coefficient."""
597      X = numpy.array(X)
598      Y = numpy.array(Y)
599      N = float(len(X))
600      sx = numpy.sum(X)
601      sy = numpy.sum(Y)
602      sx2 = numpy.sum(X ** 2)
603      sy2 = numpy.sum(Y ** 2)
604      sxy = numpy.sum(X * Y)
605      top = sxy - (sx * sy / N)
606      bot1 = sx2 - (sx * sx / N)
607      bot2 = sy2 - (sy * sy / N)
609
610  #------------------------------------------------------------------------------
611
612 -def scaleError(diffXY, dataS, diffXYErr=[1.], rangeMin=None, rangeMax=None):
613      """This function divides the data into several bins and finds
614      whether there is a gradient in the data.
615      """
616      if rangeMin is None:
617          rangeMin = min(dataS)
618
619      if rangeMax is None:
620          rangeMax = max(dataS)
621
622      totalData = len(dataS)
623      if not totalData:
624          return dbc.realDefault(), dbc.realDefault(), dbc.realDefault()
625
626      data = []
627      if len(diffXYErr) == 1 and len(dataS) > 1:
628          for ii in range(totalData):
629              if dataS[ii] >= rangeMin and dataS[ii] <= rangeMax:
630                  data.append((dataS[ii], diffXY[ii]))
631      elif len(diffXYErr) == len(dataS):
632          for ii in range(totalData):
633              if dataS[ii] >= rangeMin and dataS[ii] <= rangeMax:
634                  data.append((dataS[ii], diffXY[ii], diffXYErr[ii]))
635      data.sort()
636      totalData = len(data)
637      if totalData < 10:
638          return dbc.realDefault(), dbc.realDefault(), dbc.realDefault() # Not enough data
639      elif totalData < 500:
640          noBin = 5
641          noPerBin = int(totalData / 5.)
642      elif totalData < 5000:
643          noBin = 10
644          noPerBin = int(totalData / 10.)
645      else:
646          noBin = int(totalData / 500.)
647          noPerBin = int(totalData / noBin)
648
649      lineData = []
650      if totalData >= 100:
651          for bb in range(noBin):
652              XYdata = []
653              Sdata = []
654              for gal in range(noPerBin):
655                  gg = bb * noPerBin + gal
656                  Sdata.append(data[gg][0])
657                  XYdata.append(data[gg][1])
660              lineData.append((medS, medXY, psdXY))
661      else:
662          if len(diffXYErr) == 1:
663              for gal in range(totalData):
664                  lineData.append((float(data[gal][0]), float(data[gal][1]), 1.))
665          else:
666              for gal in range(totalData):
667                  lineData.append((float(data[gal][0]), float(data[gal][1]),
668                                   float(data[gal][2])))
669
670      expPar = [0., 0.]
671      bestFit = LSQ.polynomialLeastSquaresFit(expPar, lineData)
672
673      # find SD with scale error removed.
674      expData = numpy.add(numpy.multiply(bestFit[0][1], dataS), bestFit[0][0])
675      diffXYexp = numpy.subtract(diffXY, expData)
677
678      return bestFit[0][0], bestFit[0][1], finalPSTD
679
680  #------------------------------------------------------------------------------
681
682 -def weightedMean(xx, sig):
683      """ ?
684      """
685      if xx.size != sig.size:
686          print("Inconsistent shapes in numpy arrays for weightedMean. x array is"
687                " different to sigma array")
688
689      if xx.size < 1 or xx.size != sig.size:
690          return dbc.realDefault(), dbc.realDefault()
691
692      weight = numpy.where(numpy.greater(sig, 0.), (1 / sig) ** 2, 0.)
693      sumweight = numpy.sum(weight)
694      if not sumweight:
695          return dbc.realDefault(), dbc.realDefault()
696
697      sumweightdata = numpy.sum(xx * weight)
698      weightmean = sumweightdata / sumweight
699      if xx.size < 2:
700          return weightmean, dbc.realDefault()
701
702      sumweightdatammeansq = numpy.sum((xx - weightmean) ** 2 * weight)
703      # @FIXME: divide by zero?
704      # @FIXME: generally incorrect?
705      weightsamplevar = sumweight / (sumweight ** 2 - numpy.sum(weight ** 2))
706      weightsamplevar *= sumweightdatammeansq
707      weightsamplesd = (math.sqrt(weightsamplevar) if weightsamplevar > 0 else
708                        dbc.realDefault())
709
710      return weightmean, weightsamplesd
711
712  #------------------------------------------------------------------------------
713
714 -def clippedWeightedMean(xx, sig, clip, Niter=5):
715      xx = numpy.array(xx)
716      sig = numpy.array(sig)
717      if xx.size > 1:
718          mean, sd = weightedMean(xx, sig)
719      elif xx.size == 1:
720          mean, sd = xx[0], sig[0]
721      else:
722          mean, sd = dbc.realDefault(), dbc.realDefault()
723      if clip == 0:
724          return mean, sd
725      else:
726          meanold = mean
727          sdold = sd
728          test = 0
729          it = 0
730          while (test == 0) and it < Niter:
731              minV = mean - clip * sd
732              maxV = mean + clip * sd
733              # Make sure all values are >0 apart from those outside range = 0.
734              # then remove all
737              for ii in range(xx.size):
738                  if xx[ii] >= minV and xx[ii] < maxV:
747              else:
748                  mean, sd = dbc.realDefault(), dbc.realDefault()
749              if sd < 0:
750                  test = 1
751              if mean == meanold and sd == sdold:
752                  test = 1
753              it += 1
754          return mean, sd
755
756  #------------------------------------------------------------------------------
757
758 -def welchStetson(aMag, aSigma, bMag, bSigma):
759      """
760      Calculates the Welch-Stetson statistic (1993, AJ, 105, 1813). The amount of
761      correlation in the variability between two bandpasses.
762
763      @param aMag:   array of ?
764      @type  aMag:   numpy.array
765      @param aSigma: ?
766      @type  aSigma: float
767      @param bMag:   array of ?
768      @type  bMag:   numpy.array
769      @param bSigma: ?
770      @type  bSigma: float
771
772      @return: Welch-Stetson statistic.
773      @rtype:  float
774
775      """
776      if aMag.size != bMag.size:
777          return dbc.realDefault()
778      const = math.sqrt(aMag.size * float(aMag.size - 1.))
779      aWeightMean, _aWeightSigma = weightedMean(aMag, aSigma)
780      bWeightMean, _bWeightSigma = weightedMean(bMag, bSigma)
782      delaArray = numpy.divide(numpy.subtract(aMag, aWeightMean), aSigma)
783      delbArray = numpy.divide(numpy.subtract(bMag, bWeightMean), bSigma)
784      return numpy.nan_to_num(numpy.divide(numpy.sum(numpy.multiply(delaArray, delbArray)), const))
785
786  #------------------------------------------------------------------------------
787  # Change log:
788  #
789  #  2-Nov-2005,  NJC: Commenced file
790  #  2-Nov-2005,  NJC: Adding scaleError function
791  #  3-Nov-2005,  NJC: Finished scaleError function
792  # 24-Nov-2005,  NJC: Edited scaleError for case of no data,
793  #                    imported Utilities and LeastSquares as LSQ
794  # 25-Nov-2005,  NJC: Removed print statements
795  # 25-Nov-2005,  NJC: Minor correction
796  #  5-Mar-2006,  NJC: Revised clippedHistogram to make it faster on large
797  #                    arrays.
800  # 10-Mar-2006,  NJC: Fixed memory leaks, made clippedHistogram and
801  #                    clippedMADHistogram faster and more reliable
802  #  4-Apr-2006,  NJC: Fixed bugs in scaleError and added more option.
803  # 26-May-2006,  NJC: Revised clippedHistogram and clippedMADHistogram to
804  #                    have an optional no of iterations.
805  #  7-Jun-2006,  RSC: Replaced utils.sortList() with fast, new Python 2.4 method
806  # 21-Aug-2006,  NJC: Fixed bug to remove rounding error.
807  # 17-Apr-2007,  NJC: Fixed default values.
808  # 23-Apr-2007,  NJC: Set Niter default values to 10. Set clip in clippedMAD
809  #                    to 1.48times input to make it like sd.
810  # 07-Jun-2007,  NJC: Uses numpy and numpy. Works, but need to update
811  #                    all code to use numpy only eventually. Problem with
812  #                    scientific.
813  # 13-Jun-2007,  NJC: Moved towards numpy only - still need to do a lot of
814  #                    checks though.
815  # 13-Jun-2007,  NJC: Replaced median with numpy.median
816  # 15-Aug-2007,  NJC: Fixed clippedHistogram - it tries to convert
817  #                    a numpy object to a numpy object.
818  # 24-Aug-2007,  NJC: Using robust statistics in scaleError (MAD not SD)
819  # 30-Aug-2007,  NJC: Fully replaced numarray with numpy. Corrected
820  #                    clippedMADHistogram to be fully median absolute deviation.
821  #                    This now has options - mean of the final distribution
822  #                    and a pseudo atandard deviation - 1.48 times mean.
823  #                    Default output is median and pseudo standard deviation.
824  #                    More options to follow.
825  #  6-Sep-2007,  NJC: Added getMinSd function. This calculates the minimum value
826  #                    that standard deviation should have based on the accuracy
827  #                    of the data or the intervals between points. This
828  #                    is used to stop the MAD being evaluated as zero.
829  # 18-Sep-2007,  NJC: Fixed bug in getMinSd. Added centralMoment function and
830  #                    now clippedMADHistogram gives optional output "tot".
831  # 22-Sep-2007,  NJC: Added chiSq function
832  # 05-Oct-2007,  NJC: Fixed bug in getMinSd
833  # 05-Nov-2007,  NJC: Fixed bug in getMinSd
834  # 08-Nov-2007,  NJC: Edited clippedMADHistogram to calculate getMinSd at
835  #                    the start. Moved functions so that they are in
836  #                    alphabetical order. Documented some functions.
837  #                    Added new functions gammln, probGauss,probChisq.
838  # 09-Nov-2007,  NJC: Fixed integer related bug in getMinSd
839  # 15-Nov-2007,  NJC: Converted weightedMean calculation to one which uses
840  #                    numpy. Use two modules, clippedWeightedMean and
841  #                    weighted mean to avoid repeats. Fixed error in weightMean
842  #                    Added in better error handling.
843  # 23-Nov-2007,  NJC: Fixed weightedMean to avoid rounding error.
845  #                    correctly handle zero length arrays.
846  # 14-Feb-2008,  NJC: Added listCounts() function that makes use of fast
847  #                    numpy.bincount() to evaluate the number of times each
848  #                    value occurs in a list. Value can be anything: integer,
849  #                    tuple,list,etc.
850  # 18-Feb-2008,  NJC: Changed Statistics.clippedMADHistogram so that it can
851  #                    take 2-D arrays in some modes. Added calculation of skew.
852  # 19-Feb-2008,  NJC: Edited calcMAD,calcMean,calcSd,calcSkew so that they
853  #                    use a minimum value of the first axis rather than minimum
854  #                    array size. Also set minimum for skew to be >2 rather
855  #                    than >1.
856  # 21-Feb-2008,  NJC: Fixed bug in xx.shape calls for 2-D arrays.
857  #               NJC: Fixed order of med,mad in calcMAD. Improved indexing in
858  #                    probChisq
859  #  6-Jul-2008,  NJC: Made sure that return values works with upper and lower
860  #                    cases. Also returns min and max.
861  # 28-Jul-2008,  NJC: Added in welchStetson statistic function.
862
<!--
expandto(location.href);
// -->

```

 Generated by Epydoc 3.0.1 on Mon Sep 8 15:47:14 2014 http://epydoc.sourceforge.net