1
2
3 """
4 Statistics package for photometry.
5
6 @author: N.J.G. Cross
7 @org: WFAU, IfA, University of Edinburgh
8
9 @requires: Numpy, math, Scientific
10 """
11
12 import math
13 import numpy
14 from Scientific.Functions import LeastSquares as LSQ
15
16 import wsatools.DbConnect.DbConstants as dbc
17
18
20 """Calculates the biweight - Beers et al. 1990.
21 cc is the tuning constant -- 6.0 (approx 4sigma) is good for the location.
22 """
23 mui = biweightmui(xx, cc)
24 med = numpy.median(xx)
25 test = numpy.multiply(numpy.less(numpy.abs(mui), 1.), 1.)
26 denom = numpy.sum(numpy.multiply(numpy.multiply(numpy.subtract(xx, med), numpy.power(numpy.subtract(1., numpy.power(mui, 2.)), 2.)), test))
27 numer = numpy.sum(numpy.multiply(numpy.power(numpy.subtract(1., numpy.power(mui, 2.)), 2.), test))
28 if abs(numer) > 0:
29 rat = denom / numer
30 return med + rat
31 else:
32 return dbc.realDefault()
33
34
35
37 """ Calculates the biweight mui - Beers et al. 1990.
38 """
39 med, MD = MAD(xx)
40
41 return numpy.divide(numpy.subtract(xx, med), cc * MD)
42
43
44
46 """Calculates the biweight scale - Beers et al. 1990.
47 cc is the tuning constant -- 9.0 is good for the scale.
48 """
49 mui = biweightmui(xx, cc)
50 med = numpy.median(xx)
51 test = numpy.multiply(numpy.less(numpy.abs(mui), 1.), 1.)
52 denom = numpy.sum(numpy.multiply(numpy.multiply(numpy.power(numpy.subtract(xx, med), 2.), numpy.power(numpy.subtract(1., numpy.power(mui, 2.)), 4.)), test))
53 numer = numpy.sum(numpy.multiply(numpy.multiply(numpy.subtract(1., numpy.power(mui, 2.)), numpy.subtract(1., numpy.multiply(5., numpy.power(mui, 2.)))), test))
54 if abs(numer) > 0.:
55 rat = numpy.sqrt(denom) / abs(numer)
56 return numpy.sqrt(1.0 * numpy(xx).size) * rat
57 else:
58 return dbc.realDefault()
59
60
61
63 """This calculates the mean
64 """
65 if xx.shape[0] > 1:
66 mean = numpy.array(numpy.mean(xx, axis=0))
67 else:
68 mean = dbc.realDefault()
69 return mean
70
71
72
74 """
75 """
76 if xx.shape[0] > 1:
77 med, mad = MAD(xx, minSd)
78 else:
79 med, mad = dbc.realDefault(), dbc.realDefault()
80 return med, mad
81
82
83
85 if xx.shape[0] > 1:
86
87 sd = numpy.multiply(numpy.std(xx, axis=0), numpy.sqrt(numpy.divide(\
88 xx.shape[-1], numpy.subtract(xx.shape[-1], 1.))))
89 else:
90 sd = dbc.realDefault()
91 return sd
92
93
94
96 if xx.shape[0] > 2:
97 denom = numpy.multiply(numpy.multiply(
98 numpy.subtract(xx.shape[-1], 1.),
99 numpy.subtract(xx.shape[-1], 2.)), numpy.power(calcSd(xx), 3.))
100
101 skew = numpy.where(denom > 0,
102 numpy.divide(numpy.multiply(numpy.power(xx.shape[-1], 2.),
103 centralMoment(xx, 3)), denom),
104 dbc.realDefault())
105 else:
106 skew = dbc.realDefault()
107 return skew
108
109
110
112 data = numpy.array(data)
113 if data.shape[0] > 0:
114 return numpy.mean(numpy.power(numpy.subtract(data, numpy.mean(data,
115 axis=0)),
116 float(N)), axis=0)
117 else:
118 return dbc.realDefault()
119
120
121
122 -def chiSq(mag, rms, model, nParam):
123 """This returns the reduced chiSquared and number of degrees of freedom.
124 """
125
126 chisq = numpy.sum(numpy.power(numpy.divide(numpy.subtract(mag, model), rms), 2.))
127 ndof = len(mag) - nParam
128 return chisq / float(ndof), ndof
129
130
131
133 chisq = 0.
134 for point in data:
135 sigma = 1
136 if len(point) == 3:
137 sigma = point[2]
138 f = model(parameters, point[0])
139 chisq = chisq + ((f - point[1]) / sigma) ** 2
140 return chisq
141
142
143
146 """
147 Calculates mean and standard deviation of clipped distribution.
148
149 """
150 xx = numpy.array(xx)
151 if xx.size < 2:
152 return dbc.realDefault(), dbc.realDefault()
153
154 if clip == 0:
155 mean = calcMean(xx)
156 sd = calcSd(xx)
157 else:
158 test = 0
159 if imean == dbc.realDefault() and isd == dbc.realDefault():
160 mean = calcMean(xx)
161 sd = calcSd(xx)
162 else:
163 mean, sd = imean, isd
164
165 it = 0
166 while (test == 0) and it < Niter:
167 meanold, sdold = mean, sd
168 minV = mean - clip * sd
169 maxV = mean + clip * sd
170 xx_mask = maskArray(xx, minV, maxV)
171 mean = calcMean(xx_mask)
172 sd = calcSd(xx_mask)
173 xx_mask = []
174 if mean == meanold and sd == sdold:
175 test = 1
176 it += 1
177 del xx
178 if mean == 0. and sd == 0.:
179 mean, sd = dbc.realDefault(), dbc.realDefault()
180
181 return mean, sd
182
183
184
186 """
187 Calculates mean,median,standard deviation and median absolute deviation of
188 a clipped distribution. The distribution is clipped by a certain number of
189 median absolute deviations.
190
191 @param xx: The input list or array of values.
192 @type xx: list
193 @param clip: The number of 'standard deviations for clipping'
194 @type clip: float
195 @param Niter: The number iterations when clipping
196 @type Niter: int
197 @param imed: starting value for the median BEST NOT TO USE
198 @type imed: float
199 @param imad: starting value for the MAD BEST NOT TO USE
200 @type imad: float
201 @param retValues: string describing which values to return
202 @type retValues: str
203
204 @return: tuple of statistical values described in retValues
205 @rtype: tuple
206
207 """
208 xx = numpy.array(xx)
209 dimen = len(xx.shape)
210 if clip != 0. and dimen > 1:
211 print ("The array dimension is too large for clipping. Either do ",
212 "not clip or use a 1-d array")
213 return -1
214 clip = clip * 1.48
215
216 if not minSd:
217 minSd = getMinSd(xx)
218 if clip == 0.:
219 if xx.size > 0:
220 med, mad = calcMAD(xx, minSd)
221 else:
222 if dimen == 1:
223 med, mad = dbc.realDefault(), dbc.realDefault()
224 else:
225 med = numpy.multiply(numpy.ones, (numpy.shape(xx)[:-1],
226 numpy.float32),
227 dbc.realDefault())
228 mad = numpy.multiply(numpy.ones, (numpy.shape(xx)[:-1],
229 numpy.float32),
230 dbc.realDefault())
231 xx_mask = xx
232 else:
233 test = 0
234
235 if imed == dbc.realDefault() and imad == dbc.realDefault() and xx.size > 0:
236 med, mad = calcMAD(xx, minSd)
237 else:
238 if dimen == 1:
239 med, mad = imed, imad
240 else:
241 med = numpy.multiply(numpy.ones, (numpy.shape(xx)[:-1],
242 numpy.float32),
243 imed)
244 mad = numpy.multiply(numpy.ones, (numpy.shape(xx)[:-1],
245 numpy.float32),
246 imad)
247 it = 0
248 while (test == 0) and it < Niter:
249 medold, madold = med, mad
250 minV = med - (clip * mad)
251 maxV = med + (clip * mad)
252
253
254 xx_mask = maskArray(xx, minV, maxV)
255 if xx_mask.size > 0:
256 med, mad = calcMAD(xx_mask, minSd)
257 else:
258 med, mad = dbc.realDefault(), dbc.realDefault()
259 test = 1
260 if med == medold and mad == madold:
261 test = 1
262
263 it += 1
264
265 med = numpy.where(numpy.logical_and(numpy.equal(med, 0.),
266 numpy.equal(mad, 0.)),
267 dbc.realDefault(), med)
268 mad = numpy.where(numpy.logical_and(numpy.equal(med, 0.),
269 numpy.equal(mad, 0.)),
270 dbc.realDefault(), mad)
271 psd = numpy.where(numpy.greater(mad, dbc.realDefault()),
272 numpy.multiply(mad, 1.48),
273 dbc.realDefault())
274 returnValueList = retValues.split(',')
275 possValueList = ["median", "med", "mean", "mad", "psd", "stdev", "tot", "skew", "min", "max"]
276 valList = []
277 for val in returnValueList:
278 if val.lower() not in possValueList:
279 print ("<Warning>Return value %s not recognised. Will return"
280 " default value" % val)
281 valList.append(dbc.realDefault())
282 elif val.lower() == 'median' or val.lower() == 'med':
283 valList.append(med)
284 elif val.lower() == 'mean':
285 valList.append(calcMean(xx_mask))
286 elif val.lower() == 'mad':
287 valList.append(mad)
288 elif val.lower() == 'psd':
289 valList.append(psd)
290 elif val.lower() == 'tot':
291 if dimen > 1:
292 valList.append(numpy.array(xx_mask.shape[-1]))
293 else:
294 valList.append(xx_mask.size)
295 elif val.lower() == 'stdev':
296 valList.append(calcSd(xx_mask))
297 elif val.lower() == 'skew':
298 valList.append(calcSkew(xx_mask))
299 elif val.lower() == 'max':
300 valList.append(numpy.max(xx_mask))
301 elif val.lower() == 'min':
302 valList.append(numpy.min(xx_mask))
303
304 return tuple(valList)
305
306
307
309 """This calculates the mean value of x - sum(xy)/sum(y)
310 """
311 numer, denom = 0., 0.
312 for i in range(len(x)):
313 numer += float(y[i])
314 denom += float(x[i]) * float(y[i])
315 if numer != 0.:
316 return (denom / numer)
317 else:
318 print "Zero Division Error"
319 return dbc.realDefault()
320
321
322
324 """ Calculates log(Gamma(beta)).
325 Lanczos approximation. Numerical Recipes. |e|<2e-10.
326 This works for any complex argument with a nonnegative real part
327
328 @param betaArray: Array of input values for which the gamma function.
329 @type betaArray: numpy array
330 @return: log(Gamma(beta).
331 @rtype: numpy array
332 """
333 coeffs = [76.18009172947146, -86.5053203941677, 24.01409824083091,
334 - 1.231739572450155, 0.1208650973866179e-2, -0.5395239284953e-5]
335 xx, yy = betaArray, betaArray
336 zz = numpy.add(xx, 5.5)
337 gamma = numpy.subtract(numpy.multiply(numpy.add(xx, 0.5), numpy.log(numpy.where(zz > 0, zz, 1.))), zz)
338 summation = numpy.multiply(1.000000000190015, numpy.ones([betaArray.size], numpy.float64))
339 for coeff in coeffs:
340 yy = numpy.add(yy, 1)
341 summation = numpy.where(numpy.greater(yy, 0.),
342 numpy.add(summation, numpy.divide(coeff, numpy.where(numpy.not_equal(yy, 0.), yy, 1.))), 0.)
343
344 return numpy.where(numpy.logical_and(numpy.greater(xx, 0.), numpy.greater(summation, 0.)),
345 numpy.add(gamma, numpy.log(numpy.divide(numpy.multiply(2.5066282746310005, summation), xx))),
346 0.)
347
348
349
351 """ Calculates minimum standard deviation for a distribution.
352 This is to avoid zero values in the calculation of the median
353 absolute deviation. This looks to see if the data is binned/rounded
354 and if not, it will calculate a minimum based on double precision values
355
356 @param xx: array of input values
357 @type xx: numpy array
358 @return: minimum standard deviation
359 @rtype: float
360 """
361 if xx.size == 0:
362 return -1
363 if xx.size == 1:
364 return 0
365 if numpy.array(xx).dtype == int:
366 sf = 0
367 else:
368 sf = 0
369 test = False
370 distVals = list(numpy.unique(numpy.abs(xx)))
371 absMin = distVals[len(distVals) - 1] / 9.22e18
372 if absMin == 0.:
373 absMin = 1. / 9.22e18
374 posSFvals = []
375 ratios = []
376 while not test:
377 maxerr = numpy.max(numpy.abs(numpy.subtract(xx, numpy.round(xx, sf))))
378 minerr = numpy.min(numpy.abs(numpy.subtract(xx, numpy.round(xx, sf))))
379 if maxerr > 0:
380 rat = 10 ** (-sf) / maxerr
381 else:
382 rat = 10.
383 if ((10 ** (-sf)) < absMin):
384 posSFvals.append((sf, rat))
385 ratios.append(rat)
386 test = True
387 elif (maxerr < 10 ** (-(sf + 1)) and minerr < 1.0E-323):
388 posSFvals.append((sf, rat))
389 ratios.append(rat)
390 sf += 1
391 best = max(ratios)
392 for tsf, rat in posSFvals:
393 if best == rat:
394 sf = tsf
395
396
397
398
399 vals = list(numpy.unique(numpy.round(xx, sf)))
400 valsCopy = vals
401 endTerm = valsCopy.pop(len(vals) - 1)
402 valsCopy.insert(0, endTerm)
403 deltas = numpy.unique(numpy.abs(numpy.subtract(valsCopy, vals)))
404 if deltas[0] > 0.:
405 minVal = deltas[0]
406 elif len(deltas) > 1:
407 minVal = deltas[1]
408 else:
409 minVal = 10 ** (-sf)
410 return minVal
411
412
413
415 """
416 This returns the sorted unique values in a list of objects and the number
417 of times this value is listed. Also selects objects with a minimum count.
418
419 @param inputList: The input list of values (each value can be
420 any python object).
421 @type inputList: list
422 @param minCount: The minimum number of occurances of a value for it to
423 be returned
424 @type minCount: int
425
426 @return: List of unique values and the counts for these values.
427 @rtype: list
428
429 """
430 uniqueValues = sorted(set(inputList))
431 intgArray = numpy.array([uniqueValues.index(item) for item in inputList])
432 counts = numpy.bincount(intgArray)
433 if not minCount:
434 return zip(uniqueValues, counts)
435 else:
436 return [(value, count) for value, count in zip(uniqueValues, counts)
437 if count >= minCount]
438
439
440
441 -def MAD(xx, minSd=None):
442 """Median Absolute Deviation
443 """
444 med = numpy.median(xx, 0)
445 absdev = numpy.abs(numpy.subtract(xx, med))
446 mad = numpy.median(absdev, 0)
447 if not minSd:
448 minSd = getMinSd(xx)
449 mad = numpy.maximum(mad, numpy.multiply(numpy.ones(mad.shape, numpy.float32),
450 (minSd / 1.48)))
451 return med, mad
452
453
454
456 xxN = numpy.subtract(xx, (minV - 1.))
457 xx_mask = numpy.sort(numpy.multiply(xxN, numpy.logical_and(numpy.greater(xx, minV), numpy.less(xx, maxV))))
458 xxN = []
459 nmin = xx.size - int(numpy.sum(numpy.sum(numpy.logical_and(numpy.greater(xx, minV), numpy.less(xx, maxV)).astype(numpy.float32))))
460 nmax = xx.size
461 xx_mask = numpy.add(xx_mask[nmin:nmax], (minV - 1.))
462 return xx_mask
463
464
465
467 """Uses numpy to calculate polynomial model
468 """
469 outArray = numpy.zeros([magArray.size], numpy.float64)
470 for index, par in enumerate(param):
471 outArray = numpy.add(outArray, numpy.multiply(numpy.power(magArray,
472 index), par))
473 return outArray
474
475
476
477 -def probChisq(chiSqArray, ndofArray, nFinalPoints=10000, maxValueY=5.):
478 """Calculates the probability that the REDUCED chi-squared of a model
479 is less than a particular value. This is calculated by integrating the
480 area under the chi-squared curve up to this value. Inputs an array of
481 REDUCED chi-squared values and an equivalent array of numbers of
482 degrees-of-freedom for these values.
483
484 @param chiSqArray: array of reduced chisq values
485 @type chiSqArray: numpy array
486 @param ndofArray: array of number of degree of freedom values
487 @type ndofArray: numpy array
488 @param nFinalPoints: number of points in lookup table for integration
489 @type nFinalPoints: int
490 @param maxValueY: maximum value to integrate to. The p(ChiSq)~1 here.
491 @type maxValueY: float
492 @return: array of probabilities
493 @rtype: numpy array
494 """
495
496
497
498
499
500 nSteps = 2 * (nFinalPoints + 1)
501 uniqueNdof = numpy.unique(ndofArray)
502 dy = maxValueY / nSteps
503 yVector = numpy.multiply(numpy.arange(1, nSteps + 1), dy)
504 yArray = numpy.multiply(numpy.transpose(numpy.multiply(
505 numpy.ones([uniqueNdof.size, nSteps], numpy.float64), yVector)),
506 uniqueNdof)
507 uniqueNdofMsk = numpy.where(numpy.greater(uniqueNdof, 0.), uniqueNdof, 1)
508 unNdofd2 = numpy.multiply(uniqueNdofMsk, 0.5)
509 constantTermArray = numpy.where(numpy.greater(uniqueNdof, 0.),
510 numpy.multiply(numpy.power(2, unNdofd2), numpy.exp(gammln(unNdofd2))),
511 0.)
512 diffArray = numpy.where(numpy.greater(uniqueNdof, 0.),
513 numpy.multiply(numpy.divide(numpy.multiply(numpy.power(
514 yArray, numpy.multiply(numpy.ones([nSteps, unNdofd2.size],
515 numpy.float64), numpy.subtract(unNdofd2, 1.))), numpy.exp(
516 numpy.multiply(yArray, -0.5))), constantTermArray), uniqueNdofMsk), 0.)
517 areaVector = numpy.zeros([uniqueNdof.size], numpy.float64)
518
519 probArray = numpy.zeros([(nFinalPoints), uniqueNdof.size], numpy.float64)
520 pointDiff = 2 * dy
521 finalYvector = numpy.zeros([nFinalPoints], numpy.float64)
522 for point in range(nFinalPoints):
523 simpsonRuleSum = numpy.add(numpy.add(diffArray[2 * point],
524 diffArray[2 * point + 2]), numpy.multiply(diffArray[2 * point + 1], 4.))
525 areaVector = numpy.add(areaVector, numpy.multiply(simpsonRuleSum, (pointDiff / 6.)))
526 finalYvector[point] = yVector[2 * point + 1]
527 probArray[point] = areaVector
528
529
530
531 minYV = min(finalYvector)
532 index = numpy.where(numpy.greater(pointDiff, 0.), numpy.divide(numpy.add(chiSqArray, minYV), pointDiff), 0)
533 index = numpy.where(numpy.less_equal(index, (finalYvector.size - 1)), index, (finalYvector.size - 1))
534 upperIndex = numpy.minimum(numpy.ceil(index).astype(numpy.int32), (finalYvector.size - 1))
535 lowerIndex = numpy.subtract(upperIndex, 1)
536
537 ndofIndexArray = numpy.zeros([ndofArray.size], numpy.int32)
538 for ii, ndof in enumerate(list(uniqueNdof)):
539 if ndof > 0:
540 ndofIndexArray = numpy.add(ndofIndexArray, numpy.multiply(numpy.equal(
541 ndofArray, ndof).astype(numpy.int32), ii))
542 prob = numpy.nan_to_num(numpy.add(numpy.multiply(numpy.subtract(1,
543 numpy.subtract(index, lowerIndex)), probArray[lowerIndex, ndofIndexArray]),
544 numpy.multiply(numpy.subtract(1, numpy.subtract(upperIndex, index)),
545 probArray[upperIndex, ndofIndexArray])))
546 prob = numpy.maximum(numpy.minimum(prob, 1.), 0.)
547 return numpy.where(numpy.greater(ndofArray, 0), prob, dbc.realDefault())
548
549
550 -def probGauss(sigmaArray, nFinalPoints=10000, maxValueY=5.):
551 """Calculates the probability that the abs(sigma) of a standard normal
552 distribution
553 is less than a particular value. This is calculated by integrating the
554 area under the gaussian [N(0,1)] curve up to this value. Inputs an array
555 of standard absolute sigma.
556
557 @param sigmaArray: array of abs(sigma) from a N(0,1) gaussian
558 @type sigmaArray: numpy array
559 @param nFinalPoints: number of points in lookup table for integration
560 @type nFinalPoints: int
561 @param maxValueY: maximum value to integrate to. The p(ChiSq)~1 here.
562 @type maxValueY: float
563 @return: array of probabilities
564 @rtype: numpy array
565 """
566
567
568
569 nSteps = 2 * (nFinalPoints + 1)
570 dy = maxValueY / nSteps
571 yVector = numpy.multiply(numpy.arange(1, nSteps + 1), dy)
572 constantTerm = 2. / math.sqrt(2 * math.pi)
573 diffArray = numpy.multiply(numpy.exp(numpy.multiply(numpy.multiply(yVector, yVector), -0.5)), constantTerm)
574 areaVector = numpy.zeros([1], numpy.float64)
575
576 probArray = numpy.zeros([nFinalPoints], numpy.float64)
577 pointDiff = 2 * dy
578 finalYvector = numpy.zeros([nFinalPoints], numpy.float64)
579 for point in range(nFinalPoints):
580 simpsonRuleSum = numpy.add(numpy.add(diffArray[2 * point], diffArray[2 * point + 2]), numpy.multiply(diffArray[2 * point + 1], 4.))
581 areaVector = numpy.add(areaVector, numpy.multiply(simpsonRuleSum, (pointDiff / 6.)))
582 finalYvector[point] = yVector[2 * point + 1]
583 probArray[point] = areaVector
584
585
586 minYV = min(finalYvector)
587
588 index = numpy.divide(numpy.add(sigmaArray, minYV), pointDiff)
589 upperIndex = numpy.minimum(numpy.ceil(index).astype(numpy.int32), (finalYvector.size - 1))
590 lowerIndex = numpy.subtract(upperIndex, 1)
591 return numpy.add(numpy.multiply(numpy.subtract(1, numpy.subtract(index, lowerIndex)), probArray[lowerIndex]), numpy.multiply(numpy.subtract(1, numpy.subtract(upperIndex, index)), probArray[upperIndex]))
592
593
594
596 """Calculates the Pearson Correlation Coefficient."""
597 X = numpy.array(X)
598 Y = numpy.array(Y)
599 N = float(len(X))
600 sx = numpy.sum(X)
601 sy = numpy.sum(Y)
602 sx2 = numpy.sum(X ** 2)
603 sy2 = numpy.sum(Y ** 2)
604 sxy = numpy.sum(X * Y)
605 top = sxy - (sx * sy / N)
606 bot1 = sx2 - (sx * sx / N)
607 bot2 = sy2 - (sy * sy / N)
608 return top / math.sqrt(bot1 * bot2)
609
610
611
612 -def scaleError(diffXY, dataS, diffXYErr=[1.], rangeMin=None, rangeMax=None):
613 """This function divides the data into several bins and finds
614 whether there is a gradient in the data.
615 """
616 if rangeMin is None:
617 rangeMin = min(dataS)
618
619 if rangeMax is None:
620 rangeMax = max(dataS)
621
622 totalData = len(dataS)
623 if not totalData:
624 return dbc.realDefault(), dbc.realDefault(), dbc.realDefault()
625
626 data = []
627 if len(diffXYErr) == 1 and len(dataS) > 1:
628 for ii in range(totalData):
629 if dataS[ii] >= rangeMin and dataS[ii] <= rangeMax:
630 data.append((dataS[ii], diffXY[ii]))
631 elif len(diffXYErr) == len(dataS):
632 for ii in range(totalData):
633 if dataS[ii] >= rangeMin and dataS[ii] <= rangeMax:
634 data.append((dataS[ii], diffXY[ii], diffXYErr[ii]))
635 data.sort()
636 totalData = len(data)
637 if totalData < 10:
638 return dbc.realDefault(), dbc.realDefault(), dbc.realDefault()
639 elif totalData < 500:
640 noBin = 5
641 noPerBin = int(totalData / 5.)
642 elif totalData < 5000:
643 noBin = 10
644 noPerBin = int(totalData / 10.)
645 else:
646 noBin = int(totalData / 500.)
647 noPerBin = int(totalData / noBin)
648
649 lineData = []
650 if totalData >= 100:
651 for bb in range(noBin):
652 XYdata = []
653 Sdata = []
654 for gal in range(noPerBin):
655 gg = bb * noPerBin + gal
656 Sdata.append(data[gg][0])
657 XYdata.append(data[gg][1])
658 medS, _psdS = clippedMADHistogram(Sdata)
659 medXY, psdXY = clippedMADHistogram(XYdata)
660 lineData.append((medS, medXY, psdXY))
661 else:
662 if len(diffXYErr) == 1:
663 for gal in range(totalData):
664 lineData.append((float(data[gal][0]), float(data[gal][1]), 1.))
665 else:
666 for gal in range(totalData):
667 lineData.append((float(data[gal][0]), float(data[gal][1]),
668 float(data[gal][2])))
669
670 expPar = [0., 0.]
671 bestFit = LSQ.polynomialLeastSquaresFit(expPar, lineData)
672
673
674 expData = numpy.add(numpy.multiply(bestFit[0][1], dataS), bestFit[0][0])
675 diffXYexp = numpy.subtract(diffXY, expData)
676 _finalMed, finalPSTD = clippedMADHistogram(diffXYexp)
677
678 return bestFit[0][0], bestFit[0][1], finalPSTD
679
680
681
683 """ ?
684 """
685 if xx.size != sig.size:
686 print("Inconsistent shapes in numpy arrays for weightedMean. x array is"
687 " different to sigma array")
688
689 if xx.size < 1 or xx.size != sig.size:
690 return dbc.realDefault(), dbc.realDefault()
691
692 weight = numpy.where(numpy.greater(sig, 0.), (1 / sig) ** 2, 0.)
693 sumweight = numpy.sum(weight)
694 if not sumweight:
695 return dbc.realDefault(), dbc.realDefault()
696
697 sumweightdata = numpy.sum(xx * weight)
698 weightmean = sumweightdata / sumweight
699 if xx.size < 2:
700 return weightmean, dbc.realDefault()
701
702 sumweightdatammeansq = numpy.sum((xx - weightmean) ** 2 * weight)
703
704
705 weightsamplevar = sumweight / (sumweight ** 2 - numpy.sum(weight ** 2))
706 weightsamplevar *= sumweightdatammeansq
707 weightsamplesd = (math.sqrt(weightsamplevar) if weightsamplevar > 0 else
708 dbc.realDefault())
709
710 return weightmean, weightsamplesd
711
712
713
715 xx = numpy.array(xx)
716 sig = numpy.array(sig)
717 if xx.size > 1:
718 mean, sd = weightedMean(xx, sig)
719 elif xx.size == 1:
720 mean, sd = xx[0], sig[0]
721 else:
722 mean, sd = dbc.realDefault(), dbc.realDefault()
723 if clip == 0:
724 return mean, sd
725 else:
726 meanold = mean
727 sdold = sd
728 test = 0
729 it = 0
730 while (test == 0) and it < Niter:
731 minV = mean - clip * sd
732 maxV = mean + clip * sd
733
734
735 xx_mask = []
736 sig_mask = []
737 for ii in range(xx.size):
738 if xx[ii] >= minV and xx[ii] < maxV:
739 xx_mask.append(xx[ii])
740 sig_mask.append(sig[ii])
741 xx_mask = numpy.array(xx_mask)
742 sig_mask = numpy.array(sig_mask)
743 if xx_mask.size > 1:
744 mean, sd = weightedMean(xx_mask, sig_mask)
745 elif xx_mask.size == 1:
746 mean, sd = xx_mask[0], sig_mask[0]
747 else:
748 mean, sd = dbc.realDefault(), dbc.realDefault()
749 if sd < 0:
750 test = 1
751 if mean == meanold and sd == sdold:
752 test = 1
753 it += 1
754 return mean, sd
755
756
757
759 """
760 Calculates the Welch-Stetson statistic (1993, AJ, 105, 1813). The amount of
761 correlation in the variability between two bandpasses.
762
763 @param aMag: array of ?
764 @type aMag: numpy.array
765 @param aSigma: ?
766 @type aSigma: float
767 @param bMag: array of ?
768 @type bMag: numpy.array
769 @param bSigma: ?
770 @type bSigma: float
771
772 @return: Welch-Stetson statistic.
773 @rtype: float
774
775 """
776 if aMag.size != bMag.size:
777 return dbc.realDefault()
778 const = math.sqrt(aMag.size * float(aMag.size - 1.))
779 aWeightMean, _aWeightSigma = weightedMean(aMag, aSigma)
780 bWeightMean, _bWeightSigma = weightedMean(bMag, bSigma)
781
782 delaArray = numpy.divide(numpy.subtract(aMag, aWeightMean), aSigma)
783 delbArray = numpy.divide(numpy.subtract(bMag, bWeightMean), bSigma)
784 return numpy.nan_to_num(numpy.divide(numpy.sum(numpy.multiply(delaArray, delbArray)), const))
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862