import numpy as np
import warnings
[docs]
def medstddev(data, mask=None, medi=False, axis=0):
"""Compute the stddev with respect to the median.
This is rather than the standard method of using the mean.
Parameters
----------
data : ndarray
An array from which to caculate the median standard deviation.
mask : 1D ndarray; optional
Boolean mask indicating the bad values with True.
Same shape as data. Defaults to None.
medi : boolean; optional
If True return a tuple with (stddev, median) of data. Defaults
to False.
axis : int; optional
The axis along wich the median std deviation is calculated.
Defaults to 0.
Returns
-------
float
The stadard deviation.
float; optional
The median; only returned if medi==True.
Examples
--------
.. highlight:: python
.. code-block:: python
>>> import medstdev as m
>>> a = np.array([1,3,4,5,6,7,7])
>>> std, med = m.medstddev(a, medi=True)
>>> print(median(a))
5.0
>>> print(med)
5.0
>>> print(std)
2.2360679775
>>> # use masks
>>> a = np.array([1,3,4,5,6,7,7])
>>> mask = np.array([False,False,False,True,True,True,True])
>>> std, med = m.medstddev(a, mask, medi=True)
>>> print(std)
1.58113883008
>>> print(med)
3.0
>>> # automatically mask invalid values
>>> a = np.array([np.nan, 1, 4, np.inf, 6])
>>> std, med = m.medstddev(a, medi=True)
>>> print(std, med)
(2.5495097567963922, 4.0)
>>> # critical cases:
>>> # only one value, return std = 0.0
>>> a = np.array([1, 4, 6])
>>> mask = np.array([True, True, False])
>>> std, med = m.medstddev(a, mask, medi=True)
>>> print(std, med)
(0.0, 6.0)
>>> # no good values, return std = nan, med = nan
>>> mask[-1] = True
>>> std, med = m.medstddev(a, mask, medi=True)
>>> print(std, med)
(nan, nan)
Notes
-----
MEANSTDDEV calculates the median, subtracts it from each value of
X, then uses this residual to calculate the standard deviation.
The numerically-stable method for calculating the variance from
moment.pro doesn't work for the median standard deviation. It
only works for the mean, because by definition the residuals from
the mean add to zero.
"""
# Default mask: only non-finite values are bad
if mask is None:
mask = ~np.isfinite(data)
# Apply the mask
data = np.ma.masked_where(mask, data)
# number of good values:
ngood = np.sum(~mask, axis=axis)
# calculate median of good values:
median = np.ma.median(data, axis=axis)
# residuals is data - median, masked values don't count:
residuals = data - median
# calculate standar deviation:
std = np.full(median.shape, np.nan) # initialize output
valid = ngood > 1
if np.any(valid):
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=RuntimeWarning)
with np.errstate(divide='ignore', invalid='ignore'):
std[valid] = np.ma.std(residuals, axis=axis, ddof=1)[valid]
# Convert masked arrays to just arrays
std = np.array(std)
median = np.array(median)
if std.shape == ():
# If just a single value, make sure using a shaped array
std = std.reshape(-1)
median = median.reshape(-1)
# critical case fixes:
if np.any(ngood == 0):
std[ngood == 0] = np.nan
median[ngood == 0] = np.nan
if np.any(ngood == 1):
std[ngood == 1] = 0.
if len(std) == 1:
std = std[0]
median = median[0]
# return statement:
if medi:
return (std, median)
return std