Source code for eureka.S3_data_reduction.sigrej

import numpy as np
from ..lib import medstddev as msd


[docs]def sigrej(data, sigma, mask=None, estsig=None, ival=False, axis=0,
           fmean=False, fstddev=False, fmedian=False, fmedstddev=False):
    '''This function flags outlying points in a data set using sigma rejection.

    Parameters
    ----------
    data : ndarray
        Array of points to apply sigma rejection to.
    sigma : ndarray (1D)
        1D array of sigma values for each iteration of sigma rejection.
        Number of elements determines number of iterations.
    mask : byte array; optional
        Same shape as Data, where 1 indicates the corresponding element in
        Data is good and 0 indicates it is bad.  Only
        rejection of good-flagged data will be further
        considered.  This input mask is NOT modified in the caller.
    estsig : ndarray; optional
        [nsig] array of estimated standard deviations to
        use instead of calculated ones in each iteration.
        This is useful in the case of small datasets with
        outliers, in which case the calculated standard
        deviation can be large if there is an outlier and
        small if there is not, leading to rejection of
        good elements in a clean dataset and acceptance of
        all elements in a dataset with one bad element.
        Set any element of estsig to a negative value to
        use the calculated standard deviation for that iteration.
    ival : ndarray (2D); optional
        (returned) 2D array giving the median and standard deviation (with
        respect to the median) at each iteration.
    axis : int; optional
        The axis along which to compute the mean/median.
    fmean : ndarray; optional
        (returned) the mean of the accepted data.
    fstddev : ndarray; optional
        (returned) the standard deviation of the accepted data with respect
        to the mean.
    fmedian : ndarray; optional
       (returned) the median of the accepted data.
    fmedstddev : ndarray; optional
       (returned) the standard deviation of the accepted data with respect
       to the median.

    Returns
    -------
    ret : tuple
        This function returns a mask of accepted values in the data.  The
        mask is a byte array of the same shape as Data.  In the mask, 1
        indicates good data, 0 indicates an outlier in the corresponding
        location of Data. fmean, fstddev, fmedian, and fmedstddev will also
        be updated and returned if they were passed in. All of these will be
        packaged together into a tuple.

    Notes
    -----
    SIGREJ flags as outliers points a distance of sigma* the standard
    deviation from the median.  Unless given as a positive value in
    ESTSIG, standard deviation is calculated with respect to the
    median, using MEDSTDDEV. For each successive iteration and value of
    sigma SIGREJ recalculates the median and standard deviation from
    the set of 'good' (not masked) points, and uses these new values in
    calculating further outliers. The final mask contains a value of 1
    for every 'inlier' and 0 for every outlying data point.

    History:

    - 2005-01-18 statia Statia Luszcz, Cornell. (shl35@cornell.edu)
        Initial version
    - 2005-01-19 statia
        Changed function to return mask, rather than a
        list of outlying and inlying points, added final statistics keywords
    - 2005-01-20 jh Joe Harrington, Cornell, (jh@oobleck.astro.cornell.edu)
        Header update.  Added example.
    - 2005-05-26 jh
        Fixed header typo.
    - 2006-01-10 jh
        Moved definition, added test to see if all
        elements rejected before last iteration (e.g.,
        dataset is all NaN).  Added input mask, estsig.
    - 2010-11-01 patricio (pcubillos@fulbrightmail.org)
        Converted to python.

    Examples
    --------
    Define the N-element vector of sample data.

    >>> print(mean(x), stddev(x), median(x), medstddev(x))
    1438.47      5311.67      67.0000      5498.10
    >>> sr.sigrej(x, [9,3]), ival=ival, fmean=fmean, fmedian=fmedian)

    >>> x = np.array([65., 667, 84, 968, 62, 70, 66, 78, 47, 71, 56, 65, 60])
    >>> q,w,e,r,t,y = sr.sigrej(x, [2,1], ival=True, fmean=True,
    >>>                         fstddev=True, fmedian=True, fmedstddev=True)

    >>> print(q)
    [ True False  True False  True  True  True  True  True  True  True  True
    True]
    >>> print(w)
    [[  66.           65.5       ]
    [ 313.02675604  181.61572819]]
    >>> print(e)
    65.8181818182
    >>> print(r)
    10.1174916043
    >>> print(t)
    65.0
    >>> print(y)
    10.1538170163
    >>> print(fmean, fmedian)
    67.0000      67.0000
    '''
    # Get sizes
    dims = list(np.shape(data))
    nsig = np.size(sigma)
    if nsig == 0:
        nsig = 1
        sigma = [sigma]

    if mask is None:
        mask = np.ones(dims, bool)

    # defining estsig makes the logic below easier
    # if estsig is None:
    #     estsig = - np.ones(nsig)

    # Return parameters:
    retival = ival
    retfmean = fmean
    retfstddev = fstddev
    retfmedian = fmedian
    retfmedstddev = fmedstddev

    # Remove axis
    del(dims[axis])
    ival = np.empty((2, nsig) + tuple(dims))
    ival[:] = np.nan

    # Iterations
    for iter in np.arange(nsig):
        if estsig is None:
            # note: ival is slicing
            ival[1, iter], ival[0, iter] = msd.medstddev(data, mask, axis=axis,
                                                         medi=True)
        # if estsig[iter] > 0:   # if we dont have an estimated std dev.
        else:
            # Calculations
            for j in np.arange(dims[0]):
                for i in np.arange(dims[1]):
                    ival[0, iter, j, i] = \
                        np.median(data[:, j, i][np.where(mask[:, j, i])])
            # note: ival is slicing
            ival[1, iter] = estsig[iter]

            # Fixes
            count = np.sum(mask, axis=axis)
            # note: ival is slicing
            (ival[1, iter])[np.where(count == 0)] = np.nan

        # Update mask
        # note: ival is slicing
        mask *= ((data >= (ival[0, iter] - sigma[iter] * ival[1, iter])) &
                 (data <= (ival[0, iter] + sigma[iter] * ival[1, iter])))

    # the return arrays
    ret = (mask,)
    if retival:
        ret = ret + (ival, )

    # final calculations
    if retfmean or retfstddev:
        count = np.sum(mask, axis=axis)
        fmean = np.nansum(data*mask, axis=axis)

        # calculate only where there are good pixels
        goodvals = np.isfinite(fmean) * (count > 0)
        if np.ndim(fmean) == 0 and goodvals:
            fmean /= count
        else:
            fmean[np.where(goodvals)] /= count[np.where(goodvals)]

        if retfstddev:
            resid = (data-fmean)*mask
            fstddev = np.sqrt(np.sum(resid**2, axis=axis)/(count-1))
            if np.ndim(fstddev) == 0:
                if count == 1:
                    fstddev = 0.0
            else:
                fstddev[np.where(count == 1)] = 0.0

    if retfmedian or retfmedstddev:
        fmedstddev, fmedian = msd.medstddev(data, mask, axis=axis, medi=True)

    # the returned final arrays
    if retfmean:
        ret = ret + (fmean,)
    if retfstddev:
        ret = ret + (fstddev,)
    if retfmedian:
        ret = ret + (fmedian,)
    if retfmedstddev:
        ret = ret + (fmedstddev,)

    if len(ret) == 1:
        return ret[0]
    return ret