Source code for eureka.S5_lightcurve_fitting.fitters

import numpy as np
import pandas as pd
import copy
from io import StringIO
import os
import sys
import h5py
import xarray as xr
from astraeus import xarrayIO as xrio
import time as time_pkg

from scipy.optimize import minimize
import lmfit
import emcee

from dynesty import NestedSampler
from dynesty.utils import resample_equal

from .likelihood import (computeRedChiSq, lnprob, ln_like, ptform,
                         update_uncertainty)
from . import plots_s5 as plots
from ..lib import astropytable
from ..lib.split_channels import get_trim

from multiprocessing import Pool

from astropy import table



[docs]
def lsqfitter(lc, model, meta, log, calling_function='lsq', **kwargs):
    """Perform least-squares fit.

    Parameters
    ----------
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object.
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit.
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    calling_function : str
        The fitter that is being run (e.g. may be 'emcee' if running lsqfitter
        to initialize emcee walkers). Defailts to 'lsq'.
    **kwargs : dict
        Arbitrary keyword arguments.

    Returns
    -------
    best_model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model after fitting

    Notes
    -----
    History:

    - December 29-30, 2021 Taylor Bell
        Updated documentation and arguments. Reduced repeated code.
        Also saving covariance matrix for later estimation of sampler
        step size.
    - January 7-22, 2022 Megan Mansfield
        Adding ability to do a single shared fit across all channels
    - February 28-March 1, 2022 Caroline Piaulet
        Adding scatter_ppm parameter
    - Mar 13-Apr 18, 2022 Caroline Piaulet
        Record an astropy table for param values
    """
    # Group the different variable types
    freenames, freepars, prior1, prior2, priortype, indep_vars = \
        group_variables(model)
    if hasattr(meta, 'old_fitparams') and meta.old_fitparams is not None:
        freepars = load_old_fitparams(meta, log, lc.channel, freenames)

    start_lnprob = lnprob(freepars, lc, model, prior1, prior2, priortype,
                          freenames)
    log.writelog(f'Starting lnprob: {start_lnprob}', mute=(not meta.verbose))

    # Plot starting point
    if meta.isplots_S5 >= 1:
        plots.plot_fit(lc, model, meta,
                       fitter=calling_function+'StartingPoint')
        # Plot GP starting point
        if model.GP:
            plots.plot_GP_components(lc, model, meta,
                                     fitter=calling_function+'StartingPoint')

    def neg_lnprob(theta, lc, model, prior1, prior2, priortype, freenames):
        return -lnprob(theta, lc, model, prior1, prior2, priortype, freenames)
    global lsq_t0
    lsq_t0 = time_pkg.time()

    def callback_full(theta, lc, model, prior1, prior2, priortype, freenames):
        global lsq_t0
        if (time_pkg.time()-lsq_t0) > 0.5:
            lsq_t0 = time_pkg.time()
            print('Current lnprob = ', lnprob(theta, lc, model, prior1, prior2,
                                              priortype, freenames), end='\r')

    def callback(theta):
        return callback_full(theta, lc, model, prior1, prior2, priortype,
                             freenames)

    if not hasattr(meta, 'lsq_method'):
        log.writelog('No lsq optimization method specified - using Nelder-Mead'
                     ' by default.')
        meta.lsq_method = 'Nelder-Mead'
    if not hasattr(meta, 'lsq_tol'):
        log.writelog('No lsq tolerance specified - using 1e-6 by default.')
        meta.lsq_tol = 1e-6
    if not hasattr(meta, 'lsq_maxiter'):
        meta.lsq_maxiter = None
    results = minimize(neg_lnprob, freepars,
                       args=(lc, model, prior1, prior2, priortype, freenames),
                       method=meta.lsq_method, tol=meta.lsq_tol,
                       options={'maxiter': meta.lsq_maxiter},
                       callback=callback)

    log.writelog("\nVerbose lsq results: {}\n".format(results),
                 mute=(not meta.verbose))
    if not meta.verbose:
        log.writelog("Success?: {}".format(results.success))
        log.writelog(results.message)

    # Get the best fit params
    fit_params = results.x

    # Create table of results
    t_results = table.Table([freenames, fit_params],
                            names=("Parameter", "Mean"))

    model.update(fit_params)
    lc.unc_fit = update_uncertainty(fit_params, lc.nints, lc.unc, freenames)

    # Save the fit ASAP
    save_fit(meta, lc, model, calling_function, t_results, freenames)

    end_lnprob = lnprob(fit_params, lc, model, prior1, prior2, priortype,
                        freenames)
    log.writelog(f'Ending lnprob: {end_lnprob}', mute=(not meta.verbose))

    # Compute reduced chi-squared
    chi2red = computeRedChiSq(lc, log, model, meta, freenames)

    log.writelog('\nLSQ RESULTS:')
    for i in range(len(freenames)):
        if 'scatter_mult' in freenames[i]:
            chan = freenames[i].split('_')[-1]
            if chan.isnumeric():
                chan = int(chan)
            else:
                chan = 0
            trim1, trim2 = get_trim(meta.nints, chan)
            unc = np.ma.median(lc.unc[trim1:trim2])
            scatter_ppm = 1e6*fit_params[i]*unc
            log.writelog(f'{freenames[i]}: {fit_params[i]}; {scatter_ppm} ppm')
        else:
            log.writelog(f'{freenames[i]}: {fit_params[i]}')
    log.writelog('')

    # Plot fit
    if meta.isplots_S5 >= 1:
        plots.plot_fit(lc, model, meta, fitter=calling_function)

    # Plot GP fit + components
    if model.GP and meta.isplots_S5 >= 1:
        plots.plot_GP_components(lc, model, meta, fitter=calling_function)

    # Zoom in on phase variations
    if meta.isplots_S5 >= 1 and 'sinusoid_pc' in meta.run_myfuncs:
        plots.plot_phase_variations(lc, model, meta, fitter=calling_function)

    # Plot Allan plot
    if meta.isplots_S5 >= 3 and calling_function == 'lsq':
        # This plot is only really useful if you're actually using the
        # lsq fitter, otherwise don't make it
        plots.plot_rms(lc, model, meta, fitter=calling_function)

    # Plot residuals distribution
    if meta.isplots_S5 >= 3 and calling_function == 'lsq':
        plots.plot_res_distr(lc, model, meta, fitter=calling_function)

    # Make a new model instance
    best_model = copy.deepcopy(model)
    best_model.components[0].update(fit_params)

    # Save the covariance matrix in case it's needed to estimate step size
    # for a sampler
    # FINDME:
    # Commented out for now because op.least_squares() doesn't provide
    # covariance matrix
    # Need to compute using Jacobian matrix instead (hess_inv = (J.T J)^{-1})
    # model_lc = model.eval()
    # if results[1] is not None:
    #     residuals = (lc.flux - model_lc)
    #     cov_mat = results[1]*np.var(residuals)
    # else:
    #     # Sometimes lsq will fail to converge and will return a None
    #     # covariance matrix
    #     cov_mat = None
    cov_mat = None
    best_model.__setattr__('cov_mat', cov_mat)
    best_model.__setattr__('chi2red', chi2red)
    best_model.__setattr__('fit_params', fit_params)

    return best_model




[docs]
def demcfitter(lc, model, meta, log, **kwargs):
    """Perform sampling using Differential Evolution Markov Chain.

    This is an empty placeholder function to be filled later.

    Parameters
    ----------
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object.
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit.
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    **kwargs : dict
        Arbitrary keyword arguments.

    Returns
    -------
    best_model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model after fitting

    Notes
    -----
    History:

    - December 29, 2021 Taylor Bell
        Updated documentation and arguments
    """
    best_model = None
    return best_model




[docs]
def emceefitter(lc, model, meta, log, **kwargs):
    """Perform sampling using emcee.

    Parameters
    ----------
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit
    meta : eureka.lib.readECF.MetaClass
        The metadata object
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    **kwargs : dict
        Arbitrary keyword arguments.

    Returns
    -------
    best_model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model after fitting

    Notes
    -----
    History:

    - December 29, 2021 Taylor Bell
        Updated documentation. Reduced repeated code.
    - January 7-22, 2022 Megan Mansfield
        Adding ability to do a single shared fit across all channels
    - February 23-25, 2022 Megan Mansfield
        Added log-uniform and Gaussian priors.
    - February 28-March 1, 2022 Caroline Piaulet
        Adding scatter_ppm parameter. Added statements to avoid some initial
        state issues.
    - Mar 13-Apr 18, 2022 Caroline Piaulet
        Record an astropy table for mean, median, percentiles,
        +/- 1 sigma, all params
    """
    # Group the different variable types
    freenames, freepars, prior1, prior2, priortype, indep_vars = \
        group_variables(model)
    if hasattr(meta, 'old_fitparams') and meta.old_fitparams is not None:
        freepars = load_old_fitparams(meta, log, lc.channel, freenames)
    ndim = len(freenames)

    if hasattr(meta, 'old_chain') and meta.old_chain is not None:
        pos, nwalkers = start_from_oldchain_emcee(lc, meta, log, ndim,
                                                  freenames)
    else:
        if not hasattr(meta, 'lsq_first') or meta.lsq_first:
            # Only call lsq fitter first if asked or lsq_first option wasn't
            # passed (allowing backwards compatibility)
            log.writelog('\nCalling lsqfitter first...')
            # RUN LEAST SQUARES
            lsq_sol = lsqfitter(lc, model, meta, log,
                                calling_function='emcee_lsq', **kwargs)

            freepars = lsq_sol.fit_params

            # SCALE UNCERTAINTIES WITH REDUCED CHI2
            if meta.rescale_err:
                lc.unc *= np.sqrt(lsq_sol.chi2red)
        else:
            lsq_sol = None
        pos, nwalkers = initialize_emcee_walkers(meta, log, ndim, lsq_sol,
                                                 freepars, prior1, prior2,
                                                 priortype)

    start_lnprob = lnprob(np.median(pos, axis=0), lc, model, prior1, prior2,
                          priortype, freenames)
    log.writelog(f'Starting lnprob: {start_lnprob}', mute=(not meta.verbose))

    # Plot starting point
    if meta.isplots_S5 >= 1:
        plots.plot_fit(lc, model, meta,
                       fitter='emceeStartingPoint')
        # Plot GP starting point
        if model.GP:
            plots.plot_GP_components(lc, model, meta,
                                     fitter='emceeStartingPoint')

    # Initialize tread pool
    if hasattr(meta, 'ncpu') and meta.ncpu > 1:
        pool = Pool(meta.ncpu)
    else:
        meta.ncpu = 1
        pool = None

    # Run emcee burn-in
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                    args=(lc, model, prior1, prior2,
                                          priortype, freenames),
                                    pool=pool)
    log.writelog('Running emcee burn-in...')
    sampler.run_mcmc(pos, meta.run_nsteps, progress=True)
    # state = sampler.run_mcmc(pos, meta.run_nsteps, progress=True)
    # # Log some details about the burn-in phase
    # acceptance_fraction = np.mean(sampler.acceptance_fraction)
    # log.writelog(f"Mean acceptance fraction: {acceptance_fraction:.3f}",
    #              mute=(not meta.verbose))
    # try:
    # autocorr = sampler.get_autocorr_time()
    #     log.writelog(f"Mean autocorrelation time: {autocorr:.3f} steps",
    #                  mute=(not meta.verbose))
    # except:
    #     log.writelog("Error: Unable to estimate the autocorrelation time!",
    #                  mute=(not meta.verbose))
    # mid_lnprob = lnprob(np.median(sampler.get_chain()[-1], axis=0), lc,
    #                     model, prior1, prior2, priortype, freenames)
    # log.writelog(f'Intermediate lnprob: {mid_lnprob}',
    #              mute=(not meta.verbose))
    # if meta.isplots_S5 >= 3:
    #     plots.plot_chain(sampler.get_chain(), lc, meta, freenames,
    #                      fitter='emcee', burnin=True)
    # # Reset the sampler and do the production run
    # log.writelog('Running emcee production run...')
    # sampler.reset()
    # sampler.run_mcmc(state, meta.run_nsteps-meta.run_nburn, progress=True)
    # samples = sampler.get_chain(flat=True)

    samples = sampler.get_chain(flat=True, discard=meta.run_nburn)
    if meta.ncpu > 1:
        # Close the thread pool
        pool.close()
        pool.join()

    # Record median + percentiles
    q = np.percentile(samples, [16, 50, 84], axis=0)
    fit_params = q[1]  # median
    mean_params = np.mean(samples, axis=0)
    errs = np.std(samples, axis=0)

    # Create table of results
    t_results = table.Table([freenames, mean_params, q[0]-q[1], q[2]-q[1],
                             q[0], fit_params, q[2]],
                            names=("Parameter", "Mean", "-1sigma", "+1sigma",
                                   "16th", "50th", "84th"))

    upper_errs = q[2]-q[1]
    lower_errs = q[1]-q[0]

    model.update(fit_params)
    model.errs = dict(zip(freenames, errs))
    lc.unc_fit = update_uncertainty(fit_params, lc.nints, lc.unc, freenames)

    # Save the fit ASAP so plotting errors don't make you lose everything
    save_fit(meta, lc, model, 'emcee', t_results, freenames, samples)

    end_lnprob = lnprob(fit_params, lc, model, prior1, prior2, priortype,
                        freenames)
    log.writelog(f'Ending lnprob: {end_lnprob}', mute=(not meta.verbose))
    acceptance_fraction = np.mean(sampler.acceptance_fraction)
    log.writelog(f"Mean acceptance fraction: {acceptance_fraction:.3f}",
                 mute=(not meta.verbose))
    try:
        autocorr = np.mean(sampler.get_autocorr_time())
        log.writelog(f"Mean autocorrelation time: {autocorr:.3f} steps",
                     mute=(not meta.verbose))
    except:
        # FINDME: Need to only catch the expected exception
        log.writelog("WARNING: Unable to estimate the autocorrelation time!",
                     mute=(not meta.verbose))

    # Compute reduced chi-squared
    chi2red = computeRedChiSq(lc, log, model, meta, freenames)

    log.writelog('\nEMCEE RESULTS:')
    for i in range(ndim):
        if 'scatter_mult' in freenames[i]:
            chan = freenames[i].split('_')[-1]
            if chan.isnumeric():
                chan = int(chan)
            else:
                chan = 0
            trim1, trim2 = get_trim(meta.nints, chan)
            unc = np.ma.median(lc.unc[trim1:trim2])
            scatter_ppm = 1e6*fit_params[i]*unc
            scatter_ppm_upper = 1e6*upper_errs[i]*unc
            scatter_ppm_lower = 1e6*lower_errs[i]*unc
            log.writelog(f'{freenames[i]}: {fit_params[i]} (+{upper_errs[i]},'
                         f' -{lower_errs[i]}); {scatter_ppm} '
                         f'(+{scatter_ppm_upper}, -{scatter_ppm_lower}) ppm')
        else:
            log.writelog(f'{freenames[i]}: {fit_params[i]} (+{upper_errs[i]},'
                         f' -{lower_errs[i]})')
    log.writelog('')

    # Plot fit
    if meta.isplots_S5 >= 1:
        plots.plot_fit(lc, model, meta, fitter='emcee')

    # Plot GP fit + components
    if model.GP and meta.isplots_S5 >= 1:
        plots.plot_GP_components(lc, model, meta, fitter='emcee')

    # Zoom in on phase variations
    if meta.isplots_S5 >= 1 and 'sinusoid_pc' in meta.run_myfuncs:
        plots.plot_phase_variations(lc, model, meta, fitter='emcee')

    # Plot Allan plot
    if meta.isplots_S5 >= 3:
        plots.plot_rms(lc, model, meta, fitter='emcee')

    # Plot residuals distribution
    if meta.isplots_S5 >= 3:
        plots.plot_res_distr(lc, model, meta, fitter='emcee')

    # Plot chain evolution
    if meta.isplots_S5 >= 3:
        plots.plot_chain(sampler.get_chain(), lc, meta, freenames,
                         fitter='emcee', burnin=True, nburn=meta.run_nburn)
        plots.plot_chain(sampler.get_chain(discard=meta.run_nburn), lc, meta,
                         freenames, fitter='emcee', burnin=False)

    if meta.isplots_S5 >= 5:
        plots.plot_corner(samples, lc, meta, freenames, fitter='emcee')

    # Make a new model instance
    best_model = copy.deepcopy(model)
    best_model.components[0].update(fit_params)
    best_model.__setattr__('chi2red', chi2red)
    best_model.__setattr__('fit_params', fit_params)

    return best_model




[docs]
def start_from_oldchain_emcee(lc, meta, log, ndim, freenames):
    """Restart emcee using the ending point of an old chain.

    Parameters
    ----------
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object.
    meta : eureka.lib.readECF.MetaClass
        The meta data object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    ndim : int
        The number of fitted parameters.
    freenames : list
        The names of the fitted parameters.

    Returns
    -------
    pos : ndarray
        The starting positions for all walkers.
    nwalkers : int
        The number of walkers (may differ from the requested number
        if unable to get all walkers within the priors).

    Raises
    ------
    AssertionError
        The old chain is not compatible with the current fit.
    AssertionError
        Unable to get enough walkers within the prior range.
    """
    if meta.sharedp:
        channel_key = 'shared'
    else:
        ch_number = str(lc.channel).zfill(len(str(lc.nchannel)))
        channel_key = f'ch{ch_number}'

    foldername = os.path.join(meta.topdir, *meta.old_chain.split(os.sep))
    fname = f'S5_emcee_fitparams_{channel_key}.csv'
    fitted_values = pd.read_csv(os.path.join(foldername, fname),
                                escapechar='#', skipinitialspace=True)
    full_keys = np.array(fitted_values['Parameter'])

    if np.all(full_keys != freenames):
        message = ('Old chain does not have the same fitted parameters and '
                   'cannot be used to initialize the new fit.\n'
                   'The old chain included:\n['+','.join(full_keys)+']\n'
                   'The new chain included:\n['+','.join(freenames)+']')
        log.writelog(message, mute=True)
        raise AssertionError(message)

    fname = f'S5_emcee_samples_{channel_key}'
    # Load HDF5 files
    full_fname = os.path.join(foldername, fname)+'.h5'
    ds = xrio.readXR(full_fname, verbose=False)
    if ds is None:
        # Working with an old save file
        with h5py.File(full_fname, 'r') as hf:
            samples = hf['samples'][:]
    else:
        samples = ds.to_array().T.values
    log.writelog(f'Old chain path: {full_fname}')

    # Initialize the walkers using samples from the old chain
    nwalkers = meta.run_nwalkers
    pos = samples[-nwalkers:]
    walkers_used = nwalkers

    # Make sure that no walkers are starting in the same place as
    # they would then exactly follow each other
    repeat_pos = np.where([np.any(np.all(pos[i] == np.delete(pos, i, axis=0),
                                         axis=1))
                           for i in range(pos.shape[0])])[0]
    while (len(repeat_pos) > 0 and
           samples.shape[0] > (walkers_used+len(repeat_pos))):
        pos[repeat_pos] = samples[:-walkers_used][-len(repeat_pos):]
        walkers_used += len(repeat_pos)
        repeat_pos = np.where([np.any(np.all(pos[i] ==
                                             np.delete(pos, i, axis=0),
                                             axis=1))
                               for i in range(pos.shape[0])])[0]

    # If unable to initialize all walkers in unique starting locations,
    # use fewer walkers unless there'd be fewer walkers than dimensions
    if len(repeat_pos) > 0 and (nwalkers-len(repeat_pos) > ndim):
        pos = np.delete(pos, repeat_pos, axis=0)
        nwalkers = pos.shape[0]
        log.writelog(f'Warning: Unable to initialize all walkers at different '
                     f'positions using old chain!\nUsing {nwalkers} walkers '
                     f'instead of the initially requested {meta.run_nwalkers} '
                     f'walkers')
    elif len(repeat_pos) > 0:
        message = (f'Error: Unable to initialize all walkers at different '
                   f'positions using old chain!\nUsing {nwalkers} walkers '
                   f'instead of the initially requested {meta.run_nwalkers} '
                   f'walkers is not permitted as there are {ndim} fitted '
                   f'parameters')
        log.writelog(message, mute=True)
        raise AssertionError(message)

    return pos, nwalkers




[docs]
def initialize_emcee_walkers(meta, log, ndim, lsq_sol, freepars, prior1,
                             prior2, priortype):
    """Initialize emcee walker starting positions

    Parameters
    ----------
    meta : eureka.lib.readECF.MetaClass
        The meta data object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    ndim : int
        The number of fitted parameters.
    lsq_sol : The results from the lsqfitter.
        The results from the lsqfitter.
    freepars : list
        The names of the fitted parameters.
    prior1 : list
        The list of prior1 values.
    prior2 : list
        The list of prior2 values.
    priortype : list
        The types of each prior (to determine meaning of prior1 and prior2).

    Returns
    -------
    pos : ndarray
        The starting position of all walkers.
    nwalkers : int
        The number of walkers (may differ from the requested number
        if unable to get all walkers within the priors).

    Raises
    ------
    AssertionError
        Failed to initialize any walkers within the priors
    AssertionError
        Failed to initialize enough walkers within the priors
    """
    u = np.where(priortype == 'U')[0]
    lu = np.where(priortype == 'LU')[0]
    n = np.where(priortype == 'N')[0]
    if lsq_sol is not None and lsq_sol.cov_mat is not None:
        step_size = np.diag(lsq_sol.cov_mat)
        if np.any(step_size == 0.):
            ind_zero_u = np.where(step_size[u] == 0.)[0]
            ind_zero_lu = np.where(step_size[lu] == 0.)[0]
            ind_zero_n = np.where(step_size[n] == 0.)[0]
            step_size[u][ind_zero_u] = (0.001*(prior2[u][ind_zero_u] -
                                               prior1[u][ind_zero_u]))
            step_size[lu][ind_zero_lu] = (0.001 *
                                          (np.exp(prior2[lu][ind_zero_lu]) -
                                           np.exp(prior1[lu][ind_zero_lu])))
            step_size[n][ind_zero_n] = 0.1*prior2[n][ind_zero_n]
    else:
        # Sometimes the lsq fitter won't converge and will give None as
        # the covariance matrix. In that case, we need to establish the
        # step size in another way. Using a fractional step compared to
        # the prior range can work best for precisely known values like
        # t0 and period
        log.writelog('No covariance matrix from LSQ - falling back on a step '
                     'size based on the prior range')
        step_size = np.ones(ndim)
        step_size[u] = 0.001*(prior2[u] - prior1[u])
        step_size[lu] = 0.001*(np.exp(prior2[lu]) - np.exp(prior1[lu]))
        step_size[n] = 0.1*prior2[n]
    nwalkers = meta.run_nwalkers

    # make it robust to lsq hitting the upper or lower bound of the param space
    ind_max = np.where(freepars[u] - prior2[u] == 0.)[0]
    ind_min = np.where(freepars[u] - prior1[u] == 0.)[0]
    ind_max_LU = np.where(np.log(freepars[lu]) - prior2[lu] == 0.)[0]
    ind_min_LU = np.where(np.log(freepars[lu]) - prior1[lu] == 0.)[0]
    pmid = (prior2+prior1)/2.

    if len(ind_max) > 0 or len(ind_max_LU) > 0:
        log.writelog('Warning: >=1 params hit the upper bound in the lsq fit. '
                     'Setting to the middle of the interval.')
        freepars[u][ind_max] = pmid[u][ind_max]
        freepars[lu][ind_max_LU] = (np.exp(prior2[lu][ind_max_LU]) +
                                    np.exp(prior1[lu][ind_max_LU]))/2.
    if len(ind_min) > 0 or len(ind_min_LU) > 0:
        log.writelog('Warning: >=1 params hit the lower bound in the lsq fit. '
                     'Setting to the middle of the interval.')
        freepars[u][ind_min] = pmid[u][ind_min]
        freepars[lu][ind_min_LU] = (np.exp(prior2[lu][ind_min_LU]) +
                                    np.exp(prior1[lu][ind_min_LU]))/2.

    # Generate the walker positions
    pos = np.array([freepars + step_size*np.random.randn(ndim)
                    for i in range(nwalkers)])

    # Make sure the walker positions obey the priors
    in_range = np.array([((prior1[u] <= ii).all() and (ii <= prior2[u]).all())
                         for ii in pos[:, u]])
    in_range2 = np.array([((prior1[lu] <= np.log(ii)).all() and
                           (np.log(ii) <= prior2[lu]).all())
                          for ii in pos[:, lu]])
    if not np.all(in_range) or not np.all(in_range2):
        log.writelog('Not all walkers were initialized within the priors, '
                     'using a smaller proposal distribution')
        pos = pos[in_range]
        # Make sure the step size is well within the limits
        uniform_step = np.abs(np.append((prior2-freepars).reshape(-1, 1)/10,
                                        (freepars-prior1).reshape(-1, 1)/10,
                                        axis=1))
        step_size_options = np.append(step_size.reshape(-1, 1), uniform_step,
                                      axis=1)
        if len(lu) != 0:
            step_size_options[lu, 1] = np.abs((np.exp(prior2[lu]) -
                                               freepars[lu]).reshape(-1, 1)/10)
            step_size_options[lu, 2] = np.abs((np.exp(prior1[lu]) -
                                               freepars[lu]).reshape(-1, 1)/10)
        if len(n) != 0:
            step_size_options[n, 1:] = step_size_options[n, 0].reshape(-1, 1)
        step_size = np.min(step_size_options, axis=1)
        if pos.shape[0] == 0:
            remove_zeroth = True
            new_nwalkers = nwalkers-len(pos)
            pos = np.zeros((1, ndim))
        else:
            remove_zeroth = False
            new_nwalkers = nwalkers-len(pos)
        pos = np.append(pos, np.array([freepars +
                                       step_size*np.random.randn(ndim)
                                       for i in range(new_nwalkers)
                                       ]).reshape(-1, ndim), axis=0)
        if remove_zeroth:
            pos = pos[1:]
        in_range = np.array([((prior1[u] <= ii).all() and
                              (ii <= prior2[u]).all())
                             for ii in pos[:, u]])
        in_range2 = np.array([((prior1[lu] <= np.log(ii)).all() and
                               (np.log(ii) <= prior2[lu]).all())
                              for ii in pos[:, lu]])
    if not np.any(in_range) and not np.any(in_range2):
        raise AssertionError('Failed to initialize any walkers within the set '
                             'bounds for all parameters!\n'
                             'Check your stating position, decrease your step '
                             'size, or increase the bounds on your parameters')
    elif not np.all(in_range) or not np.all(in_range2):
        old_nwalkers = nwalkers
        pos = pos[in_range+in_range2]
        nwalkers = pos.shape[0]
        if nwalkers > ndim:
            log.writelog(f'Warning: Failed to initialize all walkers within '
                         f'the set bounds for all parameters!\nUsing '
                         f'{nwalkers} walkers instead of the initially '
                         f'requested {old_nwalkers} walkers')
        else:
            message = (f'Error: Failed to initialize all walkers within the '
                       f'set bounds for all parameters!\nUsing '
                       f'{nwalkers} walkers instead of the initially requested'
                       f' {old_nwalkers} walkers is not permitted as there are'
                       f' {ndim} fitted parameters')
            log.writelog(message, mute=True)
            raise AssertionError(message)
    return pos, nwalkers




[docs]
def dynestyfitter(lc, model, meta, log, **kwargs):
    """Perform sampling using dynesty.

    Parameters
    ----------
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object.
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit.
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    **kwargs : dict
        Arbitrary keyword arguments.

    Returns
    -------
    best_model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model after fitting

    Notes
    -----
    History:

    - December 29, 2021 Taylor Bell
        Updated documentation. Reduced repeated code.
    - January 7-22, 2022 Megan Mansfield
        Adding ability to do a single shared fit across all channels
    - February 23-25, 2022 Megan Mansfield
        Added log-uniform and Gaussian priors.
    - February 28-March 1, 2022 Caroline Piaulet
        Adding scatter_ppm parameter.
    - Mar 13-Apr 18, 2022 Caroline Piaulet
        Record an astropy table for mean, median, percentiles,
        +/- 1 sigma, all params
    """
    # Group the different variable types
    freenames, freepars, prior1, prior2, priortype, indep_vars = \
        group_variables(model)
    if hasattr(meta, 'old_fitparams') and meta.old_fitparams is not None:
        freepars = load_old_fitparams(meta, log, lc.channel, freenames)

    # DYNESTY
    nlive = meta.run_nlive  # number of live points
    bound = meta.run_bound  # use MutliNest algorithm for bounds
    ndims = len(freepars)  # two parameters
    sample = meta.run_sample  # uniform sampling
    tol = meta.run_tol  # the stopping criterion

    start_lnprob = lnprob(freepars, lc, model, prior1, prior2, priortype,
                          freenames)
    log.writelog(f'Starting lnprob: {start_lnprob}', mute=(not meta.verbose))

    # Plot starting point
    if meta.isplots_S5 >= 1:
        plots.plot_fit(lc, model, meta,
                       fitter='dynestyStartingPoint')
        # Plot GP starting point
        if model.GP:
            plots.plot_GP_components(lc, model, meta,
                                     fitter='dynestyStartingPoint')

    # START DYNESTY
    l_args = [lc, model, freenames]

    log.writelog('Running dynesty...')

    min_nlive = int(np.ceil(ndims*(ndims+1)//2))
    if nlive == 'min':
        nlive = min_nlive
    elif nlive < min_nlive:
        log.writelog(f'**** WARNING: You should set run_nlive to at least '
                     f'{min_nlive} ****')

    if hasattr(meta, 'ncpu') and meta.ncpu > 1:
        pool = Pool(meta.ncpu)
        queue_size = meta.ncpu
    else:
        meta.ncpu = 1
        pool = None
        queue_size = None
    sampler = NestedSampler(ln_like, ptform, ndims, pool=pool,
                            queue_size=queue_size, bound=bound, sample=sample,
                            nlive=nlive, logl_args=l_args,
                            ptform_args=[prior1, prior2, priortype])
    sampler.run_nested(dlogz=tol, print_progress=True)  # output progress bar
    res = sampler.results  # get results dictionary from sampler
    if meta.ncpu > 1:
        pool.close()
        pool.join()

    log.writelog('', mute=(not meta.verbose))
    # Need to temporarily redirect output since res.summar() prints rather
    # than returns a string
    old_stdout = sys.stdout
    sys.stdout = mystdout = StringIO()
    res.summary()
    sys.stdout = old_stdout
    log.writelog(mystdout.getvalue(), mute=(not meta.verbose))

    # get function that resamples from the nested samples to give sampler
    # with equal weight
    # draw posterior samples
    weights = np.exp(res['logwt'] - res['logz'][-1])
    samples = resample_equal(res.samples, weights)
    log.writelog('Number of posterior samples is {}'.format(len(samples)),
                 mute=(not meta.verbose))

    # Record median + percentiles
    q = np.percentile(samples, [16, 50, 84], axis=0)
    fit_params = q[1]  # median
    mean_params = np.mean(samples, axis=0)
    errs = np.std(samples, axis=0)

    # Create table of results
    t_results = table.Table([freenames, mean_params, q[0]-q[1], q[2]-q[1],
                             q[0], fit_params, q[2]],
                            names=("Parameter", "Mean", "-1sigma", "+1sigma",
                                   "16th", "50th", "84th"))

    upper_errs = q[2]-q[1]
    lower_errs = q[1]-q[0]

    model.update(fit_params)
    model.errs = dict(zip(freenames, errs))
    lc.unc_fit = update_uncertainty(fit_params, lc.nints, lc.unc, freenames)

    # Save the fit ASAP so plotting errors don't make you lose everything
    save_fit(meta, lc, model, 'dynesty', t_results, freenames, samples)

    end_lnprob = lnprob(fit_params, lc, model, prior1, prior2, priortype,
                        freenames)
    log.writelog(f'Ending lnprob: {end_lnprob}', mute=(not meta.verbose))

    # Compute reduced chi-squared
    chi2red = computeRedChiSq(lc, log, model, meta, freenames)

    log.writelog('\nDYNESTY RESULTS:')
    for i in range(ndims):
        if 'scatter_mult' in freenames[i]:
            chan = freenames[i].split('_')[-1]
            if chan.isnumeric():
                chan = int(chan)
            else:
                chan = 0
            trim1, trim2 = get_trim(meta.nints, chan)
            unc = np.ma.median(lc.unc[trim1:trim2])
            scatter_ppm = 1e6*fit_params[i]*unc
            scatter_ppm_upper = 1e6*upper_errs[i]*unc
            scatter_ppm_lower = 1e6*lower_errs[i]*unc
            log.writelog(f'{freenames[i]}: {fit_params[i]} (+{upper_errs[i]},'
                         f' -{lower_errs[i]}); {scatter_ppm} '
                         f'(+{scatter_ppm_upper}, -{scatter_ppm_lower}) ppm')
        else:
            log.writelog(f'{freenames[i]}: {fit_params[i]} (+{upper_errs[i]},'
                         f' -{lower_errs[i]})')
    log.writelog('')

    # Plot fit
    if meta.isplots_S5 >= 1:
        plots.plot_fit(lc, model, meta, fitter='dynesty')

    # Plot GP fit + components
    if model.GP and meta.isplots_S5 >= 1:
        plots.plot_GP_components(lc, model, meta, fitter='dynesty')

    # Zoom in on phase variations
    if meta.isplots_S5 >= 1 and 'sinusoid_pc' in meta.run_myfuncs:
        plots.plot_phase_variations(lc, model, meta, fitter='dynesty')

    # Plot Allan plot
    if meta.isplots_S5 >= 3:
        plots.plot_rms(lc, model, meta, fitter='dynesty')

    # Plot residuals distribution
    if meta.isplots_S5 >= 3:
        plots.plot_res_distr(lc, model, meta, fitter='dynesty')

    # plot using corner.py
    if meta.isplots_S5 >= 5:
        plots.plot_corner(samples, lc, meta, freenames, fitter='dynesty')

    # Make a new model instance
    best_model = copy.deepcopy(model)
    best_model.components[0].update(fit_params)
    best_model.__setattr__('chi2red', chi2red)
    best_model.__setattr__('fit_params', fit_params)

    return best_model




[docs]
def lmfitter(lc, model, meta, log, **kwargs):
    """Perform a fit using lmfit.

    Parameters
    ----------
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object.
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit.
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    **kwargs : dict
        Arbitrary keyword arguments.

    Returns
    -------
    best_model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model after fitting

    Notes
    -----
    History:

    - December 29, 2021 Taylor Bell
        Updated documentation. Reduced repeated code.
    - February 28-March 1, 2022 Caroline Piaulet
        Adding scatter_ppm parameter.
    - Mar 13-Apr 18, 2022 Caroline Piaulet
         Record an astropy table for parameter values
    """
    # TODO: Do something so that duplicate param names can all be handled
    # (e.g. two Polynomail models with c0). Perhaps append something to the
    # parameter name like c0_1 and c0_2?)

    # Group the different variable types
    param_list, freenames, indep_vars = group_variables_lmfit(model)

    # Add the time as an independent variable
    indep_vars['time'] = lc.time

    # Initialize lmfit Params object
    initialParams = lmfit.Parameters()
    # Insert parameters
    initialParams.add_many(*param_list)

    # Create the lmfit lightcurve model
    lcmodel = lmfit.Model(model.eval)
    lcmodel.independent_vars = indep_vars.keys()

    # Fit light curve model to the simulated data
    result = lcmodel.fit(lc.flux, weights=1/lc.unc, params=initialParams,
                         **indep_vars, **kwargs)

    # Get the best fit params
    fit_params = result.__dict__['params']
    # new_params = [(fit_params.get(i).name, fit_params.get(i).value,
    #                fit_params.get(i).vary, fit_params.get(i).min,
    #                fit_params.get(i).max) for i in fit_params]

    # Create table of results
    t_results = table.Table([freenames, fit_params],
                            names=("Parameter", "Mean"))

    model.update(fit_params)
    lc.unc_fit = update_uncertainty(fit_params, lc.nints, lc.unc, freenames)

    # Save the fit ASAP
    save_fit(meta, lc, model, 'lmfitter', t_results, freenames)

    # Compute reduced chi-squared
    chi2red = computeRedChiSq(lc, log, model, meta, freenames)

    # Log results
    log.writelog(result.fit_report(), mute=(not meta.verbose))

    # Plot fit
    if meta.isplots_S5 >= 1:
        plots.plot_fit(lc, model, meta, fitter='lmfitter')

    # Plot GP fit + components
    if model.GP and meta.isplots_S5 >= 1:
        plots.plot_GP_components(lc, model, meta, fitter='lmfitter')

    # Zoom in on phase variations
    if meta.isplots_S5 >= 1 and 'sinusoid_pc' in meta.run_myfuncs:
        plots.plot_phase_variations(lc, model, meta, fitter='lmfitter')

    # Plot Allan plot
    if meta.isplots_S5 >= 3:
        plots.plot_rms(lc, model, meta, fitter='lmfitter')

    # Plot residuals distribution
    if meta.isplots_S5 >= 3:
        plots.plot_res_distr(lc, model, meta, fitter='lmfitter')

    # Create new model with best fit parameters
    best_model = copy.deepcopy(model)
    best_model.components[0].update(fit_params)
    best_model.__setattr__('chi2red', chi2red)
    best_model.__setattr__('fit_params', fit_params)

    return best_model




[docs]
def group_variables(model):
    """Group variables into fitted and frozen.

    Parameters
    ----------
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit

    Returns
    -------
    freenames : np.array
        The names of fitted variables.
    freepars : np.array
        The fitted variables.
    prior1 : np.array
        The lower bound for constrained variables with uniform/log uniform
        priors, or mean for constrained variables with Gaussian priors.
    prior2 : np.array
        The upper bound for constrained variables with uniform/log uniform
        priors, or mean for constrained variables with Gaussian priors.
    priortype : np.array
        Keywords indicating the type of prior for each free parameter.
    indep_vars : dict
        The frozen variables.

    Notes
    -----
    History:

    - December 29, 2021 Taylor Bell
        Moved code to separate function to reduce repeated code.
    - January 11, 2022 Megan Mansfield
        Added ability to have shared parameters
    - February 23-25, 2022 Megan Mansfield
        Added log-uniform and Gaussian priors.
    """
    all_params = []
    alreadylist = []
    for c in range(model.components[0].nchannel_fitted):
        temp = model.components[0].longparamlist[c]
        for par in list(model.components[0].parameters.dict.items()):
            if par[0] in temp:
                if not all_params:
                    all_params.append(par)
                    alreadylist.append(par[0])
                if par[0] not in alreadylist:
                    all_params.append(par)
                    alreadylist.append(par[0])

    # Group the different variable types
    freenames = []
    freepars = []
    prior1 = []
    prior2 = []
    priortype = []
    indep_vars = {}
    for ii, item in enumerate(all_params):
        name, param = item
        # param = list(param)
        if ((param[1] == 'free') or (param[1] == 'shared')
                or ('white' in param[1])):
            freenames.append(name)
            freepars.append(param[0])
            if len(param) == 5:  # If prior is specified.
                prior1.append(param[2])
                prior2.append(param[3])
                priortype.append(param[4])
            elif (len(param) > 3) & (len(param) < 5):
                # If prior bounds are specified but not the prior type
                raise IndexError("If you want to specify prior parameters, you"
                                 " must also specify the prior type: 'U', 'LU'"
                                 ", or 'N'.")
            else:
                # If no prior is specified,
                # assume uniform prior with infinite bounds.
                prior1.append(-np.inf)
                prior2.append(np.inf)
                priortype.append('U')
        elif param[1] == 'independent':
            indep_vars[name] = param[0]
    freenames = np.array(freenames)
    freepars = np.array(freepars)
    prior1 = np.array(prior1)
    prior2 = np.array(prior2)
    priortype = np.array(priortype)

    model.freenames = freenames

    return freenames, freepars, prior1, prior2, priortype, indep_vars




[docs]
def group_variables_lmfit(model):
    """Group variables into fitted and frozen for lmfit fitter.

    Parameters
    ----------
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit

    Returns
    -------
    paramlist : list
        The fitted variables.
    freenames : np.array
        The names of fitted variables.
    indep_vars : dict
        The frozen variables.

    Notes
    -----
    History:

    - December 29, 2021 Taylor Bell
        Moved code to separate function to look similar to other fitters.
    """
    all_params = [i for j in [model.components[n].parameters.dict.items()
                  for n in range(len(model.components))] for i in j]

    # Group the different variable types
    param_list = []
    freenames = []
    indep_vars = {}
    for param in all_params:
        param = list(param)
        if param[1][1] == 'free':
            freenames.append(param[0])
            param[1][1] = True
            param_list.append(tuple(param))
        elif param[1][1] == 'fixed':
            param[1][1] = False
            param_list.append(tuple(param))
        else:
            indep_vars[param[0]] = param[1]
    freenames = np.array(freenames)

    return param_list, freenames, indep_vars




[docs]
def load_old_fitparams(meta, log, channel, freenames):
    """Load in the best-fit values from a previous fit.

    Parameters
    ----------
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    channel : int
        Unused. The current channel.
    freenames : list
        The names of the fitted parameters.

    Returns
    -------
    fitted_values : np.array
        The best-fit values from a previous fit

    Raises
    ------
    AssertionError
        The old fit is incompatible with the current fit.
    """
    fname = os.path.join(meta.topdir, *meta.old_fitparams.split(os.sep))
    fitted_values = pd.read_csv(fname, escapechar='#', skipinitialspace=True)
    full_keys = np.array(fitted_values.keys())
    # Remove the " " from the start of the first key
    full_keys[0] = full_keys[0][1:]

    if np.all(full_keys != freenames):
        log.writelog('Old fit does not have the same fitted parameters and '
                     'cannot be used to initialize the new fit.\n'
                     'The old fit included:\n['+','.join(full_keys)+']\n'
                     'The new fit included:\n['+','.join(freenames)+']',
                     mute=True)
        raise AssertionError('Old fit does not have the same fitted parameters'
                             ' and cannot be used to initialize the new fit.\n'
                             'The old fit included:\n['+','.join(full_keys) +
                             ']\nThe new fit included:\n['+','.join(freenames)
                             + ']')

    return np.array(fitted_values)[0]




[docs]
def save_fit(meta, lc, model, fitter, results_table, freenames, samples=[]):
    """Save a fit as a txt file as well as the entire chain if provided.

    Parameters
    ----------
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object.
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit.
    fitter : str
        The current fitter being used.
    fit_params : np.array
        The best-fit values from the current fit.
    freenames : list
        The list of fitted parameter names.
    samples : ndarray; optional
        The full chain from a sampling method, by default [].

    Notes
    -----
    History:

    - Mar 13-Apr 18, 2022 Caroline Piaulet
        Record an astropy table for mean, median, percentiles,
        +/- 1 sigma, all params
    """
    if lc.white:
        channel_tag = '_white'
    elif lc.share:
        channel_tag = '_shared'
    else:
        ch_number = str(lc.channel).zfill(len(str(lc.nchannel)))
        channel_tag = f'_ch{ch_number}'

    # Save the fitted parameters and their uncertainties (if possible)
    fname = f'S5_{fitter}_fitparams{channel_tag}'
    results_table.write(meta.outputdir+fname+'.csv', format='csv',
                        overwrite=False)

    # Save the chain from the sampler using Astraeus (if a chain was provided)
    if len(samples) != 0:
        fname = meta.outputdir+f'S5_{fitter}_samples{channel_tag}.h5'
        ds = dict([(freenames[i], xr.DataArray(samples[:, i], dims=['sample'],
                                               name=freenames[i]))
                   for i in range(len(freenames))])
        ds = xrio.makeDataset(ds)
        xrio.writeXR(fname, ds)

    # Directory structure should not use expanded HW values
    spec_hw_val = meta.spec_hw
    bg_hw_val = meta.bg_hw
    spec_hw_val //= meta.expand
    if not isinstance(bg_hw_val, str):
        # Only divide if value is not a string (spectroscopic modes)
        bg_hw_val //= meta.expand
    # Save the S5 outputs in a human readable ecsv file
    if not isinstance(meta.bg_hw, str):
        # Only divide if value is not a string (spectroscopic modes)
        bg_hw = meta.bg_hw//meta.expand
    else:
        bg_hw = meta.bg_hw
    event_ap_bg = meta.eventlabel+"_ap"+str(meta.spec_hw//meta.expand) + \
        '_bg'+str(bg_hw)
    meta.tab_filename_s5 = (meta.outputdir+'S5_'+event_ap_bg+"_Table_Save" +
                            channel_tag+'.txt')
    wavelengths = np.mean(np.append(meta.wave_low.reshape(1, -1),
                                    meta.wave_hi.reshape(1, -1), axis=0),
                          axis=0)
    wave_errs = (meta.wave_hi-meta.wave_low)/2
    # Evaluate each individual model for easier access outside of Eureka!
    individual_models = []
    for comp in model.components:
        if comp.name != 'GP':
            individual_models.append([comp.name, comp.eval()])
        else:
            fit = model.eval(incl_GP=False)
            individual_models.append([comp.name, comp.eval(fit)])
    individual_models = np.array(individual_models, dtype=object)

    model_lc = model.eval()
    residuals = lc.flux-model_lc
    astropytable.savetable_S5(meta.tab_filename_s5, meta, lc.time,
                              wavelengths[lc.fitted_channels],
                              wave_errs[lc.fitted_channels],
                              lc.flux, lc.unc_fit, individual_models, model_lc,
                              residuals)

    return