import numpy as np
import pandas as pd
import copy
from io import StringIO
import os
import sys
import h5py
import time as time_pkg
from scipy.optimize import minimize
import lmfit
import emcee
from dynesty import NestedSampler
from dynesty.utils import resample_equal
from .likelihood import computeRedChiSq, lnprob, ln_like, ptform
from . import plots_s5 as plots
from ..lib import astropytable
from multiprocessing import Pool
from astropy import table
def lsqfitter(lc, model, meta, log, calling_function='lsq', **kwargs):
    """Perform least-squares fit.

    Parameters
    ----------
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object.
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit.
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    calling_function : str
        The fitter that is being run (e.g. may be 'emcee' if running lsqfitter
        to initialize emcee walkers). Defaults to 'lsq'.
    **kwargs : dict
        Arbitrary keyword arguments.

    Returns
    -------
    best_model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model after fitting

    Notes
    -----
    History:

    - December 29-30, 2021 Taylor Bell
        Updated documentation and arguments. Reduced repeated code.
        Also saving covariance matrix for later estimation of sampler
        step size.
    - January 7-22, 2022 Megan Mansfield
        Adding ability to do a single shared fit across all channels
    - February 28-March 1, 2022 Caroline Piaulet
        Adding scatter_ppm parameter
    - Mar 13-Apr 18, 2022 Caroline Piaulet
        Record an astropy table for param values
    """
    # Group the different variable types
    freenames, freepars, prior1, prior2, priortype, indep_vars = \
        group_variables(model)

    if hasattr(meta, 'old_fitparams') and meta.old_fitparams is not None:
        freepars = load_old_fitparams(meta, log, lc.channel, freenames)

    start_lnprob = lnprob(freepars, lc, model, prior1, prior2, priortype,
                          freenames)
    log.writelog(f'Starting lnprob: {start_lnprob}', mute=(not meta.verbose))

    def neg_lnprob(theta, lc, model, prior1, prior2, priortype, freenames):
        # scipy.optimize.minimize minimizes, so negate the log-probability
        return -lnprob(theta, lc, model, prior1, prior2, priortype, freenames)

    global lsq_t0
    lsq_t0 = time_pkg.time()

    def callback_full(theta, lc, model, prior1, prior2, priortype, freenames):
        # Throttle progress printing to at most twice per second
        global lsq_t0
        if (time_pkg.time()-lsq_t0) > 0.5:
            lsq_t0 = time_pkg.time()
            print('Current lnprob = ', lnprob(theta, lc, model, prior1, prior2,
                                              priortype, freenames), end='\r')

    def callback(theta):
        return callback_full(theta, lc, model, prior1, prior2, priortype,
                             freenames)

    # Fall back on sensible defaults for any unspecified lsq settings
    if not hasattr(meta, 'lsq_method'):
        log.writelog('No lsq optimization method specified - using Nelder-Mead'
                     ' by default.')
        meta.lsq_method = 'Nelder-Mead'
    if not hasattr(meta, 'lsq_tol'):
        log.writelog('No lsq tolerance specified - using 1e-6 by default.')
        meta.lsq_tol = 1e-6
    if not hasattr(meta, 'lsq_maxiter'):
        meta.lsq_maxiter = None

    results = minimize(neg_lnprob, freepars,
                       args=(lc, model, prior1, prior2, priortype, freenames),
                       method=meta.lsq_method, tol=meta.lsq_tol,
                       options={'maxiter': meta.lsq_maxiter},
                       callback=callback)

    log.writelog("\nVerbose lsq results: {}\n".format(results),
                 mute=(not meta.verbose))
    if not meta.verbose:
        log.writelog("Success?: {}".format(results.success))
        log.writelog(results.message)

    # Get the best fit params
    fit_params = results.x

    # Create table of results
    t_results = table.Table([freenames, fit_params],
                            names=("Parameter", "Mean"))

    model.update(fit_params, freenames)
    # Update the fitted per-point uncertainties from any fitted scatter
    # parameters.  Channels are stored end-to-end, so each channel occupies
    # a contiguous slice of length lc.time.size.
    if "scatter_ppm" in freenames:
        ind = [i for i in np.arange(len(freenames))
               if freenames[i][0:11] == "scatter_ppm"]
        # BUGFIX: this branch previously assumed lc.unc_fit already existed
        # (only the scatter_mult branch guarded against it), which could
        # raise an AttributeError
        if not hasattr(lc, 'unc_fit'):
            lc.unc_fit = copy.deepcopy(lc.unc)
        for chan in range(len(ind)):
            lc.unc_fit[chan*lc.time.size:(chan+1)*lc.time.size] = \
                fit_params[ind[chan]] * 1e-6
    elif "scatter_mult" in freenames:
        ind = [i for i in np.arange(len(freenames))
               if freenames[i][0:12] == "scatter_mult"]
        if not hasattr(lc, 'unc_fit'):
            lc.unc_fit = copy.deepcopy(lc.unc)
        for chan in range(len(ind)):
            lc.unc_fit[chan*lc.time.size:(chan+1)*lc.time.size] = \
                (fit_params[ind[chan]] *
                 lc.unc[chan*lc.time.size:(chan+1)*lc.time.size])

    # Save the fit ASAP
    save_fit(meta, lc, model, calling_function, t_results, freenames)

    end_lnprob = lnprob(fit_params, lc, model, prior1, prior2, priortype,
                        freenames)
    log.writelog(f'Ending lnprob: {end_lnprob}', mute=(not meta.verbose))

    # Compute reduced chi-squared
    chi2red = computeRedChiSq(lc, log, model, meta, freenames)

    print('\nLSQ RESULTS:')
    for i in range(len(freenames)):
        if 'scatter_mult' in freenames[i]:
            # Report the scatter multiplier converted to ppm for readability
            chan = freenames[i].split('_')[-1]
            if chan.isnumeric():
                chan = int(chan)
            else:
                chan = 0
            scatter_ppm = (fit_params[i] *
                           np.ma.median(lc.unc[chan*lc.time.size:
                                               (chan+1)*lc.time.size]) * 1e6)
            log.writelog(f'{freenames[i]}: {fit_params[i]}; {scatter_ppm} ppm')
        else:
            log.writelog(f'{freenames[i]}: {fit_params[i]}')
    log.writelog('')

    # Plot fit
    if meta.isplots_S5 >= 1:
        plots.plot_fit(lc, model, meta, fitter=calling_function)

    # Plot GP fit + components
    if model.GP and meta.isplots_S5 >= 1:
        plots.plot_GP_components(lc, model, meta, fitter=calling_function)

    # Plot Allan plot
    if meta.isplots_S5 >= 3 and calling_function == 'lsq':
        # This plot is only really useful if you're actually using the
        # lsq fitter, otherwise don't make it
        plots.plot_rms(lc, model, meta, fitter=calling_function)

    # Plot residuals distribution
    if meta.isplots_S5 >= 3 and calling_function == 'lsq':
        plots.plot_res_distr(lc, model, meta, fitter=calling_function)

    # Make a new model instance
    best_model = copy.deepcopy(model)
    best_model.components[0].update(fit_params, freenames)

    # Save the covariance matrix in case it's needed to estimate step size
    # for a sampler
    # FINDME:
    # Commented out for now because op.least_squares() doesn't provide
    # covariance matrix
    # Need to compute using Jacobian matrix instead (hess_inv = (J.T J)^{-1})
    cov_mat = None
    best_model.__setattr__('cov_mat', cov_mat)
    best_model.__setattr__('chi2red', chi2red)
    best_model.__setattr__('fit_params', fit_params)

    return best_model
def demcfitter(lc, model, meta, log, **kwargs):
    """Perform sampling using Differential Evolution Markov Chain.

    This is an empty placeholder function to be filled later.

    Parameters
    ----------
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object.
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit.
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    **kwargs : dict
        Arbitrary keyword arguments.

    Returns
    -------
    best_model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model after fitting

    Notes
    -----
    History:

    - December 29, 2021 Taylor Bell
        Updated documentation and arguments
    """
    # DEMC sampling is not implemented yet, so no best-fit model is produced.
    return None
def emceefitter(lc, model, meta, log, **kwargs):
    """Perform sampling using emcee.

    Parameters
    ----------
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit
    meta : eureka.lib.readECF.MetaClass
        The metadata object
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    **kwargs : dict
        Arbitrary keyword arguments.

    Returns
    -------
    best_model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model after fitting

    Notes
    -----
    History:

    - December 29, 2021 Taylor Bell
        Updated documentation. Reduced repeated code.
    - January 7-22, 2022 Megan Mansfield
        Adding ability to do a single shared fit across all channels
    - February 23-25, 2022 Megan Mansfield
        Added log-uniform and Gaussian priors.
    - February 28-March 1, 2022 Caroline Piaulet
        Adding scatter_ppm parameter. Added statements to avoid some initial
        state issues.
    - Mar 13-Apr 18, 2022 Caroline Piaulet
        Record an astropy table for mean, median, percentiles,
        +/- 1 sigma, all params
    """
    # Group the different variable types
    freenames, freepars, prior1, prior2, priortype, indep_vars = \
        group_variables(model)

    if hasattr(meta, 'old_fitparams') and meta.old_fitparams is not None:
        freepars = load_old_fitparams(meta, log, lc.channel, freenames)

    ndim = len(freenames)

    if hasattr(meta, 'old_chain') and meta.old_chain is not None:
        # Resume from the end state of a previous emcee run
        pos, nwalkers = start_from_oldchain_emcee(meta, log, ndim, lc.channel,
                                                  freenames)
    else:
        if not hasattr(meta, 'lsq_first') or meta.lsq_first:
            # Only call lsq fitter first if asked or lsq_first option wasn't
            # passed (allowing backwards compatibility)
            log.writelog('\nCalling lsqfitter first...')
            # RUN LEAST SQUARES
            lsq_sol = lsqfitter(lc, model, meta, log,
                                calling_function='emcee_lsq', **kwargs)
            freepars = lsq_sol.fit_params
            # SCALE UNCERTAINTIES WITH REDUCED CHI2
            if meta.rescale_err:
                lc.unc *= np.sqrt(lsq_sol.chi2red)
        else:
            lsq_sol = None
        pos, nwalkers = initialize_emcee_walkers(meta, log, ndim, lsq_sol,
                                                 freepars, prior1, prior2,
                                                 priortype)

    start_lnprob = lnprob(np.median(pos, axis=0), lc, model, prior1, prior2,
                          priortype, freenames)
    log.writelog(f'Starting lnprob: {start_lnprob}', mute=(not meta.verbose))

    # Initialize thread pool
    if hasattr(meta, 'ncpu') and meta.ncpu > 1:
        pool = Pool(meta.ncpu)
    else:
        meta.ncpu = 1
        pool = None

    # Run emcee burn-in
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                    args=(lc, model, prior1, prior2,
                                          priortype, freenames),
                                    pool=pool)
    log.writelog('Running emcee burn-in...')
    sampler.run_mcmc(pos, meta.run_nsteps, progress=True)
    # Discard the burn-in steps and flatten the walkers into a single chain
    samples = sampler.get_chain(flat=True, discard=meta.run_nburn)

    if meta.ncpu > 1:
        # Close the thread pool
        pool.close()
        pool.join()

    # Record median + percentiles
    q = np.percentile(samples, [16, 50, 84], axis=0)
    fit_params = q[1]  # median
    mean_params = np.mean(samples, axis=0)
    errs = np.std(samples, axis=0)

    # Create table of results
    t_results = table.Table([freenames, mean_params, q[0]-q[1], q[2]-q[1],
                             q[0], fit_params, q[2]],
                            names=("Parameter", "Mean", "-1sigma", "+1sigma",
                                   "16th", "50th", "84th"))

    upper_errs = q[2]-q[1]
    lower_errs = q[1]-q[0]

    model.update(fit_params, freenames)
    model.errs = dict(zip(freenames, errs))
    # Update the fitted per-point uncertainties from any fitted scatter
    # parameters; each channel occupies a contiguous slice of lc.time.size
    if "scatter_ppm" in freenames:
        ind = [i for i in np.arange(len(freenames))
               if freenames[i][0:11] == "scatter_ppm"]
        # BUGFIX: guard against lc.unc_fit not yet existing (e.g. when
        # lsq_first was False), matching the guard used in lsqfitter
        if not hasattr(lc, 'unc_fit'):
            lc.unc_fit = copy.deepcopy(lc.unc)
        for chan in range(len(ind)):
            lc.unc_fit[chan*lc.time.size:(chan+1)*lc.time.size] = \
                fit_params[ind[chan]] * 1e-6
    elif "scatter_mult" in freenames:
        ind = [i for i in np.arange(len(freenames))
               if freenames[i][0:12] == "scatter_mult"]
        if not hasattr(lc, 'unc_fit'):
            lc.unc_fit = copy.deepcopy(lc.unc)
        for chan in range(len(ind)):
            lc.unc_fit[chan*lc.time.size:(chan+1)*lc.time.size] = \
                fit_params[ind[chan]] * lc.unc[chan*lc.time.size:
                                               (chan+1)*lc.time.size]
    else:
        lc.unc_fit = lc.unc

    # Save the fit ASAP so plotting errors don't make you lose everything
    save_fit(meta, lc, model, 'emcee', t_results, freenames, samples)

    end_lnprob = lnprob(fit_params, lc, model, prior1, prior2, priortype,
                        freenames)
    log.writelog(f'Ending lnprob: {end_lnprob}', mute=(not meta.verbose))
    acceptance_fraction = np.mean(sampler.acceptance_fraction)
    log.writelog(f"Mean acceptance fraction: {acceptance_fraction:.3f}",
                 mute=(not meta.verbose))
    try:
        autocorr = np.mean(sampler.get_autocorr_time())
        log.writelog(f"Mean autocorrelation time: {autocorr:.3f} steps",
                     mute=(not meta.verbose))
    except emcee.autocorr.AutocorrError:
        # BUGFIX: catch only the exception emcee raises when the chain is
        # too short, rather than a bare except that hid unrelated errors
        log.writelog("WARNING: Unable to estimate the autocorrelation time!",
                     mute=(not meta.verbose))

    # Compute reduced chi-squared
    chi2red = computeRedChiSq(lc, log, model, meta, freenames)

    log.writelog('\nEMCEE RESULTS:')
    for i in range(ndim):
        if 'scatter_mult' in freenames[i]:
            # Report the scatter multiplier converted to ppm for readability
            chan = freenames[i].split('_')[-1]
            if chan.isnumeric():
                chan = int(chan)
            else:
                chan = 0
            scatter_ppm = (1e6 * fit_params[i] *
                           np.ma.median(lc.unc[chan*lc.time.size:
                                               (chan+1)*lc.time.size]))
            scatter_ppm_upper = (1e6 * upper_errs[i] *
                                 np.ma.median(lc.unc[chan*lc.time.size:
                                                     (chan+1)*lc.time.size]))
            scatter_ppm_lower = (1e6 * lower_errs[i] *
                                 np.ma.median(lc.unc[chan*lc.time.size:
                                                     (chan+1)*lc.time.size]))
            log.writelog(f'{freenames[i]}: {fit_params[i]} (+{upper_errs[i]},'
                         f' -{lower_errs[i]}); {scatter_ppm} '
                         f'(+{scatter_ppm_upper}, -{scatter_ppm_lower}) ppm')
        else:
            log.writelog(f'{freenames[i]}: {fit_params[i]} (+{upper_errs[i]},'
                         f' -{lower_errs[i]})')
    log.writelog('')

    # Plot fit
    if meta.isplots_S5 >= 1:
        plots.plot_fit(lc, model, meta, fitter='emcee')

    # Plot GP fit + components
    if model.GP and meta.isplots_S5 >= 1:
        plots.plot_GP_components(lc, model, meta, fitter='emcee')

    # Plot chain evolution
    # BUGFIX: this block previously appeared twice under the same condition,
    # producing every chain plot twice
    if meta.isplots_S5 >= 3:
        plots.plot_chain(sampler.get_chain(), lc, meta, freenames,
                         fitter='emcee', burnin=True, nburn=meta.run_nburn)
        plots.plot_chain(sampler.get_chain(discard=meta.run_nburn), lc, meta,
                         freenames, fitter='emcee', burnin=False)

    # Plot Allan plot
    if meta.isplots_S5 >= 3:
        plots.plot_rms(lc, model, meta, fitter='emcee')

    # Plot residuals distribution
    if meta.isplots_S5 >= 3:
        plots.plot_res_distr(lc, model, meta, fitter='emcee')

    if meta.isplots_S5 >= 5:
        plots.plot_corner(samples, lc, meta, freenames, fitter='emcee')

    # Make a new model instance
    best_model = copy.deepcopy(model)
    best_model.components[0].update(fit_params, freenames)
    best_model.__setattr__('chi2red', chi2red)
    best_model.__setattr__('fit_params', fit_params)

    return best_model
def start_from_oldchain_emcee(meta, log, ndim, channel, freenames):
    """Restart emcee using the ending point of an old chain.

    Parameters
    ----------
    meta : eureka.lib.readECF.MetaClass
        The meta data object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    ndim : int
        The number of fitted parameters.
    channel : int
        The channel number.
    freenames : list
        The names of the fitted parameters.

    Returns
    -------
    pos : ndarray
        The starting positions for all walkers.
    nwalkers : int
        The number of walkers (may differ from the requested number
        if unable to get all walkers within the priors).

    Raises
    ------
    AssertionError
        The old chain is not compatible with the current fit.
    AssertionError
        Unable to get enough walkers within the prior range.
    """
    if meta.sharedp:
        channel_key = 'shared'
    else:
        channel_key = f'ch{channel}'

    foldername = os.path.join(meta.topdir, *meta.old_chain.split(os.sep))
    fname = f'S5_emcee_fitparams_{channel_key}.csv'
    fitted_values = pd.read_csv(os.path.join(foldername, fname),
                                escapechar='#', skipinitialspace=True)
    full_keys = np.array(fitted_values.keys())
    # Remove the " " from the start of the first key
    full_keys[0] = full_keys[0][1:]
    # BUGFIX: the old check np.all(full_keys != freenames) only raised when
    # *every* parameter name differed (and misbehaved on length mismatches);
    # any difference at all makes the old chain unusable, so require exact
    # element-wise equality.
    if not np.array_equal(full_keys, np.asarray(freenames)):
        message = ('Old chain does not have the same fitted parameters and '
                   'cannot be used to initialize the new fit.\n'
                   'The old chain included:\n['+','.join(full_keys)+']\n'
                   'The new chain included:\n['+','.join(freenames)+']')
        log.writelog(message, mute=True)
        raise AssertionError(message)

    fname = f'S5_emcee_samples_{channel_key}'
    # Load HDF5 files
    full_fname = os.path.join(foldername, fname)+'.h5'
    with h5py.File(full_fname, 'r') as hf:
        samples = hf['samples'][:]
    log.writelog(f'Old chain path: {full_fname}')

    # Initialize the walkers using samples from the old chain
    nwalkers = meta.run_nwalkers
    pos = samples[-nwalkers:]
    walkers_used = nwalkers

    # Make sure that no walkers are starting in the same place as
    # they would then exactly follow each other
    repeat_pos = np.where([np.any(np.all(pos[i] == np.delete(pos, i, axis=0),
                                         axis=1))
                           for i in range(pos.shape[0])])[0]
    # Pull earlier (not yet used) samples to replace duplicated walkers
    while (len(repeat_pos) > 0 and
            samples.shape[0] > (walkers_used+len(repeat_pos))):
        pos[repeat_pos] = samples[:-walkers_used][-len(repeat_pos):]
        walkers_used += len(repeat_pos)
        repeat_pos = np.where([np.any(np.all(pos[i] ==
                                             np.delete(pos, i, axis=0),
                                             axis=1))
                               for i in range(pos.shape[0])])[0]

    # If unable to initialize all walkers in unique starting locations,
    # use fewer walkers unless there'd be fewer walkers than dimensions
    if len(repeat_pos) > 0 and (nwalkers-len(repeat_pos) > ndim):
        pos = np.delete(pos, repeat_pos, axis=0)
        nwalkers = pos.shape[0]
        log.writelog(f'Warning: Unable to initialize all walkers at different '
                     f'positions using old chain!\nUsing {nwalkers} walkers '
                     f'instead of the initially requested {meta.run_nwalkers} '
                     f'walkers')
    elif len(repeat_pos) > 0:
        message = (f'Error: Unable to initialize all walkers at different '
                   f'positions using old chain!\nUsing {nwalkers} walkers '
                   f'instead of the initially requested {meta.run_nwalkers} '
                   f'walkers is not permitted as there are {ndim} fitted '
                   f'parameters')
        log.writelog(message, mute=True)
        raise AssertionError(message)

    return pos, nwalkers
def initialize_emcee_walkers(meta, log, ndim, lsq_sol, freepars, prior1,
                             prior2, priortype):
    """Initialize emcee walker starting positions.

    Parameters
    ----------
    meta : eureka.lib.readECF.MetaClass
        The meta data object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    ndim : int
        The number of fitted parameters.
    lsq_sol : The results from the lsqfitter.
        The results from the lsqfitter.
    freepars : ndarray
        The current values of the fitted parameters.
    prior1 : ndarray
        The list of prior1 values.
    prior2 : ndarray
        The list of prior2 values.
    priortype : ndarray
        The types of each prior (to determine meaning of prior1 and prior2).

    Returns
    -------
    pos : ndarray
        The starting position of all walkers.
    nwalkers : int
        The number of walkers (may differ from the requested number
        if unable to get all walkers within the priors).

    Raises
    ------
    AssertionError
        Failed to initialize any walkers within the priors
    AssertionError
        Failed to initialize enough walkers within the priors
    """
    # Indices of uniform, log-uniform, and normal priors
    u = np.where(priortype == 'U')[0]
    lu = np.where(priortype == 'LU')[0]
    n = np.where(priortype == 'N')[0]

    def in_priors(positions):
        # Boolean mask of walkers lying within all U and LU prior bounds.
        # Note: LU priors store ln(bounds), so positions are compared in
        # log space.
        ok_u = np.array([((prior1[u] <= ii).all() and
                          (ii <= prior2[u]).all())
                         for ii in positions[:, u]], dtype=bool)
        ok_lu = np.array([((prior1[lu] <= np.log(ii)).all() and
                           (np.log(ii) <= prior2[lu]).all())
                          for ii in positions[:, lu]], dtype=bool)
        # BUGFIX: a walker is valid only if it satisfies *both* prior
        # families; the old code applied these masks separately (and at one
        # point combined them with `+`, i.e. logical OR)
        return ok_u & ok_lu

    if lsq_sol is not None and lsq_sol.cov_mat is not None:
        step_size = np.diag(lsq_sol.cov_mat)
        if np.any(step_size == 0.):
            # Replace zero step sizes with a fraction of the prior range.
            # BUGFIX: the old chained fancy indexing
            # (step_size[u][ind] = ...) assigned into a temporary copy and
            # was silently discarded; index the original array directly.
            zero_u = u[step_size[u] == 0.]
            zero_lu = lu[step_size[lu] == 0.]
            zero_n = n[step_size[n] == 0.]
            step_size[zero_u] = 0.001*(prior2[zero_u] - prior1[zero_u])
            step_size[zero_lu] = (0.001 *
                                  (np.exp(prior2[zero_lu]) -
                                   np.exp(prior1[zero_lu])))
            step_size[zero_n] = 0.1*prior2[zero_n]
    else:
        # Sometimes the lsq fitter won't converge and will give None as
        # the covariance matrix. In that case, we need to establish the
        # step size in another way. Using a fractional step compared to
        # the prior range can work best for precisely known values like
        # t0 and period
        log.writelog('No covariance matrix from LSQ - falling back on a step '
                     'size based on the prior range')
        step_size = np.ones(ndim)
        step_size[u] = 0.001*(prior2[u] - prior1[u])
        step_size[lu] = 0.001*(np.exp(prior2[lu]) - np.exp(prior1[lu]))
        step_size[n] = 0.1*prior2[n]
    nwalkers = meta.run_nwalkers

    # make it robust to lsq hitting the upper or lower bound of the param space
    # BUGFIX: same chained-indexing issue as above - compute the absolute
    # positions of the offending parameters and assign through them.
    ind_max = u[freepars[u] == prior2[u]]
    ind_min = u[freepars[u] == prior1[u]]
    ind_max_LU = lu[np.log(freepars[lu]) == prior2[lu]]
    ind_min_LU = lu[np.log(freepars[lu]) == prior1[lu]]
    pmid = (prior2+prior1)/2.
    if len(ind_max) > 0 or len(ind_max_LU) > 0:
        log.writelog('Warning: >=1 params hit the upper bound in the lsq fit. '
                     'Setting to the middle of the interval.')
        freepars[ind_max] = pmid[ind_max]
        freepars[ind_max_LU] = (np.exp(prior2[ind_max_LU]) +
                                np.exp(prior1[ind_max_LU]))/2.
    if len(ind_min) > 0 or len(ind_min_LU) > 0:
        log.writelog('Warning: >=1 params hit the lower bound in the lsq fit. '
                     'Setting to the middle of the interval.')
        freepars[ind_min] = pmid[ind_min]
        freepars[ind_min_LU] = (np.exp(prior2[ind_min_LU]) +
                                np.exp(prior1[ind_min_LU]))/2.

    # Generate the walker positions
    pos = np.array([freepars + step_size*np.random.randn(ndim)
                    for i in range(nwalkers)])

    # Make sure the walker positions obey the priors
    valid = in_priors(pos)
    if not np.all(valid):
        log.writelog('Not all walkers were initialized within the priors, '
                     'using a smaller proposal distribution')
        pos = pos[valid]
        # Make sure the step size is well within the limits
        uniform_step = np.abs(np.append((prior2-freepars).reshape(-1, 1)/10,
                                        (freepars-prior1).reshape(-1, 1)/10,
                                        axis=1))
        step_size_options = np.append(step_size.reshape(-1, 1), uniform_step,
                                      axis=1)
        if len(lu) != 0:
            # BUGFIX: the old .reshape(-1, 1) produced a (k, 1) array that
            # cannot broadcast into the (k,) target slice
            step_size_options[lu, 1] = np.abs(np.exp(prior2[lu]) -
                                              freepars[lu])/10
            step_size_options[lu, 2] = np.abs(np.exp(prior1[lu]) -
                                              freepars[lu])/10
        if len(n) != 0:
            step_size_options[n, 1:] = step_size_options[n, 0].reshape(-1, 1)
        step_size = np.min(step_size_options, axis=1)
        if pos.shape[0] == 0:
            # np.append needs a correctly-shaped array; use a dummy row that
            # is removed afterwards
            remove_zeroth = True
            new_nwalkers = nwalkers-len(pos)
            pos = np.zeros((1, ndim))
        else:
            remove_zeroth = False
            new_nwalkers = nwalkers-len(pos)
        pos = np.append(pos, np.array([freepars +
                                       step_size*np.random.randn(ndim)
                                       for i in range(new_nwalkers)
                                       ]).reshape(-1, ndim), axis=0)
        if remove_zeroth:
            pos = pos[1:]
        valid = in_priors(pos)

    if not np.any(valid):
        raise AssertionError('Failed to initialize any walkers within the set '
                             'bounds for all parameters!\n'
                             'Check your stating position, decrease your step '
                             'size, or increase the bounds on your parameters')
    elif not np.all(valid):
        old_nwalkers = nwalkers
        # BUGFIX: keep only walkers valid for *all* prior types (the old
        # `in_range+in_range2` kept any walker valid for at least one type)
        pos = pos[valid]
        nwalkers = pos.shape[0]
        if nwalkers > ndim:
            log.writelog(f'Warning: Failed to initialize all walkers within '
                         f'the set bounds for all parameters!\nUsing '
                         f'{nwalkers} walkers instead of the initially '
                         f'requested {old_nwalkers} walkers')
        else:
            message = (f'Error: Failed to initialize all walkers within the '
                       f'set bounds for all parameters!\nUsing '
                       f'{nwalkers} walkers instead of the initially requested'
                       f' {old_nwalkers} walkers is not permitted as there are'
                       f' {ndim} fitted parameters')
            log.writelog(message, mute=True)
            raise AssertionError(message)

    return pos, nwalkers
def dynestyfitter(lc, model, meta, log, **kwargs):
    """Perform sampling using dynesty.

    Parameters
    ----------
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object.
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit.
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    **kwargs : dict
        Arbitrary keyword arguments.

    Returns
    -------
    best_model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model after fitting

    Notes
    -----
    History:

    - December 29, 2021 Taylor Bell
        Updated documentation. Reduced repeated code.
    - January 7-22, 2022 Megan Mansfield
        Adding ability to do a single shared fit across all channels
    - February 23-25, 2022 Megan Mansfield
        Added log-uniform and Gaussian priors.
    - February 28-March 1, 2022 Caroline Piaulet
        Adding scatter_ppm parameter.
    - Mar 13-Apr 18, 2022 Caroline Piaulet
        Record an astropy table for mean, median, percentiles,
        +/- 1 sigma, all params
    """
    # Group the different variable types
    freenames, freepars, prior1, prior2, priortype, indep_vars = \
        group_variables(model)

    if hasattr(meta, 'old_fitparams') and meta.old_fitparams is not None:
        freepars = load_old_fitparams(meta, log, lc.channel, freenames)

    # DYNESTY
    nlive = meta.run_nlive  # number of live points
    bound = meta.run_bound  # use MultiNest algorithm for bounds
    ndims = len(freepars)  # number of fitted parameters
    sample = meta.run_sample  # uniform sampling
    tol = meta.run_tol  # the stopping criterion

    start_lnprob = lnprob(freepars, lc, model, prior1, prior2, priortype,
                          freenames)
    log.writelog(f'Starting lnprob: {start_lnprob}', mute=(not meta.verbose))

    # START DYNESTY
    l_args = [lc, model, freenames]

    log.writelog('Running dynesty...')

    # Rule of thumb: at least ndims*(ndims+1)/2 live points (// on ints
    # already floors, so no np.ceil needed)
    min_nlive = ndims*(ndims+1)//2
    if nlive < min_nlive:
        log.writelog(f'**** WARNING: You should set run_nlive to at least '
                     f'{min_nlive} ****')

    if hasattr(meta, 'ncpu') and meta.ncpu > 1:
        pool = Pool(meta.ncpu)
        queue_size = meta.ncpu
    else:
        meta.ncpu = 1
        pool = None
        queue_size = None
    sampler = NestedSampler(ln_like, ptform, ndims, pool=pool,
                            queue_size=queue_size, bound=bound, sample=sample,
                            nlive=nlive, logl_args=l_args,
                            ptform_args=[prior1, prior2, priortype])
    sampler.run_nested(dlogz=tol, print_progress=True)  # output progress bar
    res = sampler.results  # get results dictionary from sampler
    if meta.ncpu > 1:
        pool.close()
        pool.join()

    log.writelog('', mute=(not meta.verbose))
    # Need to temporarily redirect output since res.summary() prints rather
    # than returns a string
    old_stdout = sys.stdout
    sys.stdout = mystdout = StringIO()
    res.summary()
    sys.stdout = old_stdout
    log.writelog(mystdout.getvalue(), mute=(not meta.verbose))

    # get function that resamples from the nested samples to give sampler
    # with equal weight
    # draw posterior samples
    weights = np.exp(res['logwt'] - res['logz'][-1])
    samples = resample_equal(res.samples, weights)
    log.writelog('Number of posterior samples is {}'.format(len(samples)),
                 mute=(not meta.verbose))

    # Record median + percentiles
    q = np.percentile(samples, [16, 50, 84], axis=0)
    fit_params = q[1]  # median
    mean_params = np.mean(samples, axis=0)
    errs = np.std(samples, axis=0)

    # Create table of results
    t_results = table.Table([freenames, mean_params, q[0]-q[1], q[2]-q[1],
                             q[0], fit_params, q[2]],
                            names=("Parameter", "Mean", "-1sigma", "+1sigma",
                                   "16th", "50th", "84th"))

    upper_errs = q[2]-q[1]
    lower_errs = q[1]-q[0]

    model.update(fit_params, freenames)
    model.errs = dict(zip(freenames, errs))
    # Update the fitted per-point uncertainties from any fitted scatter
    # parameters; each channel occupies a contiguous slice of lc.time.size
    if "scatter_ppm" in freenames:
        ind = [i for i in np.arange(len(freenames))
               if freenames[i][0:11] == "scatter_ppm"]
        # BUGFIX: guard against lc.unc_fit not yet existing, matching the
        # guard used in lsqfitter
        if not hasattr(lc, 'unc_fit'):
            lc.unc_fit = copy.deepcopy(lc.unc)
        for chan in range(len(ind)):
            lc.unc_fit[chan*lc.time.size:(chan+1)*lc.time.size] = \
                fit_params[ind[chan]] * 1e-6
    elif "scatter_mult" in freenames:
        ind = [i for i in np.arange(len(freenames))
               if freenames[i][0:12] == "scatter_mult"]
        if not hasattr(lc, 'unc_fit'):
            lc.unc_fit = copy.deepcopy(lc.unc)
        for chan in range(len(ind)):
            lc.unc_fit[chan*lc.time.size:(chan+1)*lc.time.size] = \
                (fit_params[ind[chan]] *
                 lc.unc[chan*lc.time.size:(chan+1)*lc.time.size])
    else:
        lc.unc_fit = lc.unc

    # Save the fit ASAP so plotting errors don't make you lose everything
    save_fit(meta, lc, model, 'dynesty', t_results, freenames, samples)

    end_lnprob = lnprob(fit_params, lc, model, prior1, prior2, priortype,
                        freenames)
    log.writelog(f'Ending lnprob: {end_lnprob}', mute=(not meta.verbose))

    # Compute reduced chi-squared
    chi2red = computeRedChiSq(lc, log, model, meta, freenames)

    log.writelog('\nDYNESTY RESULTS:')
    for i in range(ndims):
        if 'scatter_mult' in freenames[i]:
            # Report the scatter multiplier converted to ppm for readability
            chan = freenames[i].split('_')[-1]
            if chan.isnumeric():
                chan = int(chan)
            else:
                chan = 0
            scatter_ppm = (1e6 * fit_params[i] *
                           np.ma.median(lc.unc[chan*lc.time.size:
                                               (chan+1)*lc.time.size]))
            scatter_ppm_upper = (1e6 * upper_errs[i] *
                                 np.ma.median(lc.unc[chan*lc.time.size:
                                                     (chan+1)*lc.time.size]))
            scatter_ppm_lower = (1e6 * lower_errs[i] *
                                 np.ma.median(lc.unc[chan*lc.time.size:
                                                     (chan+1)*lc.time.size]))
            log.writelog(f'{freenames[i]}: {fit_params[i]} (+{upper_errs[i]},'
                         f' -{lower_errs[i]}); {scatter_ppm} '
                         f'(+{scatter_ppm_upper}, -{scatter_ppm_lower}) ppm')
        else:
            log.writelog(f'{freenames[i]}: {fit_params[i]} (+{upper_errs[i]},'
                         f' -{lower_errs[i]})')
    log.writelog('')

    # Plot fit
    if meta.isplots_S5 >= 1:
        plots.plot_fit(lc, model, meta, fitter='dynesty')

    # Plot GP fit + components
    if model.GP and meta.isplots_S5 >= 1:
        plots.plot_GP_components(lc, model, meta, fitter='dynesty')

    # Plot Allan plot
    if meta.isplots_S5 >= 3:
        plots.plot_rms(lc, model, meta, fitter='dynesty')

    # Plot residuals distribution
    if meta.isplots_S5 >= 3:
        plots.plot_res_distr(lc, model, meta, fitter='dynesty')

    # plot using corner.py
    if meta.isplots_S5 >= 5:
        plots.plot_corner(samples, lc, meta, freenames, fitter='dynesty')

    # Make a new model instance
    best_model = copy.deepcopy(model)
    best_model.components[0].update(fit_params, freenames)
    best_model.__setattr__('chi2red', chi2red)
    best_model.__setattr__('fit_params', fit_params)

    return best_model
def lmfitter(lc, model, meta, log, **kwargs):
    """Perform a fit using lmfit.

    Parameters
    ----------
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object.
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit.
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    **kwargs : dict
        Arbitrary keyword arguments.

    Returns
    -------
    best_model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model after fitting

    Notes
    -----
    History:

    - December 29, 2021 Taylor Bell
        Updated documentation. Reduced repeated code.
    - February 28-March 1, 2022 Caroline Piaulet
        Adding scatter_ppm parameter.
    - Mar 13-Apr 18, 2022 Caroline Piaulet
        Record an astropy table for parameter values
    """
    # TODO: Do something so that duplicate param names can all be handled
    # (e.g. two Polynomail models with c0). Perhaps append something to the
    # parameter name like c0_1 and c0_2?)

    # Group the different variable types
    param_list, freenames, indep_vars = group_variables_lmfit(model)

    # Add the time as an independent variable
    indep_vars['time'] = lc.time

    # Initialize lmfit Params object
    initialParams = lmfit.Parameters()
    # Insert parameters
    initialParams.add_many(*param_list)

    # Create the lmfit lightcurve model
    lcmodel = lmfit.Model(model.eval)
    # lmfit expects a list of names here; a live dict keys view could also
    # change if indep_vars were mutated later
    lcmodel.independent_vars = list(indep_vars.keys())

    # Fit light curve model to the simulated data
    result = lcmodel.fit(lc.flux, weights=1/lc.unc, params=initialParams,
                         **indep_vars, **kwargs)

    # Get the best fit params through lmfit's public API rather than
    # reaching into result.__dict__
    fit_params = result.params

    # Create table of results
    t_results = table.Table([freenames, fit_params],
                            names=("Parameter", "Mean"))

    model.update(fit_params, freenames)
    # Update the fitted per-point uncertainties from any fitted scatter
    # parameters; each channel occupies a contiguous slice of lc.time.size
    if "scatter_ppm" in freenames:
        ind = [i for i in np.arange(len(freenames))
               if freenames[i][0:11] == "scatter_ppm"]
        # BUGFIX: guard against lc.unc_fit not yet existing, matching the
        # guard used in lsqfitter
        if not hasattr(lc, 'unc_fit'):
            lc.unc_fit = copy.deepcopy(lc.unc)
        for chan in range(len(ind)):
            lc.unc_fit[chan*lc.time.size:(chan+1)*lc.time.size] = \
                fit_params[ind[chan]] * 1e-6
    elif "scatter_mult" in freenames:
        ind = [i for i in np.arange(len(freenames))
               if freenames[i][0:12] == "scatter_mult"]
        if not hasattr(lc, 'unc_fit'):
            lc.unc_fit = copy.deepcopy(lc.unc)
        for chan in range(len(ind)):
            lc.unc_fit[chan*lc.time.size:(chan+1)*lc.time.size] = \
                (fit_params[ind[chan]] *
                 lc.unc[chan*lc.time.size:(chan+1)*lc.time.size])
    else:
        lc.unc_fit = lc.unc

    # Save the fit ASAP
    save_fit(meta, lc, model, 'lmfitter', t_results, freenames)

    # Compute reduced chi-squared
    chi2red = computeRedChiSq(lc, log, model, meta, freenames)

    # Log results
    log.writelog(result.fit_report(), mute=(not meta.verbose))

    # Plot fit
    if meta.isplots_S5 >= 1:
        plots.plot_fit(lc, model, meta, fitter='lmfitter')

    # Plot GP fit + components
    if model.GP and meta.isplots_S5 >= 1:
        plots.plot_GP_components(lc, model, meta, fitter='lmfitter')

    # Plot Allan plot
    if meta.isplots_S5 >= 3:
        plots.plot_rms(lc, model, meta, fitter='lmfitter')

    # Plot residuals distribution
    if meta.isplots_S5 >= 3:
        plots.plot_res_distr(lc, model, meta, fitter='lmfitter')

    # Create new model with best fit parameters
    best_model = copy.deepcopy(model)
    best_model.components[0].update(fit_params, freenames)
    best_model.__setattr__('chi2red', chi2red)
    best_model.__setattr__('fit_params', fit_params)

    return best_model
def group_variables(model):
    """Group variables into fitted and frozen.

    Parameters
    ----------
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit

    Returns
    -------
    freenames : np.array
        The names of fitted variables.
    freepars : np.array
        The fitted variables.
    prior1 : np.array
        The lower bound for constrained variables with uniform/log uniform
        priors, or mean for constrained variables with Gaussian priors.
    prior2 : np.array
        The upper bound for constrained variables with uniform/log uniform
        priors, or standard deviation for constrained variables with
        Gaussian priors.
    priortype : np.array
        Keywords indicating the type of prior for each free parameter.
    indep_vars : dict
        The frozen variables.

    Notes
    -----
    History:

    - December 29, 2021 Taylor Bell
        Moved code to separate function to reduce repeated code.
    - January 11, 2022 Megan Mansfield
        Added ability to have shared parameters
    - February 23-25, 2022 Megan Mansfield
        Added log-uniform and Gaussian priors.
    """
    # Collect each channel's parameters in channel order, keeping only the
    # first occurrence of each name (shared parameters appear in several
    # channels' longparamlist entries).
    all_params = []
    alreadylist = []
    for chan in np.arange(model.components[0].nchan):
        temp = model.components[0].longparamlist[chan]
        for par in list(model.components[0].parameters.dict.items()):
            if par[0] in temp and par[0] not in alreadylist:
                all_params.append(par)
                alreadylist.append(par[0])

    # Group the different variable types
    freenames = []
    freepars = []
    prior1 = []
    prior2 = []
    priortype = []
    indep_vars = {}
    for ii, item in enumerate(all_params):
        name, param = item
        # param layout: [value, type, (prior par 1, prior par 2, prior type)]
        if ((param[1] == 'free') or (param[1] == 'shared')
                or ('white' in param[1])):
            freenames.append(name)
            freepars.append(param[0])
            if len(param) == 5:  # If prior is specified.
                prior1.append(param[2])
                prior2.append(param[3])
                priortype.append(param[4])
            elif (len(param) > 3) & (len(param) < 5):
                # If prior bounds are specified but not the prior type
                raise IndexError("If you want to specify prior parameters, you"
                                 " must also specify the prior type: 'U', 'LU'"
                                 ", or 'N'.")
            else:
                # If no prior is specified,
                # assume uniform prior with infinite bounds.
                prior1.append(-np.inf)
                prior2.append(np.inf)
                priortype.append('U')
        elif param[1] == 'independent':
            indep_vars[name] = param[0]
    freenames = np.array(freenames)
    freepars = np.array(freepars)
    prior1 = np.array(prior1)
    prior2 = np.array(prior2)
    priortype = np.array(priortype)

    return freenames, freepars, prior1, prior2, priortype, indep_vars
def group_variables_lmfit(model):
    """Group variables into fitted and frozen for lmfit fitter.

    Parameters
    ----------
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit

    Returns
    -------
    param_list : list
        The fitted variables.
    freenames : np.array
        The names of fitted variables.
    indep_vars : dict
        The frozen variables.

    Notes
    -----
    History:

    - December 29, 2021 Taylor Bell
        Moved code to separate function to look similar to other fitters.
    """
    all_params = [i for j in [model.components[n].parameters.dict.items()
                  for n in range(len(model.components))] for i in j]

    # Group the different variable types
    param_list = []
    freenames = []
    indep_vars = {}
    for name, vals in all_params:
        if vals[1] == 'free':
            freenames.append(name)
            # lmfit wants a boolean `vary` flag where the model stores the
            # 'free'/'fixed' type string. Flip the flag on a copy so the
            # model's own parameter definitions are not clobbered.
            vals = list(vals)
            vals[1] = True
            param_list.append((name, vals))
        elif vals[1] == 'fixed':
            vals = list(vals)
            vals[1] = False
            param_list.append((name, vals))
        else:
            indep_vars[name] = vals
    freenames = np.array(freenames)

    return param_list, freenames, indep_vars
def load_old_fitparams(meta, log, channel, freenames):
    """Load in the best-fit values from a previous fit.

    Parameters
    ----------
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    channel : int
        Unused. The current channel.
    freenames : list
        The names of the fitted parameters.

    Returns
    -------
    fitted_values : np.array
        The best-fit values from a previous fit

    Raises
    ------
    AssertionError
        The old fit is incompatible with the current fit.
    """
    fname = os.path.join(meta.topdir, *meta.old_fitparams.split(os.sep))
    fitted_values = pd.read_csv(fname, escapechar='#', skipinitialspace=True)
    full_keys = np.array(fitted_values.keys())
    # Remove the " " from the start of the first key
    full_keys[0] = full_keys[0][1:]
    # The old fit must have exactly the same fitted parameters, in the same
    # order, as the current fit. np.array_equal handles a differing number
    # of parameters safely (a plain element-wise != could not).
    if not np.array_equal(full_keys, freenames):
        message = ('Old fit does not have the same fitted parameters and '
                   'cannot be used to initialize the new fit.\n'
                   'The old fit included:\n['+','.join(full_keys)+']\n'
                   'The new fit included:\n['+','.join(freenames)+']')
        log.writelog(message, mute=True)
        raise AssertionError(message)
    # First (only) row of the table holds the best-fit values
    return np.array(fitted_values)[0]
def save_fit(meta, lc, model, fitter, results_table, freenames, samples=None):
    """Save a fit as a txt file as well as the entire chain if provided.

    Parameters
    ----------
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object.
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model that was fit.
    fitter : str
        The current fitter being used.
    results_table : astropy.table.Table
        The table of best-fit parameter values from the current fit.
    freenames : list
        The list of fitted parameter names.
    samples : ndarray; optional
        The full chain from a sampling method, by default None
        (no chain is saved).

    Notes
    -----
    History:

    - Mar 13-Apr 18, 2022 Caroline Piaulet
        Record an astropy table for mean, median, percentiles,
        +/- 1 sigma, all params
    """
    if samples is None:
        # None sentinel instead of a mutable default argument; an empty
        # sequence means "no chain to save".
        samples = []

    # Build a tag identifying which channel(s) this fit covers
    if lc.white:
        channel_tag = '_white'
    elif lc.share:
        channel_tag = '_shared'
    else:
        # Zero-pad so files sort correctly (e.g. ch03 of 10+ channels)
        ch_number = str(lc.channel).zfill(len(str(lc.nchannel)))
        channel_tag = f'_ch{ch_number}'

    # Save the fitted parameters and their uncertainties (if possible).
    # NOTE(review): overwrite=False, so this raises if the output file
    # already exists from an earlier run.
    fname = f'S5_{fitter}_fitparams{channel_tag}'
    results_table.write(meta.outputdir+fname+'.csv', format='csv',
                        overwrite=False)

    # Save the chain from the sampler (if a chain was provided)
    if len(samples) != 0:
        fname = f'S5_{fitter}_samples{channel_tag}'
        with h5py.File(meta.outputdir+fname+'.h5', 'w') as hf:
            hf.create_dataset("samples", data=samples)

    # Save the S5 outputs in a human readable ecsv file
    event_ap_bg = meta.eventlabel+"_ap"+str(meta.spec_hw)+'_bg'+str(meta.bg_hw)
    meta.tab_filename_s5 = (meta.outputdir+'S5_'+event_ap_bg+"_Table_Save" +
                            channel_tag+'.txt')
    # Bin centers (mean of the low/high wavelength edges) and half-widths
    wavelengths = np.mean(np.append(meta.wave_low.reshape(1, -1),
                                    meta.wave_hi.reshape(1, -1), axis=0),
                          axis=0)
    wave_errs = (meta.wave_hi-meta.wave_low)/2
    model_lc = model.eval()
    residuals = lc.flux-model_lc
    astropytable.savetable_S5(meta.tab_filename_s5, meta.time,
                              wavelengths[lc.fitted_channels],
                              wave_errs[lc.fitted_channels],
                              lc.flux, lc.unc_fit, model_lc,
                              residuals)
    return