Source code for eureka.S5_lightcurve_fitting.fitters

import numpy as np
import pandas as pd
import copy
from io import StringIO
import os
import sys
import h5py
import xarray as xr
from astraeus import xarrayIO as xrio
import time as time_pkg

from scipy.optimize import minimize
import lmfit
import emcee

from dynesty import NestedSampler
from dynesty.utils import resample_equal

from .likelihood import (computeRedChiSq, lnprob, ln_like, ptform,
                         update_uncertainty)
from . import plots_s5 as plots
from ..lib import astropytable
from ..lib.split_channels import get_trim

from multiprocessing import Pool

from astropy import table


def lsqfitter(lc, model, meta, log, calling_function='lsq', **kwargs):
    """Perform least-squares fit.

    Parameters
    ----------
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object.
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit.
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    calling_function : str
        The fitter that is being run (e.g. may be 'emcee' if running
        lsqfitter to initialize emcee walkers). Defaults to 'lsq'.
    **kwargs : dict
        Arbitrary keyword arguments.

    Returns
    -------
    best_model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model after fitting.

    Notes
    -----
    History:

    - December 29-30, 2021 Taylor Bell
        Updated documentation and arguments. Reduced repeated code.
        Also saving covariance matrix for later estimation of sampler
        step size.
    - January 7-22, 2022 Megan Mansfield
        Adding ability to do a single shared fit across all channels
    - February 28-March 1, 2022 Caroline Piaulet
        Adding scatter_ppm parameter
    - Mar 13-Apr 18, 2022 Caroline Piaulet
        Record an astropy table for param values
    """
    # Group the different variable types
    freenames, freepars, prior1, prior2, priortype, indep_vars = \
        group_variables(model)
    if hasattr(meta, 'old_fitparams') and meta.old_fitparams is not None:
        freepars = load_old_fitparams(meta, log, lc.channel, freenames)

    start_lnprob = lnprob(freepars, lc, model, prior1, prior2, priortype,
                          freenames)
    log.writelog(f'Starting lnprob: {start_lnprob}', mute=(not meta.verbose))

    # Plot starting point
    if meta.isplots_S5 >= 1:
        plots.plot_fit(lc, model, meta,
                       fitter=calling_function+'StartingPoint')
        # Plot GP starting point
        if model.GP:
            plots.plot_GP_components(lc, model, meta,
                                     fitter=calling_function+'StartingPoint')

    def neg_lnprob(theta, lc, model, prior1, prior2, priortype, freenames):
        return -lnprob(theta, lc, model, prior1, prior2, priortype,
                       freenames)

    global lsq_t0
    lsq_t0 = time_pkg.time()

    def callback_full(theta, lc, model, prior1, prior2, priortype,
                      freenames):
        global lsq_t0
        if (time_pkg.time()-lsq_t0) > 0.5:
            lsq_t0 = time_pkg.time()
            print('Current lnprob = ',
                  lnprob(theta, lc, model, prior1, prior2, priortype,
                         freenames),
                  end='\r')

    def callback(theta):
        return callback_full(theta, lc, model, prior1, prior2, priortype,
                             freenames)

    if not hasattr(meta, 'lsq_method'):
        log.writelog('No lsq optimization method specified - using '
                     'Nelder-Mead by default.')
        meta.lsq_method = 'Nelder-Mead'
    if not hasattr(meta, 'lsq_tol'):
        log.writelog('No lsq tolerance specified - using 1e-6 by default.')
        meta.lsq_tol = 1e-6
    if not hasattr(meta, 'lsq_maxiter'):
        meta.lsq_maxiter = None

    results = minimize(neg_lnprob, freepars,
                       args=(lc, model, prior1, prior2, priortype,
                             freenames),
                       method=meta.lsq_method, tol=meta.lsq_tol,
                       options={'maxiter': meta.lsq_maxiter},
                       callback=callback)

    log.writelog("\nVerbose lsq results: {}\n".format(results),
                 mute=(not meta.verbose))
    if not meta.verbose:
        log.writelog("Success?: {}".format(results.success))
        log.writelog(results.message)

    # Get the best fit params
    fit_params = results.x

    # Create table of results
    t_results = table.Table([freenames, fit_params],
                            names=("Parameter", "Mean"))

    model.update(fit_params)
    lc.unc_fit = update_uncertainty(fit_params, lc.nints, lc.unc, freenames)

    # Save the fit ASAP
    save_fit(meta, lc, model, calling_function, t_results, freenames)

    end_lnprob = lnprob(fit_params, lc, model, prior1, prior2, priortype,
                        freenames)
    log.writelog(f'Ending lnprob: {end_lnprob}', mute=(not meta.verbose))

    # Compute reduced chi-squared
    chi2red = computeRedChiSq(lc, log, model, meta, freenames)

    log.writelog('\nLSQ RESULTS:')
    for i in range(len(freenames)):
        if 'scatter_mult' in freenames[i]:
            chan = freenames[i].split('_')[-1]
            if chan.isnumeric():
                chan = int(chan)
            else:
                chan = 0
            trim1, trim2 = get_trim(meta.nints, chan)
            unc = np.ma.median(lc.unc[trim1:trim2])
            scatter_ppm = 1e6*fit_params[i]*unc
            log.writelog(f'{freenames[i]}: {fit_params[i]}; '
                         f'{scatter_ppm} ppm')
        else:
            log.writelog(f'{freenames[i]}: {fit_params[i]}')
    log.writelog('')

    # Plot fit
    if meta.isplots_S5 >= 1:
        plots.plot_fit(lc, model, meta, fitter=calling_function)

    # Plot GP fit + components
    if model.GP and meta.isplots_S5 >= 1:
        plots.plot_GP_components(lc, model, meta, fitter=calling_function)

    # Zoom in on phase variations
    if meta.isplots_S5 >= 1 and 'sinusoid_pc' in meta.run_myfuncs:
        plots.plot_phase_variations(lc, model, meta, fitter=calling_function)

    # Plot Allan plot
    if meta.isplots_S5 >= 3 and calling_function == 'lsq':
        # This plot is only really useful if you're actually using the
        # lsq fitter, otherwise don't make it
        plots.plot_rms(lc, model, meta, fitter=calling_function)

    # Plot residuals distribution
    if meta.isplots_S5 >= 3 and calling_function == 'lsq':
        plots.plot_res_distr(lc, model, meta, fitter=calling_function)

    # Make a new model instance
    best_model = copy.deepcopy(model)
    best_model.components[0].update(fit_params)

    # Save the covariance matrix in case it's needed to estimate step size
    # for a sampler
    # FINDME:
    # Commented out for now because op.least_squares() doesn't provide
    # covariance matrix
    # Need to compute using Jacobian matrix instead (hess_inv = (J.T J)^{-1})
    # model_lc = model.eval()
    # if results[1] is not None:
    #     residuals = (lc.flux - model_lc)
    #     cov_mat = results[1]*np.var(residuals)
    # else:
    #     # Sometimes lsq will fail to converge and will return a None
    #     # covariance matrix
    #     cov_mat = None
    cov_mat = None
    best_model.__setattr__('cov_mat', cov_mat)
    best_model.__setattr__('chi2red', chi2red)
    best_model.__setattr__('fit_params', fit_params)

    return best_model
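

# Example (illustrative, not part of the original module): lsqfitter's core
# pattern is maximizing lnprob by minimizing its negative with
# scipy.optimize.minimize and Nelder-Mead. This self-contained sketch
# reproduces that pattern; the toy Gaussian-dip model and fake data below
# are assumptions for demonstration only.
def _example_neg_lnprob_minimization():
    rng = np.random.default_rng(seed=42)
    time = np.linspace(0, 1, 100)
    flux = 1. - 0.01*np.exp(-0.5*((time-0.5)/0.05)**2)
    flux += 100e-6*rng.standard_normal(time.size)
    unc = 100e-6*np.ones_like(time)

    def toy_lnprob(theta):
        depth, t0 = theta
        model = 1. - depth*np.exp(-0.5*((time-t0)/0.05)**2)
        return -0.5*np.sum(((flux-model)/unc)**2)

    # Minimize the negative log-probability, as lsqfitter does above
    results = minimize(lambda theta: -toy_lnprob(theta), x0=[0.005, 0.4],
                       method='Nelder-Mead', tol=1e-6)
    return results.x  # best-fit (depth, t0)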


def demcfitter(lc, model, meta, log, **kwargs):
    """Perform sampling using Differential Evolution Markov Chain.

    This is an empty placeholder function to be filled later.

    Parameters
    ----------
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object.
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit.
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    **kwargs : dict
        Arbitrary keyword arguments.

    Returns
    -------
    best_model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model after fitting.

    Notes
    -----
    History:

    - December 29, 2021 Taylor Bell
        Updated documentation and arguments
    """
    best_model = None
    return best_model


def emceefitter(lc, model, meta, log, **kwargs):
    """Perform sampling using emcee.

    Parameters
    ----------
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object.
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit.
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    **kwargs : dict
        Arbitrary keyword arguments.

    Returns
    -------
    best_model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model after fitting.

    Notes
    -----
    History:

    - December 29, 2021 Taylor Bell
        Updated documentation. Reduced repeated code.
    - January 7-22, 2022 Megan Mansfield
        Adding ability to do a single shared fit across all channels
    - February 23-25, 2022 Megan Mansfield
        Added log-uniform and Gaussian priors.
    - February 28-March 1, 2022 Caroline Piaulet
        Adding scatter_ppm parameter. Added statements to avoid some
        initial state issues.
    - Mar 13-Apr 18, 2022 Caroline Piaulet
        Record an astropy table for mean, median, percentiles,
        +/- 1 sigma, all params
    """
    # Group the different variable types
    freenames, freepars, prior1, prior2, priortype, indep_vars = \
        group_variables(model)
    if hasattr(meta, 'old_fitparams') and meta.old_fitparams is not None:
        freepars = load_old_fitparams(meta, log, lc.channel, freenames)
    ndim = len(freenames)

    if hasattr(meta, 'old_chain') and meta.old_chain is not None:
        pos, nwalkers = start_from_oldchain_emcee(lc, meta, log, ndim,
                                                  freenames)
    else:
        if not hasattr(meta, 'lsq_first') or meta.lsq_first:
            # Only call lsq fitter first if asked or lsq_first option wasn't
            # passed (allowing backwards compatibility)
            log.writelog('\nCalling lsqfitter first...')
            # RUN LEAST SQUARES
            lsq_sol = lsqfitter(lc, model, meta, log,
                                calling_function='emcee_lsq', **kwargs)
            freepars = lsq_sol.fit_params
            # SCALE UNCERTAINTIES WITH REDUCED CHI2
            if meta.rescale_err:
                lc.unc *= np.sqrt(lsq_sol.chi2red)
        else:
            lsq_sol = None
        pos, nwalkers = initialize_emcee_walkers(meta, log, ndim, lsq_sol,
                                                 freepars, prior1, prior2,
                                                 priortype)

    start_lnprob = lnprob(np.median(pos, axis=0), lc, model, prior1, prior2,
                          priortype, freenames)
    log.writelog(f'Starting lnprob: {start_lnprob}', mute=(not meta.verbose))

    # Plot starting point
    if meta.isplots_S5 >= 1:
        plots.plot_fit(lc, model, meta, fitter='emceeStartingPoint')
        # Plot GP starting point
        if model.GP:
            plots.plot_GP_components(lc, model, meta,
                                     fitter='emceeStartingPoint')

    # Initialize thread pool
    if hasattr(meta, 'ncpu') and meta.ncpu > 1:
        pool = Pool(meta.ncpu)
    else:
        meta.ncpu = 1
        pool = None

    # Run emcee burn-in
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                    args=(lc, model, prior1, prior2,
                                          priortype, freenames),
                                    pool=pool)
    log.writelog('Running emcee burn-in...')
    sampler.run_mcmc(pos, meta.run_nsteps, progress=True)
    # state = sampler.run_mcmc(pos, meta.run_nsteps, progress=True)

    # # Log some details about the burn-in phase
    # acceptance_fraction = np.mean(sampler.acceptance_fraction)
    # log.writelog(f"Mean acceptance fraction: {acceptance_fraction:.3f}",
    #              mute=(not meta.verbose))
    # try:
    #     autocorr = sampler.get_autocorr_time()
    #     log.writelog(f"Mean autocorrelation time: {autocorr:.3f} steps",
    #                  mute=(not meta.verbose))
    # except:
    #     log.writelog("Error: Unable to estimate the autocorrelation "
    #                  "time!", mute=(not meta.verbose))
    # mid_lnprob = lnprob(np.median(sampler.get_chain()[-1], axis=0), lc,
    #                     model, prior1, prior2, priortype, freenames)
    # log.writelog(f'Intermediate lnprob: {mid_lnprob}',
    #              mute=(not meta.verbose))
    # if meta.isplots_S5 >= 3:
    #     plots.plot_chain(sampler.get_chain(), lc, meta, freenames,
    #                      fitter='emcee', burnin=True)

    # # Reset the sampler and do the production run
    # log.writelog('Running emcee production run...')
    # sampler.reset()
    # sampler.run_mcmc(state, meta.run_nsteps-meta.run_nburn, progress=True)
    # samples = sampler.get_chain(flat=True)

    samples = sampler.get_chain(flat=True, discard=meta.run_nburn)

    if meta.ncpu > 1:
        # Close the thread pool
        pool.close()
        pool.join()

    # Record median + percentiles
    q = np.percentile(samples, [16, 50, 84], axis=0)
    fit_params = q[1]  # median
    mean_params = np.mean(samples, axis=0)
    errs = np.std(samples, axis=0)

    # Create table of results
    t_results = table.Table([freenames, mean_params, q[0]-q[1], q[2]-q[1],
                             q[0], fit_params, q[2]],
                            names=("Parameter", "Mean", "-1sigma", "+1sigma",
                                   "16th", "50th", "84th"))
    upper_errs = q[2]-q[1]
    lower_errs = q[1]-q[0]

    model.update(fit_params)
    model.errs = dict(zip(freenames, errs))
    lc.unc_fit = update_uncertainty(fit_params, lc.nints, lc.unc, freenames)

    # Save the fit ASAP so plotting errors don't make you lose everything
    save_fit(meta, lc, model, 'emcee', t_results, freenames, samples)

    end_lnprob = lnprob(fit_params, lc, model, prior1, prior2, priortype,
                        freenames)
    log.writelog(f'Ending lnprob: {end_lnprob}', mute=(not meta.verbose))

    acceptance_fraction = np.mean(sampler.acceptance_fraction)
    log.writelog(f"Mean acceptance fraction: {acceptance_fraction:.3f}",
                 mute=(not meta.verbose))
    try:
        autocorr = np.mean(sampler.get_autocorr_time())
        log.writelog(f"Mean autocorrelation time: {autocorr:.3f} steps",
                     mute=(not meta.verbose))
    except Exception:
        # FINDME: Need to only catch the expected exception
        log.writelog("WARNING: Unable to estimate the autocorrelation time!",
                     mute=(not meta.verbose))

    # Compute reduced chi-squared
    chi2red = computeRedChiSq(lc, log, model, meta, freenames)

    log.writelog('\nEMCEE RESULTS:')
    for i in range(ndim):
        if 'scatter_mult' in freenames[i]:
            chan = freenames[i].split('_')[-1]
            if chan.isnumeric():
                chan = int(chan)
            else:
                chan = 0
            trim1, trim2 = get_trim(meta.nints, chan)
            unc = np.ma.median(lc.unc[trim1:trim2])
            scatter_ppm = 1e6*fit_params[i]*unc
            scatter_ppm_upper = 1e6*upper_errs[i]*unc
            scatter_ppm_lower = 1e6*lower_errs[i]*unc
            log.writelog(f'{freenames[i]}: {fit_params[i]} '
                         f'(+{upper_errs[i]}, -{lower_errs[i]}); '
                         f'{scatter_ppm} (+{scatter_ppm_upper}, '
                         f'-{scatter_ppm_lower}) ppm')
        else:
            log.writelog(f'{freenames[i]}: {fit_params[i]} '
                         f'(+{upper_errs[i]}, -{lower_errs[i]})')
    log.writelog('')

    # Plot fit
    if meta.isplots_S5 >= 1:
        plots.plot_fit(lc, model, meta, fitter='emcee')

    # Plot GP fit + components
    if model.GP and meta.isplots_S5 >= 1:
        plots.plot_GP_components(lc, model, meta, fitter='emcee')

    # Zoom in on phase variations
    if meta.isplots_S5 >= 1 and 'sinusoid_pc' in meta.run_myfuncs:
        plots.plot_phase_variations(lc, model, meta, fitter='emcee')

    # Plot Allan plot
    if meta.isplots_S5 >= 3:
        plots.plot_rms(lc, model, meta, fitter='emcee')

    # Plot residuals distribution
    if meta.isplots_S5 >= 3:
        plots.plot_res_distr(lc, model, meta, fitter='emcee')

    # Plot chain evolution
    if meta.isplots_S5 >= 3:
        plots.plot_chain(sampler.get_chain(), lc, meta, freenames,
                         fitter='emcee', burnin=True, nburn=meta.run_nburn)
        plots.plot_chain(sampler.get_chain(discard=meta.run_nburn), lc, meta,
                         freenames, fitter='emcee', burnin=False)

    if meta.isplots_S5 >= 5:
        plots.plot_corner(samples, lc, meta, freenames, fitter='emcee')

    # Make a new model instance
    best_model = copy.deepcopy(model)
    best_model.components[0].update(fit_params)
    best_model.__setattr__('chi2red', chi2red)
    best_model.__setattr__('fit_params', fit_params)

    return best_model
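

# Example (illustrative, not part of the original module): emceefitter
# follows the standard emcee workflow - run the full chain, then drop the
# burn-in when flattening with get_chain(flat=True, discard=nburn). The toy
# log-probability and walker setup below are assumptions for demonstration
# only.
def _example_emcee_burnin_discard(nwalkers=16, nsteps=500, nburn=200):
    ndim = 2

    def toy_lnprob(theta):
        # Standard normal posterior in both dimensions
        return -0.5*np.sum(theta**2)

    # Start all walkers in a tight ball, as initialize_emcee_walkers does
    pos = 1e-3*np.random.randn(nwalkers, ndim)
    sampler = emcee.EnsembleSampler(nwalkers, ndim, toy_lnprob)
    sampler.run_mcmc(pos, nsteps, progress=False)
    # Flatten the chain, discarding the first nburn steps of every walker
    samples = sampler.get_chain(flat=True, discard=nburn)
    q = np.percentile(samples, [16, 50, 84], axis=0)
    return q[1], q[2]-q[1], q[1]-q[0]  # median, +1sigma, -1sigma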


def start_from_oldchain_emcee(lc, meta, log, ndim, freenames):
    """Restart emcee using the ending point of an old chain.

    Parameters
    ----------
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object.
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    ndim : int
        The number of fitted parameters.
    freenames : list
        The names of the fitted parameters.

    Returns
    -------
    pos : ndarray
        The starting positions for all walkers.
    nwalkers : int
        The number of walkers (may differ from the requested number if
        unable to get all walkers within the priors).

    Raises
    ------
    AssertionError
        The old chain is not compatible with the current fit.
    AssertionError
        Unable to get enough walkers within the prior range.
    """
    if meta.sharedp:
        channel_key = 'shared'
    else:
        ch_number = str(lc.channel).zfill(len(str(lc.nchannel)))
        channel_key = f'ch{ch_number}'

    foldername = os.path.join(meta.topdir, *meta.old_chain.split(os.sep))
    fname = f'S5_emcee_fitparams_{channel_key}.csv'
    fitted_values = pd.read_csv(os.path.join(foldername, fname),
                                escapechar='#', skipinitialspace=True)
    full_keys = np.array(fitted_values['Parameter'])
    if np.all(full_keys != freenames):
        message = ('Old chain does not have the same fitted parameters and '
                   'cannot be used to initialize the new fit.\n'
                   'The old chain included:\n['+','.join(full_keys)+']\n'
                   'The new chain included:\n['+','.join(freenames)+']')
        log.writelog(message, mute=True)
        raise AssertionError(message)

    fname = f'S5_emcee_samples_{channel_key}'
    # Load HDF5 files
    full_fname = os.path.join(foldername, fname)+'.h5'
    ds = xrio.readXR(full_fname, verbose=False)
    if ds is None:
        # Working with an old save file
        with h5py.File(full_fname, 'r') as hf:
            samples = hf['samples'][:]
    else:
        samples = ds.to_array().T.values
    log.writelog(f'Old chain path: {full_fname}')

    # Initialize the walkers using samples from the old chain
    nwalkers = meta.run_nwalkers
    pos = samples[-nwalkers:]
    walkers_used = nwalkers

    # Make sure that no walkers are starting in the same place as
    # they would then exactly follow each other
    repeat_pos = np.where([np.any(np.all(pos[i] == np.delete(pos, i, axis=0),
                                         axis=1))
                           for i in range(pos.shape[0])])[0]
    while (len(repeat_pos) > 0 and
           samples.shape[0] > (walkers_used+len(repeat_pos))):
        pos[repeat_pos] = samples[:-walkers_used][-len(repeat_pos):]
        walkers_used += len(repeat_pos)
        repeat_pos = np.where([np.any(np.all(pos[i] ==
                                             np.delete(pos, i, axis=0),
                                             axis=1))
                               for i in range(pos.shape[0])])[0]

    # If unable to initialize all walkers in unique starting locations,
    # use fewer walkers unless there'd be fewer walkers than dimensions
    if len(repeat_pos) > 0 and (nwalkers-len(repeat_pos) > ndim):
        pos = np.delete(pos, repeat_pos, axis=0)
        nwalkers = pos.shape[0]
        log.writelog(f'Warning: Unable to initialize all walkers at '
                     f'different positions using old chain!\nUsing '
                     f'{nwalkers} walkers instead of the initially '
                     f'requested {meta.run_nwalkers} walkers')
    elif len(repeat_pos) > 0:
        message = (f'Error: Unable to initialize all walkers at different '
                   f'positions using old chain!\nUsing {nwalkers} walkers '
                   f'instead of the initially requested {meta.run_nwalkers} '
                   f'walkers is not permitted as there are {ndim} fitted '
                   f'parameters')
        log.writelog(message, mute=True)
        raise AssertionError(message)

    return pos, nwalkers
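

# Example (illustrative, not part of the original module): the duplicate-
# walker check used above flags every row of pos that exactly matches
# another row, since identical walkers would follow each other forever.
# The tiny position array is an assumption for demonstration only.
def _example_find_repeated_walkers():
    pos = np.array([[1., 2.], [3., 4.], [1., 2.]])  # walkers 0 and 2 collide
    repeat_pos = np.where([np.any(np.all(pos[i] == np.delete(pos, i, axis=0),
                                         axis=1))
                           for i in range(pos.shape[0])])[0]
    return repeat_pos  # -> array([0, 2])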


def initialize_emcee_walkers(meta, log, ndim, lsq_sol, freepars, prior1,
                             prior2, priortype):
    """Initialize emcee walker starting positions.

    Parameters
    ----------
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    ndim : int
        The number of fitted parameters.
    lsq_sol :
        The results from the lsqfitter, or None if it was not run.
    freepars : list
        The starting values of the fitted parameters.
    prior1 : list
        The lower bounds (or means for Gaussian priors) of the fitted
        parameters.
    prior2 : list
        The upper bounds (or standard deviations for Gaussian priors) of
        the fitted parameters.
    priortype : list
        The types of each prior (to determine the meaning of prior1 and
        prior2).

    Returns
    -------
    pos : ndarray
        The starting position of all walkers.
    nwalkers : int
        The number of walkers (may differ from the requested number if
        unable to get all walkers within the priors).

    Raises
    ------
    AssertionError
        Failed to initialize any walkers within the priors.
    AssertionError
        Failed to initialize enough walkers within the priors.
    """
    u = np.where(priortype == 'U')[0]
    lu = np.where(priortype == 'LU')[0]
    n = np.where(priortype == 'N')[0]
    if lsq_sol is not None and lsq_sol.cov_mat is not None:
        step_size = np.diag(lsq_sol.cov_mat)
        if np.any(step_size == 0.):
            ind_zero_u = np.where(step_size[u] == 0.)[0]
            ind_zero_lu = np.where(step_size[lu] == 0.)[0]
            ind_zero_n = np.where(step_size[n] == 0.)[0]
            step_size[u][ind_zero_u] = (0.001*(prior2[u][ind_zero_u] -
                                               prior1[u][ind_zero_u]))
            step_size[lu][ind_zero_lu] = (0.001 *
                                          (np.exp(prior2[lu][ind_zero_lu]) -
                                           np.exp(prior1[lu][ind_zero_lu])))
            step_size[n][ind_zero_n] = 0.1*prior2[n][ind_zero_n]
    else:
        # Sometimes the lsq fitter won't converge and will give None as
        # the covariance matrix. In that case, we need to establish the
        # step size in another way. Using a fractional step compared to
        # the prior range can work best for precisely known values like
        # t0 and period
        log.writelog('No covariance matrix from LSQ - falling back on a '
                     'step size based on the prior range')
        step_size = np.ones(ndim)
        step_size[u] = 0.001*(prior2[u] - prior1[u])
        step_size[lu] = 0.001*(np.exp(prior2[lu]) - np.exp(prior1[lu]))
        step_size[n] = 0.1*prior2[n]
    nwalkers = meta.run_nwalkers

    # Make it robust to lsq hitting the upper or lower bound of the
    # parameter space
    ind_max = np.where(freepars[u] - prior2[u] == 0.)[0]
    ind_min = np.where(freepars[u] - prior1[u] == 0.)[0]
    ind_max_LU = np.where(np.log(freepars[lu]) - prior2[lu] == 0.)[0]
    ind_min_LU = np.where(np.log(freepars[lu]) - prior1[lu] == 0.)[0]
    pmid = (prior2+prior1)/2.
    if len(ind_max) > 0 or len(ind_max_LU) > 0:
        log.writelog('Warning: >=1 params hit the upper bound in the lsq '
                     'fit. Setting to the middle of the interval.')
        freepars[u][ind_max] = pmid[u][ind_max]
        freepars[lu][ind_max_LU] = (np.exp(prior2[lu][ind_max_LU]) +
                                    np.exp(prior1[lu][ind_max_LU]))/2.
    if len(ind_min) > 0 or len(ind_min_LU) > 0:
        log.writelog('Warning: >=1 params hit the lower bound in the lsq '
                     'fit. Setting to the middle of the interval.')
        freepars[u][ind_min] = pmid[u][ind_min]
        freepars[lu][ind_min_LU] = (np.exp(prior2[lu][ind_min_LU]) +
                                    np.exp(prior1[lu][ind_min_LU]))/2.

    # Generate the walker positions
    pos = np.array([freepars + step_size*np.random.randn(ndim)
                    for i in range(nwalkers)])

    # Make sure the walker positions obey the priors
    in_range = np.array([((prior1[u] <= ii).all() and
                          (ii <= prior2[u]).all())
                         for ii in pos[:, u]])
    in_range2 = np.array([((prior1[lu] <= np.log(ii)).all() and
                           (np.log(ii) <= prior2[lu]).all())
                          for ii in pos[:, lu]])
    if not np.all(in_range) or not np.all(in_range2):
        log.writelog('Not all walkers were initialized within the priors, '
                     'using a smaller proposal distribution')
        pos = pos[in_range]
        # Make sure the step size is well within the limits
        uniform_step = np.abs(np.append((prior2-freepars).reshape(-1, 1)/10,
                                        (freepars-prior1).reshape(-1, 1)/10,
                                        axis=1))
        step_size_options = np.append(step_size.reshape(-1, 1), uniform_step,
                                      axis=1)
        if len(lu) != 0:
            step_size_options[lu, 1] = np.abs((np.exp(prior2[lu]) -
                                               freepars[lu]
                                               ).reshape(-1, 1)/10)
            step_size_options[lu, 2] = np.abs((np.exp(prior1[lu]) -
                                               freepars[lu]
                                               ).reshape(-1, 1)/10)
        if len(n) != 0:
            step_size_options[n, 1:] = step_size_options[n, 0].reshape(-1, 1)
        step_size = np.min(step_size_options, axis=1)
        if pos.shape[0] == 0:
            remove_zeroth = True
            new_nwalkers = nwalkers-len(pos)
            pos = np.zeros((1, ndim))
        else:
            remove_zeroth = False
            new_nwalkers = nwalkers-len(pos)
        pos = np.append(pos,
                        np.array([freepars +
                                  step_size*np.random.randn(ndim)
                                  for i in range(new_nwalkers)
                                  ]).reshape(-1, ndim), axis=0)
        if remove_zeroth:
            pos = pos[1:]
        in_range = np.array([((prior1[u] <= ii).all() and
                              (ii <= prior2[u]).all())
                             for ii in pos[:, u]])
        in_range2 = np.array([((prior1[lu] <= np.log(ii)).all() and
                               (np.log(ii) <= prior2[lu]).all())
                              for ii in pos[:, lu]])
    if not np.any(in_range) and not np.any(in_range2):
        raise AssertionError('Failed to initialize any walkers within the '
                             'set bounds for all parameters!\n'
                             'Check your starting position, decrease your '
                             'step size, or increase the bounds on your '
                             'parameters')
    elif not np.all(in_range) or not np.all(in_range2):
        old_nwalkers = nwalkers
        pos = pos[in_range+in_range2]
        nwalkers = pos.shape[0]
        if nwalkers > ndim:
            log.writelog(f'Warning: Failed to initialize all walkers within '
                         f'the set bounds for all parameters!\nUsing '
                         f'{nwalkers} walkers instead of the initially '
                         f'requested {old_nwalkers} walkers')
        else:
            message = (f'Error: Failed to initialize all walkers within the '
                       f'set bounds for all parameters!\nUsing '
                       f'{nwalkers} walkers instead of the initially '
                       f'requested {old_nwalkers} walkers is not permitted '
                       f'as there are {ndim} fitted parameters')
            log.writelog(message, mute=True)
            raise AssertionError(message)

    return pos, nwalkers
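

# Example (illustrative, not part of the original module): the heart of
# initialize_emcee_walkers is a Gaussian ball around the starting solution,
# followed by a check that every walker respects its uniform prior bounds.
# The parameter values and bounds below are assumptions for demonstration
# only.
def _example_gaussian_ball_init(nwalkers=10):
    freepars = np.array([0.01, 0.5])   # starting values
    prior1 = np.array([0., 0.])        # lower bounds (priortype 'U')
    prior2 = np.array([0.05, 1.])      # upper bounds
    step_size = 0.001*(prior2-prior1)  # the prior-range fallback used above
    pos = np.array([freepars + step_size*np.random.randn(len(freepars))
                    for _ in range(nwalkers)])
    in_range = np.array([((prior1 <= ii).all() and (ii <= prior2).all())
                         for ii in pos])
    return pos[in_range]  # keep only walkers that start inside the priors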


def dynestyfitter(lc, model, meta, log, **kwargs):
    """Perform sampling using dynesty.

    Parameters
    ----------
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object.
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit.
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    **kwargs : dict
        Arbitrary keyword arguments.

    Returns
    -------
    best_model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model after fitting.

    Notes
    -----
    History:

    - December 29, 2021 Taylor Bell
        Updated documentation. Reduced repeated code.
    - January 7-22, 2022 Megan Mansfield
        Adding ability to do a single shared fit across all channels
    - February 23-25, 2022 Megan Mansfield
        Added log-uniform and Gaussian priors.
    - February 28-March 1, 2022 Caroline Piaulet
        Adding scatter_ppm parameter.
    - Mar 13-Apr 18, 2022 Caroline Piaulet
        Record an astropy table for mean, median, percentiles,
        +/- 1 sigma, all params
    """
    # Group the different variable types
    freenames, freepars, prior1, prior2, priortype, indep_vars = \
        group_variables(model)
    if hasattr(meta, 'old_fitparams') and meta.old_fitparams is not None:
        freepars = load_old_fitparams(meta, log, lc.channel, freenames)

    # DYNESTY
    nlive = meta.run_nlive  # number of live points
    bound = meta.run_bound  # bounding method (e.g. MultiNest-style 'multi')
    ndims = len(freepars)  # number of fitted parameters
    sample = meta.run_sample  # sampling method (e.g. uniform)
    tol = meta.run_tol  # the stopping criterion

    start_lnprob = lnprob(freepars, lc, model, prior1, prior2, priortype,
                          freenames)
    log.writelog(f'Starting lnprob: {start_lnprob}', mute=(not meta.verbose))

    # Plot starting point
    if meta.isplots_S5 >= 1:
        plots.plot_fit(lc, model, meta, fitter='dynestyStartingPoint')
        # Plot GP starting point
        if model.GP:
            plots.plot_GP_components(lc, model, meta,
                                     fitter='dynestyStartingPoint')

    # START DYNESTY
    l_args = [lc, model, freenames]

    log.writelog('Running dynesty...')

    min_nlive = int(np.ceil(ndims*(ndims+1)//2))
    if nlive == 'min':
        nlive = min_nlive
    elif nlive < min_nlive:
        log.writelog(f'**** WARNING: You should set run_nlive to at least '
                     f'{min_nlive} ****')

    if hasattr(meta, 'ncpu') and meta.ncpu > 1:
        pool = Pool(meta.ncpu)
        queue_size = meta.ncpu
    else:
        meta.ncpu = 1
        pool = None
        queue_size = None
    sampler = NestedSampler(ln_like, ptform, ndims, pool=pool,
                            queue_size=queue_size, bound=bound,
                            sample=sample, nlive=nlive, logl_args=l_args,
                            ptform_args=[prior1, prior2, priortype])
    sampler.run_nested(dlogz=tol, print_progress=True)  # output progress bar
    res = sampler.results  # get results dictionary from sampler
    if meta.ncpu > 1:
        # Close the thread pool
        pool.close()
        pool.join()

    log.writelog('', mute=(not meta.verbose))
    # Need to temporarily redirect output since res.summary() prints rather
    # than returns a string
    old_stdout = sys.stdout
    sys.stdout = mystdout = StringIO()
    res.summary()
    sys.stdout = old_stdout
    log.writelog(mystdout.getvalue(), mute=(not meta.verbose))

    # Resample from the nested samples to get equally weighted
    # posterior samples
    weights = np.exp(res['logwt'] - res['logz'][-1])
    samples = resample_equal(res.samples, weights)
    log.writelog('Number of posterior samples is {}'.format(len(samples)),
                 mute=(not meta.verbose))

    # Record median + percentiles
    q = np.percentile(samples, [16, 50, 84], axis=0)
    fit_params = q[1]  # median
    mean_params = np.mean(samples, axis=0)
    errs = np.std(samples, axis=0)

    # Create table of results
    t_results = table.Table([freenames, mean_params, q[0]-q[1], q[2]-q[1],
                             q[0], fit_params, q[2]],
                            names=("Parameter", "Mean", "-1sigma", "+1sigma",
                                   "16th", "50th", "84th"))
    upper_errs = q[2]-q[1]
    lower_errs = q[1]-q[0]

    model.update(fit_params)
    model.errs = dict(zip(freenames, errs))
    lc.unc_fit = update_uncertainty(fit_params, lc.nints, lc.unc, freenames)

    # Save the fit ASAP so plotting errors don't make you lose everything
    save_fit(meta, lc, model, 'dynesty', t_results, freenames, samples)

    end_lnprob = lnprob(fit_params, lc, model, prior1, prior2, priortype,
                        freenames)
    log.writelog(f'Ending lnprob: {end_lnprob}', mute=(not meta.verbose))

    # Compute reduced chi-squared
    chi2red = computeRedChiSq(lc, log, model, meta, freenames)

    log.writelog('\nDYNESTY RESULTS:')
    for i in range(ndims):
        if 'scatter_mult' in freenames[i]:
            chan = freenames[i].split('_')[-1]
            if chan.isnumeric():
                chan = int(chan)
            else:
                chan = 0
            trim1, trim2 = get_trim(meta.nints, chan)
            unc = np.ma.median(lc.unc[trim1:trim2])
            scatter_ppm = 1e6*fit_params[i]*unc
            scatter_ppm_upper = 1e6*upper_errs[i]*unc
            scatter_ppm_lower = 1e6*lower_errs[i]*unc
            log.writelog(f'{freenames[i]}: {fit_params[i]} '
                         f'(+{upper_errs[i]}, -{lower_errs[i]}); '
                         f'{scatter_ppm} (+{scatter_ppm_upper}, '
                         f'-{scatter_ppm_lower}) ppm')
        else:
            log.writelog(f'{freenames[i]}: {fit_params[i]} '
                         f'(+{upper_errs[i]}, -{lower_errs[i]})')
    log.writelog('')

    # Plot fit
    if meta.isplots_S5 >= 1:
        plots.plot_fit(lc, model, meta, fitter='dynesty')

    # Plot GP fit + components
    if model.GP and meta.isplots_S5 >= 1:
        plots.plot_GP_components(lc, model, meta, fitter='dynesty')

    # Zoom in on phase variations
    if meta.isplots_S5 >= 1 and 'sinusoid_pc' in meta.run_myfuncs:
        plots.plot_phase_variations(lc, model, meta, fitter='dynesty')

    # Plot Allan plot
    if meta.isplots_S5 >= 3:
        plots.plot_rms(lc, model, meta, fitter='dynesty')

    # Plot residuals distribution
    if meta.isplots_S5 >= 3:
        plots.plot_res_distr(lc, model, meta, fitter='dynesty')

    # Plot corner plot using corner.py
    if meta.isplots_S5 >= 5:
        plots.plot_corner(samples, lc, meta, freenames, fitter='dynesty')

    # Make a new model instance
    best_model = copy.deepcopy(model)
    best_model.components[0].update(fit_params)
    best_model.__setattr__('chi2red', chi2red)
    best_model.__setattr__('fit_params', fit_params)

    return best_model
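

# Example (illustrative, not part of the original module): converting
# dynesty's weighted nested samples into equally weighted posterior draws,
# as done above with resample_equal. The 1-D toy problem (standard normal
# likelihood, U(-5, 5) prior) is an assumption for demonstration only.
def _example_dynesty_resample():
    def toy_loglike(theta):
        return -0.5*np.sum(theta**2)

    def toy_ptform(u):
        # Map the unit cube onto the U(-5, 5) prior
        return 10.*u - 5.

    sampler = NestedSampler(toy_loglike, toy_ptform, 1, nlive=100)
    sampler.run_nested(dlogz=0.5, print_progress=False)
    res = sampler.results
    # Importance weights of the nested samples
    weights = np.exp(res['logwt'] - res['logz'][-1])
    samples = resample_equal(res.samples, weights)
    return np.percentile(samples, [16, 50, 84], axis=0)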


def lmfitter(lc, model, meta, log, **kwargs):
    """Perform a fit using lmfit.

    Parameters
    ----------
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object.
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit.
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    **kwargs : dict
        Arbitrary keyword arguments.

    Returns
    -------
    best_model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model after fitting.

    Notes
    -----
    History:

    - December 29, 2021 Taylor Bell
        Updated documentation. Reduced repeated code.
    - February 28-March 1, 2022 Caroline Piaulet
        Adding scatter_ppm parameter.
    - Mar 13-Apr 18, 2022 Caroline Piaulet
        Record an astropy table for parameter values
    """
    # TODO: Do something so that duplicate param names can all be handled
    # (e.g. two Polynomial models with c0). Perhaps append something to the
    # parameter name like c0_1 and c0_2?

    # Group the different variable types
    param_list, freenames, indep_vars = group_variables_lmfit(model)

    # Add the time as an independent variable
    indep_vars['time'] = lc.time

    # Initialize lmfit Params object
    initialParams = lmfit.Parameters()
    # Insert parameters
    initialParams.add_many(*param_list)

    # Create the lmfit lightcurve model
    lcmodel = lmfit.Model(model.eval)
    lcmodel.independent_vars = indep_vars.keys()

    # Fit light curve model to the simulated data
    result = lcmodel.fit(lc.flux, weights=1/lc.unc, params=initialParams,
                         **indep_vars, **kwargs)

    # Get the best fit params
    fit_params = result.__dict__['params']
    # new_params = [(fit_params.get(i).name, fit_params.get(i).value,
    #                fit_params.get(i).vary, fit_params.get(i).min,
    #                fit_params.get(i).max) for i in fit_params]

    # Create table of results
    t_results = table.Table([freenames, fit_params],
                            names=("Parameter", "Mean"))

    model.update(fit_params)
    lc.unc_fit = update_uncertainty(fit_params, lc.nints, lc.unc, freenames)

    # Save the fit ASAP
    save_fit(meta, lc, model, 'lmfitter', t_results, freenames)

    # Compute reduced chi-squared
    chi2red = computeRedChiSq(lc, log, model, meta, freenames)

    # Log results
    log.writelog(result.fit_report(), mute=(not meta.verbose))

    # Plot fit
    if meta.isplots_S5 >= 1:
        plots.plot_fit(lc, model, meta, fitter='lmfitter')

    # Plot GP fit + components
    if model.GP and meta.isplots_S5 >= 1:
        plots.plot_GP_components(lc, model, meta, fitter='lmfitter')

    # Zoom in on phase variations
    if meta.isplots_S5 >= 1 and 'sinusoid_pc' in meta.run_myfuncs:
        plots.plot_phase_variations(lc, model, meta, fitter='lmfitter')

    # Plot Allan plot
    if meta.isplots_S5 >= 3:
        plots.plot_rms(lc, model, meta, fitter='lmfitter')

    # Plot residuals distribution
    if meta.isplots_S5 >= 3:
        plots.plot_res_distr(lc, model, meta, fitter='lmfitter')

    # Create new model with best fit parameters
    best_model = copy.deepcopy(model)
    best_model.components[0].update(fit_params)
    best_model.__setattr__('chi2red', chi2red)
    best_model.__setattr__('fit_params', fit_params)

    return best_model
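

# Example (illustrative, not part of the original module): the lmfit
# pattern used above - wrap a model function with lmfit.Model, declare
# parameters with add_many, and fit with per-point weights = 1/uncertainty.
# The linear toy model and fake data are assumptions for demonstration only.
def _example_lmfit_linear():
    def linear(time, c0, c1):
        return c0 + c1*time

    time = np.linspace(0, 1, 50)
    flux = 1. + 0.1*time + 0.01*np.random.randn(time.size)
    unc = 0.01*np.ones_like(time)

    # Tuples of (name, value, vary, min, max), as add_many expects
    params = lmfit.Parameters()
    params.add_many(('c0', 1., True, -np.inf, np.inf),
                    ('c1', 0., True, -np.inf, np.inf))
    lcmodel = lmfit.Model(linear, independent_vars=['time'])
    result = lcmodel.fit(flux, weights=1/unc, params=params, time=time)
    return result.params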


def group_variables(model):
    """Group variables into fitted and frozen.

    Parameters
    ----------
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit.

    Returns
    -------
    freenames : np.array
        The names of fitted variables.
    freepars : np.array
        The fitted variables.
    prior1 : np.array
        The lower bound for constrained variables with uniform/log uniform
        priors, or mean for constrained variables with Gaussian priors.
    prior2 : np.array
        The upper bound for constrained variables with uniform/log uniform
        priors, or standard deviation for constrained variables with
        Gaussian priors.
    priortype : np.array
        Keywords indicating the type of prior for each free parameter.
    indep_vars : dict
        The frozen variables.

    Notes
    -----
    History:

    - December 29, 2021 Taylor Bell
        Moved code to separate function to reduce repeated code.
    - January 11, 2022 Megan Mansfield
        Added ability to have shared parameters
    - February 23-25, 2022 Megan Mansfield
        Added log-uniform and Gaussian priors.
    """
    all_params = []
    alreadylist = []
    for c in range(model.components[0].nchannel_fitted):
        temp = model.components[0].longparamlist[c]
        for par in list(model.components[0].parameters.dict.items()):
            if par[0] in temp:
                if not all_params:
                    all_params.append(par)
                    alreadylist.append(par[0])
                if par[0] not in alreadylist:
                    all_params.append(par)
                    alreadylist.append(par[0])

    # Group the different variable types
    freenames = []
    freepars = []
    prior1 = []
    prior2 = []
    priortype = []
    indep_vars = {}
    for ii, item in enumerate(all_params):
        name, param = item
        # param = list(param)
        if ((param[1] == 'free') or (param[1] == 'shared')
                or ('white' in param[1])):
            freenames.append(name)
            freepars.append(param[0])
            if len(param) == 5:
                # If prior is specified.
                prior1.append(param[2])
                prior2.append(param[3])
                priortype.append(param[4])
            elif (len(param) > 3) and (len(param) < 5):
                # If prior bounds are specified but not the prior type
                raise IndexError("If you want to specify prior parameters, "
                                 "you must also specify the prior type: "
                                 "'U', 'LU', or 'N'.")
            else:
                # If no prior is specified,
                # assume uniform prior with infinite bounds.
                prior1.append(-np.inf)
                prior2.append(np.inf)
                priortype.append('U')
        elif param[1] == 'independent':
            indep_vars[name] = param[0]
    freenames = np.array(freenames)
    freepars = np.array(freepars)
    prior1 = np.array(prior1)
    prior2 = np.array(prior2)
    priortype = np.array(priortype)

    model.freenames = freenames

    return freenames, freepars, prior1, prior2, priortype, indep_vars
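

# Example (illustrative, not part of the original module): the parameter
# convention that group_variables expects. Each entry maps a name to
# [value, type, prior1, prior2, priortype], where priortype is 'U'
# (uniform), 'LU' (log-uniform), or 'N' (normal: mean, standard deviation).
# The specific names and values below are hypothetical.
_example_param_dict = {
    'rp': [0.1, 'free', 0.0, 0.5, 'U'],       # uniform prior on rp
    'per': [3.5, 'free', 3.5, 0.001, 'N'],    # Gaussian prior: mean, sigma
    'time_offset': [0., 'independent'],       # frozen, passed to the model
    'depth': [0.01, 'free'],                  # no prior -> U(-inf, inf)
}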


def group_variables_lmfit(model):
    """Group variables into fitted and frozen for lmfit fitter.

    Parameters
    ----------
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model to fit.

    Returns
    -------
    paramlist : list
        The fitted variables.
    freenames : np.array
        The names of fitted variables.
    indep_vars : dict
        The frozen variables.

    Notes
    -----
    History:

    - December 29, 2021 Taylor Bell
        Moved code to separate function to look similar to other fitters.
    """
    all_params = [i for j in [model.components[n].parameters.dict.items()
                              for n in range(len(model.components))]
                  for i in j]

    # Group the different variable types
    param_list = []
    freenames = []
    indep_vars = {}
    for param in all_params:
        param = list(param)
        if param[1][1] == 'free':
            freenames.append(param[0])
            param[1][1] = True
            param_list.append(tuple(param))
        elif param[1][1] == 'fixed':
            param[1][1] = False
            param_list.append(tuple(param))
        else:
            indep_vars[param[0]] = param[1]
    freenames = np.array(freenames)

    return param_list, freenames, indep_vars


def load_old_fitparams(meta, log, channel, freenames):
    """Load in the best-fit values from a previous fit.

    Parameters
    ----------
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    log : logedit.Logedit
        The open log in which notes from this step can be added.
    channel : int
        Unused. The current channel.
    freenames : list
        The names of the fitted parameters.

    Returns
    -------
    fitted_values : np.array
        The best-fit values from a previous fit.

    Raises
    ------
    AssertionError
        The old fit is incompatible with the current fit.
    """
    fname = os.path.join(meta.topdir, *meta.old_fitparams.split(os.sep))
    fitted_values = pd.read_csv(fname, escapechar='#',
                                skipinitialspace=True)
    full_keys = np.array(fitted_values.keys())
    # Remove the " " from the start of the first key
    full_keys[0] = full_keys[0][1:]
    if np.all(full_keys != freenames):
        log.writelog('Old fit does not have the same fitted parameters and '
                     'cannot be used to initialize the new fit.\n'
                     'The old fit included:\n['+','.join(full_keys)+']\n'
                     'The new fit included:\n['+','.join(freenames)+']',
                     mute=True)
        raise AssertionError('Old fit does not have the same fitted '
                             'parameters and cannot be used to initialize '
                             'the new fit.\n'
                             'The old fit included:\n['+','.join(full_keys) +
                             ']\nThe new fit included:\n[' +
                             ','.join(freenames)+']')

    return np.array(fitted_values)[0]


def save_fit(meta, lc, model, fitter, results_table, freenames, samples=[]):
    """Save a fit as a txt file as well as the entire chain if provided.

    Parameters
    ----------
    meta : eureka.lib.readECF.MetaClass
        The metadata object.
    lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve
        The lightcurve data object.
    model : eureka.S5_lightcurve_fitting.models.CompositeModel
        The composite model that was fit.
    fitter : str
        The current fitter being used.
    results_table : astropy.table.Table
        The table of best-fit parameter values from the current fit.
    freenames : list
        The list of fitted parameter names.
    samples : ndarray; optional
        The full chain from a sampling method, by default [].

    Notes
    -----
    History:

    - Mar 13-Apr 18, 2022 Caroline Piaulet
        Record an astropy table for mean, median, percentiles,
        +/- 1 sigma, all params
    """
    if lc.white:
        channel_tag = '_white'
    elif lc.share:
        channel_tag = '_shared'
    else:
        ch_number = str(lc.channel).zfill(len(str(lc.nchannel)))
        channel_tag = f'_ch{ch_number}'

    # Save the fitted parameters and their uncertainties (if possible)
    fname = f'S5_{fitter}_fitparams{channel_tag}'
    results_table.write(meta.outputdir+fname+'.csv', format='csv',
                        overwrite=False)

    # Save the chain from the sampler using Astraeus (if a chain was
    # provided)
    if len(samples) != 0:
        fname = meta.outputdir+f'S5_{fitter}_samples{channel_tag}.h5'
        ds = dict([(freenames[i], xr.DataArray(samples[:, i],
                                               dims=['sample'],
                                               name=freenames[i]))
                   for i in range(len(freenames))])
        ds = xrio.makeDataset(ds)
        xrio.writeXR(fname, ds)

    # Directory structure should not use expanded HW values
    spec_hw_val = meta.spec_hw
    bg_hw_val = meta.bg_hw
    spec_hw_val //= meta.expand
    if not isinstance(bg_hw_val, str):
        # Only divide if value is not a string (spectroscopic modes)
        bg_hw_val //= meta.expand

    # Save the S5 outputs in a human readable ecsv file
    event_ap_bg = (meta.eventlabel+"_ap"+str(spec_hw_val) +
                   '_bg'+str(bg_hw_val))
    meta.tab_filename_s5 = (meta.outputdir+'S5_'+event_ap_bg+"_Table_Save" +
                            channel_tag+'.txt')
    wavelengths = np.mean(np.append(meta.wave_low.reshape(1, -1),
                                    meta.wave_hi.reshape(1, -1), axis=0),
                          axis=0)
    wave_errs = (meta.wave_hi-meta.wave_low)/2

    # Evaluate each individual model for easier access outside of Eureka!
    individual_models = []
    for comp in model.components:
        if comp.name != 'GP':
            individual_models.append([comp.name, comp.eval()])
        else:
            fit = model.eval(incl_GP=False)
            individual_models.append([comp.name, comp.eval(fit)])
    individual_models = np.array(individual_models, dtype=object)
    model_lc = model.eval()
    residuals = lc.flux-model_lc

    astropytable.savetable_S5(meta.tab_filename_s5, meta, lc.time,
                              wavelengths[lc.fitted_channels],
                              wave_errs[lc.fitted_channels], lc.flux,
                              lc.unc_fit, individual_models, model_lc,
                              residuals)

    return
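

# Example (illustrative, not part of the original module): reading back a
# chain written by save_fit above, using the same Astraeus interface the
# module already imports. The file path is a hypothetical placeholder.
def _example_read_saved_samples(fname='S5_emcee_samples_ch0.h5'):
    ds = xrio.readXR(fname, verbose=False)
    # Each fitted parameter is stored as a DataArray over the 'sample'
    # dimension, so collect them into a dict of flat numpy arrays
    return {name: ds[name].values for name in ds.data_vars}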