Source code for eureka.S5_lightcurve_fitting.fitters

import numpy as np
import pandas as pd
import copy
from io import StringIO
import os
import sys
import h5py
import xarray as xr
from astraeus import xarrayIO as xrio
import time as time_pkg

from scipy.optimize import minimize
import lmfit
import emcee

from dynesty import NestedSampler, DynamicNestedSampler
from dynesty.utils import resample_equal

from .likelihood import (computeRedChiSq, lnprob, ln_like, ptform,
                         update_uncertainty)
from . import plots_s5 as plots
from ..lib import astropytable, util
from ..lib.split_channels import get_trim

from multiprocessing import Pool

from astropy import table


[docs] def lsqfitter(lc, model, meta, log, calling_function='lsq', **kwargs): """Perform least-squares fit. Parameters ---------- lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve The lightcurve data object. model : eureka.S5_lightcurve_fitting.models.CompositeModel The composite model to fit. meta : eureka.lib.readECF.MetaClass The metadata object. log : logedit.Logedit The open log in which notes from this step can be added. calling_function : str The fitter that is being run (e.g. may be 'emcee' if running lsqfitter to initialize emcee walkers). Defailts to 'lsq'. **kwargs : dict Arbitrary keyword arguments. Returns ------- best_model : eureka.S5_lightcurve_fitting.models.CompositeModel The composite model after fitting """ # Group the different variable types freenames = lc.freenames freepars, prior1, prior2, priortype, indep_vars = \ group_variables(model) if meta.old_fitparams is not None: freepars = load_old_fitparams(lc, meta, log, freenames, calling_function) start_lnprob = lnprob(freepars, lc, model, prior1, prior2, priortype, freenames) log.writelog(f'Starting lnprob: {start_lnprob}', mute=(not meta.verbose)) # Plot starting point if meta.isplots_S5 >= 1: plots.plot_fit(lc, model, meta, fitter=calling_function+'StartingPoint') # Plot GP starting point if model.GP: plots.plot_GP_components(lc, model, meta, fitter=calling_function+'StartingPoint') # Plot star spots starting point if 'fleck_tr' in meta.run_myfuncs and meta.isplots_S5 >= 3: plots.plot_fleck_star(lc, model, meta, fitter=calling_function+'StartingPoint') # Plot Harmonica string starting point if ('harmonica_tr' in meta.run_myfuncs and 'a1' in freenames and meta.isplots_S5 >= 3): plots.plot_harmonica_string(lc, model, meta, fitter=calling_function+'StartingPoint') if not np.isfinite(start_lnprob): raise AssertionError( 'The starting lnprob value must be finite. Most likely, one of ' 'your initial parameter values are outside of the bounds of its ' 'prior.') def neg_lnprob(theta, lc, model, prior1, prior2, priortype, freenames): return -lnprob(theta, lc, model, prior1, prior2, priortype, freenames) global lsq_t0 lsq_t0 = time_pkg.time() def callback_full(theta, lc, model, prior1, prior2, priortype, freenames): global lsq_t0 if (time_pkg.time()-lsq_t0) > 0.5: lsq_t0 = time_pkg.time() print('Current lnprob = ', lnprob(theta, lc, model, prior1, prior2, priortype, freenames), end='\r') def callback(theta): return callback_full(theta, lc, model, prior1, prior2, priortype, freenames) results = minimize(neg_lnprob, freepars, args=(lc, model, prior1, prior2, priortype, freenames), method=meta.lsq_method, tol=meta.lsq_tol, options={'maxiter': meta.lsq_maxiter}, callback=callback) log.writelog("\nVerbose lsq results: {}\n".format(results), mute=(not meta.verbose)) if not meta.verbose: log.writelog("Success?: {}".format(results.success)) log.writelog(results.message) # Get the best fit params fit_params = results.x # Create table of results t_results = table.Table([freenames, fit_params], names=("Parameter", "Mean")) model.update(fit_params) lc.unc_fit = update_uncertainty(fit_params, lc.nints, lc.unc, freenames, lc.nchannel_fitted) # Save the fit ASAP save_fit(meta, lc, model, calling_function, t_results, freenames) end_lnprob = lnprob(fit_params, lc, model, prior1, prior2, priortype, freenames) log.writelog(f'Ending lnprob: {end_lnprob}', mute=(not meta.verbose)) # Compute reduced chi-squared chi2red = computeRedChiSq(lc, log, model, meta, freenames) log.writelog('\nLSQ RESULTS:') for i in range(len(freenames)): if 'scatter_mult' in freenames[i]: chan = freenames[i].split('_')[-1] if chan.isnumeric(): chan = int(chan) else: chan = 0 trim1, trim2 = get_trim(meta.nints, chan) unc = np.ma.median(lc.unc[trim1:trim2]) scatter_ppm = 1e6*fit_params[i]*unc log.writelog(f'{freenames[i]}: {fit_params[i]}; {scatter_ppm} ppm') else: log.writelog(f'{freenames[i]}: {fit_params[i]}') log.writelog('') # Plot fit if meta.isplots_S5 >= 1: plots.plot_fit(lc, model, meta, fitter=calling_function) # Plot star spots if 'fleck_tr' in meta.run_myfuncs and meta.isplots_S5 >= 3: plots.plot_fleck_star(lc, model, meta, fitter=calling_function) # Plot Harmonica string if ('harmonica_tr' in meta.run_myfuncs and 'a1' in freenames and meta.isplots_S5 >= 3): plots.plot_harmonica_string(lc, model, meta, fitter=calling_function) # Plot GP fit + components if model.GP and meta.isplots_S5 >= 1: plots.plot_GP_components(lc, model, meta, fitter=calling_function) # Zoom in on phase variations if meta.isplots_S5 >= 1 and ('sinusoid_pc' in meta.run_myfuncs or 'poet_pc' in meta.run_myfuncs or 'quasilambert_pc' in meta.run_myfuncs): plots.plot_phase_variations(lc, model, meta, fitter=calling_function) # Make RMS time-averaging plot if meta.isplots_S5 >= 3 and calling_function == 'lsq' and \ np.size(lc.flux) > 20: # This plot is only really useful if you're actually using the # lsq fitter, otherwise don't make it # Also, mc3.stats.time_avg breaks when testing with a small # number of integrations plots.plot_rms(lc, model, meta, fitter=calling_function) # Plot residuals distribution if meta.isplots_S5 >= 3 and calling_function == 'lsq': plots.plot_res_distr(lc, model, meta, fitter=calling_function) # Make a new model instance best_model = copy.deepcopy(model) best_model.components[0].update(fit_params) # Save the covariance matrix in case it's needed to estimate step size # for a sampler # FINDME: # Commented out for now because op.least_squares() doesn't provide # covariance matrix # Need to compute using Jacobian matrix instead (hess_inv = (J.T J)^{-1}) # model_lc = model.eval() # if results[1] is not None: # residuals = (lc.flux - model_lc) # cov_mat = results[1]*np.var(residuals) # else: # # Sometimes lsq will fail to converge and will return a None # # covariance matrix # cov_mat = None cov_mat = None best_model.__setattr__('cov_mat', cov_mat) best_model.__setattr__('chi2red', chi2red) best_model.__setattr__('fit_params', fit_params) return best_model
[docs] def demcfitter(lc, model, meta, log, **kwargs): """Perform sampling using Differential Evolution Markov Chain. This is an empty placeholder function to be filled later. Parameters ---------- lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve The lightcurve data object. model : eureka.S5_lightcurve_fitting.models.CompositeModel The composite model to fit. meta : eureka.lib.readECF.MetaClass The metadata object. log : logedit.Logedit The open log in which notes from this step can be added. **kwargs : dict Arbitrary keyword arguments. Returns ------- best_model : eureka.S5_lightcurve_fitting.models.CompositeModel The composite model after fitting """ best_model = None return best_model
[docs] def emceefitter(lc, model, meta, log, **kwargs): """Perform sampling using emcee. Parameters ---------- lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve The lightcurve data object model : eureka.S5_lightcurve_fitting.models.CompositeModel The composite model to fit meta : eureka.lib.readECF.MetaClass The metadata object log : logedit.Logedit The open log in which notes from this step can be added. **kwargs : dict Arbitrary keyword arguments. Returns ------- best_model : eureka.S5_lightcurve_fitting.models.CompositeModel The composite model after fitting """ # Group the different variable types freenames = lc.freenames freepars, prior1, prior2, priortype, indep_vars = \ group_variables(model) if meta.old_fitparams is not None: freepars = load_old_fitparams(lc, meta, log, freenames, 'emcee') ndim = len(freenames) if meta.old_chain is not None: pos, nwalkers = start_from_oldchain_emcee(lc, meta, log, ndim, freenames, freepars, prior1, prior2, priortype) else: if meta.lsq_first: # Only call lsq fitter first if asked log.writelog('\nCalling lsqfitter first...') # RUN LEAST SQUARES lsq_sol = lsqfitter(lc, model, meta, log, calling_function='emcee_lsq', **kwargs) freepars = lsq_sol.fit_params else: lsq_sol = None pos, nwalkers = initialize_emcee_walkers(meta, log, ndim, lsq_sol, freepars, prior1, prior2, priortype) start_lnprob = lnprob(np.median(pos, axis=0), lc, model, prior1, prior2, priortype, freenames) log.writelog(f'Starting lnprob: {start_lnprob}', mute=(not meta.verbose)) # Plot starting point if meta.isplots_S5 >= 1: plots.plot_fit(lc, model, meta, fitter='emceeStartingPoint') # Plot GP starting point if model.GP: plots.plot_GP_components(lc, model, meta, fitter='emceeStartingPoint') # Plot star spots starting point if 'fleck_tr' in meta.run_myfuncs and meta.isplots_S5 >= 3: plots.plot_fleck_star(lc, model, meta, fitter='emceeStartingPoint') # Plot Harmonica string starting point if ('harmonica_tr' in meta.run_myfuncs and 'a1' in freenames and meta.isplots_S5 >= 3): plots.plot_harmonica_string(lc, model, meta, fitter='emceeStartingPoint') # Initialize tread pool if meta.ncpu > 1: pool = Pool(meta.ncpu) else: meta.ncpu = 1 pool = None # Run emcee burn-in sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(lc, model, prior1, prior2, priortype, freenames), pool=pool) log.writelog('Running emcee burn-in and production steps...') sampler.run_mcmc(pos, meta.run_nsteps, progress=True) # log.writelog('Running emcee burn-in...') # state = sampler.run_mcmc(pos, meta.run_nsteps, progress=True) # # Log some details about the burn-in phase # acceptance_fraction = np.mean(sampler.acceptance_fraction) # log.writelog(f"Mean acceptance fraction: {acceptance_fraction:.3f}", # mute=(not meta.verbose)) # try: # autocorr = sampler.get_autocorr_time() # log.writelog(f"Mean autocorrelation time: {autocorr:.3f} steps", # mute=(not meta.verbose)) # except: # log.writelog("Error: Unable to estimate the autocorrelation time!", # mute=(not meta.verbose)) # mid_lnprob = lnprob(np.median(sampler.get_chain()[-1], axis=0), lc, # model, prior1, prior2, priortype, freenames) # log.writelog(f'Intermediate lnprob: {mid_lnprob}', # mute=(not meta.verbose)) # if meta.isplots_S5 >= 3: # plots.plot_chain(sampler.get_chain(), lc, meta, freenames, # fitter='emcee', burnin=True) # # Reset the sampler and do the production run # log.writelog('Running emcee production run...') # sampler.reset() # sampler.run_mcmc(state, meta.run_nsteps-meta.run_nburn, progress=True) # samples = sampler.get_chain(flat=True) samples = sampler.get_chain(flat=True, discard=meta.run_nburn) if meta.ncpu > 1: # Close the thread pool pool.close() pool.join() # Record median + percentiles q = np.percentile(samples, [16, 50, 84], axis=0) fit_params = q[1] # median mean_params = np.mean(samples, axis=0) errs = np.std(samples, axis=0) # Create table of results t_results = table.Table([freenames, mean_params, q[0]-q[1], q[2]-q[1], q[0], fit_params, q[2]], names=("Parameter", "Mean", "-1sigma", "+1sigma", "16th", "50th", "84th")) upper_errs = q[2]-q[1] lower_errs = q[1]-q[0] model.update(fit_params) model.errs = dict(zip(freenames, errs)) lc.unc_fit = update_uncertainty(fit_params, lc.nints, lc.unc, freenames, lc.nchannel_fitted) # Save the fit ASAP so plotting errors don't make you lose everything save_fit(meta, lc, model, 'emcee', t_results, freenames, samples) end_lnprob = lnprob(fit_params, lc, model, prior1, prior2, priortype, freenames) log.writelog(f'Ending lnprob: {end_lnprob}', mute=(not meta.verbose)) acceptance_fraction = np.mean(sampler.acceptance_fraction) log.writelog(f"Mean acceptance fraction: {acceptance_fraction:.3f}", mute=(not meta.verbose)) try: autocorr = np.mean(sampler.get_autocorr_time()) log.writelog(f"Mean autocorrelation time: {autocorr:.3f} steps", mute=(not meta.verbose)) except: # FINDME: Need to only catch the expected exception log.writelog("WARNING: Unable to estimate the autocorrelation time!", mute=(not meta.verbose)) # Compute reduced chi-squared chi2red = computeRedChiSq(lc, log, model, meta, freenames) log.writelog('\nEMCEE RESULTS:') for i in range(ndim): if 'scatter_mult' in freenames[i]: chan = freenames[i].split('_')[-1] if chan.isnumeric(): chan = int(chan) else: chan = 0 trim1, trim2 = get_trim(meta.nints, chan) unc = np.ma.median(lc.unc[trim1:trim2]) scatter_ppm = 1e6*fit_params[i]*unc scatter_ppm_upper = 1e6*upper_errs[i]*unc scatter_ppm_lower = 1e6*lower_errs[i]*unc log.writelog(f'{freenames[i]}: {fit_params[i]} (+{upper_errs[i]},' f' -{lower_errs[i]}); {scatter_ppm} ' f'(+{scatter_ppm_upper}, -{scatter_ppm_lower}) ppm') else: log.writelog(f'{freenames[i]}: {fit_params[i]} (+{upper_errs[i]},' f' -{lower_errs[i]})') log.writelog('') # Plot fit if meta.isplots_S5 >= 1: plots.plot_fit(lc, model, meta, fitter='emcee') # Plot star spots if 'fleck_tr' in meta.run_myfuncs and meta.isplots_S5 >= 3: plots.plot_fleck_star(lc, model, meta, fitter='emcee') # Plot Harmonica string if ('harmonica_tr' in meta.run_myfuncs and 'a1' in freenames and meta.isplots_S5 >= 3): plots.plot_harmonica_string(lc, model, meta, fitter='emcee') # Plot GP fit + components if model.GP and meta.isplots_S5 >= 1: plots.plot_GP_components(lc, model, meta, fitter='emcee') # Zoom in on phase variations if meta.isplots_S5 >= 1 and ('sinusoid_pc' in meta.run_myfuncs or 'poet_pc' in meta.run_myfuncs or 'quasilambert_pc' in meta.run_myfuncs): plots.plot_phase_variations(lc, model, meta, fitter='emcee') # Make RMS time-averaging plot if meta.isplots_S5 >= 3 and np.size(lc.flux) > 20: # mc3.stats.time_avg breaks when testing with a small # number of integrations plots.plot_rms(lc, model, meta, fitter='emcee') # Plot residuals distribution if meta.isplots_S5 >= 3: plots.plot_res_distr(lc, model, meta, fitter='emcee') # Plot chain evolution if meta.isplots_S5 >= 3: plots.plot_chain(sampler.get_chain(), lc, meta, freenames, fitter='emcee', burnin=True, nburn=meta.run_nburn) plots.plot_chain(sampler.get_chain(discard=meta.run_nburn), lc, meta, freenames, fitter='emcee', burnin=False) if meta.isplots_S5 >= 5: plots.plot_corner(samples, lc, meta, freenames, fitter='emcee') # Make a new model instance best_model = copy.deepcopy(model) best_model.components[0].update(fit_params) best_model.__setattr__('chi2red', chi2red) best_model.__setattr__('fit_params', fit_params) return best_model
[docs] def start_from_oldchain_emcee(lc, meta, log, ndim, freenames, freepars, prior1, prior2, priortype): """Restart emcee using the ending point of an old chain. Parameters ---------- lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve The lightcurve data object. meta : eureka.lib.readECF.MetaClass The meta data object. log : logedit.Logedit The open log in which notes from this step can be added. ndim : int The number of fitted parameters. freenames : list The names of the fitted parameters. freepars : list The starting values of the fitted parameters. prior1 : list The list of prior1 values. prior2 : list The list of prior2 values. priortype : list The types of each prior (to determine meaning of prior1 and prior2). Returns ------- pos : ndarray The starting positions for all walkers. nwalkers : int The number of walkers (may differ from the requested number if unable to get all walkers within the priors). Raises ------ AssertionError The old chain is not compatible with the current fit. AssertionError Unable to get enough walkers within the prior range. """ if lc.white: channel_tag = '_white' elif lc.share: channel_tag = '_shared' else: ch_number = str(lc.channel).zfill(len(str(lc.nchannel))) channel_tag = f'_ch{ch_number}' foldername = os.path.join(meta.topdir, *meta.old_chain.split(os.sep)) fname = f'S5_emcee_fitparams{channel_tag}.csv' fitted_values = pd.read_csv(os.path.join(foldername, fname), escapechar='#', skipinitialspace=True) full_keys = np.array(fitted_values['Parameter']) fname = f'S5_emcee_samples{channel_tag}' # Load HDF5 files full_fname = os.path.join(foldername, fname)+'.h5' ds = xrio.readXR(full_fname, verbose=False) if ds is None: # Working with an old save file with h5py.File(full_fname, 'r') as hf: samples = hf['samples'][:] else: samples = ds.to_array().T.values log.writelog(f'Old chain path: {full_fname}') if not np.all([key in freenames for key in full_keys]): # There were extra free parameters before - just get the relevant ones relevant_inds = np.array([key in freenames for key in full_keys]) removed_inds = full_keys[~relevant_inds] full_keys = full_keys[relevant_inds] samples = samples[:, relevant_inds] message = ('Old chain had extra fitted parameters. ' 'Removing the previously fitted parameters:\n' f' {removed_inds}') log.writelog(message, mute=(not meta.verbose)) # Initialize the walkers using samples from the old chain nwalkers = meta.run_nwalkers pos = samples[-nwalkers:] walkers_used = nwalkers # Make sure that no walkers are starting in the same place as # they would then exactly follow each other repeat_pos = np.array([ i for i in range(pos.shape[0]) if np.any(np.all(pos[i] == np.delete(pos, i, axis=0), axis=1)) ]) while ( repeat_pos.size > 0 and samples.shape[0] > (walkers_used + repeat_pos.size) ): pos[repeat_pos] = samples[:-walkers_used][-repeat_pos.size:] walkers_used += repeat_pos.size repeat_pos = np.array([ i for i in range(pos.shape[0]) if np.any(np.all(pos[i] == np.delete(pos, i, axis=0), axis=1)) ]) # If unable to initialize all walkers in unique starting locations, # use fewer walkers unless there'd be fewer walkers than dimensions if len(repeat_pos) > 0 and (nwalkers-len(repeat_pos) > ndim): pos = np.delete(pos, repeat_pos, axis=0) nwalkers = pos.shape[0] log.writelog(f'Warning: Unable to initialize all walkers at different ' f'positions using old chain!\nUsing {nwalkers} walkers ' f'instead of the initially requested {meta.run_nwalkers} ' f'walkers') elif len(repeat_pos) > 0: message = (f'Error: Unable to initialize all walkers at different ' f'positions using old chain!\nUsing {nwalkers} walkers ' f'instead of the initially requested {meta.run_nwalkers} ' f'walkers is not permitted as there are {ndim} fitted ' f'parameters') log.writelog(message, mute=True) raise AssertionError(message) if not np.all([key in full_keys for key in freenames]): # There are now extra free parameters # Populate them using initialize_emcee_walkers missing_freenames = np.array([key for key in freenames if key not in full_keys]) message = ('Old chain was missing some fitted parameters. ' 'Adding the new fitted parameters:\n' f' {missing_freenames}') log.writelog(message, mute=(not meta.verbose)) meta.run_nwalkers = nwalkers temp_pos, nwalkers = initialize_emcee_walkers( meta, log, ndim, None, freepars, prior1, prior2, priortype) new_pos = np.zeros((nwalkers, len(freenames))) for i, key in enumerate(freenames): if key not in full_keys: # There are now extra free parameters # Populate them using initialize_emcee_walkers new_pos[:, i] = temp_pos[:, i] else: # This variable already existed, so just add it idx = np.flatnonzero(full_keys == key)[0] new_pos[:, i] = pos[:, idx] pos = new_pos return pos, nwalkers
[docs] def initialize_emcee_walkers(meta, log, ndim, lsq_sol, freepars, prior1, prior2, priortype): """Initialize emcee walker starting positions Parameters ---------- meta : eureka.lib.readECF.MetaClass The meta data object. log : logedit.Logedit The open log in which notes from this step can be added. ndim : int The number of fitted parameters. lsq_sol : The results from the lsqfitter. The results from the lsqfitter. freepars : list The initial values of the fitted parameters. prior1 : list The list of prior1 values. prior2 : list The list of prior2 values. priortype : list The types of each prior (to determine meaning of prior1 and prior2). Returns ------- pos : ndarray The starting position of all walkers. nwalkers : int The number of walkers (may differ from the requested number if unable to get all walkers within the priors). Raises ------ AssertionError Failed to initialize any walkers within the priors AssertionError Failed to initialize enough walkers within the priors """ u = np.where(priortype == 'U')[0] lu = np.where(priortype == 'LU')[0] n = np.where(priortype == 'N')[0] if lsq_sol is not None and lsq_sol.cov_mat is not None: step_size = np.diag(lsq_sol.cov_mat) if np.any(step_size == 0.): zero_u = np.flatnonzero(step_size[u] == 0.) zero_lu = np.flatnonzero(step_size[lu] == 0.) zero_n = np.flatnonzero(step_size[n] == 0.) delta_u = prior2[u][zero_u] - prior1[u][zero_u] step_size[u][zero_u] = 0.001 * delta_u delta_lu = (np.exp(prior2[lu][zero_lu]) - np.exp(prior1[lu][zero_lu])) step_size[lu][zero_lu] = 0.001 * delta_lu step_size[n][zero_n] = 0.1 * prior2[n][zero_n] else: # Sometimes the lsq fitter won't converge and will give None as # the covariance matrix. In that case, we need to establish the # step size in another way. Using a fractional step compared to # the prior range can work best for precisely known values like # t0 and period log.writelog('No covariance matrix from LSQ - falling back on a step ' 'size based on the prior range', mute=(not meta.verbose)) step_size = np.ones(ndim) step_size[u] = 0.001*(prior2[u] - prior1[u]) step_size[lu] = 0.001*(np.exp(prior2[lu]) - np.exp(prior1[lu])) step_size[n] = 0.1*prior2[n] nwalkers = meta.run_nwalkers # make it robust to lsq hitting the upper or lower bound of the param space ind_max = np.flatnonzero(freepars[u] == prior2[u]) ind_min = np.flatnonzero(freepars[u] == prior1[u]) ind_max_LU = np.flatnonzero(np.log(freepars[lu]) == prior2[lu]) ind_min_LU = np.flatnonzero(np.log(freepars[lu]) == prior1[lu]) pmid = (prior2+prior1)/2. if len(ind_max) > 0 or len(ind_max_LU) > 0: log.writelog('Warning: >=1 params hit the upper bound in the lsq fit. ' 'Setting to the middle of the interval.') freepars[u][ind_max] = pmid[u][ind_max] freepars[lu][ind_max_LU] = (np.exp(prior2[lu][ind_max_LU]) + np.exp(prior1[lu][ind_max_LU]))/2. if len(ind_min) > 0 or len(ind_min_LU) > 0: log.writelog('Warning: >=1 params hit the lower bound in the lsq fit. ' 'Setting to the middle of the interval.') freepars[u][ind_min] = pmid[u][ind_min] freepars[lu][ind_min_LU] = (np.exp(prior2[lu][ind_min_LU]) + np.exp(prior1[lu][ind_min_LU]))/2. # Generate the walker positions pos = np.array([freepars + step_size*np.random.randn(ndim) for i in range(nwalkers)]) # Make sure the walker positions obey the priors in_range = np.array([((prior1[u] <= ii).all() and (ii <= prior2[u]).all()) for ii in pos[:, u]]) in_range2 = np.array([((prior1[lu] <= np.log(ii)).all() and (np.log(ii) <= prior2[lu]).all()) for ii in pos[:, lu]]) if not np.all(in_range) or not np.all(in_range2): log.writelog('Not all walkers were initialized within the priors, ' 'using a smaller proposal distribution') pos = pos[in_range] # Make sure the step size is well within the limits uniform_step = np.abs(np.append((prior2-freepars).reshape(-1, 1)/10, (freepars-prior1).reshape(-1, 1)/10, axis=1)) step_size_options = np.append(step_size.reshape(-1, 1), uniform_step, axis=1) if len(lu) != 0: step_size_options[lu, 1] = np.abs((np.exp(prior2[lu]) - freepars[lu]).reshape(-1, 1)/10) step_size_options[lu, 2] = np.abs((np.exp(prior1[lu]) - freepars[lu]).reshape(-1, 1)/10) if len(n) != 0: step_size_options[n, 1:] = step_size_options[n, 0].reshape(-1, 1) step_size = np.min(step_size_options, axis=1) if pos.shape[0] == 0: remove_zeroth = True new_nwalkers = nwalkers-len(pos) pos = np.zeros((1, ndim)) else: remove_zeroth = False new_nwalkers = nwalkers-len(pos) pos = np.append(pos, np.array([freepars + step_size*np.random.randn(ndim) for i in range(new_nwalkers) ]).reshape(-1, ndim), axis=0) if remove_zeroth: pos = pos[1:] in_range = np.array([((prior1[u] <= ii).all() and (ii <= prior2[u]).all()) for ii in pos[:, u]]) in_range2 = np.array([((prior1[lu] <= np.log(ii)).all() and (np.log(ii) <= prior2[lu]).all()) for ii in pos[:, lu]]) if not np.any(in_range) and not np.any(in_range2): raise AssertionError('Failed to initialize any walkers within the set ' 'bounds for all parameters!\n' 'Check your stating position, decrease your step ' 'size, or increase the bounds on your parameters') elif not np.all(in_range) or not np.all(in_range2): old_nwalkers = nwalkers pos = pos[in_range+in_range2] nwalkers = pos.shape[0] if nwalkers > ndim: log.writelog(f'Warning: Failed to initialize all walkers within ' f'the set bounds for all parameters!\nUsing ' f'{nwalkers} walkers instead of the initially ' f'requested {old_nwalkers} walkers') else: message = (f'Error: Failed to initialize all walkers within the ' f'set bounds for all parameters!\nUsing ' f'{nwalkers} walkers instead of the initially requested' f' {old_nwalkers} walkers is not permitted as there are' f' {ndim} fitted parameters') log.writelog(message, mute=True) raise AssertionError(message) return pos, nwalkers
[docs] def dynestyfitter(lc, model, meta, log, **kwargs): """Perform sampling using dynesty. Parameters ---------- lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve The lightcurve data object. model : eureka.S5_lightcurve_fitting.models.CompositeModel The composite model to fit. meta : eureka.lib.readECF.MetaClass The metadata object. log : logedit.Logedit The open log in which notes from this step can be added. **kwargs : dict Arbitrary keyword arguments. Returns ------- best_model : eureka.S5_lightcurve_fitting.models.CompositeModel The composite model after fitting. Notes ----- Uses either dynesty's static or dynamic nested sampling based on the `meta.run_dynamic` flag. """ fittername = 'dynamicdynesty' if meta.run_dynamic else 'dynesty' # Group the different variable types freenames = lc.freenames freepars, prior1, prior2, priortype, indep_vars = group_variables(model) if meta.old_fitparams is not None: freepars = load_old_fitparams(lc, meta, log, freenames, fittername) # Set up common dynesty parameters ndims = len(freepars) bound = meta.run_bound sample = meta.run_sample l_args = [lc, model, freenames] # Handle 'min' for meta.run_nlive nlive = meta.run_nlive min_nlive = int(np.ceil(ndims * (ndims + 1) // 2)) if nlive == 'min': nlive = min_nlive nlive_log = (f' Setting run_nlive = {nlive} (minimum ' f'recommended for ndim = {ndims})') elif nlive < min_nlive: nlive_log = (f'**** WARNING: You should set run_nlive to at least ' f'{min_nlive} ****') else: nlive_log = None # Initial log-likelihood start_lnprob = lnprob(freepars, lc, model, prior1, prior2, priortype, freenames) log.writelog(f' Starting lnprob: {start_lnprob}', mute=(not meta.verbose)) # Plot starting point if meta.isplots_S5 >= 1: plots.plot_fit(lc, model, meta, fitter=fittername+'StartingPoint') # Plot GP starting point if model.GP: plots.plot_GP_components(lc, model, meta, fitter=fittername+'StartingPoint') # Plot star spots starting point if 'fleck_tr' in meta.run_myfuncs and meta.isplots_S5 >= 3: plots.plot_fleck_star(lc, model, meta, fittername+'StartingPoint') # Plot Harmonica string starting point if ('harmonica_tr' in meta.run_myfuncs and 'a1' in freenames and meta.isplots_S5 >= 3): plots.plot_harmonica_string(lc, model, meta, fitter=fittername+'StartingPoint') # Set up multiprocessing if applicable if meta.ncpu > 1: pool = Pool(meta.ncpu) queue_size = meta.ncpu else: meta.ncpu = 1 pool = None queue_size = None # Choose between dynamic and static nested sampling if meta.run_dynamic: log.writelog(' Using dynamic nested sampling...') if nlive_log is not None: log.writelog(nlive_log, mute=(not meta.verbose)) sampler = DynamicNestedSampler( ln_like, ptform, ndims, pool=pool, queue_size=queue_size, bound=bound, sample=sample, logl_args=l_args, ptform_args=[prior1, prior2, priortype]) # Handle 'auto' for meta.run_nlive_batch nlive_batch = meta.run_nlive_batch if nlive_batch == 'auto': nlive_batch = max(25, nlive // 2) log.writelog(f' Setting run_nlive_batch = {nlive_batch} (auto ' f'default based on run_nlive = {nlive})', mute=(not meta.verbose)) # Run the sampler sampler.run_nested(nlive_init=nlive, nlive_batch=nlive_batch, dlogz_init=meta.run_tol, wt_kwargs={"pfrac": meta.run_pfrac}, print_progress=True) else: log.writelog(' Using static nested sampling...') if nlive_log is not None: log.writelog(nlive_log, mute=(not meta.verbose)) sampler = NestedSampler( ln_like, ptform, ndims, nlive=nlive, pool=pool, queue_size=queue_size, bound=bound, sample=sample, logl_args=l_args, ptform_args=[prior1, prior2, priortype]) # Run the sampler sampler.run_nested(dlogz=meta.run_tol, print_progress=True) # Get the results from the sampler res = sampler.results # Clean up pool if meta.ncpu > 1: pool.close() pool.join() # Log summary of results log.writelog('', mute=(not meta.verbose)) # Need to temporarily redirect output since res.summar() prints rather # than returns a string old_stdout = sys.stdout sys.stdout = mystdout = StringIO() res.summary() sys.stdout = old_stdout log.writelog(mystdout.getvalue(), mute=(not meta.verbose)) # Extract posterior samples weights = np.exp(res.logwt - res.logz[-1]) samples = resample_equal(res.samples, weights) log.writelog('Number of posterior samples is {}'.format(len(samples)), mute=(not meta.verbose)) # Record median + percentiles q = np.percentile(samples, [16, 50, 84], axis=0) fit_params = q[1] # median mean_params = np.mean(samples, axis=0) errs = np.std(samples, axis=0) # Create table of results t_results = table.Table([freenames, mean_params, q[0]-q[1], q[2]-q[1], q[0], fit_params, q[2]], names=("Parameter", "Mean", "-1sigma", "+1sigma", "16th", "50th", "84th")) upper_errs = q[2] - q[1] lower_errs = q[1] - q[0] # Update model and uncertainty model.update(fit_params) model.errs = dict(zip(freenames, errs)) lc.unc_fit = update_uncertainty(fit_params, lc.nints, lc.unc, freenames, lc.nchannel_fitted) # Save the fit ASAP so plotting errors don't make you lose everything save_fit(meta, lc, model, fittername, t_results, freenames, samples) # Final log-likelihood end_lnprob = lnprob(fit_params, lc, model, prior1, prior2, priortype, freenames) log.writelog(f'Ending lnprob: {end_lnprob}', mute=(not meta.verbose)) # Compute reduced chi-squared chi2red = computeRedChiSq(lc, log, model, meta, freenames) log.writelog(f'\n{fittername.upper()} RESULTS:') for i in range(ndims): if 'scatter_mult' in freenames[i]: chan = freenames[i].split('_')[-1] if chan.isnumeric(): chan = int(chan) else: chan = 0 trim1, trim2 = get_trim(meta.nints, chan) unc = np.ma.median(lc.unc[trim1:trim2]) scatter_ppm = 1e6*fit_params[i]*unc scatter_ppm_upper = 1e6*upper_errs[i]*unc scatter_ppm_lower = 1e6*lower_errs[i]*unc log.writelog(f'{freenames[i]}: {fit_params[i]} (+{upper_errs[i]},' f' -{lower_errs[i]}); {scatter_ppm} ' f'(+{scatter_ppm_upper}, -{scatter_ppm_lower}) ppm') else: log.writelog(f'{freenames[i]}: {fit_params[i]} (+{upper_errs[i]},' f' -{lower_errs[i]})') log.writelog('') # Plot fit if meta.isplots_S5 >= 1: plots.plot_fit(lc, model, meta, fitter=fittername) # Plot star spots if 'fleck_tr' in meta.run_myfuncs and meta.isplots_S5 >= 3: plots.plot_fleck_star(lc, model, meta, fitter=fittername) # Plot Harmonica string if ('harmonica_tr' in meta.run_myfuncs and 'a1' in freenames and meta.isplots_S5 >= 3): plots.plot_harmonica_string(lc, model, meta, fitter=fittername) # Plot GP fit + components if model.GP and meta.isplots_S5 >= 1: plots.plot_GP_components(lc, model, meta, fitter=fittername) # Zoom in on phase variations if meta.isplots_S5 >= 1 and ('sinusoid_pc' in meta.run_myfuncs or 'poet_pc' in meta.run_myfuncs or 'quasilambert_pc' in meta.run_myfuncs): plots.plot_phase_variations(lc, model, meta, fitter=fittername) # Make RMS time-averaging plot if meta.isplots_S5 >= 3 and np.size(lc.flux) > 20: # mc3.stats.time_avg breaks when testing with a small # number of integrations plots.plot_rms(lc, model, meta, fitter=fittername) # Plot residuals distribution if meta.isplots_S5 >= 3: plots.plot_res_distr(lc, model, meta, fitter=fittername) # plot using corner.py if meta.isplots_S5 >= 5: plots.plot_corner(samples, lc, meta, freenames, fitter=fittername) # Make a new model instance best_model = copy.deepcopy(model) best_model.components[0].update(fit_params) best_model.__setattr__('chi2red', chi2red) best_model.__setattr__('fit_params', fit_params) return best_model
[docs] def lmfitter(lc, model, meta, log, **kwargs): """Perform a fit using lmfit. Parameters ---------- lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve The lightcurve data object. model : eureka.S5_lightcurve_fitting.models.CompositeModel The composite model to fit. meta : eureka.lib.readECF.MetaClass The metadata object. log : logedit.Logedit The open log in which notes from this step can be added. **kwargs : dict Arbitrary keyword arguments. Returns ------- best_model : eureka.S5_lightcurve_fitting.models.CompositeModel The composite model after fitting """ # TODO: Do something so that duplicate param names can all be handled # (e.g. two Polynomail models with c0). Perhaps append something to the # parameter name like c0_1 and c0_2?) freenames = lc.freenames # Group the different variable types param_list, indep_vars = group_variables_lmfit(model) # Add the time as an independent variable indep_vars['time'] = lc.time # Initialize lmfit Params object initialParams = lmfit.Parameters() # Insert parameters initialParams.add_many(*param_list) # Create the lmfit lightcurve model lcmodel = lmfit.Model(model.eval) lcmodel.independent_vars = indep_vars.keys() # Fit light curve model to the simulated data result = lcmodel.fit(lc.flux, weights=1/lc.unc, params=initialParams, **indep_vars, **kwargs) # Get the best fit params fit_params = result.__dict__['params'] # new_params = [(fit_params.get(i).name, fit_params.get(i).value, # fit_params.get(i).vary, fit_params.get(i).min, # fit_params.get(i).max) for i in fit_params] # Create table of results t_results = table.Table([freenames, fit_params], names=("Parameter", "Mean")) model.update(fit_params) lc.unc_fit = update_uncertainty(fit_params, lc.nints, lc.unc, freenames, lc.nchannel_fitted) # Save the fit ASAP save_fit(meta, lc, model, 'lmfitter', t_results, freenames) # Compute reduced chi-squared chi2red = computeRedChiSq(lc, log, model, meta, freenames) # Log results log.writelog(result.fit_report(), mute=(not meta.verbose)) # Plot fit if meta.isplots_S5 >= 1: plots.plot_fit(lc, model, meta, fitter='lmfitter') # Plot star spots if 'fleck_tr' in meta.run_myfuncs and meta.isplots_S5 >= 3: plots.plot_fleck_star(lc, model, meta, fitter='lmfitter') # Plot Harmonica string if ('harmonica_tr' in meta.run_myfuncs and 'a1' in freenames and meta.isplots_S5 >= 3): plots.plot_harmonica_string(lc, model, meta, fitter='lmfitter') # Plot GP fit + components if model.GP and meta.isplots_S5 >= 1: plots.plot_GP_components(lc, model, meta, fitter='lmfitter') # Zoom in on phase variations if meta.isplots_S5 >= 1 and ('sinusoid_pc' in meta.run_myfuncs or 'poet_pc' in meta.run_myfuncs or 'quasilambert_pc' in meta.run_myfuncs): plots.plot_phase_variations(lc, model, meta, fitter='lmfitter') # Make RMS time-averaging plot if meta.isplots_S5 >= 3 and np.size(lc.flux) > 20: # mc3.stats.time_avg breaks when testing with a small # number of integrations plots.plot_rms(lc, model, meta, fitter='lmfitter') # Plot residuals distribution if meta.isplots_S5 >= 3: plots.plot_res_distr(lc, model, meta, fitter='lmfitter') # Create new model with best fit parameters best_model = copy.deepcopy(model) best_model.components[0].update(fit_params) best_model.__setattr__('chi2red', chi2red) best_model.__setattr__('fit_params', fit_params) return best_model
[docs] def group_variables(model): """Group variables into fitted and frozen. Parameters ---------- model : eureka.S5_lightcurve_fitting.models.CompositeModel The composite model to fit Returns ------- freepars : np.array The fitted variables. prior1 : np.array The lower bound for constrained variables with uniform/log uniform priors, or mean for constrained variables with Gaussian priors. prior2 : np.array The upper bound for constrained variables with uniform/log uniform priors, or mean for constrained variables with Gaussian priors. priortype : np.array Keywords indicating the type of prior for each free parameter. indep_vars : dict The frozen variables. """ parameters_dict = model.components[0].parameters.dict freenames = model.components[0].freenames # Group the different variable types freepars = [] prior1 = [] prior2 = [] priortype = [] for ii, name in enumerate(freenames): param = parameters_dict[name] # param = list(param) if ((param[1] == 'free') or (param[1] == 'shared') or ('white' in param[1])): freepars.append(param[0]) if len(param) == 5: # If prior is specified. prior1.append(param[2]) prior2.append(param[3]) priortype.append(param[4]) elif (len(param) > 3) & (len(param) < 5): # If prior bounds are specified but not the prior type raise IndexError("If you want to specify prior parameters, you" " must also specify the prior type: 'U', 'LU'" ", or 'N'.") else: # If no prior is specified, # assume uniform prior with infinite bounds. prior1.append(-np.inf) prior2.append(np.inf) priortype.append('U') freepars = np.array(freepars) prior1 = np.array(prior1) prior2 = np.array(prior2) priortype = np.array(priortype) indep_vars = dict([[key, parameters_dict[key][0]] for key in parameters_dict.keys() if key not in freenames]) return freepars, prior1, prior2, priortype, indep_vars
[docs] def group_variables_lmfit(model): """Group variables into fitted and frozen for lmfit fitter. Parameters ---------- model : eureka.S5_lightcurve_fitting.models.CompositeModel The composite model to fit Returns ------- paramlist : list The fitted variables. indep_vars : dict The frozen variables. """ all_params = [i for j in [model.components[n].parameters.dict.items() for n in range(len(model.components))] for i in j] # Group the different variable types param_list = [] indep_vars = {} for param in all_params: param = list(param) if param[1][1] == 'free': param[1][1] = True param_list.append(tuple(param)) elif param[1][1] == 'fixed': param[1][1] = False param_list.append(tuple(param)) else: indep_vars[param[0]] = param[1] return param_list, indep_vars
[docs] def load_old_fitparams(lc, meta, log, freenames, fitter): """Load in the best-fit values from a previous fit. Parameters ---------- lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve The lightcurve data object. meta : eureka.lib.readECF.MetaClass The metadata object. log : logedit.Logedit The open log in which notes from this step can be added. freenames : list The names of the fitted parameters. fitter : str The name of the fitter, to figure out the old fitparams filename name. Returns ------- fitted_values : np.array The best-fit values from a previous fit Raises ------ AssertionError The old fit is incompatible with the current fit. """ if lc.white: channel_tag = '_white' elif lc.share: channel_tag = '_shared' else: ch_number = str(lc.channel).zfill(len(str(lc.nchannel))) channel_tag = f'_ch{ch_number}' foldername = os.path.join(meta.topdir, *meta.old_fitparams.split(os.sep)) fname = f'S5_{fitter}_fitparams{channel_tag}.csv' fitted_values = pd.read_csv(os.path.join(foldername, fname), escapechar='#', skipinitialspace=True) full_keys = np.array(fitted_values['Parameter']) if np.all(full_keys != freenames): log.writelog('Old fit does not have the same fitted parameters and ' 'cannot be used to initialize the new fit.\n' 'The old fit included:\n['+','.join(full_keys)+']\n' 'The new fit included:\n['+','.join(freenames)+']', mute=True) raise AssertionError('Old fit does not have the same fitted parameters' ' and cannot be used to initialize the new fit.\n' 'The old fit included:\n['+','.join(full_keys) + ']\nThe new fit included:\n['+','.join(freenames) + ']') if '50th' in fitted_values.keys(): # A sampler was used, so use the (more reliable) median oldfitparam = fitted_values['50th'].to_numpy() else: # An optimizer was used, so only the Mean column will be populated oldfitparam = fitted_values['Mean'].to_numpy() return oldfitparam
[docs] def save_fit(meta, lc, model, fitter, results_table, freenames, samples=[]): """Save a fit as a txt file as well as the entire chain if provided. Parameters ---------- meta : eureka.lib.readECF.MetaClass The metadata object. lc : eureka.S5_lightcurve_fitting.lightcurve.LightCurve The lightcurve data object. model : eureka.S5_lightcurve_fitting.models.CompositeModel The composite model to fit. fitter : str The current fitter being used. fit_params : np.array The best-fit values from the current fit. freenames : list The list of fitted parameter names. samples : ndarray; optional The full chain from a sampling method, by default []. """ if lc.white: channel_tag = '_white' elif lc.share: channel_tag = '_shared' else: ch_number = str(lc.channel).zfill(len(str(lc.nchannel))) channel_tag = f'_ch{ch_number}' # Save the fitted parameters and their uncertainties (if possible) fname = f'S5_{fitter}_fitparams{channel_tag}' results_table.write(meta.outputdir+fname+'.csv', format='csv', overwrite=False) # Save the chain from the sampler using Astraeus (if a chain was provided) if len(samples) != 0: fname = meta.outputdir+f'S5_{fitter}_samples{channel_tag}.h5' ds = dict([(freenames[i], xr.DataArray(samples[:, i], dims=['sample'], name=freenames[i])) for i in range(len(freenames))]) ds = xrio.makeDataset(ds) xrio.writeXR(fname, ds) # Directory structure should not use expanded HW values spec_hw_val, bg_hw_val = util.get_unexpanded_hws( meta.expand, meta.spec_hw, meta.bg_hw) event_ap_bg = meta.eventlabel+"_ap"+str(spec_hw_val) + \ '_bg'+str(bg_hw_val) meta.tab_filename_s5 = (meta.outputdir+'S5_'+event_ap_bg+"_Table_Save" + channel_tag+'.txt') wavelengths = np.mean(np.append(meta.wave_low.reshape(1, -1), meta.wave_hi.reshape(1, -1), axis=0), axis=0) wave_errs = (meta.wave_hi-meta.wave_low)/2 # Evaluate each individual model for easier access outside of Eureka! individual_models = [] for comp in model.components: if comp.name != 'GP': individual_models.append([comp.name, comp.eval()]) else: fit = model.eval(incl_GP=False) individual_models.append([comp.name, comp.eval(fit)]) individual_models = np.array(individual_models, dtype=object) model_lc = model.eval() residuals = lc.flux-model_lc astropytable.savetable_S5(meta.tab_filename_s5, meta, lc.time, wavelengths[lc.fitted_channels], wave_errs[lc.fitted_channels], lc.flux, lc.unc_fit, individual_models, model_lc, residuals) return