Source code for eureka.S5_lightcurve_fitting.models.GPModel

import numpy as np
import george
from george import kernels
import celerite

from .Model import Model
from ..likelihood import update_uncertainty
from ...lib.split_channels import split

# tinygp is not supported yet
try:
    import tinygp
except ModuleNotFoundError:
    # tinygp isn't supported yet, so don't throw an exception if it
    # isn't installed
    pass


class GPModel(Model):
    """Model for a Gaussian Process (GP)."""

    def __init__(self, kernel_classes, kernel_inputs, lc, gp_code='george',
                 normalize=False, useHODLR=False, **kwargs):
        """Initialize the GP model.

        Parameters
        ----------
        kernel_classes : list
            The types of GP kernels to use.
        kernel_inputs : list
            The names of the GP kernel inputs.
        lc : eureka.S5_lightcurve_fitting.lightcurve
            The current lightcurve object.
        gp_code : str; optional
            The GP package to use, from ('george', 'celerite');
            by default 'george'.
        normalize : bool; optional
            If True, normalize the covariate by mean-subtracting it and
            dividing by its standard deviation. By default, False.
        useHODLR : bool; optional
            If True, use george's HODLRSolver instead of the default solver.
            Only relevant if gp_code is 'george'. By default, False.
        **kwargs : dict
            Additional parameters to pass to
            eureka.S5_lightcurve_fitting.models.Model.__init__().
            Can pass in the parameters, longparamlist, nchan, and
            paramtitles arguments here.
        """
        # Inherit from Model class
        super().__init__(**kwargs)

        # Define model type (physical, systematic, other)
        self.modeltype = 'GP'

        # Get GP parameters
        self.gp_code_name = gp_code
        self.normalize = normalize
        self.kernel_types = kernel_classes
        self.kernel_input_names = kernel_inputs
        self.kernel_inputs = None
        self.useHODLR = useHODLR
        self.nkernels = len(kernel_classes)
        self.flux = lc.flux
        self.fit = np.ones_like(self.flux)
        self.unc = lc.unc
        self.unc_fit = lc.unc_fit
        self.time = lc.time

        if self.gp_code_name == 'celerite':
            if self.nkernels > 1:
                raise AssertionError('celerite cannot compute multi-'
                                     'dimensional GPs. Please choose a '
                                     'different GP code.')
            elif self.kernel_types[0] != 'Matern32':
                raise AssertionError('Our celerite implementation currently '
                                     'only supports a Matern32 kernel.')

        # Update coefficients
        self.coeffs = np.zeros((self.nchannel_fitted, self.nkernels, 2))
        self._parse_coeffs()

    def _parse_coeffs(self):
        """Convert the dict of parameters into an array of coefficients."""
        for c in range(self.nchannel_fitted):
            if self.nchannel_fitted > 1:
                chan = self.fitted_channels[c]
            else:
                chan = 0
            if chan == 0:
                chankey = ''
            else:
                chankey = f'_{chan}'
            for i, par in enumerate(['A', 'm']):
                for k in range(self.nkernels):
                    if k == 0:
                        kernelkey = ''
                    else:
                        kernelkey = str(k)
                    try:
                        index = f'{par}{kernelkey}{chankey}'
                        self.coeffs[c, k, i] = \
                            self.parameters.dict[index][0]
                    except KeyError:
                        # This parameter wasn't provided; leave it at zero
                        pass
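
    # Illustrative note (not part of the class): _parse_coeffs expects
    # parameter names built as f'{par}{kernelkey}{chankey}'. For a
    # hypothetical fit with two kernels and two fitted channels, that means:
    #
    #     kernel 0, channel 0 -> 'A',    'm'
    #     kernel 1, channel 0 -> 'A1',   'm1'
    #     kernel 0, channel 1 -> 'A_1',  'm_1'
    #     kernel 1, channel 1 -> 'A1_1', 'm1_1'
    #
    # with self.coeffs[c, k, 0] holding the log-amplitude 'A' terms and
    # self.coeffs[c, k, 1] the log-metric 'm' terms.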

    def update(self, newparams, **kwargs):
        """Update the model with new parameter values and refresh the
        fitted uncertainty array."""
        # Inherit from Model class
        super().update(newparams, **kwargs)
        self.unc_fit = update_uncertainty(newparams, self.nints, self.unc,
                                          self.freenames)
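
    # Usage sketch (hypothetical names): a fitter calls update() with each
    # new parameter sample before evaluating the likelihood, e.g.
    #
    #     gpmodel.update(theta)    # theta: current free-parameter values
    #     logp = gpmodel.loglikelihood(systematics_fit)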

    def eval(self, fit, channel=None, gp=None, **kwargs):
        """Compute the GP prediction with the given parameters.

        Parameters
        ----------
        fit : ndarray
            The rest of the current model, already evaluated.
        channel : int; optional
            If not None, only consider one of the channels. Defaults to None.
        gp : celerite.GP, george.GP, or tinygp.GaussianProcess; optional
            The current GP object.
        **kwargs : dict
            Must pass in the time array here if not already set.

        Returns
        -------
        lcfinal : ndarray
            The predicted systematics model.
        """
        if channel is None:
            nchan = self.nchannel_fitted
            channels = self.fitted_channels
        else:
            nchan = 1
            channels = [channel, ]

        # Get the time
        if self.time is None:
            self.time = kwargs.get('time')

        self.fit = fit

        lcfinal = np.array([])
        for c in range(nchan):
            if self.nchannel_fitted > 1:
                chan = channels[c]
                # Get the flux and uncertainties for the current channel
                flux, unc_fit = split([self.flux, self.unc_fit],
                                      self.nints, chan)
                if channel is None:
                    fit = split([self.fit, ], self.nints, chan)[0]
                else:
                    # If only a specific channel is being evaluated, then
                    # only that channel's fitted model will be passed in
                    fit = self.fit
            else:
                chan = 0
                # Get the flux and uncertainties for the current channel
                flux = self.flux
                fit = self.fit
                unc_fit = self.unc_fit
            residuals = flux-fit

            time = self.time
            if self.multwhite:
                # Split the arrays that have lengths of the original time axis
                time = split([time, ], self.nints, chan)[0]

            # Create the GP object with the current parameters
            if gp is None:
                gp = self.setup_GP(c)

            if self.gp_code_name == 'george':
                gp.compute(self.kernel_inputs[chan].T, unc_fit)
                mu = gp.predict(residuals, self.kernel_inputs[chan].T,
                                return_cov=False)
            elif self.gp_code_name == 'celerite':
                gp.compute(self.kernel_inputs[chan][0], yerr=unc_fit)
                # Only the predictive mean is needed here
                mu = gp.predict(residuals, return_cov=False)
            elif self.gp_code_name == 'tinygp':
                cond_gp = gp.condition(residuals, noise=unc_fit).gp
                mu = cond_gp.loc
            lcfinal = np.append(lcfinal, mu)

        return lcfinal
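
    # Stand-alone sketch of the george branch above, assuming a 1D time
    # covariate and hypothetical arrays time, residuals, and unc:
    #
    #     gp = george.GP(1.0*kernels.Matern32Kernel(1.0), mean=0)
    #     gp.compute(time, unc)    # factorize K + diag(unc**2)
    #     mu = gp.predict(residuals, time, return_cov=False)  # GP mean
    #
    # mu is the predicted systematics model that gets appended to lcfinal.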

    def setup_inputs(self):
        """Set up the kernel inputs as an array, standardizing them if
        requested.

        For details on the benefits of normalization, see e.g.
        Evans et al. 2017.
        """
        self.kernel_inputs = []
        for c in range(self.nchannel_fitted):
            if self.nchannel_fitted > 1:
                chan = self.fitted_channels[c]
            else:
                chan = 0

            kernel_inputs_channel = []
            for name in self.kernel_input_names:
                if name == 'time':
                    x = np.copy(self.time)
                else:
                    # Add more input options here
                    raise ValueError('Currently, only GPs as a function of '
                                     'time are supported, but you have '
                                     'specified a GP as a function of '
                                     f'{name}.')

                if self.multwhite:
                    x = split([x, ], self.nints, chan)[0]

                if self.normalize:
                    x = (x-x.mean())/x.std()

                kernel_inputs_channel.append(x)

            kernel_inputs_channel = np.array(kernel_inputs_channel)
            # Need to reshape if there is only one covariate
            if len(kernel_inputs_channel.shape) == 1:
                kernel_inputs_channel = kernel_inputs_channel[np.newaxis]
            self.kernel_inputs.append(kernel_inputs_channel)
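
    # The normalization above is a plain z-score; e.g. for a hypothetical
    # time array t:
    #
    #     t = np.linspace(0., 0.3, 100)
    #     t_norm = (t - t.mean())/t.std()   # zero mean, unit variance
    #
    # which keeps the fitted kernel length-scale parameters of order unity.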

    def setup_GP(self, c=0):
        """Set up the GP kernels and the GP object.

        Parameters
        ----------
        c : int; optional
            The current channel index. Defaults to 0.

        Returns
        -------
        celerite.GP, george.GP, or tinygp.GaussianProcess
            The GP object to use for this fit.
        """
        if self.kernel_inputs is None:
            self.setup_inputs()

        # Get the kernel, which is the sum of the individual kernel functions
        kernel = self.get_kernel(self.kernel_types[0], 0, c)
        for k in range(1, self.nkernels):
            kernel += self.get_kernel(self.kernel_types[k], k, c)

        # Make the GP object
        if self.gp_code_name == 'george':
            if self.useHODLR:
                solver = george.solvers.HODLRSolver
            else:
                solver = None
            gp = george.GP(kernel, mean=0, fit_mean=False, solver=solver)
        elif self.gp_code_name == 'celerite':
            gp = celerite.GP(kernel, mean=0, fit_mean=False)
        elif self.gp_code_name == 'tinygp':
            if self.nchannel_fitted > 1:
                chan = self.fitted_channels[c]
            else:
                chan = 0
            gp = tinygp.GaussianProcess(kernel, self.kernel_inputs[chan].T,
                                        mean=0)

        return gp
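
    # Sketch of the multi-dimensional kernel sum built above for george,
    # with hypothetical amplitudes/metrics for two covariates; each kernel
    # acts on its own input axis via the ndim/axes arguments:
    #
    #     k0 = 1.0*kernels.Matern32Kernel(1.0, ndim=2, axes=0)
    #     k1 = 0.5*kernels.ExpSquaredKernel(2.0, ndim=2, axes=1)
    #     gp = george.GP(k0 + k1, mean=0, fit_mean=False)
    #     gp.compute(inputs.T, unc)   # inputs has shape (2, N)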

    def get_kernel(self, kernel_name, k, c=0):
        """Get an individual kernel.

        Parameters
        ----------
        kernel_name : str
            The name of the kernel to get.
        k : int
            The kernel number.
        c : int; optional
            The channel index, by default 0.

        Returns
        -------
        kernel
            The requested george, celerite, or tinygp kernel.

        Raises
        ------
        AssertionError
            If the requested kernel is not supported by the chosen GP code.
        """
        if self.gp_code_name == 'george':
            # Get the metric and amplitude for the current kernel and channel
            amp = np.exp(self.coeffs[c, k, 0]*2)  # Want exp(sigma)^2
            metric = np.exp(self.coeffs[c, k, 1]*2)
            if kernel_name == 'Matern32':
                kernel = amp*kernels.Matern32Kernel(
                    metric, ndim=self.nkernels, axes=k)
            elif kernel_name == 'ExpSquared':
                kernel = amp*kernels.ExpSquaredKernel(
                    metric, ndim=self.nkernels, axes=k)
            elif kernel_name == 'RationalQuadratic':
                kernel = amp*kernels.RationalQuadraticKernel(
                    log_alpha=1, metric=metric, ndim=self.nkernels, axes=k)
            elif kernel_name == 'Exp':
                kernel = amp*kernels.ExpKernel(
                    metric, ndim=self.nkernels, axes=k)
            else:
                raise AssertionError(f'The kernel {kernel_name} is not in '
                                     'the currently supported list of '
                                     'kernels for george which includes:\n'
                                     'Matern32, ExpSquared, '
                                     'RationalQuadratic, Exp.')
        elif self.gp_code_name == 'celerite':
            # Get the metric and amplitude for the current kernel and channel
            amp = self.coeffs[c, k, 0]
            metric = self.coeffs[c, k, 1]
            kernel = celerite.terms.Matern32Term(log_sigma=0, log_rho=metric)
            # Setting the amplitude
            kernel *= celerite.terms.RealTerm(log_a=amp, log_c=-10)
        elif self.gp_code_name == 'tinygp':
            # Get the metric and amplitude for the current kernel and channel
            amp = np.exp(self.coeffs[c, k, 0]*2)  # Want exp(sigma)^2
            metric = np.exp(self.coeffs[c, k, 1]*2)
            if kernel_name == 'Matern32':
                kernel = amp*tinygp.kernels.Matern32(metric)
            elif kernel_name == 'ExpSquared':
                kernel = amp*tinygp.kernels.ExpSquared(metric)
            elif kernel_name == 'RationalQuadratic':
                kernel = amp*tinygp.kernels.RationalQuadratic(alpha=1,
                                                              scale=metric)
            elif kernel_name == 'Exp':
                kernel = amp*tinygp.kernels.Exp(metric)
            else:
                raise AssertionError(f'The kernel {kernel_name} is not in '
                                     'the currently supported list of '
                                     'kernels for tinygp which includes:\n'
                                     'Matern32, ExpSquared, '
                                     'RationalQuadratic, Exp.')

        return kernel
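
    # The celerite branch above sets the amplitude by multiplying the
    # Matern32 term by a RealTerm whose decay timescale is enormous
    # (log_c=-10), so over the fitted baseline it acts as a near-constant
    # factor exp(log_a). Stand-alone sketch with hypothetical log-parameters
    # A and m:
    #
    #     kernel = celerite.terms.Matern32Term(log_sigma=0, log_rho=m)
    #     kernel *= celerite.terms.RealTerm(log_a=A, log_c=-10)
    #     gp = celerite.GP(kernel, mean=0, fit_mean=False)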

    def loglikelihood(self, fit, channel=None):
        """Compute the log likelihood of the GP.

        Parameters
        ----------
        fit : ndarray
            The fitted model.
        channel : int; optional
            If not None, only consider one of the channels. Defaults to None.

        Returns
        -------
        float
            The log likelihood of the GP evaluated by
            george/celerite/tinygp.
        """
        if channel is None:
            nchan = self.nchannel_fitted
            channels = self.fitted_channels
        else:
            nchan = 1
            channels = [channel, ]

        # Store the current fitted model
        self.fit = fit

        logL = []
        for c in np.arange(nchan):
            if self.nchannel_fitted > 1:
                chan = channels[c]
                # Get the flux and uncertainties for the current channel
                flux, fit, unc_fit = split([self.flux, self.fit,
                                            self.unc_fit],
                                           self.nints, chan)
            else:
                chan = 0
                # Get the flux and uncertainties for the current channel
                flux = self.flux
                fit = self.fit
                unc_fit = self.unc_fit
            residuals = flux-fit

            # Set up the GP with the current parameters
            gp = self.setup_GP(c)

            if self.gp_code_name == 'george':
                gp.compute(self.kernel_inputs[chan].T, unc_fit)
                logL_temp = gp.lnlikelihood(residuals, quiet=True)
            elif self.gp_code_name == 'celerite':
                gp.compute(self.kernel_inputs[chan][0], yerr=unc_fit)
                logL_temp = gp.log_likelihood(residuals)
            elif self.gp_code_name == 'tinygp':
                cond = gp.condition(residuals, diag=unc_fit)
                logL_temp = cond.log_probability
            logL.append(logL_temp)

        return sum(logL)
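
    # The per-channel value computed above is the standard GP marginal
    # likelihood,
    #     ln L = -0.5*r^T C^{-1} r - 0.5*ln|C| - 0.5*N*ln(2*pi),
    # with r = flux - fit and C = K + diag(unc_fit**2). A brute-force check
    # with george (hypothetical 1D inputs):
    #
    #     gp.compute(time, unc_fit)
    #     C = gp.get_matrix(time) + np.diag(unc_fit**2)
    #     logL = (-0.5*residuals @ np.linalg.solve(C, residuals)
    #             - 0.5*np.linalg.slogdet(C)[1]
    #             - 0.5*len(time)*np.log(2*np.pi))
    #     np.testing.assert_allclose(logL, gp.log_likelihood(residuals))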