# -*- coding: utf-8 -*-
"""
The :mod:`PyDynamic.uncertainty.interpolate` module implements methods for
the propagation of uncertainties in the application of standard interpolation methods
as provided by :class:`scipy.interpolate.interp1d`.
This module contains the following function:
* :func:`interp1d_unc`: Interpolate arbitrary time series considering the associated
uncertainties
"""
from typing import Optional, Tuple, Union
import numpy as np
from scipy.interpolate import interp1d, splrep, BSpline
__all__ = ["interp1d_unc", "make_equidistant"]
[docs]def interp1d_unc(
t_new: np.ndarray,
t: np.ndarray,
y: np.ndarray,
uy: np.ndarray,
kind: Optional[str] = "linear",
copy=True,
bounds_error: Optional[bool] = None,
fill_value: Optional[Union[float, Tuple[float, float], str]] = np.nan,
fill_unc: Optional[Union[float, Tuple[float, float], str]] = np.nan,
assume_sorted: Optional[bool] = True,
returnC: Optional[bool] = False,
) -> Union[
Tuple[np.ndarray, np.ndarray, np.ndarray],
Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray],
]:
r"""Interpolate a 1-D function considering the associated uncertainties
t and y are arrays of values used to approximate some function :math:`f \colon y
= f(t)`.
Note that calling :func:`interp1d_unc` with NaNs present in input values
results in undefined behaviour.
An equal number of each of the original timestamps (or frequencies), values and
associated uncertainties is required.
Parameters
----------
t_new : (M,) array_like
A 1-D array of real values representing the timestamps (or frequencies) at
which to evaluate the interpolated values. t_new can be sorted in any order.
t : (N,) array_like
A 1-D array of real values representing timestamps (or frequencies) in
ascending order.
y : (N,) array_like
A 1-D array of real values. The length of y must be equal to the length
of t.
uy : (N,) array_like
A 1-D array of real values representing the standard uncertainties
associated with y.
kind : str, optional
Specifies the kind of interpolation for y as a string ('previous',
'next', 'nearest', 'linear' or 'cubic'). Default is ‘linear’.
copy : bool, optional
If True, the method makes internal copies of t and y. If False,
references to t and y are used. The default is to copy.
bounds_error : bool, optional
If True, a ValueError is raised any time interpolation is attempted on a
value outside of the range of x (where extrapolation is necessary). If
False, out of bounds values are assigned fill_value. By default, an error
is raised unless `fill_value="extrapolate"`.
fill_value : array-like or (array-like, array_like) or “extrapolate”, optional
- if a ndarray (or float), this value will be used to fill in for
requested points outside of the data range. If not provided, then the
default is NaN.
- If a two-element tuple, then the first element is used as a fill value
for `t_new < t[0]` and the second element is used for `t_new > t[-1]`.
Anything that is not a 2-element tuple (e.g., list or ndarray, regardless
of shape) is taken to be a single array-like argument meant to be used
for both bounds as `below, above = fill_value, fill_value`.
- If “extrapolate”, then points outside the data range will be set
to the first or last element of the values.
- If cubic-interpolation, C2-continuity at the transition to the
extrapolation-range is not guaranteed. This behavior might change
in future implementations, see issue #210 for details.
Both parameters `fill_value` and `fill_unc` should be
provided to ensure desired behaviour in the extrapolation range.
fill_unc : array-like or (array-like, array_like) or “extrapolate”, optional
Usage and behaviour as described in `fill_value` but for the
uncertainties. Both parameters `fill_value` and `fill_unc` should be
provided to ensure desired behaviour in the extrapolation range.
assume_sorted : bool, optional
If False, values of t can be in any order and they are sorted first. If
True, t has to be an array of monotonically increasing values.
returnC : bool, optional
If True, return sensitivity coefficients for later use. This is only
available for interpolation kind 'linear' and for
fill_unc="extrapolate" at the moment. If False sensitivity
coefficients are not returned and internal computation is
slightly more efficient.
If `returnC` is False, which is the default behaviour, the method returns:
Returns
-------
t_new : (M,) array_like
interpolation timestamps (or frequencies)
y_new : (M,) array_like
interpolated values
uy_new : (M,) array_like
interpolated associated standard uncertainties
Otherwise the method returns:
Returns
-------
t_new : (M,) array_like
interpolation timestamps (or frequencies)
y_new : (M,) array_like
interpolated values
uy_new : (M,) array_like
interpolated associated standard uncertainties
C : (M,N) array_like
sensitivity matrix :math:`C`, which is used to compute the uncertainties
:math:`U_{y_{new}} = C \cdot \operatorname{diag}(u_y^2) \cdot C^T`
References
----------
* White [White2017]_
"""
# This is taken from the class scipy.interpolate.interp1d to copy and sort the
# arrays in case that is requested and of course extended by the uncertainties.
# ----------------------------------------------------------------------------------
t = np.array(t, copy=copy)
y = np.array(y, copy=copy)
uy = np.array(uy, copy=copy)
if not assume_sorted:
ind = np.argsort(t)
t = t[ind]
y = np.take(y, ind)
uy = np.take(uy, ind)
# ----------------------------------------------------------------------------------
# Check for proper dimensions of inputs which are not checked as desired by SciPy.
if not len(y) == len(uy):
raise ValueError(
"Array of associated measurement values' uncertainties must be same length "
"as array of measurement values."
)
# Set up parameter dicts for calls of interp1d. We use it for interpolating the
# values and "interpolation" (i.e. look-ups) of uncertainties in the trivial cases.
interp1d_params = {
"kind": kind,
"copy": False,
"bounds_error": bounds_error,
"assume_sorted": True,
}
# Ensure constant values outside the original bounds by setting fill_value
# and fill_unc accordingly but do not bother in case exception would be thrown.
if not bounds_error:
# For the extrapolation to rely on SciPy's handling of fill values in all
# but the linear case we set those explicitly to boundary values of y and uy
# respectively but handle those cases separately.
if fill_value == "extrapolate":
fill_value = y[0], y[-1]
if fill_unc == "extrapolate":
fill_unc = uy[0], uy[-1]
elif bounds_error is not None and returnC:
# This means bounds_error is intentionally set to False and we want to
# extrapolate uncertainties with custom values. Additionally the sensitivity
# coefficients shall be returned. This is not yet possible, because in this
# case, we do not know, how to map the provided extrapolation values onto
# the original values and thus we cannot provide the coefficients. Once we
# deal with this, we will probably introduce another input parameter
# fill_sens which is expected to be of shape (N,) or a 2-tuple of this
# shape, which is then used in C wherever an extrapolation is performed.
raise NotImplementedError(
"This feature is not yet implemented. We are planning to add "
"another input parameter which is meant to carry the sensitivities "
"for the extrapolated uncertainties. Get in touch with us, "
"if you need it to discuss how to proceed."
)
# Inter- and extrapolate values in the desired fashion relying on SciPy.
interp_y = interp1d(t, y, fill_value=fill_value, **interp1d_params)
y_new = interp_y(t_new)
if kind in ("previous", "next", "nearest"):
if returnC:
raise NotImplementedError(
"Returning the sensitivity matrix for now is only supported for "
"interpolation types other than 'previous', 'next' and 'nearest'. Get"
"in touch with us, if you need this to discuss how to proceed."
)
# Look up uncertainties.
interp_uy = interp1d(t, uy, fill_value=fill_unc, **interp1d_params)
uy_new = interp_uy(t_new)
elif kind in ("linear", "cubic"):
# Calculate boolean arrays of indices from t_new which are outside t's bounds...
extrap_range_below = t_new < np.min(t)
extrap_range_above = t_new > np.max(t)
extrap_range = extrap_range_below | extrap_range_above
# .. and inside t's bounds.
interp_range = ~extrap_range
# Initialize the result array for the standard uncertainties.
uy_new = np.empty_like(y_new)
# Initialize the sensitivity matrix of shape (M, N) if needed.
if returnC or kind == "cubic":
C = np.zeros((len(t_new), len(uy)), "float64")
# First extrapolate the according values if required and then
# compute interpolated uncertainties following White, 2017.
# If extrapolation is needed, fill in the values provided via fill_unc.
if np.any(extrap_range):
# At this point fill_unc is either a float (np.nan is a float as well) or
# a 2-tuple of floats. In case we have one float we set uy_new to this value
# inside the extrapolation range.
if isinstance(fill_unc, float):
uy_new[extrap_range] = fill_unc
else:
# Now fill_unc should be a 2-tuple, which we can fill into uy_new.
uy_new[extrap_range_below], uy_new[extrap_range_above] = fill_unc
if returnC:
# In each row of C corresponding to an extrapolation value below the
# original range set the first column to 1 and in each row of C
# corresponding to an extrapolation value above the original range set
# the last column to 1. It is important to do this before
# interpolating, because in general those two columns can contain
# non-zero values in the interpolation range.
C[:, 0], C[:, -1] = extrap_range_below, extrap_range_above
# If interpolation is needed, compute uncertainties following White, 2017.
if np.any(interp_range):
if kind == "linear":
# This following section is taken mainly from
# scipy.interpolate.interp1d to determine the indices of the relevant
# original timestamps (or frequencies) just for the interpolation range.
# ----------------------------------------------------------------------
# 2. Find where in the original data, the values to interpolate
# would be inserted.
# Note: If t_new[n] == t[m], then m is returned by searchsorted.
t_new_indices = np.searchsorted(t, t_new[interp_range])
# 3. Clip x_new_indices so that they are within the range of
# self.x indices and at least 1. Removes mis-interpolation
# of x_new[n] = x[0]
t_new_indices = t_new_indices.clip(1, len(t) - 1).astype(int)
# 4. Calculate the slope of regions that each x_new value falls in.
lo = t_new_indices - 1
hi = t_new_indices
t_lo = t[lo]
t_hi = t[hi]
# ----------------------------------------------------------------------
if returnC:
# Prepare the sensitivity coefficients, which in the first place
# inside the interpolation range are the Lagrangian polynomials. We
# compute the Lagrangian polynomials for all interpolation nodes
# inside the original range.
L_1 = (t_new[interp_range] - t_hi) / (t_lo - t_hi)
L_2 = (t_new[interp_range] - t_lo) / (t_hi - t_lo)
# Create iterators needed to efficiently fill our sensitivity matrix
# in the rows corresponding to interpolation range.
lo_it = iter(lo)
hi_it = iter(hi)
L_1_it = iter(L_1)
L_2_it = iter(L_2)
# In each row of C set the column with the corresponding
# index in lo to L_1 and the column with the corresponding
# index in hi to L_2.
for index, C_row in enumerate(C):
if interp_range[index]:
C_row[next(lo_it)] = next(L_1_it)
C_row[next(hi_it)] = next(L_2_it)
# Compute the standard uncertainties avoiding to build the sparse
# covariance matrix diag(u_y^2). We reduce the equation C diag(
# u_y^2) C^T for now to a more efficient calculation, which will
# work as long as we deal with uncorrelated values, so that all
# information can be found on the diagonal of the covariance and
# thus the result matrix.
uy_new[interp_range] = np.sqrt(
np.sum(C[interp_range] ** 2 * uy ** 2, 1)
)
else:
# Since we do not need the sensitivity matrix, we compute
# uncertainties more efficient (although we are actually not so
# sure about this anymore). The simplification of the equation by
# pulling out the denominator, just works because we work with
# the squared Lagrangians. Otherwise we would have to account for
# the summation order.
uy_prev_sqr = uy[lo] ** 2
uy_next_sqr = uy[hi] ** 2
uy_new[interp_range] = np.sqrt(
(t_new[interp_range] - t_hi) ** 2 * uy_prev_sqr
+ (t_new[interp_range] - t_lo) ** 2 * uy_next_sqr
) / (t_hi - t_lo)
elif kind == "cubic":
# Calculate the uncertainty by generating a spline of sensitivity
# coefficients. This procedure is described by eq. (19) of White2017.
F_is = []
for i in range(len(t)):
x_temp = np.zeros_like(t)
x_temp[i] = 1.0
F_i = BSpline(*splrep(t, x_temp, k=3))
F_is.append(F_i)
# Calculate sensitivity coefficients.
C[interp_range] = np.array([F_i(t_new[interp_range]) for F_i in F_is]).T
C_sqr = np.square(C[interp_range])
# if at some point time-uncertainties are of interest, White2017
# already provides the formulas (eq. (17))
# ut = np.zeros_like(t)
# ut_new = np.zeros_like(t_new)
# a1 = np.dot(C_sqr, np.square(uy))
# a2 = np.dot(
# C_sqr,
# np.squeeze(np.square(interp_y._spline(t, nu=1))) * np.square(ut),
# )
# a3 = np.square(np.squeeze(interp_y._spline(t_new, nu=1))) * np.square(
# ut_new
# )
# uy_new[interp_range] = np.sqrt(a1 - a2 + a3)
# without consideration of time-uncertainty eq. (17) becomes
uy_new[interp_range] = np.sqrt(np.dot(C_sqr, np.square(uy)))
else:
raise NotImplementedError(
"%s is unsupported yet. Let us know, that you need it." % kind
)
if returnC:
return t_new, y_new, uy_new, C
return t_new, y_new, uy_new
def make_equidistant(t, y, uy, dt=5e-2, kind="linear"):
"""Interpolate non-equidistant time series to equidistant
Interpolate measurement values and propagate uncertainties accordingly.
Parameters
----------
t: (N,) array_like
timestamps (or frequencies)
y: (N,) array_like
corresponding measurement values
uy: (N,) array_like
corresponding measurement values' standard uncertainties
dt: float, optional
desired interval length
kind: str, optional
Specifies the kind of interpolation for the measurement values
as a string ('previous', 'next', 'nearest' or 'linear').
Returns
-------
t_new : (M,) array_like
interpolation timestamps (or frequencies)
y_new : (M,) array_like
interpolated measurement values
uy_new : (M,) array_like
interpolated measurement values' standard uncertainties
References
----------
* White [White2017]_
"""
# Find t's maximum.
t_max = np.max(t)
# Setup new vector of timestamps.
t_new = np.arange(np.min(t), t_max, dt)
# Since np.arange in overflow situations results in the biggest values not
# guaranteed to be smaller than t's maximum', we need to check for this and delete
# these unexpected values.
if t_new[-1] > t_max:
t_new = t_new[t_new <= t_max]
return interp1d_unc(t_new, t, y, uy, kind)