Source code for PyDynamic.uncertainty.interpolate

"""This module assists in uncertainty propagation for 1-dimensional interpolation

The :mod:`PyDynamic.uncertainty.interpolate` module implements methods for
the propagation of uncertainties in the application of standard interpolation methods
as provided by :class:`scipy.interpolate.interp1d`.

This module contains the following functions:

* :func:`interp1d_unc`: Interpolate arbitrary time series considering the associated
  uncertainties
* :func:`make_equidistant`: Interpolate a 1-D function equidistantly considering
  associated uncertainties

"""
__all__ = ["interp1d_unc", "make_equidistant"]
from typing import Optional, Tuple, Union

import numpy as np
from scipy.interpolate import BSpline, interp1d, splrep


[docs]def interp1d_unc( x_new: np.ndarray, x: np.ndarray, y: np.ndarray, uy: np.ndarray, kind: Optional[str] = "linear", copy=True, bounds_error: Optional[bool] = None, fill_value: Optional[Union[float, Tuple[float, float], str]] = np.nan, fill_unc: Optional[Union[float, Tuple[float, float], str]] = np.nan, assume_sorted: Optional[bool] = True, returnC: Optional[bool] = False, ) -> Union[ Tuple[np.ndarray, np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray], ]: r"""Interpolate a 1-D function considering the associated uncertainties x and y are arrays of values used to approximate some function :math:`f \colon y = f(x)`. Note that calling :func:`interp1d_unc` with NaNs present in input values results in undefined behaviour. An equal number of each of the original x and y values and associated uncertainties is required. Parameters ---------- x_new : (M,) array_like A 1-D array of real values to evaluate the interpolant at. x_new can be sorted in any order. x : (N,) array_like A 1-D array of real values. y : (N,) array_like A 1-D array of real values. The length of y must be equal to the length of x. uy : (N,) array_like A 1-D array of real values representing the standard uncertainties associated with y. kind : str, optional Specifies the kind of interpolation for y as a string ('previous', 'next', 'nearest', 'linear' or 'cubic'). Default is ‘linear’. copy : bool, optional If True, the method makes internal copies of x and y. If False, references to x and y are used. The default is to copy. bounds_error : bool, optional If True, a ValueError is raised any time interpolation is attempted on a value outside of the range of x (where extrapolation is necessary). If False, out of bounds values are assigned fill_value. By default, an error is raised unless ``fill_value="extrapolate"``. fill_value : array-like or (array-like, array_like) or “extrapolate”, optional - if a ndarray (or float), this value will be used to fill in for requested points outside of the data range. If not provided, then the default is NaN. The array-like must broadcast properly to the dimensions of the non-interpolation axes. - If a two-element tuple, then the first element is used as a fill value for ``x_new < t[0]`` and the second element is used for ``x_new > t[-1]``. Anything that is not a 2-element tuple (e.g., list or ndarray, regardless of shape) is taken to be a single array-like argument meant to be used for both bounds as ``below, above = fill_value, fill_value``. - If “extrapolate”, then points outside the data range will be set to the first or last element of the values. - If cubic-interpolation, C2-continuity at the transition to the extrapolation-range is not guaranteed. This behavior might change in future implementations, see issue #210 for details. Both parameters fill_value and fill_unc should be provided to ensure desired behaviour in the extrapolation range. fill_unc : array-like or (array-like, array_like) or “extrapolate”, optional Usage and behaviour as described in fill_value but for the uncertainties. Both parameters fill_value and fill_unc should be provided to ensure desired behaviour in the extrapolation range. assume_sorted : bool, optional If False, values of x can be in any order and they are sorted first. If True, x has to be an array of monotonically increasing values. returnC : bool, optional If True, return sensitivity coefficients for later use. This is only available for interpolation kind 'linear' and for fill_unc="extrapolate" at the moment. If False sensitivity coefficients are not returned and internal computation is slightly more efficient. If returnC is False, which is the default behaviour, the method returns: Returns ------- x_new : (M,) array_like values at which the interpolant is evaluated y_new : (M,) array_like interpolated values uy_new : (M,) array_like interpolated associated standard uncertainties Otherwise the method returns: Returns ------- x_new : (M,) array_like values at which the interpolant is evaluated y_new : (M,) array_like interpolated values uy_new : (M,) array_like interpolated associated standard uncertainties C : (M,N) array_like sensitivity matrix :math:`C`, which is used to compute the uncertainties :math:`U_{y_{new}} = C \cdot \operatorname{diag}(u_y^2) \cdot C^T` References ---------- * White [White2017]_ """ # This is taken from the class scipy.interpolate.interp1d to copy and sort the # arrays in case that is requested and of course extended by the uncertainties. # ---------------------------------------------------------------------------------- x = np.array(x, copy=copy) y = np.array(y, copy=copy) uy = np.array(uy, copy=copy) if not assume_sorted: ind = np.argsort(x) x = x[ind] y = np.take(y, ind) uy = np.take(uy, ind) # ---------------------------------------------------------------------------------- # Check for proper dimensions of inputs which are not checked as desired by SciPy. if not len(y) == len(uy): raise ValueError( "interp1d_unc: Array of associated measurement values' uncertainties are " "expected to be of the same length as the array of measurement values, " f"but we have len(y) = {len(y)} and len(uy) = {len(uy)}. Please " f"provide an array of {len(y)} standard uncertainties." ) # Set up parameter dicts for calls of interp1d. We use it for interpolating the # values and "interpolation" (i.e. look-ups) of uncertainties in the trivial cases. interp1d_params = { "kind": kind, "copy": False, "bounds_error": bounds_error, "assume_sorted": True, } # Ensure constant values outside the original bounds by setting fill_value # and fill_unc accordingly but do not bother in case exception would be thrown. if not bounds_error: # For the extrapolation to rely on SciPy's handling of fill values in all # but the linear case we set those explicitly to boundary values of y and uy # respectively but handle those cases separately. if fill_value == "extrapolate": fill_value = y[0], y[-1] if fill_unc == "extrapolate": fill_unc = uy[0], uy[-1] elif bounds_error is not None and returnC: # This means bounds_error is intentionally set to False and we want to # extrapolate uncertainties with custom values. Additionally the sensitivity # coefficients shall be returned. This is not yet possible, because in this # case, we do not know, how to map the provided extrapolation values onto # the original values and thus we cannot provide the coefficients. Once we # deal with this, we will probably introduce another input parameter # fill_sens which is expected to be of shape (N,) or a 2-tuple of this # shape, which is then used in C wherever an extrapolation is performed. raise NotImplementedError( "interp1d_unc: This feature is not yet implemented. We are planning " "to add another input parameter which is meant to carry the " "sensitivities for the extrapolated uncertainties. Get in touch with " "us, if you need it to discuss how to proceed." ) # Inter- and extrapolate values in the desired fashion relying on SciPy. interp_y = interp1d(x, y, fill_value=fill_value, **interp1d_params) y_new = interp_y(x_new) if kind in ("previous", "next", "nearest"): if returnC: raise NotImplementedError( "interp1d_unc: Returning the sensitivity matrix for now is only " "supported for interpolation types other than 'previous', 'next' and " "'nearest'. Get in touch with us, if you need this to discuss how to " "proceed." ) # Look up uncertainties. interp_uy = interp1d(x, uy, fill_value=fill_unc, **interp1d_params) uy_new = interp_uy(x_new) elif kind in ("linear", "cubic"): # Calculate boolean arrays of indices from t_new which are outside t's bounds... extrap_range_below = x_new < np.min(x) extrap_range_above = x_new > np.max(x) extrap_range = extrap_range_below | extrap_range_above # .. and inside t's bounds. interp_range = ~extrap_range # Initialize the result array for the standard uncertainties. uy_new = np.empty_like(y_new) # Initialize the sensitivity matrix of shape (M, N) if needed. if returnC or kind == "cubic": C = np.zeros((len(x_new), len(uy)), "float64") # First extrapolate the according values if required and then # compute interpolated uncertainties following White, 2017. # If extrapolation is needed, fill in the values provided via fill_unc. if np.any(extrap_range): # At this point fill_unc is either a float (np.nan is a float as well) or # a 2-tuple of floats. In case we have one float we set uy_new to this value # inside the extrapolation range. if isinstance(fill_unc, float): uy_new[extrap_range] = fill_unc else: # Now fill_unc should be a 2-tuple, which we can fill into uy_new. uy_new[extrap_range_below], uy_new[extrap_range_above] = fill_unc if returnC: # In each row of C corresponding to an extrapolation value below the # original range set the first column to 1 and in each row of C # corresponding to an extrapolation value above the original range set # the last column to 1. It is important to do this before # interpolating, because in general those two columns can contain # non-zero values in the interpolation range. C[:, 0], C[:, -1] = extrap_range_below, extrap_range_above # If interpolation is needed, compute uncertainties following White, 2017. if np.any(interp_range): if kind == "linear": # This following section is taken mainly from # scipy.interpolate.interp1d to determine the indices of the relevant # original x values just for the interpolation range. # ---------------------------------------------------------------------- # 2. Find where in the original data, the values to interpolate # would be inserted. # Note: If x_new[n] == x[m], then m is returned by searchsorted. x_new_indices = np.searchsorted(x, x_new[interp_range]) # 3. Clip x_new_indices so that they are within the range of # self.x indices and at least 1. Removes mis-interpolation # of x_new[n] = x[0] x_new_indices = x_new_indices.clip(1, len(x) - 1).astype(int) # 4. Calculate the slope of regions that each x_new value falls in. lo = x_new_indices - 1 hi = x_new_indices x_lo = x[lo] x_hi = x[hi] # -------------------------------------------------------------------------- if returnC: # Prepare the sensitivity coefficients, which in the first place # inside the interpolation range are the Lagrangian polynomials. We # compute the Lagrangian polynomials for all interpolation nodes # inside the original range. L_1 = (x_new[interp_range] - x_hi) / (x_lo - x_hi) L_2 = (x_new[interp_range] - x_lo) / (x_hi - x_lo) # Create iterators needed to efficiently fill our sensitivity matrix # in the rows corresponding to interpolation range. lo_it = iter(lo) hi_it = iter(hi) L_1_it = iter(L_1) L_2_it = iter(L_2) # In each row of C set the column with the corresponding # index in lo to L_1 and the column with the corresponding # index in hi to L_2. for index, C_row in enumerate(C): if interp_range[index]: C_row[next(lo_it)] = next(L_1_it) C_row[next(hi_it)] = next(L_2_it) # Compute the standard uncertainties avoiding to build the sparse # covariance matrix diag(u_y^2). We reduce the equation C diag(u_y^2) # C^T for now to a more efficient calculation, which will work as # long as we deal with uncorrelated values, so that all information # can be found on the diagonal of the covariance and thus the result # matrix. uy_new[interp_range] = np.sqrt( np.sum(C[interp_range] ** 2 * uy ** 2, 1) ) else: # Since we do not need the sensitivity matrix, we compute uncertainties # more efficient (although we are actually not so sure about this # anymore). The simplification of the equation by pulling out the # denominator, just works because we work with the squared Lagrangians. # Otherwise we would have to account for the summation order. uy_prev_sqr = uy[lo] ** 2 uy_next_sqr = uy[hi] ** 2 uy_new[interp_range] = np.sqrt( (x_new[interp_range] - x_hi) ** 2 * uy_prev_sqr + (x_new[interp_range] - x_lo) ** 2 * uy_next_sqr ) / (x_hi - x_lo) elif kind == "cubic": # Calculate the uncertainty by generating a spline of sensitivity # coefficients. This procedure is described by eq. (19) of White2017. F_is = [] for i in range(len(x)): x_temp = np.zeros_like(x) x_temp[i] = 1.0 F_i = BSpline(*splrep(x, x_temp, k=3)) F_is.append(F_i) # Calculate sensitivity coefficients. C[interp_range] = np.array([F_i(x_new[interp_range]) for F_i in F_is]).T C_sqr = np.square(C[interp_range]) # if at some point time-uncertainties are of interest, White2017 # already provides the formulas (eq. (17)) # ut = np.zeros_like(t) # ut_new = np.zeros_like(t_new) # a1 = np.dot(C_sqr, np.square(uy)) # a2 = np.dot( # C_sqr, # np.squeeze(np.square(interp_y._spline(t, nu=1))) * np.square(ut), # ) # a3 = np.square(np.squeeze(interp_y._spline(t_new, nu=1))) * np.square( # ut_new # ) # uy_new[interp_range] = np.sqrt(a1 - a2 + a3) # without consideration of time-uncertainty eq. (17) becomes uy_new[interp_range] = np.sqrt(np.dot(C_sqr, np.square(uy))) else: raise NotImplementedError( f"interp1d_unc: The kind of interpolation '{kind}' is unsupported yet. Let " f"us know, that you need it." ) if returnC: return x_new, y_new, uy_new, C return x_new, y_new, uy_new
[docs]def make_equidistant( x: np.ndarray, y: np.ndarray, uy: np.ndarray, dx: Optional[float] = 5e-2, kind: Optional[str] = "linear", ): r"""Interpolate a 1-D function equidistantly considering associated uncertainties Interpolate function values equidistantly and propagate uncertainties accordingly. x and y are arrays of values used to approximate some function :math:`f \colon y = f(x)`. Note that calling :func:`interp1d_unc` with NaNs present in input values results in undefined behaviour. An equal number of each of the original x and y values and associated uncertainties is required. Parameters ---------- x: (N,) array_like A 1-D array of real values. y: (N,) array_like A 1-D array of real values. The length of y must be equal to the length of x. uy: (N,) array_like A 1-D array of real values representing the standard uncertainties associated with y. dx: float, optional desired interval length (defaults to 5e-2) kind : str, optional Specifies the kind of interpolation for y as a string ('previous', 'next', 'nearest', 'linear' or 'cubic'). Default is ‘linear’. Returns ------- x_new : (M,) array_like values at which the interpolant is evaluated y_new : (M,) array_like interpolated values uy_new : (M,) array_like interpolated associated standard uncertainties References ---------- * White [White2017]_ """ # Find x's maximum. x_max = np.max(x) # Setup new vector of x values. x_new = np.arange(np.min(x), x_max, dx) # Since np.arange in overflow situations results in the biggest values not # guaranteed to be smaller than x's maximum', we need to check for this and delete # these unexpected values. if x_new[-1] > x_max: x_new = x_new[x_new <= x_max] return interp1d_unc(x_new, x, y, uy, kind)