Source code for gigaanalysis.data


"""GigaAnalysis - Data Type - :mod:`gigaanalysis.data`
--------------------------------------------------------

This one module is imported directly into the :mod:`gigaanalysis` namespace,
so that the classes and functions here can be accessed directly.

This holds the :class:`Data` class and the functions that will manipulate 
them. This forms the backbone of the rest of the GigaAnalysis. The point of 
the :class:`Data` object is to hold sweeps. These are data sets with one 
independent and one dependent variable, which are very common in 
experimental physics research. By assuming the data is of this type more 
assumptions and error checking can be facilitated, and this is what 
GigaAnalysis aims to take advantage of.
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.interpolate import interp1d  # Used often to interpolate values


def _pick_float_dtype(to_check):
    """Return np.complex128 for complex dtypes, np.float64 otherwise.

    Adapted from scipy.interpolate.

    Parameters
    ----------
    to_check : numpy.ndarray or object
        If an array its ``dtype`` is inspected, otherwise the Python type
        of the object is used.

    Returns
    -------
    dtype : type
        Either ``numpy.complex128`` or ``numpy.float64``.
    """
    if isinstance(to_check, np.ndarray):
        dtype = to_check.dtype
    else:
        dtype = type(to_check)
    # The aliases np.complex_ and np.float_ were removed in NumPy 2.0, so
    # the explicit double precision types are used instead.
    if np.issubdtype(dtype, np.complexfloating):
        return np.complex128
    return np.float64


def _as_float_array(x):
    """Return the input as a C contiguous array of double precision.

    Adapted from scipy.interpolate. Half and single precision inputs are
    upcast; complex inputs stay complex. No copy is made if the input
    already has the right layout and dtype.
    """
    contiguous = np.ascontiguousarray(x)
    target_dtype = _pick_float_dtype(contiguous)
    return contiguous.astype(target_dtype, copy=False)


class Data():
    """The Data Class

    A Data object holds the data of one measurement sweep. It works as a
    simple wrapper of a two column numpy array (:class:`numpy.ndarray`).
    The data stored in the object is meant to be interpreted as x being
    the independent variable and y being the dependent variable.

    Parameters
    ----------
    values : numpy.ndarray or Data
        A two column numpy array with the x data in the first column and
        the y data in the second. If `split_y` is given this must instead
        be a 1D array holding only the x data.
    split_y : numpy.ndarray, optional
        A 1D numpy array containing the y data. If None all the data
        should be contained in the first array.
    strip_sort : bool or {'strip', 'sort'}, optional
        If True the data points with NaN are removed using
        :func:`numpy.isfinite` and the data is sorted by the x values.
        If 'strip' is given NaNs are removed but the data isn't sorted.
        If 'sort' is given the data is sorted but NaNs are left in.
        Default is False so the data isn't changed.
    interp_full : float, optional
        This interpolates the data to give an even spacing using the
        inbuilt method :meth:`to_even`. The default is None and the
        interpolation isn't done.

    Attributes
    ----------
    values : numpy.ndarray
        Two column numpy array with the x and y data in.
    x : numpy.ndarray
        x data in a 1D numpy array.
    y : numpy.ndarray
        The y data in a 1D numpy array.
    both : (numpy.ndarray, numpy.ndarray)
        A two value tuple with the :attr:`x` and :attr:`y` in.

    Notes
    -----
    Mathematical operations applied to the Data class only affect the
    :attr:`y` values, the :attr:`x` values stay the same. To combine two
    :class:`Data` objects their :attr:`x` values need to agree.
    :class:`Data` objects can also be combined with array_like objects
    (:func:`numpy.asarray`) of length 1 or of the same length as the Data.
    """

    # All the data lives in one private (name mangled) attribute.
    __slots__ = ("__values",)

    def __init__(self, values, split_y=None, strip_sort=False,
                 interp_full=None):
        if isinstance(values, Data):
            values = values.values  # If you pass a Data object to the class

        values = np.asarray(values)

        if split_y is not None:
            split_y = np.asarray(split_y)
            if values.ndim != 1:
                raise ValueError(
                    f"If x and y data are split both need to be a "
                    f"1D numpy array. values has shape {values.shape}")
            elif split_y.ndim != 1:
                raise ValueError(
                    f"If x and y data are split both need to be a "
                    f"1D numpy array. split_y has shape {split_y.shape}")
            elif values.size != split_y.size:
                raise ValueError(
                    f"If x and y data are split both need to be the same "
                    f"size. values has size {values.size} and split_y has "
                    f"size {split_y.size}")
            values = np.concatenate(
                [values[:, None], split_y[:, None]], axis=1)

        if values.ndim != 2 or values.shape[1] != 2:
            raise ValueError(
                f"values needs to be a two column numpy array. "
                f"values has the shape {values.shape}")

        if strip_sort:
            # True performs both steps, the strings select only one.
            if strip_sort != 'sort':
                values = values[np.isfinite(values).all(axis=1)]
            if strip_sort != 'strip':
                values = values[np.argsort(values[:, 0]), :]

        self.__values = _as_float_array(values)

        if interp_full is not None:
            self.to_even(interp_full)

    # Set up the attributes
    def __attribute_set(self, value):
        """Shared setter which blocks direct assignment to attributes."""
        raise AttributeError(
            f"Can't set the attributes of a Data object directly. \n"
            f"Use .set_x, .set_y, .set_data functions.")

    def values(self):
        return self.__values

    values = property(values, __attribute_set, None,
        "Two column numpy array with the x and y data in.")

    def x(self):
        return self.__values[:, 0]

    x = property(x, __attribute_set, None,
        "x data in a 1D numpy array.")

    def y(self):
        return self.__values[:, 1]

    y = property(y, __attribute_set, None,
        "y data in a 1D numpy array.")

    def both(self):
        return self.__values[:, 0], self.__values[:, 1]

    both = property(both, __attribute_set, None,
        "A two value tuple with the :attr:`x` and :attr:`y` in.")

    # standard python methods
    def __str__(self):
        return np.array2string(self.values)

    def __repr__(self):
        return f"GA Data object:\n {str(self.values)[1:-1]}"

    def __len__(self):
        return self.x.size

    def __bool__(self):
        return self.values.size != 0

    __array_ufunc__ = None
    # This is so that the user need to specify .x or .y when acting
    # with numpy functions.

    # For mathematical operations
    def __maths_check(self, other, operation,):
        """This performs the error checking on the standard operators.

        Parameters
        ----------
        other : :class:`Data` or array_like
            The object that the Data object is combined with.
        operation : str
            The name of the operation being applied, used in the error
            messages.

        Returns
        -------
        other_values : numpy.ndarray
            The values to calculate with. For a :class:`Data` object these
            are its y values.

        Raises
        ------
        ValueError
            If the x values of two Data objects differ, or an array has the
            wrong dimension or length.
        TypeError
            If `other` cannot be converted into a numeric array.
        """
        if isinstance(other, Data):
            if np.array_equal(self.x, other.x):
                return other.y
            raise ValueError(
                f"The two Data classes do not have the same x "
                f"values, so cannot be {operation}")

        try:
            other = np.asarray(other, dtype=_pick_float_dtype(other))
        except Exception as err:
            raise TypeError(
                f"Data cannot be {operation} with object of "
                f"type {type(other)}.") from err

        if other.size == 1:
            return other
        elif other.ndim != 1:
            raise ValueError(
                f"Array to {operation} Data object with is of the wrong "
                f"dimension. Its shape is {other.shape}")
        elif other.size != self.x.size:
            raise ValueError(
                f"Array to {operation} Data object with is of the wrong "
                f"length. \nIts length is {other.size} while the Data "
                f"is {self.x.size}")
        return other

    def __mul__(self, other):
        """Multiplication of the y values."""
        other = self.__maths_check(other, "multiplied")
        return Data(self.x, self.y*other)

    def __rmul__(self, other):
        other = self.__maths_check(other, "multiplied")
        return Data(self.x, other*self.y)

    def __truediv__(self, other):
        """Division of the y values."""
        other = self.__maths_check(other, "divided")
        return Data(self.x, self.y/other)

    def __rtruediv__(self, other):
        other = self.__maths_check(other, "divide by")
        return Data(self.x, other/self.y)

    def __add__(self, other):
        """Addition of the y values."""
        other = self.__maths_check(other, "added")
        return Data(self.x, self.y + other)

    def __radd__(self, other):
        other = self.__maths_check(other, "added")
        return Data(self.x, other + self.y)

    def __sub__(self, other):
        """Subtraction of the y values."""
        other = self.__maths_check(other, "subtracted")
        return Data(self.x, self.y - other)

    def __rsub__(self, other):
        other = self.__maths_check(other, "subtracted")
        return Data(self.x, other - self.y)

    def __mod__(self, other):
        """Performs the modulus with the y values."""
        other = self.__maths_check(other, "divided mod")
        return Data(self.x, self.y % other)

    def __rmod__(self, other):
        other = self.__maths_check(other, "divided mod")
        return Data(self.x, other % self.y)

    def __floordiv__(self, other):
        """Floor division on the y values."""
        other = self.__maths_check(other, "floor division")
        return Data(self.x, self.y // other)

    def __rfloordiv__(self, other):
        other = self.__maths_check(other, "floor division")
        return Data(self.x, other // self.y)

    def __pow__(self, other):
        """Takes the power of the y values."""
        other = self.__maths_check(other, "exponentiated")
        return Data(self.x, self.y ** other)

    def __rpow__(self, other):
        other = self.__maths_check(other, "exponentiated")
        return Data(self.x, other ** self.y)

    def __abs__(self):
        """Calculates the absolute value of the y values."""
        return Data(self.x, abs(self.y))

    def __neg__(self):
        """Negates the y values."""
        return Data(self.x, -self.y)

    def __pos__(self):
        """Performs a unity operation on y values."""
        return Data(self.x, self.y)

    def __eq__(self, other):
        """Data class is only equal to other Data classes with the same
        data.
        """
        if type(other) is not type(self):
            return False
        return np.array_equal(self.values, other.values)

    def __iter__(self):
        """The iteration happens on the values, as if it was a numpy
        array.
        """
        return iter(self.values)

    # For indexing behaviour
    def __index_check(self, k):
        """Check an index given is of the correct type and size.

        Raises errors if it is the wrong type or shape. Also returns a
        bool which is True if only one item is called.

        Parameters
        ----------
        k : slice, int, or array_like
            An object obtained from an index call.

        Returns
        -------
        individual : bool
            Is the index call only for one item?
        """
        if isinstance(k, tuple):
            raise IndexError(
                "Data object only accepts one index")
        elif isinstance(k, slice):
            return False

        try:
            k = np.asarray(k)
        except Exception as err:
            raise IndexError(
                "Data cannot index with this type.") from err

        if k.size == 1:
            return True
        elif k.ndim != 1:
            raise IndexError(
                "Data objec can only Index is one dimension.")
        elif k.dtype == np.int_:
            # Arrays of integer positions can be any length.
            return False
        elif k.size != self.x.size:
            raise IndexError(
                f"Index given was wrong length. The length of index was "
                f"{k.size} and the Data is length {self.x.size}")
        return False

    def __getitem__(self, k):
        """Indexing returns a subset of the Data object.

        If given a slice or an array of booleans a new Data object is
        produced. If given an int a length two array with [x, y] is
        returned.
        """
        if self.__index_check(k):
            return self.values[k]
        return Data(self.values[k])

    def __setitem__(self, k, v):
        """Item assignment is not allowed in Data objects.

        This kind of action is possible with the functions :meth:`set_x`,
        :meth:`set_y`, and :meth:`set_data`.
        """
        raise Warning(
            "Data objects do not allow item assignment. For this "
            "functionality see .set_x, .set_y, and .set_data.")

    def set_x(self, idx, val):
        """This is used for setting x values.

        Works similarly to ``Data.x[idx] = val`` but with more error
        checking. The previous code would also work (and be faster) but
        more care should be taken. The built in function
        :func:`slice(start, end, step)` may be useful.

        Parameters
        ----------
        idx : slice, int
            Objects that can be passed to a :class:`numpy.ndarray` as an
            index.
        val : numpy.ndarray
            The values to assign to the indexed x values.
        """
        if isinstance(val, Data):
            raise TypeError(
                "Cannot set the object type with a Data object.")
        self.__index_check(idx)
        new_x = self.x  # a view, so the assignment below is in place
        new_x[idx] = val
        self.__init__(new_x, self.y)

    def set_y(self, idx, val):
        """This is used for setting y values.

        Works similarly to ``Data.y[idx] = val`` but with more error
        checking. The previous code would also work (and be faster) but
        more care should be taken. The built in function
        :func:`slice(start, end, step)` may be useful.

        Parameters
        ----------
        idx : slice, int
            Objects that can be passed to a :class:`numpy.ndarray` as an
            index.
        val : numpy.ndarray
            The values to assign to the indexed y values.
        """
        if isinstance(val, Data):
            raise TypeError(
                "Cannot set the object type with a Data object.")
        self.__index_check(idx)
        new_y = self.y  # a view, so the assignment below is in place
        new_y[idx] = val
        self.__init__(self.x, new_y)

    def set_data(self, idx, val):
        """This is used for setting x and y values.

        Works similarly to ``Data.values[idx] = val`` but with more error
        checking. The previous code would also work (and be faster) but
        more care should be taken. The built in function
        :func:`slice(start, end, step)` may be useful.

        Parameters
        ----------
        idx : slice, int
            Objects that can be passed to a :class:`numpy.ndarray` as an
            index.
        val : numpy.ndarray, Data
            The values to assign to the indexed values. This can only be
            a two column :class:`numpy.ndarray` or a :class:`Data` object.
        """
        if self.__index_check(idx):
            size = 2  # one row, so one x and one y value
        else:
            size = self.values[idx].size

        if not isinstance(val, (Data, np.ndarray)):
            raise TypeError(
                f"The value to assign data must be a data object or a two "
                f"column numpy array. The type give was {type(val)}.")
        elif isinstance(val, Data):
            if size != val.values.size:
                raise ValueError(
                    f"The Data to set is a different size to the Data "
                    f"object given. The size to index was {size/2} "
                    f"while the data to set was {val.values.size/2}.")
            new_data = self.values
            new_data[idx] = val.values
        elif val.ndim != 2:
            raise ValueError(
                f"The dimension of the numpy array to set to is not "
                f"the correct shape. Needs to be a two column array shape "
                f"given was {val.shape}.")
        elif val.shape[1] != 2:
            raise ValueError(
                f"The dimension of the numpy array to set to does not "
                f"have two columns. Needs to be a two column array shape "
                f"given was {val.shape}.")
        elif val.size != size:
            raise ValueError(
                f"The Data to set is a different size to the numpy "
                f"array given. The size to index was {size/2} "
                f"while the data to set was {val.size/2}.")
        else:
            new_data = self.values
            new_data[idx] = val
        self.__init__(new_data)

    # Simple useful methods
    def strip_nan(self):
        """This removes any row which has a NaN or infinite value in.

        Returns
        -------
        stripped_data : Data
            Data class without non-finite values in.
        """
        return Data(self.values[np.isfinite(self.values).all(axis=1)])

    def sort(self):
        """Sorts the data set in x and returns the new array.

        Returns
        -------
        sorted_data : Data
            A Data class with the sorted data.
        """
        return Data(self.values[np.argsort(self.x), :])

    def min_x(self):
        """This provides the lowest value of x.

        Returns
        -------
        x_min : float
            The minimum value of x.
        """
        return np.min(self.x)

    def max_x(self):
        """This provides the highest value of x.

        Returns
        -------
        x_max : float
            The maximum value of x.
        """
        return np.max(self.x)

    def spacing_x(self):
        """Returns the average spacing in x.

        The range of x is divided by the number of intervals between the
        points, which is one less than the number of points.

        Returns
        -------
        x_spacing : float
            The average spacing in the x data. This is zero if the Data
            only holds one point.
        """
        x_span = self.max_x() - self.min_x()
        # A single point has no interval; dividing by one keeps the
        # result at zero instead of dividing by zero.
        return x_span/max(len(self) - 1, 1)

    def x_cut(self, x_min, x_max):
        """This cuts the data to a region between x_min and x_max.

        Points lying exactly on either limit are excluded.

        Parameters
        ----------
        x_min : float
            The minimal x value to cut the data.
        x_max : float
            The maximal x value to cut the data.

        Returns
        -------
        cut_data : Data
            A data object with the values cut to the given x range.
        """
        if x_min > x_max:
            raise ValueError('x_min should be smaller than x_max')
        return Data(self.values[(self.x > x_min) & (self.x < x_max)])

    def y_from_x(self, x_val, bounds_error=True, kind='linear'):
        """Gives the y value for a certain x value or set of x values.

        Parameters
        ----------
        x_val : float or array_like
            X values to interpolate y values from.
        bounds_error : bool, optional
            If an error should be thrown if an x value is out of range,
            default True. If False the y values at the lowest and highest
            x are used for points below and above the range.
        kind : str or int, optional
            The type of interpolation to use. Passed to
            :func:`scipy.interpolate.interp1d`, default is `linear`.

        Returns
        -------
        y_val : float or numpy.ndarray
            Corresponding to the requested x values in an array. If only
            one value is given a single number is returned.
        """
        if bounds_error and \
                (np.max(x_val) > self.max_x()
                 or np.min(x_val) < self.min_x()):
            raise ValueError(
                f"The given x_values are out side of the range of data "
                f"which is between {self.min_x()} and {self.max_x()}")

        # Outside the range hold the y values found at the ends of the x
        # range, in the same way as interp_range and interp_values.
        low_y = self.y[self.x.argmin()]
        high_y = self.y[self.x.argmax()]
        y_val = interp1d(self.x, self.y,
            bounds_error=False,
            fill_value=(low_y, high_y),
            kind=kind)(x_val)

        if y_val.size != 1:
            return y_val
        # .item() also works on one element arrays of any dimension.
        return y_val.item()

    def apply_x(self, function):
        """This takes a function and applies it to the x values.

        Parameters
        ----------
        function : Callable
            The function to apply to the x values.

        Returns
        -------
        transformed_data : Data
            Data class with new x values.
        """
        return Data(function(self.x), self.y)

    def apply_y(self, function):
        """This takes a function and applies it to the y values.

        Parameters
        ----------
        function : Callable
            The function to apply to the y values.

        Returns
        -------
        transformed_data : Data
            Data class with new y values.
        """
        return Data(self.x, function(self.y))

    def append(self, new_data, in_place=False):
        """This adds values to the end of the data object.

        Parameters
        ----------
        new_data : Data
            These are the values to add onto the end of the data object.
            Anything that can be cast into a Data object is also accepted.
        in_place : bool, optional
            Whether to edit the object or to return a new one. The default
            is `False` which returns a new object.

        Returns
        -------
        combined_data : Data
            If in_place is `False` then a new Data object is returned,
            otherwise nothing is returned.
        """
        if not isinstance(new_data, Data):
            try:
                new_data = Data(new_data)
            except Exception as err:
                raise ValueError(
                    f"The new_data to append was not a Data object or "
                    f"could be cast to one. Was of type {type(new_data)}"
                    ) from err

        new_vals = np.append(self.values, new_data.values, axis=0)

        if in_place:
            self.__init__(new_vals)
        else:
            return Data(new_vals)

    # Methods for Interpolation of Data
    def interp_range(self, min_x, max_x, step_size=None, num_points=None,
                     shift_step=True, kind='linear'):
        """Evenly interpolates in x the data between a min and max value.

        This is used for combining datasets with corresponding but
        different x values. Either `step_size` or `num_points` can be
        defined. If `step_size` is defined :func:`numpy.arange` is used.
        If `num_points` is defined :func:`numpy.linspace` is used.
        If using `step_size` it rounds `min_x` to the next integer value
        of the steps, unless `shift_step` is `False`.
        If values outside the range of the original data need to be
        interpolated, this is possible with :func:`Data.interp_values`.
        It uses :func:`scipy.interpolate.interp1d`.

        Parameters
        ----------
        min_x : float
            The minimum x value in the interpolation.
        max_x : float
            The maximum x value in the interpolation.
        step_size : float, optional
            The step size between each point. Either this or num_points
            must be defined. Takes precedence if both are given.
        num_points : int, optional
            The number of points to interpolate. Either this or step_size
            must be defined.
        shift_step : bool, optional
            If the `min_x` value should be rounded to the next whole step.
            The default is True.
        kind : str or int, optional
            The type of interpolation to use. Passed to
            :func:`scipy.interpolate.interp1d`, default is `linear`.

        Returns
        -------
        interpolated_data : Data
            A Data object with evenly interpolated points.
        """
        if step_size is None and num_points is None:
            raise ValueError(
                "Must define either step_size or num_points.")

        if min_x > max_x:
            min_x, max_x = max_x, min_x  # order min and max

        if np.min(self.x) > min_x:
            raise ValueError("min_x value to interpolate is below data")
        if np.max(self.x) < max_x:
            raise ValueError("max_x value to interpolate is above data")

        if step_size is not None:
            if shift_step:
                min_x = np.ceil(min_x/step_size)*step_size
            x_vals = np.arange(min_x, max_x, step_size)
        else:
            x_vals = np.linspace(min_x, max_x, num_points)

        # The bounds are used in the rare case of floating point issues.
        min_y = self.y[self.x.argmin()]
        max_y = self.y[self.x.argmax()]

        y_vals = interp1d(self.x, self.y, kind=kind,
            bounds_error=False, fill_value=(min_y, max_y))(x_vals)
        return Data(x_vals, y_vals)

    def interp_step(self, step_size, shift_step=True, kind='linear'):
        """Evenly interpolates in x the data with a fixed step size.

        This uses :meth:`Data.interp_range` specifying `step_size` and
        giving the maximum range of x points. It rounds the lowest x to
        the next integer value of the steps, unless `shift_step` is
        `False`.

        Parameters
        ----------
        step_size : float
            The step size between each point.
        shift_step : bool, optional
            If the lowest x value should be rounded to the next whole
            step. The default is True.
        kind : str or int, optional
            The type of interpolation to use. Passed to
            :func:`scipy.interpolate.interp1d`, default is `linear`.

        Returns
        -------
        interpolated_data : Data
            A Data object with evenly interpolated points.
        """
        return self.interp_range(self.min_x(), self.max_x(),
            step_size=step_size, shift_step=shift_step, kind=kind,)

    def interp_number(self, num_points, kind='linear'):
        """Evenly interpolates in x the data for a fixed point number.

        This uses :meth:`Data.interp_range` specifying `num_points` and
        giving the maximum range of x points.

        Parameters
        ----------
        num_points : int
            The number of points to interpolate.
        kind : str or int, optional
            The type of interpolation to use. Passed to
            :func:`scipy.interpolate.interp1d`, default is `linear`.

        Returns
        -------
        interpolated_data : Data
            A Data object with evenly interpolated points.
        """
        return self.interp_range(self.min_x(), self.max_x(),
            num_points=num_points, kind=kind,)

    def interp_values(self, x_vals, kind='linear', bounds_error=True,
                      fill_value=np.nan, strip_sort=False):
        """Produce Data object from interpolating x values.

        This uses :func:`scipy.interpolate.interp1d` to produce a Data
        object by interpolating y values from given x values.

        Parameters
        ----------
        x_vals : array_like
            The x values to interpolate which will be the x values.
        kind : str or int, optional
            The type of interpolation to use. Passed to
            :func:`scipy.interpolate.interp1d`, default is `linear`.
        bounds_error : bool, optional
            If default of `True` data outside the existing range will
            throw an error. If `False` then the value is set by
            `fill_value`.
        fill_value : float or (float, float) or `extrapolate`, optional
            If bounds_error is `False` then this value will be used
            outside the range. Passed to
            :func:`scipy.interpolate.interp1d`.
        strip_sort : bool, optional
            The default is `False`, whether to sort and remove NaNs from
            the Data object before returning.

        Returns
        -------
        interpolated_data : Data
            A Data object with the given x values and interpolated y
            values.
        """
        x_vals = np.asarray(x_vals)
        if x_vals.ndim != 1:
            raise ValueError(
                f"x_vals had shape {x_vals.shape} where as it need to be 1D")

        if bounds_error:
            if self.min_x() > np.min(x_vals):
                raise ValueError(
                    "min_x value to interpolate is below data and "
                    "bounds_error is True.")
            if self.max_x() < np.max(x_vals):
                raise ValueError(
                    "max_x value to interpolate is above data and "
                    "bounds_error is True")
            # The bounds are used in the rare case of floating point
            # issues.
            min_y = self.y[self.x.argmin()]
            max_y = self.y[self.x.argmax()]
            fill_value = (min_y, max_y)

        y_vals = interp1d(self.x, self.y, kind=kind,
            bounds_error=False, fill_value=fill_value)(x_vals)
        return Data(x_vals, y_vals, strip_sort=strip_sort)

    def to_even(self, step_size, shift_step=True, kind='linear'):
        """Evenly interpolates the data and updates the data object.

        This uses :meth:`Data.interp_range` specifying `step_size` and
        giving the maximum range of x points. It rounds the lowest x to
        the next integer value of the steps, unless `shift_step` is
        `False`. The object is changed in place and nothing is returned.

        Parameters
        ----------
        step_size : float
            The step size between each point.
        shift_step : bool, optional
            If the lowest x value should be rounded to the next whole
            step. The default is True.
        kind : str or int, optional
            The type of interpolation to use. Passed to
            :func:`scipy.interpolate.interp1d`, default is `linear`.
        """
        self.__init__(self.interp_range(self.min_x(), self.max_x(),
            step_size=step_size, shift_step=shift_step, kind=kind,).values)

    # Plotting Method
    def plot(self, *args, axis=None, **kwargs):
        """Simple plotting utility

        Makes use of matplotlib function :func:`matplotlib.pyplot.plot`.
        Runs ``matplotlib.pyplot.plot(self.x, self.y, *args, **kwargs)``.
        If the `axis` keyword is provided then
        ``axis.plot(self.x, self.y, *args, **kwargs)`` is run instead.
        """
        if axis is None:
            plt.plot(self.x, self.y, *args, **kwargs)
        else:
            axis.plot(self.x, self.y, *args, **kwargs)

    # Saving Method
    def to_csv(self, filename, columns=["X", "Y"], **kwargs):
        """Saves the data as a simple csv

        Uses :func:`pandas.DataFrame.to_csv` and kwargs are passed to it.
        The index keyword is set to False by default.

        Parameters
        ----------
        filename : str
            Filename to save the data as.
        columns : [str, str]
            The title of the two columns.
        """
        # The default list is only read and never changed.
        kwargs.setdefault('index', False)
        pd.DataFrame(self.values, columns=columns
            ).to_csv(filename, **kwargs)
def swap_xy(data, **kwargs):
    """Interchange the independent and dependent variables.

    Builds a new :class:`.Data` object whose x values are the y values of
    the one given and vice versa. Keyword arguments are passed to the
    :class:`.Data` class.

    Parameters
    ----------
    data : Data
        The data to switch the x and y values.

    Returns
    -------
    swapped_data : Data
        A new :class:`.Data` object with x and y values switched.
    """
    if isinstance(data, Data):
        new_x, new_y = data.y, data.x
        return Data(new_x, new_y, **kwargs)
    raise TypeError(
        f"data needs to be a Data object but was instead {type(data)}")
def empty_data():
    """Generates an empty :class:`.Data` object.

    This is useful for place holding, and takes no parameters.

    Returns
    -------
    empty_data : Data
        A Data object that contains no data points.
    """
    # np.float_ was removed in NumPy 2.0, np.float64 is the same type.
    return Data(np.empty((0, 2), dtype=np.float64))
def sum(data_list):
    """Performs the sum of the y data of a set of Data class objects.

    Parameters
    ----------
    data_list : [Data]
        List of Data objects to sum together. A dictionary with Data
        objects as the values is also accepted, and a single Data object
        is returned unchanged.

    Returns
    -------
    summed_data : Data
        A Data object with the summed y values of the original data sets.

    Raises
    ------
    TypeError
        If `data_list` is not a list, dict, or Data object, or if it
        contains anything that is not a Data object.
    ValueError
        If `data_list` is empty, or if the x values of the Data objects
        do not agree.
    """
    if isinstance(data_list, Data):
        return data_list
    elif isinstance(data_list, dict):
        data_list = list(data_list.values())
    elif not isinstance(data_list, list):
        raise TypeError(
            f"The data_list was of type {type(data_list)} where as it "
            f"needs to be either a list or a dict with Data objects as the "
            f"values.")

    if not data_list:
        raise ValueError(
            "The data_list was empty so there is nothing to sum.")

    for dat in data_list:
        if not isinstance(dat, Data):
            raise TypeError(
                f"List contained type {type(dat)} where is must "
                f"only contain gigaanalysis.data.Data types.")

    total = data_list[0]
    for dat in data_list[1:]:
        # Adding the Data object, rather than only its y values, makes
        # the addition check that the x values agree.
        total = total + dat
    return total
def mean(data_list):
    """Performs the mean of the y data of a set of Data class objects.

    Parameters
    ----------
    data_list : [Data]
        List of Data objects to average together, can also be a
        dictionary with Data objects as the values. A single Data object
        is returned unchanged.

    Returns
    -------
    averaged_data : Data
        A Data object with the averaged y values of the original data
        sets.
    """
    if isinstance(data_list, Data):
        # The mean of one data set is itself. Without this the length
        # used below would be the number of points in the Data object.
        return data_list
    return sum(data_list)/len(data_list)
def _fit_one_y(data, x_value, x_range, poly_deg, std=False):
    """A function used by :func:`y_from_fit` that calculates the y value
    from one x value.

    The points within `x_range` centred on `x_value` are fitted with a
    polynomial after shifting x so that `x_value` sits at zero. The
    constant term of the fit is then the interpolated y value.

    Parameters
    ----------
    data : Data
        The data to take the points from.
    x_value : float
        The x value to find the y value at.
    x_range : float
        The full width of the window of x values to fit over.
    poly_deg : int
        The degree of the polynomial to fit.
    std : {False, 'fit', 'residual'}, optional
        If 'fit' the error comes from the covariance of the fit, if
        'residual' it is the standard deviation of the fit residuals.

    Returns
    -------
    y_value : float
        The fitted y value. If `std` is set a tuple of the value and the
        error is returned instead.

    Raises
    ------
    ValueError
        If there are fewer than ``poly_deg + 1`` points in the range or
        if `std` is not a recognised option.
    """
    xs, ys = data.x_cut(x_value - x_range/2, x_value + x_range/2).both
    xs = xs - x_value
    # A polynomial of degree n has n + 1 coefficients, so at least that
    # many points are needed for the fit to be determined.
    if len(xs) < poly_deg + 1:
        raise ValueError(
            f"There was only {len(xs)} in the provided range which is not "
            f"enough to fit a {poly_deg} order polynomial.")

    if not std:
        return np.polyfit(xs, ys, poly_deg)[-1]

    if std == 'fit':
        val, cov = np.polyfit(xs, ys, poly_deg, cov=True)
        err = np.sqrt(cov[-1, -1])
    elif std == 'residual':
        val = np.polyfit(xs, ys, poly_deg)
        err = np.std(ys - np.polyval(val, xs))
    else:
        raise ValueError("std was not either 'fit' or 'residual'.")
    return val[-1], err
def y_from_fit(data, x_value, x_range, poly_deg=1, as_Data=False, std=False):
    """Fits a polynomial over a range to interpolate a given value.

    This makes use of :func:`numpy.polyfit` to find an interpolated value
    of y from a data object and a given x value.

    Parameters
    ----------
    data : Data
        The data to interpolate the value from. Should be a sorted data
        object.
    x_value : float or numpy.ndarray
        The value of the independent variable to obtain the associated
        dependent variable.
    x_range : float
        The range of independent variables to perform the fit over.
    poly_deg : int, optional
        The order of the polynomial to use when fitting to find the
        result. The default is `1` which is a linear fit.
    as_Data : bool, optional
        If default of False y values are given as a float or an array.
        If `True` then a Data object is returned.
    std : {False, 'fit', 'residual'}, optional
        If 'fit' or 'residual' then the standard deviation is returned
        after the values. The standard deviation can either be calculated
        from the error in the fit (using 'fit') or from the distribution
        of the residuals of the fit (using 'residual'). The default value
        is `False`, where only the value will be returned.

    Returns
    -------
    y_value : float, numpy.ndarray, or Data
        The y values obtained at the associated value of x for the fit
        performed. The type depends if multiple points are requested and
        if `as_Data` is set. If `std` is set then the standard deviation
        follows in the same format.
    """
    # The abstract numpy types are used as np.float_ was removed in
    # NumPy 2.0, this also accepts the other numpy precisions.
    if not isinstance(data, Data):
        raise TypeError(
            f"data needs to be a Data object but was a {type(data)}.")
    elif not isinstance(x_range, (int, float, np.integer, np.floating)):
        raise TypeError(
            f"x_range needs to be a float but was a {type(x_range)}")
    elif not isinstance(poly_deg, (int, np.integer)):
        raise TypeError(
            f"poly_deg needs to be a int but was a {type(poly_deg)}")

    x_value = np.asarray(x_value)
    if not (np.issubdtype(x_value.dtype, np.floating)
            or np.issubdtype(x_value.dtype, np.integer)):
        raise TypeError(
            f"x_value needs to be of float type but was a {x_value.dtype}")
    elif x_value.ndim > 1:
        raise TypeError(
            f"x_value can a float or a 1D array like of floats but was of "
            f"shape {x_value.shape}")

    if std and std not in ('fit', 'residual'):
        raise ValueError("std must either False or be 'fit' or "
            f"'residual' but was {std}")

    if x_value.size == 1:
        result = _fit_one_y(data, x_value, x_range, poly_deg, std=std)
        if not as_Data:
            return result
        if std:
            val, err = result
            return Data([[x_value, val]]), Data([[x_value, err]])
        return Data([[x_value, result]])

    fits = np.array(
        [_fit_one_y(data, xv, x_range, poly_deg, std=std)
         for xv in x_value])
    if std:
        # Each fit gave a (value, error) pair, so split the two columns.
        val, err = fits.T
        if as_Data:
            return Data(x_value, val), Data(x_value, err)
        return val, err
    if as_Data:
        return Data(x_value, fits)
    return fits
def collect_y_values(data_list):
    """Collates the y values into an array from a collection of Data
    objects.

    This takes either a list or dictionary of Data objects and collects
    the y values into one array. This can be useful for special
    comparisons such as trimmed means and standard deviations.

    Parameters
    ----------
    data_list : list or dict
        A list of Data objects or a dictionary where the values are Data
        objects. The x values of all of these need to be the same. A
        single Data object is treated as a list of one.

    Returns
    -------
    x_vals : numpy.ndarray
        One copy of the x values of the arrays.
    all_data : numpy.ndarray
        All the y data from the different data objects, each one in its
        own column.

    Raises
    ------
    TypeError
        If `data_list` is of the wrong type, holds objects that are not
        Data objects, or the x values of the Data objects do not match.
    ValueError
        If `data_list` is empty.
    """
    if isinstance(data_list, Data):
        data_list = [data_list]
    elif isinstance(data_list, dict):
        data_list = list(data_list.values())
    elif not isinstance(data_list, list):
        raise TypeError(
            f"The data_list was of type {type(data_list)} where as it "
            f"needs to be either a list or a dict with Data objects as the "
            f"values.")

    if not data_list:
        raise ValueError(
            "The data_list was empty so there are no y values to collect.")

    x_vals = None
    for dat in data_list:
        if not isinstance(dat, Data):
            raise TypeError(
                f"List contained type {type(dat)} where is must "
                f"only contain gigaanalysis.data.Data types.")
        if x_vals is None:
            x_vals = dat.x  # the first data set provides the reference
        elif not np.array_equal(dat.x, x_vals):
            raise TypeError(
                f"The x values in the arrays do not match.")

    # The same (x_vals, all_data) pair is returned however many data
    # sets were given, and the columns are stacked in one call.
    all_data = np.column_stack([dat.y for dat in data_list])
    return x_vals, all_data
def __make_x_vals(min_x, max_x, step_size=None, num_points=None,
                  shift_step=True):
    """Generate a set of evenly spaced x values.

    If `step_size` is given :func:`numpy.arange` is used from `min_x` up to
    `max_x`, and when `shift_step` is true the starting value is first
    rounded up to a whole multiple of `step_size`. Otherwise, if
    `num_points` is given, :func:`numpy.linspace` is used between `min_x`
    and `max_x`. A ValueError is raised if neither is given.
    """
    if step_size is None and num_points is None:
        raise ValueError(
            "Must define either step_size or num_points.")
    if step_size is None:
        return np.linspace(min_x, max_x, num_points)
    # step_size takes priority over num_points when both are supplied.
    start = np.ceil(min_x/step_size)*step_size if shift_step else min_x
    return np.arange(start, max_x, step_size)


def __match_x_list(data_list, step_size=None, num_points=None,
                   shift_step=True):
    """Interpolate all the Data objects in a list onto the same x values.

    It takes the arguments from :func:`match_x`, works out the largest
    range of x values that is covered by every data set, generates the new
    x values with :func:`__make_x_vals`, and interpolates every data set
    onto them with :meth:`Data.interp_values`. If neither `step_size` nor
    `num_points` is given the length of the longest data set is used as the
    number of points.
    """
    if not isinstance(data_list, list):
        raise TypeError(
            f"data_list need to be list it was a {type(data_list)}")

    lower, upper, longest = -np.inf, np.inf, 0
    for dat in data_list:
        if not isinstance(dat, Data):
            raise TypeError(
                f"data_list needs to be a list of Data objects but "
                f"contained the type {type(dat)}.")
        dat_low, dat_high = dat.min_x(), dat.max_x()
        # The shared range runs from the largest minimum to the smallest
        # maximum of the data sets.
        if not lower > dat_low:
            lower = dat_low
        if not upper < dat_high:
            upper = dat_high
        longest = max(longest, len(dat))

    if step_size is None and num_points is None:
        num_points = longest
    new_x = __make_x_vals(
        lower, upper,
        step_size=step_size, num_points=num_points, shift_step=shift_step)
    return [dat.interp_values(new_x) for dat in data_list]
def match_x(data_list, step_size=None, num_points=None, shift_step=True):
    """Transform a collection of data sets to have the same x values.

    This applies :meth:`Data.interp_values` to every data object with the
    largest possible range of x values to produce the new set of data.
    This is useful if the data objects are to be combined arithmetically.

    Parameters
    ----------
    data_list : list or dict of Data
        A list of data objects or dictionary with data objects as the
        values. A tuple of data objects or a single Data object is also
        accepted.
    step_size : float, optional
        Sets the spacing in the x values to a fixed amount. If given
        :func:`numpy.arange` is called.
    num_points : int, optional
        The number of points to generate for the x values, only used if
        `step_size` is not given or None. If used :func:`numpy.linspace`
        is called.
    shift_step : bool, optional
        Only valid if `step_size` is not None. The default is True and
        then the first value is an integer number of the steps. If False
        the lowest x value is used as the first value of the step.

    Returns
    -------
    new_data_list : list or dict of Data objects
        The new data objects with the x values that are interpolated to
        be all the same. If a dict is provided a dict is returned with the
        same keys as before, otherwise a list is returned.

    Raises
    ------
    TypeError
        If `data_list` is not a list, tuple, dict, or Data object.
    """
    options = dict(
        step_size=step_size, num_points=num_points, shift_step=shift_step)
    if isinstance(data_list, Data):
        return __match_x_list([data_list], **options)
    elif isinstance(data_list, (list, tuple)):
        return __match_x_list(list(data_list), **options)
    elif isinstance(data_list, dict):
        # Pair the keys with the new values directly; unpacking
        # zip(*items()) fails on an empty dictionary.
        new_vals = __match_x_list(list(data_list.values()), **options)
        return dict(zip(data_list.keys(), new_vals))
    else:
        raise TypeError(
            f"data_list need to be either a list or a dictionary "
            f"but was of the type {type(data_list)}.")
def __interp_list(data_list, x_vals, kind='linear'):
    """Interpolate every Data object in a list onto `x_vals`.

    Used by `interp_set`. Each item is checked to be a Data object and then
    interpolated with :meth:`Data.interp_values`; a TypeError is raised for
    the first item that is not a Data object.
    """
    def interp_one(dat):
        if not isinstance(dat, Data):
            raise TypeError(
                f"One of the objects in the list was not a Data "
                f"object. It was of type {type(dat)}")
        return dat.interp_values(x_vals, kind=kind)

    return [interp_one(dat) for dat in data_list]
def interp_set(data_list, x_vals, kind='linear'):
    """Interpolate all Data objects in a list or dictionary.

    This applies :meth:`Data.interp_values` to every item in the set and
    returns a new set.

    Parameters
    ----------
    data_list : list or dict
        A list or dictionary of Data objects to interpolate. A tuple of
        Data objects or a single Data object is also accepted.
    x_vals : :class:`numpy.ndarray`
        The x values to interpolate to produce the new set.
    kind : str or int, optional
        The type of interpolation to use. Passed to
        :meth:`Data.interp_values`, default is `linear`.

    Returns
    -------
    interpolated_set : list or dict
        The new set of Data with interpolated values. If a dict is
        provided a dict with the same keys is returned, otherwise a list
        is returned.

    Raises
    ------
    TypeError
        If `data_list` is not a list, tuple, dict, or Data object, or if
        one of the items is not a Data object.
    """
    if isinstance(data_list, Data):
        return [data_list.interp_values(x_vals, kind=kind)]
    elif isinstance(data_list, (list, tuple)):
        return __interp_list(list(data_list), x_vals, kind=kind)
    elif isinstance(data_list, dict):
        # Pair the keys with the new values directly; unpacking
        # zip(*items()) fails on an empty dictionary.
        new_vals = __interp_list(
            list(data_list.values()), x_vals, kind=kind)
        return dict(zip(data_list.keys(), new_vals))
    else:
        raise TypeError(
            f"The data_list needs to be a dictionary or a list but was "
            f"instead of type {type(data_list)}.")
def concatenate(data_list, strip_sort=False):
    """Combine a collection of Data objects into one.

    This takes either a list, dictionary, or tuple of Data objects or
    arrays and concatenates their values into one data object. This makes
    use of :func:`numpy.concatenate`.

    Parameters
    ----------
    data_list : list or dict
        The collection of Data objects to combine. Items can also be two
        column :class:`numpy.ndarray` or lists of length two, which are
        taken as a single row. A single Data object is returned as it is,
        unless `strip_sort` is set.
    strip_sort : bool or {'strip', 'sort'}, optional
        This will pass to the strip_sort keyword argument when producing
        the final Data object.

    Returns
    -------
    concatenated_data : Data
        The data combined into one Data object.
    """
    if isinstance(data_list, Data):
        if not strip_sort:
            return data_list
        return Data(data_list, strip_sort=strip_sort)

    if isinstance(data_list, (list, tuple)):
        items = data_list
    elif isinstance(data_list, dict):
        items = data_list.values()
    else:
        raise TypeError(
            f"data_list need to be either a list or a dictionary "
            f"but was of the type {type(data_list)}.")

    def to_rows(dat):
        """Gives the item as a two column array."""
        if isinstance(dat, Data):
            return dat.values
        if isinstance(dat, np.ndarray):
            if dat.ndim != 2 or dat.shape[1] != 2:
                raise ValueError(
                    f"The values to concatenate in the form of a "
                    f"numpy array are the wrong shape {dat.shape}")
            return dat
        if isinstance(dat, list) and len(dat) == 2:
            # A two item list is read as one (x, y) row.
            return np.asarray([dat])
        raise TypeError(
            f"The list contains objects which are not Data objects "
            f"one of the objects was a {type(dat)}")

    stacked = np.concatenate([to_rows(dat) for dat in items], axis=0)
    return Data(stacked, strip_sort=strip_sort)
def save_arrays(array_list, column_names, file_name, **kwargs):
    """Write a list of arrays to csv.

    This saves a collection of one dimensional :class:`numpy.ndarray`
    stored in a list into a .csv file. It does this by passing it to a
    :class:`pandas.DataFrame` object and using the method `to_csv`. If the
    arrays are different lengths the values are padded with NaNs. Integer
    arrays are converted to floats so that they can be padded with NaNs.
    kwargs are passed to :meth:`pandas.DataFrame.to_csv`, and `index` is
    set to False unless it is given.

    Parameters
    ----------
    array_list : [numpy.ndarray]
        A list of 1d numpy.ndarrays to save to the .csv file.
    column_names : [str]
        A list of column names for the .csv file the same length as the
        list of data arrays.
    file_name : str
        The file name to save the file as.

    Raises
    ------
    TypeError
        If `array_list` or `column_names` is not a list.
    ValueError
        If the two lists are different lengths, if `array_list` is empty,
        or if it contains objects which are not 1D numpy arrays.
    """
    if not isinstance(array_list, list):
        raise TypeError("array_list is not a list.")
    elif not isinstance(column_names, list):
        raise TypeError("column_names is not a list.")
    elif len(array_list) != len(column_names):
        raise ValueError("array_list and column_names are not "
                         "the same lenght.")
    elif not array_list:
        raise ValueError("array_list is empty so there is nothing to save.")

    for arr in array_list:
        if not isinstance(arr, np.ndarray):
            raise ValueError("array_list contains objects that are not "
                             "numpy arrays.")
        elif arr.ndim != 1:
            raise ValueError("array_list arrays are not 1D.")
    max_length = max(arr.size for arr in array_list)

    columns = []
    for arr in array_list:
        if np.issubdtype(arr.dtype, np.integer):
            # NaN can't be stored in an integer array, so padding one with
            # NaN fails unless it is converted to floats first.
            arr = arr.astype(np.float64)
        columns.append(np.pad(
            arr, (0, max_length - arr.size),
            mode='constant', constant_values=np.nan))
    to_save = np.column_stack(columns)

    kwargs.setdefault('index', False)
    pd.DataFrame(to_save, columns=column_names).to_csv(file_name, **kwargs)
def save_data(data_list, data_names, file_name, x_name='X', y_name='Y',
              name_space='/', no_sapce=True, **kwargs):
    """Save a list of data objects in to a .csv file.

    This works by passing to :func:`save_arrays` and subsequently to
    :meth:`pandas.DataFrame.to_csv`. kwargs are passed to
    :meth:`pandas.DataFrame.to_csv`.

    Parameters
    ----------
    data_list : [gigaanalysis.data.Data]
        A list of Data objects to be saved to a .csv file.
    data_names : [str]
        A list the same length as the data list of names of each of the
        data objects. These will make the first half of the column name
        in the .csv file.
    file_name : str
        The name the file will be saved as.
    x_name : str, optional
        The string to be appended to the data name to indicate the x
        column in the file. Default is 'X'.
    y_name : str, optional
        The string to be appended to the data name to indicate the y
        column in the file. Default is 'Y'.
    name_space : str, optional
        The string that separates the data_name and the x or y column
        name in the column headers in the .csv file. The default is '/'.
    no_sapce : bool, optional
        If True, which is the default, spaces are stripped from the ends
        of the names as well as commas and the characters in `name_space`.
    """
    if not isinstance(data_list, list):
        raise TypeError("data_list is not a list.")
    if not isinstance(data_names, list):
        raise TypeError("data_names is not a list.")
    if len(data_list) != len(data_names):
        raise ValueError("data_list and data_names are not "
                         "the same lenght.")

    # The x and y arrays of each data object are saved side by side.
    array_list = []
    for dat in data_list:
        if not isinstance(dat, Data):
            raise ValueError("data_list contains objects that are not "
                             "Data objects.")
        array_list.extend((dat.x, dat.y))

    # Characters removed from both ends of every name going in the header.
    to_strip = ',' + name_space + (' ' if no_sapce else '')

    def clean(label):
        return str(label).strip(to_strip)

    x_label, y_label = clean(x_name), clean(y_name)
    column_names = []
    for name in data_names:
        prefix = clean(name) + name_space
        column_names += [prefix + x_label, prefix + y_label]

    save_arrays(array_list, column_names, file_name, **kwargs)
def save_dict(data_dict, file_name, x_name='X', y_name='Y',
              name_space='/', **kwargs):
    """Save a dictionary of data objects in to a .csv file.

    This works by passing to :func:`save_data` and subsequently to
    :meth:`pandas.DataFrame.to_csv`. The names of the data objects are
    taken from the keys of the data_dict. kwargs are passed on through
    :func:`save_data`.

    Parameters
    ----------
    data_dict : {str: gigaanalysis.data.Data}
        A dictionary of Data objects to be saved to a .csv file. The keys
        of the dictionary will be used as the data names when passed to
        :func:`save_data`.
    file_name : str
        The name the file will be saved as.
    x_name : str, optional
        The string to be appended to the data name to indicate the x
        column in the file. Default is 'X'.
    y_name : str, optional
        The string to be appended to the data name to indicate the y
        column in the file. Default is 'Y'.
    name_space : str, optional
        The string that separates the data_name and the x or y column
        name in the column headers in the .csv file. The default is '/'.
    """
    if not isinstance(data_dict, dict):
        raise TypeError("data_dict is not a dictionary.")
    if not all(isinstance(dat, Data) for dat in data_dict.values()):
        raise ValueError("data_dict contains values which are not "
                         "Data objects.")

    names = list(data_dict.keys())
    sweeps = list(data_dict.values())
    save_data(
        sweeps, names, file_name,
        x_name=x_name, y_name=y_name, name_space=name_space, **kwargs)
def load_dict(file_name, name_space='/', strip_sort=False, interp_full=None,
              **kwargs):
    """Load from a file a dictionary full of Data objects.

    The type of file it loads is the default produced by
    :func:`save_dict`. It assumes there is one line for the headers and
    they are used for the keys of the dictionary. It also removes NaNs at
    the end of each sweep to undo what is produced by uneven length of
    data objects. It makes use of :func:`pandas.read_csv`, and extra
    keyword arguments are passed to there.

    Parameters
    ----------
    file_name : str
        The name and location of the file.
    name_space : str
        The string that separates the key from the x and y names.
    strip_sort : bool or {'strip', 'sort'}, optional
        Passed to :class:`Data`. If true the data points with NaN are
        removed using :func:`numpy.isfinite` and the data is sorted by
        the x values. If 'strip' is given NaNs are removed but the data
        is not sorted. If 'sort' is given the data is sorted but NaNs are
        left in. Default is False so the data isn't changed.
    interp_full : float, optional
        Passed to :class:`Data`. This interpolates the data to give an
        even spacing using the inbuilt method :meth:`to_even`. The default
        is None and the interpolation isn't done.

    Returns
    -------
    data_dict : {str: Data}
        The data contained in the file in the form of a dictionary where
        the keys are obtained from the header of the data file.

    Raises
    ------
    ValueError
        If the file has an odd number of columns, or if the names of a
        pair of x and y columns do not give the same key.
    """
    def to_key(column_name):
        """Makes the key from the column name."""
        if name_space in column_name:
            return name_space.join(column_name.split(name_space)[:-1])
        else:
            return column_name

    data_df = pd.read_csv(file_name, **kwargs)
    num_columns = len(data_df.columns)
    if num_columns % 2 == 1:
        raise ValueError(
            f"There needs to be an even number of columns. "
            f"The csv had {num_columns}.")

    data_dict = {}
    for i in range(num_columns // 2):
        to_read = data_df.iloc[:, [2*i, 2*i + 1]]
        key = to_key(to_read.columns[0])
        if key != to_key(to_read.columns[1]):
            raise ValueError(
                f"The columns names did not match for X and Y data. \n"
                f"The columns were {to_read.columns}.")

        # This next bit removes nans added to pad the data. It counts how
        # many rows at the end of the x column are NaN.
        pair = to_read.values
        last_val = None
        for n, x in enumerate(pair[::-1, 0]):
            if x == x:  # False if NaN
                last_val = n
                break

        if last_val is None:  # No data found
            vals = np.array([]).reshape(0, 2)
        elif last_val == 0:  # All cells had data
            vals = pair
        else:
            vals = pair[:-last_val, :]
        data_dict[key] = Data(
            vals, strip_sort=strip_sort, interp_full=interp_full)

    return data_dict
def gen_rand(n, func=None, seed=None, interp_full=None):
    """Produce a Data object with random values.

    This uses :meth:`numpy.random.Generator.random` to produce a
    :class:`Data` object. The numbers in both x and y values are
    continually increasing in steps between 0 and 1, as they are the
    cumulative sums of the random numbers. A function can be applied to
    the y values.

    Parameters
    ----------
    n : int
        Number of data points to have in the object. Any NumPy integer
        type is also accepted.
    func : function, optional
        A function with one parameter to transform the y values. It is
        applied using :meth:`Data.apply_y`. The default is None and the y
        values are left as they are.
    seed : float, optional
        Seed to be passed to :func:`numpy.random.default_rng`.
    interp_full : float, optional
        If given the data is evenly interpolated, passed to
        :class:`Data`. The default is `None` which doesn't interpolate the
        data.

    Returns
    -------
    data : Data
        The generated data object.

    Raises
    ------
    TypeError
        If `n` is not an integer.
    ValueError
        If `n` is less than one.
    """
    # np.integer covers every NumPy integer width, not only the default
    # np.int_ type.
    if not isinstance(n, (int, np.integer)):
        raise TypeError(
            f"n needs to be an int, but was a {type(n)}")
    elif n < 1:
        raise ValueError(
            f"n need to be a positive integer, but was {n}")

    steps = np.random.default_rng(seed).random((n, 2))
    gen_data = Data(np.cumsum(steps, axis=0), interp_full=interp_full)
    if func is not None:
        gen_data = gen_data.apply_y(func)
    return gen_data