Source code for gigaanalysis.data


"""GigaAnalysis - Data Type - :mod:`gigaanalysis.data`
--------------------------------------------------------

This one module is imported directly into the :mod:`gigaanalysis` namespace,
so that the classes and functions here can be accessed directly.

This holds the :class:`Data` class and the functions that will manipulate 
them. This forms the backbone of the rest of the GigaAnalysis. The point of 
the :class:`Data` object is to hold sweeps. These are data sets with one 
independent and one dependant variable, which are super common in 
experimental physics research. By assuming the data is of this type more 
assumptions and error checking can be facilitated, and this is what 
GigaAnalysis aims to take advantage of.
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.interpolate import interp1d  # Used often to interpolate values


def _pick_float_dtype(to_check):
    """Return np.complex128 for complex dtypes, np.float64 otherwise.
    Adapted from scipy.interpolate"""
    if isinstance(to_check, np.ndarray):
        dtype = to_check.dtype
    else:
        dtype = type(to_check)
    if np.issubdtype(dtype, np.complexfloating):
        return np.complex_
    else:
        return np.float_


def _as_float_array(x):
    """Convert the input into a C contiguous float array.
    Adapted from scipy.interpolate
    NB: Upcasts half- and single-precision floats to double precision.
    """
    x = np.ascontiguousarray(x)
    x = x.astype(_pick_float_dtype(x), copy=False)
    return x


[docs]class Data():
    """
    The Data Class

    Data object holds the data in the measurements. It works as a simple
    wrapper of a two column numpy array (:class:`numpy.ndarray`). The data 
    stored in the object is meant to be interpreted as x is a independent 
    variable and y is dependant variable.


    Parameters
    ----------
    values : numpy.ndarray 
        A two column numpy array with the x data in the first column and 
        the y data in the second. If a second no array is given then the 
        first corresponds to the x data.
    split_y : numpy.ndarray, optional
        A 1D numpy array containing the y data. If None all the data 
        should be contained in first array.
    strip_sort : bool or {'strip', 'sort'}, optional
        If true the data points with NaN are removed using 
        :func:`numpy.isfinite` and the data is sorted by the x values.
        If 'strip' is given NaNs are removed but the data isn't sorted.
        If 'sort' is given the data is sorted but NaNs are left in.
        Default is False so the data isn't changed.
    interp_full : float, optional
        This interpolates the data to give an even spacing using the 
        inbuilt method :meth:`to_even`. The default is None and the 
        interpolation isn't done.

    Attributes
    ----------
    values : numpy.ndarray
        Two column numpy array with the x and y data in.
    x : numpy.ndarray
        x data in a 1D numpy array.
    y : numpy.ndarray
        The y data in a 1D numpy array.
    both : (numpy.ndarray, numpy.ndarray)
        A two value tuple with the :attr:`x` and :attr:`y` in.

    Notes
    -----
    Mathematical operations applied to the Data class just effects 
    the :attr:`y` values, the :attr:`x` values stay the same. To 
    act two :class:`Data` objects together the :attr:`x` values need to 
    agree. :class:`Data` objects also be mathematically acted to 
    array_like objects (:func:`numpy.asarray`) of length 1 or equal to the 
    length of the Data.

    """
    def __init__(self, values, split_y=None, strip_sort=False,
            interp_full=None):
        # Set up Class
        if isinstance(values, Data):
            values = values.values  # If you pass a Data object to the class

        values = np.asarray(values)

        if split_y is not None:
            split_y = np.asarray(split_y)
            if values.ndim != 1:
                raise ValueError(
                    f"If x and y data are split both need to be a "
                    f"1D numpy array. values has shape {values.shape}")
            elif split_y.ndim != 1:
                raise ValueError(
                    f"If x and y data are split both need to be a "
                    f"1D numpy array. split_y has shape {split_y.shape}")
            elif values.size != split_y.size:
                raise ValueError(
                    f"If x and y data are split both need to be the same "
                    f"size. values has size {values.size} and split_y has "
                    f"size {split_y.size}")
            values = np.concatenate(
                [values[:, None], split_y[:, None]], axis=1)

        if values.ndim != 2:
            raise ValueError(
                f"values needs to be a two column numpy array."
                f"values has the shape {values.shape}")
        elif values.shape[1] != 2:
            raise ValueError(
                f"values needs to be a two column numpy array."
                f"values has the shape {values.shape}")

        if strip_sort:
            if strip_sort == 'strip':
                values = values[np.isfinite(values).all(axis=1)]
            elif strip_sort == 'sort':
                values = values[np.argsort(values[:, 0]), :]
            else:
                values = values[np.isfinite(values).all(axis=1)]
                values = values[np.argsort(values[:, 0]), :]


        # all data in hidden attribute
        self.__values = _as_float_array(values)

        if interp_full is not None:
            self.to_even(interp_full)


    # Set up the attributes
    __slots__ = ("__values",)

    def __attribute_set(self, value):
        raise AttributeError(
            f"Can't set the attributes of a Data object directly. "
            f"Use .set_x, .set_y, .set_data functions.")

    def values(self):
        return self.__values

    values = property(values, __attribute_set, None,
        "Two column numpy array with the x and y data in.")

    def x(self):
        return self.__values[:, 0]

    x = property(x, __attribute_set, None,
        "x data in a 1D numpy array.")

    def y(self):
        return self.__values[:, 1]

    y = property(y, __attribute_set, None,
        "y data in a 1D numpy array.")

    def both(self):
        return self.__values[:, 0], self.__values[:, 1]

    both = property(both, __attribute_set, None,
        "A two value tuple with the :attr:`x` and :attr:`y` in.")

    # standard python methods
    def __str__(self):
        return np.array2string(self.values)

    def __repr__(self):
        return f"GA Data object:\n {str(self.values)[1:-1]}"

    def __len__(self):
        return self.x.size

    def __bool__(self):
        if self.values.size == 0:
            return False
        else:
            return True

    __array_ufunc__ = None
    # This is so that the user need to specify .x or .y when acting 
    # with numpy functions.


    # For mathematical operations
    def __maths_check(self, other, operation,):
        """This performs the error checking on the standard operators

        Parameters
        ----------
        other : :class:`Data` or array_like
            The feature that the data object maths acts on.
        operation : str
            The name of the operation being applied.

        Returns
        -------
            Array like object to calculate with
        """
        if isinstance(other, Data):
            if np.array_equal(self.x, other.x):
                return other.y
            else:
                raise ValueError(
                    f"The two Data classes do not have the same x "
                    f"values, so cannot be {operation}")
        try:
            other = np.asarray(other, dtype=_pick_float_dtype(other))
        except:
            raise TypeError(
                f"Data cannot be {operation} with object of "
                f"type {type(other)}.")
        if other.size == 1:
            return other
        elif other.ndim != 1:
            raise ValueError(
                f"Array to {operation} Data object with is of the wrong "
                f"dimension. Its shape is {other.shape}")
        elif other.size != self.x.size:
            raise ValueError(
                f"Array to {operation} Data object with is of the wrong "
                f"length. Its length is {other.size} while the Data "
                f"is {self.x.size}")
        else:
            return other

[docs]    def __mul__(self, other):
        """Multiplication of the y values. """
        other = self.__maths_check(other, "multiplied")
        return Data(self.x, self.y*other)
     
    def __rmul__(self, other):
        other = self.__maths_check(other, "multiplied")
        return Data(self.x, other*self.y)

[docs]    def __truediv__(self, other):
        """Division of the y values."""
        other = self.__maths_check(other, "divided")
        return Data(self.x, self.y/other)

    def __rtruediv__(self, other):
        other = self.__maths_check(other, "divide by")
        return Data(self.x, other/self.y)

[docs]    def __add__(self, other):
        """Addition of the y values."""
        other = self.__maths_check(other, "added")
        return Data(self.x, self.y + other)

    def __radd__(self, other):
        other = self.__maths_check(other, "added")
        return Data(self.x, other + self.y)

[docs]    def __sub__(self, other):
        """Subtraction of the y values."""
        other = self.__maths_check(other, "subtracted")
        return Data(self.x, self.y - other)

    def __rsub__(self, other):
        other = self.__maths_check(other, "subtracted")
        return Data(self.x, other - self.y)

[docs]    def __mod__(self, other):
        """Performs the modulus with the y values."""
        other = self.__maths_check(other, "divided mod")
        return Data(self.x, self.y % other)

    def __rmod__(self, other):
        other = self.__maths_check(other, "divided mod")
        return Data(self.x, other % self.y)

[docs]    def __floordiv__(self, other):
        """Floor division on the y values."""
        other = self.__maths_check(other, "floor division")
        return Data(self.x, self.y // other)

    def __rfloordiv__(self, other):
        other = self.__maths_check(other, "floor division")
        return Data(self.x, other // self.y)

[docs]    def __pow__(self, other):
        """Takes the power of the y values."""
        other  = self.__maths_check(other, "exponentiated")
        return Data(self.x, self.y ** other)

    def __rpow__(self, other):
        other = self.__maths_check(other, "exponentiated")
        return Data(self.x, other ** self.y)

[docs]    def __abs__(self):
        """Calculates the absolute value of the y values.
        """
        return Data(self.x, abs(self.y))

[docs]    def __neg__(self):
        """Negates the y values"""
        return Data(self.x, -self.y)

[docs]    def __pos__(self):
        """Performs a unity operation on y values"""
        return Data(self.x, self.y)

[docs]    def __eq__(self, other):
        """Data class is only equal to other Data classes with the same data.
        """
        if type(other) != type(self):
            return False
        else:
            return np.array_equal(self.values, other.values)

[docs]    def __iter__(self):
        """The iteration happens on the values, like if was numpy array.
        """
        return iter(self.values)

    # For indexing behaviour
    def __index_check(self, k):
        """Check an index given if it is correct type and size.
        
        Raises errors if it is the wrong type or shape. Also returns a bool
        which is true if only one item is called.

        Parameters
        ----------
        k : slice or can be passed to :func:slice
            A object obtained from index calls

        Returns
        -------
        individual : bool
            Is the index call only for one item?
        """
        if isinstance(k, tuple):
            raise IndexError(
                "Data object only accepts one index")
        elif isinstance(k, slice):
            return False
        try:
            k = np.asarray(k)
        except:
            raise IndexError(
                "Data cannot index with this type.")
        if k.size == 1:
            return True
        elif k.ndim != 1:
            raise IndexError(
                "Data objec can only Index is one dimension.")
        elif k.dtype == np.int_:
            return False
        elif k.size != self.x.size:
            raise IndexError(
                f"Index given was wrong length. The length of index was "
                f"{k.size} and the Data is length {self.x.size}")
        else:
            return False

[docs]    def __getitem__(self, k):
        """Indexing returns a subset of the Data object.

        If given a slice or and array of boolean a new Data object is 
        produced. If given a int a length two array with [x, y] is returned. 
        """
        if self.__index_check(k):
            return self.values[k]
        else:
            return Data(self.values[k])

[docs]    def __setitem__(self, k, v):
        """Item assignment is not allowed in Data objects.

        This kind of action is possible with the functions :meth:`set_x`, 
        :meth:`set_y`, and :meth:`set_data`.
        """
        raise Warning(
            "Data objects do not allow item assignment. For this "
            "functionality see .set_x, .set_y, and .set_data.")

[docs]    def set_x(self, idx, val):
        """This is used for setting x values.

        Works similarly to ``Data.x[idx] = val`` but with more error 
        checking. The previous code would also work (and be faster) but 
        more care should be taken. The built in function 
        :func:`slice(start, end, step)` maybe useful.

        Parameters
        ----------
        idx : slice, int
            Objects that can be passed to a :class:`numpy.ndarray` as 
            an index.
        val : numpy.ndarray
            The values to assign to the indexed x values. 
        """
        if isinstance(val, Data):
            raise TypeError(
                "Cannot set the object type with a Data object.")
        self.__index_check(idx)
        new_x = self.x
        new_x[idx] = val
        self.__init__(new_x, self.y)

[docs]    def set_y(self, idx, val):
        """This is used for setting y values.

        Works similarly to ``Data.y[idx] = val`` but with more error 
        checking. The previous code would also work (and be faster) but 
        more care should be taken. The built in function 
        :func:`slice(start, end, step)` maybe useful.

        Parameters
        ----------
        idx : slice, int
            Objects that can be passed to a :class:`numpy.ndarray` as 
            an index.
        val : numpy.ndarray
            The values to assign to the indexed y values. 
        """
        if isinstance(val, Data):
            raise TypeError(
                "Cannot set the object type with a Data object.")
        self.__index_check(idx)
        new_y = self.y
        new_y[idx] = val
        self.__init__(self.x, new_y)

[docs]    def set_data(self, idx, val):
        """This is used for setting x and y values.

        Works similarly to ``Data.values[idx] = val`` but with more error 
        checking. The previous code would also work (and be faster) but 
        more care should be taken. The built in function 
        :func:`slice(start, end, step)` maybe useful.

        Parameters
        ----------
        idx : slice, int
            Objects that can be passed to a :class:`numpy.ndarray` as 
            an index.
        val : numpy.ndarray, Data
            The values to assign to the indexed values. This can only be a
            two column :class:`numpy.ndarray` or a :class:`Data` object.
        """
        if self.__index_check(idx):
            size = 2
        else:
            size = self.values[idx].size
        if not isinstance(val, (Data, np.ndarray)):
            raise TypeError(
                f"The value to assign data must be a data object or a two "
                f"column numpy array. The type give was {type(val)}.")
        elif isinstance(val, Data):
            if size != val.values.size:
                raise ValueError(
                    f"The Data to set is a different size to the Data "
                    f"object given. The size to index was {size/2} "
                    f"while the data to set was {val.values.size/2}.")
            else:
                new_data = self.values
                new_data[idx] = val.values
        elif val.ndim != 2:
            raise ValueError(
                f"The dimension of the numpy array to set to is not "
                f"the correct shape. Needs to be a two column array shape "
                f"given was {val.shape}.")
        elif val.shape[1] != 2:
            raise ValueError(
                f"The dimension of the numpy array to set to does not "
                f"have two columns. Needs to be a two column array shape "
                f"given was {val.shape}.")
        elif val.size != size:
            raise ValueError(
                f"The Data to set is a different size to the numpy "
                f"array given. The size to index was {size/2} "
                f"while the data to set was {val.size/2}.")
        else:
            new_data = self.values
            new_data[idx] = val
        self.__init__(new_data)

    # Simple useful methods
[docs]    def strip_nan(self):
        """This removes any row which has a nan or infinite values in.
        
        Returns
        -------
        stripped_data : Data
            Data class without non-finite values in.
        """
        return Data(self.values[np.isfinite(self.values).all(axis=1)])

[docs]    def sort(self):
        """Sorts the data set in x and returns the new array.

        Returns
        -------
        sorted_data : Data
            A Data class with the sorted data.
        """
        return Data(self.values[np.argsort(self.x), :])

[docs]    def min_x(self):
        """This provides the lowest value of x

        Returns
        -------
        x_min : float
            The minimum value of x
        """
        return np.min(self.x)

[docs]    def max_x(self):
        """This provides the highest value of x

        Returns
        -------
        x_max : float
            The maximum value of x
        """
        return np.max(self.x)

[docs]    def spacing_x(self):
        """Returns the average spacing in x

        Returns
        -------
        x_max : float
            The average spacing in the x data
        """
        return (self.max_x() - self.min_x())/len(self)

[docs]    def x_cut(self, x_min, x_max):
        """This cuts the data to a region between x_min and x_max.
        
        Parameters
        ----------    
        x_min : float
            The minimal x value to cut the data.
        x_max : float
            The maximal x value to cut the data.
        
        Returns
        -------
        cut_data : Data    
            A data object with the values cut to the given x range.
        """
        if x_min > x_max:
            raise ValueError('x_min should be smaller than x_max')
        return Data(self.values[(self.x > x_min) & (self.x < x_max)])

[docs]    def y_from_x(self, x_val, bounds_error=True, kind='linear'):
        """Gives the y value for a certain x value or set of x values.

        Parameters
        ----------
        x_val : float
            X values to interpolate y values from.
        bounds_error : bool, optional
            If an error should thrown in x value is out of range, 
            default True.
        kind : str or int, optional
            The type of interpolation to use. Passed to 
            :func:`scipy.interpolate.interp1d`, default is `linear`.
        
        Returns
        -------
        y_val : float or numpy.ndarray
            Corresponding to the requested x values in an array if only one 
            value is given a float is returned.
        """
        if bounds_error and \
            (np.max(x_val)>self.max_x() or np.min(x_val)<self.min_x()):
            raise ValueError(
                f"The given x_values are out side of the range of data "
                f"which is between {self.min_x()} and {self.max_x()}")

        y_val = interp1d(self.x, self.y, bounds_error=False,
            fill_value=(self.y.min(), self.y.max()), kind=kind)(x_val)
        if y_val.size != 1:
            return y_val
        else:
            return float(y_val)

[docs]    def apply_x(self, function):
        """This takes a function and applies it to the x values.

        Parameters
        ----------
        function : Callable
            The function to apply to the x values.
        
        Returns
        -------
        transformed_data
            Data class with new x values.
        """
        return Data(function(self.x), self.y)

[docs]    def apply_y(self, function):
        """This takes a function and applies it to the y values.

        Parameters
        ----------
        function : Callable
            The function to apply to the y values.
        
        Returns
        -------
        transformed_data
            Data class with new y values.
        """
        return Data(self.x, function(self.y))

[docs]    def append(self, new_data, in_place=False):
        """This adds values to the end of the data object.

        Parameters
        ----------
        new_data : Data
            These are the values to add onto the end of the data object
        in_place : bool, optional
            Weather to edit the object or to return a new one. The default 
            is `False` which returns a new object.

        Returns
        -------
        combined_data : Data
            If in_place is `False` then a new Data object is returned.
        """
        if isinstance(new_data, Data):
            pass
        else:
            try:
                new_data = Data(new_data)
            except:
                raise ValueError(
                    f"The new_data to append was not a Data object or "
                    f"could be cast to one. Was of type {type(new_data)}")
        new_vals = np.append(self.values, new_data.values, axis=0)
        if in_place:
            self.__init__(new_vals)
        else:
            return Data(new_vals)

    # Methods for Interpolation of Data 
[docs]    def interp_range(self, min_x, max_x, 
        step_size=None, num_points=None, shift_step=True,
        kind='linear'):
        """Evenly interpolates in x the data between a min and max value.

        This is used for combining datasets with corresponding but different 
        x values. Either `step_size` or `num_points` can be defined. If 
        `step_size` is defined :func:`numpy.arange` is used. If `num_points` 
        is defined :func:`numpy.linspace` is used.
        If using `step_size` it rounds `min_x` to the next integer value of 
        the steps, unless `shift_step` is `False`.

        If values outside the range of the original data need to be 
        passed to be interpolated, this is possible with 
        :func:`Data.interp_values`.
        It uses :func:`scipy.interpolate.interp1d`.
        
        Parameters
        ----------
        min_x :float
            The minimum x value in the interpolation.
        max_y : float
            The maximum x value in the interpolation.
        step_size : float, optional
            The step size between each point. Either this or num_points must 
            be defined.
        num_points : int, optional
            The number of points to interpolate. Either this or step_size 
            must be defined.
        shift_step: bool, optional
            If the `min_x` value should be rounded to the next whole step. 
            The default is True.
        kind : str or int, optional
            The type of interpolation to use. Passed to 
            :func:`scipy.interpolate.interp1d`, default is `linear`.

        
        Returns
        -------
        interpolated_data : Data
            A Data object with evenly interpolated points.
        """
        if step_size is None and num_points is None:
            raise ValueError(
                f"Must define either step_size or num_points.")
        if min_x > max_x:
            min_x, max_x = max_x, min_x  # order min and max
        if np.min(self.x) > min_x:
            raise ValueError("min_x value to interpolate is below data")
        if np.max(self.x) < max_x:
            raise ValueError("max_x value to interpolate is above data")
        if step_size is not None:
            if shift_step:
                min_x = np.ceil(min_x/step_size)*step_size
            x_vals = np.arange(min_x, max_x, step_size)
        elif num_points is not None:
            x_vals = np.linspace(min_x, max_x, num_points)
        # The bounds are used in the rare case of floating point issues.
        min_y = self.y[self.x.argmin()]
        max_y = self.y[self.x.argmax()]
        y_vals = interp1d(self.x, self.y, kind=kind,
            bounds_error=False, fill_value=(min_y, max_y))(x_vals)
        return Data(x_vals, y_vals)

[docs]    def interp_step(self, step_size, shift_step=True, kind='linear'):
        """Evenly interpolates in x the data between a min and max value.

        This uses :meth:`Data.interp_range` specifying `step_size` and 
        giving the maximum range of x points.
        If using `step_size` it rounds `min_x` to the next integer value of 
        the steps, unless `shift_step` is `False`.
        
        Parameters
        ----------
        step_size : float, optional
            The step size between each point. Either this or num_points must 
            be defined.
        shift_step: bool, optional
            If the `min_x` value should be rounded to the next whole step. 
            The default is True.
        kind : str or int, optional
            The type of interpolation to use. Passed to 
            :func:`scipy.interpolate.interp1d`, default is `linear`.
        
        Returns
        -------
        interpolated_data : Data
            A Data object with evenly interpolated points.
        """
        return self.interp_range(self.min_x(), self.max_x(),
            step_size=step_size, shift_step=shift_step, kind=kind,)

[docs]    def interp_number(self, num_points, kind='linear'):
        """Evenly interpolates in x the data for a fixed point number.

        This uses :meth:`Data.interp_range` specifying `num_points` and 
        giving the maximum range of x points.
        
        Parameters
        ----------
        num_points : int
            The number of points to interpolate.
        kind : str or int, optional
            The type of interpolation to use. Passed to 
            :func:`scipy.interpolate.interp1d`, default is `linear`.
        
        Returns
        -------
        interpolated_data : Data
            A Data object with evenly interpolated points.
        """
        return self.interp_range(self.min_x(), self.max_x(),
            num_points=num_points, kind=kind,)

[docs]    def interp_values(self, x_vals, kind='linear', bounds_error=True,
            fill_value=np.nan, strip_sort=False):
        """Produce Data object from interpolating x values.

        This uses :func:`scipy.interpolate.interp1d` to produce a Data 
        object by interpolating y values from given x values.

        Parameters
        ----------
        x_vals : array_like
            The x values to interpolate which will be the x values.
        kind : str or int, optional
            The type of interpolation to use. Passed to 
            :func:`scipy.interpolate.interp1d`, default is `linear`.
        bounds_error : bool, optional
            If default of `True` data outside the existing range will throw 
            an error. If `False` then the value is set by `fill_value`.
        fill_value : float or (float, float) or `extrapolate`, optional
            If bounds_error is `False` then this value will be used outside 
            the range. Passed to :func:`scipy.interpolate.interp1d`.
        strip_sort : bool, optional
            The default is `False`, where to sort and remove NaNs from the 
            Data object before returning.

        Returns
        -------
        interpolated_data : Data
            A Data object with the given x values and interpolated y values.
        """
        x_vals  = np.asarray(x_vals)
        if x_vals.ndim != 1:
            raise ValueError(
                f"x_vals had shape {x_vals.shape} where as it need to be 1D")

        if bounds_error:
            if self.min_x() > np.min(x_vals):
                raise ValueError(
                    "min_x value to interpolate is below data and "
                    "bounds_error is True.")
            if self.max_x() < np.max(x_vals):
                raise ValueError(
                    "max_x value to interpolate is above data and "
                    "bounds_error is True")
            # The bounds are used in the rare case of floating point issues.
            min_y = self.y[self.x.argmin()]
            max_y = self.y[self.x.argmax()]
            y_vals = interp1d(self.x, self.y, kind=kind,
                bounds_error=False, fill_value=(min_y, max_y))(x_vals)
        else:
            y_vals = interp1d(self.x, self.y, kind=kind,
                bounds_error=False, fill_value=fill_value)(x_vals)

        return Data(x_vals, y_vals, strip_sort=strip_sort)

[docs]    def to_even(self, step_size, shift_step=True, kind='linear'):
        """Evenly interpolates the data and updates the data object.

        This uses :meth:`Data.interp_range` specifying `step_size` and 
        giving the maximum range of x points.
        If using `step_size` it rounds `min_x` to the next integer value of 
        the steps, unless `shift_step` is `False`.
        
        Parameters
        ----------
        step_size : float, optional
            The step size between each point. Either this or num_points must 
            be defined.
        shift_step: bool, optional
            If the `min_x` value should be rounded to the next whole step. 
            The default is True.
        kind : str or int, optional
            The type of interpolation to use. Passed to 
            :func:`scipy.interpolate.interp1d`, default is `linear`.
        """
        self.__init__(self.interp_range(self.min_x(), self.max_x(),
            step_size=step_size, shift_step=shift_step,
            kind=kind,).values)

    # Plotting Method
[docs]    def plot(self, *args, axis=None, **kwargs):
        """Simple plotting utility
        
        Makes use of matplotlib function :func:`matplotlib.pyplot.plot`.
        Runs ``matplotlib.pyplot.plot(self.x, self.y, *args, **kwargs)``
        If provided an axis keyword which operates so that if given
        ``axis.plot(self.x, self.y, *args, **kwargs)``.
        """
        if axis is None:
            plt.plot(self.x, self.y, *args, **kwargs)
        else:
            axis.plot(self.x, self.y, *args, **kwargs)

    # Saving Method
[docs]    def to_csv(self, filename, columns=["X", "Y"], **kwargs):
        """Saves the data as a simple csv

        Uses :func:`pandas.DataFrame.to_csv` and kwargs are pass to it. The 
        index keyword is set to False by default.

        Parameters
        ----------
        filename : str
            Filename to save the data as.
        columns : [str, str]
            The title of the two columns.
        """
        if 'index' not in kwargs:
            kwargs['index'] = False

        pd.DataFrame(self.values, columns=columns
            ).to_csv(filename, **kwargs)


[docs]def swap_xy(data, **kwargs):
    """Interchange the independent and dependent variables.
    
    This takes a :class:`.Data` object and returns a new one with the x and 
    y variables swapped around. Keyword arguments are pass to the 
    :class:`.Data` class.

    Parameters
    ----------
    data : Data
        The data to switch the x and y values.

    Returns
    -------
    swapped_data : Data
        A new :class:`.Data` object with x and y values switched.
    """
    if not isinstance(data, Data):
        raise TypeError(
            f"data needs to be a Data object but was instead {type(data)}")

    return Data(data.y, data.x, **kwargs)


[docs]def empty_data():
    """Generates an empty :class:`.Data` object.

    This is useful for place holding, and takes no parameters.

    Returns
    -------
    empty_data : Data
        A Data object that contains no data points.
    """
    return Data(np.array([], dtype=np.float_).reshape(0, 2))


[docs]def sum(data_list):
    """Preforms the sum of the y data a set of Data class objects.
    
    Parameters
    ----------
    data_list : [Data]
        List of Data objects to sum together.
    
    Returns
    -------
    summed_data : Data
        A Data object with the summed y values of the original data sets.
    """
    if isinstance(data_list, list):
        pass
    elif isinstance(data_list, dict):
        data_list = list(data_list.values())
    elif isinstance(data_list, Data):
        return data_list
    else:
        raise TypeError(
            f"The data_list was of type {type(data_list)} where as it "
            f"needs to be either a list or a dict with Data objects as the "
            f"values.")
    if not isinstance(data_list[0], Data):
            raise TypeError(
                f"List contained type {type(data_list[0])} where is must "
                f"only contain gigaanalysis.data.Data types.")
    if len(data_list) == 1:
        return data_list[0]
    total = data_list[0]
    for dat in data_list[1:]:
        if not isinstance(dat, Data):
            raise TypeError(
                f"List contained type {type(dat)} where is must "
                f"only contain gigaanalysis.data.Data types.")
        total += dat.y
    return total


[docs]def mean(data_list):
    """Preforms the mean of the y data a set of Data class objects.
    
    Parameters
    ----------
    data_list : [Data]
        List of Data objects to sum together can also be a dictionary.
    
    Returns
    -------
    averaged_data : Data
        A Data object with the summed y values of the original data sets.
    """
    return sum(data_list)/len(data_list)


def _fit_one_y(data, x_value, x_range, poly_deg, std=False):
    """A function used by :func:`y_from_fit` that calculates the y value 
    from one x value.
    """
    xs, ys = data.x_cut(x_value - x_range/2, x_value + x_range/2).both
    xs = xs - x_value
    if len(xs) + 1 <= poly_deg:
        raise ValueError(
            f"There was only {len(xs)} in the provided range which is not "
            f"enough to fit a {poly_deg} order polynomial.")
    if std:
        if std == 'fit':
            val, err = np.polyfit(xs, ys, poly_deg, cov=True)
            err = np.sqrt(err[-1, -1])
        elif std == 'residual':
            val = np.polyfit(xs, ys, poly_deg)
            y_res = ys.copy()
            for n in range(len(val)):
                y_res = y_res - (xs**n)*val[-n-1]
            err = np.std(y_res)
        else:
            raise ValueError("std was not either 'fit' or 'residual'.")
        return val[-1], err
    else:
        return np.polyfit(xs, ys, poly_deg)[-1]


[docs]def y_from_fit(data, x_value, x_range, poly_deg=1, as_Data=False, 
        std=False):
    """Fits a polynomial over a range to interpolate a given value.

    This makes use of :func:`numpy.polyfit` to find an interpolated value of 
    y form a data object and a given x value.

    Parameters
    ----------
    data : Data
        The data to interpolate the value from. Should be a sorted data 
        object.
    x_value : float or numpy.ndarray
        The value of the independent to obtain the associated dependent 
        variable.
    x_range : float
        The range of independent variables to perform the fit over.
    poly_deg : int, optional
        The order of the polynomial to use when fitting to find the result. 
        The default is `1` which is a linear fit.
    as_Data : bool, optional
        If default of False y values are given as an float or an array. If 
        `True` then a Data object is returned.
    std : bool, optional
        If `fit` or 'residual' then the standard deviation is returned after 
        the values. The standard deviation can either be calculated from the 
        error in the fit (using 'fit') or from the distribution of the 
        residuals of the fit (using 'residual'). The default value is 
        `False`, where only the value will be returned.

    Returns
    -------
    y_value : float, numpy.ndarray, or Data
        The y values obtained at the associated value of x for the fit 
        performed. The type depends if multiple points are requested and if 
        'as_Data` is set. If `std` is `True` then the standard deviation is 
        followed in the same format.
    """
    if not isinstance(data, Data):
        raise TypeError(
            f"data needs to be a Data object but was a {type(data)}.")
    elif not isinstance(x_range, (int, float, np.int_, np.float_)):
        raise TypeError(
            f"x_range needs to be a float but was a {type(x_range)}")
    elif not isinstance(poly_deg, (int, np.int_)):
        raise TypeError(
            f"poly_deg needs to be a int but was a {type(poly_deg)}")

    x_value = np.asarray(x_value)
    if x_value.dtype != np.float_ and x_value.dtype != np.int_:
        raise TypeError(
            f"x_value needs to be of float type but was a {x_value.dtype}")
    elif x_value.ndim > 1:
        raise TypeError(
            f"x_value can a float or a 1D array like of floats but was of "
            f"shape {x_value.shape}")

    if std:
        if std not in ['fit', 'residual']:
            raise ValueError("std must either False or be 'fit' or "
                f"'residual' but was {std}")

    if x_value.size == 1:
        if not as_Data:
            return _fit_one_y(data, x_value, x_range, poly_deg, std=std)
        else:
            if std:
                val, err = _fit_one_y(data, x_value, x_range, poly_deg, 
                    std=std)
                return Data([[x_value, val]]), Data([[x_value, err]])
            else:
                return Data([[x_value, 
                    _fit_one_y(data, x_value, x_range, poly_deg)]])
    elif as_Data:
        if std:
            val, err = np.array(
                [_fit_one_y(data, xv, x_range, poly_deg, std=std) \
                    for xv in x_value]).T
            return Data(x_value, val), Data(x_value, err)
        else:
            return Data(x_value, np.array(
                [_fit_one_y(data, xv, x_range, poly_deg) for xv in x_value]))
    else:
        if std:
            val, err = np.array(
                [_fit_one_y(data, xv, x_range, poly_deg, std=std) \
                    for xv in x_value]).T
            return val, err
        else:
            return np.array([_fit_one_y(data, xv, x_range, poly_deg) \
                for xv in x_value])


[docs]def collect_y_values(data_list):
    """Collates the y values into a array from a collection of Data objects.

    This takes either a list or dictionary of Data objects and collects the
    y values into one array. This can be useful of special comparisons such 
    as trimmed means and standard deviations.

    Parameters
    ----------
    data_list : list or dict
        A list of Data objects or a dictionary where the values are Data 
        objects. The x values or all of these need to be the same.

    Returns
    -------
    x_vals : numpy.ndarray
        One copy of the x values of the arrays.
    all_data : numpy.ndarray
        All the y data from the different data objects each on in it's own
        column.
    """
    if isinstance(data_list, list):
        pass
    elif isinstance(data_list, dict):
        data_list = list(data_list.values())
    elif isinstance(data_list, Data):
        return data_list.y[:, None]
    else:
        raise TypeError(
            f"The data_list was of type {type(data_list)} where as it "
            f"needs to be either a list or a dict with Data objects as the "
            f"values.")
    if not isinstance(data_list[0], Data):
            raise TypeError(
                f"List contained type {type(data_list[0])} where is must "
                f"only contain gigaanalysis.data.Data types.")
    if len(data_list) == 1:
        return data_list[0].y[:, None]
    all_data = data_list[0].y[:, None]
    x_vals = data_list[0].x
    for dat in data_list[1:]:
        if not isinstance(dat, Data):
            raise TypeError(
                f"List contained type {type(dat)} where is must "
                f"only contain gigaanalysis.data.Data types.")
        elif not np.array_equal(dat.x, x_vals):
            raise TypeError(
                f"The x values in the arrays do not match.")
        all_data = np.concatenate([all_data, dat.y[:, None]], axis=1)
    return x_vals, all_data


def __make_x_vals(min_x, max_x, 
    step_size=None, num_points=None, shift_step=True):
    """This generates a set of evenly spaced x values.
    """
    if step_size is not None:
        if shift_step:
            min_x = np.ceil(min_x/step_size)*step_size
        x_vals = np.arange(min_x, max_x, step_size)
    elif num_points is not None:
        x_vals = np.linspace(min_x, max_x, num_points)
    else:
        raise ValueError(
            f"Must define either step_size or num_points.")
    return x_vals


def __match_x_list(data_list,
    step_size=None, num_points=None, shift_step=True):
    """This interpolates all the data sets in a list to the same x values.

    It takes the values from :func:`match_x` and each set to  
    :func:`__match_x_list`, after working out the largest range of x_values
    that can be interpolated across every dataset.
    """
    min_x = -np.inf
    max_x = np.inf
    max_len = 0
    if not isinstance(data_list, list):
        raise TypeError(
            f"data_list need to be list it was a {type(data_list)}")
    for dat in data_list:
        if not isinstance(dat, Data):
            raise TypeError(
                f"data_list needs to be a list of Data objects but "
                f"contained the type {type(dat)}.")
        min_x = min_x if min_x > dat.min_x() else dat.min_x()
        max_x = max_x if max_x < dat.max_x() else dat.max_x()
        max_len = max_len if max_len > len(dat) else len(dat)
    if step_size is None and num_points is None:
        num_points = max_len
    x_vals = __make_x_vals(min_x, max_x, step_size=step_size,
        num_points=num_points, shift_step=shift_step)
    new_data_list = [
        dat.interp_values(x_vals)
        for dat in data_list
    ]
    return new_data_list
        

[docs]def match_x(data_list,
    step_size=None, num_points=None, shift_step=True):
    """This transform a collection of dataset to have the same x values.

    This applies :meth:`Data.interp_values` to every data object with the 
    largest possible range of x values to produce the new set of data. This 
    is useful if the data object want to be combined arithmetically.

    Parameters
    ----------
    data_list : list or dict of Data
        A list of data objects or dictionary with data objects as the values.
    step_size : float, optional
        Sets the spacing in the x values to a fixed amount if given  
        :func:`numpy.arange` is called.
    num_points : int, optional
        The number of points to generates for the x values only used if 
        `step_size` is not given or None. If used :func:`numpy.linspace` 
        is called.
    shift_step : bool, optional 
        Only valid if step_size is not new. The default is True and then 
        the first value is an integer number of the steps. If False the 
        lowest x value is used as the first value of the step.

    Returns
    -------
    new_data_list : list or dict of Data objects
        The new data objects with the x values that are interoperated to be 
        all the same. If a dict is provided a dict is returned with the 
        same keys as before.
    """
    if isinstance(data_list, Data):
        return __match_x_list([data_list], step_size=step_size,
            num_points=num_points, shift_step=shift_step)
    elif isinstance(data_list, list):
        return __match_x_list(data_list, step_size=step_size,
            num_points=num_points, shift_step=shift_step)
    elif isinstance(data_list, dict):
        key, vals = zip(*data_list.items())
        new_vals = __match_x_list(list(vals), step_size=step_size,
            num_points=num_points, shift_step=shift_step)
        return dict(zip(key, new_vals))
    elif isinstance(data_list, tuple):
         return __match_x_list(list(data_list), step_size=step_size,
            num_points=num_points, shift_step=shift_step)
    else:
        raise TypeError(
            f"data_list need to be either a list or a dictionary "
            f"but was of the type {type(data_list)}.")


def __interp_list(data_list, x_vals, kind='linear'):
    """Interpolates all the Data objects in a list. Is used by `interp_set`.
    """
    new_list = []
    for dat in data_list:
        if not isinstance(dat, Data):
            raise TypeError(
                f"One of the objects in the list was not a Data "
                f"object. It was of type {type(dat)}")
        new_list.append(dat.interp_values(x_vals, kind=kind))
    return new_list


[docs]def interp_set(data_list, x_vals, kind='linear'):
    """Interpolates all Data objects in list or dictionary.

    This applied :meth:`Data.interp_values` to every item in the set and 
    returns a new set.

    Parameters
    ----------
    data_list : list or dict
        A list or dictionary of Data objects to interpolate.
    x_vals : :class:`numpy.ndarray`
        The x values to interpolate to produce the new set.
    kind : str or int, optional
        The type of interpolation to use. Passed to 
        :func:`scipy.interpolate.interp1d`, default is `linear`.

    Returns
    -------
    interpolated_set : list or dict
        The new set of Data is the same form but with interpolated values.
    """
    if isinstance(data_list, Data):
        return [data_list.interp_values(x_vals, kind=kind)]
    elif isinstance(data_list, list):
        return __interp_list(data_list, x_vals, kind=kind)
    elif isinstance(data_list, dict):
        key, vals = zip(*data_list.items())
        return dict(zip(key, __interp_list(vals, x_vals, kind=kind)))
    elif isinstance(data_list, tuple):
        return __interp_list(list(data_list), x_vals, kind=kind)
    else:
        raise TypeError(
            f"The data_list needs to be a dictionary or a list but was "
            f"instead of type {type(data_list)}.")


[docs]def concatenate(data_list, strip_sort=False):
    """Combines our collection of Data objects into one.

    This takes either a list, dictionary, or tuple of Data objects or arrays 
    and concatenates their values into one data object.
    This makes use of :func:`numpy.concatenate`.

    Parameters
    ----------
    data_list : list or dict
        The collection of Data objects to combine.
    strip_sort : bool or {'strip', 'sort'}, optional
        This will pass to the strip_sort keyword argument when producing 
        the final Data object.

    Returns
    -------
    concatenated_data : Data
        The data combined into one Data object.
    """
    if isinstance(data_list, Data):
        if strip_sort:
            return Data(data_list, strip_sort=strip_sort)
        else:
            return data_list
    elif isinstance(data_list, list):
        pass
    elif isinstance(data_list, dict):
        data_list = list(data_list.values())
    elif isinstance(data_list, tuple):
         data_list = list(data_list)
    else:
        raise TypeError(
            f"data_list need to be either a list or a dictionary "
            f"but was of the type {type(data_list)}.")
    all_vals = []
    for dat in data_list:
        if isinstance(dat, Data):
            all_vals.append(dat.values)
        elif isinstance(dat, np.ndarray):
            if len(dat.shape) == 2 and dat.shape[1] == 2:
                all_vals.append(dat)
            else:
                raise ValueError(
                    f"The values to concatenate in the form of a "
                    f"numpy array are the wrong shape {dat.shape}")
        elif isinstance(dat, list) and len(dat) == 2:
            all_vals.append(np.asarray([dat]))
        else:
            raise TypeError(
                f"The list contains objects which are not Data objects "
                f"one of the objects was a {type(dat)}")
    return Data(np.concatenate(all_vals, axis=0), strip_sort=strip_sort)


[docs]def save_arrays(array_list, column_names, file_name, **kwargs):
    """Writes a list of arrays to csv.

    This saves a collection of one dimensional :class:`numpy.ndarray` 
    stored in a list into a .csv file. It does this by passing it to a 
    :class:`pandas.DataFrame` object and using the method `to_csv`. If the 
    arrays are different lengths the values are padded with NaNs.
    kwargs are passed to :meth:`pandas.DataFrame.to_csv`.

    Parameters
    ----------
    array_list : [numpy.ndarray]
        A list of 1d numpy.ndarrays to save to the .csv file.
    columns_names : [str]
        A list of column names for the .csv file the same length as the list 
        of data arrays.
    file_name : str
        The file name to save the file as.
    """
    if not isinstance(array_list, list):
        raise TypeError("array_list is not a list.")
    elif not isinstance(column_names, list):
        raise TypeError("column_names is not a list.")
    elif len(array_list) != len(column_names):
        raise ValueError("array_list and column_names are not "
            "the same lenght.")
    max_length = 0
    for arr in array_list:
        if not isinstance(arr, np.ndarray):
            raise ValueError("array_list contains objects that are not "
                "numpy arrays.")
        elif len(arr.shape) != 1:
            raise ValueError("array_list arrays are not 1D.")
        elif max_length < arr.size:
            max_length = arr.size
    to_concat = []
    for arr in array_list:
        to_concat.append(np.pad(arr, (0, max_length - arr.size),
            mode='constant',
            constant_values=np.nan)[:, None])
    to_save = np.concatenate(to_concat, axis=1)
    if 'index' not in kwargs.keys():
        kwargs['index'] = False
    pd.DataFrame(to_save, columns=column_names).to_csv(file_name, **kwargs)


[docs]def save_data(data_list, data_names, file_name, 
    x_name='X', y_name='Y', name_space='/', no_sapce=True, **kwargs):
    """Saves a list of data objects in to a .csv file.

    This works by passing to :func:`save_arrays` and subsequently to 
    :meth:`pandas.DataFrame.to_csv`. kwargs are passed to 
    :meth:`pandas.DataFrame.to_csv`

    Parameters
    ----------
    data_list : [gigaanalysis.data.Data]
        A list of Data objects to be saved to a .csv file
    data_names : [str]
        A list the same length as the data list of names of each of the data 
        objects. These will make the first half of the column name in the 
        .csv file.
    file_name : str
        The name the file will be saved as.
    x_name : str, optional
        The string to be append to the data name to indicate the x column in 
        the file. Default is 'X'.
    y_name : str, optional
        The string to be append to the data name to indicate the y column in 
        the file. Default is 'Y'.
    name_space : str, optional
        The string that separates the data_name and the x or y column name 
        in the column headers in the .csv file. The default is '/'.
    """
    if not isinstance(data_list, list):
        raise TypeError("data_list is not a list.")
    elif not isinstance(data_names, list):
        raise TypeError("data_names is not a list.")
    elif len(data_list) != len(data_names):
        raise ValueError("data_list and data_names are not "
            "the same lenght.")
    array_list = []
    for dat in data_list:
        if not isinstance(dat, Data):
            raise ValueError("data_list contains objects that are not "
                "Data objects.")
        array_list.append(dat.x)
        array_list.append(dat.y)
    column_names = []
    striping = ',' + name_space
    if no_sapce:
        striping += ' '
    x_name = str(x_name).strip(striping)
    y_name = str(y_name).strip(striping)
    for name in data_names:
        name = str(name).strip(striping)
        column_names.append(name + name_space + x_name)
        column_names.append(name + name_space + y_name)
    save_arrays(array_list, column_names, file_name, **kwargs)


[docs]def save_dict(data_dict, file_name,
    x_name='X', y_name='Y', name_space='/', **kwargs):
    """Saves a dictionary of data objects in to a .csv file.

    This works by passing to :func:`save_data` and subsequently to 
    :meth:`pandas.DataFrame.to_csv`. The names of the data objects are taken 
    from  the keys of the data_dict. kwargs are passed to 
    :meth:`pandas.DataFrame.to_csv`

    Parameters
    ----------
    data_list : [gigaanalysis.data.Data]
        A dictionary of Data objects to be saved to a .csv file. The keys of 
        the dictionary will be used as the data names when passed to 
        :func:`save_data`.
    file_name : str
        The name the file will be saved as.
    x_name : str, optional
        The string to be append to the data name to indicate the x column in 
        the file. Default is 'X'.
    y_name : str, optional
        The string to be append to the data name to indicate the y column in 
        the file. Default is 'Y'.
    name_space : str, optional
        The string that separates the data_name and the x or y column name 
        in the column headers in the .csv file. The default is '/'.
    """
    if not isinstance(data_dict, dict):
        raise TypeError("data_dict is not a dictionary.")
    for dat in data_dict.values():
        if not isinstance(dat, Data):
            raise ValueError("data_dict contains values which are not "
                "Data objects.")
    save_data(list(data_dict.values()), list(data_dict.keys()), file_name,
        x_name=x_name, y_name=y_name, name_space=name_space, **kwargs)


[docs]def load_dict(file_name, name_space='/',
    strip_sort=False, interp_full=None, **kwargs):
    """Loads from a file a dictionary full of Data objects.

    The type of file it loads is the default produced by :func:`save_dict`. 
    It assumes there is one line for the headers and they are used for the 
    keys of the dictionary. It also removes NaNs at the end of each sweep to 
    undo what is produced by uneven length of data objects.
    It makes use of :func:`pandas.read_csv`, and extra keyword arguments are 
    passed to there.

    Parameters
    ----------
    file_name : str
        The name and location of the file.
    name_space : str
        The string that separates the key from the x and y names.
    strip_sort : bool or {'strip', 'sort'}, optional
        Passed to :class:`Data`.
        If true the data points with NaN are removed using 
        :func:`numpy.isfinite` and the data is sorted by the x values.
        If 'strip' is given NaNs are removed but the data is not sorted.
        If 'sort' is given the data is sorted but NaNs are left in.
        Default is False so the data isn't changed.
    interp_full : float, optional
        Passed to :class:`Data`.
        This interpolates the data to give an even spacing using the 
        inbuilt method :meth:`to_even`. The default is None and the 
        interpolation isn't done.

    Returns
    -------
    data_dict : {str: Data}
        The data contained in the file in the form of a dictionary where the 
        keys are obtained from the header of the data file.
    """
    def to_key(column_name):
        """Makes the key from the column name."""
        if name_space in column_name:
            return name_space.join(column_name.split(name_space)[:-1])
        else:
            return column_name

    data_df = pd.read_csv(file_name, **kwargs)
    if len(data_df.columns)%2 == 1:
        raise ValueError(
            f"There needs to be an even number of columns. "
            f"The csv had {len(data.columns)}.")
    data_dict = {}
    for i in range(int(len(data_df.columns)/2)):
        to_read = data_df.iloc[:, [2*i, 2*i+1]]
        key = to_key(to_read.columns[0])
        if key != to_key(to_read.columns[1]):
            raise ValueError(
                f"The columns names did not match for X and Y data. "
                f"The columns were {to_read.columns}.")
        # This next bit removes nans added to pad the data.
        last_val = None
        for n, x in enumerate(to_read.values[-1::-1, 0]):
            if x == x:  # False if NaN
                last_val = n
                break
        
        if last_val == None:  # No data found
            vals = np.array([]).reshape(0, 2) 
        elif last_val == 0:  # All cells had data
            vals = to_read.values
        else:
            vals = to_read.values[:-last_val, :]
        data_dict[key] = Data(vals,
            strip_sort=strip_sort, interp_full=interp_full)
    return data_dict


[docs]def gen_rand(n, func=None, seed=None, interp_full=None):
    """Produces Data object with random values.

    This uses :meth:`numpy.random.Generator.random` to produce a
    :class:`Data` object. The numbers in both x and y values are continually 
    increasing in steps between 0 and 1. A function can be applied to the y 
    values.

    Parameters
    ----------
    n : int
        Number of data point to have in the object.
    func : function
        A function with one parameter to transform the y values.
    seed : float
        Seed to be passed to :func:`numpy.random.default_rng`
    interp_full : float, optional
        If given the data is evenly interpolated, passed to :class:`Data`. 
        The default is `None` which doesn't interpolate the data.

    Returns
    -------
    data : Data
        The generated data object. 
    """
    if not isinstance(n, (int, np.int_)):
        raise TypeError(
            f"n needs to be an int, but was a {type(n)}")
    elif n < 1:
        raise ValueError(
            f"n need to be a positive integer, but was {n}")

    gen_data = Data(
        np.cumsum(np.random.default_rng(seed).random((n, 2)), axis=0),
        interp_full=interp_full)

    if func is not None:
        gen_data = gen_data.apply_y(func)

    return gen_data