Source code for rework_pysatl_mpest.distributions.continuous_dist

"""
A module providing an abstract class for continuous distributions.
"""

__author__ = "Danil Totmyanin, Aleksandra Ri"
__copyright__ = "Copyright (c) 2025 PySATL project"
__license__ = "SPDX-License-Identifier: MIT"

from abc import ABC, abstractmethod
from collections.abc import Sequence

from numpy import float64
from numpy.typing import ArrayLike, NDArray



[docs]
class ContinuousDistribution(ABC):
    """Abstract base class for continuous distributions.

    This class defines the basic mathematical functions of distributions
    that must be implemented by specific distributions. This class also
    provides some functions that are common to all distributions.

    Instances of subclasses can be compared for equality (``==``) and
    inequality (``!=``). Two instances are considered equal if they are of
    the exact same type and have identical parameter values.

    Attributes
    ----------
    name: str
        The name of the distribution (e.g., 'Normal', 'Gamma').
    params: set[str]
        The names of all parameters of the distribution
    params_to_optimize: set[str]
        Parameters names that are not fixed and can be optimized.
    fixed_params : set[str]
        A set of parameter names that are fixed and not subject to
        optimization.

    Methods
    -------

    **Implemented methods**

    .. autosummary::
        :toctree: generated/

        fix_param
        unfix_param
        get_params_vector
        set_params_from_vector

    **Abstract methods**

    .. autosummary::
        :toctree: generated/

        ppf
        pdf
        lpdf
        log_gradients
        generate

    Notes
    -----
    **Key Functionality**

    - Parameter management: fixing and releasing parameters for optimization.
    - Parameter vectorization: getting and setting parameters from a numpy
      vector.

    **Implementation Requirements**

    Subclasses must:

    1. Implement the :attr:`name` property to identify the distribution.

    2. Implement the :attr:`params` property to return all parameter names.

    3. Implement the abstract methods: :meth:`pdf`, :meth:`ppf`,
       :meth:`lpdf`, :meth:`log_gradients`, and :meth:`generate`.

    4. Define their parameters as instance attributes (e.g., :attr:`self.loc`,
       :attr:`self.scale`) with a :class:`rework_pysatl_mpest.core.Parameter` descriptor.

    """

    def __init__(self):
        """The constructor must be called by all descendants for the
        `fixed_params` attribute to be initialized.
        """

        self._fixed_params: set[str] = set()


[docs]
    def fix_param(self, name: str):
        """Fixes a parameter, excluding it from optimization and further changes.

        Parameters
        ----------
        name : str
            The name of the parameter to freeze.

        Raises
        ------
        ValueError
            If a parameter with the specified name does not exist.
        """

        if name not in self.params:
            raise ValueError(f"Parameter '{name}' does not exist in this distribution.")

        self._fixed_params.add(name)



[docs]
    def unfix_param(self, name: str):
        """Unfixes a parameter, allowing it to be changed again.

        If the parameter was not fixed, the method does nothing.

        Parameters
        ----------
        name : str
            The name of the parameter to unfix.
        """

        self._fixed_params.discard(name)



[docs]
    def get_params_vector(self, param_names: Sequence[str]) -> list[float]:
        """Retrieves specified parameter values as a list.

        Parameters
        ----------
        param_names : Sequence[str]
            A sequence of strings with the names of the parameters to retrieve.

        Returns
        -------
        list[float]
            A list containing the values of the requested parameters
            in the specified order.

        Raises
        ------
        ValueError
            If any of the requested parameter names do not exist in the
            distribution's :attr:`params`.
        """

        if not set(param_names).issubset(self.params):
            invalid_params = set(param_names) - self.params
            raise ValueError(f"Invalid parameter names provided: {invalid_params}")

        return [getattr(self, name) for name in param_names]



[docs]
    def set_params_from_vector(self, param_names: Sequence[str], vector: Sequence[float]):
        """Sets parameter values from a sequence of floats.

        Updates the distribution's parameters using values from the provided
        sequence. The order of values in the :attr:`vector` must correspond to the order
        of names in :attr:`param_names`.

        Parameters
        ----------
        param_names : Sequence[str]
            A sequence of parameter names to update.
        vector : Sequence[float]
            A sequence of new values for the parameters.

        Raises
        ------
        ValueError
            If any parameter names do not exist, or if the length of
            :attr:`param_names` does not match the length of :attr:`vector`.
        """

        if len(param_names) != len(vector):
            raise ValueError("The number of parameter names must match the number of values in the vector.")

        if not set(param_names).issubset(self.params):
            invalid_params = set(param_names) - self.params
            raise ValueError(f"Invalid parameter names provided: {invalid_params}")

        for name, value in zip(param_names, vector):
            setattr(self, name, value)


    @property
    @abstractmethod
    def name(self) -> str:
        """str: The name of the distribution (e.g., 'Normal', 'Gamma')."""

    @property
    @abstractmethod
    def params(self) -> set[str]:
        """set[str]: A set containing the names of all parameters of the distribution."""

    @property
    def params_to_optimize(self) -> set[str]:
        """set[str]: Gets the set of parameter names that are not fixed."""

        return self.params - self._fixed_params


[docs]
    @abstractmethod
    def pdf(self, X: ArrayLike) -> NDArray[float64]:
        """Probability Density Function.

        Parameters
        ----------
        X : ArrayLike
            The input data points at which to evaluate the PDF.

        Returns
        -------
        NDArray[np.float64]
            The PDF values corresponding to each point in :attr:`X`.
        """



[docs]
    @abstractmethod
    def ppf(self, P: ArrayLike) -> NDArray[float64]:
        """Percent Point Function (PPF) or quantile function.

        This is the inverse of the Cumulative Distribution Function (CDF).

        Parameters
        ----------
        P : ArrayLike
            The probability values (between 0 and 1) at which to evaluate
            the PPF.

        Returns
        -------
        NDArray[np.float64]
            The PPF values corresponding to each probability in :attr:`P`.
        """



[docs]
    @abstractmethod
    def lpdf(self, X: ArrayLike) -> NDArray[float64]:
        """Logarithm of the Probability Density Function.

        Evaluating the log-PDF is often more numerically stable than
        evaluating the PDF directly, especially for very small probability
        values.

        Parameters
        ----------
        X : ArrayLike
            The input data points at which to evaluate the LPDF.

        Returns
        -------
        NDArray[np.float64]
            The log-PDF values corresponding to each point in :attr:`X`.
        """



[docs]
    @abstractmethod
    def log_gradients(self, X: ArrayLike) -> NDArray[float64]:
        """Calculates the gradients of the log-PDF with respect to its parameters.

        The gradients are computed for the parameters that are not fixed.

        Parameters
        ----------
        X : ArrayLike
            The input data points at which to calculate the gradients.

        Returns
        -------
        NDArray[np.float64]
            An array where each row corresponds to a data point in :attr:`X` and
            each column corresponds to the gradient with respect to a specific
            optimizable parameter. The order of columns corresponds to the
            sorted order of :attr:`params_to_optimize`.
        """



[docs]
    @abstractmethod
    def generate(self, size: int) -> NDArray[float64]:
        """Generates random samples from the distribution.

        Parameters
        ----------
        size : int
            The number of random samples to generate.

        Returns
        -------
        NDArray[np.float64]
            A NumPy array containing the generated samples.
        """


    def __copy__(self) -> "ContinuousDistribution":
        """Creates a copy of the distribution instance.

        Returns
        -------
        ContinuousDistribution
            A new instance of the distribution, identical to the original.
        """
        params_dict = {p: getattr(self, p) for p in self.params}

        new_instance = self.__class__(**params_dict)
        new_instance._fixed_params = self._fixed_params.copy()

        return new_instance

    def __eq__(self, other: object):
        """Checks if two distribution objects are equal.

        Two distribution objects are considered equal if they are of the same
        type and all their parameters have the same values.

        Parameters
        ----------
        other : object
            The object to compare against.

        Returns
        -------
        bool
            True if the distributions are equal, False otherwise.
        """
        if not isinstance(other, ContinuousDistribution):
            return NotImplemented

        if type(self) is not type(other):
            return False

        sorted_params = sorted(list(self.params))

        return (
            self.name == other.name
            and self.params == other.params
            and self.get_params_vector(sorted_params) == other.get_params_vector(sorted_params)
        )

    def __hash__(self) -> int:
        """Computes the hash of the distribution.

        The hash is computed based on the distribution's name, its parameter
        names, and their corresponding values.

        Returns
        -------
        int
            The hash value of the distribution object.
        """
        sorted_params = sorted(list(self.params))
        param_values = tuple(self.get_params_vector(sorted_params))

        return hash(tuple([self.name, tuple(self.params), param_values]))