# Source code for rework_pysatl_mpest.distributions.pareto

"""Module providing pareto type 1 distribution class"""

__author__ = "Maksim Pastukhov"
__copyright__ = "Copyright (c) 2025 PySATL project"
__license__ = "SPDX-License-Identifier: MIT"


import numpy as np
from numpy import float64
from scipy.stats import pareto

from rework_pysatl_mpest.core.parameter import Parameter
from rework_pysatl_mpest.distributions.continuous_dist import ContinuousDistribution


class Pareto(ContinuousDistribution):
    """Class for the two-parameter Pareto distribution.

    The Pareto distribution is a power-law probability distribution commonly
    used to model phenomena with heavy-tailed behavior, such as income
    distribution, city population sizes, or file sizes.

    Parameters
    ----------
    shape : float
        Shape parameter. Must be positive.
    scale : float
        Scale parameter. Must be positive.

    Attributes
    ----------
    shape : float
        Shape parameter of the distribution.
    scale : float
        Scale (minimum) parameter of the distribution.

    Methods
    -------

    .. autosummary::
        :toctree: generated/

        pdf
        ppf
        lpdf
        log_gradients
        generate
    """

    PARAM_SHAPE = "shape"
    PARAM_SCALE = "scale"

    # Both parameters are validated on assignment: only strictly positive values pass.
    shape = Parameter(lambda x: x > 0, "Shape parameter must be a positive")
    scale = Parameter(lambda x: x > 0, "Scale parameter must be a positive")

    def __init__(self, shape: float, scale: float):
        super().__init__()
        self.shape = shape
        self.scale = scale

    @property
    def name(self) -> str:
        """Name of the distribution (always ``"Pareto"``)."""
        return "Pareto"

    @property
    def params(self) -> set[str]:
        """Set with the names of the distribution parameters."""
        return {self.PARAM_SHAPE, self.PARAM_SCALE}

    def pdf(self, X):
        """Probability density function (PDF).

        The PDF for the two-parameter Pareto distribution is:

        .. math::

            f(x | \\alpha, \\beta) = \\frac{\\alpha \\cdot \\beta^\\alpha}{x^{\\alpha + 1}}

        where :math:`\\alpha` is the :attr:`shape` parameter and :math:`\\beta`
        is the :attr:`scale` parameter. The function is zero for :math:`x < \\beta`.

        Parameters
        ----------
        X : ArrayLike
            The input data points at which to evaluate the PDF.

        Returns
        -------
        NDArray[np.float64]
            The PDF values corresponding to each point in :attr:`X`.
        """

        X = np.asarray(X, dtype=float64)
        # np.where needs both branches evaluated for every point, so the power-law
        # expression is also computed outside the support (e.g. X <= 0). Silence the
        # floating-point warnings raised there; those entries are replaced by 0.0.
        with np.errstate(divide="ignore", invalid="ignore"):
            density = (self.shape * (self.scale**self.shape)) / X ** (self.shape + 1)
        return np.where(self.scale <= X, density, 0.0)

    def ppf(self, P):
        """Percent Point Function (PPF) or quantile function.

        The PPF for the two-parameter Pareto distribution is:

        .. math::

            Q(p | \\alpha, \\beta) = \\beta \\cdot (1 - p)^{-1/\\alpha}

        Parameters
        ----------
        P : ArrayLike
            The probability values (between 0 and 1) at which to evaluate the PPF.

        Returns
        -------
        NDArray[np.float64]
            The PPF values corresponding to each probability in :attr:`P`.
            Probabilities outside ``[0, 1]`` map to ``NaN``; ``P == 1`` maps to
            ``inf``.
        """

        P = np.asarray(P, dtype=float64)
        # P == 1 divides by zero (the result, inf, is the intended quantile) and P > 1
        # raises a negative base to a fractional power; neither should emit warnings.
        with np.errstate(divide="ignore", invalid="ignore"):
            quantiles = self.scale * (1 - P) ** (-1.0 / self.shape)
        return np.where((P >= 0) & (P <= 1), quantiles, np.nan)

    def lpdf(self, X):
        """Log of the Probability Density Function (LPDF).

        The log-PDF for the two-parameter Pareto distribution is:

        .. math::

            \\ln f(x | \\alpha, \\beta) = \\alpha \\ln \\beta + \\ln \\alpha - (\\alpha + 1) \\ln x

        where :math:`\\alpha` is the :attr:`shape` parameter and :math:`\\beta`
        is the :attr:`scale` parameter. The log-density is :math:`-\\infty`
        for :math:`x < \\beta`.

        Parameters
        ----------
        X : ArrayLike
            The input data points at which to evaluate the LPDF.

        Returns
        -------
        NDArray[np.float64]
            The log-PDF values corresponding to each point in :attr:`X`.
        """

        X = np.asarray(X, dtype=float64)
        # log(X) is undefined for X <= 0; those points lie outside the support and are
        # replaced by -inf below, so the warnings would only be noise.
        with np.errstate(divide="ignore", invalid="ignore"):
            log_density = np.log(self.shape) + self.shape * np.log(self.scale) - (1 + self.shape) * np.log(X)
        return np.where(self.scale <= X, log_density, -np.inf)

    def _dlog_shape(self, X):
        """Partial derivative of the lpdf w.r.t. the :attr:`shape` parameter.

        The derivative is set to zero for :math:`X < \\text{scale}`.

        .. math::

            \\frac{\\partial \\ln f(x | \\alpha, \\beta)}{\\partial \\alpha} =
            \\frac{1}{\\alpha} + \\ln \\beta - \\ln x

        where :math:`\\alpha` is the :attr:`shape` parameter and :math:`\\beta`
        is the :attr:`scale` parameter.

        Parameters
        ----------
        X : ArrayLike
            The input data points.

        Returns
        -------
        NDArray[np.float64]
            The gradient of the lpdf with respect to :attr:`shape` for each
            point in :attr:`X`.
        """

        X = np.asarray(X, dtype=float64)
        # Same masking caveat as in lpdf: log(X) is evaluated for every point.
        with np.errstate(divide="ignore", invalid="ignore"):
            gradient = 1.0 / self.shape + np.log(self.scale) - np.log(X)
        return np.where(self.scale <= X, gradient, 0.0)

    def _dlog_scale(self, X):
        """Partial derivative of the lpdf w.r.t. the :attr:`scale` parameter.

        The derivative is set to zero for :math:`X < \\text{scale}`.

        .. math::

            \\frac{\\partial \\ln f(x | \\alpha, \\beta)}{\\partial \\beta} = \\frac{\\alpha}{\\beta}

        where :math:`\\alpha` is the :attr:`shape` parameter and :math:`\\beta`
        is the :attr:`scale` parameter.

        Parameters
        ----------
        X : ArrayLike
            The input data points.

        Returns
        -------
        NDArray[np.float64]
            The gradient of the lpdf with respect to :attr:`scale` for each
            point in :attr:`X`.
        """

        X = np.asarray(X, dtype=float64)
        # The in-support value does not depend on X; X only selects the support mask.
        return np.where(self.scale <= X, self.shape / self.scale, 0.0)

    def log_gradients(self, X):
        """Calculates the gradients of the log-PDF w.r.t. its parameters.

        The gradients are computed for the parameters that are not fixed.

        Parameters
        ----------
        X : ArrayLike
            The input data points at which to calculate the gradients.

        Returns
        -------
        NDArray[np.float64]
            An array where each row corresponds to a data point in :attr:`X`
            and each column corresponds to the gradient with respect to a
            specific optimizable parameter. The order of columns corresponds
            to the sorted order of :attr:`self.params_to_optimize`.
        """

        X = np.asarray(X, dtype=float64)

        gradient_calculators = {
            self.PARAM_SHAPE: self._dlog_shape,
            self.PARAM_SCALE: self._dlog_scale,
        }

        # Sorting fixes the column order regardless of the container's iteration order.
        optimizable_params = sorted(self.params_to_optimize)
        if not optimizable_params:
            return np.empty((len(X), 0))

        gradients = [gradient_calculators[param](X) for param in optimizable_params]
        return np.stack(gradients, axis=1)

    def generate(self, size: int):
        """Generates random samples from the distribution.

        Parameters
        ----------
        size : int
            The number of random samples to generate.

        Returns
        -------
        NDArray[np.float64]
            A NumPy array containing the generated samples.
        """

        # scipy names the Pareto shape parameter ``b``.
        return np.asarray(pareto.rvs(scale=self.scale, b=self.shape, size=size), dtype=float64)

    def __repr__(self) -> str:
        """Returns a string representation of the object.

        Returns
        -------
        str
            A string that can be used to recreate the object,
            e.g., "Pareto(shape=1.0, scale=2.0)".
        """

        return f"{self.__class__.__name__}(shape={self.shape}, scale={self.scale})"