Source code for rework_pysatl_mpest.distributions.normal
"""Module providing normal (Gaussian) distribution class"""
__author__ = "Danil Totmyanin"
__copyright__ = "Copyright (c) 2025 PySATL project"
__license__ = "SPDX-License-Identifier: MIT"
import numpy as np
from numpy import float64
from scipy.stats import norm
from ..core import Parameter
from .continuous_dist import ContinuousDistribution
[docs]
class Normal(ContinuousDistribution):
"""Class for the Normal (Gaussian) distribution.
Parameters
----------
loc : float
Mean of the distribution (mu). Can be any real number.
scale : float
Standard deviation of the distribution (sigma). Must be positive.
Attributes
----------
loc : float
Mean of the distribution.
scale : float
Standard deviation of the distribution.
Methods
-------
.. autosummary::
:toctree: generated/
ppf
pdf
lpdf
log_gradients
generate
"""
PARAM_LOC = "loc"
PARAM_SCALE = "scale"
loc = Parameter()
scale = Parameter(lambda x: x > 0, "Scale parameter must be positive")
def __init__(self, loc: float, scale: float):
super().__init__()
self.loc = loc
self.scale = scale
@property
def name(self) -> str:
return "Normal"
@property
def params(self) -> set[str]:
return {self.PARAM_LOC, self.PARAM_SCALE}
[docs]
def pdf(self, X):
"""Probability density function (PDF).
The PDF for the Normal distribution is:
.. math::
f(x | \\mu, \\sigma) = \\frac{1}{\\sigma \\sqrt{2\\pi}}
\\exp\\left( -\\frac{(x - \\mu)^2}{2\\sigma^2} \\right)
where :math:`\\mu` is the mean (loc) and :math:`\\sigma` is the
standard deviation (scale).
Parameters
----------
X : ArrayLike
The input data points at which to evaluate the PDF.
Returns
-------
NDArray[np.float64]
The PDF values corresponding to each point in :attr:`X`.
"""
X = np.asarray(X, dtype=float64)
z = (X - self.loc) / self.scale
return np.exp(-(z**2) / 2.0) / (self.scale * np.sqrt(2.0 * np.pi))
[docs]
def ppf(self, P):
"""Percent Point Function (PPF) or quantile function.
The PPF is the inverse of the Cumulative Distribution Function (CDF).
This implementation relies on `scipy.stats.norm.ppf` for accuracy
and robustness.
Parameters
----------
P : ArrayLike
The probability values (between 0 and 1) at which to evaluate the PPF.
Returns
-------
NDArray[np.float64]
The PPF values corresponding to each probability in :attr:`P`.
"""
P = np.asarray(P, dtype=float64)
return norm.ppf(P, loc=self.loc, scale=self.scale)
[docs]
def lpdf(self, X):
"""Log of the Probability Density Function (LPDF).
The log-PDF for the Normal distribution is:
.. math::
\\ln f(x) = -\\ln(\\sigma) - \\frac{1}{2} \\ln(2\\pi) -
\\frac{(x - \\mu)^2}{2\\sigma^2}
Parameters
----------
X : ArrayLike
The input data points at which to evaluate the LPDF.
Returns
-------
NDArray[np.float64]
The log-PDF values corresponding to each point in :attr:`X`.
"""
X = np.asarray(X, dtype=float64)
z = (X - self.loc) / self.scale
return -np.log(self.scale) - 0.5 * np.log(2.0 * np.pi) - 0.5 * z**2
def _dlog_loc(self, X):
"""Partial derivative of the lpdf w.r.t. the loc parameter."""
X = np.asarray(X, dtype=float64)
return (X - self.loc) / (self.scale**2)
def _dlog_scale(self, X):
"""Partial derivative of the lpdf w.r.t. the scale parameter."""
X = np.asarray(X, dtype=float64)
z_sq = ((X - self.loc) / self.scale) ** 2
return (z_sq - 1.0) / self.scale
[docs]
def log_gradients(self, X):
"""Calculates the gradients of the log-PDF w.r.t. its parameters.
Parameters
----------
X : ArrayLike
The input data points at which to calculate the gradients.
Returns
-------
NDArray[np.float64]
An array where each row corresponds to a data point in :attr:`X`
and each column corresponds to the gradient with respect to a
specific optimizable parameter. The order of columns corresponds
to the sorted order of :attr:`self.params_to_optimize`.
"""
X = np.asarray(X, dtype=float64)
gradient_calculators = {
self.PARAM_LOC: self._dlog_loc,
self.PARAM_SCALE: self._dlog_scale,
}
optimizable_params = sorted(list(self.params_to_optimize))
if not optimizable_params:
return np.empty((len(X), 0))
gradients = [gradient_calculators[param](X) for param in optimizable_params]
return np.stack(gradients, axis=1)
[docs]
def generate(self, size: int):
"""Generates random samples from the distribution.
This implementation relies on `scipy.stats.norm.rvs`.
Parameters
----------
size : int
The number of random samples to generate.
Returns
-------
NDArray[np.float64]
A NumPy array containing the generated samples.
"""
return np.asarray(norm.rvs(loc=self.loc, scale=self.scale, size=size), dtype=float64)
def __repr__(self) -> str:
"""Returns a string representation of the object.
Returns
-------
str
A string that can be used to recreate the object, e.g.,
"Normal(loc=0.0, scale=1.0)".
"""
return f"{self.__class__.__name__}(loc={self.loc}, scale={self.scale})"