# Source code for pysatl_core.distributions.distribution

"""
Distribution Interface

This module defines the public ``Distribution`` abstract base class, which
serves as the common interface for all probability distributions in the system.
"""

from __future__ import annotations

__author__ = "Leonid Elkin, Mikhail Mikhailov"
__copyright__ = "Copyright (c) 2025 PySATL project"
__license__ = "SPDX-License-Identifier: MIT"

from abc import ABC, abstractmethod
from collections.abc import Mapping
from copy import deepcopy
from typing import TYPE_CHECKING, Self, cast

from pysatl_core.distributions.strategies import (
    ComputationPlan,
    ComputationStrategy,
    SamplingStrategy,
)
from pysatl_core.types import DEFAULT_ANALYTICAL_COMPUTATION_LABEL, NumericArray

_KEEP: object = object()


if TYPE_CHECKING:
    from collections.abc import Mapping
    from typing import Any

    from pysatl_core.distributions.computations.computation import AnalyticalComputation
    from pysatl_core.distributions.computations.options import StepOptions
    from pysatl_core.distributions.support import Support
    from pysatl_core.types import (
        DistributionType,
        GenericCharacteristicName,
        LabelName,
        Method,
    )


class Distribution(ABC):
    """
    Abstract base class defining the interface for probability distributions.

    This class is the central abstraction used throughout the system.
    Concrete distribution implementations must provide the properties and
    methods defined here (in particular ``_clone_with_strategies``).

    Attributes
    ----------
    distribution_type : DistributionType
        Type information about the distribution (kind, dimension, etc.).
    analytical_computations : Mapping
        Distribution-provided characteristic methods, keyed by characteristic
        name and then by label. For non-transformed distributions every
        method in this mapping is fully analytical, so this mapping matches
        the set of loops with ``is_analytical=True`` in the graph view.
    sampling_strategy : SamplingStrategy
        Strategy for generating random samples.
    computation_strategy : ComputationStrategy
        Strategy for computing characteristics and conversions.
    support : Support or None
        Support of the distribution, if defined.

    Notes
    -----
    **Array semantics for analytical characteristics**

    Analytical characteristic methods (e.g. ``pdf``, ``cdf``) should be
    implemented to accept and return NumPy arrays directly (array semantics).
    This is the recommended approach because it enables efficient vectorised
    evaluation. If a method accepts only scalar inputs, the computation
    infrastructure will wrap it automatically via ``numpy.vectorize``, but
    this incurs a per-element Python call overhead and is significantly
    slower for large inputs.
    """

    def __init__(
        self,
        distribution_type: DistributionType,
        analytical_computations: Mapping[
            GenericCharacteristicName,
            (AnalyticalComputation[Any, Any] | Mapping[LabelName, AnalyticalComputation[Any, Any]]),
        ],
        support: Support | None = None,
        sampling_strategy: SamplingStrategy | None = None,
        computation_strategy: ComputationStrategy | None = None,
    ) -> None:
        """
        Initialize common distribution state.

        Parameters
        ----------
        distribution_type : DistributionType
            Type information about the distribution (kind, dimension, etc.).
        analytical_computations : Mapping
            Distribution-provided characteristic methods. Each value is
            either a single computation (stored under the default label) or
            a mapping from label to computation. For non-transformed
            distributions these methods are fully analytical.

            .. note::
                Each characteristic callable should accept and return NumPy
                arrays (array semantics). Scalar-only callables are wrapped
                automatically via ``numpy.vectorize``, but at a significant
                per-element overhead cost.
        support : Support or None, default=None
            Support of the distribution.
        sampling_strategy : SamplingStrategy or None, default=None
            Sampling strategy instance. If omitted, univariate default is used.
        computation_strategy : ComputationStrategy or None, default=None
            Computation strategy instance. If omitted, default strategy is used.

        Raises
        ------
        ValueError
            If ``analytical_computations`` is empty, or if any characteristic
            maps to an empty collection of labelled computations.
        """
        # Imported at call time rather than at module import time.
        # NOTE(review): presumably this avoids an import cycle -- confirm.
        from pysatl_core.distributions.strategies import DefaultComputationStrategy
        from pysatl_core.sampling.default import DefaultSamplingUnivariateStrategy

        self._distribution_type = distribution_type

        # Normalise every entry to the ``{label: computation}`` shape so the
        # rest of the class deals with a single, uniform structure.
        normalized_analytical: dict[
            GenericCharacteristicName, dict[LabelName, AnalyticalComputation[Any, Any]]
        ] = {}
        for characteristic_name, methods in analytical_computations.items():
            if isinstance(methods, Mapping):
                # Copy so later mutation of the caller's mapping is not shared.
                normalized_analytical[characteristic_name] = dict(methods)
            else:
                normalized_analytical[characteristic_name] = {
                    DEFAULT_ANALYTICAL_COMPUTATION_LABEL: methods
                }

        if not normalized_analytical:
            raise ValueError("Distribution requires at least one analytical computation.")

        for characteristic_name, labeled_methods in normalized_analytical.items():
            if not labeled_methods:
                raise ValueError(
                    f"Characteristic '{characteristic_name}' must provide at least one "
                    "analytical computation."
                )

        self._analytical_computations = normalized_analytical
        self._support = support

        # Fall back to the defaults only when the argument is actually
        # omitted (``None``). A truthiness test (``x or default``) would
        # silently discard a supplied strategy object that happens to
        # evaluate as falsy (e.g. one defining ``__len__`` or ``__bool__``).
        self._sampling_strategy = (
            DefaultSamplingUnivariateStrategy() if sampling_strategy is None else sampling_strategy
        )
        self._computation_strategy = (
            DefaultComputationStrategy() if computation_strategy is None else computation_strategy
        )

    @property
    def distribution_type(self) -> DistributionType:
        """Return type metadata of the distribution (kind, dimension, etc.)."""
        return self._distribution_type

    @property
    def analytical_computations(
        self,
    ) -> Mapping[GenericCharacteristicName, Mapping[LabelName, AnalyticalComputation[Any, Any]]]:
        """
        Return distribution-provided characteristic methods.

        For non-transformed distributions this mapping coincides with graph
        loops marked as ``is_analytical=True``.
        """
        return self._analytical_computations

    def loop_is_analytical(
        self,
        characteristic_name: GenericCharacteristicName,
        label_name: LabelName,
    ) -> bool:
        """
        Tell whether a self-loop method is fully analytical in the graph.

        Parameters
        ----------
        characteristic_name : GenericCharacteristicName
            Characteristic name of the self-loop.
        label_name : LabelName
            Label of the analytical computation variant.

        Returns
        -------
        bool
            ``True`` when every required predecessor in the transformation
            chain is analytical.

        Notes
        -----
        Presence in ``analytical_computations`` means that a characteristic
        has at least one analytical ancestor in its derivation chain. For
        non-transformed distributions these notions coincide, therefore this
        method always returns ``True``.
        """
        # Arguments are intentionally unused in this base implementation.
        _ = characteristic_name, label_name
        return True

    @property
    def sampling_strategy(self) -> SamplingStrategy:
        """Return the currently attached sampling strategy."""
        return self._sampling_strategy

    @property
    def computation_strategy(self) -> ComputationStrategy:
        """Return the currently attached computation strategy."""
        return self._computation_strategy

    @property
    def support(self) -> Support | None:
        """Return the support of the distribution, if it is defined."""
        return self._support

    @abstractmethod
    def _clone_with_strategies(
        self,
        *,
        sampling_strategy: SamplingStrategy | None | object = _KEEP,
        computation_strategy: ComputationStrategy | None | object = _KEEP,
    ) -> Distribution:
        """
        Return a cloned distribution with updated strategies.

        The ``_KEEP`` sentinel means the existing strategy should be
        preserved for that side.
        """
        ...

    def _new_sampling_strategy(
        self,
        sampling_strategy: SamplingStrategy | None | object = _KEEP,
    ) -> SamplingStrategy | None:
        """
        Resolve sampling strategy for cloning.

        When ``sampling_strategy`` is ``_KEEP``, returns a deep copy of the
        current sampling strategy; otherwise the argument is returned as is
        (including ``None``).
        """
        return cast(
            SamplingStrategy | None,
            deepcopy(self._sampling_strategy) if sampling_strategy is _KEEP else sampling_strategy,
        )

    def _new_computation_strategy(
        self,
        computation_strategy: ComputationStrategy | None | object = _KEEP,
    ) -> ComputationStrategy | None:
        """
        Resolve computation strategy for cloning.

        When ``computation_strategy`` is ``_KEEP``, returns a deep copy of
        the current computation strategy; otherwise the argument is returned
        as is (including ``None``).
        """
        return cast(
            ComputationStrategy | None,
            deepcopy(self._computation_strategy)
            if computation_strategy is _KEEP
            else computation_strategy,
        )

    def with_sampling_strategy(self, sampling_strategy: SamplingStrategy | None) -> Self:
        """Return a copy of this distribution with an updated sampling strategy."""
        return cast(Self, self._clone_with_strategies(sampling_strategy=sampling_strategy))

    def with_computation_strategy(self, computation_strategy: ComputationStrategy | None) -> Self:
        """Return a copy of this distribution with an updated computation strategy."""
        return cast(
            Self,
            self._clone_with_strategies(computation_strategy=computation_strategy),
        )

    def with_strategies(
        self,
        *,
        sampling_strategy: SamplingStrategy | None | object = _KEEP,
        computation_strategy: ComputationStrategy | None | object = _KEEP,
    ) -> Self:
        """
        Return a copy of this distribution with updated strategies.

        Arguments left at the ``_KEEP`` sentinel are forwarded unchanged to
        ``_clone_with_strategies``, which treats them as "preserve the
        existing strategy".
        """
        return cast(
            Self,
            self._clone_with_strategies(
                sampling_strategy=sampling_strategy,
                computation_strategy=computation_strategy,
            ),
        )

    def query_method(
        self,
        characteristic_name: GenericCharacteristicName,
        options: StepOptions | None = None,
        *,
        characteristic_options: Mapping[str, Any] | None = None,
        computation_defaults: Mapping[str, Any] | None = None,
    ) -> Method[Any, Any]:
        """
        Query a computation method for a specific characteristic.

        Parameters
        ----------
        characteristic_name : str
            Name of the characteristic to compute (e.g., "pdf", "cdf").
        options : StepOptions | None, default=None
            Per-step options built via :meth:`ComputationPlan.with_options`.
            When ``None``, every edge uses its declared defaults.
        characteristic_options : Mapping[str, Any] | None, default=None
            Shared characteristic options broadcast to every step that
            declares a matching :class:`CharacteristicOption`. These are
            intrinsic to the characteristic (e.g. ``eps``, ``x0`` for PPF)
            and affect the *meaning* of the result and the cache key.
        computation_defaults : Mapping[str, Any] | None, default=None
            Per-call computation option defaults. Override the strategy-level
            defaults and hardcoded descriptor defaults, but are overridden by
            per-step values in ``options``. Do **not** affect the cache key.

        Returns
        -------
        Method
            Callable method that computes the characteristic.
        """
        return self.computation_strategy.query_method(
            characteristic_name,
            self,
            options,
            characteristic_options=characteristic_options,
            computation_defaults=computation_defaults,
        )

    def explain_computation_path(
        self, characteristic_name: GenericCharacteristicName
    ) -> ComputationPlan:
        """
        Describe how the attached computation strategy will compute a characteristic.

        Returns a :class:`ComputationPlan` listing every step (loop or
        conversion edge) and the option descriptors that will be consulted at
        each step. The plan is also pinned by the strategy, so a subsequent
        :meth:`query_method` / :meth:`calculate_characteristic` call for the
        same ``characteristic_name`` follows exactly the same edges -- which
        is useful for introspection and protects against non-deterministic
        strategy choices.

        Parameters
        ----------
        characteristic_name : str
            Name of the characteristic to introspect.

        Returns
        -------
        ComputationPlan
            The plan describing the resolution path.
        """
        return self.computation_strategy.explain_computation_path(characteristic_name, self)

    def calculate_characteristic(
        self,
        characteristic_name: GenericCharacteristicName,
        value: Any,
        options: StepOptions | None = None,
        *,
        characteristic_options: Mapping[str, Any] | None = None,
        computation_defaults: Mapping[str, Any] | None = None,
    ) -> Any:
        """
        Calculate a characteristic at the given value.

        Parameters
        ----------
        characteristic_name : str
            Name of the characteristic to compute.
        value : Any
            Point(s) at which to evaluate the characteristic.
        options : StepOptions | None, default=None
            Per-step options built via :meth:`ComputationPlan.with_options`.
        characteristic_options : Mapping[str, Any] | None, default=None
            Shared characteristic options broadcast to every step that
            declares a matching :class:`CharacteristicOption`.
        computation_defaults : Mapping[str, Any] | None, default=None
            Per-call computation option defaults.

        Returns
        -------
        Any
            Value of the characteristic at the given point(s).
        """
        # Resolve the method first, then evaluate it at ``value``.
        method = self.query_method(
            characteristic_name,
            options,
            characteristic_options=characteristic_options,
            computation_defaults=computation_defaults,
        )
        return method(value)

    def sample(self, n: int, **options: Any) -> NumericArray:
        """
        Generate random samples from the distribution.

        Parameters
        ----------
        n : int
            Number of samples to generate.
        **options : Any
            Additional sampling options forwarded to the underlying sampling
            strategy.

        Returns
        -------
        NumericArray
            NumPy array containing ``n`` generated samples. The exact array
            shape depends on the distribution and the sampling strategy.
        """
        return self.sampling_strategy.sample(n, distr=self, **options)