# Source code for rework_pysatl_mpest.initializers.initializer
"""
A module providing an abstract base class for mixture model initializers.
"""
__author__ = "Viktor Khanukaev"
__copyright__ = "Copyright (c) 2025 PySATL project"
__license__ = "SPDX-License-Identifier: MIT"
from abc import ABC, abstractmethod
from numpy.typing import ArrayLike
from rework_pysatl_mpest.distributions.continuous_dist import ContinuousDistribution
from rework_pysatl_mpest.initializers.strategies import ClusterMatchStrategy, EstimationStrategy
[docs]
class Initializer(ABC):
    """Abstract base class for mixture model initializers.

    This class defines the interface for all initialization strategies that
    estimate initial parameters for mixture models. Subclasses must implement
    the `perform` method to provide specific initialization logic.

    Methods
    -------
    perform(X, dists, cluster_match_info, estimation_info)
        Performs initialization of mixture model parameters.

    Notes
    -----
    **Purpose**

    Initializers are responsible for providing good starting points for
    mixture model parameters before the main optimization process. This can
    significantly improve convergence speed and solution quality.

    **Implementation Requirements**

    Subclasses must implement the `perform` method to:

    - Estimate initial parameters for each distribution component
    - Calculate initial mixture weights
    - Return a properly initialized MixtureModel instance

    **Common Initialization Strategies**

    - Cluster-based initialization (using clustering algorithms)
    """

    @abstractmethod
    def perform(
        self,
        X: ArrayLike,
        dists: list[ContinuousDistribution],
        cluster_match_info: ClusterMatchStrategy,
        estimation_info: list[EstimationStrategy],
    ):
        """Performs initialization of mixture model parameters.

        Parameters
        ----------
        X : ArrayLike
            Input data points used for parameter estimation. Should be a 1D
            array of sample values from the mixture distribution.
        dists : list[ContinuousDistribution]
            List of distribution models to initialize. Each distribution
            represents one component of the mixture model. The number of
            distributions determines the number of mixture components.
        cluster_match_info : ClusterMatchStrategy
            Strategy for matching clusters to distribution models. Determines
            how clusters identified in the data are assigned to specific
            distribution components.
        estimation_info : list[EstimationStrategy]
            List of estimation strategies for each distribution model. Each
            element specifies the parameter estimation method to use for the
            corresponding distribution in the `dists` list.

        Returns
        -------
        MixtureModel
            An initialized mixture model with estimated parameters and
            normalized component weights that sum to 1.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation, e.g. when a subclass
            delegates to it through ``super().perform(...)``. Subclasses must
            provide their own implementation.

        Notes
        -----
        The method should handle the following tasks:

        1. Validate input parameters and data consistency
        2. Estimate initial parameters for each distribution component using
           the specified estimation strategies
        3. Calculate initial mixture weights (should sum to 1)
        4. Ensure all parameters are within valid ranges for each distribution
        5. Return a properly configured MixtureModel instance

        The implementation may use various strategies to estimate good
        starting parameters for the EM algorithm or other optimization
        methods.
        """
        # The abstract contract has no default behaviour; concrete subclasses
        # supply the actual initialization logic.
        raise NotImplementedError