Source code for simod.settings.control_flow_settings

from enum import Enum
from typing import List, Optional, Tuple, Union

from pix_framework.discovery.gateway_probabilities import GatewayProbabilitiesDiscoveryMethod
from pydantic import BaseModel

from .common_settings import Metric
from ..utilities import parse_single_value_or_interval


class ProcessModelDiscoveryAlgorithm(str, Enum):
    """
    Enumeration of process model discovery algorithms.

    This enum defines the available algorithms for discovering process models from event logs.

    Attributes
    ----------
    SPLIT_MINER_V1 : str
        Represents the first version of the Split Miner algorithm (`"sm1"`).
    SPLIT_MINER_V2 : str
        Represents the second version of the Split Miner algorithm (`"sm2"`).
    """

    SPLIT_MINER_V1 = "sm1"
    SPLIT_MINER_V2 = "sm2"

    @classmethod
    def from_str(cls, value: str) -> "ProcessModelDiscoveryAlgorithm":
        """
        Converts a string representation of a process model discovery algorithm into the corresponding
        :class:`ProcessModelDiscoveryAlgorithm` instance.

        This method allows flexible input formats for each algorithm, supporting multiple variations of their names.
        Matching is case-insensitive and ignores leading/trailing whitespace.

        Parameters
        ----------
        value : str
            A string representing a process model discovery algorithm.

        Returns
        -------
        :class:`ProcessModelDiscoveryAlgorithm`
            The corresponding enum instance for the given algorithm name.

        Raises
        ------
        ValueError
            If the provided string does not match any known algorithm.
        """
        # Normalize once so both alias lookups compare against the same text.
        normalized = value.strip().lower()

        if normalized in (
            "sm2",
            "splitminer2",
            "split miner 2",
            "split_miner_2",
            "split-miner-2",
            "split_miner_v2",
            "split-miner-v2",
            "splitminer-v2",
            "split miner v2",
        ):
            return cls.SPLIT_MINER_V2

        if normalized in (
            "sm1",
            "splitminer1",
            "split miner 1",
            "split_miner_1",
            "split-miner-1",
            "split_miner_v1",
            "split-miner-v1",
            "splitminer-v1",
            "split miner v1",
        ):
            return cls.SPLIT_MINER_V1

        # Report the caller's original text, not the normalized form.
        raise ValueError(f"Unknown process model discovery algorithm: {value}")

    def __str__(self):
        """Return the human-readable name of the algorithm."""
        if self == ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V1:
            return "Split Miner v1"
        if self == ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V2:
            return "Split Miner v2"
        # Use the raw value here: calling str(self) from inside __str__ would
        # re-enter this method and recurse until the interpreter gives up.
        return f"Unknown ProcessModelDiscoveryAlgorithm {self.value}"
class ControlFlowSettings(BaseModel):
    """
    Control-flow model configuration parameters.

    This class defines the ranges of the configurable parameters for optimizing the control-flow
    structure of a discovered process model, including metric selection, iteration settings,
    and various discovery algorithm parameters. In each iteration of the optimization process, the
    parameters are sampled from these values or ranges.

    Attributes
    ----------
    optimization_metric : :class:`~simod.settings.common_settings.Metric`
        The metric used to evaluate process model quality at each iteration of the optimization process (i.e.,
        loss function).
    num_iterations : int
        The number of optimization iterations to perform.
    num_evaluations_per_iteration : int
        The number of replications for the evaluations of each iteration.
    gateway_probabilities : Union[:class:`GatewayProbabilitiesDiscoveryMethod`, List[:class:`GatewayProbabilitiesDiscoveryMethod`]]
        Fixed method or list of methods to use in each iteration to discover gateway probabilities.
    mining_algorithm : :class:`ProcessModelDiscoveryAlgorithm`, optional
        The process model discovery algorithm to use.
    epsilon : Union[float, Tuple[float, float]], optional
        Fixed number or range for the number of concurrent relations between events to be captured in the discovery
        algorithm (between 0.0 and 1.0).
    eta : Union[float, Tuple[float, float]], optional
        Fixed number or range for the threshold for filtering the incoming and outgoing edges in the discovery
        algorithm (between 0.0 and 1.0).
    replace_or_joins : Union[bool, List[bool]], optional
        Fixed value or list for whether to replace non-trivial OR joins.
    prioritize_parallelism : Union[bool, List[bool]], optional
        Fixed value or list for whether to prioritize parallelism over loops.
    discover_branch_rules : bool, optional
        Whether to discover branch rules for gateways.
    f_score : Union[float, Tuple[float, float]], optional
        Fixed value or range for the minimum f-score value to consider the discovered data-aware branching rules.
    """

    optimization_metric: Metric = Metric.THREE_GRAM_DISTANCE
    num_iterations: int = 10
    num_evaluations_per_iteration: int = 3
    gateway_probabilities: Union[
        GatewayProbabilitiesDiscoveryMethod, List[GatewayProbabilitiesDiscoveryMethod]
    ] = GatewayProbabilitiesDiscoveryMethod.DISCOVERY
    mining_algorithm: Optional[ProcessModelDiscoveryAlgorithm] = ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V1
    epsilon: Optional[Union[float, Tuple[float, float]]] = (0.0, 1.0)  # parallelism threshold (epsilon)
    eta: Optional[Union[float, Tuple[float, float]]] = (0.0, 1.0)  # percentile for frequency threshold (eta)
    discover_branch_rules: Optional[bool] = False
    f_score: Optional[Union[float, Tuple[float, float]]] = 0.7  # quality gateway for branch rules (f_score)
    replace_or_joins: Optional[Union[bool, List[bool]]] = False  # should replace non-trivial OR joins
    prioritize_parallelism: Optional[Union[bool, List[bool]]] = False  # should prioritize parallelism on loops

    @staticmethod
    def one_shot() -> "ControlFlowSettings":
        """
        Instantiates the control-flow model configuration for the one-shot mode (i.e., no optimization, one single
        iteration).

        Fields not passed explicitly below (``discover_branch_rules`` and ``f_score``) keep the class defaults.

        Returns
        -------
        :class:`ControlFlowSettings`
            Instance of the control-flow model configuration for the one-shot mode.
        """
        return ControlFlowSettings(
            optimization_metric=Metric.THREE_GRAM_DISTANCE,
            num_iterations=1,
            num_evaluations_per_iteration=1,
            gateway_probabilities=GatewayProbabilitiesDiscoveryMethod.DISCOVERY,
            mining_algorithm=ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V1,
            epsilon=0.3,
            eta=0.5,
            replace_or_joins=False,
            prioritize_parallelism=False,
        )

    @staticmethod
    def from_dict(config: dict) -> "ControlFlowSettings":
        """
        Instantiates the control-flow model configuration from a dictionary.

        Missing keys fall back to the defaults written in this method. Only the parameters relevant to the
        selected mining algorithm are read; the others are set to ``None``. Likewise, ``f_score`` is only read
        when ``discover_branch_rules`` is enabled.

        Parameters
        ----------
        config : dict
            Dictionary with the configuration values for the control-flow model parameters.
            ``gateway_probabilities`` may be a single method name or a list/tuple of method names.

        Returns
        -------
        :class:`ControlFlowSettings`
            Instance of the control-flow model configuration for the specified dictionary values.

        Raises
        ------
        ValueError
            If the mining algorithm name is not recognized.
        """
        optimization_metric = Metric.from_str(config.get("optimization_metric", "n_gram_distance"))
        num_iterations = config.get("num_iterations", 10)
        num_evaluations_per_iteration = config.get("num_evaluations_per_iteration", 3)

        # The field accepts either one method or a list of methods, and to_dict() writes the
        # list form as a list of names, so both shapes must be parsed here.
        raw_gateway_probabilities = config.get("gateway_probabilities", "discovery")
        if isinstance(raw_gateway_probabilities, (list, tuple)):
            gateway_probabilities = [
                GatewayProbabilitiesDiscoveryMethod.from_str(method) for method in raw_gateway_probabilities
            ]
        else:
            gateway_probabilities = GatewayProbabilitiesDiscoveryMethod.from_str(raw_gateway_probabilities)

        mining_algorithm = ProcessModelDiscoveryAlgorithm.from_str(config.get("mining_algorithm", "sm1"))

        epsilon, eta, replace_or_joins, prioritize_parallelism = None, None, None, None
        if mining_algorithm in [ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V1]:
            eta = parse_single_value_or_interval(config.get("eta", (0.0, 1.0)))
            epsilon = parse_single_value_or_interval(config.get("epsilon", (0.0, 1.0)))
            replace_or_joins = config.get("replace_or_joins", False)
            prioritize_parallelism = config.get("prioritize_parallelism", False)
        elif mining_algorithm is ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V2:
            # Only epsilon is read for Split Miner v2.
            epsilon = parse_single_value_or_interval(config.get("epsilon", (0.0, 1.0)))
        else:
            raise ValueError(f"Unknown process model discovery algorithm: {mining_algorithm}")

        discover_branch_rules = config.get("discover_branch_rules", False)
        f_score = None
        if discover_branch_rules:
            f_score = parse_single_value_or_interval(config.get("f_score", (0.0, 1.0)))

        return ControlFlowSettings(
            optimization_metric=optimization_metric,
            num_iterations=num_iterations,
            num_evaluations_per_iteration=num_evaluations_per_iteration,
            gateway_probabilities=gateway_probabilities,
            mining_algorithm=mining_algorithm,
            epsilon=epsilon,
            eta=eta,
            replace_or_joins=replace_or_joins,
            prioritize_parallelism=prioritize_parallelism,
            discover_branch_rules=discover_branch_rules,
            f_score=f_score,
        )

    def to_dict(self) -> dict:
        """
        Translate the control-flow model configuration stored in this instance into a dictionary.

        Only the parameters relevant to the configured mining algorithm are written. The
        ``discover_branch_rules`` flag is written when it is enabled, so that :meth:`from_dict` can restore it
        (it defaults to ``False`` there); ``f_score`` is written alongside it when it is not ``None``.

        Returns
        -------
        dict
            Python dictionary storing this configuration.
        """
        dictionary = {
            "optimization_metric": self.optimization_metric.value,
            "num_iterations": self.num_iterations,
            "num_evaluations_per_iteration": self.num_evaluations_per_iteration,
        }

        # A single method is stored as its value, a list of methods as a list of values.
        if isinstance(self.gateway_probabilities, GatewayProbabilitiesDiscoveryMethod):
            dictionary["gateway_probabilities"] = self.gateway_probabilities.value
        else:
            dictionary["gateway_probabilities"] = [method.value for method in self.gateway_probabilities]

        if self.mining_algorithm is not None:
            dictionary["mining_algorithm"] = self.mining_algorithm.value
            if self.mining_algorithm == ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V2:
                dictionary["epsilon"] = self.epsilon
            elif self.mining_algorithm == ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V1:
                dictionary["epsilon"] = self.epsilon
                dictionary["eta"] = self.eta
                dictionary["replace_or_joins"] = self.replace_or_joins
                dictionary["prioritize_parallelism"] = self.prioritize_parallelism

        if self.discover_branch_rules:
            # Without this key from_dict() would fall back to False and drop f_score as well.
            dictionary["discover_branch_rules"] = self.discover_branch_rules
            if self.f_score is not None:
                dictionary["f_score"] = self.f_score

        return dictionary