Source code for simod.control_flow.settings

from dataclasses import dataclass
from pathlib import Path
from typing import Optional

from pix_framework.discovery.gateway_probabilities import GatewayProbabilitiesDiscoveryMethod

from simod.settings.common_settings import Metric
from simod.settings.control_flow_settings import ProcessModelDiscoveryAlgorithm


[docs] @dataclass class HyperoptIterationParams: """ Parameters for a single iteration of the Control-Flow optimization process. This class defines the configuration settings used during an iteration of the optimization process, including process model discovery, optimization metric, and gateway probability discovery. Attributes ---------- output_dir : :class:`pathlib.Path` Directory where all output files for the current iteration will be stored. provided_model_path : :class:`pathlib.Path`, optional Path to a provided BPMN model, if available (no discovery needed). project_name : str Name of the project, mainly used for file naming. optimization_metric : :class:`Metric` Metric used to evaluate the candidate process model in this iteration. gateway_probabilities_method : :class:`GatewayProbabilitiesDiscoveryMethod` Method for discovering gateway probabilities. mining_algorithm : :class:`ProcessModelDiscoveryAlgorithm` Algorithm used for process model discovery, if necessary. epsilon : float, optional Number of concurrent relations between events to be captured in the discovery algorithm (between 0.0 and 1.0). eta : float, optional Threshold for filtering the incoming and outgoing edges in the discovery algorithm (between 0.0 and 1.0). replace_or_joins : bool, optional Whether to replace non-trivial OR joins in the discovered model. prioritize_parallelism : bool, optional Whether to prioritize parallelism or loops for model discovery. f_score : float], default=Non, optional Minimum f-score value to consider the discovered data-aware branching rules. Notes ----- - If `provided_model_path` is specified, process model discovery will be skipped. """ # General settings output_dir: Path # Directory where to output all the files of the current iteration provided_model_path: Optional[Path] # Provided when no need to discover BPMN model project_name: str # Name of the project for file naming optimization_metric: Metric # Metric to evaluate the candidate of this iteration gateway_probabilities_method: GatewayProbabilitiesDiscoveryMethod # Method to discover the gateway probabilities mining_algorithm: ProcessModelDiscoveryAlgorithm # Algorithm to discover the process model # Split Miner 2 # Split Miner 3 epsilon: Optional[float] # Parallelism threshold (epsilon) eta: Optional[float] # Percentile for frequency threshold (eta) replace_or_joins: Optional[bool] # Should replace non-trivial OR joins prioritize_parallelism: Optional[bool] # Should prioritize parallelism on loops f_score: Optional[float] = None # quality gateway for branch rules (f_score)
[docs] def to_dict(self) -> dict: """ Converts the instance into a dictionary representation of the optimization parameters. The returned dictionary is structured based on whether a process model needs to be discovered or if a pre-existing model is provided. Returns ------- dict A dictionary containing the optimization parameters for this iteration. """ optimization_parameters = { "output_dir": str(self.output_dir), "project_name": str(self.project_name), "optimization_metric": str(self.optimization_metric), "gateway_probabilities": self.gateway_probabilities_method.value, "mining_algorithm": str(self.mining_algorithm), } if self.provided_model_path is None: if self.mining_algorithm is ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V2: optimization_parameters["epsilon"] = self.epsilon elif self.mining_algorithm is ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V1: optimization_parameters["epsilon"] = self.epsilon optimization_parameters["eta"] = self.eta optimization_parameters["prioritize_parallelism"] = self.prioritize_parallelism optimization_parameters["replace_or_joins"] = self.replace_or_joins else: optimization_parameters["provided_model_path"] = str(self.provided_model_path) if self.f_score: optimization_parameters["discover_branch_rules"] = True optimization_parameters["f_score"] = self.f_score return optimization_parameters
@staticmethod def from_hyperopt_dict( hyperopt_dict: dict, optimization_metric: Metric, mining_algorithm: ProcessModelDiscoveryAlgorithm, output_dir: Path, provided_model_path: Optional[Path], project_name: str, ) -> "HyperoptIterationParams": """Create the params for this run from the hyperopt dictionary returned by the fmin function.""" gateway_probabilities_method = GatewayProbabilitiesDiscoveryMethod.from_str( hyperopt_dict["gateway_probabilities_method"] ) epsilon, eta, prioritize_parallelism, replace_or_joins = None, None, None, None if provided_model_path is None: if mining_algorithm == ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V1: epsilon = hyperopt_dict["epsilon"] eta = hyperopt_dict["eta"] prioritize_parallelism = hyperopt_dict["prioritize_parallelism"] replace_or_joins = hyperopt_dict.get("replace_or_joins") elif mining_algorithm == ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V2: epsilon = hyperopt_dict["epsilon"] f_score = hyperopt_dict.get("f_score", None) return HyperoptIterationParams( output_dir=output_dir, provided_model_path=provided_model_path, project_name=project_name, optimization_metric=optimization_metric, gateway_probabilities_method=gateway_probabilities_method, mining_algorithm=mining_algorithm, epsilon=epsilon, eta=eta, prioritize_parallelism=prioritize_parallelism, replace_or_joins=replace_or_joins, f_score=f_score )