Source code for pm4py.algo.transformation.log_to_trie.algorithm

'''
    PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see this software project's root or
visit <https://www.gnu.org/licenses/>.

Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''
from enum import Enum
from pm4py import util
from pm4py.objects.log.obj import EventLog
from pm4py.objects.trie.obj import Trie
from pm4py.statistics.variants.log import get as get_variants_log
from pm4py.statistics.variants.pandas import get as get_variants_pandas
from pm4py.util import pandas_utils, exec_utils
from typing import Optional, Dict, Any, Union
import pandas as pd


[docs] class Parameters(Enum): ACTIVITY_KEY = util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY MAX_PATH_LENGTH = ( "max_path_length" # New parameter for maximum path length )
[docs] def apply( log: Union[EventLog, pd.DataFrame], parameters: Optional[Dict[Union[str, Parameters], Any]] = None, ) -> Trie: parameters = parameters if parameters is not None else dict() # Extract the maximum path length if provided max_path_length = exec_utils.get_param_value( Parameters.MAX_PATH_LENGTH, parameters, None ) if pandas_utils.check_is_pandas_dataframe(log): variants = get_variants_pandas.get_variants_set( log, parameters=parameters ) else: variants = get_variants_log.get_variants(log, parameters=parameters) variants = list(variants) root = Trie() for variant in variants: # If maximum path length is defined, truncate the variant if max_path_length is not None and len(variant) > max_path_length: variant = variant[:max_path_length] trie = root for i, activity in enumerate(variant): match = False for c in trie.children: if c.label == activity: trie = c match = True break if not match: node = Trie(label=activity, parent=trie, depth=trie.depth + 1) trie.children.append(node) trie = node # If at the end of this (possibly truncated) variant, mark as final if i == len(variant) - 1: trie.final = True return root