Source code for pm4py.objects.log.util.get_log_encoded
'''
PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see this software project's root or
visit <https://www.gnu.org/licenses/>.
Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''
import numpy as np
[docs]
def get_log_encoded(
    event_log, trace_attributes=None, event_attributes=None, concatenate=False
):
    """
    Get event log encoded into matrix.

    Every trace is flattened into one row: first the values of the selected
    trace attributes, then, for each event in order, the values of the
    selected event attributes. An attribute that cannot be read is encoded
    as ``None``. Rows shorter than the longest row are right-padded with
    ``0`` so that all rows have the same length.

    Parameters
    ------------
    event_log
        Iterable of traces. Each trace is iterated to obtain its events;
        trace attributes are read from ``trace.attributes[name]`` and event
        attributes from ``event[name]``.
    trace_attributes
        Attributes of the trace to be encoded (``None`` means no attribute)
    event_attributes
        Attributes of the events to be encoded (``None`` means no attribute)
    concatenate
        If exactly ``True``, one row is emitted after every event of a
        trace, holding the encoding of the trace up to and including that
        event (a trace without events then contributes no row). Otherwise a
        single row per trace is emitted.

    Returns
    ------------
    dataset
        A numpy matrix with the event log
    columns
        The names of the columns in the dataset, taken from the first row
        that reached the maximum length
    """
    # Materialise the selections once: they are re-iterated for every trace
    # and every event, so a one-shot iterable would otherwise be exhausted
    # after its first use. This also avoids mutable default arguments.
    trace_attributes = (
        [] if trace_attributes is None else list(trace_attributes)
    )
    event_attributes = (
        [] if event_attributes is None else list(event_attributes)
    )
    # Identity check kept on purpose: only the literal ``True`` enables the
    # prefix mode, exactly as before.
    emit_prefixes = concatenate is True

    columns = []
    rows = []
    max_trace_len = 0

    for trace in event_log:
        trace_encoding = []
        tr_columns = []

        for trace_attribute in trace_attributes:
            tr_columns.append(trace_attribute)
            try:
                attr = trace.attributes[trace_attribute]
            except Exception:
                # Best effort: a missing attribute (or a trace without an
                # ``attributes`` mapping) is encoded as None.
                attr = None
            trace_encoding.append(attr)

        for event in trace:
            for event_attribute in event_attributes:
                tr_columns.append(event_attribute)
                try:
                    attr = event[event_attribute]
                except Exception:
                    attr = None
                trace_encoding.append(attr)

            # For each trace in the event log, sequentially append the
            # event sequence until that event
            if emit_prefixes:
                if len(trace_encoding) > max_trace_len:
                    max_trace_len = len(trace_encoding)
                    # Not copied: tr_columns keeps growing together with
                    # trace_encoding for the remaining events of this trace.
                    columns = tr_columns
                # np.asarray copies the list, so later appends to
                # trace_encoding do not alter rows already stored.
                rows.append(np.asarray(trace_encoding))

        if not emit_prefixes:
            if len(trace_encoding) > max_trace_len:
                max_trace_len = len(trace_encoding)
                columns = tr_columns
            rows.append(np.asarray(trace_encoding))

    # Right-pad every row with 0 up to the length of the longest row.
    dataset = np.asarray(
        [
            np.pad(
                row,
                (0, max_trace_len - len(row)),
                "constant",
                constant_values=0,
            )
            for row in rows
        ]
    )
    columns = np.asarray(columns)
    return dataset, columns