Source code for pm4py.objects.ocel.exporter.sqlite.variants.ocel20

'''
    PM4Py – A Process Mining Library for Python
Copyright (C) 2024 Process Intelligence Solutions UG (haftungsbeschränkt)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see this software project's root or
visit <https://www.gnu.org/licenses/>.

Website: https://processintelligence.solutions
Contact: info@processintelligence.solutions
'''
from pm4py.objects.ocel.obj import OCEL
from typing import Optional, Dict, Any
import os
import pandas as pd
from pm4py.objects.ocel.util import names_stripping
from enum import Enum
from pm4py.util import exec_utils, pandas_utils
from pm4py.objects.ocel.util import ocel_consistency
from pm4py.objects.ocel.util import filtering_utils



[docs]
class Parameters(Enum):
    ENABLE_NAMES_STRIPPING = "enable_names_stripping"




[docs]
def apply(ocel: OCEL, file_path: str, parameters: Optional[Dict[Any, Any]] = None):
    """
    Exports the given OCEL (OCEL 2.0) into a SQLite database.
    Automatically converts the event and object 'ocel:timestamp' fields
    to Python datetime objects to avoid SQLite binding errors.
    """
    if parameters is None:
        parameters = {}

    enable_names_stripping = exec_utils.get_param_value(Parameters.ENABLE_NAMES_STRIPPING, parameters, True)

    import sqlite3

    # If the target file already exists, remove it to avoid conflicts
    if os.path.exists(file_path):
        os.remove(file_path)

    # Enforce consistency checks and propagate filtering
    ocel = ocel_consistency.apply(ocel, parameters=parameters)
    ocel = filtering_utils.propagate_relations_filtering(ocel, parameters=parameters)

    event_id = ocel.event_id_column
    event_activity = ocel.event_activity
    event_timestamp = ocel.event_timestamp
    object_id = ocel.object_id_column
    object_type = ocel.object_type_column
    qualifier = ocel.qualifier
    changed_field = ocel.changed_field

    conn = sqlite3.connect(file_path)

    # Write the "event" table
    EVENTS = ocel.events[[event_id, event_activity]].rename(
        columns={event_id: "ocel_id", event_activity: "ocel_type"}
    )
    EVENTS = EVENTS.drop_duplicates()
    EVENTS.to_sql("event", conn, index=False)

    # Write the "object" table
    OBJECTS = ocel.objects[[object_id, object_type]].rename(
        columns={object_id: "ocel_id", object_type: "ocel_type"}
    )
    OBJECTS = OBJECTS.drop_duplicates()
    OBJECTS.to_sql("object", conn, index=False)

    # Prepare event and object type mappings
    event_types = sorted(pandas_utils.format_unique(EVENTS["ocel_type"].unique()))
    object_types = sorted(pandas_utils.format_unique(OBJECTS["ocel_type"].unique()))

    EVENT_CORR_TYPE = pandas_utils.instantiate_dataframe(
        {"ocel_type": event_types, "ocel_type_map": event_types}
    )
    OBJECT_CORR_TYPE = pandas_utils.instantiate_dataframe(
        {"ocel_type": object_types, "ocel_type_map": object_types}
    )

    # Optionally strip names
    if enable_names_stripping:
        EVENT_CORR_TYPE["ocel_type_map"] = EVENT_CORR_TYPE["ocel_type_map"].apply(
            lambda x: names_stripping.apply(x)
        )
        OBJECT_CORR_TYPE["ocel_type_map"] = OBJECT_CORR_TYPE["ocel_type_map"].apply(
            lambda x: names_stripping.apply(x)
        )

    EVENT_CORR_TYPE.to_sql("event_map_type", conn, index=False)
    OBJECT_CORR_TYPE.to_sql("object_map_type", conn, index=False)

    # Write the event-object relationships
    E2O = ocel.relations[[event_id, object_id, qualifier]].rename(
        columns={event_id: "ocel_event_id", object_id: "ocel_object_id", qualifier: "ocel_qualifier"}
    )
    E2O.to_sql("event_object", conn, index=False)

    # Write the object-object relationships
    O2O = ocel.o2o.rename(
        columns={
            object_id: "ocel_source_id",
            object_id + "_2": "ocel_target_id",
            qualifier: "ocel_qualifier",
        }
    )
    O2O.to_sql("object_object", conn, index=False)

    # Export event tables by activity
    e_types = sorted(pandas_utils.format_unique(ocel.events[event_activity].unique()))
    for act in e_types:
        df = ocel.events[ocel.events[event_activity] == act].dropna(how="all", axis="columns")
        # Rename timestamp column
        df = df.rename(columns={event_id: "ocel_id", event_timestamp: "ocel_time"})

        # Force ID to string
        df["ocel_id"] = df["ocel_id"].astype("string")

        # Convert 'ocel_time' to Python datetime (if it's a pandas Timestamp or string)
        df["ocel_time"] = pd.to_datetime(df["ocel_time"], errors="coerce")

        act_red = names_stripping.apply(act) if enable_names_stripping else act

        df = df.drop_duplicates()
        df.to_sql("event_" + act_red, conn, index=False)

    # Export object tables by type
    o_types = sorted(pandas_utils.format_unique(ocel.objects[object_type].unique()))
    for ot in o_types:
        df = ocel.objects[ocel.objects[object_type] == ot].dropna(how="all", axis="columns")
        df = df.rename(columns={object_id: "ocel_id"})
        del df[object_type]

        # Object changes table
        df2 = ocel.object_changes[ocel.object_changes[object_type] == ot].dropna(how="all", axis="columns")
        if len(df2) > 0:
            del df2[object_type]
            df2 = df2.rename(
                columns={
                    object_id: "ocel_id",
                    event_timestamp: "ocel_time",
                    changed_field: "ocel_changed_field",
                }
            )

            # Convert 'ocel_time' to Python datetime (if it's a pandas Timestamp or string)
            df2["ocel_time"] = pd.to_datetime(df2["ocel_time"], errors="coerce")

            df = pandas_utils.concat([df, df2], axis=0)

        df["ocel_id"] = df["ocel_id"].astype("string")

        ot_red = names_stripping.apply(ot) if enable_names_stripping else ot

        df.to_sql("object_" + ot_red, conn, index=False)

    conn.close()