├── .dockerignore ├── .github └── workflows │ └── simod.yml ├── .gitignore ├── .java-version ├── .readthedocs.yaml ├── .sdkmanrc ├── Dockerfile ├── LICENSE ├── README.md ├── ansible.cfg ├── ansible ├── docker.yml ├── experiment.yml ├── inventory.yml ├── templates │ └── docker_run_cmd.sh.j2 └── test.yml ├── benchmarking ├── analyze_results.py ├── docker_collect_results.py ├── docker_jobs.py ├── input │ ├── config.yml │ ├── config_f_naive.yml │ ├── config_no_extraneous.yml │ └── config_observed_arrivals.yml ├── plot_measurements.py └── preprocess_logs.py ├── build_docker.sh ├── docs ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── _static │ ├── complete_configuration.yml │ ├── configuration_example.yml │ ├── configuration_example_data_aware.yml │ ├── configuration_example_fuzzy.yml │ ├── configuration_example_with_evaluation.yml │ ├── configuration_example_with_provided_process_model.yml │ ├── configuration_one_shot.yml │ └── simod.png │ ├── api.rst │ ├── citation.rst │ ├── conf.py │ ├── index.rst │ ├── installation.rst │ └── usage.rst ├── poetry.toml ├── pyproject.toml ├── resources ├── config │ ├── benchmark │ │ ├── benchmark_diff.yml │ │ ├── benchmark_diff_data_aware.yml │ │ ├── benchmark_diff_extr.yml │ │ ├── benchmark_fuzz.yml │ │ ├── benchmark_fuzz_extr.yml │ │ └── benchmark_pool.yml │ ├── complete_configuration.yml │ ├── configuration_example.yml │ ├── configuration_example_data_aware.yml │ ├── configuration_example_fuzzy.yml │ ├── configuration_example_with_evaluation.yml │ ├── configuration_example_with_provided_process_model.yml │ └── configuration_one_shot.yml ├── event_logs │ ├── LoanApp_simplified_test.csv.gz │ ├── LoanApp_simplified_train.csv.gz │ └── PurchasingExample.csv.gz └── models │ └── LoanApp_simplified.bpmn ├── run.sh ├── src └── simod │ ├── __init__.py │ ├── batching │ ├── __init__.py │ ├── discovery.py │ └── types.py │ ├── branch_rules │ ├── __init__.py │ ├── discovery.py │ └── types.py │ ├── cli.py │ ├── cli_formatter.py │ ├── control_flow │ ├── __init__.py │ ├── discovery.py │ ├── lib │ │ ├── bpmn-layout-1.0.6-jar-with-dependencies.jar │ │ └── split-miner-1.7.1-all.jar │ ├── optimizer.py │ └── settings.py │ ├── data_attributes │ ├── __init__.py │ ├── discovery.py │ └── types.py │ ├── event_log │ ├── __init__.py │ ├── event_log.py │ └── preprocessor.py │ ├── extraneous_delays │ ├── __init__.py │ ├── optimizer.py │ ├── types.py │ └── utilities.py │ ├── metrics.py │ ├── prioritization │ ├── __init__.py │ ├── discovery.py │ └── types.py │ ├── resource_model │ ├── __init__.py │ ├── optimizer.py │ ├── repair.py │ └── settings.py │ ├── runtime_meter.py │ ├── settings │ ├── __init__.py │ ├── common_settings.py │ ├── control_flow_settings.py │ ├── extraneous_delays_settings.py │ ├── preprocessing_settings.py │ ├── resource_model_settings.py │ └── simod_settings.py │ ├── simod.py │ ├── simulation │ ├── __init__.py │ ├── parameters │ │ ├── BPS_model.py │ │ └── __init__.py │ └── prosimos.py │ └── utilities.py └── tests ├── __init__.py ├── assets ├── Control_flow_optimization_test.bpmn ├── Control_flow_optimization_test.csv ├── Insurance_Claims_test.csv ├── Insurance_Claims_train.csv ├── LoanApp_simplified.bpmn ├── LoanApp_simplified.csv.gz ├── LoanApp_simplified_2.csv.gz ├── LoanApp_simplified_without_approve_loan_offer.csv ├── PurchasingExample.xes ├── Resource_model_optimization_test.bpmn ├── Resource_model_optimization_test.csv ├── Simple_log_no_start_times.csv ├── Simple_log_with_batching.csv ├── Simple_log_with_prioritization.csv ├── bpic15 │ ├── 
BPIC15_1.bpmn │ ├── BPIC15_1.csv.gz │ └── bpic15_1_with_model_v4.yml ├── branch_rules │ ├── or.bpmn │ ├── or_1.csv.gz │ ├── or_2.csv.gz │ ├── or_3.csv.gz │ ├── or_4.csv.gz │ ├── or_5.csv.gz │ ├── or_6.csv.gz │ ├── or_7.csv.gz │ ├── or_8.csv.gz │ ├── xor.bpmn │ ├── xor_1.csv.gz │ ├── xor_2.csv.gz │ ├── xor_3.csv.gz │ ├── xor_5.csv.gz │ ├── xor_6.csv.gz │ └── xor_7.csv.gz ├── configuration_simod_basic.yml ├── configuration_simod_with_extraneous.yml ├── configuration_simod_with_model.yml ├── configuration_simod_with_model_and_batching.yml ├── configuration_simod_with_model_and_extraneous.yml ├── configuration_simod_with_model_and_prioritization.yml ├── control_flow_discovery_output │ ├── model-sm2.bpmn │ ├── model-sm3.bpmn │ ├── model-split_miner_v1.bpmn │ └── model-split_miner_v2.bpmn ├── data_attributes │ ├── case_attributes.csv.gz │ ├── event_attribute_1.csv.gz │ ├── event_attribute_15.csv.gz │ ├── event_attribute_3.csv.gz │ ├── event_attribute_5.csv.gz │ ├── event_attribute_7.csv.gz │ ├── event_attribute_9.csv.gz │ ├── global_attribute_1.csv.gz │ ├── global_attribute_15.csv.gz │ ├── global_attribute_3.csv.gz │ ├── global_attribute_5.csv.gz │ ├── global_attribute_7.csv.gz │ └── global_attribute_9.csv.gz ├── model_sequence_self_loop.xes ├── model_sequence_self_loop_only_end.xes └── process_model_with_SplitMiner_self_loops.bpmn ├── conftest.py ├── test_batching ├── __init__.py ├── assets │ ├── LoanApp_batch_sim_log.csv │ └── event_log_5.csv ├── test_batching_discovery.py └── test_types.py ├── test_bpic15.py ├── test_branch_rules ├── __init__.py └── test_discovery.py ├── test_case_attributes ├── __init__.py └── test_discovery.py ├── test_cli.py ├── test_control_flow ├── __init__.py ├── test_discovery.py └── test_optimizer.py ├── test_data_attributes ├── __init__.py └── test_discovery.py ├── test_event_log ├── __init__.py ├── test_event_log.py └── test_preprocessor.py ├── test_metrics.py ├── test_prioritization ├── __init__.py ├── test_prioritization_discovery.py └── test_prioritization_impact.py ├── test_resource_model ├── __init__.py └── test_optimizer.py ├── test_settings ├── __init__.py ├── test_control_flow_settings.py ├── test_resource_model_settings.py └── test_simod_settings.py ├── test_simod.py ├── test_simulation ├── __init__.py ├── assets │ ├── simulated_log_0.csv │ ├── simulated_log_1.csv │ ├── simulated_log_2.csv │ └── validation_log.csv └── test_evaluate_logs.py └── test_utilities.py /.dockerignore: -------------------------------------------------------------------------------- 1 | * 2 | !src 3 | !*.bash 4 | !*.sh 5 | !LICENSE 6 | !*.toml 7 | !*.ini 8 | !*.md 9 | !*.txt -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | outputs 2 | output 3 | *.pyc 4 | *.stats 5 | venv* 6 | .venv 7 | .idea 8 | .vscode 9 | *.egg-info 10 | .DS_Store 11 | tests/assets/validation_* 12 | input_files 13 | log/ 14 | .pytest_cache/ 15 | 16 | *.dat 17 | *.log 18 | *.out 19 | 20 | # Sonarqube files and folders 21 | .sonar_lock 22 | .scannerwork 23 | 24 | .pymon 25 | .benchmarks 26 | 27 | .coverage* 28 | htmlcov/ 29 | 30 | resources/private 31 | build/ 32 | vendor/ 33 | benchmarking/*.csv 34 | benchmarking/*.yaml 35 | benchmarking/*.yml 36 | benchmarking/input/logs 37 | benchmarking/results 38 | tests/test_benchmarking/logs 39 | sandbox 40 | dist/ -------------------------------------------------------------------------------- /.java-version: 
-------------------------------------------------------------------------------- 1 | 1.8 2 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the OS, Python version, and other tools you might need 8 | build: 9 | os: ubuntu-24.04 10 | tools: 11 | python: "3.9" 12 | 13 | # Build documentation in the "docs/" directory with Sphinx 14 | sphinx: 15 | configuration: docs/source/conf.py 16 | 17 | # Optionally, but recommended, 18 | # declare the Python requirements required to build your documentation 19 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 20 | python: 21 | install: 22 | - requirements: docs/requirements.txt 23 | -------------------------------------------------------------------------------- /.sdkmanrc: -------------------------------------------------------------------------------- 1 | # Enable auto-env through the sdkman_auto_env config 2 | # Add key=value pairs of SDKs to use below 3 | java=8.0.382-amzn 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM openjdk:8 2 | 3 | RUN apt-get update && apt-get install -y \ 4 | python3 \ 5 | python3-pip \ 6 | python3-venv 7 | 8 | RUN apt-get clean \ 9 | && rm -rf /var/lib/apt/lists/* \ 10 | && rm -rf /tmp/* \ 11 | && rm -rf /var/tmp/* 12 | 13 | WORKDIR /usr/src/Simod 14 | COPY . . 15 | RUN pip install -U pip 16 | RUN pip install poetry 17 | RUN poetry install 18 | 19 | CMD ["/bin/bash"] 20 | 21 | # Docker usage example: 22 | # $ docker run --rm -it -v /path/to/resources/:/usr/src/Simod/resources -v /path/to/output:/usr/src/Simod/outputs nokal/simod bash -------------------------------------------------------------------------------- /ansible/docker.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Update the Docker image 3 | hosts: main 4 | 5 | tasks: 6 | - name: Pull the latest Docker image 7 | community.docker.docker_image: 8 | name: "{{ docker_image }}" 9 | source: pull 10 | pull: 11 | platform: amd64 12 | vars: 13 | docker_image: nokal/simod:latest 14 | tags: docker -------------------------------------------------------------------------------- /ansible/experiment.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Set up experiments environment 3 | hosts: main 4 | 5 | tasks: 6 | - set_fact: 7 | experiment_name: "{{ lookup('pipe', 'date +%Y-%m-%d_%H-%M-%S') }}" 8 | 9 | - name: Create experiments folder 10 | ansible.builtin.file: 11 | path: ~/simod_experiments/{{ experiment_name }} 12 | state: directory 13 | mode: 0775 14 | 15 | - name: Copy the input folder 16 | ansible.builtin.copy: 17 | src: ../benchmarking/input 18 | dest: ~/simod_experiments/{{ experiment_name }} 19 | mode: 0775 20 | 21 | - name: Copy the Python script 22 | ansible.builtin.copy: 23 | src: ../benchmarking/docker_jobs.py 24 | dest: ~/simod_experiments/{{ experiment_name }} 25 | mode: 0775 26 | 27 | - name: Start the experiments 28 | hosts: main 29 | 30 | tasks: 31 | - name: Start the experiments 32 | ansible.builtin.command: "~/miniconda3/bin/python docker_jobs.py" 33 | args: 34 | chdir: 
~/simod_experiments/{{ experiment_name }} 35 | 36 | # - name: Copy the Python script 37 | # ansible.builtin.copy: 38 | # src: ../benchmarking/docker_collect_results.py 39 | # dest: ~/simod_experiments/{{ experiment_name }} 40 | # mode: 0775 41 | 42 | # - name: Collect results 43 | # ansible.builtin.command: "~/miniconda3/bin/python docker_collect_results.py" 44 | # args: 45 | # chdir: ~/simod_experiments/{{ experiment_name }} 46 | -------------------------------------------------------------------------------- /ansible/inventory.yml: -------------------------------------------------------------------------------- 1 | all: 2 | hosts: 3 | main: 4 | ansible_host: simodtesting.cloud.ut.ee 5 | hpc: 6 | ansible_host: rocket.hpc.ut.ee -------------------------------------------------------------------------------- /ansible/templates/docker_run_cmd.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /usr/src/Simod 4 | source venv/bin/activate 5 | Xvfb :99 &>/dev/null & disown 6 | {{ command }} 7 | -------------------------------------------------------------------------------- /ansible/test.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Testing 3 | hosts: main 4 | 5 | tasks: 6 | - set_fact: 7 | base_dir: /home/ihar/simod_testing 8 | experiment_name: "{{ lookup('pipe', 'date +%Y-%m-%d_%H-%M-%S') }}" 9 | tags: testing 10 | 11 | - name: Create the testing directory 12 | file: 13 | path: "{{ item }}" 14 | state: directory 15 | with_items: 16 | - "{{ base_dir }}" 17 | - "{{ base_dir }}/{{ experiment_name }}" 18 | tags: testing 19 | 20 | - name: Prepare Bash script for testing 21 | template: 22 | src: templates/docker_run_cmd.sh.j2 23 | dest: "{{ base_dir }}/{{ experiment_name }}/docker_run.sh" 24 | mode: 0755 25 | vars: 26 | command: pytest -vv --durations=0 -m "not system and not integration" 27 | tags: testing 28 | 29 | - name: Run unit tests in a container 30 | community.docker.docker_container: 31 | name: simod_testing 32 | image: "{{ docker_image }}" 33 | command: /bin/bash -c "cd /usr/src/Simod/input && ./docker_run.sh" 34 | volumes: 35 | - "{{ base_dir }}/{{ experiment_name }}:/usr/src/Simod/input" 36 | - "{{ base_dir }}/{{ experiment_name }}/output:/usr/src/Simod/outputs" 37 | state: started 38 | recreate: yes 39 | restart_policy: no 40 | tty: yes 41 | detach: no 42 | cleanup: yes 43 | vars: 44 | docker_image: nokal/simod:latest 45 | register: docker_testing 46 | ignore_errors: yes 47 | tags: testing -------------------------------------------------------------------------------- /benchmarking/analyze_results.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | from typing import Optional 4 | 5 | import pandas as pd 6 | 7 | simod_version = "3.6.0" 8 | results_dir = Path(__file__).parent / Path(f"results/{simod_version}/diff_observed-arrivals") 9 | 10 | metric_names_mapping = { 11 | "absolute_event_distribution": "AED", 12 | "arrival_event_distribution": "CAR", 13 | "circadian_event_distribution": "CED", 14 | "cycle_time_distribution": "CTD", 15 | "relative_event_distribution": "RED", 16 | "three_gram_distance": "NGD(3)", 17 | "two_gram_distance": "NGD", 18 | } 19 | 20 | event_log_names_mapping = { 21 | "BPIC_2012_train": "BPIC12", 22 | "BPIC_2017_train": "BPIC17", 23 | "CallCenter_train": "CALL", 24 | "AcademicCredentials_train": "AC_CRE", 25 | }
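# Expected layout of each result directory (descriptive comment, inferred from DiscoveryResult below):
#   <result_dir>/
#     evaluation/evaluation_*.csv            evaluation measures, read into a DataFrame
#     evaluation/simulation/simulated_*.csv  simulated event logs
#     <event_log_name>.bpmn                  discovered model; its file stem identifies the event log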
26 | 27 | 28 | @dataclass 29 | class DiscoveryResult: 30 | result_dir: Path 31 | 32 | _evaluation_measures_path: Optional[Path] = None 33 | _evaluation_measures: Optional[pd.DataFrame] = None 34 | _simulated_log_paths: Optional[list[Path]] = None 35 | _name: Optional[str] = None 36 | 37 | def __post_init__(self): 38 | evaluation_dir = self.result_dir / "evaluation" 39 | self._evaluation_measures_path = next(evaluation_dir.glob("evaluation_*.csv")) 40 | self._simulated_log_paths = list((evaluation_dir / "simulation").glob("simulated_*.csv")) 41 | self._name = next(self.result_dir.glob("*.bpmn")).stem 42 | self._name = event_log_names_mapping[self._name] 43 | self._evaluation_measures = pd.read_csv(self._evaluation_measures_path).drop(columns=["run_num"]) 44 | self._evaluation_measures["name"] = self._name 45 | self._rename_column_values("metric", metric_names_mapping) 46 | 47 | def _rename_column_values(self, column_name: str, mapping: dict[str, str]): 48 | self._evaluation_measures[column_name] = self._evaluation_measures[column_name].apply( 49 | lambda item: mapping[item] 50 | ) 51 | 52 | @property 53 | def evaluation_measures(self) -> pd.DataFrame: 54 | return self._evaluation_measures 55 | 56 | @property 57 | def mean_evaluation_measures(self) -> pd.DataFrame: 58 | return self.evaluation_measures.groupby(["metric"]).mean(numeric_only=True).assign(name=self.name).reset_index() 59 | 60 | @property 61 | def name(self) -> str: 62 | return self._name 63 | 64 | 65 | # Current measurements 66 | results = [DiscoveryResult(result_dir / "best_result") for result_dir in results_dir.iterdir() if result_dir.is_dir()] 67 | mean_evaluation_measures = pd.concat([result.mean_evaluation_measures for result in results]).reset_index(drop=True) 68 | mean_evaluation_measures["simod_version"] = simod_version 69 | 70 | # Save measurements 71 | mean_evaluation_measures.to_csv("measurements.csv", index=False) 72 | -------------------------------------------------------------------------------- /benchmarking/input/config.yml: -------------------------------------------------------------------------------- 1 | version: 4 2 | common: 3 | train_log_path: 4 | test_log_path: 5 | num_final_evaluations: 10 6 | evaluation_metrics: 7 | - 3_gram_distance 8 | - 2_gram_distance 9 | - absolute_event_distribution 10 | - relative_event_distribution 11 | - circadian_event_distribution 12 | - arrival_event_distribution 13 | - cycle_time_distribution 14 | log_ids: 15 | case: case_id 16 | activity: activity 17 | resource: resource 18 | start_time: start_time 19 | end_time: end_time 20 | enabled_time: enabled_time 21 | preprocessing: 22 | multitasking: false 23 | enable_time_concurrency_threshold: 0.5 24 | concurrency_df: 0.75 25 | concurrency_l2l: 0.9 26 | concurrency_l1l: 0.9 27 | control_flow: 28 | optimization_metric: n_gram_distance 29 | num_iterations: 30 30 | num_evaluations_per_iteration: 5 31 | gateway_probabilities: discovery 32 | discovery_algorithm: sm1 33 | epsilon: 34 | - 0.1 35 | - 1.0 36 | eta: 37 | - 0.2 38 | - 0.6 39 | replace_or_joins: 40 | - true 41 | - false 42 | prioritize_parallelism: 43 | - true 44 | - false 45 | resource_model: 46 | optimization_metric: circadian_event_distribution 47 | num_iterations: 40 48 | num_evaluations_per_iteration: 5 49 | resource_profiles: 50 | discovery_type: differentiated_fuzzy 51 | granularity: 52 | - 15 53 | - 60 54 | fuzzy_angle: 55 | - 0.1 56 | - 0.9 57 | extraneous_activity_delays: 58 | optimization_metric: relative_event_distribution 59 | num_iterations: 1 60 | 
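# NOTE: train_log_path and test_log_path are left empty in this template on purpose;
# presumably they are filled in per event log by the benchmarking job runner (see docker_jobs.py).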
-------------------------------------------------------------------------------- /benchmarking/input/config_f_naive.yml: -------------------------------------------------------------------------------- 1 | version: 4 2 | common: 3 | train_log_path: 4 | test_log_path: 5 | num_final_evaluations: 10 6 | evaluation_metrics: 7 | - 3_gram_distance 8 | - 2_gram_distance 9 | - absolute_event_distribution 10 | - relative_event_distribution 11 | - circadian_event_distribution 12 | - arrival_event_distribution 13 | - cycle_time_distribution 14 | log_ids: 15 | case: case_id 16 | activity: activity 17 | resource: resource 18 | start_time: start_time 19 | end_time: end_time 20 | enabled_time: enabled_time 21 | preprocessing: 22 | multitasking: false 23 | enable_time_concurrency_threshold: 0.5 24 | concurrency_df: 0.75 25 | concurrency_l2l: 0.9 26 | concurrency_l1l: 0.9 27 | control_flow: 28 | optimization_metric: n_gram_distance 29 | num_iterations: 30 30 | num_evaluations_per_iteration: 5 31 | gateway_probabilities: discovery 32 | discovery_algorithm: sm1 33 | epsilon: 34 | - 0.1 35 | - 1.0 36 | eta: 37 | - 0.2 38 | - 0.6 39 | replace_or_joins: 40 | - true 41 | - false 42 | prioritize_parallelism: 43 | - true 44 | - false 45 | resource_model: 46 | optimization_metric: circadian_event_distribution 47 | num_iterations: 40 48 | num_evaluations_per_iteration: 5 49 | resource_profiles: 50 | discovery_type: differentiated_fuzzy 51 | granularity: 52 | - 15 53 | - 60 54 | fuzzy_angle: 55 | - 0.1 56 | - 0.9 57 | extraneous_activity_delays: 58 | optimization_metric: relative_event_distribution 59 | num_iterations: 20 60 | discovery_method: naive 61 | -------------------------------------------------------------------------------- /benchmarking/input/config_no_extraneous.yml: -------------------------------------------------------------------------------- 1 | version: 4 2 | common: 3 | train_log_path: 4 | test_log_path: 5 | num_final_evaluations: 10 6 | evaluation_metrics: 7 | - 3_gram_distance 8 | - 2_gram_distance 9 | - absolute_event_distribution 10 | - relative_event_distribution 11 | - circadian_event_distribution 12 | - arrival_event_distribution 13 | - cycle_time_distribution 14 | log_ids: 15 | case: case_id 16 | activity: activity 17 | resource: resource 18 | start_time: start_time 19 | end_time: end_time 20 | enabled_time: enabled_time 21 | preprocessing: 22 | multitasking: false 23 | enable_time_concurrency_threshold: 0.5 24 | concurrency_df: 0.75 25 | concurrency_l2l: 0.9 26 | concurrency_l1l: 0.9 27 | control_flow: 28 | optimization_metric: n_gram_distance 29 | num_iterations: 30 30 | num_evaluations_per_iteration: 5 31 | gateway_probabilities: discovery 32 | discovery_algorithm: sm1 33 | epsilon: 34 | - 0.1 35 | - 1.0 36 | eta: 37 | - 0.2 38 | - 0.6 39 | replace_or_joins: 40 | - true 41 | - false 42 | prioritize_parallelism: 43 | - true 44 | - false 45 | resource_model: 46 | optimization_metric: circadian_event_distribution 47 | num_iterations: 40 48 | num_evaluations_per_iteration: 5 49 | resource_profiles: 50 | discovery_type: differentiated_fuzzy 51 | granularity: 52 | - 15 53 | - 60 54 | fuzzy_angle: 55 | - 0.1 56 | - 0.9 57 | 58 | -------------------------------------------------------------------------------- /benchmarking/input/config_observed_arrivals.yml: -------------------------------------------------------------------------------- 1 | version: 4 2 | common: 3 | train_log_path: 4 | test_log_path: 5 | num_final_evaluations: 10 6 | evaluation_metrics: 7 | - 3_gram_distance 8 | - 
2_gram_distance 9 | - absolute_event_distribution 10 | - relative_event_distribution 11 | - circadian_event_distribution 12 | - arrival_event_distribution 13 | - cycle_time_distribution 14 | log_ids: 15 | case: case_id 16 | activity: activity 17 | resource: resource 18 | start_time: start_time 19 | end_time: end_time 20 | enabled_time: enabled_time 21 | use_observed_arrival_distribution: true 22 | preprocessing: 23 | multitasking: false 24 | enable_time_concurrency_threshold: 0.5 25 | concurrency_df: 0.75 26 | concurrency_l2l: 0.9 27 | concurrency_l1l: 0.9 28 | control_flow: 29 | optimization_metric: n_gram_distance 30 | num_iterations: 30 31 | num_evaluations_per_iteration: 5 32 | gateway_probabilities: discovery 33 | discovery_algorithm: sm1 34 | epsilon: 35 | - 0.1 36 | - 1.0 37 | eta: 38 | - 0.2 39 | - 0.6 40 | replace_or_joins: 41 | - true 42 | - false 43 | prioritize_parallelism: 44 | - true 45 | - false 46 | resource_model: 47 | optimization_metric: circadian_event_distribution 48 | num_iterations: 40 49 | num_evaluations_per_iteration: 5 50 | resource_profiles: 51 | discovery_type: differentiated 52 | granularity: 53 | - 15 54 | - 60 55 | confidence: 56 | - 0.1 57 | - 1.0 58 | support: 59 | - 0.1 60 | - 1.0 61 | participation: 0.4 62 | extraneous_activity_delays: 63 | optimization_metric: relative_event_distribution 64 | num_iterations: 1 65 | -------------------------------------------------------------------------------- /benchmarking/plot_measurements.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pandas as pd 4 | import seaborn as sns 5 | from matplotlib import pyplot as plt 6 | 7 | measurements_path = Path(__file__).parent / "results/measurements.csv" 8 | 9 | df = pd.read_csv(measurements_path).sort_values(by=["metric", "name", "simod_version"]) 10 | 11 | ncols = 4 12 | nrows = df["metric"].nunique() * df["name"].nunique() / ncols 13 | 14 | fig, axes = plt.subplots(nrows=int(nrows), ncols=int(ncols), figsize=(20, 40)) 15 | 16 | for group_name, group_df in df.groupby(["metric", "name"]): 17 | metric, name = group_name 18 | ax = axes.flatten()[list(df["metric"].unique()).index(metric) * 4 + list(df["name"].unique()).index(name)] 19 | ax.set_title(f"{metric} - {name}") 20 | sns.barplot(data=group_df, x="simod_version", y="distance", ax=ax) 21 | 22 | plt.tight_layout() 23 | plt.savefig(Path(__file__).parent / "measurements.png") 24 | -------------------------------------------------------------------------------- /benchmarking/preprocess_logs.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pandas as pd 4 | 5 | 6 | def main(): 7 | base_dir = Path("input/logs") 8 | 9 | for log_path in base_dir.glob("*.csv.gz"): 10 | print(log_path) 11 | df = pd.read_csv(log_path) 12 | df = df.apply(lambda x: x.str.strip() if x.dtype == "object" else x) 13 | df.to_csv(log_path, index=False, compression="gzip") 14 | 15 | 16 | if __name__ == "__main__": 17 | main() 18 | -------------------------------------------------------------------------------- /build_docker.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | docker buildx build --platform linux/amd64,linux/arm64 -t nokal/simod -f Dockerfile --push . 
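# NOTE: building for multiple platforms with --push assumes an active Buildx builder that
# supports multi-platform builds (the default "docker" driver does not). If needed, one can
# be created with, for example:
#   docker buildx create --name multiarch --driver docker-container --use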
-------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | click==8.1.8 2 | hyperopt==0.2.7 3 | lxml==5.3.0 4 | matplotlib==3.9.4 5 | networkx==3.2.1 6 | numpy==1.26.4 7 | pandas==2.2.3 8 | pendulum==3.0.0 9 | pydantic==2.10.6 10 | python-dotenv==1.0.1 11 | python-multipart==0.0.12 12 | pytz==2024.2 13 | PyYAML==6.0.2 14 | requests==2.32.3 15 | scipy==1.13.1 16 | statistics==1.0.3.5 17 | tqdm==4.67.1 18 | xmltodict==0.13.0 19 | prosimos==2.0.6 20 | extraneous-activity-delays==2.2.1 21 | openxes-cli-py==0.1.15 22 | pix-framework==0.13.17 23 | log-distance-measures==2.0.2 24 | sphinx-rtd-theme 25 | -------------------------------------------------------------------------------- /docs/source/_static/configuration_example.yml: -------------------------------------------------------------------------------- 1 | ################################################################################################################# 2 | # Simple configuration example with i) no evaluation of the final BPS model, ii) 20 iterations of control-flow # 3 | # discovery, iii) 20 iterations of resource model (differentiated) discovery, and iv) direct discovery of # 4 | # extraneous delays. 
# 5 | ################################################################################################################# 6 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model # 7 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 8 | ################################################################################################################# 9 | version: 5 10 | ########## 11 | # Common # 12 | ########## 13 | common: 14 | # Path to the event log in CSV format 15 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 16 | # Specify the name for each of the columns in the CSV file (XES standard by default) 17 | log_ids: 18 | case: "case_id" 19 | activity: "activity" 20 | resource: "resource" 21 | enabled_time: "enabled_time" # If not present in the log, automatically computed 22 | start_time: "start_time" 23 | end_time: "end_time" 24 | # Whether to discover case attributes or not 25 | discover_data_attributes: false 26 | ################# 27 | # Preprocessing # 28 | ################# 29 | preprocessing: 30 | # Threshold to consider two activities as concurrent when computing the enabled time (if necessary) 31 | enable_time_concurrency_threshold: 0.75 32 | ################ 33 | # Control-flow # 34 | ################ 35 | control_flow: 36 | # Metric to guide the optimization process (loss function to minimize) 37 | optimization_metric: two_gram_distance 38 | # Number of optimization iterations over the search space 39 | num_iterations: 20 40 | # Number of times to evaluate each iteration (using the mean of all of them) 41 | num_evaluations_per_iteration: 3 42 | # Method for discovering gateway probabilities 43 | gateway_probabilities: discovery 44 | # Discover process model with SplitMiner v3 45 | mining_algorithm: sm1 46 | # Number of concurrent relations between events to be captured 47 | epsilon: 48 | - 0.05 49 | - 0.4 50 | # Threshold for filtering the incoming and outgoing edges 51 | eta: 52 | - 0.2 53 | - 0.7 54 | # Whether to replace non-trivial OR joins or not 55 | replace_or_joins: 56 | - true 57 | - false 58 | # Whether to prioritize parallelism over loops or not 59 | prioritize_parallelism: 60 | - true 61 | - false 62 | ################## 63 | # Resource model # 64 | ################## 65 | resource_model: 66 | # Metric to guide the optimization process (loss function to minimize) 67 | optimization_metric: circadian_emd 68 | # Number of optimization iterations over the search space 69 | num_iterations: 20 70 | # Number of times to evaluate each iteration (using the mean of all of them) 71 | num_evaluations_per_iteration: 3 72 | # Whether to discover prioritization or batching behavior 73 | discover_prioritization_rules: false 74 | discover_batching_rules: false 75 | # Resource profiles configuration 76 | resource_profiles: 77 | # Resource profile discovery type 78 | discovery_type: differentiated 79 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 80 | granularity: 60 81 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 82 | confidence: 83 | - 0.5 84 | - 0.85 85 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 86 | support: 87 | - 0.05 88 | - 0.5 89 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 90 | participation: 0.4 91 | 
##################### 92 | # Extraneous delays # 93 | ##################### 94 | extraneous_activity_delays: 95 | # Method to compute the extraneous delay 96 | discovery_method: eclipse-aware 97 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 98 | num_iterations: 1 99 | -------------------------------------------------------------------------------- /docs/source/_static/configuration_example_data_aware.yml: -------------------------------------------------------------------------------- 1 | ################################################################################################################# 2 | # Simple configuration example with i) no evaluation of the final BPS model, ii) 10 iterations of control-flow # 3 | # discovery (BPMN model provided) with data-aware decision points, iii) 20 iterations of resource model # 4 | # (differentiated) discovery, and iv) no discovery of extraneous delays. # 5 | ################################################################################################################# 6 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model # 7 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 8 | ################################################################################################################# 9 | version: 5 10 | ########## 11 | # Common # 12 | ########## 13 | common: 14 | # Path to the event log in CSV format 15 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 16 | # Specify the name for each of the columns in the CSV file (XES standard by default) 17 | log_ids: 18 | case: "case_id" 19 | activity: "activity" 20 | resource: "resource" 21 | enabled_time: "enabled_time" # If not present in the log, automatically computed 22 | start_time: "start_time" 23 | end_time: "end_time" 24 | # Whether to discover case attributes or not 25 | discover_data_attributes: true 26 | ################# 27 | # Preprocessing # 28 | ################# 29 | preprocessing: 30 | # Threshold to consider two activities as concurrent when computing the enabled time (if necessary) 31 | enable_time_concurrency_threshold: 0.75 32 | ################ 33 | # Control-flow # 34 | ################ 35 | control_flow: 36 | # Metric to guide the optimization process (loss function to minimize) 37 | optimization_metric: two_gram_distance 38 | # Number of optimization iterations over the search space 39 | num_iterations: 20 40 | # Number of times to evaluate each iteration (using the mean of all of them) 41 | num_evaluations_per_iteration: 3 42 | # Method for discovering gateway probabilities 43 | gateway_probabilities: discovery 44 | # Discover process model with SplitMiner v3 45 | mining_algorithm: sm1 46 | # Number of concurrent relations between events to be captured 47 | epsilon: 48 | - 0.05 49 | - 0.4 50 | # Threshold for filtering the incoming and outgoing edges 51 | eta: 52 | - 0.2 53 | - 0.7 54 | # Whether to replace non-trivial OR joins or not 55 | replace_or_joins: 56 | - true 57 | - false 58 | # Whether to prioritize parallelism over loops or not 59 | prioritize_parallelism: 60 | - true 61 | - false 62 | # Discover data-aware branching rules, i.e., BPMN decision points based on value of data attributes 63 | discover_branch_rules: true 64 | # Minimum f-score value to consider the discovered data-aware branching rules 65 | f_score: 66 | - 0.3 67 | - 0.9 68 | ################## 69 | # Resource model # 70 | 
################## 71 | resource_model: 72 | # Metric to guide the optimization process (loss function to minimize) 73 | optimization_metric: circadian_emd 74 | # Number of optimization iterations over the search space 75 | num_iterations: 20 76 | # Number of times to evaluate each iteration (using the mean of all of them) 77 | num_evaluations_per_iteration: 3 78 | # Whether to discover prioritization or batching behavior 79 | discover_prioritization_rules: false 80 | discover_batching_rules: false 81 | # Resource profiles configuration 82 | resource_profiles: 83 | # Resource profile discovery type 84 | discovery_type: differentiated 85 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 86 | granularity: 60 87 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 88 | confidence: 89 | - 0.5 90 | - 0.85 91 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 92 | support: 93 | - 0.05 94 | - 0.5 95 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 96 | participation: 0.4 97 | ##################### 98 | # Extraneous delays # 99 | ##################### 100 | extraneous_activity_delays: 101 | # Method to compute the extraneous delay 102 | discovery_method: eclipse-aware 103 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 104 | num_iterations: 1 105 | -------------------------------------------------------------------------------- /docs/source/_static/configuration_example_fuzzy.yml: -------------------------------------------------------------------------------- 1 | ################################################################################################################# 2 | # Simple configuration example with i) no evaluation of the final BPS model, ii) 20 iterations of control-flow # 3 | # discovery, iii) 10 iterations of resource model (fuzzy availability) discovery, and iv) no discovery of # 4 | # extraneous delays. 
# 5 | ################################################################################################################# 6 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model # 7 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 8 | ################################################################################################################# 9 | version: 5 10 | ########## 11 | # Common # 12 | ########## 13 | common: 14 | # Path to the event log in CSV format 15 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 16 | # Specify the name for each of the columns in the CSV file (XES standard by default) 17 | log_ids: 18 | case: "case_id" 19 | activity: "activity" 20 | resource: "resource" 21 | enabled_time: "enabled_time" # If not present in the log, automatically computed 22 | start_time: "start_time" 23 | end_time: "end_time" 24 | # Whether to discover case attributes or not 25 | discover_data_attributes: false 26 | ################# 27 | # Preprocessing # 28 | ################# 29 | preprocessing: 30 | # Threshold to consider two activities as concurrent when computing the enabled time (if necessary) 31 | enable_time_concurrency_threshold: 0.75 32 | ################ 33 | # Control-flow # 34 | ################ 35 | control_flow: 36 | # Metric to guide the optimization process (loss function to minimize) 37 | optimization_metric: two_gram_distance 38 | # Number of optimization iterations over the search space 39 | num_iterations: 20 40 | # Number of times to evaluate each iteration (using the mean of all of them) 41 | num_evaluations_per_iteration: 3 42 | # Method for discovering gateway probabilities 43 | gateway_probabilities: discovery 44 | # Discover process model with SplitMiner v3 45 | mining_algorithm: sm1 46 | # Number of concurrent relations between events to be captured 47 | epsilon: 48 | - 0.05 49 | - 0.4 50 | # Threshold for filtering the incoming and outgoing edges 51 | eta: 52 | - 0.2 53 | - 0.7 54 | # Whether to replace non-trivial OR joins or not 55 | replace_or_joins: 56 | - true 57 | - false 58 | # Whether to prioritize parallelism over loops or not 59 | prioritize_parallelism: 60 | - true 61 | - false 62 | ################## 63 | # Resource model # 64 | ################## 65 | resource_model: 66 | # Metric to guide the optimization process (loss function to minimize) 67 | optimization_metric: circadian_emd 68 | # Number of optimization iterations over the search space 69 | num_iterations: 10 70 | # Number of times to evaluate each iteration (using the mean of all of them) 71 | num_evaluations_per_iteration: 3 72 | # Whether to discover prioritization or batching behavior 73 | discover_prioritization_rules: false 74 | discover_batching_rules: false 75 | # Resource profiles configuration 76 | resource_profiles: 77 | # Resource profile discovery type 78 | discovery_type: differentiated_fuzzy 79 | # Duration of each granule in the resource calendar that will get its own probability 80 | granularity: 60 81 | # Angle of the fuzzy trapezoid when computing the availability probability for an activity (angle from start to end) 82 | fuzzy_angle: 83 | - 0.1 84 | - 0.9 85 | -------------------------------------------------------------------------------- /docs/source/_static/configuration_example_with_evaluation.yml: -------------------------------------------------------------------------------- 1 | 
################################################################################################################# 2 | # Same simple configuration as 'configuration_example.yml' but evaluating the quality of the final BPS model # 3 | ################################################################################################################# 4 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model # 5 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 6 | ################################################################################################################# 7 | version: 5 8 | ########## 9 | # Common # 10 | ########## 11 | common: 12 | # Path to the event log in CSV format 13 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 14 | # Specify the name for each of the columns in the CSV file (XES standard by default) 15 | log_ids: 16 | case: "case_id" 17 | activity: "activity" 18 | resource: "resource" 19 | enabled_time: "enabled_time" # If not present in the log, automatically computed 20 | start_time: "start_time" 21 | end_time: "end_time" 22 | # Event log to evaluate the discovered BPS model with 23 | test_log_path: ../event_logs/LoanApp_simplified_test.csv.gz 24 | # Number of evaluations of the discovered BPS model 25 | num_final_evaluations: 10 26 | # Metrics to evaluate the discovered BPS model 27 | evaluation_metrics: 28 | - 3_gram_distance 29 | - 2_gram_distance 30 | - absolute_event_distribution 31 | - relative_event_distribution 32 | - circadian_event_distribution 33 | - arrival_event_distribution 34 | - cycle_time_distribution 35 | # Whether to discover case attributes or not 36 | discover_data_attributes: false 37 | ################# 38 | # Preprocessing # 39 | ################# 40 | preprocessing: 41 | # Threshold to consider two activities as concurrent when computing the enabled time (if necessary) 42 | enable_time_concurrency_threshold: 0.75 43 | ################ 44 | # Control-flow # 45 | ################ 46 | control_flow: 47 | # Metric to guide the optimization process (loss function to minimize) 48 | optimization_metric: two_gram_distance 49 | # Number of optimization iterations over the search space 50 | num_iterations: 20 51 | # Number of times to evaluate each iteration (using the mean of all of them) 52 | num_evaluations_per_iteration: 3 53 | # Methods for discovering gateway probabilities 54 | gateway_probabilities: discovery 55 | # Discover process model with SplitMiner v3 56 | mining_algorithm: sm1 57 | # Number of concurrent relations between events to be captured 58 | epsilon: 59 | - 0.05 60 | - 0.4 61 | # Threshold for filtering the incoming and outgoing edges 62 | eta: 63 | - 0.2 64 | - 0.7 65 | # Whether to replace non-trivial OR joins or not 66 | replace_or_joins: 67 | - true 68 | - false 69 | # Whether to prioritize parallelism over loops or not 70 | prioritize_parallelism: 71 | - true 72 | - false 73 | ################## 74 | # Resource model # 75 | ################## 76 | resource_model: 77 | # Metric to guide the optimization process (loss function to minimize) 78 | optimization_metric: circadian_emd 79 | # Number of optimization iterations over the search space 80 | num_iterations: 20 81 | # Number of times to evaluate each iteration (using the mean of all of them) 82 | num_evaluations_per_iteration: 3 83 | # Whether to discover prioritization or batching behavior 84 | discover_prioritization_rules: false 85 | discover_batching_rules: false
86 | # Resource profiles configuration 87 | resource_profiles: 88 | # Resource profile discovery type 89 | discovery_type: differentiated 90 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 91 | granularity: 60 92 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 93 | confidence: 94 | - 0.5 95 | - 0.85 96 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 97 | support: 98 | - 0.05 99 | - 0.5 100 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 101 | participation: 0.4 102 | ##################### 103 | # Extraneous delays # 104 | ##################### 105 | extraneous_activity_delays: 106 | # Method to compute the extraneous delay 107 | discovery_method: eclipse-aware 108 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 109 | num_iterations: 1 110 | -------------------------------------------------------------------------------- /docs/source/_static/configuration_example_with_provided_process_model.yml: -------------------------------------------------------------------------------- 1 | ################################################################################################################# 2 | # Same simple configuration as 'configuration_example.yml' but providing the BPMN model # 3 | ################################################################################################################# 4 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model # 5 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 6 | ################################################################################################################# 7 | version: 5 8 | ########## 9 | # Common # 10 | ########## 11 | common: 12 | # Path to the event log in CSV format 13 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 14 | # Specify the name for each of the columns in the CSV file (XES standard by default) 15 | log_ids: 16 | case: "case_id" 17 | activity: "activity" 18 | resource: "resource" 19 | enabled_time: "enabled_time" # If not present in the log, automatically computed 20 | start_time: "start_time" 21 | end_time: "end_time" 22 | # Use this process model and skip its discovery 23 | process_model_path: ../models/LoanApp_simplified.bpmn 24 | # Whether to discover case attributes or not 25 | discover_data_attributes: false 26 | ################# 27 | # Preprocessing # 28 | ################# 29 | preprocessing: 30 | # Threshold to consider two activities as concurrent when computing the enabled time (if necessary) 31 | enable_time_concurrency_threshold: 0.75 32 | ################ 33 | # Control-flow # 34 | ################ 35 | control_flow: 36 | # Metric to guide the optimization process (loss function to minimize) 37 | optimization_metric: two_gram_distance 38 | # Number of optimization iterations over the search space 39 | num_iterations: 1 40 | # Number of times to evaluate each iteration (using the mean of all of them) 41 | num_evaluations_per_iteration: 3 42 | # Methods for discovering gateway probabilities 43 | gateway_probabilities: discovery 44 | ################## 45 | # Resource model # 46 | ################## 47 | resource_model: 48 | # Metric to guide the optimization process (loss 
function to minimize) 49 | optimization_metric: circadian_emd 50 | # Number of optimization iterations over the search space 51 | num_iterations: 20 52 | # Number of times to evaluate each iteration (using the mean of all of them) 53 | num_evaluations_per_iteration: 3 54 | # Whether to discover prioritization or batching behavior 55 | discover_prioritization_rules: false 56 | discover_batching_rules: false 57 | # Resource profiles configuration 58 | resource_profiles: 59 | # Resource profile discovery type 60 | discovery_type: pool 61 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 62 | granularity: 60 63 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 64 | confidence: 65 | - 0.5 66 | - 0.85 67 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 68 | support: 69 | - 0.05 70 | - 0.5 71 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 72 | participation: 0.4 73 | ##################### 74 | # Extraneous delays # 75 | ##################### 76 | extraneous_activity_delays: 77 | # Method to compute the extraneous delay 78 | discovery_method: eclipse-aware 79 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 80 | num_iterations: 1 81 | -------------------------------------------------------------------------------- /docs/source/_static/configuration_one_shot.yml: -------------------------------------------------------------------------------- 1 | ################################################################################################################# 2 | # Simple configuration example for running SIMOD without parameter optimization steps. The defined parameters # 3 | # should be individual values and not intervals, as there is no optimization. 
# 4 | ################################################################################################################# 5 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 6 | ################################################################################################################# 7 | version: 5 8 | ########## 9 | # Common # 10 | ########## 11 | common: 12 | # Path to the event log in CSV format 13 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 14 | # Specify the name for each of the columns in the CSV file (XES standard by default) 15 | log_ids: 16 | case: "case_id" 17 | activity: "activity" 18 | resource: "resource" 19 | enabled_time: "enabled_time" # If not present in the log, automatically computed 20 | start_time: "start_time" 21 | end_time: "end_time" 22 | ################ 23 | # Control-flow # 24 | ################ 25 | control_flow: 26 | # Number of optimization iterations over the search space 27 | num_iterations: 1 28 | # Number of times to evaluate each iteration (using the mean of all of them) 29 | num_evaluations_per_iteration: 1 30 | # Methods for discovering gateway probabilities 31 | gateway_probabilities: discovery 32 | # Discover process model with SplitMiner v3 33 | mining_algorithm: sm1 34 | # Number of concurrent relations between events to be captured 35 | epsilon: 0.3 36 | # Threshold for filtering the incoming and outgoing edges 37 | eta: 0.5 38 | # Whether to replace non-trivial OR joins or not 39 | replace_or_joins: false 40 | # Whether to prioritize parallelism over loops or not 41 | prioritize_parallelism: true 42 | ################## 43 | # Resource model # 44 | ################## 45 | resource_model: 46 | # Number of optimization iterations over the search space 47 | num_iterations: 1 48 | # Number of times to evaluate each iteration (using the mean of all of them) 49 | num_evaluations_per_iteration: 1 50 | # Resource profiles configuration 51 | resource_profiles: 52 | # Resource profile discovery type 53 | discovery_type: differentiated 54 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 55 | granularity: 60 56 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 57 | confidence: 0.6 58 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 59 | support: 0.2 60 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 61 | participation: 0.4 62 | ##################### 63 | # Extraneous delays # 64 | ##################### 65 | extraneous_activity_delays: 66 | # Method to compute the extraneous delay 67 | discovery_method: eclipse-aware 68 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 69 | num_iterations: 1 70 | -------------------------------------------------------------------------------- /docs/source/_static/simod.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/docs/source/_static/simod.png -------------------------------------------------------------------------------- /docs/source/citation.rst: -------------------------------------------------------------------------------- 1 | Cite the Paper 2 | ============== 3 | 4 | When using SIMOD for a 
publication, please cite the following article in your paper: 5 | 6 | `[Citation pending] 7 | `_ 8 | 9 | More References 10 | ^^^^^^^^^^^^^^^ 11 | 12 | `Camargo, M., Dumas, M., González, O., 2020. "Automated discovery of 13 | business process simulation models from event logs". Decis. Support Syst. 14 | 134, 113284. 15 | `_ 16 | 17 | `Chapela-Campa, D., Dumas, M., 2024. "Enhancing business process 18 | simulation models with extraneous activity delays". Inf. Syst. 122, 102346. 19 | `_ 20 | 21 | `Chapela-Campa, D., Benchekroun, I., Baron, O., Dumas, M., Krass, D., 22 | Senderovich, A., 2025. "A framework for measuring the quality of business 23 | process simulation models". Inf. Syst. 127, 102447. 24 | `_ 25 | 26 | `Lashkevich, K., Milani, F., Chapela-Campa, D., Suvorau, I., Dumas, M., 27 | 2024. "Unveiling the causes of waiting time in business processes from event 28 | logs". Inf. Syst. 126, 102434. 29 | `_ 30 | 31 | `López-Pintado, O., Dumas, M., Berx, J., 2024a. "Discovery, simulation, and 32 | optimization of business processes with differentiated resources". Inf. Syst. 33 | 120, 102289. 34 | `_ 35 | 36 | `López-Pintado, O., Dumas, M., 2023. "Discovery and simulation of business 37 | processes with probabilistic resource availability calendars", in: Proceedings 38 | of the 5th International Conference on Process Mining (ICPM), IEEE. pp. 39 | 1–8. 40 | `_ 41 | 42 | `López-Pintado, O., Murashko, S., Dumas, M., 2024b. "Discovery and 43 | simulation of data-aware business processes", in: Proceedings of the 6th 44 | International Conference on Process Mining (ICPM), IEEE. pp. 105–112. 45 | `_ 46 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = 'SIMOD' 10 | copyright = '2025, UT Information Systems Research Group' 11 | author = 'UT Information Systems Research Group' 12 | release = '5.1.2' 13 | 14 | # -- General configuration --------------------------------------------------- 15 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 16 | 17 | import os 18 | import sys 19 | 20 | # Get the absolute path of the project's root directory 21 | sys.path.insert(0, os.path.abspath("../../src")) # Adjust if necessary 22 | 23 | extensions = [ 24 | "sphinx.ext.napoleon", 25 | "sphinx.ext.viewcode", 26 | "sphinx.ext.autosummary", 27 | "sphinx.ext.intersphinx" 28 | ] 29 | 30 | intersphinx_mapping = { 31 | "python": ("https://docs.python.org/3.9", None), 32 | "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), 33 | } 34 | 35 | templates_path = ['_templates'] 36 | exclude_patterns = [] 37 | autodoc_class_attributes = False 38 | 39 | # -- Options for HTML output ------------------------------------------------- 40 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 41 | 42 | html_theme = 'sphinx_rtd_theme' 43 | html_static_path = ['_static'] 44 | 45 | # Automatically generate summaries 46 | autosummary_generate = True 47 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. SIMOD documentation master file, created by 2 | sphinx-quickstart on Mon Jan 27 16:09:16 2025. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | SIMOD: Automated discovery of business process simulation models 7 | ================================================================ 8 | 9 | SIMOD combines process mining and machine learning techniques to automate the discovery and tuning of Business Process 10 | Simulation models from event logs extracted from enterprise information systems (ERPs, CRM, case management systems, 11 | etc.). SIMOD takes as input an event log in CSV format, a configuration file, and (optionally) a BPMN process model, 12 | and discovers a business process simulation model that can be simulated using the Prosimos simulator, which is embedded 13 | in SIMOD. 14 | 15 | 16 | .. _fig_simod: 17 | .. figure:: _static/simod.png 18 | :align: center 19 | :scale: 60% 20 | 21 | SIMOD main workflow. 22 | 23 | 24 | In its standard workflow, SIMOD receives an event log and a configuration file, and 25 | runs an iterative process to discover the BPS model that best reflects the behavior captured in the input event log. 26 | This iterative process is designed as a pipeline-based architecture composed of multiple stages that run a 27 | TPE-optimization process to obtain the parameters that lead to the most accurate model. 28 | 29 | Alternatively, SIMOD can additionally receive as input a BPMN model of the process. In this case, SIMOD skips the 30 | corresponding discovery phase, and builds the BPS model over the input BPMN model. 31 | 32 | .. note:: 33 | This project is under active development. 34 | 35 | 36 | .. 
50 | Alternatively, SIMOD can additionally receive as input a BPMN model of the process. In this case, SIMOD skips the
51 | corresponding discovery phase, and builds the BPS model over the input BPMN model.
52 | 
53 | .. note::
54 |     This project is under active development.
55 | 
56 | 
57 | .. toctree::
58 |    :maxdepth: 2
59 |    :caption: Contents:
60 | 
61 |    installation
62 |    usage
63 |    api
64 |    citation
-------------------------------------------------------------------------------- /docs/source/installation.rst: --------------------------------------------------------------------------------
1 | Installation Guide
2 | ==================
3 | 
4 | This guide provides instructions on how to install SIMOD using **pip** (PyPI) or **Docker**.
5 | 
6 | Prerequisites
7 | -------------
8 | Before installing SIMOD, ensure you have the following dependencies:
9 | 
10 | Dependencies for local installation
11 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
12 | 
13 | - **Python 3.9, 3.10, or 3.11**: The recommended (most extensively tested) version is Python 3.9; Python 3.10 and
14 |   3.11 also work.
15 | - **Java 1.8**: Ensure Java is installed and added to your system’s PATH (e.g.,
16 |   `Java.com `_).
17 | - **Rust and Cargo (\*)**: On systems without precompiled builds for some dependencies, you may also need Rust
18 |   and Cargo to build them (install both via `rustup.rs `_).
19 | 
20 | Dependencies for Docker installation
21 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
22 | 
23 | - **Docker**: If you want to run SIMOD without installing dependencies, you can use the official Docker image (install
24 |   Docker from `https://www.docker.com/get-started/ `_).
25 | 
26 | Installation via PyPI
27 | ---------------------
28 | The simplest way to install SIMOD is via **pip** from PyPI (`simod project `_):
29 | 
30 | .. code-block:: bash
31 | 
32 |     python -m pip install simod
33 | 
34 | Running SIMOD after installation:
35 | 
36 | .. code-block:: bash
37 | 
38 |     simod --help
39 | 
40 | Installation via Docker
41 | -----------------------
42 | If you prefer to run SIMOD inside a **Docker container**, an isolated environment that requires no Python or Java
43 | installation, use the following commands:
44 | 
45 | .. code-block:: bash
46 | 
47 |     docker pull nokal/simod
48 | 
49 | To start a container:
50 | 
51 | .. code-block:: bash
52 | 
53 |     docker run -it -v /path/to/resources/:/usr/src/Simod/resources -v /path/to/output:/usr/src/Simod/outputs nokal/simod bash
54 | 
55 | Use the `resources/` directory to store event logs and configuration files. The `outputs/` directory will contain the
56 | results of SIMOD.
57 | 
58 | From inside the container, you can run SIMOD with:
59 | 
60 | .. code-block:: bash
61 | 
62 |     poetry run simod --help
63 | 
64 | Docker images for different SIMOD versions are available at `https://hub.docker.com/r/nokal/simod/tags `_.
65 | 
66 | Installation via source code
67 | ----------------------------
68 | If you prefer to install SIMOD from its source code (you need `git`, `python`, and
69 | `poetry` installed), use the following commands:
70 | 
71 | .. code-block:: bash
72 | 
73 |     git clone https://github.com/AutomatedProcessImprovement/Simod.git
74 | 
75 |     cd Simod
76 | 
77 |     python -m venv simod-env
78 | 
79 |     # source ./simod-env/bin/activate  # on Linux/macOS
80 |     .\simod-env\Scripts\activate.bat   # on Windows
81 | 
82 |     poetry install
83 | 
84 | Running SIMOD after installation:
85 | 
86 | .. code-block:: bash
87 | 
88 |     simod --help
89 | 
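90 | To check that the installation works end to end, you can then run SIMOD on the bundled example configuration (the
91 | paths below assume the repository root as the working directory; adjust them to your setup):
92 | 
93 | .. code-block:: bash
94 | 
95 |     simod --configuration resources/config/configuration_example.yml --output outputs
96 | 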
-------------------------------------------------------------------------------- /docs/source/usage.rst: --------------------------------------------------------------------------------
1 | Usage Guide
2 | ===========
3 | 
4 | This guide provides instructions on how to use SIMOD from the command line to discover a BPS model from an event log in
5 | CSV format.
6 | 
7 | Running Simod
8 | -------------
9 | 
10 | Once Simod is installed (see `Installation `_), you can run it by specifying a configuration file.
11 | 
12 | Installed via PyPI or source code
13 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
14 | 
15 | .. code-block:: bash
16 | 
17 |     simod --configuration resources/config/configuration_example.yml
18 | 
19 | Replace `resources/config/configuration_example.yml` with the path to your own configuration file. Paths can be
20 | relative to the configuration file or absolute.
21 | 
22 | 
23 | Installed via Docker
24 | ^^^^^^^^^^^^^^^^^^^^
25 | 
26 | .. code-block:: bash
27 | 
28 |     poetry run simod --configuration resources/config/configuration_example.yml
29 | 
30 | Replace `resources/config/configuration_example.yml` with the path to your own configuration file. Paths can be
31 | relative to the configuration file or absolute.
32 | 
33 | Configuration File
34 | ------------------
35 | The configuration file is a YAML file that specifies various parameters for Simod. Ensure that the path to your event
36 | log is specified in the configuration file. Here are some configuration examples:
37 | 
38 | - Basic configuration to discover the full BPS
39 |   model (`basic <_static/configuration_example.yml>`_).
40 | - Basic configuration to discover the full BPS model using fuzzy (probabilistic) resource
41 |   calendars (`probabilistic <_static/configuration_example_fuzzy.yml>`_).
42 | - Basic configuration to discover the full BPS model with data-aware branching rules
43 |   (`data-aware <_static/configuration_example_data_aware.yml>`_).
44 | - Basic configuration to discover the full BPS model, and evaluate it with a specified event
45 |   log (`with evaluation <_static/configuration_example_with_evaluation.yml>`_).
46 | - Basic configuration to discover a BPS model with a provided BPMN process model as starting
47 |   point (`with BPMN model <_static/configuration_example_with_provided_process_model.yml>`_).
48 | - Basic configuration to discover a BPS model with no optimization process (one-shot)
49 |   (`one-shot <_static/configuration_one_shot.yml>`_).
50 | - Complete configuration example with all the possible
51 |   parameters (`complete config <_static/complete_configuration.yml>`_).
52 | 
53 | Event Log Format
54 | ----------------
55 | Simod takes as input an event log in CSV format.
56 | 
57 | .. _tab_event_log:
58 | .. table:: Sample of input event log format.
59 |    :align: center
60 | 
61 |    ======= =========== =================== =================== ========
62 |    case_id activity    start_time          end_time            resource
63 |    ======= =========== =================== =================== ========
64 |    512     Create PO   03/11/2021 08:00:00 03/11/2021 08:31:11 DIO
65 |    513     Create PO   03/11/2021 08:34:21 03/11/2021 09:02:09 DIO
66 |    514     Create PO   03/11/2021 09:11:11 03/11/2021 09:49:51 DIO
67 |    512     Approve PO  03/11/2021 12:13:06 03/11/2021 12:44:21 Joseph
68 |    513     Reject PO   03/11/2021 12:30:51 03/11/2021 13:15:50 Jolyne
69 |    514     Approve PO  03/11/2021 12:59:11 03/11/2021 13:32:36 Joseph
70 |    512     Check Stock 03/11/2021 14:22:10 03/11/2021 14:49:22 DIO
71 |    514     Check Stock 03/11/2021 15:11:01 03/11/2021 15:46:12 DIO
72 |    514     Order Goods 04/11/2021 09:46:12 04/11/2021 10:34:23 Joseph
73 |    512     Pack Goods  04/11/2021 10:46:50 04/11/2021 11:18:02 Giorno
74 |    ======= =========== =================== =================== ========
75 | 
76 | The column names can be specified as part of the configuration file (`see here <_static/complete_configuration.yml>`_).
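77 | 
78 | If your CSV uses different column names, map them in the `log_ids` section of the configuration file, as in the
79 | following minimal sketch (based on the bundled examples; adapt the values on the right to your own log):
80 | 
81 | .. code-block:: yaml
82 | 
83 |     common:
84 |       log_ids:
85 |         case: "case_id"
86 |         activity: "activity"
87 |         resource: "resource"
88 |         start_time: "start_time"
89 |         end_time: "end_time"
90 | 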
91 | Output
92 | ------
93 | Simod discovers a business process simulation model that can be simulated using the
94 | `Prosimos simulator `_, which is embedded in Simod.
95 | 
96 | Once SIMOD finishes, the discovered BPS model can be found in the `outputs` directory, under the `best_result` folder.
-------------------------------------------------------------------------------- /poetry.toml: --------------------------------------------------------------------------------
1 | [virtualenvs]
2 | in-project = true
3 | create = true
-------------------------------------------------------------------------------- /pyproject.toml: --------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["poetry-core"]
3 | build-backend = "poetry.core.masonry.api"
4 | 
5 | [tool.poetry]
6 | name = "simod"
7 | version = "5.1.6"
8 | authors = [
9 |     "Ihar Suvorau ",
10 |     "David Chapela ",
11 |     "Manuel Camargo ",
12 | ]
13 | description = "Simod is a Python tool for automated discovery of business process simulation models from event logs."
14 | readme = "README.md" 15 | packages = [{ include = "simod", from = "src" }] 16 | 17 | [tool.poetry.dependencies] 18 | python = "^3.9,<3.12" 19 | click = "^8.1.3" 20 | hyperopt = "^0.2.7" 21 | lxml = "^5.3.0" 22 | matplotlib = "^3.6.0" 23 | networkx = "^3.2.1" 24 | numpy = "^1.24.23" 25 | pandas = "^2.1.0" 26 | pendulum = "^3.0.0" 27 | pydantic = "^2.3.0" 28 | python-dotenv = "^1.0.0" 29 | python-multipart = "^0.0.12" 30 | pytz = "^2024.2" 31 | PyYAML = "^6.0" 32 | requests = "^2.28.2" 33 | scipy = "^1.13.0" 34 | statistics = "^1.0.3.5" 35 | tqdm = "^4.64.1" 36 | xmltodict = "^0.13.0" 37 | prosimos = "^2.0.6" 38 | extraneous-activity-delays = "^2.1.21" 39 | openxes-cli-py = "^0.1.15" 40 | pix-framework = "^0.13.17" 41 | log-distance-measures = "^2.0.0" 42 | 43 | [tool.poetry.group.dev.dependencies] 44 | pytest = "^7.1.3" 45 | pytest-cov = "^4" 46 | memory-profiler = "^0.61.0" 47 | pylint = "^2.17.4" 48 | setuptools = "^67.8.0" 49 | seaborn = "^0.12.2" 50 | pytest-benchmark = "^4.0.0" 51 | snakeviz = "^2.2.0" 52 | 53 | [tool.poetry.scripts] 54 | simod = "simod.cli:main" 55 | 56 | [tool.ruff] 57 | line-length = 120 58 | 59 | [tool.black] 60 | line-length = 120 61 | 62 | [tool.pytest.ini_options] 63 | markers = ["integration", "system", "manual", "benchmark", "smoke"] 64 | -------------------------------------------------------------------------------- /resources/config/benchmark/benchmark_diff.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | ########## 3 | # Common # 4 | ########## 5 | common: 6 | # Path to the event log in CSV format 7 | train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz 8 | # Event log to evaluate the discovered BPS model with 9 | test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz 10 | # Use observed arrival distributions 11 | use_observed_arrival_distribution: false 12 | # Specify the name for each of the columns in the CSV file (XES standard by default) 13 | log_ids: 14 | case: "case_id" 15 | activity: "activity" 16 | resource: "resource" 17 | start_time: "start_time" 18 | end_time: "end_time" 19 | # Number of evaluations of the discovered BPS model 20 | num_final_evaluations: 10 21 | # Metrics to evaluate the discovered BPS model 22 | evaluation_metrics: 23 | - 3_gram_distance 24 | - 2_gram_distance 25 | - absolute_event_distribution 26 | - relative_event_distribution 27 | - circadian_event_distribution 28 | - arrival_event_distribution 29 | - cycle_time_distribution 30 | # Whether to discover case attributes or not 31 | discover_data_attributes: false 32 | ################# 33 | # Preprocessing # 34 | ################# 35 | preprocessing: 36 | multitasking: false 37 | enable_time_concurrency_threshold: 0.5 38 | ################ 39 | # Control-flow # 40 | ################ 41 | control_flow: 42 | # Metric to guide the optimization process (loss function to minimize) 43 | optimization_metric: two_gram_distance 44 | # Number of optimization iterations over the search space 45 | num_iterations: 30 46 | # Number of times to evaluate each iteration (using the mean of all of them) 47 | num_evaluations_per_iteration: 5 48 | # Methods for discovering gateway probabilities 49 | gateway_probabilities: discovery 50 | # Discover process model with SplitMiner v3 51 | mining_algorithm: sm1 52 | # Number of concurrent relations between events to be captured 53 | epsilon: 54 | - 0.05 55 | - 0.4 56 | # Threshold for filtering the incoming and outgoing edges 57 | eta: 58 | - 0.2 59 | - 0.7 60 | # Whether to replace 
non-trivial OR joins or not 61 | replace_or_joins: 62 | - true 63 | - false 64 | # Whether to prioritize parallelism over loops or not 65 | prioritize_parallelism: true 66 | ################## 67 | # Resource model # 68 | ################## 69 | resource_model: 70 | # Metric to guide the optimization process (loss function to minimize) 71 | optimization_metric: circadian_emd 72 | # Number of optimization iterations over the search space 73 | num_iterations: 40 74 | # Number of times to evaluate each iteration (using the mean of all of them) 75 | num_evaluations_per_iteration: 5 76 | # Whether to discover prioritization or batching behavior 77 | discover_prioritization_rules: false 78 | discover_batching_rules: false 79 | # Resource profiles configuration 80 | resource_profiles: 81 | # Resource profile discovery type 82 | discovery_type: differentiated 83 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 84 | granularity: 60 85 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 86 | confidence: 87 | - 0.5 88 | - 0.85 89 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 90 | support: 91 | - 0.05 92 | - 0.5 93 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 94 | participation: 0.4 -------------------------------------------------------------------------------- /resources/config/benchmark/benchmark_diff_data_aware.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | ########## 3 | # Common # 4 | ########## 5 | common: 6 | # Path to the event log in CSV format 7 | train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz 8 | # Event log to evaluate the discovered BPS model with 9 | test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz 10 | # Specify the name for each of the columns in the CSV file (XES standard by default) 11 | log_ids: 12 | case: "case_id" 13 | activity: "activity" 14 | resource: "resource" 15 | start_time: "start_time" 16 | end_time: "end_time" 17 | # Number of evaluations of the discovered BPS model 18 | num_final_evaluations: 10 19 | # Metrics to evaluate the discovered BPS model 20 | evaluation_metrics: 21 | - 3_gram_distance 22 | - 2_gram_distance 23 | - absolute_event_distribution 24 | - relative_event_distribution 25 | - circadian_event_distribution 26 | - arrival_event_distribution 27 | - cycle_time_distribution 28 | # Whether to discover case attributes or not 29 | discover_data_attributes: true 30 | ################# 31 | # Preprocessing # 32 | ################# 33 | preprocessing: 34 | multitasking: false 35 | enable_time_concurrency_threshold: 0.5 36 | ################ 37 | # Control-flow # 38 | ################ 39 | control_flow: 40 | # Metric to guide the optimization process (loss function to minimize) 41 | optimization_metric: two_gram_distance 42 | # Number of optimization iterations over the search space 43 | num_iterations: 30 44 | # Number of times to evaluate each iteration (using the mean of all of them) 45 | num_evaluations_per_iteration: 5 46 | # Methods for discovering gateway probabilities 47 | gateway_probabilities: discovery 48 | # Discover process model with SplitMiner v3 49 | mining_algorithm: sm1 50 | # Number of concurrent relations between events to be captured 51 | epsilon: 52 | - 0.05 53 | - 0.4 54 | # Threshold for filtering the incoming 
and outgoing edges 55 | eta: 56 | - 0.2 57 | - 0.7 58 | # Whether to replace non-trivial OR joins or not 59 | replace_or_joins: 60 | - true 61 | - false 62 | # Whether to prioritize parallelism over loops or not 63 | prioritize_parallelism: true 64 | # Discover data-aware branching rules, i.e., BPMN decision points based on value of data attributes 65 | discover_branch_rules: true 66 | # Minimum f-score value to consider the discovered data-aware branching rules 67 | f_score: 68 | - 0.3 69 | - 0.9 70 | ################## 71 | # Resource model # 72 | ################## 73 | resource_model: 74 | # Metric to guide the optimization process (loss function to minimize) 75 | optimization_metric: circadian_emd 76 | # Number of optimization iterations over the search space 77 | num_iterations: 40 78 | # Number of times to evaluate each iteration (using the mean of all of them) 79 | num_evaluations_per_iteration: 5 80 | # Whether to discover prioritization or batching behavior 81 | discover_prioritization_rules: false 82 | discover_batching_rules: false 83 | # Resource profiles configuration 84 | resource_profiles: 85 | # Resource profile discovery type 86 | discovery_type: differentiated 87 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 88 | granularity: 60 89 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 90 | confidence: 91 | - 0.5 92 | - 0.85 93 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 94 | support: 95 | - 0.05 96 | - 0.5 97 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 98 | participation: 0.4 -------------------------------------------------------------------------------- /resources/config/benchmark/benchmark_diff_extr.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | ########## 3 | # Common # 4 | ########## 5 | common: 6 | # Path to the event log in CSV format 7 | train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz 8 | # Event log to evaluate the discovered BPS model with 9 | test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz 10 | # Specify the name for each of the columns in the CSV file (XES standard by default) 11 | log_ids: 12 | case: "case_id" 13 | activity: "activity" 14 | resource: "resource" 15 | start_time: "start_time" 16 | end_time: "end_time" 17 | # Number of evaluations of the discovered BPS model 18 | num_final_evaluations: 10 19 | # Metrics to evaluate the discovered BPS model 20 | evaluation_metrics: 21 | - 3_gram_distance 22 | - 2_gram_distance 23 | - absolute_event_distribution 24 | - relative_event_distribution 25 | - circadian_event_distribution 26 | - arrival_event_distribution 27 | - cycle_time_distribution 28 | # Whether to discover case attributes or not 29 | discover_data_attributes: false 30 | ################# 31 | # Preprocessing # 32 | ################# 33 | preprocessing: 34 | multitasking: false 35 | enable_time_concurrency_threshold: 0.5 36 | ################ 37 | # Control-flow # 38 | ################ 39 | control_flow: 40 | # Metric to guide the optimization process (loss function to minimize) 41 | optimization_metric: two_gram_distance 42 | # Number of optimization iterations over the search space 43 | num_iterations: 30 44 | # Number of times to evaluate each iteration (using the mean of all of them) 45 | 
num_evaluations_per_iteration: 5 46 | # Methods for discovering gateway probabilities 47 | gateway_probabilities: discovery 48 | # Discover process model with SplitMiner v3 49 | mining_algorithm: sm1 50 | # Number of concurrent relations between events to be captured 51 | epsilon: 52 | - 0.05 53 | - 0.4 54 | # Threshold for filtering the incoming and outgoing edges 55 | eta: 56 | - 0.2 57 | - 0.7 58 | # Whether to replace non-trivial OR joins or not 59 | replace_or_joins: 60 | - true 61 | - false 62 | # Whether to prioritize parallelism over loops or not 63 | prioritize_parallelism: true 64 | ################## 65 | # Resource model # 66 | ################## 67 | resource_model: 68 | # Metric to guide the optimization process (loss function to minimize) 69 | optimization_metric: circadian_emd 70 | # Number of optimization iterations over the search space 71 | num_iterations: 40 72 | # Number of times to evaluate each iteration (using the mean of all of them) 73 | num_evaluations_per_iteration: 5 74 | # Whether to discover prioritization or batching behavior 75 | discover_prioritization_rules: false 76 | discover_batching_rules: false 77 | # Resource profiles configuration 78 | resource_profiles: 79 | # Resource profile discovery type 80 | discovery_type: differentiated 81 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 82 | granularity: 60 83 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 84 | confidence: 85 | - 0.5 86 | - 0.85 87 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 88 | support: 89 | - 0.05 90 | - 0.5 91 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 92 | participation: 0.4 93 | ##################### 94 | # Extraneous delays # 95 | ##################### 96 | extraneous_activity_delays: 97 | # Method to compute the extraneous delay (naive or eclipse-aware) 98 | discovery_method: eclipse-aware 99 | # Metric to guide the optimization process (loss function to minimize) 100 | optimization_metric: relative_emd 101 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 102 | num_iterations: 20 103 | -------------------------------------------------------------------------------- /resources/config/benchmark/benchmark_fuzz.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | ########## 3 | # Common # 4 | ########## 5 | common: 6 | # Path to the event log in CSV format 7 | train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz 8 | # Event log to evaluate the discovered BPS model with 9 | test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz 10 | # Specify the name for each of the columns in the CSV file (XES standard by default) 11 | log_ids: 12 | case: "case_id" 13 | activity: "activity" 14 | resource: "resource" 15 | start_time: "start_time" 16 | end_time: "end_time" 17 | # Number of evaluations of the discovered BPS model 18 | num_final_evaluations: 10 19 | # Metrics to evaluate the discovered BPS model 20 | evaluation_metrics: 21 | - 3_gram_distance 22 | - 2_gram_distance 23 | - absolute_event_distribution 24 | - relative_event_distribution 25 | - circadian_event_distribution 26 | - arrival_event_distribution 27 | - cycle_time_distribution 28 | # Whether to discover case attributes or not 29 | 
discover_data_attributes: false 30 | ################# 31 | # Preprocessing # 32 | ################# 33 | preprocessing: 34 | multitasking: false 35 | enable_time_concurrency_threshold: 0.5 36 | ################ 37 | # Control-flow # 38 | ################ 39 | control_flow: 40 | # Metric to guide the optimization process (loss function to minimize) 41 | optimization_metric: two_gram_distance 42 | # Number of optimization iterations over the search space 43 | num_iterations: 30 44 | # Number of times to evaluate each iteration (using the mean of all of them) 45 | num_evaluations_per_iteration: 5 46 | # Methods for discovering gateway probabilities 47 | gateway_probabilities: discovery 48 | # Discover process model with SplitMiner v3 49 | mining_algorithm: sm1 50 | # Number of concurrent relations between events to be captured 51 | epsilon: 52 | - 0.05 53 | - 0.4 54 | # Threshold for filtering the incoming and outgoing edges 55 | eta: 56 | - 0.2 57 | - 0.7 58 | # Whether to replace non-trivial OR joins or not 59 | replace_or_joins: 60 | - true 61 | - false 62 | # Whether to prioritize parallelism over loops or not 63 | prioritize_parallelism: true 64 | ################## 65 | # Resource model # 66 | ################## 67 | resource_model: 68 | # Metric to guide the optimization process (loss function to minimize) 69 | optimization_metric: circadian_emd 70 | # Number of optimization iterations over the search space 71 | num_iterations: 40 72 | # Number of times to evaluate each iteration (using the mean of all of them) 73 | num_evaluations_per_iteration: 5 74 | # Whether to discover prioritization or batching behavior 75 | discover_prioritization_rules: false 76 | discover_batching_rules: false 77 | # Resource profiles configuration 78 | resource_profiles: 79 | # Resource profile discovery type 80 | discovery_type: differentiated_fuzzy 81 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 82 | granularity: 60 83 | fuzzy_angle: 84 | - 0.1 85 | - 0.9 86 | -------------------------------------------------------------------------------- /resources/config/benchmark/benchmark_fuzz_extr.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | ########## 3 | # Common # 4 | ########## 5 | common: 6 | # Path to the event log in CSV format 7 | train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz 8 | # Event log to evaluate the discovered BPS model with 9 | test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz 10 | # Specify the name for each of the columns in the CSV file (XES standard by default) 11 | log_ids: 12 | case: "case_id" 13 | activity: "activity" 14 | resource: "resource" 15 | start_time: "start_time" 16 | end_time: "end_time" 17 | # Number of evaluations of the discovered BPS model 18 | num_final_evaluations: 10 19 | # Metrics to evaluate the discovered BPS model 20 | evaluation_metrics: 21 | - 3_gram_distance 22 | - 2_gram_distance 23 | - absolute_event_distribution 24 | - relative_event_distribution 25 | - circadian_event_distribution 26 | - arrival_event_distribution 27 | - cycle_time_distribution 28 | # Whether to discover case attributes or not 29 | discover_data_attributes: false 30 | ################# 31 | # Preprocessing # 32 | ################# 33 | preprocessing: 34 | multitasking: false 35 | enable_time_concurrency_threshold: 0.5 36 | ################ 37 | # Control-flow # 38 | ################ 39 | control_flow: 40 | # Metric to guide 
the optimization process (loss function to minimize) 41 | optimization_metric: two_gram_distance 42 | # Number of optimization iterations over the search space 43 | num_iterations: 30 44 | # Number of times to evaluate each iteration (using the mean of all of them) 45 | num_evaluations_per_iteration: 5 46 | # Methods for discovering gateway probabilities 47 | gateway_probabilities: discovery 48 | # Discover process model with SplitMiner v3 49 | mining_algorithm: sm1 50 | # Number of concurrent relations between events to be captured 51 | epsilon: 52 | - 0.05 53 | - 0.4 54 | # Threshold for filtering the incoming and outgoing edges 55 | eta: 56 | - 0.2 57 | - 0.7 58 | # Whether to replace non-trivial OR joins or not 59 | replace_or_joins: 60 | - true 61 | - false 62 | # Whether to prioritize parallelism over loops or not 63 | prioritize_parallelism: true 64 | ################## 65 | # Resource model # 66 | ################## 67 | resource_model: 68 | # Metric to guide the optimization process (loss function to minimize) 69 | optimization_metric: circadian_emd 70 | # Number of optimization iterations over the search space 71 | num_iterations: 40 72 | # Number of times to evaluate each iteration (using the mean of all of them) 73 | num_evaluations_per_iteration: 5 74 | # Whether to discover prioritization or batching behavior 75 | discover_prioritization_rules: false 76 | discover_batching_rules: false 77 | # Resource profiles configuration 78 | resource_profiles: 79 | # Resource profile discovery type 80 | discovery_type: differentiated_fuzzy 81 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 82 | granularity: 60 83 | fuzzy_angle: 84 | - 0.1 85 | - 0.9 86 | ##################### 87 | # Extraneous delays # 88 | ##################### 89 | extraneous_activity_delays: 90 | # Method to compute the extraneous delay (naive or eclipse-aware) 91 | discovery_method: eclipse-aware 92 | # Metric to guide the optimization process (loss function to minimize) 93 | optimization_metric: relative_emd 94 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 95 | num_iterations: 20 96 | -------------------------------------------------------------------------------- /resources/config/benchmark/benchmark_pool.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | ########## 3 | # Common # 4 | ########## 5 | common: 6 | # Path to the event log in CSV format 7 | train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz 8 | # Event log to evaluate the discovered BPS model with 9 | test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz 10 | # Specify the name for each of the columns in the CSV file (XES standard by default) 11 | log_ids: 12 | case: "case_id" 13 | activity: "activity" 14 | resource: "resource" 15 | start_time: "start_time" 16 | end_time: "end_time" 17 | # Number of evaluations of the discovered BPS model 18 | num_final_evaluations: 10 19 | # Metrics to evaluate the discovered BPS model 20 | evaluation_metrics: 21 | - 3_gram_distance 22 | - 2_gram_distance 23 | - absolute_event_distribution 24 | - relative_event_distribution 25 | - circadian_event_distribution 26 | - arrival_event_distribution 27 | - cycle_time_distribution 28 | # Whether to discover case attributes or not 29 | discover_data_attributes: false 30 | ################# 31 | # Preprocessing # 32 | ################# 33 | preprocessing: 34 | 
multitasking: false 35 | enable_time_concurrency_threshold: 0.5 36 | ################ 37 | # Control-flow # 38 | ################ 39 | control_flow: 40 | # Metric to guide the optimization process (loss function to minimize) 41 | optimization_metric: two_gram_distance 42 | # Number of optimization iterations over the search space 43 | num_iterations: 30 44 | # Number of times to evaluate each iteration (using the mean of all of them) 45 | num_evaluations_per_iteration: 5 46 | # Methods for discovering gateway probabilities 47 | gateway_probabilities: discovery 48 | # Discover process model with SplitMiner v3 49 | mining_algorithm: sm1 50 | # Number of concurrent relations between events to be captured 51 | epsilon: 52 | - 0.05 53 | - 0.4 54 | # Threshold for filtering the incoming and outgoing edges 55 | eta: 56 | - 0.2 57 | - 0.7 58 | # Whether to replace non-trivial OR joins or not 59 | replace_or_joins: 60 | - true 61 | - false 62 | # Whether to prioritize parallelism over loops or not 63 | prioritize_parallelism: true 64 | ################## 65 | # Resource model # 66 | ################## 67 | resource_model: 68 | # Metric to guide the optimization process (loss function to minimize) 69 | optimization_metric: circadian_emd 70 | # Number of optimization iterations over the search space 71 | num_iterations: 40 72 | # Number of times to evaluate each iteration (using the mean of all of them) 73 | num_evaluations_per_iteration: 5 74 | # Whether to discover prioritization or batching behavior 75 | discover_prioritization_rules: false 76 | discover_batching_rules: false 77 | # Resource profiles configuration 78 | resource_profiles: 79 | # Resource profile discovery type 80 | discovery_type: pool 81 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 82 | granularity: 60 83 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 84 | confidence: 85 | - 0.5 86 | - 0.85 87 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 88 | support: 89 | - 0.05 90 | - 0.5 91 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 92 | participation: 0.4 93 | -------------------------------------------------------------------------------- /resources/config/configuration_example.yml: -------------------------------------------------------------------------------- 1 | ################################################################################################################# 2 | # Simple configuration example with i) no evaluation of the final BPS model, ii) 20 iterations of control-flow # 3 | # discovery, iii) 20 iterations of resource model (differentiated) discovery, and iv) direct discovery of # 4 | # extraneous delays. 
# 5 | ################################################################################################################# 6 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model # 7 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 8 | ################################################################################################################# 9 | version: 5 10 | ########## 11 | # Common # 12 | ########## 13 | common: 14 | # Path to the event log in CSV format 15 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 16 | # Specify the name for each of the columns in the CSV file (XES standard by default) 17 | log_ids: 18 | case: "case_id" 19 | activity: "activity" 20 | resource: "resource" 21 | enabled_time: "enabled_time" # If not present in the log, automatically computed 22 | start_time: "start_time" 23 | end_time: "end_time" 24 | # Whether to discover case attributes or not 25 | discover_data_attributes: false 26 | ################# 27 | # Preprocessing # 28 | ################# 29 | preprocessing: 30 | # Threshold to consider two activities as concurrent when computing the enabled time (if necessary) 31 | enable_time_concurrency_threshold: 0.75 32 | ################ 33 | # Control-flow # 34 | ################ 35 | control_flow: 36 | # Metric to guide the optimization process (loss function to minimize) 37 | optimization_metric: two_gram_distance 38 | # Number of optimization iterations over the search space 39 | num_iterations: 20 40 | # Number of times to evaluate each iteration (using the mean of all of them) 41 | num_evaluations_per_iteration: 3 42 | # Method for discovering gateway probabilities 43 | gateway_probabilities: discovery 44 | # Discover process model with SplitMiner v3 45 | mining_algorithm: sm1 46 | # Number of concurrent relations between events to be captured 47 | epsilon: 48 | - 0.05 49 | - 0.4 50 | # Threshold for filtering the incoming and outgoing edges 51 | eta: 52 | - 0.2 53 | - 0.7 54 | # Whether to replace non-trivial OR joins or not 55 | replace_or_joins: 56 | - true 57 | - false 58 | # Whether to prioritize parallelism over loops or not 59 | prioritize_parallelism: 60 | - true 61 | - false 62 | ################## 63 | # Resource model # 64 | ################## 65 | resource_model: 66 | # Metric to guide the optimization process (loss function to minimize) 67 | optimization_metric: circadian_emd 68 | # Number of optimization iterations over the search space 69 | num_iterations: 20 70 | # Number of times to evaluate each iteration (using the mean of all of them) 71 | num_evaluations_per_iteration: 3 72 | # Whether to discover prioritization or batching behavior 73 | discover_prioritization_rules: false 74 | discover_batching_rules: false 75 | # Resource profiles configuration 76 | resource_profiles: 77 | # Resource profile discovery type 78 | discovery_type: differentiated 79 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 80 | granularity: 60 81 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 82 | confidence: 83 | - 0.5 84 | - 0.85 85 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 86 | support: 87 | - 0.05 88 | - 0.5 89 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 90 | participation: 0.4 91 | 
#####################
92 | # Extraneous delays #
93 | #####################
94 | extraneous_activity_delays:
95 |   # Method to compute the extraneous delay
96 |   discovery_method: eclipse-aware
97 |   # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage)
98 |   num_iterations: 1
99 | 
-------------------------------------------------------------------------------- /resources/config/configuration_example_data_aware.yml: --------------------------------------------------------------------------------
1 | #################################################################################################################
2 | # Simple configuration example with i) no evaluation of the final BPS model, ii) 20 iterations of control-flow #
3 | # discovery with data-aware branching rules (decision points), iii) 20 iterations of resource model            #
4 | # (differentiated) discovery, and iv) direct discovery of extraneous delays.                                   #
5 | #################################################################################################################
6 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model                  #
7 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters                #
8 | #################################################################################################################
9 | version: 5
10 | ##########
11 | # Common #
12 | ##########
13 | common:
14 |   # Path to the event log in CSV format
15 |   train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz
16 |   # Specify the name for each of the columns in the CSV file (XES standard by default)
17 |   log_ids:
18 |     case: "case_id"
19 |     activity: "activity"
20 |     resource: "resource"
21 |     enabled_time: "enabled_time"  # If not present in the log, automatically computed
22 |     start_time: "start_time"
23 |     end_time: "end_time"
24 |   # Whether to discover case attributes or not
25 |   discover_data_attributes: true
26 | #################
27 | # Preprocessing #
28 | #################
29 | preprocessing:
30 |   # Threshold to consider two activities as concurrent when computing the enabled time (if necessary)
31 |   enable_time_concurrency_threshold: 0.75
32 | ################
33 | # Control-flow #
34 | ################
35 | control_flow:
36 |   # Metric to guide the optimization process (loss function to minimize)
37 |   optimization_metric: two_gram_distance
38 |   # Number of optimization iterations over the search space
39 |   num_iterations: 20
40 |   # Number of times to evaluate each iteration (using the mean of all of them)
41 |   num_evaluations_per_iteration: 3
42 |   # Method for discovering gateway probabilities
43 |   gateway_probabilities: discovery
44 |   # Discover process model with SplitMiner v3
45 |   mining_algorithm: sm1
46 |   # Number of concurrent relations between events to be captured
47 |   epsilon:
48 |     - 0.05
49 |     - 0.4
50 |   # Threshold for filtering the incoming and outgoing edges
51 |   eta:
52 |     - 0.2
53 |     - 0.7
54 |   # Whether to replace non-trivial OR joins or not
55 |   replace_or_joins:
56 |     - true
57 |     - false
58 |   # Whether to prioritize parallelism over loops or not
59 |   prioritize_parallelism:
60 |     - true
61 |     - false
62 |   # Discover data-aware branching rules, i.e., BPMN decision points based on value of data attributes
63 |   discover_branch_rules: true
64 |   # Minimum f-score value to consider the discovered data-aware branching rules
65 |   f_score:
66 |     - 0.3
67 |     - 0.9
68 | ##################
69 | # Resource model #
70 | 
################## 71 | resource_model: 72 | # Metric to guide the optimization process (loss function to minimize) 73 | optimization_metric: circadian_emd 74 | # Number of optimization iterations over the search space 75 | num_iterations: 20 76 | # Number of times to evaluate each iteration (using the mean of all of them) 77 | num_evaluations_per_iteration: 3 78 | # Whether to discover prioritization or batching behavior 79 | discover_prioritization_rules: false 80 | discover_batching_rules: false 81 | # Resource profiles configuration 82 | resource_profiles: 83 | # Resource profile discovery type 84 | discovery_type: differentiated 85 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 86 | granularity: 60 87 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 88 | confidence: 89 | - 0.5 90 | - 0.85 91 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 92 | support: 93 | - 0.05 94 | - 0.5 95 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 96 | participation: 0.4 97 | ##################### 98 | # Extraneous delays # 99 | ##################### 100 | extraneous_activity_delays: 101 | # Method to compute the extraneous delay 102 | discovery_method: eclipse-aware 103 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 104 | num_iterations: 1 105 | -------------------------------------------------------------------------------- /resources/config/configuration_example_fuzzy.yml: -------------------------------------------------------------------------------- 1 | ################################################################################################################# 2 | # Simple configuration example with i) no evaluation of the final BPS model, ii) 20 iterations of control-flow # 3 | # discovery, iii) 10 iterations of resource model (fuzzy availability) discovery, and iv) no discovery of # 4 | # extraneous delays. 
# 5 | ################################################################################################################# 6 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model # 7 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 8 | ################################################################################################################# 9 | version: 5 10 | ########## 11 | # Common # 12 | ########## 13 | common: 14 | # Path to the event log in CSV format 15 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 16 | # Specify the name for each of the columns in the CSV file (XES standard by default) 17 | log_ids: 18 | case: "case_id" 19 | activity: "activity" 20 | resource: "resource" 21 | enabled_time: "enabled_time" # If not present in the log, automatically computed 22 | start_time: "start_time" 23 | end_time: "end_time" 24 | # Whether to discover case attributes or not 25 | discover_data_attributes: false 26 | ################# 27 | # Preprocessing # 28 | ################# 29 | preprocessing: 30 | # Threshold to consider two activities as concurrent when computing the enabled time (if necessary) 31 | enable_time_concurrency_threshold: 0.75 32 | ################ 33 | # Control-flow # 34 | ################ 35 | control_flow: 36 | # Metric to guide the optimization process (loss function to minimize) 37 | optimization_metric: two_gram_distance 38 | # Number of optimization iterations over the search space 39 | num_iterations: 20 40 | # Number of times to evaluate each iteration (using the mean of all of them) 41 | num_evaluations_per_iteration: 3 42 | # Method for discovering gateway probabilities 43 | gateway_probabilities: discovery 44 | # Discover process model with SplitMiner v3 45 | mining_algorithm: sm1 46 | # Number of concurrent relations between events to be captured 47 | epsilon: 48 | - 0.05 49 | - 0.4 50 | # Threshold for filtering the incoming and outgoing edges 51 | eta: 52 | - 0.2 53 | - 0.7 54 | # Whether to replace non-trivial OR joins or not 55 | replace_or_joins: 56 | - true 57 | - false 58 | # Whether to prioritize parallelism over loops or not 59 | prioritize_parallelism: 60 | - true 61 | - false 62 | ################## 63 | # Resource model # 64 | ################## 65 | resource_model: 66 | # Metric to guide the optimization process (loss function to minimize) 67 | optimization_metric: circadian_emd 68 | # Number of optimization iterations over the search space 69 | num_iterations: 10 70 | # Number of times to evaluate each iteration (using the mean of all of them) 71 | num_evaluations_per_iteration: 3 72 | # Whether to discover prioritization or batching behavior 73 | discover_prioritization_rules: false 74 | discover_batching_rules: false 75 | # Resource profiles configuration 76 | resource_profiles: 77 | # Resource profile discovery type 78 | discovery_type: differentiated_fuzzy 79 | # Duration of each granule in the resource calendar that will get its own probability 80 | granularity: 60 81 | # Angle of the fuzzy trapezoid when computing the availability probability for an activity (angle from start to end) 82 | fuzzy_angle: 83 | - 0.1 84 | - 0.9 85 | -------------------------------------------------------------------------------- /resources/config/configuration_example_with_evaluation.yml: -------------------------------------------------------------------------------- 1 | 
#################################################################################################################
2 | # Same simple configuration as 'configuration_example.yml' but evaluating the quality of the final BPS model  #
3 | #################################################################################################################
4 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model                  #
5 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters                #
6 | #################################################################################################################
7 | version: 5
8 | ##########
9 | # Common #
10 | ##########
11 | common:
12 |   # Path to the event log in CSV format
13 |   train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz
14 |   # Specify the name for each of the columns in the CSV file (XES standard by default)
15 |   log_ids:
16 |     case: "case_id"
17 |     activity: "activity"
18 |     resource: "resource"
19 |     enabled_time: "enabled_time"  # If not present in the log, automatically computed
20 |     start_time: "start_time"
21 |     end_time: "end_time"
22 |   # Event log to evaluate the discovered BPS model with
23 |   test_log_path: ../event_logs/LoanApp_simplified_test.csv.gz
24 |   # Number of evaluations of the discovered BPS model
25 |   num_final_evaluations: 10
26 |   # Metrics to evaluate the discovered BPS model
27 |   evaluation_metrics:
28 |     - 3_gram_distance
29 |     - 2_gram_distance
30 |     - absolute_event_distribution
31 |     - relative_event_distribution
32 |     - circadian_event_distribution
33 |     - arrival_event_distribution
34 |     - cycle_time_distribution
35 |   # Whether to discover case attributes or not
36 |   discover_data_attributes: false
37 | #################
38 | # Preprocessing #
39 | #################
40 | preprocessing:
41 |   # Threshold to consider two activities as concurrent when computing the enabled time (if necessary)
42 |   enable_time_concurrency_threshold: 0.75
43 | ################
44 | # Control-flow #
45 | ################
46 | control_flow:
47 |   # Metric to guide the optimization process (loss function to minimize)
48 |   optimization_metric: two_gram_distance
49 |   # Number of optimization iterations over the search space
50 |   num_iterations: 20
51 |   # Number of times to evaluate each iteration (using the mean of all of them)
52 |   num_evaluations_per_iteration: 3
53 |   # Methods for discovering gateway probabilities
54 |   gateway_probabilities: discovery
55 |   # Discover process model with SplitMiner v3
56 |   mining_algorithm: sm1
57 |   # Number of concurrent relations between events to be captured
58 |   epsilon:
59 |     - 0.05
60 |     - 0.4
61 |   # Threshold for filtering the incoming and outgoing edges
62 |   eta:
63 |     - 0.2
64 |     - 0.7
65 |   # Whether to replace non-trivial OR joins or not
66 |   replace_or_joins:
67 |     - true
68 |     - false
69 |   # Whether to prioritize parallelism over loops or not
70 |   prioritize_parallelism:
71 |     - true
72 |     - false
73 | ##################
74 | # Resource model #
75 | ##################
76 | resource_model:
77 |   # Metric to guide the optimization process (loss function to minimize)
78 |   optimization_metric: circadian_emd
79 |   # Number of optimization iterations over the search space
80 |   num_iterations: 20
81 |   # Number of times to evaluate each iteration (using the mean of all of them)
82 |   num_evaluations_per_iteration: 3
83 |   # Whether to discover prioritization or batching behavior
84 |   discover_prioritization_rules: false
85 |   discover_batching_rules: false
86 | # Resource profiles configuration 87 | resource_profiles: 88 | # Resource profile discovery type 89 | discovery_type: differentiated 90 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 91 | granularity: 60 92 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 93 | confidence: 94 | - 0.5 95 | - 0.85 96 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 97 | support: 98 | - 0.05 99 | - 0.5 100 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 101 | participation: 0.4 102 | ##################### 103 | # Extraneous delays # 104 | ##################### 105 | extraneous_activity_delays: 106 | # Method to compute the extraneous delay 107 | discovery_method: eclipse-aware 108 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 109 | num_iterations: 1 110 | -------------------------------------------------------------------------------- /resources/config/configuration_example_with_provided_process_model.yml: -------------------------------------------------------------------------------- 1 | ################################################################################################################# 2 | # Same simple configuration as 'configuration_example.yml' but providing the BPMN model # 3 | ################################################################################################################# 4 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model # 5 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 6 | ################################################################################################################# 7 | version: 5 8 | ########## 9 | # Common # 10 | ########## 11 | common: 12 | # Path to the event log in CSV format 13 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 14 | # Specify the name for each of the columns in the CSV file (XES standard by default) 15 | log_ids: 16 | case: "case_id" 17 | activity: "activity" 18 | resource: "resource" 19 | enabled_time: "enabled_time" # If not present in the log, automatically computed 20 | start_time: "start_time" 21 | end_time: "end_time" 22 | # Use this process model and skip its discovery 23 | process_model_path: ../models/LoanApp_simplified.bpmn 24 | # Whether to discover case attributes or not 25 | discover_data_attributes: false 26 | ################# 27 | # Preprocessing # 28 | ################# 29 | preprocessing: 30 | # Threshold to consider two activities as concurrent when computing the enabled time (if necessary) 31 | enable_time_concurrency_threshold: 0.75 32 | ################ 33 | # Control-flow # 34 | ################ 35 | control_flow: 36 | # Metric to guide the optimization process (loss function to minimize) 37 | optimization_metric: two_gram_distance 38 | # Number of optimization iterations over the search space 39 | num_iterations: 1 40 | # Number of times to evaluate each iteration (using the mean of all of them) 41 | num_evaluations_per_iteration: 3 42 | # Methods for discovering gateway probabilities 43 | gateway_probabilities: discovery 44 | ################## 45 | # Resource model # 46 | ################## 47 | resource_model: 48 | # Metric to guide the optimization process (loss function 
to minimize) 49 | optimization_metric: circadian_emd 50 | # Number of optimization iterations over the search space 51 | num_iterations: 20 52 | # Number of times to evaluate each iteration (using the mean of all of them) 53 | num_evaluations_per_iteration: 3 54 | # Whether to discover prioritization or batching behavior 55 | discover_prioritization_rules: false 56 | discover_batching_rules: false 57 | # Resource profiles configuration 58 | resource_profiles: 59 | # Resource profile discovery type 60 | discovery_type: pool 61 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 62 | granularity: 60 63 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 64 | confidence: 65 | - 0.5 66 | - 0.85 67 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 68 | support: 69 | - 0.05 70 | - 0.5 71 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 72 | participation: 0.4 73 | ##################### 74 | # Extraneous delays # 75 | ##################### 76 | extraneous_activity_delays: 77 | # Method to compute the extraneous delay 78 | discovery_method: eclipse-aware 79 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 80 | num_iterations: 1 81 | -------------------------------------------------------------------------------- /resources/config/configuration_one_shot.yml: -------------------------------------------------------------------------------- 1 | ################################################################################################################# 2 | # Simple configuration example for running SIMOD without parameter optimization steps. The defined parameters # 3 | # should be individual values and not intervals, as there is no optimization. 
# 4 | ################################################################################################################# 5 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 6 | ################################################################################################################# 7 | version: 5 8 | ########## 9 | # Common # 10 | ########## 11 | common: 12 | # Path to the event log in CSV format 13 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 14 | # Specify the name for each of the columns in the CSV file (XES standard by default) 15 | log_ids: 16 | case: "case_id" 17 | activity: "activity" 18 | resource: "resource" 19 | enabled_time: "enabled_time" # If not present in the log, automatically computed 20 | start_time: "start_time" 21 | end_time: "end_time" 22 | ################ 23 | # Control-flow # 24 | ################ 25 | control_flow: 26 | # Number of optimization iterations over the search space 27 | num_iterations: 1 28 | # Number of times to evaluate each iteration (using the mean of all of them) 29 | num_evaluations_per_iteration: 1 30 | # Methods for discovering gateway probabilities 31 | gateway_probabilities: discovery 32 | # Discover process model with SplitMiner v3 33 | mining_algorithm: sm1 34 | # Number of concurrent relations between events to be captured 35 | epsilon: 0.3 36 | # Threshold for filtering the incoming and outgoing edges 37 | eta: 0.5 38 | # Whether to replace non-trivial OR joins or not 39 | replace_or_joins: false 40 | # Whether to prioritize parallelism over loops or not 41 | prioritize_parallelism: true 42 | ################## 43 | # Resource model # 44 | ################## 45 | resource_model: 46 | # Number of optimization iterations over the search space 47 | num_iterations: 1 48 | # Number of times to evaluate each iteration (using the mean of all of them) 49 | num_evaluations_per_iteration: 1 50 | # Resource profiles configuration 51 | resource_profiles: 52 | # Resource profile discovery type 53 | discovery_type: differentiated 54 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 55 | granularity: 60 56 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 57 | confidence: 0.6 58 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 59 | support: 0.2 60 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 61 | participation: 0.4 62 | ##################### 63 | # Extraneous delays # 64 | ##################### 65 | extraneous_activity_delays: 66 | # Method to compute the extraneous delay 67 | discovery_method: eclipse-aware 68 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 69 | num_iterations: 1 70 | -------------------------------------------------------------------------------- /resources/event_logs/LoanApp_simplified_test.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/resources/event_logs/LoanApp_simplified_test.csv.gz -------------------------------------------------------------------------------- /resources/event_logs/LoanApp_simplified_train.csv.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/resources/event_logs/LoanApp_simplified_train.csv.gz -------------------------------------------------------------------------------- /resources/event_logs/PurchasingExample.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/resources/event_logs/PurchasingExample.csv.gz -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This script is used for running Simod from a Docker container. 4 | 5 | # configuration path from the command line 6 | CONFIG_PATH=$1 7 | 8 | # optional output_dir from the command line 9 | OUTPUT_DIR=$2 10 | 11 | # if no config_path is specified, exit with error 12 | if [ -z "$CONFIG_PATH" ]; then 13 | echo "ERROR: No configuration file specified." 14 | exit 1 15 | fi 16 | 17 | # if no output_dir is specified, use the default directory 18 | if [ -z "$OUTPUT_DIR" ]; then 19 | OUTPUT_DIR=$(pwd)/outputs 20 | fi 21 | 22 | # run Simod (paths quoted so they survive whitespace) 23 | poetry run simod --configuration "$CONFIG_PATH" --output "$OUTPUT_DIR" 24 | -------------------------------------------------------------------------------- /src/simod/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["simod"] 2 | -------------------------------------------------------------------------------- /src/simod/batching/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/batching/__init__.py -------------------------------------------------------------------------------- /src/simod/batching/discovery.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pix_framework.discovery.batch_processing.batch_characteristics import discover_batch_processing_and_characteristics 3 | from pix_framework.io.event_log import EventLogIDs 4 | 5 | from simod.batching.types import BatchingRule 6 | 7 | 8 | def discover_batching_rules(log: pd.DataFrame, log_ids: EventLogIDs) -> list[BatchingRule]: 9 | """ 10 | Discover batching rules from a log. 11 | The enabled_time column is required. If it is missing, it will be estimated using the start-time-estimator.
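
    Examples
    --------
    A minimal usage sketch (illustrative; assumes a CSV log whose columns match the
    ``log_ids`` mapping below, and "event_log.csv" is a placeholder path):

        import pandas as pd
        from pix_framework.io.event_log import EventLogIDs

        log_ids = EventLogIDs(case="case_id", activity="activity", resource="resource",
                              enabled_time="enabled_time", start_time="start_time", end_time="end_time")
        log = pd.read_csv("event_log.csv")
        rules = discover_batching_rules(log, log_ids)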
12 | """ 13 | rules = discover_batch_processing_and_characteristics( 14 | event_log=log, log_ids=log_ids, batch_min_size=3, max_sequential_gap=pd.Timedelta("10m") 15 | ) 16 | 17 | rules = list(map(lambda x: BatchingRule.from_dict(x), rules)) 18 | 19 | return rules 20 | -------------------------------------------------------------------------------- /src/simod/branch_rules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/branch_rules/__init__.py -------------------------------------------------------------------------------- /src/simod/branch_rules/discovery.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from typing import List 3 | 4 | from simod.branch_rules.types import BranchRules 5 | 6 | from pix_framework.io.event_log import EventLogIDs 7 | from pix_framework.discovery.gateway_probabilities import GatewayProbabilities 8 | from pix_framework.discovery.gateway_conditions.gateway_conditions import discover_gateway_conditions 9 | 10 | 11 | def discover_branch_rules(bpmn_graph, log: pd.DataFrame, log_ids: EventLogIDs, f_score=0.7) -> list[BranchRules]: 12 | """ 13 | Discover branch_rules from a log. 14 | """ 15 | rules = discover_gateway_conditions(bpmn_graph, log, log_ids, f_score_threshold=f_score) 16 | 17 | rules = list(map(lambda x: BranchRules.from_dict(x), rules)) 18 | 19 | return rules 20 | 21 | 22 | def map_branch_rules_to_flows(gateway_probabilities: List[GatewayProbabilities], branch_rules: List[BranchRules]): 23 | condition_lookup = {rule.id: rule for rule in branch_rules} 24 | 25 | for gateway in gateway_probabilities: 26 | for path in gateway.outgoing_paths: 27 | if path.path_id in condition_lookup: 28 | path.condition_id = condition_lookup[path.path_id].id 29 | 30 | return gateway_probabilities 31 | -------------------------------------------------------------------------------- /src/simod/branch_rules/types.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | 4 | @dataclass 5 | class BranchRule: 6 | attribute: str 7 | comparison: str 8 | value: str 9 | 10 | @staticmethod 11 | def from_dict(data: dict) -> "BranchRule": 12 | return BranchRule( 13 | attribute=data["attribute"], 14 | comparison=data["comparison"], 15 | value=data["value"] 16 | ) 17 | 18 | def to_dict(self): 19 | return { 20 | "attribute": self.attribute, 21 | "comparison": self.comparison, 22 | "value": self.value 23 | } 24 | 25 | 26 | @dataclass 27 | class BranchRules: 28 | id: str 29 | rules: list[list[BranchRule]] 30 | 31 | @staticmethod 32 | def from_dict(data: dict) -> "BranchRules": 33 | return BranchRules( 34 | id=data["id"], 35 | rules=[ 36 | [BranchRule.from_dict(rule) for rule in rule_set] 37 | for rule_set in data["rules"] 38 | ] 39 | ) 40 | 41 | def to_dict(self): 42 | return { 43 | "id": self.id, 44 | "rules": [[rule.to_dict() for rule in rule_set] for rule_set in self.rules] 45 | } 46 | -------------------------------------------------------------------------------- /src/simod/cli.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | from typing import Optional 4 | 5 | import click 6 | import yaml 7 | from pix_framework.filesystem.file_manager import get_random_folder_id 8 | 9 | from simod.event_log.event_log import 
EventLog 10 | from simod.runtime_meter import RuntimeMeter 11 | from simod.settings.simod_settings import SimodSettings 12 | from simod.simod import Simod 13 | 14 | 15 | @click.command( 16 | help=""" 17 | Simod combines process mining and machine learning techniques to automate the discovery and tuning of 18 | Business Process Simulation models from event logs extracted from enterprise information systems. 19 | """ 20 | ) 21 | @click.option( 22 | "--configuration", 23 | "-c", 24 | default=None, 25 | required=False, 26 | type=click.Path(exists=True, dir_okay=False, resolve_path=True, path_type=Path), 27 | help="Path to the Simod configuration file.", 28 | ) 29 | @click.option( 30 | "--output", 31 | "-o", 32 | default=None, 33 | required=False, 34 | type=click.Path(file_okay=False, resolve_path=True, path_type=Path), 35 | help="Path to the output directory where discovery results will be stored.", 36 | ) 37 | @click.option( 38 | "--one-shot", 39 | default=False, 40 | is_flag=True, 41 | required=False, 42 | type=bool, 43 | help="Run Simod with default settings only once without the optimization phase.", 44 | ) 45 | @click.option( 46 | "--event-log", 47 | "-l", 48 | required=False, 49 | type=click.Path(exists=True, dir_okay=False, resolve_path=True, path_type=Path), 50 | help="Path to the event log file when using the --one-shot flag. " 51 | "Columns must be named 'case_id', 'activity', 'start_time', 'end_time', 'resource'.", 52 | ) 53 | @click.option( 54 | "--schema-yaml", 55 | required=False, 56 | is_flag=True, 57 | help="Print the configuration YAML schema and exit.", 58 | ) 59 | @click.option( 60 | "--schema-json", 61 | required=False, 62 | is_flag=True, 63 | help="Print the configuration JSON schema and exit.", 64 | ) 65 | @click.version_option() 66 | def main( 67 | configuration: Optional[Path], 68 | output: Optional[Path], 69 | one_shot: bool, 70 | event_log: Optional[Path], 71 | schema_yaml: bool, 72 | schema_json: bool, 73 | ) -> None: 74 | if schema_yaml: 75 | print(yaml.dump(SimodSettings().model_json_schema())) 76 | return 77 | 78 | if schema_json: 79 | print(json.dumps(SimodSettings().model_json_schema())) 80 | return 81 | 82 | if one_shot: 83 | settings = SimodSettings.one_shot() 84 | settings.common.train_log_path = event_log 85 | settings.common.test_log_path = None 86 | else: 87 | settings = SimodSettings.from_path(configuration) 88 | 89 | output = output if output is not None else (Path.cwd() / "outputs" / get_random_folder_id()).absolute() 90 | 91 | # To measure the runtime of each stage 92 | runtimes = RuntimeMeter() 93 | 94 | # Read and preprocess event log 95 | runtimes.start(RuntimeMeter.PREPROCESSING) 96 | event_log = EventLog.from_path( 97 | log_ids=settings.common.log_ids, 98 | train_log_path=settings.common.train_log_path, 99 | test_log_path=settings.common.test_log_path, 100 | preprocessing_settings=settings.preprocessing, 101 | need_test_partition=settings.common.perform_final_evaluation, 102 | ) 103 | runtimes.stop(RuntimeMeter.PREPROCESSING) 104 | 105 | # Instantiate and run Simod 106 | simod = Simod(settings, event_log=event_log, output_dir=output) 107 | simod.run(runtimes=runtimes) 108 | 109 | 110 | if __name__ == "__main__": 111 | main() 112 | -------------------------------------------------------------------------------- /src/simod/cli_formatter.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | 4 | def print_section(message: str): 5 | click.secho(f"\n{message}", bold=True) 6 | click.secho("=" * 
len(message), bold=True) 7 | 8 | 9 | def print_subsection(message: str): 10 | click.secho(f"\n{message}") 11 | click.echo("-" * len(message)) 12 | 13 | 14 | def print_asset(message: str): 15 | click.secho(f"\n︎{message}", bold=True) 16 | 17 | 18 | def print_message(message: str): 19 | click.echo(message) 20 | 21 | 22 | def print_notice(message: str): 23 | click.secho(f"\n{message}", bold=True) 24 | 25 | 26 | def print_warning(message: str): 27 | click.secho(f"\n{message}", bold=True) 28 | 29 | 30 | def print_step(message: str): 31 | click.echo(f"\n{message}") 32 | -------------------------------------------------------------------------------- /src/simod/control_flow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/control_flow/__init__.py -------------------------------------------------------------------------------- /src/simod/control_flow/lib/bpmn-layout-1.0.6-jar-with-dependencies.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/control_flow/lib/bpmn-layout-1.0.6-jar-with-dependencies.jar -------------------------------------------------------------------------------- /src/simod/control_flow/lib/split-miner-1.7.1-all.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/control_flow/lib/split-miner-1.7.1-all.jar -------------------------------------------------------------------------------- /src/simod/data_attributes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/data_attributes/__init__.py -------------------------------------------------------------------------------- /src/simod/data_attributes/discovery.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from simod.data_attributes.types import GlobalAttribute, CaseAttribute, EventAttribute 4 | 5 | from pix_framework.io.event_log import EventLogIDs 6 | from pix_framework.discovery.attributes.attribute_discovery import discover_attributes 7 | 8 | 9 | def discover_data_attributes(log: pd.DataFrame, log_ids: EventLogIDs) -> (list[CaseAttribute], list[GlobalAttribute], list[EventAttribute]): 10 | """ 11 | Discover data attributes from a log ignoring common non-case columns. 
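
    Examples
    --------
    A minimal sketch (assumes ``log`` and ``log_ids`` as in ``discover_batching_rules``,
    with the log carrying extra data columns, e.g. a hypothetical "amount"):

        global_attrs, case_attrs, event_attrs = discover_data_attributes(log, log_ids)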
12 | """ 13 | attributes = discover_attributes( 14 | event_log=log, 15 | log_ids=log_ids, 16 | avoid_columns=[ 17 | log_ids.case, 18 | log_ids.activity, 19 | log_ids.enabled_time, 20 | log_ids.start_time, 21 | log_ids.end_time, 22 | log_ids.resource, 23 | ], 24 | confidence_threshold=0.95, 25 | ) 26 | 27 | global_attributes = list(map(GlobalAttribute.from_dict, attributes["global_attributes"])) 28 | case_attributes = list(map(CaseAttribute.from_dict, attributes["case_attributes"])) 29 | event_attributes = list(map(EventAttribute.from_dict, attributes["event_attributes"])) 30 | 31 | return global_attributes, case_attributes, event_attributes 32 | -------------------------------------------------------------------------------- /src/simod/data_attributes/types.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from enum import Enum 3 | from typing import Union 4 | 5 | 6 | class CaseAttributeType(Enum): 7 | DISCRETE = "discrete" 8 | CONTINUOUS = "continuous" 9 | 10 | 11 | class GlobalAttributeType(Enum): 12 | DISCRETE = "discrete" 13 | CONTINUOUS = "continuous" 14 | 15 | 16 | class EventAttributeType(Enum): 17 | DISCRETE = "discrete" 18 | CONTINUOUS = "continuous" 19 | EXPRESSION = "expression" 20 | DTREE = "dtree" 21 | 22 | 23 | @dataclass 24 | class CaseAttribute: 25 | name: str 26 | type: CaseAttributeType 27 | values: Union[list[dict], dict[str, float]] 28 | 29 | @staticmethod 30 | def from_dict(case_attribute: dict) -> "CaseAttribute": 31 | """ 32 | Creates a CaseAttribute object from a dictionary returned by data_attribute_discovery.discovery. 33 | """ 34 | return CaseAttribute( 35 | name=case_attribute["name"], 36 | type=CaseAttributeType(case_attribute["type"]), 37 | values=case_attribute["values"], 38 | ) 39 | 40 | def to_prosimos(self) -> dict: 41 | if self.type == CaseAttributeType.CONTINUOUS: 42 | return { 43 | "name": self.name, 44 | "type": self.type.value, 45 | "values": self.values, 46 | } 47 | else: 48 | return { 49 | "name": self.name, 50 | "type": self.type.value, 51 | "values": self.values 52 | } 53 | 54 | 55 | @dataclass 56 | class GlobalAttribute: 57 | name: str 58 | type: GlobalAttributeType 59 | values: Union[list[dict], dict[str, float]] 60 | 61 | @staticmethod 62 | def from_dict(global_attribute: dict) -> "GlobalAttribute": 63 | """ 64 | Creates a GlobalAttribute object from a dictionary returned by data_attribute_discovery.discovery. 65 | """ 66 | return GlobalAttribute( 67 | name=global_attribute["name"], 68 | type=GlobalAttributeType(global_attribute["type"]), 69 | values=global_attribute["values"], 70 | ) 71 | 72 | def to_prosimos(self) -> dict: 73 | if self.type == GlobalAttributeType.CONTINUOUS: 74 | return { 75 | "name": self.name, 76 | "type": self.type.value, 77 | "values": self.values, 78 | } 79 | else: 80 | return { 81 | "name": self.name, 82 | "type": self.type.value, 83 | "values": self.values 84 | } 85 | 86 | 87 | @dataclass 88 | class EventAttributeDetails: 89 | name: str 90 | type: EventAttributeType 91 | values: Union[list[dict[str, float]], dict[str, Union[str, list[dict[str, float]]]], str] 92 | 93 | @staticmethod 94 | def from_dict(attribute: dict) -> "EventAttributeDetails": 95 | """ 96 | Creates an EventAttributeDetails object from a dictionary returned by data_attribute_discovery.discovery. 
97 | """ 98 | return EventAttributeDetails( 99 | name=attribute["name"], 100 | type=EventAttributeType(attribute["type"]), 101 | values=attribute["values"], 102 | ) 103 | 104 | def to_prosimos(self) -> dict: 105 | if self.type == EventAttributeType.CONTINUOUS: 106 | return { 107 | "name": self.name, 108 | "type": self.type.value, 109 | "values": self.values, 110 | } 111 | elif self.type == EventAttributeType.DISCRETE: 112 | return { 113 | "name": self.name, 114 | "type": self.type.value, 115 | "values": self.values 116 | 117 | } 118 | elif self.type == EventAttributeType.EXPRESSION: 119 | return { 120 | "name": self.name, 121 | "type": self.type.value, 122 | "values": self.values, 123 | } 124 | elif self.type == EventAttributeType.DTREE: 125 | return { 126 | "name": self.name, 127 | "type": self.type.value, 128 | "values": self.values 129 | } 130 | 131 | 132 | @dataclass 133 | class EventAttribute: 134 | event_id: str 135 | attributes: list[EventAttributeDetails] 136 | 137 | @staticmethod 138 | def from_dict(event_attribute: dict) -> "EventAttribute": 139 | """ 140 | Creates an EventAttribute object from a dictionary. 141 | """ 142 | return EventAttribute( 143 | event_id=event_attribute["event_id"], 144 | attributes=[EventAttributeDetails.from_dict(attr) for attr in event_attribute["attributes"]], 145 | ) 146 | 147 | def to_prosimos(self) -> dict: 148 | return { 149 | "event_id": self.event_id, 150 | "attributes": [attr.to_prosimos() for attr in self.attributes], 151 | } 152 | -------------------------------------------------------------------------------- /src/simod/event_log/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/event_log/__init__.py -------------------------------------------------------------------------------- /src/simod/event_log/preprocessor.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | from typing import Optional 4 | 5 | import pandas as pd 6 | from pix_framework.enhancement.concurrency_oracle import OverlappingConcurrencyOracle 7 | from pix_framework.enhancement.multitasking import adjust_durations 8 | from pix_framework.enhancement.start_time_estimator.config import ConcurrencyThresholds 9 | from pix_framework.enhancement.start_time_estimator.config import Configuration as StartTimeEstimatorConfiguration 10 | from pix_framework.enhancement.start_time_estimator.estimator import StartTimeEstimator 11 | from pix_framework.io.event_log import EventLogIDs 12 | 13 | from simod.cli_formatter import print_section, print_step 14 | 15 | 16 | @dataclass 17 | class MultitaskingSettings: 18 | log_path: Path 19 | output_dir: Path 20 | is_concurrent: bool 21 | verbose: bool 22 | 23 | 24 | @dataclass 25 | class Settings: 26 | multitasking_settings: Optional[MultitaskingSettings] = None 27 | 28 | 29 | class Preprocessor: 30 | """ 31 | Handles event log pre-processing by executing various transformations 32 | to estimate missing timestamps and adjust data for multitasking. 33 | 34 | This class modifies an input event log based on the specified settings 35 | and returns the pre-processed log. 36 | 37 | Attributes 38 | ---------- 39 | log : :class:`pandas.DataFrame` 40 | The event log stored as a DataFrame. 41 | log_ids : :class:`EventLogIDs` 42 | Identifiers for mapping column names in the event log. 
43 | """ 44 | 45 | _log: pd.DataFrame 46 | _log_ids: EventLogIDs 47 | 48 | def __init__(self, log: pd.DataFrame, log_ids: EventLogIDs): 49 | keys = [log_ids.start_time, log_ids.end_time] if log_ids.start_time in log.columns else [log_ids.end_time] 50 | self._log = log.sort_values(by=keys).reset_index(drop=True) 51 | self._log_ids = log_ids 52 | 53 | def run( 54 | self, 55 | multitasking: bool = False, 56 | concurrency_thresholds: ConcurrencyThresholds = ConcurrencyThresholds(), 57 | enable_time_concurrency_threshold: float = 0.75, 58 | ) -> pd.DataFrame: 59 | """ 60 | Executes event log pre-processing steps based on the specified parameters. 61 | 62 | This includes estimating missing start times, adjusting timestamps 63 | for multitasking scenarios, and computing enabled times. 64 | 65 | Parameters 66 | ---------- 67 | multitasking : bool 68 | Whether to adjust the timestamps for multitasking. 69 | concurrency_thresholds : :class:`ConcurrencyThresholds`, optional 70 | Thresholds for the Heuristics Miner to estimate start times. 71 | enable_time_concurrency_threshold : float 72 | Threshold for estimating enabled times. 73 | 74 | Returns 75 | ------- 76 | :class:`pandas.DataFrame` 77 | The pre-processed event log. 78 | """ 79 | print_section("Pre-processing") 80 | 81 | if self._log_ids.start_time not in self._log.columns or self._log[self._log_ids.start_time].isnull().any(): 82 | self._add_start_times(concurrency_thresholds) 83 | 84 | if multitasking: 85 | self._adjust_for_multitasking() 86 | 87 | if self._log_ids.enabled_time not in self._log.columns: 88 | # The start times were not estimated (otherwise enabled times would 89 | # be present), and the enabled times are not in the original log 90 | self._add_enabled_times(enable_time_concurrency_threshold) 91 | 92 | return self._log 93 | 94 | def _adjust_for_multitasking(self, verbose=False): 95 | print_step("Adjusting timestamps for multitasking") 96 | 97 | self._log = adjust_durations( 98 | self._log, 99 | self._log_ids, 100 | verbose=verbose, 101 | ) 102 | 103 | def _add_start_times(self, concurrency_thresholds: ConcurrencyThresholds): 104 | print_step("Adding start times") 105 | 106 | configuration = StartTimeEstimatorConfiguration( 107 | log_ids=self._log_ids, 108 | concurrency_thresholds=concurrency_thresholds, 109 | ) 110 | 111 | self._log = StartTimeEstimator(self._log, configuration).estimate(replace_recorded_start_times=True) 112 | 113 | def _add_enabled_times(self, enable_time_concurrency_threshold: float): 114 | print_step("Adding enabled times") 115 | 116 | configuration = StartTimeEstimatorConfiguration( 117 | log_ids=self._log_ids, 118 | concurrency_thresholds=ConcurrencyThresholds(df=enable_time_concurrency_threshold), 119 | consider_start_times=True, 120 | ) 121 | # The start times are the original ones, so use overlapping concurrency oracle 122 | OverlappingConcurrencyOracle(self._log, configuration).add_enabled_times(self._log) 123 | -------------------------------------------------------------------------------- /src/simod/extraneous_delays/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/extraneous_delays/__init__.py -------------------------------------------------------------------------------- /src/simod/extraneous_delays/optimizer.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from pathlib import 
Path 3 | from typing import List 4 | 5 | from extraneous_activity_delays.config import ( 6 | Configuration as ExtraneousActivityDelaysConfiguration, 7 | TimerPlacement, 8 | SimulationEngine, 9 | SimulationModel, 10 | ) 11 | from extraneous_activity_delays.enhance_with_delays import HyperOptEnhancer, DirectEnhancer 12 | from lxml import etree 13 | from pix_framework.filesystem.file_manager import remove_asset 14 | 15 | from simod.cli_formatter import print_step 16 | from simod.event_log.event_log import EventLog 17 | from simod.extraneous_delays.types import ExtraneousDelay 18 | from simod.settings.extraneous_delays_settings import ExtraneousDelaysSettings 19 | from simod.simulation.parameters.BPS_model import BPSModel 20 | 21 | 22 | class ExtraneousDelaysOptimizer: 23 | """ 24 | Optimizer for the discovery of the extraneous delays model. 25 | 26 | This class performs either a direct discovery of the extraneous delays of the process, or launches an iterative 27 | optimization that first discovers the extraneous delays and then adjusts their size to better reflect reality. 28 | 29 | Attributes 30 | ---------- 31 | event_log : :class:`~simod.event_log.event_log.EventLog` 32 | The event log containing the train and validation data. 33 | bps_model : :class:`~simod.simulation.parameters.BPS_model.BPSModel` 34 | The business process simulation model to enhance with extraneous delays, including the BPMN representation. 35 | settings : :class:`~simod.settings.extraneous_delays_settings.ExtraneousDelaysSettings` 36 | Configuration settings for extraneous delay discovery. 37 | base_directory : :class:`pathlib.Path` 38 | Directory where output files will be stored. 39 | """ 40 | 41 | def __init__( 42 | self, 43 | event_log: EventLog, 44 | bps_model: BPSModel, 45 | settings: ExtraneousDelaysSettings, 46 | base_directory: Path, 47 | ): 48 | self.event_log = event_log 49 | self.bps_model = bps_model 50 | self.settings = settings 51 | self.base_directory = base_directory 52 | 53 | assert self.bps_model.process_model is not None, "BPMN model is not specified." 54 | 55 | def run(self) -> List[ExtraneousDelay]: 56 | """ 57 | Executes the extraneous delay discovery process. 58 | 59 | This method configures the optimization process, applies either a direct enhancement 60 | or a hyperparameter optimization approach to identify delays, and returns the best 61 | detected delays as a list of `ExtraneousDelay` objects. 62 | 63 | Returns 64 | ------- 65 | List[:class:`~simod.extraneous_delays.types.ExtraneousDelay`] 66 | A list of detected extraneous delays, each containing activity names, delay IDs, 67 | and their corresponding duration distributions. 
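
        Examples
        --------
        A usage sketch (assumes ``event_log``, ``bps_model``, and ``settings`` were built by the
        surrounding pipeline; the output directory below is illustrative):

            optimizer = ExtraneousDelaysOptimizer(event_log, bps_model, settings, Path("outputs/delays"))
            delays = optimizer.run()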
68 | """ 69 | # Set-up configuration for extraneous delay discovery 70 | configuration = ExtraneousActivityDelaysConfiguration( 71 | log_ids=self.event_log.log_ids, 72 | process_name=self.event_log.process_name, 73 | num_iterations=self.settings.num_iterations, 74 | num_evaluation_simulations=self.settings.num_evaluations_per_iteration, 75 | training_partition_ratio=0.5, 76 | optimization_metric=self.settings.optimization_metric, 77 | discovery_method=self.settings.discovery_method, 78 | timer_placement=TimerPlacement.BEFORE, 79 | simulation_engine=SimulationEngine.PROSIMOS, 80 | ) 81 | configuration.PATH_OUTPUTS = self.base_directory 82 | # Discover extraneous delays 83 | simulation_model = _bps_model_to_simulation_model(self.bps_model) 84 | if self.settings.num_iterations > 1: 85 | enhancer = HyperOptEnhancer(self.event_log.train_validation_partition, simulation_model, configuration) 86 | enhancer.enhance_simulation_model_with_delays() 87 | best_timers = enhancer.best_timers 88 | else: 89 | enhancer = DirectEnhancer(self.event_log.train_validation_partition, simulation_model, configuration) 90 | best_timers = enhancer.timers 91 | # Return best delays 92 | return [ 93 | ExtraneousDelay( 94 | activity_name=activity, 95 | delay_id=f"Event_{str(uuid.uuid4())}", 96 | duration_distribution=best_timers[activity], 97 | ) 98 | for activity in best_timers 99 | ] 100 | 101 | def cleanup(self): 102 | print_step(f"Removing {self.base_directory}") 103 | remove_asset(self.base_directory) 104 | 105 | 106 | def _bps_model_to_simulation_model(bps_model: BPSModel) -> SimulationModel: 107 | parser = etree.XMLParser(remove_blank_text=True) 108 | bpmn_model = etree.parse(bps_model.process_model, parser) 109 | parameters = bps_model.to_prosimos_format() 110 | 111 | simulation_model = SimulationModel(bpmn_model, parameters) 112 | 113 | return simulation_model 114 | -------------------------------------------------------------------------------- /src/simod/extraneous_delays/types.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from pix_framework.statistics.distribution import DurationDistribution 4 | 5 | 6 | @dataclass 7 | class ExtraneousDelay: 8 | """ 9 | Represents an extraneous delay within a business process activity. 10 | 11 | This class encapsulates the details of an identified extraneous delay, 12 | including the affected activity, a unique delay identifier, and the 13 | duration distribution of the delay. 14 | 15 | Attributes 16 | ---------- 17 | activity_name : str 18 | The name of the activity where the extraneous delay occurs. 19 | delay_id : str 20 | A unique identifier for the delay event. 21 | duration_distribution : :class:`DurationDistribution` 22 | The statistical distribution representing the delay duration. 23 | """ 24 | 25 | activity_name: str 26 | delay_id: str 27 | duration_distribution: DurationDistribution 28 | 29 | def to_dict(self) -> dict: 30 | """ 31 | Converts the extraneous delay into a dictionary format. 32 | 33 | The dictionary representation is compatible with the Prosimos simulation 34 | engine, containing activity details, a unique event identifier, and the 35 | delay duration distribution. 36 | 37 | Returns 38 | ------- 39 | dict 40 | A dictionary representation of the extraneous delay. 
41 | """ 42 | return { 43 | "activity": self.activity_name, 44 | "event_id": self.delay_id, 45 | } | self.duration_distribution.to_prosimos_distribution() 46 | 47 | @staticmethod 48 | def from_dict(delay: dict) -> "ExtraneousDelay": 49 | """ 50 | Creates an `ExtraneousDelay` instance from a dictionary. 51 | 52 | This method reconstructs an `ExtraneousDelay` object from a dictionary 53 | containing activity name, delay identifier, and duration distribution. 54 | 55 | Parameters 56 | ---------- 57 | delay : dict 58 | A dictionary representation of an extraneous delay. 59 | 60 | Returns 61 | ------- 62 | :class:`ExtraneousDelay` 63 | An instance of `ExtraneousDelay` with the extracted attributes. 64 | """ 65 | return ExtraneousDelay( 66 | activity_name=delay["activity"], 67 | delay_id=delay["event_id"], 68 | duration_distribution=DurationDistribution.from_dict(delay), 69 | ) 70 | -------------------------------------------------------------------------------- /src/simod/prioritization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/prioritization/__init__.py -------------------------------------------------------------------------------- /src/simod/prioritization/discovery.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pix_framework.discovery.prioritization.discovery import discover_priority_rules 3 | from pix_framework.io.event_log import EventLogIDs 4 | 5 | from ..data_attributes.types import CaseAttribute 6 | from .types import PrioritizationRule 7 | 8 | 9 | def discover_prioritization_rules( 10 | log: pd.DataFrame, log_ids: EventLogIDs, case_attributes: list[CaseAttribute] 11 | ) -> list[PrioritizationRule]: 12 | """ 13 | Discover prioritization rules from a log. 14 | The enabled_time column is required. If it is missing, it will be estimated using the start-time-estimator. 
15 | """ 16 | case_attribute_names = list(map(lambda x: x.name, case_attributes)) 17 | 18 | rules = discover_priority_rules( 19 | event_log=log.rename( # Rename columns for hardcoded discovery package 20 | {log_ids.enabled_time: "enabled_time", log_ids.start_time: "start_time", log_ids.resource: "Resource"}, 21 | axis=1, 22 | ), 23 | attributes=case_attribute_names, 24 | ) 25 | 26 | rules = list(map(PrioritizationRule.from_prosimos, rules)) 27 | 28 | return rules 29 | -------------------------------------------------------------------------------- /src/simod/prioritization/types.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | 4 | @dataclass 5 | class PrioritizationFiringRule: 6 | attribute: str 7 | comparison: str 8 | value: list[str] 9 | 10 | @staticmethod 11 | def from_prosimos(rule: dict) -> "PrioritizationFiringRule": 12 | return PrioritizationFiringRule( 13 | attribute=rule["attribute"], 14 | comparison=rule["comparison"], 15 | value=rule["value"], 16 | ) 17 | 18 | def to_prosimos(self) -> dict: 19 | return { 20 | "attribute": self.attribute, 21 | "comparison": self.comparison, 22 | "value": self.value, 23 | } 24 | 25 | 26 | class AndRules: 27 | _rules: list[PrioritizationFiringRule] 28 | 29 | def __init__(self, rules: list[PrioritizationFiringRule]): 30 | self._rules = rules 31 | 32 | @staticmethod 33 | def from_prosimos(and_rules: list[dict]) -> "AndRules": 34 | return AndRules( 35 | rules=list(map(PrioritizationFiringRule.from_prosimos, and_rules)), 36 | ) 37 | 38 | def to_prosimos(self) -> list[dict]: 39 | return list(map(lambda x: x.to_prosimos(), self._rules)) 40 | 41 | 42 | class OrRules: 43 | _rules: list[AndRules] 44 | 45 | def __init__(self, rules: list[AndRules]): 46 | self._rules = rules 47 | 48 | @staticmethod 49 | def from_prosimos(group: list[list[dict]]) -> "OrRules": 50 | return OrRules( 51 | rules=list(map(AndRules.from_prosimos, group)), 52 | ) 53 | 54 | def to_prosimos(self) -> list[dict]: 55 | return list(map(lambda x: x.to_prosimos(), self._rules)) 56 | 57 | 58 | @dataclass 59 | class PrioritizationRule: 60 | priority_level: int 61 | rules: OrRules 62 | 63 | @staticmethod 64 | def from_prosimos(level: dict) -> "PrioritizationRule": 65 | return PrioritizationRule( 66 | priority_level=level["priority_level"], 67 | rules=OrRules.from_prosimos(level["rules"]), 68 | ) 69 | 70 | def to_prosimos(self) -> dict: 71 | return { 72 | "priority_level": self.priority_level, 73 | "rules": self.rules.to_prosimos(), 74 | } 75 | -------------------------------------------------------------------------------- /src/simod/resource_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/resource_model/__init__.py -------------------------------------------------------------------------------- /src/simod/resource_model/repair.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from pix_framework.discovery.resource_calendar_and_performance.resource_activity_performance import ( 4 | ActivityResourceDistribution, 5 | ResourceDistribution, 6 | ) 7 | from pix_framework.discovery.resource_model import ResourceModel 8 | from pix_framework.io.event_log import EventLogIDs 9 | from pix_framework.statistics.distribution import DurationDistribution, 
get_best_fitting_distribution 10 | 11 | from simod.cli_formatter import print_message 12 | 13 | 14 | def repair_with_missing_activities( 15 | resource_model: ResourceModel, model_activities: list[str], event_log: pd.DataFrame, log_ids: EventLogIDs 16 | ): 17 | """ 18 | Updates the resource_model with missing activity_resource_distributions for activities that are present in the 19 | model but not yet in the resource_model. 20 | """ 21 | 22 | # getting missing activities 23 | resource_model_activities = [ 24 | distribution.activity_id for distribution in resource_model.activity_resource_distributions 25 | ] 26 | missing_activities = [activity for activity in model_activities if activity not in resource_model_activities] 27 | 28 | # add missing activities to each resource's assigned_tasks 29 | for resource_profile in resource_model.resource_profiles: 30 | for resource in resource_profile.resources: 31 | resource.assigned_tasks += missing_activities 32 | 33 | # estimate the duration distribution of the activity from all its occurrences in event_log 34 | duration_distributions_per_activity = {} 35 | for activity in missing_activities: 36 | duration_distributions_per_activity[activity] = estimate_duration_distribution_for_activity( 37 | activity, event_log, log_ids 38 | ) 39 | 40 | # add the missing activity resource distributions to the resource model for all the resources 41 | resource_names = [ 42 | resource.id for resource_profile in resource_model.resource_profiles for resource in resource_profile.resources 43 | ] 44 | for activity, duration_distribution in duration_distributions_per_activity.items(): 45 | resource_distributions = [ 46 | ResourceDistribution( 47 | resource_id=resource_name, distribution=duration_distribution.to_prosimos_distribution() 48 | ) 49 | for resource_name in resource_names 50 | ] 51 | resource_model.activity_resource_distributions.append( 52 | ActivityResourceDistribution(activity_id=activity, activity_resources_distributions=resource_distributions) 53 | ) 54 | 55 | print_message(f"Repaired resource model with missing activities: {missing_activities}") 56 | 57 | 58 | def estimate_duration_distribution_for_activity( 59 | activity: str, event_log: pd.DataFrame, log_ids: EventLogIDs 60 | ) -> DurationDistribution: 61 | activity_events = event_log[event_log[log_ids.activity] == activity] 62 | durations = (activity_events[log_ids.end_time] - activity_events[log_ids.start_time]).values 63 | durations = [duration for duration in durations if not pd.isna(duration)] 64 | durations = [duration.astype("timedelta64[s]").astype(np.float64) for duration in durations] 65 | 66 | if len(durations) > 0: 67 | distribution = get_best_fitting_distribution(durations) 68 | else: 69 | distribution = DurationDistribution(name="fix", mean=1) 70 | 71 | return distribution 72 | -------------------------------------------------------------------------------- /src/simod/resource_model/settings.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | 4 | from pix_framework.discovery.resource_calendar_and_performance.calendar_discovery_parameters import ( 5 | CalendarDiscoveryParameters, 6 | CalendarType, 7 | ) 8 | 9 | from simod.settings.common_settings import Metric 10 | from simod.utilities import nearest_divisor_for_granularity 11 | 12 | 13 | @dataclass 14 | class HyperoptIterationParams: 15 | """ 16 | Parameters for a single iteration of the Resource Model optimization process.
17 | 18 | This class defines the necessary parameters for optimizing the resource model of the BPS model. 19 | It includes the parameter values for the discovery of resource profiles, calendars, etc. 20 | 21 | Attributes 22 | ---------- 23 | output_dir : :class:`pathlib.Path` 24 | Directory where all files of the current iteration will be stored. 25 | process_model_path : :class:`pathlib.Path` 26 | Path to the BPMN process model used for optimization. 27 | project_name : str 28 | Name of the project for file naming purposes. 29 | optimization_metric : :class:`~simod.settings.common_settings.Metric` 30 | Metric used to evaluate the quality of the current iteration's candidate. 31 | calendar_discovery_params : :class:`CalendarDiscoveryParameters` 32 | Parameters for the resource calendar (i.e., working schedules) discovery. 33 | discover_prioritization_rules : bool, optional 34 | Whether to attempt discovering prioritization rules (default: False). 35 | discover_batching_rules : bool, optional 36 | Whether to attempt discovering batching rules (default: False). 37 | """ 38 | 39 | # General settings 40 | output_dir: Path # Directory where to output all the files of the current iteration 41 | process_model_path: Path # Path to BPMN model 42 | project_name: str # Name of the project for file naming 43 | 44 | optimization_metric: Metric # Metric to evaluate the candidate of this iteration 45 | calendar_discovery_params: CalendarDiscoveryParameters # Parameters for the calendar discovery 46 | discover_prioritization_rules: bool = False # Whether to try to add prioritization or not 47 | discover_batching_rules: bool = False # Whether to try to add batching or not 48 | 49 | def to_dict(self) -> dict: 50 | """ 51 | Converts the parameters of the current iteration into a dictionary format. 52 | 53 | Returns 54 | ------- 55 | dict 56 | A dictionary containing the iteration parameters. 
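
        Illustrative output (paths and values shortened; the calendar parameters are merged in
        by ``calendar_discovery_params.to_dict()``):

            {"output_dir": "outputs/resource_model", "process_model_path": "outputs/model.bpmn",
             "project_name": "LoanApp", "optimization_metric": "circadian_event_distribution",
             "discover_prioritization_rules": "False", "discover_batching_rules": "False", ...}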
57 | """ 58 | # Save common params 59 | optimization_parameters = { 60 | "output_dir": str(self.output_dir), 61 | "process_model_path": str(self.process_model_path), 62 | "project_name": str(self.project_name), 63 | "optimization_metric": str(self.optimization_metric), 64 | "discover_prioritization_rules": str(self.discover_prioritization_rules), 65 | "discover_batching_rules": str(self.discover_batching_rules), 66 | } | self.calendar_discovery_params.to_dict() 67 | # Return dict 68 | return optimization_parameters 69 | 70 | @staticmethod 71 | def from_hyperopt_dict( 72 | hyperopt_dict: dict, 73 | optimization_metric: Metric, 74 | discovery_type: CalendarType, 75 | output_dir: Path, 76 | process_model_path: Path, 77 | project_name: str, 78 | ) -> "HyperoptIterationParams": 79 | """Create the params for this run from the hyperopt dictionary returned by the fmin function.""" 80 | # Extract model discovery parameters if needed (by default None) 81 | granularity = None 82 | confidence = None 83 | support = None 84 | participation = None 85 | fuzzy_angle = 1.0 86 | 87 | def safe_granularity(granularity: int) -> int: 88 | if 1440 % granularity != 0: 89 | return nearest_divisor_for_granularity(granularity) 90 | return granularity 91 | 92 | if discovery_type in [ 93 | CalendarType.UNDIFFERENTIATED, 94 | CalendarType.DIFFERENTIATED_BY_RESOURCE, 95 | CalendarType.DIFFERENTIATED_BY_POOL, 96 | ]: 97 | granularity = safe_granularity(hyperopt_dict["granularity"]) 98 | confidence = hyperopt_dict["confidence"] 99 | support = hyperopt_dict["support"] 100 | participation = hyperopt_dict["participation"] 101 | elif discovery_type == CalendarType.DIFFERENTIATED_BY_RESOURCE_FUZZY: 102 | granularity = safe_granularity(hyperopt_dict["granularity"]) 103 | fuzzy_angle = hyperopt_dict["fuzzy_angle"] 104 | 105 | discover_prioritization_rules = hyperopt_dict.get("discover_prioritization_rules", False) 106 | discover_batching_rules = hyperopt_dict.get("discover_batching_rules", False) 107 | 108 | return HyperoptIterationParams( 109 | output_dir=output_dir, 110 | process_model_path=process_model_path, 111 | project_name=project_name, 112 | optimization_metric=optimization_metric, 113 | calendar_discovery_params=CalendarDiscoveryParameters( 114 | discovery_type=discovery_type, 115 | granularity=granularity, 116 | confidence=confidence, 117 | support=support, 118 | participation=participation, 119 | fuzzy_angle=fuzzy_angle, 120 | ), 121 | discover_prioritization_rules=discover_prioritization_rules, 122 | discover_batching_rules=discover_batching_rules, 123 | ) 124 | -------------------------------------------------------------------------------- /src/simod/runtime_meter.py: -------------------------------------------------------------------------------- 1 | import json 2 | import timeit 3 | 4 | 5 | class RuntimeMeter: 6 | 7 | runtime_start: dict 8 | runtime_stop: dict 9 | runtimes: dict 10 | 11 | TOTAL: str = "SIMOD_TOTAL_RUNTIME" 12 | PREPROCESSING: str = "preprocessing" 13 | INITIAL_MODEL: str = "discover-initial-BPS-model" 14 | CONTROL_FLOW_MODEL: str = "optimize-control-flow-model" 15 | RESOURCE_MODEL: str = "optimize-resource-model" 16 | DATA_ATTRIBUTES_MODEL: str = "discover-data-attributes" 17 | EXTRANEOUS_DELAYS: str = "discover-extraneous-delays" 18 | FINAL_MODEL: str = "discover-final-BPS-model" 19 | EVALUATION: str = "evaluate-final-BPS-model" 20 | 21 | def __init__(self): 22 | self.runtime_start = dict() 23 | self.runtime_stop = dict() 24 | self.runtimes = dict() 25 | 26 | def start(self, stage_name: str): 27 
| self.runtime_start[stage_name] = timeit.default_timer() 28 | 29 | def stop(self, stage_name: str): 30 | self.runtime_stop[stage_name] = timeit.default_timer() 31 | self.runtimes[stage_name] = self.runtime_stop[stage_name] - self.runtime_start[stage_name] 32 | 33 | def to_json(self) -> str: 34 | return json.dumps(self.runtimes) 35 | -------------------------------------------------------------------------------- /src/simod/settings/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "common_settings", 3 | "control_flow_settings", 4 | "extraneous_delays_settings", 5 | "simod_settings", 6 | "preprocessing_settings", 7 | "resource_model_settings", 8 | ] 9 | -------------------------------------------------------------------------------- /src/simod/settings/extraneous_delays_settings.py: -------------------------------------------------------------------------------- 1 | from extraneous_activity_delays.config import ( 2 | OptimizationMetric as ExtraneousDelaysOptimizationMetric, 3 | DiscoveryMethod as ExtraneousDelaysDiscoveryMethod, 4 | ) 5 | from pydantic import BaseModel 6 | 7 | from simod.settings.common_settings import Metric 8 | 9 | 10 | class ExtraneousDelaysSettings(BaseModel): 11 | """ 12 | Configuration settings for extraneous delay optimization. 13 | 14 | This class defines parameters for discovering and optimizing extraneous 15 | delays in process simulations, including optimization metrics, discovery 16 | methods, and iteration settings. In each iteration of the optimization process, the 17 | parameters are sampled from these values or ranges. 18 | 19 | Attributes 20 | ---------- 21 | optimization_metric : :class:`ExtraneousDelaysOptimizationMetric` 22 | The metric used to evaluate process model quality at each iteration of the optimization process (i.e., 23 | loss function). 24 | num_iterations : int 25 | The number of optimization iterations to perform. 26 | num_evaluations_per_iteration : int 27 | The number of replications for the evaluations of each iteration. 28 | discovery_method : :class:`ExtraneousDelaysDiscoveryMethod` 29 | The method used to discover extraneous delays. 30 | """ 31 | 32 | optimization_metric: ExtraneousDelaysOptimizationMetric = ExtraneousDelaysOptimizationMetric.RELATIVE_EMD 33 | discovery_method: ExtraneousDelaysDiscoveryMethod = ExtraneousDelaysDiscoveryMethod.COMPLEX 34 | num_iterations: int = 1 35 | num_evaluations_per_iteration: int = 3 36 | 37 | @staticmethod 38 | def from_dict(config: dict) -> "ExtraneousDelaysSettings": 39 | """ 40 | Instantiates the extraneous delays model configuration from a dictionary. 41 | 42 | Parameters 43 | ---------- 44 | config : dict 45 | Dictionary with the configuration values for the extraneous delays model parameters. 46 | 47 | Returns 48 | ------- 49 | :class:`ExtraneousDelaysSettings` 50 | Instance of the extraneous delays model configuration for the specified dictionary values. 
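
        Examples
        --------
        A minimal sketch (keys mirror the ``extraneous_activity_delays`` section of the YAML configuration):

            settings = ExtraneousDelaysSettings.from_dict({
                "optimization_metric": "relative_event_distribution",
                "discovery_method": "eclipse-aware",
                "num_iterations": 1,
            })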
51 | """ 52 | optimization_metric = ExtraneousDelaysSettings._match_metric( 53 | config.get("optimization_metric", "relative_event_distribution") 54 | ) 55 | discovery_method = ExtraneousDelaysSettings._match_method(config.get("discovery_method", "eclipse-aware")) 56 | num_iterations = config.get("num_iterations", 1) 57 | num_evaluations_per_iteration = config.get("num_evaluations_per_iteration", 3) 58 | 59 | return ExtraneousDelaysSettings( 60 | optimization_metric=optimization_metric, 61 | discovery_method=discovery_method, 62 | num_iterations=num_iterations, 63 | num_evaluations_per_iteration=num_evaluations_per_iteration, 64 | ) 65 | 66 | def to_dict(self) -> dict: 67 | """ 68 | Translate the extraneous delays model configuration stored in this instance into a dictionary. 69 | 70 | Returns 71 | ------- 72 | dict 73 | Python dictionary storing this configuration. 74 | """ 75 | return { 76 | "optimization_metric": str(self.optimization_metric.name), 77 | "discovery_method": str(self.discovery_method.name), 78 | "num_iterations": self.num_iterations, 79 | "num_evaluations_per_iteration": self.num_evaluations_per_iteration, 80 | } 81 | 82 | @staticmethod 83 | def _match_metric(metric: str) -> ExtraneousDelaysOptimizationMetric: 84 | metric = Metric.from_str(metric) 85 | if metric == Metric.ABSOLUTE_EMD: 86 | return ExtraneousDelaysOptimizationMetric.ABSOLUTE_EMD 87 | elif metric == Metric.CYCLE_TIME_EMD: 88 | return ExtraneousDelaysOptimizationMetric.CYCLE_TIME 89 | elif metric == Metric.CIRCADIAN_EMD: 90 | return ExtraneousDelaysOptimizationMetric.CIRCADIAN_EMD 91 | elif metric == Metric.RELATIVE_EMD: 92 | return ExtraneousDelaysOptimizationMetric.RELATIVE_EMD 93 | else: 94 | raise ValueError(f"Unknown extraneous delays optimization metric {metric}") 95 | 96 | @staticmethod 97 | def _match_method(method: str) -> ExtraneousDelaysDiscoveryMethod: 98 | if method.lower() in ["naive", "naiv", "naiiv"]: 99 | return ExtraneousDelaysDiscoveryMethod.NAIVE 100 | elif method.lower() in ["complex", "eclipse-aware", "eclipseaware", "eclipse aware"]: 101 | return ExtraneousDelaysDiscoveryMethod.COMPLEX 102 | else: 103 | raise ValueError(f"Unknown extraneous delays discovery method {method}") 104 | -------------------------------------------------------------------------------- /src/simod/settings/preprocessing_settings.py: -------------------------------------------------------------------------------- 1 | from pix_framework.enhancement.start_time_estimator.config import ConcurrencyThresholds 2 | from pydantic import BaseModel 3 | 4 | 5 | class PreprocessingSettings(BaseModel): 6 | """ 7 | Configuration for event log preprocessing. 8 | 9 | This class defines parameters used to preprocess event logs before 10 | SIMOD main pipeline, including concurrency threshold settings 11 | and multitasking options. 12 | 13 | Attributes 14 | ---------- 15 | multitasking : bool 16 | Whether to preprocess the event log to handle resources working in more than one activity at a time. 17 | enable_time_concurrency_threshold : float 18 | Threshold for determining concurrent events (for computing enabled) time based on the ratio of overlapping 19 | w.r.t. their occurrences. Ranges from 0 to 1 (0.3 means that two activities will be considered concurrent 20 | when their execution overlaps in 30% or more of the cases). 
21 | concurrency_thresholds : :class:`ConcurrencyThresholds` 22 | Thresholds for the computation of the start times (if missing) based on the Heuristics miner algorithm, 23 | including direct-follows (df), length-2-loops (l2l), and length-1-loops (l1l). 24 | """ 25 | 26 | multitasking: bool = False 27 | enable_time_concurrency_threshold: float = 0.5 28 | concurrency_thresholds: ConcurrencyThresholds = ConcurrencyThresholds(df=0.75, l2l=0.9, l1l=0.9) 29 | 30 | @staticmethod 31 | def from_dict(config: dict) -> "PreprocessingSettings": 32 | """ 33 | Instantiates SIMOD preprocessing configuration from a dictionary. 34 | 35 | Parameters 36 | ---------- 37 | config : dict 38 | Dictionary with the configuration values for the preprocessing parameters. 39 | 40 | Returns 41 | ------- 42 | :class:`PreprocessingSettings` 43 | Instance of SIMOD preprocessing configuration for the specified dictionary values. 44 | """ 45 | return PreprocessingSettings( 46 | multitasking=config.get("multitasking", False), 47 | enable_time_concurrency_threshold=config.get("enable_time_concurrency_threshold", 0.5), 48 | concurrency_thresholds=ConcurrencyThresholds( 49 | df=config.get("concurrency_df", 0.75), 50 | l2l=config.get("concurrency_l2l", 0.9), 51 | l1l=config.get("concurrency_l1l", 0.9), 52 | ), 53 | ) 54 | 55 | def to_dict(self) -> dict: 56 | """ 57 | Translate the preprocessing configuration stored in this instance into a dictionary. 58 | 59 | Returns 60 | ------- 61 | dict 62 | Python dictionary storing this configuration. 63 | """ 64 | return { 65 | "multitasking": self.multitasking, 66 | "enable_time_concurrency_threshold": self.enable_time_concurrency_threshold, 67 | "concurrency_df": self.concurrency_thresholds.df, 68 | "concurrency_l2l": self.concurrency_thresholds.l2l, 69 | "concurrency_l1l": self.concurrency_thresholds.l1l, 70 | } 71 | -------------------------------------------------------------------------------- /src/simod/simulation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/simulation/__init__.py -------------------------------------------------------------------------------- /src/simod/simulation/parameters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/simulation/parameters/__init__.py -------------------------------------------------------------------------------- /src/simod/utilities.py: -------------------------------------------------------------------------------- 1 | import math 2 | import os 3 | import platform 4 | import subprocess 5 | import time 6 | import traceback 7 | from builtins import float 8 | from pathlib import Path 9 | from typing import List, Tuple, Union 10 | 11 | from hyperopt import STATUS_FAIL, STATUS_OK 12 | 13 | 14 | def get_project_dir() -> Path: 15 | return Path(os.path.dirname(__file__)).parent.parent 16 | 17 | 18 | def is_windows() -> bool: 19 | return platform.system().lower() == "windows" 20 | 21 | 22 | def execute_external_command(args): 23 | if is_windows(): 24 | subprocess.call(" ".join(args)) 25 | else: 26 | subprocess.call(args) 27 | 28 | 29 | def hyperopt_step(status: str, fn, *args) -> Tuple[str, object]: 30 | """Executes the provided function with the given arguments in a hyperopt-safe way, returning STATUS_FAIL on exception.""" 31 | if status == STATUS_OK: 32 | 
try: 33 | return STATUS_OK, fn(*args) 34 | except Exception as error: 35 | print(error) 36 | traceback.print_exc() 37 | return STATUS_FAIL, None 38 | else: 39 | return status, None 40 | 41 | 42 | def nearest_divisor_for_granularity(granularity: int) -> int: 43 | closest = 1440 44 | closest_diff = abs(granularity - closest) 45 | for i in range(1, int(math.sqrt(1440)) + 1): 46 | if 1440 % i == 0: 47 | divisor1 = i 48 | divisor2 = 1440 // i 49 | for divisor in [divisor1, divisor2]: 50 | if divisor <= granularity: 51 | diff = granularity - divisor 52 | if diff < closest_diff: 53 | closest = divisor 54 | closest_diff = diff 55 | return closest 56 | 57 | 58 | def parse_single_value_or_interval(value: Union[float, int, List[float]]) -> Union[float, Tuple[float, float]]: 59 | if isinstance(value, float): 60 | return value 61 | elif isinstance(value, int): 62 | return float(value) 63 | else: 64 | return value[0], value[1] 65 | 66 | 67 | def get_process_name_from_log_path(log_path: Path) -> str: 68 | # Get name of the file (last component) 69 | name = log_path.name 70 | # Remove each of the suffixes, if any 71 | for suffix in reversed(log_path.suffixes): 72 | name = name.removesuffix(suffix) 73 | # Return remaining name 74 | return name 75 | 76 | 77 | def get_process_model_path(base_dir: Path, process_name: str) -> Path: 78 | return base_dir / f"{process_name}.bpmn" 79 | 80 | 81 | def get_simulation_parameters_path(base_dir: Path, process_name: str) -> Path: 82 | return base_dir / f"{process_name}.json" 83 | 84 | 85 | def measure_runtime(output_file: str = "runtime.txt"): 86 | """ 87 | Decorator for measuring runtime of a function and writing it to a file. 88 | :param output_file: Path to the output file relative to the project root. 89 | """ 90 | 91 | def decorator(func: callable): 92 | def wrapper(*args, **kwargs): 93 | start = time.time() 94 | result = func(*args, **kwargs) 95 | end = time.time() - start 96 | with open(output_file, "a") as f: 97 | module_name = func.__module__.split(".")[-1] 98 | func_name = func.__name__ 99 | f.write(f"{module_name}.{func_name}: {end} s\n") 100 | return result 101 | 102 | return wrapper 103 | 104 | return decorator 105 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/__init__.py -------------------------------------------------------------------------------- /tests/assets/LoanApp_simplified.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/LoanApp_simplified.csv.gz -------------------------------------------------------------------------------- /tests/assets/LoanApp_simplified_2.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/LoanApp_simplified_2.csv.gz -------------------------------------------------------------------------------- /tests/assets/bpic15/BPIC15_1.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/bpic15/BPIC15_1.csv.gz 
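# Usage sketch for the measure_runtime decorator defined in src/simod/utilities.py above;
# the decorated function and output file below are illustrative, not part of the codebase.
#
#     from simod.utilities import measure_runtime
#
#     @measure_runtime(output_file="runtime.txt")
#     def discover_model():
#         ...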
-------------------------------------------------------------------------------- /tests/assets/bpic15/bpic15_1_with_model_v4.yml: -------------------------------------------------------------------------------- 1 | version: 4 2 | common: 3 | train_log_path: BPIC15_1.csv.gz 4 | process_model_path: BPIC15_1.bpmn 5 | num_final_evaluations: 1 # Number of evaluations of the discovered BPS model. 6 | evaluation_metrics: # Metrics to evaluate the discovered BPS model with. 7 | - 3_gram_distance 8 | - 2_gram_distance 9 | - absolute_event_distribution 10 | - relative_event_distribution 11 | - circadian_event_distribution 12 | - arrival_event_distribution 13 | - cycle_time_distribution 14 | clean_intermediate_files: false 15 | log_ids: 16 | case: case:concept:name 17 | activity: concept:name 18 | resource: org:resource 19 | start_time: start_timestamp 20 | end_time: time:timestamp 21 | enabled_time: enabled_time 22 | preprocessing: 23 | multitasking: false # Reassign activity durations when a resource works on several activities at once (multitasking). 24 | enable_time_concurrency_threshold: 0.5 # Concurrency threshold for the enabled time computation. 25 | concurrency_df: 0.75 # Concurrency thresholds for the start time (and enabled time) estimations when 26 | concurrency_l2l: 0.9 # the start time is missing in the train event log. Using the Heuristics Miner 27 | concurrency_l1l: 0.9 # concurrency oracle. 28 | control_flow: 29 | optimization_metric: n_gram_distance 30 | num_iterations: 1 # Number of iterations to run the hyper-optimization process for control-flow discovery 31 | num_evaluations_per_iteration: 5 # Number of times to evaluate each iteration (using the mean of all of them) 32 | gateway_probabilities: # Methods to discover the probabilities of each gateway 33 | - equiprobable 34 | - discovery 35 | discovery_algorithm: sm1 # Process model discovery algorithm: sm1 (Split Miner v1) or sm2 (Split Miner v2) 36 | epsilon: 37 | - 0.0 38 | - 1.0 39 | eta: 40 | - 0.0 41 | - 1.0 42 | replace_or_joins: 43 | - true 44 | - false 45 | prioritize_parallelism: 46 | - true 47 | - false 48 | resource_model: 49 | optimization_metric: circadian_event_distribution 50 | num_iterations: 1 # Number of iterations to run the hyper-optimization process for resource model discovery 51 | num_evaluations_per_iteration: 5 # Number of times to evaluate each iteration (using the mean of all of them) 52 | resource_profiles: 53 | discovery_type: differentiated # Resource discovery type ('undifferentiated', 'pool', or 'differentiated') 54 | granularity: 55 | - 15 56 | - 60 57 | confidence: 58 | - 0.5 59 | - 0.85 60 | support: 61 | - 0.01 62 | - 0.3 63 | participation: 0.4 64 | extraneous_activity_delays: 65 | optimization_metric: relative_event_distribution 66 | num_iterations: 1 # Number of iterations of the optimization process (if 1, direct discovery without optimization) 67 | num_evaluations_per_iteration: 3 # Number of times to evaluate each iteration (using the mean of all of them) 68 | -------------------------------------------------------------------------------- /tests/assets/branch_rules/or_1.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/or_1.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/or_2.csv.gz: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/or_2.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/or_3.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/or_3.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/or_4.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/or_4.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/or_5.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/or_5.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/or_6.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/or_6.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/or_7.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/or_7.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/or_8.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/or_8.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/xor_1.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/xor_1.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/xor_2.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/xor_2.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/xor_3.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/xor_3.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/xor_5.csv.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/xor_5.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/xor_6.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/xor_6.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/xor_7.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/xor_7.csv.gz -------------------------------------------------------------------------------- /tests/assets/configuration_simod_basic.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | common: 3 | train_log_path: LoanApp_simplified.csv.gz 4 | test_log_path: LoanApp_simplified.csv.gz 5 | num_final_evaluations: 1 6 | discover_data_attributes: true 7 | evaluation_metrics: 8 | - absolute_hourly_emd 9 | log_ids: 10 | case: case:concept:name 11 | activity: concept:name 12 | resource: org:resource 13 | start_time: start_timestamp 14 | end_time: time:timestamp 15 | enabled_time: enabled_time 16 | preprocessing: 17 | multitasking: false 18 | control_flow: 19 | optimization_metric: n_gram_distance 20 | num_iterations: 3 21 | num_evaluations_per_iteration: 3 22 | discovery_algorithm: sm1 23 | epsilon: 24 | - 0.0 25 | - 1.0 26 | eta: 27 | - 0.0 28 | - 1.0 29 | replace_or_joins: 30 | - true 31 | - false 32 | prioritize_parallelism: 33 | - true 34 | - false 35 | gateway_probabilities: 36 | - discovery 37 | - equiprobable 38 | resource_model: 39 | optimization_metric: absolute_hourly_emd 40 | num_iterations: 3 41 | num_evaluations_per_iteration: 3 42 | resource_profiles: 43 | discovery_type: pool 44 | granularity: 60 45 | confidence: 46 | - 0.5 47 | - 0.85 48 | support: 49 | - 0.01 50 | - 0.3 51 | participation: 0.4 52 | -------------------------------------------------------------------------------- /tests/assets/configuration_simod_with_extraneous.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | common: 3 | train_log_path: LoanApp_simplified.csv.gz 4 | discover_data_attributes: true 5 | log_ids: 6 | case: case:concept:name 7 | activity: concept:name 8 | resource: org:resource 9 | start_time: start_timestamp 10 | end_time: time:timestamp 11 | enabled_time: enabled_time 12 | preprocessing: 13 | multitasking: false 14 | control_flow: 15 | optimization_metric: n_gram_distance 16 | num_iterations: 3 17 | num_evaluations_per_iteration: 3 18 | discovery_algorithm: sm1 19 | epsilon: 20 | - 0.0 21 | - 1.0 22 | eta: 23 | - 0.0 24 | - 1.0 25 | replace_or_joins: 26 | - true 27 | - false 28 | prioritize_parallelism: 29 | - true 30 | - false 31 | gateway_probabilities: 32 | - discovery 33 | - equiprobable 34 | resource_model: 35 | optimization_metric: absolute_hourly_emd 36 | num_iterations: 3 37 | num_evaluations_per_iteration: 3 38 | resource_profiles: 39 | discovery_type: pool 40 | granularity: 60 41 | confidence: 42 | - 0.5 43 | - 0.85 44 | support: 45 | - 0.01 46 | - 0.3 47 | participation: 0.4 48 | extraneous_activity_delays: 49 | optimization_metric: relative_emd 50 | 
num_iterations: 1 # Direct discovery, no optimization 51 | -------------------------------------------------------------------------------- /tests/assets/configuration_simod_with_model.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | common: 3 | train_log_path: LoanApp_simplified.csv.gz 4 | process_model_path: LoanApp_simplified.bpmn 5 | discover_data_attributes: true 6 | perform_final_evaluation: true 7 | num_final_evaluations: 1 8 | evaluation_metrics: 9 | - absolute_hourly_emd 10 | log_ids: 11 | case: case:concept:name 12 | activity: concept:name 13 | resource: org:resource 14 | start_time: start_timestamp 15 | end_time: time:timestamp 16 | enabled_time: enabled_time 17 | preprocessing: 18 | multitasking: false 19 | control_flow: 20 | optimization_metric: n_gram_distance 21 | num_iterations: 3 22 | num_evaluations_per_iteration: 3 23 | gateway_probabilities: 24 | - discovery 25 | - equiprobable 26 | resource_model: 27 | optimization_metric: absolute_hourly_emd 28 | num_iterations: 3 29 | num_evaluations_per_iteration: 3 30 | resource_profiles: 31 | discovery_type: pool 32 | granularity: 60 33 | confidence: 34 | - 0.5 35 | - 0.85 36 | support: 37 | - 0.01 38 | - 0.3 39 | participation: 0.4 40 | -------------------------------------------------------------------------------- /tests/assets/configuration_simod_with_model_and_batching.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | common: 3 | train_log_path: LoanApp_simplified.csv.gz 4 | test_log_path: LoanApp_simplified.csv.gz 5 | process_model_path: LoanApp_simplified.bpmn 6 | num_final_evaluations: 0 # On purpose so it is corrected to 10 7 | clean_intermediate_files: false 8 | discover_data_attributes: true 9 | evaluation_metrics: 10 | - absolute_hourly_emd 11 | log_ids: 12 | case: case:concept:name 13 | activity: concept:name 14 | resource: org:resource 15 | start_time: start_timestamp 16 | end_time: time:timestamp 17 | enabled_time: enabled_time 18 | preprocessing: 19 | multitasking: false 20 | control_flow: 21 | optimization_metric: three_gram_distance 22 | num_iterations: 3 23 | num_evaluations_per_iteration: 3 24 | gateway_probabilities: 25 | - discovery 26 | - equiprobable 27 | resource_model: 28 | optimization_metric: absolute_hourly_emd 29 | num_iterations: 5 30 | num_evaluations_per_iteration: 3 31 | discover_batching_rules: true 32 | resource_profiles: 33 | discovery_type: pool 34 | granularity: 60 35 | confidence: 36 | - 0.5 37 | - 0.85 38 | support: 39 | - 0.01 40 | - 0.3 41 | participation: 0.4 42 | -------------------------------------------------------------------------------- /tests/assets/configuration_simod_with_model_and_extraneous.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | common: 3 | train_log_path: LoanApp_simplified.csv.gz 4 | process_model_path: LoanApp_simplified.bpmn 5 | discover_data_attributes: true 6 | num_final_evaluations: 1 7 | evaluation_metrics: 8 | - absolute_hourly_emd 9 | log_ids: 10 | case: case:concept:name 11 | activity: concept:name 12 | resource: org:resource 13 | start_time: start_timestamp 14 | end_time: time:timestamp 15 | enabled_time: enabled_time 16 | preprocessing: 17 | multitasking: false 18 | control_flow: 19 | optimization_metric: n_gram_distance 20 | num_iterations: 3 21 | num_evaluations_per_iteration: 3 22 | gateway_probabilities: 23 | - discovery 24 | - equiprobable 25 | resource_model: 26 | 
optimization_metric: absolute_hourly_emd 27 | num_iterations: 3 28 | num_evaluations_per_iteration: 3 29 | resource_profiles: 30 | discovery_type: pool 31 | granularity: 60 32 | confidence: 33 | - 0.5 34 | - 0.85 35 | support: 36 | - 0.01 37 | - 0.3 38 | participation: 0.4 39 | extraneous_activity_delays: 40 | optimization_metric: relative_emd 41 | discovery_method: naive 42 | num_iterations: 1 # Direct discovery, no optimization 43 | -------------------------------------------------------------------------------- /tests/assets/configuration_simod_with_model_and_prioritization.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | common: 3 | train_log_path: LoanApp_simplified.csv.gz 4 | test_log_path: LoanApp_simplified.csv.gz 5 | process_model_path: LoanApp_simplified.bpmn 6 | num_final_evaluations: 1 7 | clean_intermediate_files: false 8 | evaluation_metrics: 9 | - absolute_hourly_emd 10 | log_ids: 11 | case: case:concept:name 12 | activity: concept:name 13 | resource: org:resource 14 | start_time: start_timestamp 15 | end_time: time:timestamp 16 | enabled_time: enabled_time 17 | preprocessing: 18 | multitasking: false 19 | control_flow: 20 | optimization_metric: two_gram_distance 21 | num_iterations: 3 22 | num_evaluations_per_iteration: 3 23 | gateway_probabilities: 24 | - discovery 25 | - equiprobable 26 | resource_model: 27 | optimization_metric: absolute_hourly_emd 28 | num_iterations: 5 29 | num_evaluations_per_iteration: 3 30 | discover_prioritization_rules: True 31 | resource_profiles: 32 | discovery_type: pool 33 | granularity: 60 34 | confidence: 35 | - 0.5 36 | - 0.85 37 | support: 38 | - 0.01 39 | - 0.3 40 | participation: 0.4 41 | -------------------------------------------------------------------------------- /tests/assets/data_attributes/case_attributes.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/case_attributes.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/event_attribute_1.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/event_attribute_1.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/event_attribute_15.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/event_attribute_15.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/event_attribute_3.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/event_attribute_3.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/event_attribute_5.csv.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/event_attribute_5.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/event_attribute_7.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/event_attribute_7.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/event_attribute_9.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/event_attribute_9.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/global_attribute_1.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/global_attribute_1.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/global_attribute_15.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/global_attribute_15.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/global_attribute_3.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/global_attribute_3.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/global_attribute_5.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/global_attribute_5.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/global_attribute_7.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/global_attribute_7.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/global_attribute_9.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/global_attribute_9.csv.gz -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | from click.testing import CliRunner 5 | 6 | 7 | @pytest.fixture(scope='function') 8 | def 
runner(request): 9 | return CliRunner() 10 | 11 | 12 | @pytest.fixture(scope='module') 13 | def entry_point() -> Path: 14 | if Path.cwd().name == 'tests': 15 | return Path('assets') 16 | elif 'test_' in Path.cwd().name: 17 | return Path('../assets') 18 | else: 19 | return Path('tests/assets') 20 | -------------------------------------------------------------------------------- /tests/test_batching/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_batching/__init__.py -------------------------------------------------------------------------------- /tests/test_batching/test_batching_discovery.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from pix_framework.io.event_log import EventLogIDs, read_csv_log 4 | from simod.batching.discovery import discover_batching_rules 5 | from simod.batching.types import BatchingFiringRule 6 | 7 | assets_dir = Path(__file__).parent / "assets" 8 | 9 | 10 | def test_discover_batching_rules(): 11 | log_path = assets_dir / "event_log_5.csv" 12 | log_ids = EventLogIDs( 13 | case="case_id", 14 | activity="Activity", 15 | start_time="start_time", 16 | end_time="end_time", 17 | resource="Resource", 18 | enabled_time="enabled_time", 19 | batch_id="batch_instance_id", 20 | batch_type="batch_instance_type", 21 | ) 22 | log = read_csv_log(log_path, log_ids) 23 | 24 | rules = discover_batching_rules(log, log_ids) 25 | 26 | assert len(rules) > 0 27 | 28 | 29 | def test_discover_batching_rules_loanapp(): 30 | log_path = assets_dir / "LoanApp_batch_sim_log.csv" 31 | log_ids = EventLogIDs( 32 | case="case_id", 33 | activity="activity", 34 | start_time="start_time", 35 | end_time="end_time", 36 | resource="resource", 37 | enabled_time="enable_time", 38 | batch_id="batch_instance_id", 39 | batch_type="batch_instance_type", 40 | ) 41 | expected_rules = BatchingFiringRule( 42 | attribute="batch_size", 43 | comparison="=", 44 | value="3", 45 | ) 46 | log = read_csv_log(log_path, log_ids) 47 | 48 | rules = discover_batching_rules(log, log_ids) 49 | 50 | assert len(rules) == 1 51 | assert rules[0].firing_rules[0][0] == expected_rules 52 | -------------------------------------------------------------------------------- /tests/test_batching/test_types.py: -------------------------------------------------------------------------------- 1 | from simod.batching.types import BatchingRule 2 | 3 | batching_discovery_result = [ 4 | { 5 | "activity": "B", 6 | "resources": ["Alice"], 7 | "type": "Sequential", 8 | "batch_frequency": 0.96, 9 | "size_distribution": {"3": 48, "1": 2}, 10 | "duration_distribution": {"3": 0.5}, 11 | "firing_rules": { 12 | "confidence": 1.0, 13 | "support": 1.0, 14 | "rules": [ 15 | [ 16 | {"attribute": "batch_size", "comparison": ">", "value": "3"}, 17 | {"attribute": "batch_size", "comparison": "<", "value": "5"}, 18 | ], 19 | [ 20 | {"attribute": "batch_size", "comparison": ">", "value": "10"}, 21 | ], 22 | ], 23 | }, 24 | }, 25 | { 26 | "activity": "C", 27 | "resources": ["Bob"], 28 | "type": "Sequential", 29 | "batch_frequency": 0.96, 30 | "size_distribution": {"3": 48, "1": 2}, 31 | "duration_distribution": {"3": 0.5}, 32 | "firing_rules": { 33 | "confidence": 1.0, 34 | "support": 1.0, 35 | "rules": [[{"attribute": "batch_size", "comparison": ">", "value": "3"}]], 36 | }, 37 | }, 38 | ] 39 | 40 | 41 | def 
test_serialization_deserialization(): 42 | rules = [BatchingRule.from_dict(rule) for rule in batching_discovery_result] 43 | 44 | assert len(rules) == len(batching_discovery_result) 45 | for i in range(len(rules)): 46 | assert rules[i].to_dict() == batching_discovery_result[i] 47 | 48 | 49 | def test_prosimos_serialization(): 50 | rules = [BatchingRule.from_dict(rule) for rule in batching_discovery_result] 51 | activities_ids_by_name = {"B": "2", "C": "3"} 52 | activities_names_by_id = {"2": "B", "3": "C"} 53 | 54 | rules_prosimos = [rule.to_prosimos(activities_ids_by_name) for rule in rules] 55 | rules_from_prosimos = [BatchingRule.from_prosimos(rule, activities_names_by_id) for rule in rules_prosimos] 56 | 57 | # Prosimos doesn't use resources and batch_frequency attributes, so we set them 58 | # to None to compare. Also the confidence and support of the rules. 59 | for rule in rules: 60 | rule.resources = None 61 | rule.batch_frequency = None 62 | rule.firing_rules.confidence = -1.0 63 | rule.firing_rules.support = -1.0 64 | 65 | assert rules == rules_from_prosimos 66 | -------------------------------------------------------------------------------- /tests/test_bpic15.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from simod.event_log.event_log import EventLog 3 | from simod.settings.simod_settings import SimodSettings 4 | from simod.simod import Simod 5 | 6 | 7 | @pytest.mark.system 8 | def test_bpic15(entry_point): 9 | settings = SimodSettings.from_path(entry_point / "bpic15/bpic15_1_with_model_v4.yml") 10 | 11 | event_log = EventLog.from_path( 12 | train_log_path=settings.common.train_log_path, 13 | log_ids=settings.common.log_ids, 14 | preprocessing_settings=settings.preprocessing, 15 | ) 16 | optimizer = Simod(settings, event_log=event_log) 17 | optimizer.run() 18 | 19 | assert optimizer.final_bps_model.resource_model is not None 20 | -------------------------------------------------------------------------------- /tests/test_branch_rules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_branch_rules/__init__.py -------------------------------------------------------------------------------- /tests/test_branch_rules/test_discovery.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import pprint 4 | 5 | import pytest 6 | import pandas as pd 7 | from pathlib import Path 8 | from pix_framework.io.event_log import EventLogIDs 9 | from simod.branch_rules.discovery import discover_branch_rules 10 | from pix_framework.io.bpm_graph import BPMNGraph 11 | 12 | LOG_IDS = EventLogIDs(case="case_id", 13 | activity="activity", 14 | start_time="start_time", 15 | end_time="end_time", 16 | resource="resource" 17 | ) 18 | 19 | ASSET_DIR = "branch_rules" 20 | XOR_BPMN = "xor.bpmn" 21 | OR_BPMN = "or.bpmn" 22 | XOR_LOG_PATHS = "xor_*.csv.gz" 23 | OR_LOG_PATHS = "or_8.csv.gz" 24 | 25 | # total_branch_rules -> How many branches should get rules 26 | # rules_per_branch -> how many single rules should be on that branch (exact number or range) 27 | xor_expected_conditions = { 28 | "xor_1.csv.gz": {"total_branch_rules": 15, "rules_per_branch": 1}, # Categorical equal probs 29 | "xor_2.csv.gz": {"total_branch_rules": 3, "rules_per_branch": 1}, # Categorical unbalanced 30 | "xor_3.csv.gz": {"total_branch_rules": 15, 
"rules_per_branch": 1}, # Categorical with different probs 31 | "xor_5.csv.gz": {"total_branch_rules": 15, "rules_per_branch": (1, 3)}, # Numerical intervals 32 | "xor_6.csv.gz": {"total_branch_rules": 15, "rules_per_branch": (1, 2)}, # Conditions 33 | "xor_7.csv.gz": {"total_branch_rules": 15, "rules_per_branch": (1, 3)}, # Complex AND and OR conditions 34 | } 35 | 36 | or_expected_conditions = { 37 | "or_1.csv.gz": {"total_branch_rules": 15, "rules_per_branch": 1}, # Categorical equal probs 1 flow only 38 | "or_2.csv.gz": {"total_branch_rules": 15, "rules_per_branch": (1, 2)}, # Categorical equal probs 2 flow2 39 | "or_3.csv.gz": {"total_branch_rules": 15, "rules_per_branch": 1}, # Categorical equal probs all flows (warning) 40 | "or_4.csv.gz": {"total_branch_rules": 3, "rules_per_branch": 1}, # Categorical unbalanced 1 flow only (warning) 41 | "or_5.csv.gz": {"total_branch_rules": 6, "rules_per_branch": 1}, # Categorical unbalanced 2 flows (warning) 42 | "or_6.csv.gz": {"total_branch_rules": 15, "rules_per_branch": (1, 3)}, # Categorical unbalanced all flows (warning) 43 | "or_7.csv.gz": {"total_branch_rules": 15, "rules_per_branch": (1, 2)}, # Numerical with AND operator 44 | "or_8.csv.gz": {"total_branch_rules": 15, "rules_per_branch": 1}, # Numerical with full range 45 | } 46 | 47 | 48 | @pytest.fixture(scope="module") 49 | def xor_log_files(entry_point): 50 | """Fixture to generate full paths for XOR branch condition log files.""" 51 | xor_log_pattern = os.path.join(entry_point, ASSET_DIR, XOR_LOG_PATHS) 52 | files = glob.glob(xor_log_pattern) 53 | return [(file, xor_expected_conditions[os.path.basename(file)]) for file in files] 54 | 55 | 56 | @pytest.fixture(scope="module") 57 | def or_log_files(entry_point): 58 | or_log_pattern = os.path.join(entry_point, ASSET_DIR, OR_LOG_PATHS) 59 | files = glob.glob(or_log_pattern) 60 | return [(file, or_expected_conditions[os.path.basename(file)]) for file in files] 61 | 62 | 63 | def assert_branch_rules(bpmn_graph, log, log_ids, expected_conditions): 64 | branch_rules = discover_branch_rules(bpmn_graph, log, log_ids) 65 | 66 | assert len(branch_rules) == expected_conditions["total_branch_rules"], \ 67 | f"Expected {expected_conditions['total_branch_rules']} BranchRules, found {len(branch_rules)}" 68 | 69 | for branch_rule in branch_rules: 70 | rule_count = len(branch_rule.rules) 71 | 72 | if isinstance(expected_conditions["rules_per_branch"], tuple): 73 | min_rules, max_rules = expected_conditions["rules_per_branch"] 74 | assert min_rules <= rule_count <= max_rules, \ 75 | f"Expected between {min_rules} and {max_rules} rules, found {rule_count}" 76 | else: 77 | assert rule_count == expected_conditions["rules_per_branch"], \ 78 | f"Expected {expected_conditions['rules_per_branch']} rules, found {rule_count}" 79 | 80 | 81 | def test_discover_xor_branch_rules(entry_point, xor_log_files): 82 | bpmn_path = os.path.join(entry_point, ASSET_DIR, XOR_BPMN) 83 | for log_path, expected_conditions in xor_log_files: 84 | log = pd.read_csv(log_path, compression="gzip") 85 | bpmn_graph = BPMNGraph.from_bpmn_path(Path(bpmn_path)) 86 | assert_branch_rules(bpmn_graph, log, LOG_IDS, expected_conditions) 87 | 88 | 89 | def test_discover_or_branch_rules(entry_point, or_log_files): 90 | bpmn_path = os.path.join(entry_point, ASSET_DIR, OR_BPMN) 91 | for log_path, expected_conditions in or_log_files: 92 | log = pd.read_csv(log_path, compression="gzip") 93 | bpmn_graph = BPMNGraph.from_bpmn_path(Path(bpmn_path)) 94 | assert_branch_rules(bpmn_graph, log, LOG_IDS, 
expected_conditions) 95 | -------------------------------------------------------------------------------- /tests/test_case_attributes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_case_attributes/__init__.py -------------------------------------------------------------------------------- /tests/test_case_attributes/test_discovery.py: -------------------------------------------------------------------------------- 1 | from pix_framework.io.event_log import EventLogIDs, read_csv_log 2 | from simod.data_attributes.discovery import discover_data_attributes 3 | 4 | 5 | def test_discover_case_attributes(entry_point): 6 | log_path = entry_point / "Insurance_Claims_train.csv" 7 | log_ids = EventLogIDs( 8 | case="case_id", activity="activity", start_time="start_time", end_time="end_time", resource="Resource" 9 | ) 10 | log = read_csv_log(log_path, log_ids) 11 | 12 | global_attributes, case_attributes, event_attributes = discover_data_attributes(log, log_ids) 13 | 14 | assert len(case_attributes) > 0 15 | assert "extraneous_delay" in map(lambda x: x.name, case_attributes) 16 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from simod import cli 4 | 5 | 6 | @pytest.mark.system 7 | @pytest.mark.parametrize("path", ["configuration_simod_basic.yml"]) 8 | def test_optimize(entry_point, runner, path): 9 | config_path = entry_point / path 10 | result = runner.invoke(cli.main, ["--configuration", config_path.absolute()]) 11 | assert not result.exception 12 | assert result.exit_code == 0 13 | -------------------------------------------------------------------------------- /tests/test_control_flow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_control_flow/__init__.py -------------------------------------------------------------------------------- /tests/test_data_attributes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_data_attributes/__init__.py -------------------------------------------------------------------------------- /tests/test_data_attributes/test_discovery.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import pytest 4 | import pandas as pd 5 | from pix_framework.io.event_log import EventLogIDs 6 | from simod.data_attributes.discovery import discover_data_attributes 7 | 8 | LOG_IDS = EventLogIDs(case="case_id", 9 | activity="activity", 10 | start_time="start_time", 11 | end_time="end_time", 12 | resource="resource" 13 | ) 14 | 15 | ASSET_DIR = "data_attributes" 16 | GLOBAL_ATTRIBUTE_LOG_PATHS = "global_attribute_*.csv.gz" 17 | CASE_ATTRIBUTE_LOG_PATHS = "case_attribute*.csv.gz" 18 | EVENT_ATTRIBUTE_LOG_PATHS = "event_attribute*.csv.gz" 19 | 20 | 21 | @pytest.fixture(scope="module") 22 | def global_log_files(entry_point): 23 | log_pattern = os.path.join(entry_point, ASSET_DIR, GLOBAL_ATTRIBUTE_LOG_PATHS) 24 | return glob.glob(log_pattern) 25 | 26 | 27 |
@pytest.fixture(scope="module") 28 | def case_log_files(entry_point): 29 | log_pattern = os.path.join(entry_point, ASSET_DIR, CASE_ATTRIBUTE_LOG_PATHS) 30 | return glob.glob(log_pattern) 31 | 32 | 33 | @pytest.fixture(scope="module") 34 | def event_log_files(entry_point): 35 | log_pattern = os.path.join(entry_point, ASSET_DIR, EVENT_ATTRIBUTE_LOG_PATHS) 36 | return glob.glob(log_pattern) 37 | 38 | 39 | def assert_attributes(log, log_ids, expected_case_attrs, expected_event_attrs, expected_global_attrs, runs=5): 40 | success_count = 0 41 | 42 | for i in range(runs): 43 | global_attributes, case_attributes, event_attributes = discover_data_attributes(log, log_ids) 44 | print(f"try {i}") 45 | try: 46 | assert len(global_attributes) == expected_global_attrs, \ 47 | f"Expected {expected_global_attrs} global attributes, found {len(global_attributes)}" 48 | assert len(case_attributes) == expected_case_attrs, \ 49 | f"Expected {expected_case_attrs} case attributes, found {len(case_attributes)}" 50 | assert len(event_attributes) == expected_event_attrs, \ 51 | f"Expected {expected_event_attrs} event attributes, found {len(event_attributes)}" 52 | success_count += 1 53 | except AssertionError as e: 54 | print(f"Assertion failed: {e}") 55 | 56 | if success_count < runs // 2: 57 | raise AssertionError("Majority of runs failed") 58 | 59 | 60 | def test_discover_global_attributes(entry_point, global_log_files): 61 | for log_path in global_log_files: 62 | log = pd.read_csv(log_path, compression="gzip") 63 | assert_attributes(log, LOG_IDS, expected_case_attrs=0, expected_event_attrs=16, expected_global_attrs=1) 64 | 65 | 66 | def test_discover_case_attributes(entry_point, case_log_files): 67 | for log_path in case_log_files: 68 | log = pd.read_csv(log_path, compression="gzip") 69 | assert_attributes(log, LOG_IDS, expected_case_attrs=5, expected_event_attrs=0, expected_global_attrs=0) 70 | 71 | 72 | def test_discover_event_attributes(entry_point, event_log_files): 73 | for log_path in event_log_files: 74 | log = pd.read_csv(log_path, compression="gzip") 75 | assert_attributes(log, LOG_IDS, expected_case_attrs=0, expected_event_attrs=1, expected_global_attrs=0) 76 | 77 | -------------------------------------------------------------------------------- /tests/test_event_log/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_event_log/__init__.py -------------------------------------------------------------------------------- /tests/test_event_log/test_event_log.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pix_framework.io.event_log import APROMORE_LOG_IDS, DEFAULT_XES_IDS 3 | 4 | from simod.event_log.event_log import EventLog 5 | 6 | test_cases = [ 7 | { 8 | "log_name": "Simple_log_no_start_times.csv", 9 | "log_ids": APROMORE_LOG_IDS 10 | }, 11 | { 12 | "log_name": "LoanApp_simplified.csv.gz", 13 | "log_ids": DEFAULT_XES_IDS, 14 | }, 15 | ] 16 | 17 | 18 | @pytest.mark.parametrize("test_data", test_cases, ids=[test_data["log_name"] for test_data in test_cases]) 19 | def test_optimizer(test_data, entry_point): 20 | path = (entry_point / test_data["log_name"]).absolute() 21 | log_ids = test_data["log_ids"] 22 | 23 | event_log = EventLog.from_path(path, log_ids, need_test_partition=True) 24 | 25 | assert event_log.log_ids == log_ids 26 | assert event_log.train_partition is not None 27 | 
assert event_log.validation_partition is not None 28 | assert event_log.test_partition is not None 29 | assert len(event_log.train_partition) > len(event_log.validation_partition) 30 | assert len(event_log.validation_partition) < len(event_log.test_partition) 31 | 32 | 33 | def test_wrong_log_extension(entry_point): 34 | training_message = r"The specified training log has an unsupported extension.*Only 'csv' and 'csv.gz' supported." 35 | test_message = r"The specified test log has an unsupported extension.*Only 'csv' and 'csv.gz' supported." 36 | # Assert wrong training log 37 | with pytest.raises(ValueError, match=training_message) as error: 38 | EventLog.from_path( 39 | train_log_path=entry_point / "Control_flow_optimization_test.bpmn", 40 | log_ids=DEFAULT_XES_IDS, 41 | test_log_path=None 42 | ) 43 | assert error.type 44 | with pytest.raises(ValueError, match=training_message) as error: 45 | EventLog.from_path( 46 | train_log_path=entry_point / "PurchasingExample.xes", 47 | log_ids=DEFAULT_XES_IDS, 48 | test_log_path=None 49 | ) 50 | # Assert wrong test log 51 | with pytest.raises(ValueError, match=test_message) as error: 52 | EventLog.from_path( 53 | train_log_path=entry_point / "Control_flow_optimization_test.csv", 54 | log_ids=DEFAULT_XES_IDS, 55 | test_log_path=entry_point / "PurchasingExample.xes", 56 | ) 57 | with pytest.raises(ValueError, match=test_message) as error: 58 | EventLog.from_path( 59 | train_log_path=entry_point / "Control_flow_optimization_test.csv", 60 | log_ids=DEFAULT_XES_IDS, 61 | test_log_path=entry_point / "PurchasingExample.xes.gz", 62 | ) 63 | -------------------------------------------------------------------------------- /tests/test_event_log/test_preprocessor.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pix_framework.io.event_log import APROMORE_LOG_IDS, read_csv_log 3 | from simod.event_log.preprocessor import Preprocessor 4 | 5 | 6 | @pytest.mark.integration 7 | @pytest.mark.parametrize("log_name", ["Simple_log_no_start_times.csv"]) 8 | def test_add_start_times(log_name, entry_point): 9 | log_ids = APROMORE_LOG_IDS 10 | event_log = read_csv_log(entry_point / log_name, log_ids) 11 | preprocessor = Preprocessor(event_log, log_ids) 12 | log = preprocessor.run() 13 | 14 | assert log[log_ids.start_time].isna().sum() == 0 15 | -------------------------------------------------------------------------------- /tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pix_framework.io.event_log import DEFAULT_XES_IDS, read_csv_log 3 | from simod.metrics import get_absolute_emd 4 | 5 | test_cases = [ 6 | { 7 | "name": "LoanApp_simplified", 8 | "original_log": {"log_name": "LoanApp_simplified.csv.gz", "log_ids": DEFAULT_XES_IDS}, 9 | "simulated_log": {"log_name": "LoanApp_simplified_2.csv.gz", "log_ids": DEFAULT_XES_IDS}, 10 | } 11 | ] 12 | 13 | 14 | @pytest.mark.integration 15 | @pytest.mark.parametrize("test_data", test_cases, ids=[test_data["name"] for test_data in test_cases]) 16 | def test_absolute_timestamp_emd(entry_point, test_data): 17 | original_log_path = entry_point / test_data["original_log"]["log_name"] 18 | simulated_log_path = entry_point / test_data["simulated_log"]["log_name"] 19 | 20 | original_log_ids = test_data["original_log"]["log_ids"] 21 | simulated_log_ids = test_data["simulated_log"]["log_ids"] 22 | 23 | original_log = read_csv_log(original_log_path, original_log_ids) 24 | simulated_log = 
read_csv_log(simulated_log_path, simulated_log_ids) 25 | 26 | # Test different logs 27 | emd = get_absolute_emd(original_log, original_log_ids, simulated_log, simulated_log_ids) 28 | assert emd > 0.0 29 | # Test similar log 30 | emd = get_absolute_emd(original_log, original_log_ids, original_log, simulated_log_ids) 31 | assert emd == 0.0 32 | -------------------------------------------------------------------------------- /tests/test_prioritization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_prioritization/__init__.py -------------------------------------------------------------------------------- /tests/test_prioritization/test_prioritization_discovery.py: -------------------------------------------------------------------------------- 1 | from pix_framework.io.event_log import DEFAULT_XES_IDS, read_csv_log 2 | from simod.data_attributes.discovery import discover_data_attributes 3 | from simod.prioritization.discovery import ( 4 | discover_prioritization_rules, 5 | ) 6 | from simod.prioritization.types import PrioritizationRule 7 | 8 | 9 | def test_prioritization_rules_serialization_deserialization(entry_point): 10 | rules_dict = { 11 | "prioritisation_rules": [ 12 | { 13 | "priority_level": 1, 14 | "rules": [ 15 | [ 16 | {"attribute": "loan_amount", "comparison": "in", "value": ["1000", "2000"]}, 17 | {"attribute": "type", "comparison": "=", "value": "BUSINESS"}, 18 | ], 19 | [{"attribute": "loan_amount", "comparison": "in", "value": ["2000", "inf"]}], 20 | ], 21 | }, 22 | {"priority_level": 2, "rules": [[{"attribute": "loan_amount", "comparison": ">", "value": "500"}]]}, 23 | ] 24 | } 25 | 26 | rules = list(map(PrioritizationRule.from_prosimos, rules_dict["prioritisation_rules"])) 27 | rules_dict_2 = {"prioritisation_rules": list(map(lambda x: x.to_prosimos(), rules))} 28 | 29 | assert len(rules) == 2 30 | assert rules_dict == rules_dict_2 31 | 32 | 33 | def test_discover_prioritization_rules(entry_point): 34 | log_path = entry_point / "Simple_log_with_prioritization.csv" 35 | log_ids = DEFAULT_XES_IDS 36 | log = read_csv_log(log_path, log_ids) 37 | 38 | global_attributes, case_attributes, event_attributes = discover_data_attributes(log, log_ids) 39 | 40 | rules = discover_prioritization_rules(log, log_ids, case_attributes) 41 | 42 | assert len(rules) > 0 43 | -------------------------------------------------------------------------------- /tests/test_prioritization/test_prioritization_impact.py: -------------------------------------------------------------------------------- 1 | config_yaml_A = """ 2 | version: 2 3 | common: 4 | log_path: tests/assets/___.csv 5 | log_ids: 6 | case: case_id 7 | activity: Activity 8 | resource: Resource 9 | start_time: start_time 10 | end_time: end_time 11 | repetitions: 10 12 | evaluation_metrics: 13 | - absolute_hourly_emd 14 | - cycle_time_emd 15 | - circadian_emd 16 | preprocessing: 17 | multitasking: false 18 | control_flow: 19 | optimization_metric: cycle_time_emd 20 | max_evaluations: 10 21 | mining_algorithm: sm1 22 | epsilon: 23 | - 0.0 24 | - 1.0 25 | eta: 26 | - 0.0 27 | - 1.0 28 | gateway_probabilities: 29 | - discovery 30 | replace_or_joins: 31 | - true 32 | - false 33 | prioritize_parallelism: 34 | - true 35 | - false 36 | resource_model: 37 | optimization_metric: absolute_hourly_emd 38 | max_evaluations: 10 39 | discover_prioritization_rules: true 40 | resource_profiles: 
41 | discovery_type: differentiated 42 | granularity: 43 | - 15 44 | - 60 45 | confidence: 46 | - 0.5 47 | - 0.85 48 | support: 49 | - 0.01 50 | - 0.3 51 | participation: 0.4 52 | extraneous_activity_delays: 53 | optimization_metric: absolute_emd 54 | num_iterations: 1 55 | """ 56 | 57 | # @pytest.mark.manual 58 | # def test_prioritization_discovery_impact(entry_point: Path): 59 | # settings = SimodSettings.from_stream(config_yaml_A) 60 | # settings.log_path = (entry_point / Path(settings.common.log_path.name)).absolute() 61 | # 62 | # log = EventLog.from_path( 63 | # path=settings.common.log_path, 64 | # log_ids=settings.common.log_ids, 65 | # process_name=settings.common.log_path.stem, 66 | # ) 67 | # 68 | # simod = Simod(settings=settings, event_log=log) 69 | # simod.run() 70 | 71 | # TODO: find a log with prioritization _rules 72 | -------------------------------------------------------------------------------- /tests/test_resource_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_resource_model/__init__.py -------------------------------------------------------------------------------- /tests/test_settings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_settings/__init__.py -------------------------------------------------------------------------------- /tests/test_settings/test_control_flow_settings.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pix_framework.discovery.gateway_probabilities import GatewayProbabilitiesDiscoveryMethod 3 | 4 | from simod.settings.control_flow_settings import ControlFlowSettings, ProcessModelDiscoveryAlgorithm 5 | 6 | settings_single_values_sm2 = { 7 | "num_iterations": 2, 8 | "num_evaluations_per_iteration": 3, 9 | "gateway_probabilities": "equiprobable", 10 | "mining_algorithm": "Split Miner 2", 11 | "epsilon": 0.45, 12 | "eta": 0.34, 13 | "replace_or_joins": True, 14 | "prioritize_parallelism": True, 15 | } 16 | 17 | settings_interval_values_sm2 = { 18 | "num_iterations": 10, 19 | "num_evaluations_per_iteration": 3, 20 | "gateway_probabilities": ["equiprobable", "discovery"], 21 | "mining_algorithm": "Split Miner 2", 22 | "epsilon": [0.12, 0.45], 23 | "eta": [0.34, 0.55], 24 | "replace_or_joins": [True, False], 25 | "prioritize_parallelism": [True, False], 26 | } 27 | 28 | settings_single_values_sm1 = { 29 | "num_iterations": 2, 30 | "num_evaluations_per_iteration": 3, 31 | "gateway_probabilities": "equiprobable", 32 | "mining_algorithm": "Split Miner 1", 33 | "epsilon": 0.45, 34 | "eta": 0.34, 35 | "replace_or_joins": True, 36 | "prioritize_parallelism": True, 37 | } 38 | 39 | settings_interval_values_sm1 = { 40 | "num_iterations": 10, 41 | "num_evaluations_per_iteration": 3, 42 | "gateway_probabilities": ["equiprobable", "discovery"], 43 | "mining_algorithm": "Split Miner 1", 44 | "epsilon": [0.12, 0.45], 45 | "eta": [0.34, 0.55], 46 | "replace_or_joins": [True, False], 47 | "prioritize_parallelism": [True, False], 48 | } 49 | 50 | test_cases = [ 51 | {"name": "Single values SM2", "control_flow": settings_single_values_sm2}, 52 | {"name": "Intervals SM2", "control_flow": settings_interval_values_sm2}, 53 | {"name": "Single values SM1", "control_flow": 
settings_single_values_sm1}, 54 | {"name": "Intervals SM1", "control_flow": settings_interval_values_sm1}, 55 | ] 56 | 57 | 58 | @pytest.mark.parametrize("test_data", test_cases, ids=list(map(lambda x: x["name"], test_cases))) 59 | def test_control_flow_settings(test_data: dict): 60 | settings = ControlFlowSettings.from_dict(test_data["control_flow"]) 61 | 62 | if test_data["name"] == "Single values SM2": 63 | assert settings.num_iterations == settings_single_values_sm2["num_iterations"] 64 | assert settings.num_evaluations_per_iteration == settings_single_values_sm2["num_evaluations_per_iteration"] 65 | assert settings.gateway_probabilities == GatewayProbabilitiesDiscoveryMethod.EQUIPROBABLE 66 | assert settings.mining_algorithm == ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V2 67 | assert settings.epsilon == settings_single_values_sm2["epsilon"] 68 | assert settings.eta is None 69 | assert settings.replace_or_joins is None 70 | assert settings.prioritize_parallelism is None 71 | elif test_data["name"] == "Intervals SM2": 72 | assert settings.num_iterations == settings_interval_values_sm2["num_iterations"] 73 | assert settings.num_evaluations_per_iteration == settings_interval_values_sm2["num_evaluations_per_iteration"] 74 | assert settings.gateway_probabilities == [ 75 | GatewayProbabilitiesDiscoveryMethod.EQUIPROBABLE, 76 | GatewayProbabilitiesDiscoveryMethod.DISCOVERY, 77 | ] 78 | assert settings.mining_algorithm == ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V2 79 | assert settings.epsilon == ( 80 | settings_interval_values_sm2["epsilon"][0], 81 | settings_interval_values_sm2["epsilon"][1], 82 | ) 83 | assert settings.eta is None 84 | assert settings.replace_or_joins is None 85 | assert settings.prioritize_parallelism is None 86 | elif test_data["name"] == "Single values SM1": 87 | assert settings.num_iterations == settings_single_values_sm1["num_iterations"] 88 | assert settings.num_evaluations_per_iteration == settings_single_values_sm1["num_evaluations_per_iteration"] 89 | assert settings.gateway_probabilities == GatewayProbabilitiesDiscoveryMethod.EQUIPROBABLE 90 | assert settings.mining_algorithm == ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V1 91 | assert settings.epsilon == settings_single_values_sm1["epsilon"] 92 | assert settings.eta == settings_single_values_sm1["eta"] 93 | assert settings.replace_or_joins == settings_single_values_sm1["replace_or_joins"] 94 | assert settings.prioritize_parallelism == settings_single_values_sm1["prioritize_parallelism"] 95 | elif test_data["name"] == "Intervals SM1": 96 | assert settings.num_iterations == settings_interval_values_sm1["num_iterations"] 97 | assert settings.num_evaluations_per_iteration == settings_interval_values_sm1["num_evaluations_per_iteration"] 98 | assert settings.gateway_probabilities == [ 99 | GatewayProbabilitiesDiscoveryMethod.EQUIPROBABLE, 100 | GatewayProbabilitiesDiscoveryMethod.DISCOVERY, 101 | ] 102 | assert settings.mining_algorithm == ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V1 103 | assert settings.epsilon == ( 104 | settings_interval_values_sm1["epsilon"][0], 105 | settings_interval_values_sm1["epsilon"][1], 106 | ) 107 | assert settings.eta == (settings_interval_values_sm1["eta"][0], settings_interval_values_sm1["eta"][1]) 108 | assert settings.replace_or_joins == settings_interval_values_sm1["replace_or_joins"] 109 | assert settings.prioritize_parallelism == settings_interval_values_sm1["prioritize_parallelism"] 110 | else: 111 | assert False 112 | 
-------------------------------------------------------------------------------- /tests/test_settings/test_resource_model_settings.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pix_framework.discovery.resource_calendar_and_performance.calendar_discovery_parameters import CalendarType 3 | from simod.settings.common_settings import Metric 4 | from simod.settings.resource_model_settings import ResourceModelSettings 5 | 6 | settings_single_values = { 7 | "optimization_metric": "absolute_hourly_emd", 8 | "num_iterations": 2, 9 | "num_evaluations_per_iteration": 3, 10 | "resource_profiles": { 11 | "discovery_type": "pool", 12 | "granularity": 60, 13 | "confidence": 0.05, 14 | "support": 0.5, 15 | "participation": 0.4, 16 | }, 17 | } 18 | settings_interval_values = { 19 | "optimization_metric": "circadian_emd", 20 | "num_iterations": 2, 21 | "num_evaluations_per_iteration": 3, 22 | "resource_profiles": { 23 | "discovery_type": "differentiated", 24 | "granularity": [15, 60], 25 | "confidence": [0.05, 0.4], 26 | "support": [0.5, 0.8], 27 | "participation": [0.2, 0.6], 28 | }, 29 | } 30 | 31 | test_cases = [ 32 | {"name": "Single values", "resource_model": settings_single_values}, 33 | {"name": "Intervals", "resource_model": settings_interval_values}, 34 | ] 35 | 36 | 37 | @pytest.mark.parametrize("test_data", test_cases, ids=list(map(lambda x: x["name"], test_cases))) 38 | def test_resource_model_settings(test_data: dict): 39 | settings = ResourceModelSettings.from_dict(test_data["resource_model"]) 40 | 41 | if test_data["name"] == "Single values": 42 | assert settings.num_iterations == settings_single_values["num_iterations"] 43 | assert settings.optimization_metric == Metric.ABSOLUTE_EMD 44 | assert settings.discovery_type == CalendarType.DIFFERENTIATED_BY_POOL 45 | assert settings.granularity == 60 46 | assert settings.confidence == 0.05 47 | assert settings.support == 0.5 48 | assert settings.participation == 0.4 49 | elif test_data["name"] == "Intervals": 50 | assert settings.num_iterations == settings_interval_values["num_iterations"] 51 | assert settings.optimization_metric == Metric.CIRCADIAN_EMD 52 | assert settings.discovery_type == CalendarType.DIFFERENTIATED_BY_RESOURCE 53 | assert settings.granularity == (15, 60) 54 | assert settings.confidence == (0.05, 0.4) 55 | assert settings.support == (0.5, 0.8) 56 | assert settings.participation == (0.2, 0.6) 57 | else: 58 | assert False 59 | -------------------------------------------------------------------------------- /tests/test_simulation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_simulation/__init__.py -------------------------------------------------------------------------------- /tests/test_simulation/test_evaluate_logs.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | from pix_framework.io.event_log import EventLogIDs, read_csv_log 5 | from simod.settings.common_settings import Metric 6 | from simod.simulation.prosimos import evaluate_logs 7 | 8 | 9 | @pytest.mark.parametrize("parallel", [True, False]) 10 | def test_evaluate_logs(parallel): 11 | metrics = [ 12 | Metric.CIRCADIAN_EMD, 13 | Metric.ABSOLUTE_EMD, 14 | Metric.CYCLE_TIME_EMD, 15 | Metric.TWO_GRAM_DISTANCE, 16 | ] 17 | 18 | assets_dir =
Path(__file__).parent / "assets" 19 | 20 | log_paths = list(assets_dir.glob("*.csv")) 21 | 22 | log_ids = EventLogIDs( 23 | case="case_id", 24 | activity="activity", 25 | resource="resource", 26 | start_time="start_time", 27 | end_time="end_time", 28 | enabled_time="enabled_time", 29 | enabling_activity="enabling_activity", 30 | available_time="available_time", 31 | estimated_start_time="estimated_start_time", 32 | ) 33 | 34 | validation_log = read_csv_log(assets_dir / "validation_log.csv", log_ids) 35 | 36 | results = evaluate_logs( 37 | metrics=metrics, 38 | simulation_log_paths=log_paths, 39 | validation_log=validation_log, 40 | validation_log_ids=log_ids, 41 | ) 42 | 43 | assert len(results) > 0 44 | -------------------------------------------------------------------------------- /tests/test_utilities.py: -------------------------------------------------------------------------------- 1 | from simod.utilities import parse_single_value_or_interval 2 | 3 | 4 | def test_parse_single_value_or_interval(entry_point): 5 | assert parse_single_value_or_interval(1.0) == 1.0 6 | assert parse_single_value_or_interval(0.23) == 0.23 7 | assert parse_single_value_or_interval(0.0) == 0.0 8 | assert parse_single_value_or_interval([0.0, 1.0]) == (0.0, 1.0) 9 | assert parse_single_value_or_interval([0.32, 0.78]) == (0.32, 0.78) 10 | --------------------------------------------------------------------------------
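The suite above separates fast unit tests from long-running ones with the system and integration markers used throughout. A hedged invocation sketch (assuming the markers are registered in the project's pytest configuration, which is not shown here):

import pytest

# Run the test suite, skipping the end-to-end runs marked with @pytest.mark.system.
pytest.main(["tests", "-m", "not system"])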