├── .dockerignore ├── .github └── workflows │ └── simod.yml ├── .gitignore ├── .java-version ├── .readthedocs.yaml ├── .sdkmanrc ├── Dockerfile ├── LICENSE ├── README.md ├── ansible.cfg ├── ansible ├── docker.yml ├── experiment.yml ├── inventory.yml ├── templates │ └── docker_run_cmd.sh.j2 └── test.yml ├── benchmarking ├── analyze_results.py ├── docker_collect_results.py ├── docker_jobs.py ├── input │ ├── config.yml │ ├── config_f_naive.yml │ ├── config_no_extraneous.yml │ └── config_observed_arrivals.yml ├── plot_measurements.py └── preprocess_logs.py ├── build_docker.sh ├── docs ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── _static │ ├── complete_configuration.yml │ ├── configuration_example.yml │ ├── configuration_example_data_aware.yml │ ├── configuration_example_fuzzy.yml │ ├── configuration_example_with_evaluation.yml │ ├── configuration_example_with_provided_process_model.yml │ ├── configuration_one_shot.yml │ └── simod.png │ ├── api.rst │ ├── citation.rst │ ├── conf.py │ ├── index.rst │ ├── installation.rst │ └── usage.rst ├── poetry.toml ├── pyproject.toml ├── resources ├── config │ ├── benchmark │ │ ├── benchmark_diff.yml │ │ ├── benchmark_diff_data_aware.yml │ │ ├── benchmark_diff_extr.yml │ │ ├── benchmark_fuzz.yml │ │ ├── benchmark_fuzz_extr.yml │ │ └── benchmark_pool.yml │ ├── complete_configuration.yml │ ├── configuration_example.yml │ ├── configuration_example_data_aware.yml │ ├── configuration_example_fuzzy.yml │ ├── configuration_example_with_evaluation.yml │ ├── configuration_example_with_provided_process_model.yml │ └── configuration_one_shot.yml ├── event_logs │ ├── LoanApp_simplified_test.csv.gz │ ├── LoanApp_simplified_train.csv.gz │ └── PurchasingExample.csv.gz └── models │ └── LoanApp_simplified.bpmn ├── run.sh ├── src └── simod │ ├── __init__.py │ ├── batching │ ├── __init__.py │ ├── discovery.py │ └── types.py │ ├── branch_rules │ ├── __init__.py │ ├── discovery.py │ └── types.py │ ├── cli.py │ ├── cli_formatter.py │ ├── control_flow │ ├── __init__.py │ ├── discovery.py │ ├── lib │ │ ├── bpmn-layout-1.0.6-jar-with-dependencies.jar │ │ └── split-miner-1.7.1-all.jar │ ├── optimizer.py │ └── settings.py │ ├── data_attributes │ ├── __init__.py │ ├── discovery.py │ └── types.py │ ├── event_log │ ├── __init__.py │ ├── event_log.py │ └── preprocessor.py │ ├── extraneous_delays │ ├── __init__.py │ ├── optimizer.py │ ├── types.py │ └── utilities.py │ ├── metrics.py │ ├── prioritization │ ├── __init__.py │ ├── discovery.py │ └── types.py │ ├── resource_model │ ├── __init__.py │ ├── optimizer.py │ ├── repair.py │ └── settings.py │ ├── runtime_meter.py │ ├── settings │ ├── __init__.py │ ├── common_settings.py │ ├── control_flow_settings.py │ ├── extraneous_delays_settings.py │ ├── preprocessing_settings.py │ ├── resource_model_settings.py │ └── simod_settings.py │ ├── simod.py │ ├── simulation │ ├── __init__.py │ ├── parameters │ │ ├── BPS_model.py │ │ └── __init__.py │ └── prosimos.py │ └── utilities.py └── tests ├── __init__.py ├── assets ├── Control_flow_optimization_test.bpmn ├── Control_flow_optimization_test.csv ├── Insurance_Claims_test.csv ├── Insurance_Claims_train.csv ├── LoanApp_simplified.bpmn ├── LoanApp_simplified.csv.gz ├── LoanApp_simplified_2.csv.gz ├── LoanApp_simplified_without_approve_loan_offer.csv ├── PurchasingExample.xes ├── Resource_model_optimization_test.bpmn ├── Resource_model_optimization_test.csv ├── Simple_log_no_start_times.csv ├── Simple_log_with_batching.csv ├── Simple_log_with_prioritization.csv ├── bpic15 │ ├── 
BPIC15_1.bpmn │ ├── BPIC15_1.csv.gz │ └── bpic15_1_with_model_v4.yml ├── branch_rules │ ├── or.bpmn │ ├── or_1.csv.gz │ ├── or_2.csv.gz │ ├── or_3.csv.gz │ ├── or_4.csv.gz │ ├── or_5.csv.gz │ ├── or_6.csv.gz │ ├── or_7.csv.gz │ ├── or_8.csv.gz │ ├── xor.bpmn │ ├── xor_1.csv.gz │ ├── xor_2.csv.gz │ ├── xor_3.csv.gz │ ├── xor_5.csv.gz │ ├── xor_6.csv.gz │ └── xor_7.csv.gz ├── configuration_simod_basic.yml ├── configuration_simod_with_extraneous.yml ├── configuration_simod_with_model.yml ├── configuration_simod_with_model_and_batching.yml ├── configuration_simod_with_model_and_extraneous.yml ├── configuration_simod_with_model_and_prioritization.yml ├── control_flow_discovery_output │ ├── model-sm2.bpmn │ ├── model-sm3.bpmn │ ├── model-split_miner_v1.bpmn │ └── model-split_miner_v2.bpmn ├── data_attributes │ ├── case_attributes.csv.gz │ ├── event_attribute_1.csv.gz │ ├── event_attribute_15.csv.gz │ ├── event_attribute_3.csv.gz │ ├── event_attribute_5.csv.gz │ ├── event_attribute_7.csv.gz │ ├── event_attribute_9.csv.gz │ ├── global_attribute_1.csv.gz │ ├── global_attribute_15.csv.gz │ ├── global_attribute_3.csv.gz │ ├── global_attribute_5.csv.gz │ ├── global_attribute_7.csv.gz │ └── global_attribute_9.csv.gz ├── model_sequence_self_loop.xes ├── model_sequence_self_loop_only_end.xes └── process_model_with_SplitMiner_self_loops.bpmn ├── conftest.py ├── test_batching ├── __init__.py ├── assets │ ├── LoanApp_batch_sim_log.csv │ └── event_log_5.csv ├── test_batching_discovery.py └── test_types.py ├── test_bpic15.py ├── test_branch_rules ├── __init__.py └── test_discovery.py ├── test_case_attributes ├── __init__.py └── test_discovery.py ├── test_cli.py ├── test_control_flow ├── __init__.py ├── test_discovery.py └── test_optimizer.py ├── test_data_attributes ├── __init__.py └── test_discovery.py ├── test_event_log ├── __init__.py ├── test_event_log.py └── test_preprocessor.py ├── test_metrics.py ├── test_prioritization ├── __init__.py ├── test_prioritization_discovery.py └── test_prioritization_impact.py ├── test_resource_model ├── __init__.py └── test_optimizer.py ├── test_settings ├── __init__.py ├── test_control_flow_settings.py ├── test_resource_model_settings.py └── test_simod_settings.py ├── test_simod.py ├── test_simulation ├── __init__.py ├── assets │ ├── simulated_log_0.csv │ ├── simulated_log_1.csv │ ├── simulated_log_2.csv │ └── validation_log.csv └── test_evaluate_logs.py └── test_utilities.py /.dockerignore: -------------------------------------------------------------------------------- 1 | * 2 | !src 3 | !*.bash 4 | !*.sh 5 | !LICENSE 6 | !*.toml 7 | !*.ini 8 | !*.md 9 | !*.txt -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | outputs 2 | output 3 | *.pyc 4 | *.stats 5 | venv* 6 | .venv 7 | .idea 8 | .vscode 9 | *.egg-info 10 | .DS_Store 11 | tests/assets/validation_* 12 | input_files 13 | log/ 14 | .pytest_cache/ 15 | 16 | *.dat 17 | *.log 18 | *.out 19 | 20 | # Sonarqube files and folders 21 | .sonar_lock 22 | .scannerwork 23 | 24 | .pymon 25 | .benchmarks 26 | 27 | .coverage* 28 | htmlcov/ 29 | 30 | resources/private 31 | build/ 32 | vendor/ 33 | benchmarking/*.csv 34 | benchmarking/*.yaml 35 | benchmarking/*.yml 36 | benchmarking/input/logs 37 | benchmarking/results 38 | tests/test_benchmarking/logs 39 | sandbox 40 | dist/ -------------------------------------------------------------------------------- /.java-version: 
-------------------------------------------------------------------------------- 1 | 1.8 2 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the OS, Python version, and other tools you might need 8 | build: 9 | os: ubuntu-24.04 10 | tools: 11 | python: "3.9" 12 | 13 | # Build documentation in the "docs/" directory with Sphinx 14 | sphinx: 15 | configuration: docs/source/conf.py 16 | 17 | # Optionally, but recommended, 18 | # declare the Python requirements required to build your documentation 19 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 20 | python: 21 | install: 22 | - requirements: docs/requirements.txt 23 | -------------------------------------------------------------------------------- /.sdkmanrc: -------------------------------------------------------------------------------- 1 | # Enable auto-env through the sdkman_auto_env config 2 | # Add key=value pairs of SDKs to use below 3 | java=8.0.382-amzn 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM openjdk:8 2 | 3 | RUN apt-get update && apt-get install -y \ 4 | python3 \ 5 | python3-pip \ 6 | python3-venv 7 | 8 | RUN apt-get clean \ 9 | && rm -rf /var/lib/apt/lists/* \ 10 | && rm -rf /tmp/* \ 11 | && rm -rf /var/tmp/* 12 | 13 | WORKDIR /usr/src/Simod 14 | COPY . . 15 | RUN pip install -U pip 16 | RUN pip install poetry 17 | RUN poetry install 18 | 19 | CMD ["/bin/bash"] 20 | 21 | # Docker usage example: 22 | # $ docker run --rm -it -v /path/to/resources/:/usr/src/Simod/resources -v /path/to/output:/usr/src/Simod/outputs nokal/simod bash -------------------------------------------------------------------------------- /ansible/docker.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Update the Docker image 3 | hosts: main 4 | 5 | tasks: 6 | - name: Pull the latest Docker image 7 | community.docker.docker_image: 8 | name: "{{ docker_image }}" 9 | source: pull 10 | pull: 11 | platform: amd64 12 | vars: 13 | docker_image: nokal/simod:latest 14 | tags: docker -------------------------------------------------------------------------------- /ansible/experiment.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Set up experiments environment 3 | hosts: main 4 | 5 | tasks: 6 | - set_fact: 7 | experiment_name: "{{ lookup('pipe', 'date +%Y-%m-%d_%H-%M-%S') }}" 8 | 9 | - name: Create experiments folder 10 | ansible.builtin.file: 11 | path: ~/simod_experiments/{{ experiment_name }} 12 | state: directory 13 | mode: 0775 14 | 15 | - name: Copy the input folder 16 | ansible.builtin.copy: 17 | src: ../benchmarking/input 18 | dest: ~/simod_experiments/{{ experiment_name }} 19 | mode: 0775 20 | 21 | - name: Copy the Python script 22 | ansible.builtin.copy: 23 | src: ../benchmarking/docker_jobs.py 24 | dest: ~/simod_experiments/{{ experiment_name }} 25 | mode: 0775 26 | 27 | - name: Start the experiments 28 | hosts: main 29 | 30 | tasks: 31 | - name: Start the experiments 32 | ansible.builtin.command: "~/miniconda3/bin/python docker_jobs.py" 33 | args: 34 | chdir: 
~/simod_experiments/{{ experiment_name }} 35 | 36 | # - name: Copy the Python script 37 | # ansible.builtin.copy: 38 | # src: ../benchmarking/docker_collect_results.py 39 | # dest: ~/simod_experiments/{{ experiment_name }} 40 | # mode: 0775 41 | 42 | # - name: Collect results 43 | # ansible.builtin.command: "~/miniconda3/bin/python docker_collect_results.py" 44 | # args: 45 | # chdir: ~/simod_experiments/{{ experiment_name }} 46 | -------------------------------------------------------------------------------- /ansible/inventory.yml: -------------------------------------------------------------------------------- 1 | all: 2 | hosts: 3 | main: 4 | ansible_host: simodtesting.cloud.ut.ee 5 | hpc: 6 | ansible_host: rocket.hpc.ut.ee -------------------------------------------------------------------------------- /ansible/templates/docker_run_cmd.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /usr/src/Simod 4 | source venv/bin/activate 5 | Xvfb :99 &>/dev/null & disown 6 | {{ command }} 7 | -------------------------------------------------------------------------------- /ansible/test.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Testing 3 | hosts: main 4 | 5 | tasks: 6 | - set_fact: 7 | base_dir: /home/ihar/simod_testing 8 | experiment_name: "{{ lookup('pipe', 'date +%Y-%m-%d_%H-%M-%S') }}" 9 | tags: testing 10 | 11 | - name: Create the testing directory 12 | file: 13 | path: "{{ item }}" 14 | state: directory 15 | with_items: 16 | - "{{ base_dir }}" 17 | - "{{ base_dir }}/{{ experiment_name }}" 18 | tags: testing 19 | 20 | - name: Prepare Bash script for testing 21 | template: 22 | src: templates/docker_run_cmd.sh.j2 23 | dest: "{{ base_dir }}/{{ experiment_name }}/docker_run.sh" 24 | mode: 0755 25 | vars: 26 | command: pytest -vv --durations=0 -m "not system and not integration" 27 | tags: testing 28 | 29 | - name: Run unit tests in a container 30 | community.docker.docker_container: 31 | name: simod_testing 32 | image: "{{ docker_image }}" 33 | command: /bin/bash -c "cd /usr/src/Simod/input && ./docker_run.sh" 34 | volumes: 35 | - "{{ base_dir }}/{{ experiment_name }}:/usr/src/Simod/input" 36 | - "{{ base_dir }}/{{ experiment_name }}/output:/usr/src/Simod/outputs" 37 | state: started 38 | recreate: yes 39 | restart_policy: no 40 | tty: yes 41 | detach: no 42 | cleanup: yes 43 | vars: 44 | docker_image: nokal/simod:latest 45 | register: docker_testing 46 | ignore_errors: yes 47 | tags: testing -------------------------------------------------------------------------------- /benchmarking/analyze_results.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | from typing import Optional 4 | 5 | import pandas as pd 6 | 7 | simod_version = "3.6.0" 8 | results_dir = Path(__file__).parent / Path(f"results/{simod_version}/diff_observed-arrivals") 9 | 10 | metric_names_mapping = { 11 | "absolute_event_distribution": "AED", 12 | "arrival_event_distribution": "CAR", 13 | "circadian_event_distribution": "CED", 14 | "cycle_time_distribution": "CTD", 15 | "relative_event_distribution": "RED", 16 | "three_gram_distance": "NGD(3)", 17 | "two_gram_distance": "NGD", 18 | } 19 | 20 | event_log_names_mapping = { 21 | "BPIC_2012_train": "BPIC12", 22 | "BPIC_2017_train": "BPIC17", 23 | "CallCenter_train": "CALL", 24 | "AcademicCredentials_train": "AC_CRE", 25 | }
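# Expected layout of each result directory (descriptive comment, inferred from DiscoveryResult below):
#   <result_dir>/
#     evaluation/evaluation_*.csv            evaluation measures, read into a DataFrame
#     evaluation/simulation/simulated_*.csv  simulated event logs
#     <event_log_name>.bpmn                  discovered model; its file stem identifies the event log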
26 | 27 | 28 | @dataclass 29 | class DiscoveryResult: 30 | result_dir: Path 31 | 32 | _evaluation_measures_path: Optional[Path] = None 33 | _evaluation_measures: Optional[pd.DataFrame] = None 34 | _simulated_log_paths: Optional[list[Path]] = None 35 | _name: Optional[str] = None 36 | 37 | def __post_init__(self): 38 | evaluation_dir = self.result_dir / "evaluation" 39 | self._evaluation_measures_path = next(evaluation_dir.glob("evaluation_*.csv")) 40 | self._simulated_log_paths = list((evaluation_dir / "simulation").glob("simulated_*.csv")) 41 | self._name = next(self.result_dir.glob("*.bpmn")).stem 42 | self._name = event_log_names_mapping[self._name] 43 | self._evaluation_measures = pd.read_csv(self._evaluation_measures_path).drop(columns=["run_num"]) 44 | self._evaluation_measures["name"] = self._name 45 | self._rename_column_values("metric", metric_names_mapping) 46 | 47 | def _rename_column_values(self, column_name: str, mapping: dict[str, str]): 48 | self._evaluation_measures[column_name] = self._evaluation_measures[column_name].apply( 49 | lambda item: mapping[item] 50 | ) 51 | 52 | @property 53 | def evaluation_measures(self) -> pd.DataFrame: 54 | return self._evaluation_measures 55 | 56 | @property 57 | def mean_evaluation_measures(self) -> pd.DataFrame: 58 | return self.evaluation_measures.groupby(["metric"]).mean(numeric_only=True).assign(name=self.name).reset_index() 59 | 60 | @property 61 | def name(self) -> str: 62 | return self._name 63 | 64 | 65 | # Current measurements 66 | results = [DiscoveryResult(result_dir / "best_result") for result_dir in results_dir.iterdir() if result_dir.is_dir()] 67 | mean_evaluation_measures = pd.concat([result.mean_evaluation_measures for result in results]).reset_index(drop=True) 68 | mean_evaluation_measures["simod_version"] = simod_version 69 | 70 | # Save measurements 71 | mean_evaluation_measures.to_csv("measurements.csv", index=False) 72 | -------------------------------------------------------------------------------- /benchmarking/input/config.yml: -------------------------------------------------------------------------------- 1 | version: 4 2 | common: 3 | train_log_path: 4 | test_log_path: 5 | num_final_evaluations: 10 6 | evaluation_metrics: 7 | - 3_gram_distance 8 | - 2_gram_distance 9 | - absolute_event_distribution 10 | - relative_event_distribution 11 | - circadian_event_distribution 12 | - arrival_event_distribution 13 | - cycle_time_distribution 14 | log_ids: 15 | case: case_id 16 | activity: activity 17 | resource: resource 18 | start_time: start_time 19 | end_time: end_time 20 | enabled_time: enabled_time 21 | preprocessing: 22 | multitasking: false 23 | enable_time_concurrency_threshold: 0.5 24 | concurrency_df: 0.75 25 | concurrency_l2l: 0.9 26 | concurrency_l1l: 0.9 27 | control_flow: 28 | optimization_metric: n_gram_distance 29 | num_iterations: 30 30 | num_evaluations_per_iteration: 5 31 | gateway_probabilities: discovery 32 | discovery_algorithm: sm1 33 | epsilon: 34 | - 0.1 35 | - 1.0 36 | eta: 37 | - 0.2 38 | - 0.6 39 | replace_or_joins: 40 | - true 41 | - false 42 | prioritize_parallelism: 43 | - true 44 | - false 45 | resource_model: 46 | optimization_metric: circadian_event_distribution 47 | num_iterations: 40 48 | num_evaluations_per_iteration: 5 49 | resource_profiles: 50 | discovery_type: differentiated_fuzzy 51 | granularity: 52 | - 15 53 | - 60 54 | fuzzy_angle: 55 | - 0.1 56 | - 0.9 57 | extraneous_activity_delays: 58 | optimization_metric: relative_event_distribution 59 | num_iterations: 1 60 | 
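# NOTE: train_log_path and test_log_path are left empty in this template on purpose;
# presumably they are filled in per event log by the benchmarking job runner (see docker_jobs.py).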
-------------------------------------------------------------------------------- /benchmarking/input/config_f_naive.yml: -------------------------------------------------------------------------------- 1 | version: 4 2 | common: 3 | train_log_path: 4 | test_log_path: 5 | num_final_evaluations: 10 6 | evaluation_metrics: 7 | - 3_gram_distance 8 | - 2_gram_distance 9 | - absolute_event_distribution 10 | - relative_event_distribution 11 | - circadian_event_distribution 12 | - arrival_event_distribution 13 | - cycle_time_distribution 14 | log_ids: 15 | case: case_id 16 | activity: activity 17 | resource: resource 18 | start_time: start_time 19 | end_time: end_time 20 | enabled_time: enabled_time 21 | preprocessing: 22 | multitasking: false 23 | enable_time_concurrency_threshold: 0.5 24 | concurrency_df: 0.75 25 | concurrency_l2l: 0.9 26 | concurrency_l1l: 0.9 27 | control_flow: 28 | optimization_metric: n_gram_distance 29 | num_iterations: 30 30 | num_evaluations_per_iteration: 5 31 | gateway_probabilities: discovery 32 | discovery_algorithm: sm1 33 | epsilon: 34 | - 0.1 35 | - 1.0 36 | eta: 37 | - 0.2 38 | - 0.6 39 | replace_or_joins: 40 | - true 41 | - false 42 | prioritize_parallelism: 43 | - true 44 | - false 45 | resource_model: 46 | optimization_metric: circadian_event_distribution 47 | num_iterations: 40 48 | num_evaluations_per_iteration: 5 49 | resource_profiles: 50 | discovery_type: differentiated_fuzzy 51 | granularity: 52 | - 15 53 | - 60 54 | fuzzy_angle: 55 | - 0.1 56 | - 0.9 57 | extraneous_activity_delays: 58 | optimization_metric: relative_event_distribution 59 | num_iterations: 20 60 | discovery_method: naive 61 | -------------------------------------------------------------------------------- /benchmarking/input/config_no_extraneous.yml: -------------------------------------------------------------------------------- 1 | version: 4 2 | common: 3 | train_log_path: 4 | test_log_path: 5 | num_final_evaluations: 10 6 | evaluation_metrics: 7 | - 3_gram_distance 8 | - 2_gram_distance 9 | - absolute_event_distribution 10 | - relative_event_distribution 11 | - circadian_event_distribution 12 | - arrival_event_distribution 13 | - cycle_time_distribution 14 | log_ids: 15 | case: case_id 16 | activity: activity 17 | resource: resource 18 | start_time: start_time 19 | end_time: end_time 20 | enabled_time: enabled_time 21 | preprocessing: 22 | multitasking: false 23 | enable_time_concurrency_threshold: 0.5 24 | concurrency_df: 0.75 25 | concurrency_l2l: 0.9 26 | concurrency_l1l: 0.9 27 | control_flow: 28 | optimization_metric: n_gram_distance 29 | num_iterations: 30 30 | num_evaluations_per_iteration: 5 31 | gateway_probabilities: discovery 32 | discovery_algorithm: sm1 33 | epsilon: 34 | - 0.1 35 | - 1.0 36 | eta: 37 | - 0.2 38 | - 0.6 39 | replace_or_joins: 40 | - true 41 | - false 42 | prioritize_parallelism: 43 | - true 44 | - false 45 | resource_model: 46 | optimization_metric: circadian_event_distribution 47 | num_iterations: 40 48 | num_evaluations_per_iteration: 5 49 | resource_profiles: 50 | discovery_type: differentiated_fuzzy 51 | granularity: 52 | - 15 53 | - 60 54 | fuzzy_angle: 55 | - 0.1 56 | - 0.9 57 | 58 | -------------------------------------------------------------------------------- /benchmarking/input/config_observed_arrivals.yml: -------------------------------------------------------------------------------- 1 | version: 4 2 | common: 3 | train_log_path: 4 | test_log_path: 5 | num_final_evaluations: 10 6 | evaluation_metrics: 7 | - 3_gram_distance 8 | - 
2_gram_distance 9 | - absolute_event_distribution 10 | - relative_event_distribution 11 | - circadian_event_distribution 12 | - arrival_event_distribution 13 | - cycle_time_distribution 14 | log_ids: 15 | case: case_id 16 | activity: activity 17 | resource: resource 18 | start_time: start_time 19 | end_time: end_time 20 | enabled_time: enabled_time 21 | use_observed_arrival_distribution: true 22 | preprocessing: 23 | multitasking: false 24 | enable_time_concurrency_threshold: 0.5 25 | concurrency_df: 0.75 26 | concurrency_l2l: 0.9 27 | concurrency_l1l: 0.9 28 | control_flow: 29 | optimization_metric: n_gram_distance 30 | num_iterations: 30 31 | num_evaluations_per_iteration: 5 32 | gateway_probabilities: discovery 33 | discovery_algorithm: sm1 34 | epsilon: 35 | - 0.1 36 | - 1.0 37 | eta: 38 | - 0.2 39 | - 0.6 40 | replace_or_joins: 41 | - true 42 | - false 43 | prioritize_parallelism: 44 | - true 45 | - false 46 | resource_model: 47 | optimization_metric: circadian_event_distribution 48 | num_iterations: 40 49 | num_evaluations_per_iteration: 5 50 | resource_profiles: 51 | discovery_type: differentiated 52 | granularity: 53 | - 15 54 | - 60 55 | confidence: 56 | - 0.1 57 | - 1.0 58 | support: 59 | - 0.1 60 | - 1.0 61 | participation: 0.4 62 | extraneous_activity_delays: 63 | optimization_metric: relative_event_distribution 64 | num_iterations: 1 65 | -------------------------------------------------------------------------------- /benchmarking/plot_measurements.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pandas as pd 4 | import seaborn as sns 5 | from matplotlib import pyplot as plt 6 | 7 | measurements_path = Path(__file__).parent / "results/measurements.csv" 8 | 9 | df = pd.read_csv(measurements_path).sort_values(by=["metric", "name", "simod_version"]) 10 | 11 | ncols = 4 12 | nrows = df["metric"].nunique() * df["name"].nunique() / ncols 13 | 14 | fig, axes = plt.subplots(nrows=int(nrows), ncols=int(ncols), figsize=(20, 40)) 15 | 16 | for group_name, group_df in df.groupby(["metric", "name"]): 17 | metric, name = group_name 18 | ax = axes.flatten()[list(df["metric"].unique()).index(metric) * 4 + list(df["name"].unique()).index(name)] 19 | ax.set_title(f"{metric} - {name}") 20 | sns.barplot(data=group_df, x="simod_version", y="distance", ax=ax) 21 | 22 | plt.tight_layout() 23 | plt.savefig(Path(__file__).parent / "measurements.png") 24 | -------------------------------------------------------------------------------- /benchmarking/preprocess_logs.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pandas as pd 4 | 5 | 6 | def main(): 7 | base_dir = Path("input/logs") 8 | 9 | for log_path in base_dir.glob("*.csv.gz"): 10 | print(log_path) 11 | df = pd.read_csv(log_path) 12 | df = df.apply(lambda x: x.str.strip() if x.dtype == "object" else x) 13 | df.to_csv(log_path, index=False, compression="gzip") 14 | 15 | 16 | if __name__ == "__main__": 17 | main() 18 | -------------------------------------------------------------------------------- /build_docker.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | docker buildx build --platform linux/amd64,linux/arm64 -t nokal/simod -f Dockerfile --push . 
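# NOTE: building for multiple platforms with --push assumes an active Buildx builder that
# supports multi-platform builds (the default "docker" driver does not). If needed, one can
# be created with, for example:
#   docker buildx create --name multiarch --driver docker-container --use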
-------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | click==8.1.8 2 | hyperopt==0.2.7 3 | lxml==5.3.0 4 | matplotlib==3.9.4 5 | networkx==3.2.1 6 | numpy==1.26.4 7 | pandas==2.2.3 8 | pendulum==3.0.0 9 | pydantic==2.10.6 10 | python-dotenv==1.0.1 11 | python-multipart==0.0.12 12 | pytz==2024.2 13 | PyYAML==6.0.2 14 | requests==2.32.3 15 | scipy==1.13.1 16 | statistics==1.0.3.5 17 | tqdm==4.67.1 18 | xmltodict==0.13.0 19 | prosimos==2.0.6 20 | extraneous-activity-delays==2.2.1 21 | openxes-cli-py==0.1.15 22 | pix-framework==0.13.17 23 | log-distance-measures==2.0.2 24 | sphinx-rtd-theme 25 | -------------------------------------------------------------------------------- /docs/source/_static/configuration_example.yml: -------------------------------------------------------------------------------- 1 | ################################################################################################################# 2 | # Simple configuration example with i) no evaluation of the final BPS model, ii) 20 iterations of control-flow # 3 | # discovery, iii) 20 iterations of resource model (differentiated) discovery, and iv) direct discovery of # 4 | # extraneous delays. 
# 5 | ################################################################################################################# 6 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model # 7 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 8 | ################################################################################################################# 9 | version: 5 10 | ########## 11 | # Common # 12 | ########## 13 | common: 14 | # Path to the event log in CSV format 15 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 16 | # Specify the name for each of the columns in the CSV file (XES standard by default) 17 | log_ids: 18 | case: "case_id" 19 | activity: "activity" 20 | resource: "resource" 21 | enabled_time: "enabled_time" # If not present in the log, automatically computed 22 | start_time: "start_time" 23 | end_time: "end_time" 24 | # Whether to discover case attributes or not 25 | discover_data_attributes: false 26 | ################# 27 | # Preprocessing # 28 | ################# 29 | preprocessing: 30 | # Threshold to consider two activities as concurrent when computing the enabled time (if necessary) 31 | enable_time_concurrency_threshold: 0.75 32 | ################ 33 | # Control-flow # 34 | ################ 35 | control_flow: 36 | # Metric to guide the optimization process (loss function to minimize) 37 | optimization_metric: two_gram_distance 38 | # Number of optimization iterations over the search space 39 | num_iterations: 20 40 | # Number of times to evaluate each iteration (using the mean of all of them) 41 | num_evaluations_per_iteration: 3 42 | # Method for discovering gateway probabilities 43 | gateway_probabilities: discovery 44 | # Discover process model with SplitMiner v3 45 | mining_algorithm: sm1 46 | # Number of concurrent relations between events to be captured 47 | epsilon: 48 | - 0.05 49 | - 0.4 50 | # Threshold for filtering the incoming and outgoing edges 51 | eta: 52 | - 0.2 53 | - 0.7 54 | # Whether to replace non-trivial OR joins or not 55 | replace_or_joins: 56 | - true 57 | - false 58 | # Whether to prioritize parallelism over loops or not 59 | prioritize_parallelism: 60 | - true 61 | - false 62 | ################## 63 | # Resource model # 64 | ################## 65 | resource_model: 66 | # Metric to guide the optimization process (loss function to minimize) 67 | optimization_metric: circadian_emd 68 | # Number of optimization iterations over the search space 69 | num_iterations: 20 70 | # Number of times to evaluate each iteration (using the mean of all of them) 71 | num_evaluations_per_iteration: 3 72 | # Whether to discover prioritization or batching behavior 73 | discover_prioritization_rules: false 74 | discover_batching_rules: false 75 | # Resource profiles configuration 76 | resource_profiles: 77 | # Resource profile discovery type 78 | discovery_type: differentiated 79 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 80 | granularity: 60 81 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 82 | confidence: 83 | - 0.5 84 | - 0.85 85 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 86 | support: 87 | - 0.05 88 | - 0.5 89 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 90 | participation: 0.4 91 | 
##################### 92 | # Extraneous delays # 93 | ##################### 94 | extraneous_activity_delays: 95 | # Method to compute the extraneous delay 96 | discovery_method: eclipse-aware 97 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 98 | num_iterations: 1 99 | -------------------------------------------------------------------------------- /docs/source/_static/configuration_example_data_aware.yml: -------------------------------------------------------------------------------- 1 | ################################################################################################################# 2 | # Simple configuration example with i) no evaluation of the final BPS model, ii) 10 iterations of control-flow # 3 | # discovery (BPMN model provided) with data-aware decision points, iii) 20 iterations of resource model # 4 | # (differentiated) discovery, and iv) no discovery of extraneous delays. # 5 | ################################################################################################################# 6 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model # 7 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 8 | ################################################################################################################# 9 | version: 5 10 | ########## 11 | # Common # 12 | ########## 13 | common: 14 | # Path to the event log in CSV format 15 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 16 | # Specify the name for each of the columns in the CSV file (XES standard by default) 17 | log_ids: 18 | case: "case_id" 19 | activity: "activity" 20 | resource: "resource" 21 | enabled_time: "enabled_time" # If not present in the log, automatically computed 22 | start_time: "start_time" 23 | end_time: "end_time" 24 | # Whether to discover case attributes or not 25 | discover_data_attributes: true 26 | ################# 27 | # Preprocessing # 28 | ################# 29 | preprocessing: 30 | # Threshold to consider two activities as concurrent when computing the enabled time (if necessary) 31 | enable_time_concurrency_threshold: 0.75 32 | ################ 33 | # Control-flow # 34 | ################ 35 | control_flow: 36 | # Metric to guide the optimization process (loss function to minimize) 37 | optimization_metric: two_gram_distance 38 | # Number of optimization iterations over the search space 39 | num_iterations: 20 40 | # Number of times to evaluate each iteration (using the mean of all of them) 41 | num_evaluations_per_iteration: 3 42 | # Method for discovering gateway probabilities 43 | gateway_probabilities: discovery 44 | # Discover process model with SplitMiner v3 45 | mining_algorithm: sm1 46 | # Number of concurrent relations between events to be captured 47 | epsilon: 48 | - 0.05 49 | - 0.4 50 | # Threshold for filtering the incoming and outgoing edges 51 | eta: 52 | - 0.2 53 | - 0.7 54 | # Whether to replace non-trivial OR joins or not 55 | replace_or_joins: 56 | - true 57 | - false 58 | # Whether to prioritize parallelism over loops or not 59 | prioritize_parallelism: 60 | - true 61 | - false 62 | # Discover data-aware branching rules, i.e., BPMN decision points based on value of data attributes 63 | discover_branch_rules: true 64 | # Minimum f-score value to consider the discovered data-aware branching rules 65 | f_score: 66 | - 0.3 67 | - 0.9 68 | ################## 69 | # Resource model # 70 | 
################## 71 | resource_model: 72 | # Metric to guide the optimization process (loss function to minimize) 73 | optimization_metric: circadian_emd 74 | # Number of optimization iterations over the search space 75 | num_iterations: 20 76 | # Number of times to evaluate each iteration (using the mean of all of them) 77 | num_evaluations_per_iteration: 3 78 | # Whether to discover prioritization or batching behavior 79 | discover_prioritization_rules: false 80 | discover_batching_rules: false 81 | # Resource profiles configuration 82 | resource_profiles: 83 | # Resource profile discovery type 84 | discovery_type: differentiated 85 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 86 | granularity: 60 87 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 88 | confidence: 89 | - 0.5 90 | - 0.85 91 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 92 | support: 93 | - 0.05 94 | - 0.5 95 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 96 | participation: 0.4 97 | ##################### 98 | # Extraneous delays # 99 | ##################### 100 | extraneous_activity_delays: 101 | # Method to compute the extraneous delay 102 | discovery_method: eclipse-aware 103 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 104 | num_iterations: 1 105 | -------------------------------------------------------------------------------- /docs/source/_static/configuration_example_fuzzy.yml: -------------------------------------------------------------------------------- 1 | ################################################################################################################# 2 | # Simple configuration example with i) no evaluation of the final BPS model, ii) 20 iterations of control-flow # 3 | # discovery, iii) 10 iterations of resource model (fuzzy availability) discovery, and iv) no discovery of # 4 | # extraneous delays. 
# 5 | ################################################################################################################# 6 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model # 7 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 8 | ################################################################################################################# 9 | version: 5 10 | ########## 11 | # Common # 12 | ########## 13 | common: 14 | # Path to the event log in CSV format 15 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 16 | # Specify the name for each of the columns in the CSV file (XES standard by default) 17 | log_ids: 18 | case: "case_id" 19 | activity: "activity" 20 | resource: "resource" 21 | enabled_time: "enabled_time" # If not present in the log, automatically computed 22 | start_time: "start_time" 23 | end_time: "end_time" 24 | # Whether to discover case attributes or not 25 | discover_data_attributes: false 26 | ################# 27 | # Preprocessing # 28 | ################# 29 | preprocessing: 30 | # Threshold to consider two activities as concurrent when computing the enabled time (if necessary) 31 | enable_time_concurrency_threshold: 0.75 32 | ################ 33 | # Control-flow # 34 | ################ 35 | control_flow: 36 | # Metric to guide the optimization process (loss function to minimize) 37 | optimization_metric: two_gram_distance 38 | # Number of optimization iterations over the search space 39 | num_iterations: 20 40 | # Number of times to evaluate each iteration (using the mean of all of them) 41 | num_evaluations_per_iteration: 3 42 | # Method for discovering gateway probabilities 43 | gateway_probabilities: discovery 44 | # Discover process model with SplitMiner v3 45 | mining_algorithm: sm1 46 | # Number of concurrent relations between events to be captured 47 | epsilon: 48 | - 0.05 49 | - 0.4 50 | # Threshold for filtering the incoming and outgoing edges 51 | eta: 52 | - 0.2 53 | - 0.7 54 | # Whether to replace non-trivial OR joins or not 55 | replace_or_joins: 56 | - true 57 | - false 58 | # Whether to prioritize parallelism over loops or not 59 | prioritize_parallelism: 60 | - true 61 | - false 62 | ################## 63 | # Resource model # 64 | ################## 65 | resource_model: 66 | # Metric to guide the optimization process (loss function to minimize) 67 | optimization_metric: circadian_emd 68 | # Number of optimization iterations over the search space 69 | num_iterations: 10 70 | # Number of times to evaluate each iteration (using the mean of all of them) 71 | num_evaluations_per_iteration: 3 72 | # Whether to discover prioritization or batching behavior 73 | discover_prioritization_rules: false 74 | discover_batching_rules: false 75 | # Resource profiles configuration 76 | resource_profiles: 77 | # Resource profile discovery type 78 | discovery_type: differentiated_fuzzy 79 | # Duration of each granule in the resource calendar that will get its own probability 80 | granularity: 60 81 | # Angle of the fuzzy trapezoid when computing the availability probability for an activity (angle from start to end) 82 | fuzzy_angle: 83 | - 0.1 84 | - 0.9 85 | -------------------------------------------------------------------------------- /docs/source/_static/configuration_example_with_evaluation.yml: -------------------------------------------------------------------------------- 1 | 
################################################################################################################# 2 | # Same simple configuration as 'configuration_example.yml' but evaluating the quality of the final BPS model # 3 | ################################################################################################################# 4 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model # 5 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 6 | ################################################################################################################# 7 | version: 5 8 | ########## 9 | # Common # 10 | ########## 11 | common: 12 | # Path to the event log in CSV format 13 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 14 | # Specify the name for each of the columns in the CSV file (XES standard by default) 15 | log_ids: 16 | case: "case_id" 17 | activity: "activity" 18 | resource: "resource" 19 | enabled_time: "enabled_time" # If not present in the log, automatically computed 20 | start_time: "start_time" 21 | end_time: "end_time" 22 | # Event log to evaluate the discovered BPS model with 23 | test_log_path: ../event_logs/LoanApp_simplified_test.csv.gz 24 | # Number of evaluations of the discovered BPS model 25 | num_final_evaluations: 10 26 | # Metrics to evaluate the discovered BPS model 27 | evaluation_metrics: 28 | - 3_gram_distance 29 | - 2_gram_distance 30 | - absolute_event_distribution 31 | - relative_event_distribution 32 | - circadian_event_distribution 33 | - arrival_event_distribution 34 | - cycle_time_distribution 35 | # Whether to discover case attributes or not 36 | discover_data_attributes: false 37 | ################# 38 | # Preprocessing # 39 | ################# 40 | preprocessing: 41 | # Threshold to consider two activities as concurrent when computing the enabled time (if necessary) 42 | enable_time_concurrency_threshold: 0.75 43 | ################ 44 | # Control-flow # 45 | ################ 46 | control_flow: 47 | # Metric to guide the optimization process (loss function to minimize) 48 | optimization_metric: two_gram_distance 49 | # Number of optimization iterations over the search space 50 | num_iterations: 20 51 | # Number of times to evaluate each iteration (using the mean of all of them) 52 | num_evaluations_per_iteration: 3 53 | # Methods for discovering gateway probabilities 54 | gateway_probabilities: discovery 55 | # Discover process model with SplitMiner v3 56 | mining_algorithm: sm1 57 | # Number of concurrent relations between events to be captured 58 | epsilon: 59 | - 0.05 60 | - 0.4 61 | # Threshold for filtering the incoming and outgoing edges 62 | eta: 63 | - 0.2 64 | - 0.7 65 | # Whether to replace non-trivial OR joins or not 66 | replace_or_joins: 67 | - true 68 | - false 69 | # Whether to prioritize parallelism over loops or not 70 | prioritize_parallelism: 71 | - true 72 | - false 73 | ################## 74 | # Resource model # 75 | ################## 76 | resource_model: 77 | # Metric to guide the optimization process (loss function to minimize) 78 | optimization_metric: circadian_emd 79 | # Number of optimization iterations over the search space 80 | num_iterations: 20 81 | # Number of times to evaluate each iteration (using the mean of all of them) 82 | num_evaluations_per_iteration: 3 83 | # Whether to discover prioritization or batching behavior 84 | discover_prioritization_rules: false 85 | discover_batching_rules: false
86 | # Resource profiles configuration 87 | resource_profiles: 88 | # Resource profile discovery type 89 | discovery_type: differentiated 90 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 91 | granularity: 60 92 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 93 | confidence: 94 | - 0.5 95 | - 0.85 96 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 97 | support: 98 | - 0.05 99 | - 0.5 100 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 101 | participation: 0.4 102 | ##################### 103 | # Extraneous delays # 104 | ##################### 105 | extraneous_activity_delays: 106 | # Method to compute the extraneous delay 107 | discovery_method: eclipse-aware 108 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 109 | num_iterations: 1 110 | -------------------------------------------------------------------------------- /docs/source/_static/configuration_example_with_provided_process_model.yml: -------------------------------------------------------------------------------- 1 | ################################################################################################################# 2 | # Same simple configuration as 'configuration_example.yml' but providing the BPMN model # 3 | ################################################################################################################# 4 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model # 5 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 6 | ################################################################################################################# 7 | version: 5 8 | ########## 9 | # Common # 10 | ########## 11 | common: 12 | # Path to the event log in CSV format 13 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 14 | # Specify the name for each of the columns in the CSV file (XES standard by default) 15 | log_ids: 16 | case: "case_id" 17 | activity: "activity" 18 | resource: "resource" 19 | enabled_time: "enabled_time" # If not present in the log, automatically computed 20 | start_time: "start_time" 21 | end_time: "end_time" 22 | # Use this process model and skip its discovery 23 | process_model_path: ../models/LoanApp_simplified.bpmn 24 | # Whether to discover case attributes or not 25 | discover_data_attributes: false 26 | ################# 27 | # Preprocessing # 28 | ################# 29 | preprocessing: 30 | # Threshold to consider two activities as concurrent when computing the enabled time (if necessary) 31 | enable_time_concurrency_threshold: 0.75 32 | ################ 33 | # Control-flow # 34 | ################ 35 | control_flow: 36 | # Metric to guide the optimization process (loss function to minimize) 37 | optimization_metric: two_gram_distance 38 | # Number of optimization iterations over the search space 39 | num_iterations: 1 40 | # Number of times to evaluate each iteration (using the mean of all of them) 41 | num_evaluations_per_iteration: 3 42 | # Methods for discovering gateway probabilities 43 | gateway_probabilities: discovery 44 | ################## 45 | # Resource model # 46 | ################## 47 | resource_model: 48 | # Metric to guide the optimization process (loss 
function to minimize) 49 | optimization_metric: circadian_emd 50 | # Number of optimization iterations over the search space 51 | num_iterations: 20 52 | # Number of times to evaluate each iteration (using the mean of all of them) 53 | num_evaluations_per_iteration: 3 54 | # Whether to discover prioritization or batching behavior 55 | discover_prioritization_rules: false 56 | discover_batching_rules: false 57 | # Resource profiles configuration 58 | resource_profiles: 59 | # Resource profile discovery type 60 | discovery_type: pool 61 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 62 | granularity: 60 63 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 64 | confidence: 65 | - 0.5 66 | - 0.85 67 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 68 | support: 69 | - 0.05 70 | - 0.5 71 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 72 | participation: 0.4 73 | ##################### 74 | # Extraneous delays # 75 | ##################### 76 | extraneous_activity_delays: 77 | # Method to compute the extraneous delay 78 | discovery_method: eclipse-aware 79 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 80 | num_iterations: 1 81 | -------------------------------------------------------------------------------- /docs/source/_static/configuration_one_shot.yml: -------------------------------------------------------------------------------- 1 | ################################################################################################################# 2 | # Simple configuration example for running SIMOD without parameter optimization steps. The defined parameters # 3 | # should be individual values and not intervals, as there is no optimization. 
# 4 | ################################################################################################################# 5 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 6 | ################################################################################################################# 7 | version: 5 8 | ########## 9 | # Common # 10 | ########## 11 | common: 12 | # Path to the event log in CSV format 13 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 14 | # Specify the name for each of the columns in the CSV file (XES standard by default) 15 | log_ids: 16 | case: "case_id" 17 | activity: "activity" 18 | resource: "resource" 19 | enabled_time: "enabled_time" # If not present in the log, automatically computed 20 | start_time: "start_time" 21 | end_time: "end_time" 22 | ################ 23 | # Control-flow # 24 | ################ 25 | control_flow: 26 | # Number of optimization iterations over the search space 27 | num_iterations: 1 28 | # Number of times to evaluate each iteration (using the mean of all of them) 29 | num_evaluations_per_iteration: 1 30 | # Methods for discovering gateway probabilities 31 | gateway_probabilities: discovery 32 | # Discover process model with SplitMiner v3 33 | mining_algorithm: sm1 34 | # Number of concurrent relations between events to be captured 35 | epsilon: 0.3 36 | # Threshold for filtering the incoming and outgoing edges 37 | eta: 0.5 38 | # Whether to replace non-trivial OR joins or not 39 | replace_or_joins: false 40 | # Whether to prioritize parallelism over loops or not 41 | prioritize_parallelism: true 42 | ################## 43 | # Resource model # 44 | ################## 45 | resource_model: 46 | # Number of optimization iterations over the search space 47 | num_iterations: 1 48 | # Number of times to evaluate each iteration (using the mean of all of them) 49 | num_evaluations_per_iteration: 1 50 | # Resource profiles configuration 51 | resource_profiles: 52 | # Resource profile discovery type 53 | discovery_type: differentiated 54 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 55 | granularity: 60 56 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 57 | confidence: 0.6 58 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 59 | support: 0.2 60 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 61 | participation: 0.4 62 | ##################### 63 | # Extraneous delays # 64 | ##################### 65 | extraneous_activity_delays: 66 | # Method to compute the extraneous delay 67 | discovery_method: eclipse-aware 68 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 69 | num_iterations: 1 70 | -------------------------------------------------------------------------------- /docs/source/_static/simod.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/docs/source/_static/simod.png -------------------------------------------------------------------------------- /docs/source/citation.rst: -------------------------------------------------------------------------------- 1 | Cite the Paper 2 | ============== 3 | 4 | When using SIMOD for a 
publication, please cite the following article in your paper: 5 | 6 | `[Citation pending] 7 | `_ 8 | 9 | More References 10 | ^^^^^^^^^^^^^^^ 11 | 12 | `Camargo, M., Dumas, M., González, O., 2020. "Automated discovery of 13 | business process simulation models from event logs". Decis. Support Syst. 14 | 134, 113284. 15 | `_ 16 | 17 | `Chapela-Campa, D., Dumas, M., 2024. "Enhancing business process 18 | simulation models with extraneous activity delays". Inf. Syst. 122, 102346. 19 | `_ 20 | 21 | `Chapela-Campa, D., Benchekroun, I., Baron, O., Dumas, M., Krass, D., 22 | Senderovich, A., 2025. "A framework for measuring the quality of business 23 | process simulation models". Inf. Syst. 127, 102447. 24 | `_ 25 | 26 | `Lashkevich, K., Milani, F., Chapela-Campa, D., Suvorau, I., Dumas, M., 27 | 2024. "Unveiling the causes of waiting time in business processes from event 28 | logs". Inf. Syst. 126, 102434. 29 | `_ 30 | 31 | `López-Pintado, O., Dumas, M., Berx, J., 2024a. "Discovery, simulation, and 32 | optimization of business processes with differentiated resources". Inf. Syst. 33 | 120, 102289. 34 | `_ 35 | 36 | `López-Pintado, O., Dumas, M., 2023. "Discovery and simulation of business 37 | processes with probabilistic resource availability calendars", in: Proceedings 38 | of the 5th International Conference on Process Mining (ICPM), IEEE. pp. 39 | 1–8. 40 | `_ 41 | 42 | `López-Pintado, O., Murashko, S., Dumas, M., 2024b. "Discovery and 43 | simulation of data-aware business processes", in: Proceedings of the 6th 44 | International Conference on Process Mining (ICPM), IEEE. pp. 105–112. 45 | `_ 46 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = 'SIMOD' 10 | copyright = '2025, UT Information Systems Research Group' 11 | author = 'UT Information Systems Research Group' 12 | release = '5.1.2' 13 | 14 | # -- General configuration --------------------------------------------------- 15 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 16 | 17 | import os 18 | import sys 19 | 20 | # Get the absolute path of the project's root directory 21 | sys.path.insert(0, os.path.abspath("../../src")) # Adjust if necessary 22 | 23 | extensions = [ 24 | "sphinx.ext.napoleon", 25 | "sphinx.ext.viewcode", 26 | "sphinx.ext.autosummary", 27 | "sphinx.ext.intersphinx" 28 | ] 29 | 30 | intersphinx_mapping = { 31 | "python": ("https://docs.python.org/3.9", None), 32 | "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), 33 | } 34 | 35 | templates_path = ['_templates'] 36 | exclude_patterns = [] 37 | autodoc_class_attributes = False 38 | 39 | # -- Options for HTML output ------------------------------------------------- 40 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 41 | 42 | html_theme = 'sphinx_rtd_theme' 43 | html_static_path = ['_static'] 44 | 45 | # Automatically generate summaries 46 | autosummary_generate = True 47 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. SIMOD documentation master file, created by 2 | sphinx-quickstart on Mon Jan 27 16:09:16 2025. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | SIMOD: Automated discovery of business process simulation models 7 | ================================================================ 8 | 9 | SIMOD combines process mining and machine learning techniques to automate the discovery and tuning of Business Process 10 | Simulation models from event logs extracted from enterprise information systems (ERPs, CRM, case management systems, 11 | etc.). SIMOD takes as input an event log in CSV format, a configuration file, and (optionally) a BPMN process model, 12 | and discovers a business process simulation model that can be simulated using the Prosimos simulator, which is embedded 13 | in SIMOD. 14 | 15 | 16 | .. _fig_simod: 17 | .. figure:: _static/simod.png 18 | :align: center 19 | :scale: 60% 20 | 21 | SIMOD main workflow. 22 | 23 | 24 | In its standard workflow, SIMOD receives an event log and a configuration file, and 25 | runs an iterative process to discover the BPS model that best reflects the behavior captured in the input event log. 26 | This iterative process is designed as a pipeline-based architecture composed of multiple stages that run a 27 | TPE-optimization process to obtain the parameters that lead to the most accurate model. 28 | 29 | Alternatively, SIMOD can additionally receive as input a BPMN model of the process. In this case, SIMOD skips the 30 | corresponding discovery phase, and builds the BPS model over the input BPMN model. 31 | 32 | .. note:: 33 | This project is under active development. 34 | 35 | 36 | .. 
50 | Alternatively, SIMOD can additionally receive as input a BPMN model of the process. In this case, SIMOD skips the
51 | corresponding discovery phase, and builds the BPS model over the input BPMN model.
52 | 
53 | .. note::
54 |     This project is under active development.
55 | 
56 | 
57 | .. toctree::
58 |    :maxdepth: 2
59 |    :caption: Contents:
60 | 
61 |    installation
62 |    usage
63 |    api
64 |    citation
-------------------------------------------------------------------------------- /docs/source/installation.rst: --------------------------------------------------------------------------------
1 | Installation Guide
2 | ==================
3 | 
4 | This guide provides instructions on how to install SIMOD using **pip** (PyPI) or **Docker**.
5 | 
6 | Prerequisites
7 | -------------
8 | Before installing SIMOD, ensure you have the following dependencies:
9 | 
10 | Dependencies for local installation
11 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
12 | 
13 | - **Python 3.9, 3.10, or 3.11**: The recommended (most extensively tested) version is Python 3.9; Python 3.10 and
14 |   3.11 also work.
15 | - **Java 1.8**: Ensure Java is installed and added to your system’s PATH (e.g.,
16 |   `Java.com `_).
17 | - **Rust and Cargo (\*)**: On systems without precompiled builds for some dependencies, you may also need Rust
18 |   and Cargo to build them (install both via `rustup.rs `_).
19 | 
20 | Dependencies for Docker installation
21 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
22 | 
23 | - **Docker**: If you want to run SIMOD without installing dependencies, you can use the official Docker image (install
24 |   Docker from `https://www.docker.com/get-started/ `_).
25 | 
26 | Installation via PyPI
27 | ---------------------
28 | The simplest way to install SIMOD is via **pip** from PyPI (`simod project `_):
29 | 
30 | .. code-block:: bash
31 | 
32 |     python -m pip install simod
33 | 
34 | Running SIMOD after installation:
35 | 
36 | .. code-block:: bash
37 | 
38 |     simod --help
39 | 
40 | Installation via Docker
41 | -----------------------
42 | If you prefer to run SIMOD inside a **Docker container**, an isolated environment that requires no Python or Java
43 | installation, use the following commands:
44 | 
45 | .. code-block:: bash
46 | 
47 |     docker pull nokal/simod
48 | 
49 | To start a container:
50 | 
51 | .. code-block:: bash
52 | 
53 |     docker run -it -v /path/to/resources/:/usr/src/Simod/resources -v /path/to/output:/usr/src/Simod/outputs nokal/simod bash
54 | 
55 | Use the `resources/` directory to store event logs and configuration files. The `outputs/` directory will contain the
56 | results of SIMOD.
57 | 
58 | From inside the container, you can run SIMOD with:
59 | 
60 | .. code-block:: bash
61 | 
62 |     poetry run simod --help
63 | 
64 | Docker images for different SIMOD versions are available at `https://hub.docker.com/r/nokal/simod/tags `_.
65 | 
66 | Installation via source code
67 | ----------------------------
68 | If you prefer to install SIMOD from its source code (you need `git`, `python`, and
69 | `poetry` installed), use the following commands:
70 | 
71 | .. code-block:: bash
72 | 
73 |     git clone https://github.com/AutomatedProcessImprovement/Simod.git
74 | 
75 |     cd Simod
76 | 
77 |     python -m venv simod-env
78 | 
79 |     # source ./simod-env/bin/activate  # on Linux/macOS
80 |     .\simod-env\Scripts\activate.bat   # on Windows
81 | 
82 |     poetry install
83 | 
84 | Running SIMOD after installation:
85 | 
86 | .. code-block:: bash
87 | 
88 |     simod --help
89 | 
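90 | To check that the installation works end to end, you can then run SIMOD on the bundled example configuration (the
91 | paths below assume the repository root as the working directory; adjust them to your setup):
92 | 
93 | .. code-block:: bash
94 | 
95 |     simod --configuration resources/config/configuration_example.yml --output outputs
96 | 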
-------------------------------------------------------------------------------- /docs/source/usage.rst: --------------------------------------------------------------------------------
1 | Usage Guide
2 | ===========
3 | 
4 | This guide provides instructions on how to use SIMOD from the command line to discover a BPS model from an event log in
5 | CSV format.
6 | 
7 | Running Simod
8 | -------------
9 | 
10 | Once Simod is installed (see `Installation `_), you can run it by specifying a configuration file.
11 | 
12 | Installed via PyPI or source code
13 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
14 | 
15 | .. code-block:: bash
16 | 
17 |     simod --configuration resources/config/configuration_example.yml
18 | 
19 | Replace `resources/config/configuration_example.yml` with the path to your own configuration file. Paths can be
20 | relative to the configuration file or absolute.
21 | 
22 | 
23 | Installed via Docker
24 | ^^^^^^^^^^^^^^^^^^^^
25 | 
26 | .. code-block:: bash
27 | 
28 |     poetry run simod --configuration resources/config/configuration_example.yml
29 | 
30 | Replace `resources/config/configuration_example.yml` with the path to your own configuration file. Paths can be
31 | relative to the configuration file or absolute.
32 | 
33 | Configuration File
34 | ------------------
35 | The configuration file is a YAML file that specifies various parameters for Simod. Ensure that the path to your event
36 | log is specified in the configuration file. Here are some configuration examples:
37 | 
38 | - Basic configuration to discover the full BPS
39 |   model (`basic <_static/configuration_example.yml>`_).
40 | - Basic configuration to discover the full BPS model using fuzzy (probabilistic) resource
41 |   calendars (`probabilistic <_static/configuration_example_fuzzy.yml>`_).
42 | - Basic configuration to discover the full BPS model with data-aware branching rules
43 |   (`data-aware <_static/configuration_example_data_aware.yml>`_).
44 | - Basic configuration to discover the full BPS model, and evaluate it with a specified event
45 |   log (`with evaluation <_static/configuration_example_with_evaluation.yml>`_).
46 | - Basic configuration to discover a BPS model with a provided BPMN process model as starting
47 |   point (`with BPMN model <_static/configuration_example_with_provided_process_model.yml>`_).
48 | - Basic configuration to discover a BPS model with no optimization process (one-shot)
49 |   (`one-shot <_static/configuration_one_shot.yml>`_).
50 | - Complete configuration example with all the possible
51 |   parameters (`complete config <_static/complete_configuration.yml>`_).
52 | 
53 | Event Log Format
54 | ----------------
55 | Simod takes as input an event log in CSV format.
56 | 
57 | .. _tab_event_log:
58 | .. table:: Sample of input event log format.
59 |    :align: center
60 | 
61 |    ======= =========== =================== =================== ========
62 |    case_id activity    start_time          end_time            resource
63 |    ======= =========== =================== =================== ========
64 |    512     Create PO   03/11/2021 08:00:00 03/11/2021 08:31:11 DIO
65 |    513     Create PO   03/11/2021 08:34:21 03/11/2021 09:02:09 DIO
66 |    514     Create PO   03/11/2021 09:11:11 03/11/2021 09:49:51 DIO
67 |    512     Approve PO  03/11/2021 12:13:06 03/11/2021 12:44:21 Joseph
68 |    513     Reject PO   03/11/2021 12:30:51 03/11/2021 13:15:50 Jolyne
69 |    514     Approve PO  03/11/2021 12:59:11 03/11/2021 13:32:36 Joseph
70 |    512     Check Stock 03/11/2021 14:22:10 03/11/2021 14:49:22 DIO
71 |    514     Check Stock 03/11/2021 15:11:01 03/11/2021 15:46:12 DIO
72 |    514     Order Goods 04/11/2021 09:46:12 04/11/2021 10:34:23 Joseph
73 |    512     Pack Goods  04/11/2021 10:46:50 04/11/2021 11:18:02 Giorno
74 |    ======= =========== =================== =================== ========
75 | 
76 | The column names can be specified as part of the configuration file (`see here <_static/complete_configuration.yml>`_).
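77 | 
78 | If your CSV uses different column names, map them in the `log_ids` section of the configuration file, as in the
79 | following minimal sketch (based on the bundled examples; adapt the values on the right to your own log):
80 | 
81 | .. code-block:: yaml
82 | 
83 |     common:
84 |       log_ids:
85 |         case: "case_id"
86 |         activity: "activity"
87 |         resource: "resource"
88 |         start_time: "start_time"
89 |         end_time: "end_time"
90 | 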
91 | Output
92 | ------
93 | Simod discovers a business process simulation model that can be simulated using the
94 | `Prosimos simulator `_, which is embedded in Simod.
95 | 
96 | Once SIMOD finishes, the discovered BPS model can be found in the `outputs` directory, under the `best_result` folder.
-------------------------------------------------------------------------------- /poetry.toml: --------------------------------------------------------------------------------
1 | [virtualenvs]
2 | in-project = true
3 | create = true
-------------------------------------------------------------------------------- /pyproject.toml: --------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["poetry-core"]
3 | build-backend = "poetry.core.masonry.api"
4 | 
5 | [tool.poetry]
6 | name = "simod"
7 | version = "5.1.6"
8 | authors = [
9 |     "Ihar Suvorau ",
10 |     "David Chapela ",
11 |     "Manuel Camargo ",
12 | ]
13 | description = "Simod is a Python tool for automated discovery of business process simulation models from event logs."
14 | readme = "README.md" 15 | packages = [{ include = "simod", from = "src" }] 16 | 17 | [tool.poetry.dependencies] 18 | python = "^3.9,<3.12" 19 | click = "^8.1.3" 20 | hyperopt = "^0.2.7" 21 | lxml = "^5.3.0" 22 | matplotlib = "^3.6.0" 23 | networkx = "^3.2.1" 24 | numpy = "^1.24.23" 25 | pandas = "^2.1.0" 26 | pendulum = "^3.0.0" 27 | pydantic = "^2.3.0" 28 | python-dotenv = "^1.0.0" 29 | python-multipart = "^0.0.12" 30 | pytz = "^2024.2" 31 | PyYAML = "^6.0" 32 | requests = "^2.28.2" 33 | scipy = "^1.13.0" 34 | statistics = "^1.0.3.5" 35 | tqdm = "^4.64.1" 36 | xmltodict = "^0.13.0" 37 | prosimos = "^2.0.6" 38 | extraneous-activity-delays = "^2.1.21" 39 | openxes-cli-py = "^0.1.15" 40 | pix-framework = "^0.13.17" 41 | log-distance-measures = "^2.0.0" 42 | 43 | [tool.poetry.group.dev.dependencies] 44 | pytest = "^7.1.3" 45 | pytest-cov = "^4" 46 | memory-profiler = "^0.61.0" 47 | pylint = "^2.17.4" 48 | setuptools = "^67.8.0" 49 | seaborn = "^0.12.2" 50 | pytest-benchmark = "^4.0.0" 51 | snakeviz = "^2.2.0" 52 | 53 | [tool.poetry.scripts] 54 | simod = "simod.cli:main" 55 | 56 | [tool.ruff] 57 | line-length = 120 58 | 59 | [tool.black] 60 | line-length = 120 61 | 62 | [tool.pytest.ini_options] 63 | markers = ["integration", "system", "manual", "benchmark", "smoke"] 64 | -------------------------------------------------------------------------------- /resources/config/benchmark/benchmark_diff.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | ########## 3 | # Common # 4 | ########## 5 | common: 6 | # Path to the event log in CSV format 7 | train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz 8 | # Event log to evaluate the discovered BPS model with 9 | test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz 10 | # Use observed arrival distributions 11 | use_observed_arrival_distribution: false 12 | # Specify the name for each of the columns in the CSV file (XES standard by default) 13 | log_ids: 14 | case: "case_id" 15 | activity: "activity" 16 | resource: "resource" 17 | start_time: "start_time" 18 | end_time: "end_time" 19 | # Number of evaluations of the discovered BPS model 20 | num_final_evaluations: 10 21 | # Metrics to evaluate the discovered BPS model 22 | evaluation_metrics: 23 | - 3_gram_distance 24 | - 2_gram_distance 25 | - absolute_event_distribution 26 | - relative_event_distribution 27 | - circadian_event_distribution 28 | - arrival_event_distribution 29 | - cycle_time_distribution 30 | # Whether to discover case attributes or not 31 | discover_data_attributes: false 32 | ################# 33 | # Preprocessing # 34 | ################# 35 | preprocessing: 36 | multitasking: false 37 | enable_time_concurrency_threshold: 0.5 38 | ################ 39 | # Control-flow # 40 | ################ 41 | control_flow: 42 | # Metric to guide the optimization process (loss function to minimize) 43 | optimization_metric: two_gram_distance 44 | # Number of optimization iterations over the search space 45 | num_iterations: 30 46 | # Number of times to evaluate each iteration (using the mean of all of them) 47 | num_evaluations_per_iteration: 5 48 | # Methods for discovering gateway probabilities 49 | gateway_probabilities: discovery 50 | # Discover process model with SplitMiner v3 51 | mining_algorithm: sm1 52 | # Number of concurrent relations between events to be captured 53 | epsilon: 54 | - 0.05 55 | - 0.4 56 | # Threshold for filtering the incoming and outgoing edges 57 | eta: 58 | - 0.2 59 | - 0.7 60 | # Whether to replace 
non-trivial OR joins or not 61 | replace_or_joins: 62 | - true 63 | - false 64 | # Whether to prioritize parallelism over loops or not 65 | prioritize_parallelism: true 66 | ################## 67 | # Resource model # 68 | ################## 69 | resource_model: 70 | # Metric to guide the optimization process (loss function to minimize) 71 | optimization_metric: circadian_emd 72 | # Number of optimization iterations over the search space 73 | num_iterations: 40 74 | # Number of times to evaluate each iteration (using the mean of all of them) 75 | num_evaluations_per_iteration: 5 76 | # Whether to discover prioritization or batching behavior 77 | discover_prioritization_rules: false 78 | discover_batching_rules: false 79 | # Resource profiles configuration 80 | resource_profiles: 81 | # Resource profile discovery type 82 | discovery_type: differentiated 83 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 84 | granularity: 60 85 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 86 | confidence: 87 | - 0.5 88 | - 0.85 89 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 90 | support: 91 | - 0.05 92 | - 0.5 93 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 94 | participation: 0.4 -------------------------------------------------------------------------------- /resources/config/benchmark/benchmark_diff_data_aware.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | ########## 3 | # Common # 4 | ########## 5 | common: 6 | # Path to the event log in CSV format 7 | train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz 8 | # Event log to evaluate the discovered BPS model with 9 | test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz 10 | # Specify the name for each of the columns in the CSV file (XES standard by default) 11 | log_ids: 12 | case: "case_id" 13 | activity: "activity" 14 | resource: "resource" 15 | start_time: "start_time" 16 | end_time: "end_time" 17 | # Number of evaluations of the discovered BPS model 18 | num_final_evaluations: 10 19 | # Metrics to evaluate the discovered BPS model 20 | evaluation_metrics: 21 | - 3_gram_distance 22 | - 2_gram_distance 23 | - absolute_event_distribution 24 | - relative_event_distribution 25 | - circadian_event_distribution 26 | - arrival_event_distribution 27 | - cycle_time_distribution 28 | # Whether to discover case attributes or not 29 | discover_data_attributes: true 30 | ################# 31 | # Preprocessing # 32 | ################# 33 | preprocessing: 34 | multitasking: false 35 | enable_time_concurrency_threshold: 0.5 36 | ################ 37 | # Control-flow # 38 | ################ 39 | control_flow: 40 | # Metric to guide the optimization process (loss function to minimize) 41 | optimization_metric: two_gram_distance 42 | # Number of optimization iterations over the search space 43 | num_iterations: 30 44 | # Number of times to evaluate each iteration (using the mean of all of them) 45 | num_evaluations_per_iteration: 5 46 | # Methods for discovering gateway probabilities 47 | gateway_probabilities: discovery 48 | # Discover process model with SplitMiner v3 49 | mining_algorithm: sm1 50 | # Number of concurrent relations between events to be captured 51 | epsilon: 52 | - 0.05 53 | - 0.4 54 | # Threshold for filtering the incoming 
and outgoing edges 55 | eta: 56 | - 0.2 57 | - 0.7 58 | # Whether to replace non-trivial OR joins or not 59 | replace_or_joins: 60 | - true 61 | - false 62 | # Whether to prioritize parallelism over loops or not 63 | prioritize_parallelism: true 64 | # Discover data-aware branching rules, i.e., BPMN decision points based on value of data attributes 65 | discover_branch_rules: true 66 | # Minimum f-score value to consider the discovered data-aware branching rules 67 | f_score: 68 | - 0.3 69 | - 0.9 70 | ################## 71 | # Resource model # 72 | ################## 73 | resource_model: 74 | # Metric to guide the optimization process (loss function to minimize) 75 | optimization_metric: circadian_emd 76 | # Number of optimization iterations over the search space 77 | num_iterations: 40 78 | # Number of times to evaluate each iteration (using the mean of all of them) 79 | num_evaluations_per_iteration: 5 80 | # Whether to discover prioritization or batching behavior 81 | discover_prioritization_rules: false 82 | discover_batching_rules: false 83 | # Resource profiles configuration 84 | resource_profiles: 85 | # Resource profile discovery type 86 | discovery_type: differentiated 87 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 88 | granularity: 60 89 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 90 | confidence: 91 | - 0.5 92 | - 0.85 93 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 94 | support: 95 | - 0.05 96 | - 0.5 97 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 98 | participation: 0.4 -------------------------------------------------------------------------------- /resources/config/benchmark/benchmark_diff_extr.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | ########## 3 | # Common # 4 | ########## 5 | common: 6 | # Path to the event log in CSV format 7 | train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz 8 | # Event log to evaluate the discovered BPS model with 9 | test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz 10 | # Specify the name for each of the columns in the CSV file (XES standard by default) 11 | log_ids: 12 | case: "case_id" 13 | activity: "activity" 14 | resource: "resource" 15 | start_time: "start_time" 16 | end_time: "end_time" 17 | # Number of evaluations of the discovered BPS model 18 | num_final_evaluations: 10 19 | # Metrics to evaluate the discovered BPS model 20 | evaluation_metrics: 21 | - 3_gram_distance 22 | - 2_gram_distance 23 | - absolute_event_distribution 24 | - relative_event_distribution 25 | - circadian_event_distribution 26 | - arrival_event_distribution 27 | - cycle_time_distribution 28 | # Whether to discover case attributes or not 29 | discover_data_attributes: false 30 | ################# 31 | # Preprocessing # 32 | ################# 33 | preprocessing: 34 | multitasking: false 35 | enable_time_concurrency_threshold: 0.5 36 | ################ 37 | # Control-flow # 38 | ################ 39 | control_flow: 40 | # Metric to guide the optimization process (loss function to minimize) 41 | optimization_metric: two_gram_distance 42 | # Number of optimization iterations over the search space 43 | num_iterations: 30 44 | # Number of times to evaluate each iteration (using the mean of all of them) 45 | 
num_evaluations_per_iteration: 5 46 | # Methods for discovering gateway probabilities 47 | gateway_probabilities: discovery 48 | # Discover process model with SplitMiner v3 49 | mining_algorithm: sm1 50 | # Number of concurrent relations between events to be captured 51 | epsilon: 52 | - 0.05 53 | - 0.4 54 | # Threshold for filtering the incoming and outgoing edges 55 | eta: 56 | - 0.2 57 | - 0.7 58 | # Whether to replace non-trivial OR joins or not 59 | replace_or_joins: 60 | - true 61 | - false 62 | # Whether to prioritize parallelism over loops or not 63 | prioritize_parallelism: true 64 | ################## 65 | # Resource model # 66 | ################## 67 | resource_model: 68 | # Metric to guide the optimization process (loss function to minimize) 69 | optimization_metric: circadian_emd 70 | # Number of optimization iterations over the search space 71 | num_iterations: 40 72 | # Number of times to evaluate each iteration (using the mean of all of them) 73 | num_evaluations_per_iteration: 5 74 | # Whether to discover prioritization or batching behavior 75 | discover_prioritization_rules: false 76 | discover_batching_rules: false 77 | # Resource profiles configuration 78 | resource_profiles: 79 | # Resource profile discovery type 80 | discovery_type: differentiated 81 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 82 | granularity: 60 83 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 84 | confidence: 85 | - 0.5 86 | - 0.85 87 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 88 | support: 89 | - 0.05 90 | - 0.5 91 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 92 | participation: 0.4 93 | ##################### 94 | # Extraneous delays # 95 | ##################### 96 | extraneous_activity_delays: 97 | # Method to compute the extraneous delay (naive or eclipse-aware) 98 | discovery_method: eclipse-aware 99 | # Metric to guide the optimization process (loss function to minimize) 100 | optimization_metric: relative_emd 101 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 102 | num_iterations: 20 103 | -------------------------------------------------------------------------------- /resources/config/benchmark/benchmark_fuzz.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | ########## 3 | # Common # 4 | ########## 5 | common: 6 | # Path to the event log in CSV format 7 | train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz 8 | # Event log to evaluate the discovered BPS model with 9 | test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz 10 | # Specify the name for each of the columns in the CSV file (XES standard by default) 11 | log_ids: 12 | case: "case_id" 13 | activity: "activity" 14 | resource: "resource" 15 | start_time: "start_time" 16 | end_time: "end_time" 17 | # Number of evaluations of the discovered BPS model 18 | num_final_evaluations: 10 19 | # Metrics to evaluate the discovered BPS model 20 | evaluation_metrics: 21 | - 3_gram_distance 22 | - 2_gram_distance 23 | - absolute_event_distribution 24 | - relative_event_distribution 25 | - circadian_event_distribution 26 | - arrival_event_distribution 27 | - cycle_time_distribution 28 | # Whether to discover case attributes or not 29 | 
discover_data_attributes: false 30 | ################# 31 | # Preprocessing # 32 | ################# 33 | preprocessing: 34 | multitasking: false 35 | enable_time_concurrency_threshold: 0.5 36 | ################ 37 | # Control-flow # 38 | ################ 39 | control_flow: 40 | # Metric to guide the optimization process (loss function to minimize) 41 | optimization_metric: two_gram_distance 42 | # Number of optimization iterations over the search space 43 | num_iterations: 30 44 | # Number of times to evaluate each iteration (using the mean of all of them) 45 | num_evaluations_per_iteration: 5 46 | # Methods for discovering gateway probabilities 47 | gateway_probabilities: discovery 48 | # Discover process model with SplitMiner v3 49 | mining_algorithm: sm1 50 | # Number of concurrent relations between events to be captured 51 | epsilon: 52 | - 0.05 53 | - 0.4 54 | # Threshold for filtering the incoming and outgoing edges 55 | eta: 56 | - 0.2 57 | - 0.7 58 | # Whether to replace non-trivial OR joins or not 59 | replace_or_joins: 60 | - true 61 | - false 62 | # Whether to prioritize parallelism over loops or not 63 | prioritize_parallelism: true 64 | ################## 65 | # Resource model # 66 | ################## 67 | resource_model: 68 | # Metric to guide the optimization process (loss function to minimize) 69 | optimization_metric: circadian_emd 70 | # Number of optimization iterations over the search space 71 | num_iterations: 40 72 | # Number of times to evaluate each iteration (using the mean of all of them) 73 | num_evaluations_per_iteration: 5 74 | # Whether to discover prioritization or batching behavior 75 | discover_prioritization_rules: false 76 | discover_batching_rules: false 77 | # Resource profiles configuration 78 | resource_profiles: 79 | # Resource profile discovery type 80 | discovery_type: differentiated_fuzzy 81 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 82 | granularity: 60 83 | fuzzy_angle: 84 | - 0.1 85 | - 0.9 86 | -------------------------------------------------------------------------------- /resources/config/benchmark/benchmark_fuzz_extr.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | ########## 3 | # Common # 4 | ########## 5 | common: 6 | # Path to the event log in CSV format 7 | train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz 8 | # Event log to evaluate the discovered BPS model with 9 | test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz 10 | # Specify the name for each of the columns in the CSV file (XES standard by default) 11 | log_ids: 12 | case: "case_id" 13 | activity: "activity" 14 | resource: "resource" 15 | start_time: "start_time" 16 | end_time: "end_time" 17 | # Number of evaluations of the discovered BPS model 18 | num_final_evaluations: 10 19 | # Metrics to evaluate the discovered BPS model 20 | evaluation_metrics: 21 | - 3_gram_distance 22 | - 2_gram_distance 23 | - absolute_event_distribution 24 | - relative_event_distribution 25 | - circadian_event_distribution 26 | - arrival_event_distribution 27 | - cycle_time_distribution 28 | # Whether to discover case attributes or not 29 | discover_data_attributes: false 30 | ################# 31 | # Preprocessing # 32 | ################# 33 | preprocessing: 34 | multitasking: false 35 | enable_time_concurrency_threshold: 0.5 36 | ################ 37 | # Control-flow # 38 | ################ 39 | control_flow: 40 | # Metric to guide 
the optimization process (loss function to minimize) 41 | optimization_metric: two_gram_distance 42 | # Number of optimization iterations over the search space 43 | num_iterations: 30 44 | # Number of times to evaluate each iteration (using the mean of all of them) 45 | num_evaluations_per_iteration: 5 46 | # Methods for discovering gateway probabilities 47 | gateway_probabilities: discovery 48 | # Discover process model with SplitMiner v3 49 | mining_algorithm: sm1 50 | # Number of concurrent relations between events to be captured 51 | epsilon: 52 | - 0.05 53 | - 0.4 54 | # Threshold for filtering the incoming and outgoing edges 55 | eta: 56 | - 0.2 57 | - 0.7 58 | # Whether to replace non-trivial OR joins or not 59 | replace_or_joins: 60 | - true 61 | - false 62 | # Whether to prioritize parallelism over loops or not 63 | prioritize_parallelism: true 64 | ################## 65 | # Resource model # 66 | ################## 67 | resource_model: 68 | # Metric to guide the optimization process (loss function to minimize) 69 | optimization_metric: circadian_emd 70 | # Number of optimization iterations over the search space 71 | num_iterations: 40 72 | # Number of times to evaluate each iteration (using the mean of all of them) 73 | num_evaluations_per_iteration: 5 74 | # Whether to discover prioritization or batching behavior 75 | discover_prioritization_rules: false 76 | discover_batching_rules: false 77 | # Resource profiles configuration 78 | resource_profiles: 79 | # Resource profile discovery type 80 | discovery_type: differentiated_fuzzy 81 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 82 | granularity: 60 83 | fuzzy_angle: 84 | - 0.1 85 | - 0.9 86 | ##################### 87 | # Extraneous delays # 88 | ##################### 89 | extraneous_activity_delays: 90 | # Method to compute the extraneous delay (naive or eclipse-aware) 91 | discovery_method: eclipse-aware 92 | # Metric to guide the optimization process (loss function to minimize) 93 | optimization_metric: relative_emd 94 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 95 | num_iterations: 20 96 | -------------------------------------------------------------------------------- /resources/config/benchmark/benchmark_pool.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | ########## 3 | # Common # 4 | ########## 5 | common: 6 | # Path to the event log in CSV format 7 | train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz 8 | # Event log to evaluate the discovered BPS model with 9 | test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz 10 | # Specify the name for each of the columns in the CSV file (XES standard by default) 11 | log_ids: 12 | case: "case_id" 13 | activity: "activity" 14 | resource: "resource" 15 | start_time: "start_time" 16 | end_time: "end_time" 17 | # Number of evaluations of the discovered BPS model 18 | num_final_evaluations: 10 19 | # Metrics to evaluate the discovered BPS model 20 | evaluation_metrics: 21 | - 3_gram_distance 22 | - 2_gram_distance 23 | - absolute_event_distribution 24 | - relative_event_distribution 25 | - circadian_event_distribution 26 | - arrival_event_distribution 27 | - cycle_time_distribution 28 | # Whether to discover case attributes or not 29 | discover_data_attributes: false 30 | ################# 31 | # Preprocessing # 32 | ################# 33 | preprocessing: 34 | 
multitasking: false 35 | enable_time_concurrency_threshold: 0.5 36 | ################ 37 | # Control-flow # 38 | ################ 39 | control_flow: 40 | # Metric to guide the optimization process (loss function to minimize) 41 | optimization_metric: two_gram_distance 42 | # Number of optimization iterations over the search space 43 | num_iterations: 30 44 | # Number of times to evaluate each iteration (using the mean of all of them) 45 | num_evaluations_per_iteration: 5 46 | # Methods for discovering gateway probabilities 47 | gateway_probabilities: discovery 48 | # Discover process model with SplitMiner v3 49 | mining_algorithm: sm1 50 | # Number of concurrent relations between events to be captured 51 | epsilon: 52 | - 0.05 53 | - 0.4 54 | # Threshold for filtering the incoming and outgoing edges 55 | eta: 56 | - 0.2 57 | - 0.7 58 | # Whether to replace non-trivial OR joins or not 59 | replace_or_joins: 60 | - true 61 | - false 62 | # Whether to prioritize parallelism over loops or not 63 | prioritize_parallelism: true 64 | ################## 65 | # Resource model # 66 | ################## 67 | resource_model: 68 | # Metric to guide the optimization process (loss function to minimize) 69 | optimization_metric: circadian_emd 70 | # Number of optimization iterations over the search space 71 | num_iterations: 40 72 | # Number of times to evaluate each iteration (using the mean of all of them) 73 | num_evaluations_per_iteration: 5 74 | # Whether to discover prioritization or batching behavior 75 | discover_prioritization_rules: false 76 | discover_batching_rules: false 77 | # Resource profiles configuration 78 | resource_profiles: 79 | # Resource profile discovery type 80 | discovery_type: pool 81 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 82 | granularity: 60 83 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 84 | confidence: 85 | - 0.5 86 | - 0.85 87 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 88 | support: 89 | - 0.05 90 | - 0.5 91 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 92 | participation: 0.4 93 | -------------------------------------------------------------------------------- /resources/config/configuration_example.yml: -------------------------------------------------------------------------------- 1 | ################################################################################################################# 2 | # Simple configuration example with i) no evaluation of the final BPS model, ii) 20 iterations of control-flow # 3 | # discovery, iii) 20 iterations of resource model (differentiated) discovery, and iv) direct discovery of # 4 | # extraneous delays. 
# 5 | ################################################################################################################# 6 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model # 7 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 8 | ################################################################################################################# 9 | version: 5 10 | ########## 11 | # Common # 12 | ########## 13 | common: 14 | # Path to the event log in CSV format 15 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 16 | # Specify the name for each of the columns in the CSV file (XES standard by default) 17 | log_ids: 18 | case: "case_id" 19 | activity: "activity" 20 | resource: "resource" 21 | enabled_time: "enabled_time" # If not present in the log, automatically computed 22 | start_time: "start_time" 23 | end_time: "end_time" 24 | # Whether to discover case attributes or not 25 | discover_data_attributes: false 26 | ################# 27 | # Preprocessing # 28 | ################# 29 | preprocessing: 30 | # Threshold to consider two activities as concurrent when computing the enabled time (if necessary) 31 | enable_time_concurrency_threshold: 0.75 32 | ################ 33 | # Control-flow # 34 | ################ 35 | control_flow: 36 | # Metric to guide the optimization process (loss function to minimize) 37 | optimization_metric: two_gram_distance 38 | # Number of optimization iterations over the search space 39 | num_iterations: 20 40 | # Number of times to evaluate each iteration (using the mean of all of them) 41 | num_evaluations_per_iteration: 3 42 | # Method for discovering gateway probabilities 43 | gateway_probabilities: discovery 44 | # Discover process model with SplitMiner v3 45 | mining_algorithm: sm1 46 | # Number of concurrent relations between events to be captured 47 | epsilon: 48 | - 0.05 49 | - 0.4 50 | # Threshold for filtering the incoming and outgoing edges 51 | eta: 52 | - 0.2 53 | - 0.7 54 | # Whether to replace non-trivial OR joins or not 55 | replace_or_joins: 56 | - true 57 | - false 58 | # Whether to prioritize parallelism over loops or not 59 | prioritize_parallelism: 60 | - true 61 | - false 62 | ################## 63 | # Resource model # 64 | ################## 65 | resource_model: 66 | # Metric to guide the optimization process (loss function to minimize) 67 | optimization_metric: circadian_emd 68 | # Number of optimization iterations over the search space 69 | num_iterations: 20 70 | # Number of times to evaluate each iteration (using the mean of all of them) 71 | num_evaluations_per_iteration: 3 72 | # Whether to discover prioritization or batching behavior 73 | discover_prioritization_rules: false 74 | discover_batching_rules: false 75 | # Resource profiles configuration 76 | resource_profiles: 77 | # Resource profile discovery type 78 | discovery_type: differentiated 79 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 80 | granularity: 60 81 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 82 | confidence: 83 | - 0.5 84 | - 0.85 85 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 86 | support: 87 | - 0.05 88 | - 0.5 89 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 90 | participation: 0.4 91 | 
#####################
92 | # Extraneous delays #
93 | #####################
94 | extraneous_activity_delays:
95 |   # Method to compute the extraneous delay
96 |   discovery_method: eclipse-aware
97 |   # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage)
98 |   num_iterations: 1
99 | 
-------------------------------------------------------------------------------- /resources/config/configuration_example_data_aware.yml: --------------------------------------------------------------------------------
1 | #################################################################################################################
2 | # Simple configuration example with i) no evaluation of the final BPS model, ii) 20 iterations of control-flow #
3 | # discovery with data-aware branching rules (decision points), iii) 20 iterations of resource model            #
4 | # (differentiated) discovery, and iv) direct discovery of extraneous delays.                                   #
5 | #################################################################################################################
6 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model                  #
7 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters                #
8 | #################################################################################################################
9 | version: 5
10 | ##########
11 | # Common #
12 | ##########
13 | common:
14 |   # Path to the event log in CSV format
15 |   train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz
16 |   # Specify the name for each of the columns in the CSV file (XES standard by default)
17 |   log_ids:
18 |     case: "case_id"
19 |     activity: "activity"
20 |     resource: "resource"
21 |     enabled_time: "enabled_time"  # If not present in the log, automatically computed
22 |     start_time: "start_time"
23 |     end_time: "end_time"
24 |   # Whether to discover case attributes or not
25 |   discover_data_attributes: true
26 | #################
27 | # Preprocessing #
28 | #################
29 | preprocessing:
30 |   # Threshold to consider two activities as concurrent when computing the enabled time (if necessary)
31 |   enable_time_concurrency_threshold: 0.75
32 | ################
33 | # Control-flow #
34 | ################
35 | control_flow:
36 |   # Metric to guide the optimization process (loss function to minimize)
37 |   optimization_metric: two_gram_distance
38 |   # Number of optimization iterations over the search space
39 |   num_iterations: 20
40 |   # Number of times to evaluate each iteration (using the mean of all of them)
41 |   num_evaluations_per_iteration: 3
42 |   # Method for discovering gateway probabilities
43 |   gateway_probabilities: discovery
44 |   # Discover process model with SplitMiner v3
45 |   mining_algorithm: sm1
46 |   # Number of concurrent relations between events to be captured
47 |   epsilon:
48 |     - 0.05
49 |     - 0.4
50 |   # Threshold for filtering the incoming and outgoing edges
51 |   eta:
52 |     - 0.2
53 |     - 0.7
54 |   # Whether to replace non-trivial OR joins or not
55 |   replace_or_joins:
56 |     - true
57 |     - false
58 |   # Whether to prioritize parallelism over loops or not
59 |   prioritize_parallelism:
60 |     - true
61 |     - false
62 |   # Discover data-aware branching rules, i.e., BPMN decision points based on value of data attributes
63 |   discover_branch_rules: true
64 |   # Minimum f-score value to consider the discovered data-aware branching rules
65 |   f_score:
66 |     - 0.3
67 |     - 0.9
68 | ##################
69 | # Resource model #
70 | 
################## 71 | resource_model: 72 | # Metric to guide the optimization process (loss function to minimize) 73 | optimization_metric: circadian_emd 74 | # Number of optimization iterations over the search space 75 | num_iterations: 20 76 | # Number of times to evaluate each iteration (using the mean of all of them) 77 | num_evaluations_per_iteration: 3 78 | # Whether to discover prioritization or batching behavior 79 | discover_prioritization_rules: false 80 | discover_batching_rules: false 81 | # Resource profiles configuration 82 | resource_profiles: 83 | # Resource profile discovery type 84 | discovery_type: differentiated 85 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 86 | granularity: 60 87 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 88 | confidence: 89 | - 0.5 90 | - 0.85 91 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 92 | support: 93 | - 0.05 94 | - 0.5 95 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 96 | participation: 0.4 97 | ##################### 98 | # Extraneous delays # 99 | ##################### 100 | extraneous_activity_delays: 101 | # Method to compute the extraneous delay 102 | discovery_method: eclipse-aware 103 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 104 | num_iterations: 1 105 | -------------------------------------------------------------------------------- /resources/config/configuration_example_fuzzy.yml: -------------------------------------------------------------------------------- 1 | ################################################################################################################# 2 | # Simple configuration example with i) no evaluation of the final BPS model, ii) 20 iterations of control-flow # 3 | # discovery, iii) 10 iterations of resource model (fuzzy availability) discovery, and iv) no discovery of # 4 | # extraneous delays. 
# 5 | ################################################################################################################# 6 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model # 7 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 8 | ################################################################################################################# 9 | version: 5 10 | ########## 11 | # Common # 12 | ########## 13 | common: 14 | # Path to the event log in CSV format 15 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 16 | # Specify the name for each of the columns in the CSV file (XES standard by default) 17 | log_ids: 18 | case: "case_id" 19 | activity: "activity" 20 | resource: "resource" 21 | enabled_time: "enabled_time" # If not present in the log, automatically computed 22 | start_time: "start_time" 23 | end_time: "end_time" 24 | # Whether to discover case attributes or not 25 | discover_data_attributes: false 26 | ################# 27 | # Preprocessing # 28 | ################# 29 | preprocessing: 30 | # Threshold to consider two activities as concurrent when computing the enabled time (if necessary) 31 | enable_time_concurrency_threshold: 0.75 32 | ################ 33 | # Control-flow # 34 | ################ 35 | control_flow: 36 | # Metric to guide the optimization process (loss function to minimize) 37 | optimization_metric: two_gram_distance 38 | # Number of optimization iterations over the search space 39 | num_iterations: 20 40 | # Number of times to evaluate each iteration (using the mean of all of them) 41 | num_evaluations_per_iteration: 3 42 | # Method for discovering gateway probabilities 43 | gateway_probabilities: discovery 44 | # Discover process model with SplitMiner v3 45 | mining_algorithm: sm1 46 | # Number of concurrent relations between events to be captured 47 | epsilon: 48 | - 0.05 49 | - 0.4 50 | # Threshold for filtering the incoming and outgoing edges 51 | eta: 52 | - 0.2 53 | - 0.7 54 | # Whether to replace non-trivial OR joins or not 55 | replace_or_joins: 56 | - true 57 | - false 58 | # Whether to prioritize parallelism over loops or not 59 | prioritize_parallelism: 60 | - true 61 | - false 62 | ################## 63 | # Resource model # 64 | ################## 65 | resource_model: 66 | # Metric to guide the optimization process (loss function to minimize) 67 | optimization_metric: circadian_emd 68 | # Number of optimization iterations over the search space 69 | num_iterations: 10 70 | # Number of times to evaluate each iteration (using the mean of all of them) 71 | num_evaluations_per_iteration: 3 72 | # Whether to discover prioritization or batching behavior 73 | discover_prioritization_rules: false 74 | discover_batching_rules: false 75 | # Resource profiles configuration 76 | resource_profiles: 77 | # Resource profile discovery type 78 | discovery_type: differentiated_fuzzy 79 | # Duration of each granule in the resource calendar that will get its own probability 80 | granularity: 60 81 | # Angle of the fuzzy trapezoid when computing the availability probability for an activity (angle from start to end) 82 | fuzzy_angle: 83 | - 0.1 84 | - 0.9 85 | -------------------------------------------------------------------------------- /resources/config/configuration_example_with_evaluation.yml: -------------------------------------------------------------------------------- 1 | 
#################################################################################################################
2 | # Same simple configuration as 'configuration_example.yml' but evaluating the quality of the final BPS model  #
3 | #################################################################################################################
4 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model                  #
5 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters                #
6 | #################################################################################################################
7 | version: 5
8 | ##########
9 | # Common #
10 | ##########
11 | common:
12 |   # Path to the event log in CSV format
13 |   train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz
14 |   # Specify the name for each of the columns in the CSV file (XES standard by default)
15 |   log_ids:
16 |     case: "case_id"
17 |     activity: "activity"
18 |     resource: "resource"
19 |     enabled_time: "enabled_time"  # If not present in the log, automatically computed
20 |     start_time: "start_time"
21 |     end_time: "end_time"
22 |   # Event log to evaluate the discovered BPS model with
23 |   test_log_path: ../event_logs/LoanApp_simplified_test.csv.gz
24 |   # Number of evaluations of the discovered BPS model
25 |   num_final_evaluations: 10
26 |   # Metrics to evaluate the discovered BPS model
27 |   evaluation_metrics:
28 |     - 3_gram_distance
29 |     - 2_gram_distance
30 |     - absolute_event_distribution
31 |     - relative_event_distribution
32 |     - circadian_event_distribution
33 |     - arrival_event_distribution
34 |     - cycle_time_distribution
35 |   # Whether to discover case attributes or not
36 |   discover_data_attributes: false
37 | #################
38 | # Preprocessing #
39 | #################
40 | preprocessing:
41 |   # Threshold to consider two activities as concurrent when computing the enabled time (if necessary)
42 |   enable_time_concurrency_threshold: 0.75
43 | ################
44 | # Control-flow #
45 | ################
46 | control_flow:
47 |   # Metric to guide the optimization process (loss function to minimize)
48 |   optimization_metric: two_gram_distance
49 |   # Number of optimization iterations over the search space
50 |   num_iterations: 20
51 |   # Number of times to evaluate each iteration (using the mean of all of them)
52 |   num_evaluations_per_iteration: 3
53 |   # Methods for discovering gateway probabilities
54 |   gateway_probabilities: discovery
55 |   # Discover process model with SplitMiner v3
56 |   mining_algorithm: sm1
57 |   # Number of concurrent relations between events to be captured
58 |   epsilon:
59 |     - 0.05
60 |     - 0.4
61 |   # Threshold for filtering the incoming and outgoing edges
62 |   eta:
63 |     - 0.2
64 |     - 0.7
65 |   # Whether to replace non-trivial OR joins or not
66 |   replace_or_joins:
67 |     - true
68 |     - false
69 |   # Whether to prioritize parallelism over loops or not
70 |   prioritize_parallelism:
71 |     - true
72 |     - false
73 | ##################
74 | # Resource model #
75 | ##################
76 | resource_model:
77 |   # Metric to guide the optimization process (loss function to minimize)
78 |   optimization_metric: circadian_emd
79 |   # Number of optimization iterations over the search space
80 |   num_iterations: 20
81 |   # Number of times to evaluate each iteration (using the mean of all of them)
82 |   num_evaluations_per_iteration: 3
83 |   # Whether to discover prioritization or batching behavior
84 |   discover_prioritization_rules: false
85 |   discover_batching_rules: false
86 | # Resource profiles configuration 87 | resource_profiles: 88 | # Resource profile discovery type 89 | discovery_type: differentiated 90 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 91 | granularity: 60 92 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 93 | confidence: 94 | - 0.5 95 | - 0.85 96 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 97 | support: 98 | - 0.05 99 | - 0.5 100 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 101 | participation: 0.4 102 | ##################### 103 | # Extraneous delays # 104 | ##################### 105 | extraneous_activity_delays: 106 | # Method to compute the extraneous delay 107 | discovery_method: eclipse-aware 108 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 109 | num_iterations: 1 110 | -------------------------------------------------------------------------------- /resources/config/configuration_example_with_provided_process_model.yml: -------------------------------------------------------------------------------- 1 | ################################################################################################################# 2 | # Same simple configuration as 'configuration_example.yml' but providing the BPMN model # 3 | ################################################################################################################# 4 | # - Increase the num_iterations to (potentially) improve the quality of that discovered model # 5 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 6 | ################################################################################################################# 7 | version: 5 8 | ########## 9 | # Common # 10 | ########## 11 | common: 12 | # Path to the event log in CSV format 13 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 14 | # Specify the name for each of the columns in the CSV file (XES standard by default) 15 | log_ids: 16 | case: "case_id" 17 | activity: "activity" 18 | resource: "resource" 19 | enabled_time: "enabled_time" # If not present in the log, automatically computed 20 | start_time: "start_time" 21 | end_time: "end_time" 22 | # Use this process model and skip its discovery 23 | process_model_path: ../models/LoanApp_simplified.bpmn 24 | # Whether to discover case attributes or not 25 | discover_data_attributes: false 26 | ################# 27 | # Preprocessing # 28 | ################# 29 | preprocessing: 30 | # Threshold to consider two activities as concurrent when computing the enabled time (if necessary) 31 | enable_time_concurrency_threshold: 0.75 32 | ################ 33 | # Control-flow # 34 | ################ 35 | control_flow: 36 | # Metric to guide the optimization process (loss function to minimize) 37 | optimization_metric: two_gram_distance 38 | # Number of optimization iterations over the search space 39 | num_iterations: 1 40 | # Number of times to evaluate each iteration (using the mean of all of them) 41 | num_evaluations_per_iteration: 3 42 | # Methods for discovering gateway probabilities 43 | gateway_probabilities: discovery 44 | ################## 45 | # Resource model # 46 | ################## 47 | resource_model: 48 | # Metric to guide the optimization process (loss function 
to minimize) 49 | optimization_metric: circadian_emd 50 | # Number of optimization iterations over the search space 51 | num_iterations: 20 52 | # Number of times to evaluate each iteration (using the mean of all of them) 53 | num_evaluations_per_iteration: 3 54 | # Whether to discover prioritization or batching behavior 55 | discover_prioritization_rules: false 56 | discover_batching_rules: false 57 | # Resource profiles configuration 58 | resource_profiles: 59 | # Resource profile discovery type 60 | discovery_type: pool 61 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 62 | granularity: 60 63 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 64 | confidence: 65 | - 0.5 66 | - 0.85 67 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 68 | support: 69 | - 0.05 70 | - 0.5 71 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 72 | participation: 0.4 73 | ##################### 74 | # Extraneous delays # 75 | ##################### 76 | extraneous_activity_delays: 77 | # Method to compute the extraneous delay 78 | discovery_method: eclipse-aware 79 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 80 | num_iterations: 1 81 | -------------------------------------------------------------------------------- /resources/config/configuration_one_shot.yml: -------------------------------------------------------------------------------- 1 | ################################################################################################################# 2 | # Simple configuration example for running SIMOD without parameter optimization steps. The defined parameters # 3 | # should be individual values and not intervals, as there is no optimization. 
# 4 | ################################################################################################################# 5 | # - Visit 'complete_configuration.yml' example for a description of all configurable parameters # 6 | ################################################################################################################# 7 | version: 5 8 | ########## 9 | # Common # 10 | ########## 11 | common: 12 | # Path to the event log in CSV format 13 | train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz 14 | # Specify the name for each of the columns in the CSV file (XES standard by default) 15 | log_ids: 16 | case: "case_id" 17 | activity: "activity" 18 | resource: "resource" 19 | enabled_time: "enabled_time" # If not present in the log, automatically computed 20 | start_time: "start_time" 21 | end_time: "end_time" 22 | ################ 23 | # Control-flow # 24 | ################ 25 | control_flow: 26 | # Number of optimization iterations over the search space 27 | num_iterations: 1 28 | # Number of times to evaluate each iteration (using the mean of all of them) 29 | num_evaluations_per_iteration: 1 30 | # Methods for discovering gateway probabilities 31 | gateway_probabilities: discovery 32 | # Discover process model with SplitMiner v3 33 | mining_algorithm: sm1 34 | # Number of concurrent relations between events to be captured 35 | epsilon: 0.3 36 | # Threshold for filtering the incoming and outgoing edges 37 | eta: 0.5 38 | # Whether to replace non-trivial OR joins or not 39 | replace_or_joins: false 40 | # Whether to prioritize parallelism over loops or not 41 | prioritize_parallelism: true 42 | ################## 43 | # Resource model # 44 | ################## 45 | resource_model: 46 | # Number of optimization iterations over the search space 47 | num_iterations: 1 48 | # Number of times to evaluate each iteration (using the mean of all of them) 49 | num_evaluations_per_iteration: 1 50 | # Resource profiles configuration 51 | resource_profiles: 52 | # Resource profile discovery type 53 | discovery_type: differentiated 54 | # Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be) 55 | granularity: 60 56 | # Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources) 57 | confidence: 0.6 58 | # Minimum support of the intervals in the discovered calendar (of a resource or set of resources) 59 | support: 0.2 60 | # Participation of a resource in the process to discover a calendar for them (gathered together otherwise) 61 | participation: 0.4 62 | ##################### 63 | # Extraneous delays # 64 | ##################### 65 | extraneous_activity_delays: 66 | # Method to compute the extraneous delay 67 | discovery_method: eclipse-aware 68 | # Number of optimization iterations over the search space (1 = direct discovery, no optimization stage) 69 | num_iterations: 1 70 | -------------------------------------------------------------------------------- /resources/event_logs/LoanApp_simplified_test.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/resources/event_logs/LoanApp_simplified_test.csv.gz -------------------------------------------------------------------------------- /resources/event_logs/LoanApp_simplified_train.csv.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/resources/event_logs/LoanApp_simplified_train.csv.gz -------------------------------------------------------------------------------- /resources/event_logs/PurchasingExample.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/resources/event_logs/PurchasingExample.csv.gz -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This script is used for running Simod from a Docker container. 4 | 5 | # configuration path from the command line 6 | CONFIG_PATH=$1 7 | 8 | # optional output_dir from the command line 9 | OUTPUT_DIR=$2 10 | 11 | # if no config_path is specified, exit with error 12 | if [ -z "$CONFIG_PATH" ]; then 13 | echo "ERROR: No configuration file specified." 14 | exit 1 15 | fi 16 | 17 | # if no output_dir is specified, use the default directory 18 | if [ -z "$OUTPUT_DIR" ]; then 19 | OUTPUT_DIR=$(pwd)/outputs 20 | fi 21 | 22 | # run Simod (paths quoted so they survive whitespace) 23 | poetry run simod --configuration "$CONFIG_PATH" --output "$OUTPUT_DIR" 24 | -------------------------------------------------------------------------------- /src/simod/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["simod"] 2 | -------------------------------------------------------------------------------- /src/simod/batching/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/batching/__init__.py -------------------------------------------------------------------------------- /src/simod/batching/discovery.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pix_framework.discovery.batch_processing.batch_characteristics import discover_batch_processing_and_characteristics 3 | from pix_framework.io.event_log import EventLogIDs 4 | 5 | from simod.batching.types import BatchingRule 6 | 7 | 8 | def discover_batching_rules(log: pd.DataFrame, log_ids: EventLogIDs) -> list[BatchingRule]: 9 | """ 10 | Discover batching rules from a log. 11 | The enabled_time column is required. If it is missing, it will be estimated using the start-time-estimator.
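
    Examples
    --------
    A minimal usage sketch (illustrative; assumes a CSV log whose columns match the
    ``log_ids`` mapping below, and "event_log.csv" is a placeholder path):

        import pandas as pd
        from pix_framework.io.event_log import EventLogIDs

        log_ids = EventLogIDs(case="case_id", activity="activity", resource="resource",
                              enabled_time="enabled_time", start_time="start_time", end_time="end_time")
        log = pd.read_csv("event_log.csv")
        rules = discover_batching_rules(log, log_ids)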
12 | """ 13 | rules = discover_batch_processing_and_characteristics( 14 | event_log=log, log_ids=log_ids, batch_min_size=3, max_sequential_gap=pd.Timedelta("10m") 15 | ) 16 | 17 | rules = list(map(lambda x: BatchingRule.from_dict(x), rules)) 18 | 19 | return rules 20 | -------------------------------------------------------------------------------- /src/simod/branch_rules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/branch_rules/__init__.py -------------------------------------------------------------------------------- /src/simod/branch_rules/discovery.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from typing import List 3 | 4 | from simod.branch_rules.types import BranchRules 5 | 6 | from pix_framework.io.event_log import EventLogIDs 7 | from pix_framework.discovery.gateway_probabilities import GatewayProbabilities 8 | from pix_framework.discovery.gateway_conditions.gateway_conditions import discover_gateway_conditions 9 | 10 | 11 | def discover_branch_rules(bpmn_graph, log: pd.DataFrame, log_ids: EventLogIDs, f_score=0.7) -> list[BranchRules]: 12 | """ 13 | Discover branch_rules from a log. 14 | """ 15 | rules = discover_gateway_conditions(bpmn_graph, log, log_ids, f_score_threshold=f_score) 16 | 17 | rules = list(map(lambda x: BranchRules.from_dict(x), rules)) 18 | 19 | return rules 20 | 21 | 22 | def map_branch_rules_to_flows(gateway_probabilities: List[GatewayProbabilities], branch_rules: List[BranchRules]): 23 | condition_lookup = {rule.id: rule for rule in branch_rules} 24 | 25 | for gateway in gateway_probabilities: 26 | for path in gateway.outgoing_paths: 27 | if path.path_id in condition_lookup: 28 | path.condition_id = condition_lookup[path.path_id].id 29 | 30 | return gateway_probabilities 31 | -------------------------------------------------------------------------------- /src/simod/branch_rules/types.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | 4 | @dataclass 5 | class BranchRule: 6 | attribute: str 7 | comparison: str 8 | value: str 9 | 10 | @staticmethod 11 | def from_dict(data: dict) -> "BranchRule": 12 | return BranchRule( 13 | attribute=data["attribute"], 14 | comparison=data["comparison"], 15 | value=data["value"] 16 | ) 17 | 18 | def to_dict(self): 19 | return { 20 | "attribute": self.attribute, 21 | "comparison": self.comparison, 22 | "value": self.value 23 | } 24 | 25 | 26 | @dataclass 27 | class BranchRules: 28 | id: str 29 | rules: list[list[BranchRule]] 30 | 31 | @staticmethod 32 | def from_dict(data: dict) -> "BranchRules": 33 | return BranchRules( 34 | id=data["id"], 35 | rules=[ 36 | [BranchRule.from_dict(rule) for rule in rule_set] 37 | for rule_set in data["rules"] 38 | ] 39 | ) 40 | 41 | def to_dict(self): 42 | return { 43 | "id": self.id, 44 | "rules": [[rule.to_dict() for rule in rule_set] for rule_set in self.rules] 45 | } 46 | -------------------------------------------------------------------------------- /src/simod/cli.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | from typing import Optional 4 | 5 | import click 6 | import yaml 7 | from pix_framework.filesystem.file_manager import get_random_folder_id 8 | 9 | from simod.event_log.event_log import 
EventLog 10 | from simod.runtime_meter import RuntimeMeter 11 | from simod.settings.simod_settings import SimodSettings 12 | from simod.simod import Simod 13 | 14 | 15 | @click.command( 16 | help=""" 17 | Simod combines process mining and machine learning techniques to automate the discovery and tuning of 18 | Business Process Simulation models from event logs extracted from enterprise information systems. 19 | """ 20 | ) 21 | @click.option( 22 | "--configuration", 23 | "-c", 24 | default=None, 25 | required=False, 26 | type=click.Path(exists=True, dir_okay=False, resolve_path=True, path_type=Path), 27 | help="Path to the Simod configuration file.", 28 | ) 29 | @click.option( 30 | "--output", 31 | "-o", 32 | default=None, 33 | required=False, 34 | type=click.Path(file_okay=False, resolve_path=True, path_type=Path), 35 | help="Path to the output directory where discovery results will be stored.", 36 | ) 37 | @click.option( 38 | "--one-shot", 39 | default=False, 40 | is_flag=True, 41 | required=False, 42 | type=bool, 43 | help="Run Simod with default settings only once without the optimization phase.", 44 | ) 45 | @click.option( 46 | "--event-log", 47 | "-l", 48 | required=False, 49 | type=click.Path(exists=True, dir_okay=False, resolve_path=True, path_type=Path), 50 | help="Path to the event log file when using the --one-shot flag. " 51 | "Columns must be named 'case_id', 'activity', 'start_time', 'end_time', 'resource'.", 52 | ) 53 | @click.option( 54 | "--schema-yaml", 55 | required=False, 56 | is_flag=True, 57 | help="Print the configuration YAML schema and exit.", 58 | ) 59 | @click.option( 60 | "--schema-json", 61 | required=False, 62 | is_flag=True, 63 | help="Print the configuration JSON schema and exit.", 64 | ) 65 | @click.version_option() 66 | def main( 67 | configuration: Optional[Path], 68 | output: Optional[Path], 69 | one_shot: bool, 70 | event_log: Optional[Path], 71 | schema_yaml: bool, 72 | schema_json: bool, 73 | ) -> None: 74 | if schema_yaml: 75 | print(yaml.dump(SimodSettings().model_json_schema())) 76 | return 77 | 78 | if schema_json: 79 | print(json.dumps(SimodSettings().model_json_schema())) 80 | return 81 | 82 | if one_shot: 83 | settings = SimodSettings.one_shot() 84 | settings.common.train_log_path = event_log 85 | settings.common.test_log_path = None 86 | else: 87 | settings = SimodSettings.from_path(configuration) 88 | 89 | output = output if output is not None else (Path.cwd() / "outputs" / get_random_folder_id()).absolute() 90 | 91 | # To measure the runtime of each stage 92 | runtimes = RuntimeMeter() 93 | 94 | # Read and preprocess event log 95 | runtimes.start(RuntimeMeter.PREPROCESSING) 96 | event_log = EventLog.from_path( 97 | log_ids=settings.common.log_ids, 98 | train_log_path=settings.common.train_log_path, 99 | test_log_path=settings.common.test_log_path, 100 | preprocessing_settings=settings.preprocessing, 101 | need_test_partition=settings.common.perform_final_evaluation, 102 | ) 103 | runtimes.stop(RuntimeMeter.PREPROCESSING) 104 | 105 | # Instantiate and run Simod 106 | simod = Simod(settings, event_log=event_log, output_dir=output) 107 | simod.run(runtimes=runtimes) 108 | 109 | 110 | if __name__ == "__main__": 111 | main() 112 | -------------------------------------------------------------------------------- /src/simod/cli_formatter.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | 4 | def print_section(message: str): 5 | click.secho(f"\n{message}", bold=True) 6 | click.secho("=" * 
len(message), bold=True) 7 | 8 | 9 | def print_subsection(message: str): 10 | click.secho(f"\n{message}") 11 | click.echo("-" * len(message)) 12 | 13 | 14 | def print_asset(message: str): 15 | click.secho(f"\n︎{message}", bold=True) 16 | 17 | 18 | def print_message(message: str): 19 | click.echo(message) 20 | 21 | 22 | def print_notice(message: str): 23 | click.secho(f"\n{message}", bold=True) 24 | 25 | 26 | def print_warning(message: str): 27 | click.secho(f"\n{message}", bold=True) 28 | 29 | 30 | def print_step(message: str): 31 | click.echo(f"\n{message}") 32 | -------------------------------------------------------------------------------- /src/simod/control_flow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/control_flow/__init__.py -------------------------------------------------------------------------------- /src/simod/control_flow/lib/bpmn-layout-1.0.6-jar-with-dependencies.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/control_flow/lib/bpmn-layout-1.0.6-jar-with-dependencies.jar -------------------------------------------------------------------------------- /src/simod/control_flow/lib/split-miner-1.7.1-all.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/control_flow/lib/split-miner-1.7.1-all.jar -------------------------------------------------------------------------------- /src/simod/data_attributes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/data_attributes/__init__.py -------------------------------------------------------------------------------- /src/simod/data_attributes/discovery.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from simod.data_attributes.types import GlobalAttribute, CaseAttribute, EventAttribute 4 | 5 | from pix_framework.io.event_log import EventLogIDs 6 | from pix_framework.discovery.attributes.attribute_discovery import discover_attributes 7 | 8 | 9 | def discover_data_attributes(log: pd.DataFrame, log_ids: EventLogIDs) -> (list[CaseAttribute], list[GlobalAttribute], list[EventAttribute]): 10 | """ 11 | Discover data attributes from a log ignoring common non-case columns. 
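
    Examples
    --------
    A minimal sketch (assumes ``log`` and ``log_ids`` as in ``discover_batching_rules``,
    with the log carrying extra data columns, e.g. a hypothetical "amount"):

        global_attrs, case_attrs, event_attrs = discover_data_attributes(log, log_ids)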
12 | """ 13 | attributes = discover_attributes( 14 | event_log=log, 15 | log_ids=log_ids, 16 | avoid_columns=[ 17 | log_ids.case, 18 | log_ids.activity, 19 | log_ids.enabled_time, 20 | log_ids.start_time, 21 | log_ids.end_time, 22 | log_ids.resource, 23 | ], 24 | confidence_threshold=0.95, 25 | ) 26 | 27 | global_attributes = list(map(GlobalAttribute.from_dict, attributes["global_attributes"])) 28 | case_attributes = list(map(CaseAttribute.from_dict, attributes["case_attributes"])) 29 | event_attributes = list(map(EventAttribute.from_dict, attributes["event_attributes"])) 30 | 31 | return global_attributes, case_attributes, event_attributes 32 | -------------------------------------------------------------------------------- /src/simod/data_attributes/types.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from enum import Enum 3 | from typing import Union 4 | 5 | 6 | class CaseAttributeType(Enum): 7 | DISCRETE = "discrete" 8 | CONTINUOUS = "continuous" 9 | 10 | 11 | class GlobalAttributeType(Enum): 12 | DISCRETE = "discrete" 13 | CONTINUOUS = "continuous" 14 | 15 | 16 | class EventAttributeType(Enum): 17 | DISCRETE = "discrete" 18 | CONTINUOUS = "continuous" 19 | EXPRESSION = "expression" 20 | DTREE = "dtree" 21 | 22 | 23 | @dataclass 24 | class CaseAttribute: 25 | name: str 26 | type: CaseAttributeType 27 | values: Union[list[dict], dict[str, float]] 28 | 29 | @staticmethod 30 | def from_dict(case_attribute: dict) -> "CaseAttribute": 31 | """ 32 | Creates a CaseAttribute object from a dictionary returned by data_attribute_discovery.discovery. 33 | """ 34 | return CaseAttribute( 35 | name=case_attribute["name"], 36 | type=CaseAttributeType(case_attribute["type"]), 37 | values=case_attribute["values"], 38 | ) 39 | 40 | def to_prosimos(self) -> dict: 41 | if self.type == CaseAttributeType.CONTINUOUS: 42 | return { 43 | "name": self.name, 44 | "type": self.type.value, 45 | "values": self.values, 46 | } 47 | else: 48 | return { 49 | "name": self.name, 50 | "type": self.type.value, 51 | "values": self.values 52 | } 53 | 54 | 55 | @dataclass 56 | class GlobalAttribute: 57 | name: str 58 | type: GlobalAttributeType 59 | values: Union[list[dict], dict[str, float]] 60 | 61 | @staticmethod 62 | def from_dict(global_attribute: dict) -> "GlobalAttribute": 63 | """ 64 | Creates a GlobalAttribute object from a dictionary returned by data_attribute_discovery.discovery. 65 | """ 66 | return GlobalAttribute( 67 | name=global_attribute["name"], 68 | type=GlobalAttributeType(global_attribute["type"]), 69 | values=global_attribute["values"], 70 | ) 71 | 72 | def to_prosimos(self) -> dict: 73 | if self.type == GlobalAttributeType.CONTINUOUS: 74 | return { 75 | "name": self.name, 76 | "type": self.type.value, 77 | "values": self.values, 78 | } 79 | else: 80 | return { 81 | "name": self.name, 82 | "type": self.type.value, 83 | "values": self.values 84 | } 85 | 86 | 87 | @dataclass 88 | class EventAttributeDetails: 89 | name: str 90 | type: EventAttributeType 91 | values: Union[list[dict[str, float]], dict[str, Union[str, list[dict[str, float]]]], str] 92 | 93 | @staticmethod 94 | def from_dict(attribute: dict) -> "EventAttributeDetails": 95 | """ 96 | Creates an EventAttributeDetails object from a dictionary returned by data_attribute_discovery.discovery. 
97 | """ 98 | return EventAttributeDetails( 99 | name=attribute["name"], 100 | type=EventAttributeType(attribute["type"]), 101 | values=attribute["values"], 102 | ) 103 | 104 | def to_prosimos(self) -> dict: 105 | if self.type == EventAttributeType.CONTINUOUS: 106 | return { 107 | "name": self.name, 108 | "type": self.type.value, 109 | "values": self.values, 110 | } 111 | elif self.type == EventAttributeType.DISCRETE: 112 | return { 113 | "name": self.name, 114 | "type": self.type.value, 115 | "values": self.values 116 | 117 | } 118 | elif self.type == EventAttributeType.EXPRESSION: 119 | return { 120 | "name": self.name, 121 | "type": self.type.value, 122 | "values": self.values, 123 | } 124 | elif self.type == EventAttributeType.DTREE: 125 | return { 126 | "name": self.name, 127 | "type": self.type.value, 128 | "values": self.values 129 | } 130 | 131 | 132 | @dataclass 133 | class EventAttribute: 134 | event_id: str 135 | attributes: list[EventAttributeDetails] 136 | 137 | @staticmethod 138 | def from_dict(event_attribute: dict) -> "EventAttribute": 139 | """ 140 | Creates an EventAttribute object from a dictionary. 141 | """ 142 | return EventAttribute( 143 | event_id=event_attribute["event_id"], 144 | attributes=[EventAttributeDetails.from_dict(attr) for attr in event_attribute["attributes"]], 145 | ) 146 | 147 | def to_prosimos(self) -> dict: 148 | return { 149 | "event_id": self.event_id, 150 | "attributes": [attr.to_prosimos() for attr in self.attributes], 151 | } 152 | -------------------------------------------------------------------------------- /src/simod/event_log/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/event_log/__init__.py -------------------------------------------------------------------------------- /src/simod/event_log/preprocessor.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | from typing import Optional 4 | 5 | import pandas as pd 6 | from pix_framework.enhancement.concurrency_oracle import OverlappingConcurrencyOracle 7 | from pix_framework.enhancement.multitasking import adjust_durations 8 | from pix_framework.enhancement.start_time_estimator.config import ConcurrencyThresholds 9 | from pix_framework.enhancement.start_time_estimator.config import Configuration as StartTimeEstimatorConfiguration 10 | from pix_framework.enhancement.start_time_estimator.estimator import StartTimeEstimator 11 | from pix_framework.io.event_log import EventLogIDs 12 | 13 | from simod.cli_formatter import print_section, print_step 14 | 15 | 16 | @dataclass 17 | class MultitaskingSettings: 18 | log_path: Path 19 | output_dir: Path 20 | is_concurrent: bool 21 | verbose: bool 22 | 23 | 24 | @dataclass 25 | class Settings: 26 | multitasking_settings: Optional[MultitaskingSettings] = None 27 | 28 | 29 | class Preprocessor: 30 | """ 31 | Handles event log pre-processing by executing various transformations 32 | to estimate missing timestamps and adjust data for multitasking. 33 | 34 | This class modifies an input event log based on the specified settings 35 | and returns the pre-processed log. 36 | 37 | Attributes 38 | ---------- 39 | log : :class:`pandas.DataFrame` 40 | The event log stored as a DataFrame. 41 | log_ids : :class:`EventLogIDs` 42 | Identifiers for mapping column names in the event log. 
43 | """ 44 | 45 | _log: pd.DataFrame 46 | _log_ids: EventLogIDs 47 | 48 | def __init__(self, log: pd.DataFrame, log_ids: EventLogIDs): 49 | keys = [log_ids.start_time, log_ids.end_time] if log_ids.start_time in log.columns else [log_ids.end_time] 50 | self._log = log.sort_values(by=keys).reset_index(drop=True) 51 | self._log_ids = log_ids 52 | 53 | def run( 54 | self, 55 | multitasking: bool = False, 56 | concurrency_thresholds: ConcurrencyThresholds = ConcurrencyThresholds(), 57 | enable_time_concurrency_threshold: float = 0.75, 58 | ) -> pd.DataFrame: 59 | """ 60 | Executes event log pre-processing steps based on the specified parameters. 61 | 62 | This includes estimating missing start times, adjusting timestamps 63 | for multitasking scenarios, and computing enabled times. 64 | 65 | Parameters 66 | ---------- 67 | multitasking : bool 68 | Whether to adjust the timestamps for multitasking. 69 | concurrency_thresholds : :class:`ConcurrencyThresholds`, optional 70 | Thresholds for the Heuristics Miner to estimate start times. 71 | enable_time_concurrency_threshold : float 72 | Threshold for estimating enabled times. 73 | 74 | Returns 75 | ------- 76 | :class:`pandas.DataFrame` 77 | The pre-processed event log. 78 | """ 79 | print_section("Pre-processing") 80 | 81 | if self._log_ids.start_time not in self._log.columns or self._log[self._log_ids.start_time].isnull().any(): 82 | self._add_start_times(concurrency_thresholds) 83 | 84 | if multitasking: 85 | self._adjust_for_multitasking() 86 | 87 | if self._log_ids.enabled_time not in self._log.columns: 88 | # The start times were not estimated (otherwise enabled times would 89 | # be present), and the enabled times are not in the original log 90 | self._add_enabled_times(enable_time_concurrency_threshold) 91 | 92 | return self._log 93 | 94 | def _adjust_for_multitasking(self, verbose=False): 95 | print_step("Adjusting timestamps for multitasking") 96 | 97 | self._log = adjust_durations( 98 | self._log, 99 | self._log_ids, 100 | verbose=verbose, 101 | ) 102 | 103 | def _add_start_times(self, concurrency_thresholds: ConcurrencyThresholds): 104 | print_step("Adding start times") 105 | 106 | configuration = StartTimeEstimatorConfiguration( 107 | log_ids=self._log_ids, 108 | concurrency_thresholds=concurrency_thresholds, 109 | ) 110 | 111 | self._log = StartTimeEstimator(self._log, configuration).estimate(replace_recorded_start_times=True) 112 | 113 | def _add_enabled_times(self, enable_time_concurrency_threshold: float): 114 | print_step("Adding enabled times") 115 | 116 | configuration = StartTimeEstimatorConfiguration( 117 | log_ids=self._log_ids, 118 | concurrency_thresholds=ConcurrencyThresholds(df=enable_time_concurrency_threshold), 119 | consider_start_times=True, 120 | ) 121 | # The start times are the original ones, so use overlapping concurrency oracle 122 | OverlappingConcurrencyOracle(self._log, configuration).add_enabled_times(self._log) 123 | -------------------------------------------------------------------------------- /src/simod/extraneous_delays/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/extraneous_delays/__init__.py -------------------------------------------------------------------------------- /src/simod/extraneous_delays/optimizer.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from pathlib import 
Path 3 | from typing import List 4 | 5 | from extraneous_activity_delays.config import ( 6 | Configuration as ExtraneousActivityDelaysConfiguration, 7 | TimerPlacement, 8 | SimulationEngine, 9 | SimulationModel, 10 | ) 11 | from extraneous_activity_delays.enhance_with_delays import HyperOptEnhancer, DirectEnhancer 12 | from lxml import etree 13 | from pix_framework.filesystem.file_manager import remove_asset 14 | 15 | from simod.cli_formatter import print_step 16 | from simod.event_log.event_log import EventLog 17 | from simod.extraneous_delays.types import ExtraneousDelay 18 | from simod.settings.extraneous_delays_settings import ExtraneousDelaysSettings 19 | from simod.simulation.parameters.BPS_model import BPSModel 20 | 21 | 22 | class ExtraneousDelaysOptimizer: 23 | """ 24 | Optimizer for the discovery of the extraneous delays model. 25 | 26 | This class performs either a direct discovery of the extraneous delays of the process, or launches an iterative 27 | optimization that first discovers the extraneous delays and then adjusts their size to better reflect reality. 28 | 29 | Attributes 30 | ---------- 31 | event_log : :class:`~simod.event_log.event_log.EventLog` 32 | The event log containing the train and validation data. 33 | bps_model : :class:`~simod.simulation.parameters.BPS_model.BPSModel` 34 | The business process simulation model to enhance with extraneous delays, including the BPMN representation. 35 | settings : :class:`~simod.settings.extraneous_delays_settings.ExtraneousDelaysSettings` 36 | Configuration settings for extraneous delay discovery. 37 | base_directory : :class:`pathlib.Path` 38 | Directory where output files will be stored. 39 | """ 40 | 41 | def __init__( 42 | self, 43 | event_log: EventLog, 44 | bps_model: BPSModel, 45 | settings: ExtraneousDelaysSettings, 46 | base_directory: Path, 47 | ): 48 | self.event_log = event_log 49 | self.bps_model = bps_model 50 | self.settings = settings 51 | self.base_directory = base_directory 52 | 53 | assert self.bps_model.process_model is not None, "BPMN model is not specified." 54 | 55 | def run(self) -> List[ExtraneousDelay]: 56 | """ 57 | Executes the extraneous delay discovery process. 58 | 59 | This method configures the optimization process, applies either a direct enhancement 60 | or a hyperparameter optimization approach to identify delays, and returns the best 61 | detected delays as a list of `ExtraneousDelay` objects. 62 | 63 | Returns 64 | ------- 65 | List[:class:`~simod.extraneous_delays.types.ExtraneousDelay`] 66 | A list of detected extraneous delays, each containing activity names, delay IDs, 67 | and their corresponding duration distributions. 
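
        Examples
        --------
        A usage sketch (assumes ``event_log``, ``bps_model``, and ``settings`` were built by the
        surrounding pipeline; the output directory below is illustrative):

            optimizer = ExtraneousDelaysOptimizer(event_log, bps_model, settings, Path("outputs/delays"))
            delays = optimizer.run()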
68 | """ 69 | # Set-up configuration for extraneous delay discovery 70 | configuration = ExtraneousActivityDelaysConfiguration( 71 | log_ids=self.event_log.log_ids, 72 | process_name=self.event_log.process_name, 73 | num_iterations=self.settings.num_iterations, 74 | num_evaluation_simulations=self.settings.num_evaluations_per_iteration, 75 | training_partition_ratio=0.5, 76 | optimization_metric=self.settings.optimization_metric, 77 | discovery_method=self.settings.discovery_method, 78 | timer_placement=TimerPlacement.BEFORE, 79 | simulation_engine=SimulationEngine.PROSIMOS, 80 | ) 81 | configuration.PATH_OUTPUTS = self.base_directory 82 | # Discover extraneous delays 83 | simulation_model = _bps_model_to_simulation_model(self.bps_model) 84 | if self.settings.num_iterations > 1: 85 | enhancer = HyperOptEnhancer(self.event_log.train_validation_partition, simulation_model, configuration) 86 | enhancer.enhance_simulation_model_with_delays() 87 | best_timers = enhancer.best_timers 88 | else: 89 | enhancer = DirectEnhancer(self.event_log.train_validation_partition, simulation_model, configuration) 90 | best_timers = enhancer.timers 91 | # Return best delays 92 | return [ 93 | ExtraneousDelay( 94 | activity_name=activity, 95 | delay_id=f"Event_{str(uuid.uuid4())}", 96 | duration_distribution=best_timers[activity], 97 | ) 98 | for activity in best_timers 99 | ] 100 | 101 | def cleanup(self): 102 | print_step(f"Removing {self.base_directory}") 103 | remove_asset(self.base_directory) 104 | 105 | 106 | def _bps_model_to_simulation_model(bps_model: BPSModel) -> SimulationModel: 107 | parser = etree.XMLParser(remove_blank_text=True) 108 | bpmn_model = etree.parse(bps_model.process_model, parser) 109 | parameters = bps_model.to_prosimos_format() 110 | 111 | simulation_model = SimulationModel(bpmn_model, parameters) 112 | 113 | return simulation_model 114 | -------------------------------------------------------------------------------- /src/simod/extraneous_delays/types.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from pix_framework.statistics.distribution import DurationDistribution 4 | 5 | 6 | @dataclass 7 | class ExtraneousDelay: 8 | """ 9 | Represents an extraneous delay within a business process activity. 10 | 11 | This class encapsulates the details of an identified extraneous delay, 12 | including the affected activity, a unique delay identifier, and the 13 | duration distribution of the delay. 14 | 15 | Attributes 16 | ---------- 17 | activity_name : str 18 | The name of the activity where the extraneous delay occurs. 19 | delay_id : str 20 | A unique identifier for the delay event. 21 | duration_distribution : :class:`DurationDistribution` 22 | The statistical distribution representing the delay duration. 23 | """ 24 | 25 | activity_name: str 26 | delay_id: str 27 | duration_distribution: DurationDistribution 28 | 29 | def to_dict(self) -> dict: 30 | """ 31 | Converts the extraneous delay into a dictionary format. 32 | 33 | The dictionary representation is compatible with the Prosimos simulation 34 | engine, containing activity details, a unique event identifier, and the 35 | delay duration distribution. 36 | 37 | Returns 38 | ------- 39 | dict 40 | A dictionary representation of the extraneous delay. 
41 | """ 42 | return { 43 | "activity": self.activity_name, 44 | "event_id": self.delay_id, 45 | } | self.duration_distribution.to_prosimos_distribution() 46 | 47 | @staticmethod 48 | def from_dict(delay: dict) -> "ExtraneousDelay": 49 | """ 50 | Creates an `ExtraneousDelay` instance from a dictionary. 51 | 52 | This method reconstructs an `ExtraneousDelay` object from a dictionary 53 | containing activity name, delay identifier, and duration distribution. 54 | 55 | Parameters 56 | ---------- 57 | delay : dict 58 | A dictionary representation of an extraneous delay. 59 | 60 | Returns 61 | ------- 62 | :class:`ExtraneousDelay` 63 | An instance of `ExtraneousDelay` with the extracted attributes. 64 | """ 65 | return ExtraneousDelay( 66 | activity_name=delay["activity"], 67 | delay_id=delay["event_id"], 68 | duration_distribution=DurationDistribution.from_dict(delay), 69 | ) 70 | -------------------------------------------------------------------------------- /src/simod/prioritization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/prioritization/__init__.py -------------------------------------------------------------------------------- /src/simod/prioritization/discovery.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pix_framework.discovery.prioritization.discovery import discover_priority_rules 3 | from pix_framework.io.event_log import EventLogIDs 4 | 5 | from ..data_attributes.types import CaseAttribute 6 | from .types import PrioritizationRule 7 | 8 | 9 | def discover_prioritization_rules( 10 | log: pd.DataFrame, log_ids: EventLogIDs, case_attributes: list[CaseAttribute] 11 | ) -> list[PrioritizationRule]: 12 | """ 13 | Discover prioritization rules from a log. 14 | The enabled_time column is required. If it is missing, it will be estimated using the start-time-estimator. 
15 | """ 16 | case_attribute_names = list(map(lambda x: x.name, case_attributes)) 17 | 18 | rules = discover_priority_rules( 19 | event_log=log.rename( # Rename columns for hardcoded discovery package 20 | {log_ids.enabled_time: "enabled_time", log_ids.start_time: "start_time", log_ids.resource: "Resource"}, 21 | axis=1, 22 | ), 23 | attributes=case_attribute_names, 24 | ) 25 | 26 | rules = list(map(PrioritizationRule.from_prosimos, rules)) 27 | 28 | return rules 29 | -------------------------------------------------------------------------------- /src/simod/prioritization/types.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | 4 | @dataclass 5 | class PrioritizationFiringRule: 6 | attribute: str 7 | comparison: str 8 | value: list[str] 9 | 10 | @staticmethod 11 | def from_prosimos(rule: dict) -> "PrioritizationFiringRule": 12 | return PrioritizationFiringRule( 13 | attribute=rule["attribute"], 14 | comparison=rule["comparison"], 15 | value=rule["value"], 16 | ) 17 | 18 | def to_prosimos(self) -> dict: 19 | return { 20 | "attribute": self.attribute, 21 | "comparison": self.comparison, 22 | "value": self.value, 23 | } 24 | 25 | 26 | class AndRules: 27 | _rules: list[PrioritizationFiringRule] 28 | 29 | def __init__(self, rules: list[PrioritizationFiringRule]): 30 | self._rules = rules 31 | 32 | @staticmethod 33 | def from_prosimos(and_rules: list[dict]) -> "AndRules": 34 | return AndRules( 35 | rules=list(map(PrioritizationFiringRule.from_prosimos, and_rules)), 36 | ) 37 | 38 | def to_prosimos(self) -> list[dict]: 39 | return list(map(lambda x: x.to_prosimos(), self._rules)) 40 | 41 | 42 | class OrRules: 43 | _rules: list[AndRules] 44 | 45 | def __init__(self, rules: list[AndRules]): 46 | self._rules = rules 47 | 48 | @staticmethod 49 | def from_prosimos(group: list[list[dict]]) -> "OrRules": 50 | return OrRules( 51 | rules=list(map(AndRules.from_prosimos, group)), 52 | ) 53 | 54 | def to_prosimos(self) -> list[dict]: 55 | return list(map(lambda x: x.to_prosimos(), self._rules)) 56 | 57 | 58 | @dataclass 59 | class PrioritizationRule: 60 | priority_level: int 61 | rules: OrRules 62 | 63 | @staticmethod 64 | def from_prosimos(level: dict) -> "PrioritizationRule": 65 | return PrioritizationRule( 66 | priority_level=level["priority_level"], 67 | rules=OrRules.from_prosimos(level["rules"]), 68 | ) 69 | 70 | def to_prosimos(self) -> dict: 71 | return { 72 | "priority_level": self.priority_level, 73 | "rules": self.rules.to_prosimos(), 74 | } 75 | -------------------------------------------------------------------------------- /src/simod/resource_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/resource_model/__init__.py -------------------------------------------------------------------------------- /src/simod/resource_model/repair.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from pix_framework.discovery.resource_calendar_and_performance.resource_activity_performance import ( 4 | ActivityResourceDistribution, 5 | ResourceDistribution, 6 | ) 7 | from pix_framework.discovery.resource_model import ResourceModel 8 | from pix_framework.io.event_log import EventLogIDs 9 | from pix_framework.statistics.distribution import DurationDistribution, 
get_best_fitting_distribution 10 | 11 | from simod.cli_formatter import print_message 12 | 13 | 14 | def repair_with_missing_activities( 15 | resource_model: ResourceModel, model_activities: list[str], event_log: pd.DataFrame, log_ids: EventLogIDs 16 | ): 17 | """ 18 | Updates the resource_model with missing activity_resource_distributions for activities that are present in the 19 | model but not yet in the resource_model. 20 | """ 21 | 22 | # getting missing activities 23 | resource_model_activities = [ 24 | distribution.activity_id for distribution in resource_model.activity_resource_distributions 25 | ] 26 | missing_activities = [activity for activity in model_activities if activity not in resource_model_activities] 27 | 28 | # add missing activities to each resource's assigned_tasks 29 | for resource_profile in resource_model.resource_profiles: 30 | for resource in resource_profile.resources: 31 | resource.assigned_tasks += missing_activities 32 | 33 | # estimate the duration distribution of the activity from all its occurrences in event_log 34 | duration_distributions_per_activity = {} 35 | for activity in missing_activities: 36 | duration_distributions_per_activity[activity] = estimate_duration_distribution_for_activity( 37 | activity, event_log, log_ids 38 | ) 39 | 40 | # add the missing activity resource distributions to the resource model for all the resources 41 | resource_names = [ 42 | resource.id for resource_profile in resource_model.resource_profiles for resource in resource_profile.resources 43 | ] 44 | for activity, duration_distribution in duration_distributions_per_activity.items(): 45 | resource_distributions = [ 46 | ResourceDistribution( 47 | resource_id=resource_name, distribution=duration_distribution.to_prosimos_distribution() 48 | ) 49 | for resource_name in resource_names 50 | ] 51 | resource_model.activity_resource_distributions.append( 52 | ActivityResourceDistribution(activity_id=activity, activity_resources_distributions=resource_distributions) 53 | ) 54 | 55 | print_message(f"Repaired resource model with missing activities: {missing_activities}") 56 | 57 | 58 | def estimate_duration_distribution_for_activity( 59 | activity: str, event_log: pd.DataFrame, log_ids: EventLogIDs 60 | ) -> DurationDistribution: 61 | activity_events = event_log[event_log[log_ids.activity] == activity] 62 | durations = (activity_events[log_ids.end_time] - activity_events[log_ids.start_time]).values 63 | durations = [duration for duration in durations if not pd.isna(duration)] 64 | durations = [duration.astype("timedelta64[s]").astype(np.float64) for duration in durations] 65 | 66 | if len(durations) > 0: 67 | distribution = get_best_fitting_distribution(durations) 68 | else: 69 | distribution = DurationDistribution(name="fix", mean=1) 70 | 71 | return distribution 72 | -------------------------------------------------------------------------------- /src/simod/resource_model/settings.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | 4 | from pix_framework.discovery.resource_calendar_and_performance.calendar_discovery_parameters import ( 5 | CalendarDiscoveryParameters, 6 | CalendarType, 7 | ) 8 | 9 | from simod.settings.common_settings import Metric 10 | from simod.utilities import nearest_divisor_for_granularity 11 | 12 | 13 | @dataclass 14 | class HyperoptIterationParams: 15 | """ 16 | Parameters for a single iteration of the Resource Model optimization process.
17 | 18 | This class defines the necessary parameters for optimizing the resource model of the BPS model. 19 | It includes the parameter values for the discovery of resource profiles, calendars, etc. 20 | 21 | Attributes 22 | ---------- 23 | output_dir : :class:`pathlib.Path` 24 | Directory where all files of the current iteration will be stored. 25 | process_model_path : :class:`pathlib.Path` 26 | Path to the BPMN process model used for optimization. 27 | project_name : str 28 | Name of the project for file naming purposes. 29 | optimization_metric : :class:`~simod.settings.common_settings.Metric` 30 | Metric used to evaluate the quality of the current iteration's candidate. 31 | calendar_discovery_params : :class:`CalendarDiscoveryParameters` 32 | Parameters for the resource calendar (i.e., working schedules) discovery. 33 | discover_prioritization_rules : bool, optional 34 | Whether to attempt discovering prioritization rules (default: False). 35 | discover_batching_rules : bool, optional 36 | Whether to attempt discovering batching rules (default: False). 37 | """ 38 | 39 | # General settings 40 | output_dir: Path # Directory where to output all the files of the current iteration 41 | process_model_path: Path # Path to BPMN model 42 | project_name: str # Name of the project for file naming 43 | 44 | optimization_metric: Metric # Metric to evaluate the candidate of this iteration 45 | calendar_discovery_params: CalendarDiscoveryParameters # Parameters for the calendar discovery 46 | discover_prioritization_rules: bool = False # Whether to try to add prioritization or not 47 | discover_batching_rules: bool = False # Whether to try to add batching or not 48 | 49 | def to_dict(self) -> dict: 50 | """ 51 | Converts the parameters of the current iteration into a dictionary format. 52 | 53 | Returns 54 | ------- 55 | dict 56 | A dictionary containing the iteration parameters. 
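
        Illustrative output (paths and values shortened; the calendar parameters are merged in
        by ``calendar_discovery_params.to_dict()``):

            {"output_dir": "outputs/resource_model", "process_model_path": "outputs/model.bpmn",
             "project_name": "LoanApp", "optimization_metric": "circadian_event_distribution",
             "discover_prioritization_rules": "False", "discover_batching_rules": "False", ...}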
57 | """ 58 | # Save common params 59 | optimization_parameters = { 60 | "output_dir": str(self.output_dir), 61 | "process_model_path": str(self.process_model_path), 62 | "project_name": str(self.project_name), 63 | "optimization_metric": str(self.optimization_metric), 64 | "discover_prioritization_rules": str(self.discover_prioritization_rules), 65 | "discover_batching_rules": str(self.discover_batching_rules), 66 | } | self.calendar_discovery_params.to_dict() 67 | # Return dict 68 | return optimization_parameters 69 | 70 | @staticmethod 71 | def from_hyperopt_dict( 72 | hyperopt_dict: dict, 73 | optimization_metric: Metric, 74 | discovery_type: CalendarType, 75 | output_dir: Path, 76 | process_model_path: Path, 77 | project_name: str, 78 | ) -> "HyperoptIterationParams": 79 | """Create the params for this run from the hyperopt dictionary returned by the fmin function.""" 80 | # Extract model discovery parameters if needed (by default None) 81 | granularity = None 82 | confidence = None 83 | support = None 84 | participation = None 85 | fuzzy_angle = 1.0 86 | 87 | def safe_granularity(granularity: int) -> int: 88 | if 1440 % granularity != 0: 89 | return nearest_divisor_for_granularity(granularity) 90 | return granularity 91 | 92 | if discovery_type in [ 93 | CalendarType.UNDIFFERENTIATED, 94 | CalendarType.DIFFERENTIATED_BY_RESOURCE, 95 | CalendarType.DIFFERENTIATED_BY_POOL, 96 | ]: 97 | granularity = safe_granularity(hyperopt_dict["granularity"]) 98 | confidence = hyperopt_dict["confidence"] 99 | support = hyperopt_dict["support"] 100 | participation = hyperopt_dict["participation"] 101 | elif discovery_type == CalendarType.DIFFERENTIATED_BY_RESOURCE_FUZZY: 102 | granularity = safe_granularity(hyperopt_dict["granularity"]) 103 | fuzzy_angle = hyperopt_dict["fuzzy_angle"] 104 | 105 | discover_prioritization_rules = hyperopt_dict.get("discover_prioritization_rules", False) 106 | discover_batching_rules = hyperopt_dict.get("discover_batching_rules", False) 107 | 108 | return HyperoptIterationParams( 109 | output_dir=output_dir, 110 | process_model_path=process_model_path, 111 | project_name=project_name, 112 | optimization_metric=optimization_metric, 113 | calendar_discovery_params=CalendarDiscoveryParameters( 114 | discovery_type=discovery_type, 115 | granularity=granularity, 116 | confidence=confidence, 117 | support=support, 118 | participation=participation, 119 | fuzzy_angle=fuzzy_angle, 120 | ), 121 | discover_prioritization_rules=discover_prioritization_rules, 122 | discover_batching_rules=discover_batching_rules, 123 | ) 124 | -------------------------------------------------------------------------------- /src/simod/runtime_meter.py: -------------------------------------------------------------------------------- 1 | import json 2 | import timeit 3 | 4 | 5 | class RuntimeMeter: 6 | 7 | runtime_start: dict 8 | runtime_stop: dict 9 | runtimes: dict 10 | 11 | TOTAL: str = "SIMOD_TOTAL_RUNTIME" 12 | PREPROCESSING: str = "preprocessing" 13 | INITIAL_MODEL: str = "discover-initial-BPS-model" 14 | CONTROL_FLOW_MODEL: str = "optimize-control-flow-model" 15 | RESOURCE_MODEL: str = "optimize-resource-model" 16 | DATA_ATTRIBUTES_MODEL: str = "discover-data-attributes" 17 | EXTRANEOUS_DELAYS: str = "discover-extraneous-delays" 18 | FINAL_MODEL: str = "discover-final-BPS-model" 19 | EVALUATION: str = "evaluate-final-BPS-model" 20 | 21 | def __init__(self): 22 | self.runtime_start = dict() 23 | self.runtime_stop = dict() 24 | self.runtimes = dict() 25 | 26 | def start(self, stage_name: str): 27 
| self.runtime_start[stage_name] = timeit.default_timer() 28 | 29 | def stop(self, stage_name: str): 30 | self.runtime_stop[stage_name] = timeit.default_timer() 31 | self.runtimes[stage_name] = self.runtime_stop[stage_name] - self.runtime_start[stage_name] 32 | 33 | def to_json(self) -> str: 34 | return json.dumps(self.runtimes) 35 | -------------------------------------------------------------------------------- /src/simod/settings/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "common_settings", 3 | "control_flow_settings", 4 | "extraneous_delays_settings", 5 | "simod_settings", 6 | "preprocessing_settings", 7 | "resource_model_settings", 8 | ] 9 | -------------------------------------------------------------------------------- /src/simod/settings/extraneous_delays_settings.py: -------------------------------------------------------------------------------- 1 | from extraneous_activity_delays.config import ( 2 | OptimizationMetric as ExtraneousDelaysOptimizationMetric, 3 | DiscoveryMethod as ExtraneousDelaysDiscoveryMethod, 4 | ) 5 | from pydantic import BaseModel 6 | 7 | from simod.settings.common_settings import Metric 8 | 9 | 10 | class ExtraneousDelaysSettings(BaseModel): 11 | """ 12 | Configuration settings for extraneous delay optimization. 13 | 14 | This class defines parameters for discovering and optimizing extraneous 15 | delays in process simulations, including optimization metrics, discovery 16 | methods, and iteration settings. In each iteration of the optimization process, the 17 | parameters are sampled from these values or ranges. 18 | 19 | Attributes 20 | ---------- 21 | optimization_metric : :class:`ExtraneousDelaysOptimizationMetric` 22 | The metric used to evaluate process model quality at each iteration of the optimization process (i.e., 23 | loss function). 24 | num_iterations : int 25 | The number of optimization iterations to perform. 26 | num_evaluations_per_iteration : int 27 | The number of replications for the evaluations of each iteration. 28 | discovery_method : :class:`ExtraneousDelaysDiscoveryMethod` 29 | The method used to discover extraneous delays. 30 | """ 31 | 32 | optimization_metric: ExtraneousDelaysOptimizationMetric = ExtraneousDelaysOptimizationMetric.RELATIVE_EMD 33 | discovery_method: ExtraneousDelaysDiscoveryMethod = ExtraneousDelaysDiscoveryMethod.COMPLEX 34 | num_iterations: int = 1 35 | num_evaluations_per_iteration: int = 3 36 | 37 | @staticmethod 38 | def from_dict(config: dict) -> "ExtraneousDelaysSettings": 39 | """ 40 | Instantiates the extraneous delays model configuration from a dictionary. 41 | 42 | Parameters 43 | ---------- 44 | config : dict 45 | Dictionary with the configuration values for the extraneous delays model parameters. 46 | 47 | Returns 48 | ------- 49 | :class:`ExtraneousDelaysSettings` 50 | Instance of the extraneous delays model configuration for the specified dictionary values. 
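
        Examples
        --------
        A minimal sketch (keys mirror the ``extraneous_activity_delays`` section of the YAML configuration):

            settings = ExtraneousDelaysSettings.from_dict({
                "optimization_metric": "relative_event_distribution",
                "discovery_method": "eclipse-aware",
                "num_iterations": 1,
            })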
51 | """ 52 | optimization_metric = ExtraneousDelaysSettings._match_metric( 53 | config.get("optimization_metric", "relative_event_distribution") 54 | ) 55 | discovery_method = ExtraneousDelaysSettings._match_method(config.get("discovery_method", "eclipse-aware")) 56 | num_iterations = config.get("num_iterations", 1) 57 | num_evaluations_per_iteration = config.get("num_evaluations_per_iteration", 3) 58 | 59 | return ExtraneousDelaysSettings( 60 | optimization_metric=optimization_metric, 61 | discovery_method=discovery_method, 62 | num_iterations=num_iterations, 63 | num_evaluations_per_iteration=num_evaluations_per_iteration, 64 | ) 65 | 66 | def to_dict(self) -> dict: 67 | """ 68 | Translate the extraneous delays model configuration stored in this instance into a dictionary. 69 | 70 | Returns 71 | ------- 72 | dict 73 | Python dictionary storing this configuration. 74 | """ 75 | return { 76 | "optimization_metric": str(self.optimization_metric.name), 77 | "discovery_method": str(self.discovery_method.name), 78 | "num_iterations": self.num_iterations, 79 | "num_evaluations_per_iteration": self.num_evaluations_per_iteration, 80 | } 81 | 82 | @staticmethod 83 | def _match_metric(metric: str) -> ExtraneousDelaysOptimizationMetric: 84 | metric = Metric.from_str(metric) 85 | if metric == Metric.ABSOLUTE_EMD: 86 | return ExtraneousDelaysOptimizationMetric.ABSOLUTE_EMD 87 | elif metric == Metric.CYCLE_TIME_EMD: 88 | return ExtraneousDelaysOptimizationMetric.CYCLE_TIME 89 | elif metric == Metric.CIRCADIAN_EMD: 90 | return ExtraneousDelaysOptimizationMetric.CIRCADIAN_EMD 91 | elif metric == Metric.RELATIVE_EMD: 92 | return ExtraneousDelaysOptimizationMetric.RELATIVE_EMD 93 | else: 94 | raise ValueError(f"Unknown extraneous delays optimization metric {metric}") 95 | 96 | @staticmethod 97 | def _match_method(method: str) -> ExtraneousDelaysDiscoveryMethod: 98 | if method.lower() in ["naive", "naiv", "naiiv"]: 99 | return ExtraneousDelaysDiscoveryMethod.NAIVE 100 | elif method.lower() in ["complex", "eclipse-aware", "eclipseaware", "eclipse aware"]: 101 | return ExtraneousDelaysDiscoveryMethod.COMPLEX 102 | else: 103 | raise ValueError(f"Unknown extraneous delays discovery method {method}") 104 | -------------------------------------------------------------------------------- /src/simod/settings/preprocessing_settings.py: -------------------------------------------------------------------------------- 1 | from pix_framework.enhancement.start_time_estimator.config import ConcurrencyThresholds 2 | from pydantic import BaseModel 3 | 4 | 5 | class PreprocessingSettings(BaseModel): 6 | """ 7 | Configuration for event log preprocessing. 8 | 9 | This class defines parameters used to preprocess event logs before 10 | SIMOD main pipeline, including concurrency threshold settings 11 | and multitasking options. 12 | 13 | Attributes 14 | ---------- 15 | multitasking : bool 16 | Whether to preprocess the event log to handle resources working in more than one activity at a time. 17 | enable_time_concurrency_threshold : float 18 | Threshold for determining concurrent events (for computing enabled) time based on the ratio of overlapping 19 | w.r.t. their occurrences. Ranges from 0 to 1 (0.3 means that two activities will be considered concurrent 20 | when their execution overlaps in 30% or more of the cases). 
21 | concurrency_thresholds : :class:`ConcurrencyThresholds` 22 | Thresholds for the computation of the start times (if missing) based on the Heuristics miner algorithm, 23 | including direct-follows (df), length-2-loops (l2l), and length-1-loops (l1l). 24 | """ 25 | 26 | multitasking: bool = False 27 | enable_time_concurrency_threshold: float = 0.5 28 | concurrency_thresholds: ConcurrencyThresholds = ConcurrencyThresholds(df=0.75, l2l=0.9, l1l=0.9) 29 | 30 | @staticmethod 31 | def from_dict(config: dict) -> "PreprocessingSettings": 32 | """ 33 | Instantiates SIMOD preprocessing configuration from a dictionary. 34 | 35 | Parameters 36 | ---------- 37 | config : dict 38 | Dictionary with the configuration values for the preprocessing parameters. 39 | 40 | Returns 41 | ------- 42 | :class:`PreprocessingSettings` 43 | Instance of SIMOD preprocessing configuration for the specified dictionary values. 44 | """ 45 | return PreprocessingSettings( 46 | multitasking=config.get("multitasking", False), 47 | enable_time_concurrency_threshold=config.get("enable_time_concurrency_threshold", 0.5), 48 | concurrency_thresholds=ConcurrencyThresholds( 49 | df=config.get("concurrency_df", 0.75), 50 | l2l=config.get("concurrency_l2l", 0.9), 51 | l1l=config.get("concurrency_l1l", 0.9), 52 | ), 53 | ) 54 | 55 | def to_dict(self) -> dict: 56 | """ 57 | Translate the preprocessing configuration stored in this instance into a dictionary. 58 | 59 | Returns 60 | ------- 61 | dict 62 | Python dictionary storing this configuration. 63 | """ 64 | return { 65 | "multitasking": self.multitasking, 66 | "enable_time_concurrency_threshold": self.enable_time_concurrency_threshold, 67 | "concurrency_df": self.concurrency_thresholds.df, 68 | "concurrency_l2l": self.concurrency_thresholds.l2l, 69 | "concurrency_l1l": self.concurrency_thresholds.l1l, 70 | } 71 | -------------------------------------------------------------------------------- /src/simod/simulation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/simulation/__init__.py -------------------------------------------------------------------------------- /src/simod/simulation/parameters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/src/simod/simulation/parameters/__init__.py -------------------------------------------------------------------------------- /src/simod/utilities.py: -------------------------------------------------------------------------------- 1 | import math 2 | import os 3 | import platform 4 | import subprocess 5 | import time 6 | import traceback 7 | from builtins import float 8 | from pathlib import Path 9 | from typing import List, Tuple, Union 10 | 11 | from hyperopt import STATUS_FAIL, STATUS_OK 12 | 13 | 14 | def get_project_dir() -> Path: 15 | return Path(os.path.dirname(__file__)).parent.parent 16 | 17 | 18 | def is_windows() -> bool: 19 | return platform.system().lower() == "windows" 20 | 21 | 22 | def execute_external_command(args): 23 | if is_windows(): 24 | subprocess.call(" ".join(args)) 25 | else: 26 | subprocess.call(args) 27 | 28 | 29 | def hyperopt_step(status: str, fn, *args) -> Tuple[str, object]: 30 | """Executes the provided function with the given arguments in a hyperopt-safe way, returning STATUS_FAIL on exception.""" 31 | if status == STATUS_OK: 32 | 
try: 33 | return STATUS_OK, fn(*args) 34 | except Exception as error: 35 | print(error) 36 | traceback.print_exc() 37 | return STATUS_FAIL, None 38 | else: 39 | return status, None 40 | 41 | 42 | def nearest_divisor_for_granularity(granularity: int) -> int: 43 | closest = 1440 44 | closest_diff = abs(granularity - closest) 45 | for i in range(1, int(math.sqrt(1440)) + 1): 46 | if 1440 % i == 0: 47 | divisor1 = i 48 | divisor2 = 1440 // i 49 | for divisor in [divisor1, divisor2]: 50 | if divisor <= granularity: 51 | diff = granularity - divisor 52 | if diff < closest_diff: 53 | closest = divisor 54 | closest_diff = diff 55 | return closest 56 | 57 | 58 | def parse_single_value_or_interval(value: Union[float, int, List[float]]) -> Union[float, Tuple[float, float]]: 59 | if isinstance(value, float): 60 | return value 61 | elif isinstance(value, int): 62 | return float(value) 63 | else: 64 | return value[0], value[1] 65 | 66 | 67 | def get_process_name_from_log_path(log_path: Path) -> str: 68 | # Get name of the file (last component) 69 | name = log_path.name 70 | # Remove each of the suffixes, if any 71 | for suffix in reversed(log_path.suffixes): 72 | name = name.removesuffix(suffix) 73 | # Return remaining name 74 | return name 75 | 76 | 77 | def get_process_model_path(base_dir: Path, process_name: str) -> Path: 78 | return base_dir / f"{process_name}.bpmn" 79 | 80 | 81 | def get_simulation_parameters_path(base_dir: Path, process_name: str) -> Path: 82 | return base_dir / f"{process_name}.json" 83 | 84 | 85 | def measure_runtime(output_file: str = "runtime.txt"): 86 | """ 87 | Decorator for measuring runtime of a function and writing it to a file. 88 | :param output_file: Path to the output file relative to the project root. 89 | """ 90 | 91 | def decorator(func: callable): 92 | def wrapper(*args, **kwargs): 93 | start = time.time() 94 | result = func(*args, **kwargs) 95 | end = time.time() - start 96 | with open(output_file, "a") as f: 97 | module_name = func.__module__.split(".")[-1] 98 | func_name = func.__name__ 99 | f.write(f"{module_name}.{func_name}: {end} s\n") 100 | return result 101 | 102 | return wrapper 103 | 104 | return decorator 105 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/__init__.py -------------------------------------------------------------------------------- /tests/assets/LoanApp_simplified.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/LoanApp_simplified.csv.gz -------------------------------------------------------------------------------- /tests/assets/LoanApp_simplified_2.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/LoanApp_simplified_2.csv.gz -------------------------------------------------------------------------------- /tests/assets/bpic15/BPIC15_1.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/bpic15/BPIC15_1.csv.gz 
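# Usage sketch for the measure_runtime decorator defined in src/simod/utilities.py above;
# the decorated function and output file below are illustrative, not part of the codebase.
#
#     from simod.utilities import measure_runtime
#
#     @measure_runtime(output_file="runtime.txt")
#     def discover_model():
#         ...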
-------------------------------------------------------------------------------- /tests/assets/bpic15/bpic15_1_with_model_v4.yml: -------------------------------------------------------------------------------- 1 | version: 4 2 | common: 3 | train_log_path: BPIC15_1.csv.gz 4 | process_model_path: BPIC15_1.bpmn 5 | num_final_evaluations: 1 # Number of evaluations of the discovered BPS model. 6 | evaluation_metrics: # Metrics to evaluate the discovered BPS model with. 7 | - 3_gram_distance 8 | - 2_gram_distance 9 | - absolute_event_distribution 10 | - relative_event_distribution 11 | - circadian_event_distribution 12 | - arrival_event_distribution 13 | - cycle_time_distribution 14 | clean_intermediate_files: false 15 | log_ids: 16 | case: case:concept:name 17 | activity: concept:name 18 | resource: org:resource 19 | start_time: start_timestamp 20 | end_time: time:timestamp 21 | enabled_time: enabled_time 22 | preprocessing: 23 | multitasking: false # Reassign activity durations when a resource works on several activities at once (multitasking). 24 | enable_time_concurrency_threshold: 0.5 # Concurrency threshold for the enabled time computation. 25 | concurrency_df: 0.75 # Concurrency thresholds for the start time (and enabled time) estimations when 26 | concurrency_l2l: 0.9 # the start time is missing in the train event log. Using the Heuristics Miner 27 | concurrency_l1l: 0.9 # concurrency oracle. 28 | control_flow: 29 | optimization_metric: n_gram_distance 30 | num_iterations: 1 # Number of iterations to run the hyper-optimization process for control-flow discovery 31 | num_evaluations_per_iteration: 5 # Number of times to evaluate each iteration (using the mean of all of them) 32 | gateway_probabilities: # Methods to discover the probabilities of each gateway 33 | - equiprobable 34 | - discovery 35 | discovery_algorithm: sm1 # Process model discovery algorithm: sm1 (Split Miner v1) or sm2 (Split Miner v2) 36 | epsilon: 37 | - 0.0 38 | - 1.0 39 | eta: 40 | - 0.0 41 | - 1.0 42 | replace_or_joins: 43 | - true 44 | - false 45 | prioritize_parallelism: 46 | - true 47 | - false 48 | resource_model: 49 | optimization_metric: circadian_event_distribution 50 | num_iterations: 1 # Number of iterations to run the hyper-optimization process for resource model discovery 51 | num_evaluations_per_iteration: 5 # Number of times to evaluate each iteration (using the mean of all of them) 52 | resource_profiles: 53 | discovery_type: differentiated # Resource discovery type ('undifferentiated', 'pool', or 'differentiated') 54 | granularity: 55 | - 15 56 | - 60 57 | confidence: 58 | - 0.5 59 | - 0.85 60 | support: 61 | - 0.01 62 | - 0.3 63 | participation: 0.4 64 | extraneous_activity_delays: 65 | optimization_metric: relative_event_distribution 66 | num_iterations: 1 # Number of iterations of the optimization process (if 1, direct discovery without optimization) 67 | num_evaluations_per_iteration: 3 # Number of times to evaluate each iteration (using the mean of all of them) 68 | -------------------------------------------------------------------------------- /tests/assets/branch_rules/or_1.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/or_1.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/or_2.csv.gz: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/or_2.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/or_3.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/or_3.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/or_4.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/or_4.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/or_5.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/or_5.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/or_6.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/or_6.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/or_7.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/or_7.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/or_8.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/or_8.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/xor_1.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/xor_1.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/xor_2.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/xor_2.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/xor_3.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/xor_3.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/xor_5.csv.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/xor_5.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/xor_6.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/xor_6.csv.gz -------------------------------------------------------------------------------- /tests/assets/branch_rules/xor_7.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/branch_rules/xor_7.csv.gz -------------------------------------------------------------------------------- /tests/assets/configuration_simod_basic.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | common: 3 | train_log_path: LoanApp_simplified.csv.gz 4 | test_log_path: LoanApp_simplified.csv.gz 5 | num_final_evaluations: 1 6 | discover_data_attributes: true 7 | evaluation_metrics: 8 | - absolute_hourly_emd 9 | log_ids: 10 | case: case:concept:name 11 | activity: concept:name 12 | resource: org:resource 13 | start_time: start_timestamp 14 | end_time: time:timestamp 15 | enabled_time: enabled_time 16 | preprocessing: 17 | multitasking: false 18 | control_flow: 19 | optimization_metric: n_gram_distance 20 | num_iterations: 3 21 | num_evaluations_per_iteration: 3 22 | discovery_algorithm: sm1 23 | epsilon: 24 | - 0.0 25 | - 1.0 26 | eta: 27 | - 0.0 28 | - 1.0 29 | replace_or_joins: 30 | - true 31 | - false 32 | prioritize_parallelism: 33 | - true 34 | - false 35 | gateway_probabilities: 36 | - discovery 37 | - equiprobable 38 | resource_model: 39 | optimization_metric: absolute_hourly_emd 40 | num_iterations: 3 41 | num_evaluations_per_iteration: 3 42 | resource_profiles: 43 | discovery_type: pool 44 | granularity: 60 45 | confidence: 46 | - 0.5 47 | - 0.85 48 | support: 49 | - 0.01 50 | - 0.3 51 | participation: 0.4 52 | -------------------------------------------------------------------------------- /tests/assets/configuration_simod_with_extraneous.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | common: 3 | train_log_path: LoanApp_simplified.csv.gz 4 | discover_data_attributes: true 5 | log_ids: 6 | case: case:concept:name 7 | activity: concept:name 8 | resource: org:resource 9 | start_time: start_timestamp 10 | end_time: time:timestamp 11 | enabled_time: enabled_time 12 | preprocessing: 13 | multitasking: false 14 | control_flow: 15 | optimization_metric: n_gram_distance 16 | num_iterations: 3 17 | num_evaluations_per_iteration: 3 18 | discovery_algorithm: sm1 19 | epsilon: 20 | - 0.0 21 | - 1.0 22 | eta: 23 | - 0.0 24 | - 1.0 25 | replace_or_joins: 26 | - true 27 | - false 28 | prioritize_parallelism: 29 | - true 30 | - false 31 | gateway_probabilities: 32 | - discovery 33 | - equiprobable 34 | resource_model: 35 | optimization_metric: absolute_hourly_emd 36 | num_iterations: 3 37 | num_evaluations_per_iteration: 3 38 | resource_profiles: 39 | discovery_type: pool 40 | granularity: 60 41 | confidence: 42 | - 0.5 43 | - 0.85 44 | support: 45 | - 0.01 46 | - 0.3 47 | participation: 0.4 48 | extraneous_activity_delays: 49 | optimization_metric: relative_emd 50 | 
num_iterations: 1 # Direct discovery, no optimization 51 | -------------------------------------------------------------------------------- /tests/assets/configuration_simod_with_model.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | common: 3 | train_log_path: LoanApp_simplified.csv.gz 4 | process_model_path: LoanApp_simplified.bpmn 5 | discover_data_attributes: true 6 | perform_final_evaluation: true 7 | num_final_evaluations: 1 8 | evaluation_metrics: 9 | - absolute_hourly_emd 10 | log_ids: 11 | case: case:concept:name 12 | activity: concept:name 13 | resource: org:resource 14 | start_time: start_timestamp 15 | end_time: time:timestamp 16 | enabled_time: enabled_time 17 | preprocessing: 18 | multitasking: false 19 | control_flow: 20 | optimization_metric: n_gram_distance 21 | num_iterations: 3 22 | num_evaluations_per_iteration: 3 23 | gateway_probabilities: 24 | - discovery 25 | - equiprobable 26 | resource_model: 27 | optimization_metric: absolute_hourly_emd 28 | num_iterations: 3 29 | num_evaluations_per_iteration: 3 30 | resource_profiles: 31 | discovery_type: pool 32 | granularity: 60 33 | confidence: 34 | - 0.5 35 | - 0.85 36 | support: 37 | - 0.01 38 | - 0.3 39 | participation: 0.4 40 | -------------------------------------------------------------------------------- /tests/assets/configuration_simod_with_model_and_batching.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | common: 3 | train_log_path: LoanApp_simplified.csv.gz 4 | test_log_path: LoanApp_simplified.csv.gz 5 | process_model_path: LoanApp_simplified.bpmn 6 | num_final_evaluations: 0 # On purpose so it is corrected to 10 7 | clean_intermediate_files: false 8 | discover_data_attributes: true 9 | evaluation_metrics: 10 | - absolute_hourly_emd 11 | log_ids: 12 | case: case:concept:name 13 | activity: concept:name 14 | resource: org:resource 15 | start_time: start_timestamp 16 | end_time: time:timestamp 17 | enabled_time: enabled_time 18 | preprocessing: 19 | multitasking: false 20 | control_flow: 21 | optimization_metric: three_gram_distance 22 | num_iterations: 3 23 | num_evaluations_per_iteration: 3 24 | gateway_probabilities: 25 | - discovery 26 | - equiprobable 27 | resource_model: 28 | optimization_metric: absolute_hourly_emd 29 | num_iterations: 5 30 | num_evaluations_per_iteration: 3 31 | discover_batching_rules: true 32 | resource_profiles: 33 | discovery_type: pool 34 | granularity: 60 35 | confidence: 36 | - 0.5 37 | - 0.85 38 | support: 39 | - 0.01 40 | - 0.3 41 | participation: 0.4 42 | -------------------------------------------------------------------------------- /tests/assets/configuration_simod_with_model_and_extraneous.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | common: 3 | train_log_path: LoanApp_simplified.csv.gz 4 | process_model_path: LoanApp_simplified.bpmn 5 | discover_data_attributes: true 6 | num_final_evaluations: 1 7 | evaluation_metrics: 8 | - absolute_hourly_emd 9 | log_ids: 10 | case: case:concept:name 11 | activity: concept:name 12 | resource: org:resource 13 | start_time: start_timestamp 14 | end_time: time:timestamp 15 | enabled_time: enabled_time 16 | preprocessing: 17 | multitasking: false 18 | control_flow: 19 | optimization_metric: n_gram_distance 20 | num_iterations: 3 21 | num_evaluations_per_iteration: 3 22 | gateway_probabilities: 23 | - discovery 24 | - equiprobable 25 | resource_model: 26 | 
optimization_metric: absolute_hourly_emd 27 | num_iterations: 3 28 | num_evaluations_per_iteration: 3 29 | resource_profiles: 30 | discovery_type: pool 31 | granularity: 60 32 | confidence: 33 | - 0.5 34 | - 0.85 35 | support: 36 | - 0.01 37 | - 0.3 38 | participation: 0.4 39 | extraneous_activity_delays: 40 | optimization_metric: relative_emd 41 | discovery_method: naive 42 | num_iterations: 1 # Direct discovery, no optimization 43 | -------------------------------------------------------------------------------- /tests/assets/configuration_simod_with_model_and_prioritization.yml: -------------------------------------------------------------------------------- 1 | version: 5 2 | common: 3 | train_log_path: LoanApp_simplified.csv.gz 4 | test_log_path: LoanApp_simplified.csv.gz 5 | process_model_path: LoanApp_simplified.bpmn 6 | num_final_evaluations: 1 7 | clean_intermediate_files: false 8 | evaluation_metrics: 9 | - absolute_hourly_emd 10 | log_ids: 11 | case: case:concept:name 12 | activity: concept:name 13 | resource: org:resource 14 | start_time: start_timestamp 15 | end_time: time:timestamp 16 | enabled_time: enabled_time 17 | preprocessing: 18 | multitasking: false 19 | control_flow: 20 | optimization_metric: two_gram_distance 21 | num_iterations: 3 22 | num_evaluations_per_iteration: 3 23 | gateway_probabilities: 24 | - discovery 25 | - equiprobable 26 | resource_model: 27 | optimization_metric: absolute_hourly_emd 28 | num_iterations: 5 29 | num_evaluations_per_iteration: 3 30 | discover_prioritization_rules: True 31 | resource_profiles: 32 | discovery_type: pool 33 | granularity: 60 34 | confidence: 35 | - 0.5 36 | - 0.85 37 | support: 38 | - 0.01 39 | - 0.3 40 | participation: 0.4 41 | -------------------------------------------------------------------------------- /tests/assets/data_attributes/case_attributes.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/case_attributes.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/event_attribute_1.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/event_attribute_1.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/event_attribute_15.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/event_attribute_15.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/event_attribute_3.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/event_attribute_3.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/event_attribute_5.csv.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/event_attribute_5.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/event_attribute_7.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/event_attribute_7.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/event_attribute_9.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/event_attribute_9.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/global_attribute_1.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/global_attribute_1.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/global_attribute_15.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/global_attribute_15.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/global_attribute_3.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/global_attribute_3.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/global_attribute_5.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/global_attribute_5.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/global_attribute_7.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/global_attribute_7.csv.gz -------------------------------------------------------------------------------- /tests/assets/data_attributes/global_attribute_9.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/assets/data_attributes/global_attribute_9.csv.gz -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | from click.testing import CliRunner 5 | 6 | 7 | @pytest.fixture(scope='function') 8 | def 
runner(request): 9 | return CliRunner() 10 | 11 | 12 | @pytest.fixture(scope='module') 13 | def entry_point() -> Path: 14 | if Path.cwd().name == 'tests': 15 | return Path('assets') 16 | elif 'test_' in Path.cwd().name: 17 | return Path('../assets') 18 | else: 19 | return Path('tests/assets') 20 | -------------------------------------------------------------------------------- /tests/test_batching/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_batching/__init__.py -------------------------------------------------------------------------------- /tests/test_batching/test_batching_discovery.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from pix_framework.io.event_log import EventLogIDs, read_csv_log 4 | from simod.batching.discovery import discover_batching_rules 5 | from simod.batching.types import BatchingFiringRule 6 | 7 | assets_dir = Path(__file__).parent / "assets" 8 | 9 | 10 | def test_discover_batching_rules(): 11 | log_path = assets_dir / "event_log_5.csv" 12 | log_ids = EventLogIDs( 13 | case="case_id", 14 | activity="Activity", 15 | start_time="start_time", 16 | end_time="end_time", 17 | resource="Resource", 18 | enabled_time="enabled_time", 19 | batch_id="batch_instance_id", 20 | batch_type="batch_instance_type", 21 | ) 22 | log = read_csv_log(log_path, log_ids) 23 | 24 | rules = discover_batching_rules(log, log_ids) 25 | 26 | assert len(rules) > 0 27 | 28 | 29 | def test_discover_batching_rules_loanapp(): 30 | log_path = assets_dir / "LoanApp_batch_sim_log.csv" 31 | log_ids = EventLogIDs( 32 | case="case_id", 33 | activity="activity", 34 | start_time="start_time", 35 | end_time="end_time", 36 | resource="resource", 37 | enabled_time="enable_time", 38 | batch_id="batch_instance_id", 39 | batch_type="batch_instance_type", 40 | ) 41 | expected_rules = BatchingFiringRule( 42 | attribute="batch_size", 43 | comparison="=", 44 | value="3", 45 | ) 46 | log = read_csv_log(log_path, log_ids) 47 | 48 | rules = discover_batching_rules(log, log_ids) 49 | 50 | assert len(rules) == 1 51 | assert rules[0].firing_rules[0][0] == expected_rules 52 | -------------------------------------------------------------------------------- /tests/test_batching/test_types.py: -------------------------------------------------------------------------------- 1 | from simod.batching.types import BatchingRule 2 | 3 | batching_discovery_result = [ 4 | { 5 | "activity": "B", 6 | "resources": ["Alice"], 7 | "type": "Sequential", 8 | "batch_frequency": 0.96, 9 | "size_distribution": {"3": 48, "1": 2}, 10 | "duration_distribution": {"3": 0.5}, 11 | "firing_rules": { 12 | "confidence": 1.0, 13 | "support": 1.0, 14 | "rules": [ 15 | [ 16 | {"attribute": "batch_size", "comparison": ">", "value": "3"}, 17 | {"attribute": "batch_size", "comparison": "<", "value": "5"}, 18 | ], 19 | [ 20 | {"attribute": "batch_size", "comparison": ">", "value": "10"}, 21 | ], 22 | ], 23 | }, 24 | }, 25 | { 26 | "activity": "C", 27 | "resources": ["Bob"], 28 | "type": "Sequential", 29 | "batch_frequency": 0.96, 30 | "size_distribution": {"3": 48, "1": 2}, 31 | "duration_distribution": {"3": 0.5}, 32 | "firing_rules": { 33 | "confidence": 1.0, 34 | "support": 1.0, 35 | "rules": [[{"attribute": "batch_size", "comparison": ">", "value": "3"}]], 36 | }, 37 | }, 38 | ] 39 | 40 | 41 | def 
test_serialization_deserialization(): 42 | rules = [BatchingRule.from_dict(rule) for rule in batching_discovery_result] 43 | 44 | assert len(rules) == len(batching_discovery_result) 45 | for i in range(len(rules)): 46 | assert rules[i].to_dict() == batching_discovery_result[i] 47 | 48 | 49 | def test_prosimos_serialization(): 50 | rules = [BatchingRule.from_dict(rule) for rule in batching_discovery_result] 51 | activities_ids_by_name = {"B": "2", "C": "3"} 52 | activities_names_by_id = {"2": "B", "3": "C"} 53 | 54 | rules_prosimos = [rule.to_prosimos(activities_ids_by_name) for rule in rules] 55 | rules_from_prosimos = [BatchingRule.from_prosimos(rule, activities_names_by_id) for rule in rules_prosimos] 56 | 57 | # Prosimos doesn't use resources and batch_frequency attributes, so we set them 58 | # to None to compare. Also the confidence and support of the rules. 59 | for rule in rules: 60 | rule.resources = None 61 | rule.batch_frequency = None 62 | rule.firing_rules.confidence = -1.0 63 | rule.firing_rules.support = -1.0 64 | 65 | assert rules == rules_from_prosimos 66 | -------------------------------------------------------------------------------- /tests/test_bpic15.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from simod.event_log.event_log import EventLog 3 | from simod.settings.simod_settings import SimodSettings 4 | from simod.simod import Simod 5 | 6 | 7 | @pytest.mark.system 8 | def test_bpic15(entry_point): 9 | settings = SimodSettings.from_path(entry_point / "bpic15/bpic15_1_with_model_v4.yml") 10 | 11 | event_log = EventLog.from_path( 12 | train_log_path=settings.common.train_log_path, 13 | log_ids=settings.common.log_ids, 14 | preprocessing_settings=settings.preprocessing, 15 | ) 16 | optimizer = Simod(settings, event_log=event_log) 17 | optimizer.run() 18 | 19 | assert optimizer.final_bps_model.resource_model is not None 20 | -------------------------------------------------------------------------------- /tests/test_branch_rules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_branch_rules/__init__.py -------------------------------------------------------------------------------- /tests/test_branch_rules/test_discovery.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import pprint 4 | 5 | import pytest 6 | import pandas as pd 7 | from pathlib import Path 8 | from pix_framework.io.event_log import EventLogIDs 9 | from simod.branch_rules.discovery import discover_branch_rules 10 | from pix_framework.io.bpm_graph import BPMNGraph 11 | 12 | LOG_IDS = EventLogIDs(case="case_id", 13 | activity="activity", 14 | start_time="start_time", 15 | end_time="end_time", 16 | resource="resource" 17 | ) 18 | 19 | ASSET_DIR = "branch_rules" 20 | XOR_BPMN = "xor.bpmn" 21 | OR_BPMN = "or.bpmn" 22 | XOR_LOG_PATHS = "xor_*.csv.gz" 23 | OR_LOG_PATHS = "or_8.csv.gz" 24 | 25 | # total_branch_rules -> How many branches should get rules 26 | # rules_per_branch -> how many single rules should be on that branch (exact number or range) 27 | xor_expected_conditions = { 28 | "xor_1.csv.gz": {"total_branch_rules": 15, "rules_per_branch": 1}, # Categorical equal probs 29 | "xor_2.csv.gz": {"total_branch_rules": 3, "rules_per_branch": 1}, # Categorical unbalanced 30 | "xor_3.csv.gz": {"total_branch_rules": 15, 
"rules_per_branch": 1}, # Categorical with different probs 31 | "xor_5.csv.gz": {"total_branch_rules": 15, "rules_per_branch": (1, 3)}, # Numerical intervals 32 | "xor_6.csv.gz": {"total_branch_rules": 15, "rules_per_branch": (1, 2)}, # Conditions 33 | "xor_7.csv.gz": {"total_branch_rules": 15, "rules_per_branch": (1, 3)}, # Complex AND and OR conditions 34 | } 35 | 36 | or_expected_conditions = { 37 | "or_1.csv.gz": {"total_branch_rules": 15, "rules_per_branch": 1}, # Categorical equal probs 1 flow only 38 | "or_2.csv.gz": {"total_branch_rules": 15, "rules_per_branch": (1, 2)}, # Categorical equal probs 2 flow2 39 | "or_3.csv.gz": {"total_branch_rules": 15, "rules_per_branch": 1}, # Categorical equal probs all flows (warning) 40 | "or_4.csv.gz": {"total_branch_rules": 3, "rules_per_branch": 1}, # Categorical unbalanced 1 flow only (warning) 41 | "or_5.csv.gz": {"total_branch_rules": 6, "rules_per_branch": 1}, # Categorical unbalanced 2 flows (warning) 42 | "or_6.csv.gz": {"total_branch_rules": 15, "rules_per_branch": (1, 3)}, # Categorical unbalanced all flows (warning) 43 | "or_7.csv.gz": {"total_branch_rules": 15, "rules_per_branch": (1, 2)}, # Numerical with AND operator 44 | "or_8.csv.gz": {"total_branch_rules": 15, "rules_per_branch": 1}, # Numerical with full range 45 | } 46 | 47 | 48 | @pytest.fixture(scope="module") 49 | def xor_log_files(entry_point): 50 | """Fixture to generate full paths for XOR branch condition log files.""" 51 | xor_log_pattern = os.path.join(entry_point, ASSET_DIR, XOR_LOG_PATHS) 52 | files = glob.glob(xor_log_pattern) 53 | return [(file, xor_expected_conditions[os.path.basename(file)]) for file in files] 54 | 55 | 56 | @pytest.fixture(scope="module") 57 | def or_log_files(entry_point): 58 | or_log_pattern = os.path.join(entry_point, ASSET_DIR, OR_LOG_PATHS) 59 | files = glob.glob(or_log_pattern) 60 | return [(file, or_expected_conditions[os.path.basename(file)]) for file in files] 61 | 62 | 63 | def assert_branch_rules(bpmn_graph, log, log_ids, expected_conditions): 64 | branch_rules = discover_branch_rules(bpmn_graph, log, log_ids) 65 | 66 | assert len(branch_rules) == expected_conditions["total_branch_rules"], \ 67 | f"Expected {expected_conditions['total_branch_rules']} BranchRules, found {len(branch_rules)}" 68 | 69 | for branch_rule in branch_rules: 70 | rule_count = len(branch_rule.rules) 71 | 72 | if isinstance(expected_conditions["rules_per_branch"], tuple): 73 | min_rules, max_rules = expected_conditions["rules_per_branch"] 74 | assert min_rules <= rule_count <= max_rules, \ 75 | f"Expected between {min_rules} and {max_rules} rules, found {rule_count}" 76 | else: 77 | assert rule_count == expected_conditions["rules_per_branch"], \ 78 | f"Expected {expected_conditions['rules_per_branch']} rules, found {rule_count}" 79 | 80 | 81 | def test_discover_xor_branch_rules(entry_point, xor_log_files): 82 | bpmn_path = os.path.join(entry_point, ASSET_DIR, XOR_BPMN) 83 | for log_path, expected_conditions in xor_log_files: 84 | log = pd.read_csv(log_path, compression="gzip") 85 | bpmn_graph = BPMNGraph.from_bpmn_path(Path(bpmn_path)) 86 | assert_branch_rules(bpmn_graph, log, LOG_IDS, expected_conditions) 87 | 88 | 89 | def test_discover_or_branch_rules(entry_point, or_log_files): 90 | bpmn_path = os.path.join(entry_point, ASSET_DIR, OR_BPMN) 91 | for log_path, expected_conditions in or_log_files: 92 | log = pd.read_csv(log_path, compression="gzip") 93 | bpmn_graph = BPMNGraph.from_bpmn_path(Path(bpmn_path)) 94 | assert_branch_rules(bpmn_graph, log, LOG_IDS, 
expected_conditions) 95 | -------------------------------------------------------------------------------- /tests/test_case_attributes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_case_attributes/__init__.py -------------------------------------------------------------------------------- /tests/test_case_attributes/test_discovery.py: -------------------------------------------------------------------------------- 1 | from pix_framework.io.event_log import EventLogIDs, read_csv_log 2 | from simod.data_attributes.discovery import discover_data_attributes 3 | 4 | 5 | def test_discover_case_attributes(entry_point): 6 | log_path = entry_point / "Insurance_Claims_train.csv" 7 | log_ids = EventLogIDs( 8 | case="case_id", activity="activity", start_time="start_time", end_time="end_time", resource="Resource" 9 | ) 10 | log = read_csv_log(log_path, log_ids) 11 | 12 | global_attributes, case_attributes, event_attributes = discover_data_attributes(log, log_ids) 13 | 14 | assert len(case_attributes) > 0 15 | assert "extraneous_delay" in map(lambda x: x.name, case_attributes) 16 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from simod import cli 4 | 5 | 6 | @pytest.mark.system 7 | @pytest.mark.parametrize("path", ["configuration_simod_basic.yml"]) 8 | def test_optimize(entry_point, runner, path): 9 | config_path = entry_point / path 10 | result = runner.invoke(cli.main, ["--configuration", config_path.absolute()]) 11 | assert not result.exception 12 | assert result.exit_code == 0 13 | -------------------------------------------------------------------------------- /tests/test_control_flow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_control_flow/__init__.py -------------------------------------------------------------------------------- /tests/test_data_attributes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_data_attributes/__init__.py -------------------------------------------------------------------------------- /tests/test_data_attributes/test_discovery.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import pytest 4 | import pandas as pd 5 | from pix_framework.io.event_log import EventLogIDs 6 | from simod.data_attributes.discovery import discover_data_attributes 7 | 8 | LOG_IDS = EventLogIDs(case="case_id", 9 | activity="activity", 10 | start_time="start_time", 11 | end_time="end_time", 12 | resource="resource" 13 | ) 14 | 15 | ASSET_DIR = "data_attributes" 16 | GLOBAL_ATTRIBUTE_LOG_PATHS = "global_attribute_*.csv.gz" 17 | CASE_ATTRIBUTE_LOG_PATHS = "case_attribute*.csv.gz" 18 | EVENT_ATTRIBUTE_LOG_PATHS = "event_attribute*.csv.gz" 19 | 20 | 21 | @pytest.fixture(scope="module") 22 | def global_log_files(entry_point): 23 | log_pattern = os.path.join(entry_point, ASSET_DIR, GLOBAL_ATTRIBUTE_LOG_PATHS) 24 | return glob.glob(log_pattern) 25 | 26 | 27 |
@pytest.fixture(scope="module") 28 | def case_log_files(entry_point): 29 | log_pattern = os.path.join(entry_point, ASSET_DIR, CASE_ATTRIBUTE_LOG_PATHS) 30 | return glob.glob(log_pattern) 31 | 32 | 33 | @pytest.fixture(scope="module") 34 | def event_log_files(entry_point): 35 | log_pattern = os.path.join(entry_point, ASSET_DIR, EVENT_ATTRIBUTE_LOG_PATHS) 36 | return glob.glob(log_pattern) 37 | 38 | 39 | def assert_attributes(log, log_ids, expected_case_attrs, expected_event_attrs, expected_global_attrs, runs=5): 40 | success_count = 0 41 | 42 | for i in range(runs): 43 | global_attributes, case_attributes, event_attributes = discover_data_attributes(log, log_ids) 44 | print(f"try {i}") 45 | try: 46 | assert len(global_attributes) == expected_global_attrs, \ 47 | f"Expected {expected_global_attrs} global attributes, found {len(global_attributes)}" 48 | assert len(case_attributes) == expected_case_attrs, \ 49 | f"Expected {expected_case_attrs} case attributes, found {len(case_attributes)}" 50 | assert len(event_attributes) == expected_event_attrs, \ 51 | f"Expected {expected_event_attrs} event attributes, found {len(event_attributes)}" 52 | success_count += 1 53 | except AssertionError as e: 54 | print(f"Assertion failed: {e}") 55 | 56 | if success_count < runs // 2: 57 | raise AssertionError("Majority of runs failed") 58 | 59 | 60 | def test_discover_global_attributes(entry_point, global_log_files): 61 | for log_path in global_log_files: 62 | log = pd.read_csv(log_path, compression="gzip") 63 | assert_attributes(log, LOG_IDS, expected_case_attrs=0, expected_event_attrs=16, expected_global_attrs=1) 64 | 65 | 66 | def test_discover_case_attributes(entry_point, case_log_files): 67 | for log_path in case_log_files: 68 | log = pd.read_csv(log_path, compression="gzip") 69 | assert_attributes(log, LOG_IDS, expected_case_attrs=5, expected_event_attrs=0, expected_global_attrs=0) 70 | 71 | 72 | def test_discover_event_attributes(entry_point, event_log_files): 73 | for log_path in event_log_files: 74 | log = pd.read_csv(log_path, compression="gzip") 75 | assert_attributes(log, LOG_IDS, expected_case_attrs=0, expected_event_attrs=1, expected_global_attrs=0) 76 | 77 | -------------------------------------------------------------------------------- /tests/test_event_log/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_event_log/__init__.py -------------------------------------------------------------------------------- /tests/test_event_log/test_event_log.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pix_framework.io.event_log import APROMORE_LOG_IDS, DEFAULT_XES_IDS 3 | 4 | from simod.event_log.event_log import EventLog 5 | 6 | test_cases = [ 7 | { 8 | "log_name": "Simple_log_no_start_times.csv", 9 | "log_ids": APROMORE_LOG_IDS 10 | }, 11 | { 12 | "log_name": "LoanApp_simplified.csv.gz", 13 | "log_ids": DEFAULT_XES_IDS, 14 | }, 15 | ] 16 | 17 | 18 | @pytest.mark.parametrize("test_data", test_cases, ids=[test_data["log_name"] for test_data in test_cases]) 19 | def test_optimizer(test_data, entry_point): 20 | path = (entry_point / test_data["log_name"]).absolute() 21 | log_ids = test_data["log_ids"] 22 | 23 | event_log = EventLog.from_path(path, log_ids, need_test_partition=True) 24 | 25 | assert event_log.log_ids == log_ids 26 | assert event_log.train_partition is not None 27 | 
assert event_log.validation_partition is not None 28 | assert event_log.test_partition is not None 29 | assert len(event_log.train_partition) > len(event_log.validation_partition) 30 | assert len(event_log.validation_partition) < len(event_log.test_partition) 31 | 32 | 33 | def test_wrong_log_extension(entry_point): 34 | training_message = r"The specified training log has an unsupported extension.*Only 'csv' and 'csv.gz' supported." 35 | test_message = r"The specified test log has an unsupported extension.*Only 'csv' and 'csv.gz' supported." 36 | # Assert wrong training log 37 | with pytest.raises(ValueError, match=training_message) as error: 38 | EventLog.from_path( 39 | train_log_path=entry_point / "Control_flow_optimization_test.bpmn", 40 | log_ids=DEFAULT_XES_IDS, 41 | test_log_path=None 42 | ) 43 | assert error.type 44 | with pytest.raises(ValueError, match=training_message) as error: 45 | EventLog.from_path( 46 | train_log_path=entry_point / "PurchasingExample.xes", 47 | log_ids=DEFAULT_XES_IDS, 48 | test_log_path=None 49 | ) 50 | # Assert wrong test log 51 | with pytest.raises(ValueError, match=test_message) as error: 52 | EventLog.from_path( 53 | train_log_path=entry_point / "Control_flow_optimization_test.csv", 54 | log_ids=DEFAULT_XES_IDS, 55 | test_log_path=entry_point / "PurchasingExample.xes", 56 | ) 57 | with pytest.raises(ValueError, match=test_message) as error: 58 | EventLog.from_path( 59 | train_log_path=entry_point / "Control_flow_optimization_test.csv", 60 | log_ids=DEFAULT_XES_IDS, 61 | test_log_path=entry_point / "PurchasingExample.xes.gz", 62 | ) 63 | -------------------------------------------------------------------------------- /tests/test_event_log/test_preprocessor.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pix_framework.io.event_log import APROMORE_LOG_IDS, read_csv_log 3 | from simod.event_log.preprocessor import Preprocessor 4 | 5 | 6 | @pytest.mark.integration 7 | @pytest.mark.parametrize("log_name", ["Simple_log_no_start_times.csv"]) 8 | def test_add_start_times(log_name, entry_point): 9 | log_ids = APROMORE_LOG_IDS 10 | event_log = read_csv_log(entry_point / log_name, log_ids) 11 | preprocessor = Preprocessor(event_log, log_ids) 12 | log = preprocessor.run() 13 | 14 | assert log[log_ids.start_time].isna().sum() == 0 15 | -------------------------------------------------------------------------------- /tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pix_framework.io.event_log import DEFAULT_XES_IDS, read_csv_log 3 | from simod.metrics import get_absolute_emd 4 | 5 | test_cases = [ 6 | { 7 | "name": "LoanApp_simplified", 8 | "original_log": {"log_name": "LoanApp_simplified.csv.gz", "log_ids": DEFAULT_XES_IDS}, 9 | "simulated_log": {"log_name": "LoanApp_simplified_2.csv.gz", "log_ids": DEFAULT_XES_IDS}, 10 | } 11 | ] 12 | 13 | 14 | @pytest.mark.integration 15 | @pytest.mark.parametrize("test_data", test_cases, ids=[test_data["name"] for test_data in test_cases]) 16 | def test_absolute_timestamp_emd(entry_point, test_data): 17 | original_log_path = entry_point / test_data["original_log"]["log_name"] 18 | simulated_log_path = entry_point / test_data["simulated_log"]["log_name"] 19 | 20 | original_log_ids = test_data["original_log"]["log_ids"] 21 | simulated_log_ids = test_data["simulated_log"]["log_ids"] 22 | 23 | original_log = read_csv_log(original_log_path, original_log_ids) 24 | simulated_log = 
read_csv_log(simulated_log_path, simulated_log_ids) 25 | 26 | # Test different logs 27 | emd = get_absolute_emd(original_log, original_log_ids, simulated_log, simulated_log_ids) 28 | assert emd > 0.0 29 | # Test similar log 30 | emd = get_absolute_emd(original_log, original_log_ids, original_log, simulated_log_ids) 31 | assert emd == 0.0 32 | -------------------------------------------------------------------------------- /tests/test_prioritization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_prioritization/__init__.py -------------------------------------------------------------------------------- /tests/test_prioritization/test_prioritization_discovery.py: -------------------------------------------------------------------------------- 1 | from pix_framework.io.event_log import DEFAULT_XES_IDS, read_csv_log 2 | from simod.data_attributes.discovery import discover_data_attributes 3 | from simod.prioritization.discovery import ( 4 | discover_prioritization_rules, 5 | ) 6 | from simod.prioritization.types import PrioritizationRule 7 | 8 | 9 | def test_prioritization_rules_serialization_deserialization(entry_point): 10 | rules_dict = { 11 | "prioritisation_rules": [ 12 | { 13 | "priority_level": 1, 14 | "rules": [ 15 | [ 16 | {"attribute": "loan_amount", "comparison": "in", "value": ["1000", "2000"]}, 17 | {"attribute": "type", "comparison": "=", "value": "BUSINESS"}, 18 | ], 19 | [{"attribute": "loan_amount", "comparison": "in", "value": ["2000", "inf"]}], 20 | ], 21 | }, 22 | {"priority_level": 2, "rules": [[{"attribute": "loan_amount", "comparison": ">", "value": "500"}]]}, 23 | ] 24 | } 25 | 26 | rules = list(map(PrioritizationRule.from_prosimos, rules_dict["prioritisation_rules"])) 27 | rules_dict_2 = {"prioritisation_rules": list(map(lambda x: x.to_prosimos(), rules))} 28 | 29 | assert len(rules) == 2 30 | assert rules_dict == rules_dict_2 31 | 32 | 33 | def test_discover_prioritization_rules(entry_point): 34 | log_path = entry_point / "Simple_log_with_prioritization.csv" 35 | log_ids = DEFAULT_XES_IDS 36 | log = read_csv_log(log_path, log_ids) 37 | 38 | global_attributes, case_attributes, event_attributes = discover_data_attributes(log, log_ids) 39 | 40 | rules = discover_prioritization_rules(log, log_ids, case_attributes) 41 | 42 | assert len(rules) > 0 43 | -------------------------------------------------------------------------------- /tests/test_prioritization/test_prioritization_impact.py: -------------------------------------------------------------------------------- 1 | config_yaml_A = """ 2 | version: 2 3 | common: 4 | log_path: tests/assets/___.csv 5 | log_ids: 6 | case: case_id 7 | activity: Activity 8 | resource: Resource 9 | start_time: start_time 10 | end_time: end_time 11 | repetitions: 10 12 | evaluation_metrics: 13 | - absolute_hourly_emd 14 | - cycle_time_emd 15 | - circadian_emd 16 | preprocessing: 17 | multitasking: false 18 | control_flow: 19 | optimization_metric: cycle_time_emd 20 | max_evaluations: 10 21 | mining_algorithm: sm1 22 | epsilon: 23 | - 0.0 24 | - 1.0 25 | eta: 26 | - 0.0 27 | - 1.0 28 | gateway_probabilities: 29 | - discovery 30 | replace_or_joins: 31 | - true 32 | - false 33 | prioritize_parallelism: 34 | - true 35 | - false 36 | resource_model: 37 | optimization_metric: absolute_hourly_emd 38 | max_evaluations: 10 39 | discover_prioritization_rules: true 40 | resource_profiles: 
41 | discovery_type: differentiated 42 | granularity: 43 | - 15 44 | - 60 45 | confidence: 46 | - 0.5 47 | - 0.85 48 | support: 49 | - 0.01 50 | - 0.3 51 | participation: 0.4 52 | extraneous_activity_delays: 53 | optimization_metric: absolute_emd 54 | num_iterations: 1 55 | """ 56 | 57 | # @pytest.mark.manual 58 | # def test_prioritization_discovery_impact(entry_point: Path): 59 | # settings = SimodSettings.from_stream(config_yaml_A) 60 | # settings.log_path = (entry_point / Path(settings.common.log_path.name)).absolute() 61 | # 62 | # log = EventLog.from_path( 63 | # path=settings.common.log_path, 64 | # log_ids=settings.common.log_ids, 65 | # process_name=settings.common.log_path.stem, 66 | # ) 67 | # 68 | # simod = Simod(settings=settings, event_log=log) 69 | # simod.run() 70 | 71 | # TODO: find a log with prioritization _rules 72 | -------------------------------------------------------------------------------- /tests/test_resource_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_resource_model/__init__.py -------------------------------------------------------------------------------- /tests/test_settings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_settings/__init__.py -------------------------------------------------------------------------------- /tests/test_settings/test_control_flow_settings.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pix_framework.discovery.gateway_probabilities import GatewayProbabilitiesDiscoveryMethod 3 | 4 | from simod.settings.control_flow_settings import ControlFlowSettings, ProcessModelDiscoveryAlgorithm 5 | 6 | settings_single_values_sm2 = { 7 | "num_iterations": 2, 8 | "num_evaluations_per_iteration": 3, 9 | "gateway_probabilities": "equiprobable", 10 | "mining_algorithm": "Split Miner 2", 11 | "epsilon": 0.45, 12 | "eta": 0.34, 13 | "replace_or_joins": True, 14 | "prioritize_parallelism": True, 15 | } 16 | 17 | settings_interval_values_sm2 = { 18 | "num_iterations": 10, 19 | "num_evaluations_per_iteration": 3, 20 | "gateway_probabilities": ["equiprobable", "discovery"], 21 | "mining_algorithm": "Split Miner 2", 22 | "epsilon": [0.12, 0.45], 23 | "eta": [0.34, 0.55], 24 | "replace_or_joins": [True, False], 25 | "prioritize_parallelism": [True, False], 26 | } 27 | 28 | settings_single_values_sm1 = { 29 | "num_iterations": 2, 30 | "num_evaluations_per_iteration": 3, 31 | "gateway_probabilities": "equiprobable", 32 | "mining_algorithm": "Split Miner 1", 33 | "epsilon": 0.45, 34 | "eta": 0.34, 35 | "replace_or_joins": True, 36 | "prioritize_parallelism": True, 37 | } 38 | 39 | settings_interval_values_sm1 = { 40 | "num_iterations": 10, 41 | "num_evaluations_per_iteration": 3, 42 | "gateway_probabilities": ["equiprobable", "discovery"], 43 | "mining_algorithm": "Split Miner 1", 44 | "epsilon": [0.12, 0.45], 45 | "eta": [0.34, 0.55], 46 | "replace_or_joins": [True, False], 47 | "prioritize_parallelism": [True, False], 48 | } 49 | 50 | test_cases = [ 51 | {"name": "Single values SM2", "control_flow": settings_single_values_sm2}, 52 | {"name": "Intervals SM2", "control_flow": settings_interval_values_sm2}, 53 | {"name": "Single values SM1", "control_flow": 
settings_single_values_sm1}, 54 | {"name": "Intervals SM1", "control_flow": settings_interval_values_sm1}, 55 | ] 56 | 57 | 58 | @pytest.mark.parametrize("test_data", test_cases, ids=list(map(lambda x: x["name"], test_cases))) 59 | def test_control_flow_settings(test_data: dict): 60 | settings = ControlFlowSettings.from_dict(test_data["control_flow"]) 61 | 62 | if test_data["name"] == "Single values SM2": 63 | assert settings.num_iterations == settings_single_values_sm2["num_iterations"] 64 | assert settings.num_evaluations_per_iteration == settings_single_values_sm2["num_evaluations_per_iteration"] 65 | assert settings.gateway_probabilities == GatewayProbabilitiesDiscoveryMethod.EQUIPROBABLE 66 | assert settings.mining_algorithm == ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V2 67 | assert settings.epsilon == settings_single_values_sm2["epsilon"] 68 | assert settings.eta is None 69 | assert settings.replace_or_joins is None 70 | assert settings.prioritize_parallelism is None 71 | elif test_data["name"] == "Intervals SM2": 72 | assert settings.num_iterations == settings_interval_values_sm2["num_iterations"] 73 | assert settings.num_evaluations_per_iteration == settings_interval_values_sm2["num_evaluations_per_iteration"] 74 | assert settings.gateway_probabilities == [ 75 | GatewayProbabilitiesDiscoveryMethod.EQUIPROBABLE, 76 | GatewayProbabilitiesDiscoveryMethod.DISCOVERY, 77 | ] 78 | assert settings.mining_algorithm == ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V2 79 | assert settings.epsilon == ( 80 | settings_interval_values_sm2["epsilon"][0], 81 | settings_interval_values_sm2["epsilon"][1], 82 | ) 83 | assert settings.eta is None 84 | assert settings.replace_or_joins is None 85 | assert settings.prioritize_parallelism is None 86 | elif test_data["name"] == "Single values SM1": 87 | assert settings.num_iterations == settings_single_values_sm1["num_iterations"] 88 | assert settings.num_evaluations_per_iteration == settings_single_values_sm1["num_evaluations_per_iteration"] 89 | assert settings.gateway_probabilities == GatewayProbabilitiesDiscoveryMethod.EQUIPROBABLE 90 | assert settings.mining_algorithm == ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V1 91 | assert settings.epsilon == settings_single_values_sm1["epsilon"] 92 | assert settings.eta == settings_single_values_sm1["eta"] 93 | assert settings.replace_or_joins == settings_single_values_sm1["replace_or_joins"] 94 | assert settings.prioritize_parallelism == settings_single_values_sm1["prioritize_parallelism"] 95 | elif test_data["name"] == "Intervals SM1": 96 | assert settings.num_iterations == settings_interval_values_sm1["num_iterations"] 97 | assert settings.num_evaluations_per_iteration == settings_interval_values_sm1["num_evaluations_per_iteration"] 98 | assert settings.gateway_probabilities == [ 99 | GatewayProbabilitiesDiscoveryMethod.EQUIPROBABLE, 100 | GatewayProbabilitiesDiscoveryMethod.DISCOVERY, 101 | ] 102 | assert settings.mining_algorithm == ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V1 103 | assert settings.epsilon == ( 104 | settings_interval_values_sm1["epsilon"][0], 105 | settings_interval_values_sm1["epsilon"][1], 106 | ) 107 | assert settings.eta == (settings_interval_values_sm1["eta"][0], settings_interval_values_sm1["eta"][1]) 108 | assert settings.replace_or_joins == settings_interval_values_sm1["replace_or_joins"] 109 | assert settings.prioritize_parallelism == settings_interval_values_sm1["prioritize_parallelism"] 110 | else: 111 | assert False 112 | 
-------------------------------------------------------------------------------- /tests/test_settings/test_resource_model_settings.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pix_framework.discovery.resource_calendar_and_performance.calendar_discovery_parameters import CalendarType 3 | from simod.settings.common_settings import Metric 4 | from simod.settings.resource_model_settings import ResourceModelSettings 5 | 6 | settings_single_values = { 7 | "optimization_metric": "absolute_hourly_emd", 8 | "num_iterations": 2, 9 | "num_evaluations_per_iteration": 3, 10 | "resource_profiles": { 11 | "discovery_type": "pool", 12 | "granularity": 60, 13 | "confidence": 0.05, 14 | "support": 0.5, 15 | "participation": 0.4, 16 | }, 17 | } 18 | settings_interval_values = { 19 | "optimization_metric": "circadian_emd", 20 | "num_iterations": 2, 21 | "num_evaluations_per_iteration": 3, 22 | "resource_profiles": { 23 | "discovery_type": "differentiated", 24 | "granularity": [15, 60], 25 | "confidence": [0.05, 0.4], 26 | "support": [0.5, 0.8], 27 | "participation": [0.2, 0.6], 28 | }, 29 | } 30 | 31 | test_cases = [ 32 | {"name": "Single values", "resource_model": settings_single_values}, 33 | {"name": "Intervals", "resource_model": settings_interval_values}, 34 | ] 35 | 36 | 37 | @pytest.mark.parametrize("test_data", test_cases, ids=list(map(lambda x: x["name"], test_cases))) 38 | def test_resource_model_settings(test_data: dict): 39 | settings = ResourceModelSettings.from_dict(test_data["resource_model"]) 40 | 41 | if test_data["name"] == "Single values": 42 | assert settings.num_iterations == settings_single_values["num_iterations"] 43 | assert settings.optimization_metric == Metric.ABSOLUTE_EMD 44 | assert settings.discovery_type == CalendarType.DIFFERENTIATED_BY_POOL 45 | assert settings.granularity == 60 46 | assert settings.confidence == 0.05 47 | assert settings.support == 0.5 48 | assert settings.participation == 0.4 49 | elif test_data["name"] == "Intervals": 50 | assert settings.num_iterations == settings_interval_values["num_iterations"] 51 | assert settings.optimization_metric == Metric.CIRCADIAN_EMD 52 | assert settings.discovery_type == CalendarType.DIFFERENTIATED_BY_RESOURCE 53 | assert settings.granularity == (15, 60) 54 | assert settings.confidence == (0.05, 0.4) 55 | assert settings.support == (0.5, 0.8) 56 | assert settings.participation == (0.2, 0.6) 57 | else: 58 | assert False 59 | -------------------------------------------------------------------------------- /tests/test_simulation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutomatedProcessImprovement/Simod/56cd99f61f64e2b08656f88d617586eac2687416/tests/test_simulation/__init__.py -------------------------------------------------------------------------------- /tests/test_simulation/test_evaluate_logs.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | from pix_framework.io.event_log import EventLogIDs, read_csv_log 5 | from simod.settings.common_settings import Metric 6 | from simod.simulation.prosimos import evaluate_logs 7 | 8 | 9 | @pytest.mark.parametrize("parallel", [True, False]) 10 | def test_evaluate_logs(parallel): 11 | metrics = [ 12 | Metric.CIRCADIAN_EMD, 13 | Metric.ABSOLUTE_EMD, 14 | Metric.CYCLE_TIME_EMD, 15 | Metric.TWO_GRAM_DISTANCE, 16 | ] 17 | 18 | assets_dir =
Path(__file__).parent / "assets" 19 | 20 | log_paths = list(assets_dir.glob("*.csv")) 21 | 22 | log_ids = EventLogIDs( 23 | case="case_id", 24 | activity="activity", 25 | resource="resource", 26 | start_time="start_time", 27 | end_time="end_time", 28 | enabled_time="enabled_time", 29 | enabling_activity="enabling_activity", 30 | available_time="available_time", 31 | estimated_start_time="estimated_start_time", 32 | ) 33 | 34 | validation_log = read_csv_log(assets_dir / "validation_log.csv", log_ids) 35 | 36 | results = evaluate_logs( 37 | metrics=metrics, 38 | simulation_log_paths=log_paths, 39 | validation_log=validation_log, 40 | validation_log_ids=log_ids, 41 | ) 42 | 43 | assert len(results) > 0 44 | -------------------------------------------------------------------------------- /tests/test_utilities.py: -------------------------------------------------------------------------------- 1 | from simod.utilities import parse_single_value_or_interval 2 | 3 | 4 | def test_parse_single_value_or_interval(entry_point): 5 | assert parse_single_value_or_interval(1.0) == 1.0 6 | assert parse_single_value_or_interval(0.23) == 0.23 7 | assert parse_single_value_or_interval(0.0) == 0.0 8 | assert parse_single_value_or_interval([0.0, 1.0]) == (0.0, 1.0) 9 | assert parse_single_value_or_interval([0.32, 0.78]) == (0.32, 0.78) 10 | --------------------------------------------------------------------------------
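The suite above separates fast unit tests from long-running ones with the system and integration markers used throughout. A hedged invocation sketch (assuming the markers are registered in the project's pytest configuration, which is not shown here):

import pytest

# Run the test suite, skipping the end-to-end runs marked with @pytest.mark.system.
pytest.main(["tests", "-m", "not system"])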