├── pycelonis1
│   ├── 02_try_ml_use_cases
│   │   └── 00_time_series_forecasting
│   │       └── 01_ts_forecasting
│   │           ├── utils
│   │           │   ├── __init__.py
│   │           │   ├── ext_data_utils.py
│   │           │   ├── utils.py
│   │           │   ├── plot_utils.py
│   │           │   └── model_utils.py
│   │           └── main.py
│   ├── 04_Data_Formatting
│   │   └── 00_Combine_csv_files.ipynb
│   ├── 06_Extractors
│   │   ├── 99_Extract-logs-from-EMS.ipynb
│   │   ├── 03_Datadog_log_data_extraction.ipynb
│   │   └── 11_API template.ipynb
│   ├── 00_manage_celonis
│   │   ├── 01_misc
│   │   │   ├── 00_trigger_workbench_execution.ipynb
│   │   │   └── 01_use_case_version_control.ipynb
│   │   └── 00_ibc_to_ibc_movers
│   │       ├── 05_workflow_to_studio_skill_mover.ipynb
│   │       ├── 03_workflow_mover.ipynb
│   │       ├── 06_package_mover.ipynb
│   │       ├── 01_data_pool_mover.ipynb
│   │       ├── 02_asset_mover.ipynb
│   │       ├── 07_action_engine_skill_mover.ipynb
│   │       ├── 04_process_analysis_to_studio_mover.ipynb
│   │       ├── 00_process_analysis_mover.ipynb
│   │       ├── 08_replacer.ipynb
│   │       └── KPI_Mover.ipynb
│   ├── 03_Connectivity
│   │   ├── 05_Transformation_Download_to_MLW.ipynb
│   │   ├── 18_EMS_Data_Consumption_Report.ipynb
│   │   ├── 02b_Transformation_Mover.ipynb
│   │   ├── 03_Data_Model_Mover.ipynb
│   │   ├── 11_Extraction_Unifier.ipynb
│   │   └── 02a_Extraction_Mover.ipynb
│   └── 01_use_pycelonis
│       └── 00_basics
│           ├── 00_connecting_to_celonis.ipynb
│           ├── 03_pushing_data.ipynb
│           └── 02_pulling_data_from_datamodel.ipynb
├── pycelonis2
│   ├── .DS_Store
│   ├── 02_pycelonis_version_migrator
│   │   ├── function_get_data_frame.txt
│   │   ├── README.md
│   │   └── Pycelonis_Migration_UI.ipynb
│   └── 01_example_use_cases
│       └── 01_use_case_version_control.ipynb
├── .gitignore
└── README.md
/pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pycelonis2/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/celonis/pycelonis-examples/HEAD/pycelonis2/.DS_Store
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # ---> Python
2 | # Byte-compiled / optimized / DLL files
3 | __pycache__/
4 | *.py[cod]
5 | *$py.class
6 |
7 | all_scripts/
8 |
9 | .idea/
10 |
--------------------------------------------------------------------------------
/pycelonis1/04_Data_Formatting/00_Combine_csv_files.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import glob\n",
11 | "\n",
12 | "path = r'D:\\Customer Data\\Testfile_join' # use your path\n",
13 | "all_files = glob.glob(path + \"/*.csv\")\n",
14 | "\n",
15 | "li = []\n",
16 | "\n",
17 | "for filename in all_files:\n",
18 | " df = pd.read_csv(filename, index_col=None, header=0)\n",
19 | " li.append(df)\n",
20 | "\n",
21 | "frame = pd.concat(li, axis=0, ignore_index=True)\n",
22 | "\n",
23 | "df.to_csv('D:\\Customer Data\\Testfile_join\\out.csv')"
24 | ]
25 | }
26 | ],
27 | "metadata": {
28 | "kernelspec": {
29 | "display_name": "Python 3",
30 | "language": "python",
31 | "name": "python3"
32 | },
33 | "language_info": {
34 | "codemirror_mode": {
35 | "name": "ipython",
36 | "version": 3
37 | },
38 | "file_extension": ".py",
39 | "mimetype": "text/x-python",
40 | "name": "python",
41 | "nbconvert_exporter": "python",
42 | "pygments_lexer": "ipython3",
43 | "version": "3.7.6"
44 | }
45 | },
46 | "nbformat": 4,
47 | "nbformat_minor": 4
48 | }
49 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PyCelonis Examples
2 |
3 | This repository contains demo notebooks covering popular functionalities and use cases of PyCelonis.
4 | It acts as an addition to the official [tutorials](https://celonis.github.io/pycelonis/2.0.1/tutorials/executed/01_quickstart/01_installation/) for PyCelonis.
5 |
6 | The repository contains several notebooks for both PyCelonis 1.X and 2.X that act as examples of what you can achieve using PyCelonis.
7 | The examples are grouped by their PyCelonis version and specific use cases.
8 |
9 | ## PyCelonis
10 |
11 | [License](https://celonis.github.io/pycelonis/license.html)
12 | [Documentation](https://celonis.github.io/pycelonis/index.html)
13 |
14 | PyCelonis is a Python API wrapper for the Celonis EMS.
15 |
16 | Using this package, you can programmatically interact with Analyses, Workspaces, Datamodels, Datapools, and other Celonis objects.
17 | The package also allows pushing and pulling data to and from data pools and data models.
18 |
19 | PyCelonis is pre-installed in all Celonis Machine Learning Workbenches by default.
20 | For more information about PyCelonis and how to set it up in your local Python environment, [see the docs](https://celonis.github.io/pycelonis/).
21 |
--------------------------------------------------------------------------------
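The pull/push workflow described in the README above looks roughly like this in the PyCelonis 1.x style used by the notebooks in this repository (a minimal sketch using only calls that appear elsewhere in these examples; the team URL, API key, data model ID, table, and column names are placeholders):

from pycelonis import get_celonis, pql

# Connect to the Celonis team and pick a data model
celonis = get_celonis("https://your_team.celonis.cloud/", "your_api_token")
datamodel = celonis.datamodels.find("your_data_model_id")

# Pull: query columns from the data model into a pandas DataFrame
query = pql.PQL()
query += pql.PQLColumn('"YOUR_TABLE"."COLUMN_1"', "Column 1")
query += pql.PQLColumn('"YOUR_TABLE"."COLUMN_2"', "Column 2")
df = datamodel.get_data_frame(query)

# Push: write a DataFrame back to the data model as a new table
datamodel.push_table(df, "EXAMPLE_OUTPUT_TABLE", if_exists="replace")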
/pycelonis1/06_Extractors/99_Extract-logs-from-EMS.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import requests\n",
10 | "import pandas as pd\n",
11 | "import pycelonis as py"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": null,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "api_key = ''\n",
21 | "team_url = 'https://XXXXXX.celonis.cloud/'\n",
22 | "\n",
23 | "instance = py.get_celonis(team_url, api_key)"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": null,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "audit_log = instance.api_request('https://XXXXX.celonis.cloud/api/team/logs/audit/csv')\n",
33 | "audit_log"
34 | ]
35 | }
36 | ],
37 | "metadata": {
38 | "kernelspec": {
39 | "display_name": "Python 3",
40 | "language": "python",
41 | "name": "python3"
42 | },
43 | "language_info": {
44 | "codemirror_mode": {
45 | "name": "ipython",
46 | "version": 3
47 | },
48 | "file_extension": ".py",
49 | "mimetype": "text/x-python",
50 | "name": "python",
51 | "nbconvert_exporter": "python",
52 | "pygments_lexer": "ipython3",
53 | "version": "3.7.3"
54 | }
55 | },
56 | "nbformat": 4,
57 | "nbformat_minor": 4
58 | }
59 |
--------------------------------------------------------------------------------
/pycelonis2/02_pycelonis_version_migrator/function_get_data_frame.txt:
--------------------------------------------------------------------------------
1 | import pycelonis
2 | import pandas as pd
3 | from pycelonis.pql import PQL, PQLColumn
4 | from pycelonis.ems.data_integration.data_model import DataModel
5 | def extract_table_from_data_pool(
6 | celonis_object: pycelonis.celonis.Celonis,
7 | data_pool: pycelonis.ems.data_integration.data_pool.DataPool,
8 | data_model: pycelonis.ems.data_integration.data_model.DataModel,
9 | table: pycelonis.ems.data_integration.data_pool_table.DataPoolTable,
10 | ) -> pd.DataFrame:
11 |
12 | """This function creates the PQL query to extract a whole
13 | table from a data model. If the table is too big, the kernel might
14 | shut down, make sure the RAM memory is enough.
15 |
16 | Input:
17 | celonis_object: Celonis object already instantiated
18 | data_pool: Data Pool object already instantiated
19 | data_model: Data Model object already instantiated
20 | table: Table object already instantiated
21 |
22 | Returns:
23 | df: DataFrame containing the target table.
24 | """
25 |
26 | # Instantiate query object
27 | query = PQL(distinct=False, limit=None, offset=None)
28 |
29 | # Populate query with all the columns from the table
30 | for PQL_column in table.get_columns():
31 | query += PQLColumn(name=PQL_column.name, query=f""" "{table.name}"."{PQL_column.name}" """)
32 |
33 | try:
34 | # If you have USE permissions on the data model this method can be used
35 | df = DataModel.export_data_frame_from(
36 | celonis_object.client,
37 | pool_id=data_model.pool_id,
38 | data_model_id=data_model.id,
39 | query=query)
40 | print("You have use permissions")
41 | except Exception:
42 | # Should work if you don't have USE permissions
43 | df = data_model.export_data_frame(query)
44 | print("You might not have USE permissions")
45 |
46 | return df
47 |
48 |
--------------------------------------------------------------------------------
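A hedged usage sketch for extract_table_from_data_pool above; the lookup calls (data_integration.get_data_pools, get_data_models, get_tables, and .find) and the get_celonis keyword arguments are assumptions about the PyCelonis 2 API and may differ in your installed version, and all names below are placeholders:

from pycelonis import get_celonis

# Connect (keyword argument names are an assumption for PyCelonis 2)
celonis = get_celonis(base_url="https://your_team.celonis.cloud", api_token="your_api_token")

# Look up the pool, model, and pool table (assumed PyCelonis 2 lookup calls)
data_pool = celonis.data_integration.get_data_pools().find("MY_POOL")
data_model = data_pool.get_data_models().find("MY_DATA_MODEL")
table = data_pool.get_tables().find("MY_TABLE")

# Extract the whole table into a pandas DataFrame
df = extract_table_from_data_pool(celonis, data_pool, data_model, table)
print(df.shape)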
/pycelonis1/00_manage_celonis/01_misc/00_trigger_workbench_execution.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Trigger a Notebook from outside the Workbench (e.g. from an external automation software)"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import requests\n",
17 | "import json\n",
18 | "\n",
19 | "url = \"https://TEAMNAME_HERE.REALMHERE.celonis.cloud/machine-learning/api/executions\"\n",
20 | "\n",
21 | "\n",
22 | "# specify ID of Workbench (find this in the URL of a specific Workbench you want to trigger)\n",
23 | "# specify filename, if your file is not stored in the root you should include the foldername in the path, e.g. \"executionFileName\": \"Foldername/Run quickly.ipynb\"\n",
24 | "# specify any params you want to pass. To see how they are received in the notebook, check https://papermill.readthedocs.io/en/latest/\n",
25 | "payload = {\n",
26 | " \"notebookId\": \"4417517d-55ae-482b-8f56-ac6d1d864e68\",\n",
27 | " \"executionFileName\": \"Run quickly.ipynb\",\n",
28 | " \"params\":{\n",
29 | " \"param1\":\"value1\"\n",
30 | " }\n",
31 | "}\n",
32 | "\n",
33 | "payload = json.dumps(payload)\n",
34 | "\n",
35 | "# specify API Key (Applicationkeys do not work as of June 2020)\n",
36 | "headers = {\n",
37 | " 'content-type': 'application/json;charset=UTF-8',\n",
38 | " 'authorization': 'Bearer API_KEY_HERE'\n",
39 | "}\n",
40 | "\n",
41 | "response = requests.request(\"POST\", url, headers=headers, data = payload)\n",
42 | "\n",
43 | "print(response.text.encode('utf8'))\n"
44 | ]
45 | },
46 | {
47 | "cell_type": "markdown",
48 | "metadata": {},
49 | "source": [
50 | "# Get status of execution"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": [
59 | "import json\n",
60 | "resp = json.loads(response.text)\n",
61 | "\n",
62 | "url = url + resp['id']\n",
63 | "response = requests.request(\"GET\", url, headers=headers)\n",
64 | "print(response.text.encode('utf8'))"
65 | ]
66 | }
67 | ],
68 | "metadata": {
69 | "kernelspec": {
70 | "display_name": "Python 3",
71 | "language": "python",
72 | "name": "python3"
73 | },
74 | "language_info": {
75 | "codemirror_mode": {
76 | "name": "ipython",
77 | "version": 3
78 | },
79 | "file_extension": ".py",
80 | "mimetype": "text/x-python",
81 | "name": "python",
82 | "nbconvert_exporter": "python",
83 | "pygments_lexer": "ipython3",
84 | "version": "3.7.3"
85 | }
86 | },
87 | "nbformat": 4,
88 | "nbformat_minor": 4
89 | }
--------------------------------------------------------------------------------
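On the receiving side, the "params" sent in the payload above are handled by papermill (see the link in the notebook's comments): the triggered notebook declares defaults in a cell tagged "parameters", and papermill injects the passed values after it. A minimal sketch of such a cell in "Run quickly.ipynb" (the default value shown is an illustrative assumption):

# Cell tagged "parameters" inside the triggered notebook.
# Papermill inserts a new cell after this one that overrides the defaults
# with the values sent in the POST payload, e.g. param1 = "value1".
param1 = "default_value"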
/pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/utils/ext_data_utils.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from . import plot_utils
3 |
4 |
5 | def load_external_data(
6 | overall_gdp_csv,
7 | industry_gdp_perc_csv,
8 | csv_col_1,
9 | csv_col_2,
10 | csv_col_2_new,
11 | col_final,
12 | ):
13 | """Load External/GDP data"""
14 |
15 | # Load National GDP data (need to create/upload external csv)
16 | all_gdp_csv = pd.read_csv(overall_gdp_csv)
17 |
18 | # Load Industry GDP % csv (need to create/upload external csv)
19 | all_gdp_ind_perc_csv = pd.read_csv(industry_gdp_perc_csv)
20 | # Rename col
21 | all_gdp_ind_perc_csv = all_gdp_ind_perc_csv.rename(
22 | columns={csv_col_2: csv_col_2_new})
23 |
24 | # Manually estimate GDP values for future quarters (CORE for TS Predictions)
25 | all_gdp = all_gdp_csv.copy()
26 | all_gdp = all_gdp.append([
27 | {
28 | "DATE": "7/1/2020",
29 | csv_col_1: 20200.0
30 | },
31 | {
32 | "DATE": "10/1/2020",
33 | csv_col_1: 21000.0
34 | },
35 | {
36 | "DATE": "1/1/2021",
37 | csv_col_1: 21000.0
38 | },
39 | ])
40 | all_gdp = all_gdp.reset_index(drop=True)
41 |
42 | # Manually estimate Industry GDP % values for future quarters (CORE for TS Predictions)
43 | all_gdp_ind_perc = all_gdp_ind_perc_csv.append([
44 | {
45 | "DATE": "4/1/2020",
46 | csv_col_2_new: 11.0
47 | },
48 | {
49 | "DATE": "7/1/2020",
50 | csv_col_2_new: 11.0
51 | },
52 | {
53 | "DATE": "10/1/2020",
54 | csv_col_2_new: 11.0
55 | },
56 | {
57 | "DATE": "1/1/2021",
58 | csv_col_2_new: 11.0
59 | },
60 | ])
61 | # Convert to %
62 | all_gdp_ind_perc[csv_col_2_new] = all_gdp_ind_perc[csv_col_2_new] / 100.0
63 | all_gdp_ind_perc = all_gdp_ind_perc.reset_index(drop=True)
64 | all_gdp_ind_perc.head()
65 |
66 | # Calculate Industry GDP
67 | all_gdp[col_final] = all_gdp[csv_col_1] * all_gdp_ind_perc[csv_col_2_new]
68 |
69 | # Resample to weekly GDP data
70 | all_gdp["DATE"] = pd.to_datetime(all_gdp["DATE"], format="%m/%d/%Y")
71 | all_gdp_weekly = all_gdp.copy()
72 | all_gdp_weekly = all_gdp_weekly.drop(columns=csv_col_1)
73 | all_gdp_weekly = all_gdp_weekly.set_index("DATE").resample(
74 | "W").ffill().reset_index()
75 | all_gdp_weekly[col_final] = all_gdp_weekly[col_final] * 4 / 52
76 | # Plot resampled external data
77 | plot_utils.plot_gdp(all_gdp_weekly, col_final)
78 |
79 | # Smooth the weekly GDP data
80 | ext_data = all_gdp_weekly.copy()
81 | ext_data[col_final] = ext_data.iloc[:, 1].rolling(window=12,
82 | center=False,
83 | min_periods=1).mean()
84 | # Plot final external data
85 | plot_utils.plot_gdp(ext_data, col_final)
86 | return ext_data
87 |
--------------------------------------------------------------------------------
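Note that DataFrame.append, used above to add the manually estimated future quarters, was deprecated in pandas 1.4 and removed in pandas 2.0. On newer pandas versions the same rows can be added with pd.concat; a short sketch reusing the argument names from the function above:

# Equivalent of all_gdp_csv.append([...]) on pandas >= 2.0
future_quarters = pd.DataFrame([
    {"DATE": "7/1/2020", csv_col_1: 20200.0},
    {"DATE": "10/1/2020", csv_col_1: 21000.0},
    {"DATE": "1/1/2021", csv_col_1: 21000.0},
])
all_gdp = pd.concat([all_gdp_csv, future_quarters], ignore_index=True)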
/pycelonis2/02_pycelonis_version_migrator/README.md:
--------------------------------------------------------------------------------
1 | # Pycelonis Version Migrator
2 | This code automatically migrates a given script from pycelonis 1 to pycelonis 2.
3 | The code is not perfect (it doesn't cover every single use case), but it serves as a tool to save time for
4 | people who have to migrate code to pycelonis 2.
5 |
6 | # Project Description
7 | ## Input
8 | The UI script only needs the path to the .py / .ipynb file you want to migrate.
9 |
10 | ## Output
11 | Once the code is run, it generates another .py or .ipynb file with the same name as the one you provided, but with
12 | "_migrated_automatically" appended. Every changed line of code is marked with a comment, # CHANGED or # MANUALLY CHECK.
13 | For more clarity on what has actually changed, the code also generates a diff.html file that can be opened with any
14 | browser and highlights all the changes in red, green, or yellow. In addition, the UI script renders
15 | this HTML directly inside the UI notebook.
16 |
17 | ## Check the output once it has run
18 | After it has run, you should manually check for any mistakes and fix them using the pycelonis documentation
19 | or [this article](https://celonis.github.io/pycelonis/2.0.0/tutorials/executed/04_migration/01_migration_guide/),
20 | which covers how to migrate most of the biggest changes. As stated before, the code is not perfect,
21 | but it will definitely save you time.
22 |
23 | The backend script relies on a class called PycelonisMigrator, which uses different regex patterns and mainly the
24 | regex library to modify the text.
25 |
26 | # Scripts
27 | This project consists of two different scripts:
28 |
29 | - **Pycelonis_Migration_UI.ipynb**: This is the code the final user should use. It contains a brief description of the
30 | overall project, and the call to the backend to perform the migration.
31 | - **pycelonis_migration.py**: This is the code that performs all of the operations. It is based on a class called
32 | PycelonisMigrator and several functions that help define the output of the regex substitution patterns.
33 |
34 |
35 | # Additional Information
36 | ## Pycelonis version change
37 | This project only migrates the script you provide, but bear in mind that you also need to update the Python packages
38 | so you can test the outcome of the updated script. You are highly encouraged to check which version of pycelonis you are
39 | currently using. For this you can either:
40 | - Type the following command in a newly opened terminal and search for the pycelonis version number:
41 | > pip list
42 | - Inside any notebook run the following piece of code:
43 | > import pycelonis
44 | > pycelonis.\__version\__
45 |
46 | This way you can revert to the older version in case you have problems. Once you have done this, you can safely update
47 | pycelonis to the latest version. For updating pycelonis python package follow [these guidelines.](https://celonis.github.io/pycelonis/2.0.1/tutorials/executed/01_quickstart/01_installation/)
48 | You will need to run this command in terminal:
49 | > pip install --extra-index-url=https://pypi.celonis.cloud/ pycelonis
50 |
51 | Note that you can select the version you want to install by adding it at the end of the command:
52 | > pip install --extra-index-url=https://pypi.celonis.cloud/ pycelonis=="2.0.1"
53 |
54 | ## Friendly reminder
55 | - This project **never changes the original script** you provide for migration. You can safely use it, since it only reads
56 | the original and never writes back to it.
57 |
58 |
--------------------------------------------------------------------------------
/pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/05_workflow_to_studio_skill_mover.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Move Workflows from Process Automation to Studio Skills"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "source_team_url = 'https://your_team.celonis.cloud/'\n",
17 | "source_api_key = 'your_api_token'\n",
18 | "destination_team_url = 'https://your_team.celonis.cloud/'\n",
19 | "destination_api_key = 'your_api_token'"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "### Set environment variables for the source team setup"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 2,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": [
35 | "import os\n",
36 | "os.environ['TEAM_URL'] = source_team_url\n",
37 | "os.environ['API_TOKEN'] = source_api_key"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "metadata": {},
43 | "source": [
44 | "### Pull workflow from process automation in source team as an asset"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 3,
50 | "metadata": {},
51 | "outputs": [
52 | {
53 | "name": "stdout",
54 | "output_type": "stream",
55 | "text": [
56 | "\u001b[32minfo\u001b[39m: File downloaded successfully. New filename: asset_2b3ef876-aa47-42b6-823f-5e1bb4680e9d.yaml\n"
57 | ]
58 | }
59 | ],
60 | "source": [
61 | "!content-cli pull workflow --id 'insert_workflow_id_here' --asset"
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "metadata": {},
67 | "source": [
68 | "### Set environment variables again for the destination team setup\n",
69 | "No need to do this step (re-define environment variables) if source and destination teams are the same"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 4,
75 | "metadata": {},
76 | "outputs": [],
77 | "source": [
78 | "os.environ['TEAM_URL'] = destination_team_url\n",
79 | "os.environ['API_TOKEN'] = destination_api_key"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "### Push downloaded workflow file to the studio as skill asset in the source team\n",
87 | "Hint: Press tab while writing the file name to auto complete"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": 5,
93 | "metadata": {},
94 | "outputs": [
95 | {
96 | "name": "stdout",
97 | "output_type": "stream",
98 | "text": [
99 | "\u001b[32minfo\u001b[39m: Asset was pushed successfully. New key: test.On-time-Delivery-Prediction\n"
100 | ]
101 | }
102 | ],
103 | "source": [
104 | "!content-cli push asset --file 'insert_downloaded_asset_file_name' --package 'package_key_to_push_the_asset_to'"
105 | ]
106 | }
107 | ],
108 | "metadata": {
109 | "kernelspec": {
110 | "display_name": "Python 3",
111 | "language": "python",
112 | "name": "python3"
113 | },
114 | "language_info": {
115 | "codemirror_mode": {
116 | "name": "ipython",
117 | "version": 3
118 | },
119 | "file_extension": ".py",
120 | "mimetype": "text/x-python",
121 | "name": "python",
122 | "nbconvert_exporter": "python",
123 | "pygments_lexer": "ipython3",
124 | "version": "3.7.6"
125 | }
126 | },
127 | "nbformat": 4,
128 | "nbformat_minor": 4
129 | }
--------------------------------------------------------------------------------
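The pull and push steps in this and the following mover notebooks can also be scripted in a single Python cell instead of separate shell cells; a minimal sketch that reuses the variables from the notebook's first cell and only the content-cli commands already shown above (the workflow ID, file name, and package key are placeholders):

import os
import subprocess

# Pull the workflow as an asset from the source team
os.environ["TEAM_URL"] = source_team_url
os.environ["API_TOKEN"] = source_api_key
subprocess.run(
    ["content-cli", "pull", "workflow", "--id", "insert_workflow_id_here", "--asset"],
    check=True,
)

# Push the downloaded asset file into a Studio package in the destination team
os.environ["TEAM_URL"] = destination_team_url
os.environ["API_TOKEN"] = destination_api_key
subprocess.run(
    ["content-cli", "push", "asset",
     "--file", "insert_downloaded_asset_file_name",
     "--package", "package_key_to_push_the_asset_to"],
    check=True,
)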
/pycelonis1/03_Connectivity/05_Transformation_Download_to_MLW.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Script Downloader"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "**This tutorial shows how to download transformations of one data job.**\n",
15 | "\n",
16 | "**The installation of ftfy is a requirement for this script and needs to be installed once per workbench.**"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 24,
22 | "metadata": {},
23 | "outputs": [
24 | {
25 | "name": "stdout",
26 | "output_type": "stream",
27 | "text": [
28 | "Looking in indexes: https://pypi.celonis.cloud, https://pypi.org/simple\n",
29 | "Requirement already satisfied: ftfy in /home/jovyan/.local/lib/python3.7/site-packages (5.6)\n",
30 | "Requirement already satisfied: wcwidth in /opt/conda/lib/python3.7/site-packages (from ftfy) (0.1.8)\n",
31 | "Note: you may need to restart the kernel to use updated packages.\n"
32 | ]
33 | }
34 | ],
35 | "source": [
36 | "pip install ftfy"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {},
42 | "source": [
43 | "**In the first step we need to connect to the data job.**"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": 23,
49 | "metadata": {},
50 | "outputs": [],
51 | "source": [
52 | "from pycelonis import get_celonis\n",
53 | "import os\n",
54 | "from ftfy import fix_text #requires pip install\n",
55 | "\n",
56 | "c = get_celonis(\"URL to the team from which you want to copy.\", \"Specify a valid API key for your source cloud team.\")\n",
57 | "data_pool = c.pools.find(\"Name or ID of the source data pool.\")\n",
58 | "data_job = data_pool.data_jobs.find(\"Name or ID of the source data job.\")"
59 | ]
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "metadata": {},
64 | "source": [
65 | "**In the next step we create the SQL for the respective data job.**"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": 4,
71 | "metadata": {},
72 | "outputs": [],
73 | "source": [
74 | "sql = ''\n",
75 | "for transformation in data_job.transformations.data:\n",
76 | " if transformation.statement is None:\n",
77 | " continue\n",
78 | " sql = sql + '--###' + transformation.name + '###--' + '\\n\\n' + transformation.statement.strip() + '\\n\\n'"
79 | ]
80 | },
81 | {
82 | "cell_type": "markdown",
83 | "metadata": {},
84 | "source": [
85 | "**In this step the SQL statement is saved as file.**"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": 14,
91 | "metadata": {},
92 | "outputs": [],
93 | "source": [
94 | "filepath = os.path.join('', data_job.name + '.sql')\n",
95 | "with open(filepath, 'w') as f:\n",
96 | " f.write(fix_text(sql))"
97 | ]
98 | }
99 | ],
100 | "metadata": {
101 | "jupytext": {
102 | "formats": "ipynb,py:percent"
103 | },
104 | "kernelspec": {
105 | "display_name": "Python 3",
106 | "language": "python",
107 | "name": "python3"
108 | },
109 | "language_info": {
110 | "codemirror_mode": {
111 | "name": "ipython",
112 | "version": 3
113 | },
114 | "file_extension": ".py",
115 | "mimetype": "text/x-python",
116 | "name": "python",
117 | "nbconvert_exporter": "python",
118 | "pygments_lexer": "ipython3",
119 | "version": "3.7.6"
120 | }
121 | },
122 | "nbformat": 4,
123 | "nbformat_minor": 4
124 | }
125 |
--------------------------------------------------------------------------------
/pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/main.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from pycelonis import get_celonis, pql
3 |
4 | from utils import ext_data_utils, model_utils, utils
5 |
6 | # Load input data
7 | celonis = get_celonis()
8 | dm_id = 'TBD'
9 | datamodel = celonis.datamodels.find(dm_id)
10 | input_columns = [("col_name", "pretty_name"), ("col_name_2", "pretty_name_2")]
11 | input_filter = "FILTER TBD"
12 |
13 | train_df = utils.get_pql_dataframe(datamodel, input_columns, input_filter)
14 |
15 | # Import External Data for n-step Predictions (such as GDP below)
16 | ext_data = ext_data_utils.load_external_data(
17 | overall_gdp_csv="US_GDP.csv",
18 | industry_gdp_perc_csv="US_MANUF_GDP_PERC.csv",
19 | csv_col_1="GDP",
20 | csv_col_2="VAPGDPMA",
21 | csv_col_2_new="IND_PERC",
22 | col_final="IND_GDP",
23 | )
24 |
25 | # INPUTS
26 | subsets = ['subset1', 'subset2'] # PARAM
27 | subset_needs_adjusts = ['subset2'
28 | ] # PARAM Subsets which need a baseline adjustment
29 | subset_col_name = 'subset_filtering_column' # PARAM
30 | input_y_col_name = "Y_column" # PARAM
31 | input_exo_col_name = 'ext_data_column' # PARAM
32 | model_class_col_name = 'classification_naming' # PARAM Column to flag train vs test vs forecast timeframes
33 | model_y_pred_col_name = 'Y_prediction_column' # PARAM
34 | val_size_perc = 0.2
35 |
36 | # OUTPUTS, for Exported Predictions to DM
37 | all_subset_results = {}
38 | all_subset_exports = {}
39 | output_col_names = {
40 | "index": "Date", # PARAM
41 | input_y_col_name: "Actual Y Value", # PARAM
42 | model_y_pred_col_name: "Predicted Y Value", # PARAM
43 | model_class_col_name: "Classification", # PARAM
44 | }
45 |
46 | # Run Predictions for each selected subset
47 | for subset in subsets:
48 | # Check if subset needs baseline adjustment
49 | to_adjust = False
50 | if subset in subset_needs_adjusts:
51 | to_adjust = True
52 |
53 | # Filter train df for subset
54 | subset_train_df = utils.get_subset_df(train_df, subset, subset_col_name)
55 |
56 | # Run Predictions model for this subset
57 | print('Run TS Predictions model for subset train df \n',
58 | subset_train_df.head())
59 | subset_results = model_utils.run_predictions_model(subset_train_df,
60 | ext_data,
61 | input_y_col_name,
62 | input_exo_col_name,
63 | val_size_perc, to_adjust)
64 | # Store Output (subset Predictions)
65 | all_subset_results[subset] = subset_results
66 | print('subset ', subset, ' Prediction outputs have shape ',
67 | all_subset_results[subset].shape)
68 | # Store export-version of the Output (subset Predictions)
69 | all_subset_exports[subset] = utils.prepare_export_df(
70 | subset_results, output_col_names, model_y_pred_col_name)
71 |
72 | print("Finished running predictions for all subsets, total output shape is ",
73 | all_subset_results[subset].shape)
74 | print("Subsets are ", all_subset_exports.keys())
75 |
76 | # Combine Results into single Export table
77 | # Add new 'subset name' column to the export-version of Predictions
78 | export_df = utils.constitute_export_df(all_subset_exports, subset_col_name)
79 |
80 | # Export table to DM
81 | export_table_name = "Predictions_Output"
82 | print('Export df shape is ', export_df.shape)
83 | print('Export df head is ')
84 | print(export_df.head(10))
85 | print('Export df tail is ')
86 | print(export_df.tail(10))
87 | datamodel.push_table(export_df,
88 | export_table_name,
89 | reload_datamodel=False,
90 | if_exists="replace")
91 |
--------------------------------------------------------------------------------
/pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/03_workflow_mover.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Workflow Mover using Content CLI\n",
8 | "\n",
9 | "### This tutorial shows how to copy a workflow (process automation) from one team to another one"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "source_team_url = 'INSERT TEAM URL HERE' #ex. https://your_team.celonis.cloud/\n",
19 | "source_api_key = 'API TOKEN HERE'\n",
20 | "destination_team_url = 'INSERT TEAM URL HERE' #ex. https://your_team.celonis.cloud/\n",
21 | "destination_api_key = 'API TOKEN HERE'"
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {},
27 | "source": [
28 | "### Set environment variable for the source team"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 2,
34 | "metadata": {},
35 | "outputs": [],
36 | "source": [
37 | "# DO NOT MODIFY THIS CELL\n",
38 | "import os\n",
39 | "os.environ['TEAM_URL'] = source_team_url\n",
40 | "os.environ['API_TOKEN'] = source_api_key"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "### Pull workflow from the source team"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 3,
53 | "metadata": {},
54 | "outputs": [
55 | {
56 | "name": "stdout",
57 | "output_type": "stream",
58 | "text": [
59 | "\u001b[32minfo\u001b[39m: File downloaded successfully. New filename: workflow_2b3ef876-aa47-42b6-823f-5e1bb4680e9d.yaml\n"
60 | ]
61 | }
62 | ],
63 | "source": [
64 | "!content-cli pull workflow --id 'WORKFLOW ID HERE'"
65 | ]
66 | },
67 | {
68 | "cell_type": "markdown",
69 | "metadata": {},
70 | "source": [
71 | "### Set environment variables again for the destination team setup\n",
72 | "No need to do this step (re-define environment variables) if source and destination teams are the same"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 4,
78 | "metadata": {},
79 | "outputs": [],
80 | "source": [
81 | "# DO NOT MODIFY THIS CELL\n",
82 | "os.environ['TEAM_URL'] = destination_team_url\n",
83 | "os.environ['API_TOKEN'] = destination_api_key"
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "metadata": {},
89 | "source": [
90 | "### Push workflow to the destination team\n",
91 | "\n",
92 | "Hint: Press tab while writing the file name to auto complete"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": 5,
98 | "metadata": {},
99 | "outputs": [
100 | {
101 | "name": "stdout",
102 | "output_type": "stream",
103 | "text": [
104 | "\u001b[32minfo\u001b[39m: Workflow was pushed successfully. New Id: b5391c57-87ae-47f9-a876-2c18e304a994\n"
105 | ]
106 | }
107 | ],
108 | "source": [
109 | "!content-cli push workflow --file 'DOWLOADED FILE FROM THE PREVIOUS STEP HERE'"
110 | ]
111 | }
112 | ],
113 | "metadata": {
114 | "kernelspec": {
115 | "display_name": "Python 3",
116 | "language": "python",
117 | "name": "python3"
118 | },
119 | "language_info": {
120 | "codemirror_mode": {
121 | "name": "ipython",
122 | "version": 3
123 | },
124 | "file_extension": ".py",
125 | "mimetype": "text/x-python",
126 | "name": "python",
127 | "nbconvert_exporter": "python",
128 | "pygments_lexer": "ipython3",
129 | "version": "3.7.6"
130 | }
131 | },
132 | "nbformat": 4,
133 | "nbformat_minor": 4
134 | }
135 |
--------------------------------------------------------------------------------
/pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/06_package_mover.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Package Mover using Content CLI\n",
8 | "\n",
9 | "### This script moves a package from one team/place to another "
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "source_team_url = 'INSERT TEAM URL HERE' #ex. https://your_team.celonis.cloud/\n",
19 | "source_api_key = 'API TOKEN HERE'\n",
20 | "destination_team_url = 'INSERT TEAM URL HERE' #ex. https://your_team.celonis.cloud/\n",
21 | "destination_api_key = 'API TOKEN HERE'"
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {},
27 | "source": [
28 | "#### Set environment variable for the source team"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 2,
34 | "metadata": {},
35 | "outputs": [],
36 | "source": [
37 | "# DO NOT MODIFY THIS CELL\n",
38 | "import os\n",
39 | "os.environ['TEAM_URL'] = source_team_url\n",
40 | "os.environ['API_TOKEN'] = source_api_key"
41 | ]
42 | },
43 | {
44 | "attachments": {},
45 | "cell_type": "markdown",
46 | "metadata": {},
47 | "source": [
48 | "#### Pull Package from the source team\n",
49 | "\n",
50 | "
Looking for the package key? Locate the package, open the ... menu, and click Key
"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": 3,
56 | "metadata": {
57 | "jupyter": {
58 | "source_hidden": true
59 | }
60 | },
61 | "outputs": [
62 | {
63 | "name": "stdout",
64 | "output_type": "stream",
65 | "text": [
66 | "\u001b[32minfo\u001b[39m: File downloaded successfully\n"
67 | ]
68 | }
69 | ],
70 | "source": [
71 | "!content-cli pull package --key 'PACKAGE KEY HERE'"
72 | ]
73 | },
74 | {
75 | "cell_type": "markdown",
76 | "metadata": {},
77 | "source": [
78 | "#### Set environment variable for the destination team\n",
79 | "(Skip this step of setting environment variables again if source and destination are the same teams)"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 4,
85 | "metadata": {},
86 | "outputs": [],
87 | "source": [
88 | "# DO NOT MODIFY THIS CELL\n",
89 | "os.environ['TEAM_URL'] = destination_team_url\n",
90 | "os.environ['API_TOKEN'] = destination_api_key"
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {},
96 | "source": [
97 | "#### Push package to the destination team\n",
98 | "(Hint: Press tab to autocomplete the file name)"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": 5,
104 | "metadata": {},
105 | "outputs": [
106 | {
107 | "name": "stdout",
108 | "output_type": "stream",
109 | "text": [
110 | "\u001b[32minfo\u001b[39m: Package was pushed successfully.\n"
111 | ]
112 | }
113 | ],
114 | "source": [
115 | "!content-cli push package --file 'DOWLOADED FILE FROM THE PREVIOUS STEP HERE' #ex. package_testpackage.zip"
116 | ]
117 | }
118 | ],
119 | "metadata": {
120 | "kernelspec": {
121 | "display_name": "Python 3",
122 | "language": "python",
123 | "name": "python3"
124 | },
125 | "language_info": {
126 | "codemirror_mode": {
127 | "name": "ipython",
128 | "version": 3
129 | },
130 | "file_extension": ".py",
131 | "mimetype": "text/x-python",
132 | "name": "python",
133 | "nbconvert_exporter": "python",
134 | "pygments_lexer": "ipython3",
135 | "version": "3.7.6"
136 | }
137 | },
138 | "nbformat": 4,
139 | "nbformat_minor": 4
140 | }
141 |
--------------------------------------------------------------------------------
/pycelonis2/02_pycelonis_version_migrator/Pycelonis_Migration_UI.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "15ffaabc",
6 | "metadata": {},
7 | "source": [
8 | "# User Interface for the Pycelonis migration script"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "id": "f505a141",
14 | "metadata": {},
15 | "source": [
16 | "- **PROJECT DESCRIPTION:** This project migrates a given code (either .py or .ipynb) to pycelonis 2.0. It consists of two scripts, this one you are reading (**Pycelonis_Migration_UI.ipynb**) which is the **ONLY ONE THE USER SHOULD OPEN**; and the pycelonis_migration.py, which can be regarded as the backend of this project. The former should only be modified if you are planning on collaborating in enhancing this project.
\n",
17 | "
\n",
18 | " This whole project creates a modified copy of the original code (Inside the project's folder) and it afterwards produces a diff HTML file to easily visualize the changes. The modified code has the same name as the original one inputed in code_path, but with \"_migrated_automatically\" between the original name and the file extension. If you open the new outputted file, you can also see every line that was changed because it will have a comment with either # CHANGED or # CHECK MANUALLY.
\n",
19 | "
\n",
20 | "- **UI Overview**: The overall structure of this code (**Pycelonis_Migration_UI.ipynb**) is:
\n",
21 | " - Inputs: You just need to input a working path as a string in the variable code_path
\n",
22 | " - Outputs: You are going to get another code file with the migrated version, as well as an HTML file to visualize the output easily.
\n",
23 | "\n",
24 | "The whole aim is for you to test the modified code (_migrated_automatically) and look for the changes to see if they are properly working. This project doesn't cover all the use cases, so **you should manually check the output afterwards**. Altough it is not perfect, it can definetely help you save a lot of time."
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": null,
30 | "id": "523df5c4",
31 | "metadata": {},
32 | "outputs": [],
33 | "source": [
34 | "import pycelonis_migration\n",
35 | "from IPython.display import IFrame"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "id": "90823215",
41 | "metadata": {},
42 | "source": [
43 | "Fullfill the next cell with the path to the code you want to migrate. It can either be a notebook or a plain .py file"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": null,
49 | "id": "f9be9117",
50 | "metadata": {},
51 | "outputs": [],
52 | "source": [
53 | "code_path = \"/Users/tests/Downloads/test_code.ipynb\""
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "id": "cfd4531a",
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "pycelonis_migration.main(path=code_path)"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "id": "88476ecb",
70 | "metadata": {},
71 | "outputs": [],
72 | "source": [
73 | "IFrame(\"diff.html\", width=1000, height=600)"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": null,
79 | "id": "00d11c77",
80 | "metadata": {},
81 | "outputs": [],
82 | "source": []
83 | }
84 | ],
85 | "metadata": {
86 | "kernelspec": {
87 | "display_name": "Projects",
88 | "language": "python",
89 | "name": "projects"
90 | },
91 | "language_info": {
92 | "codemirror_mode": {
93 | "name": "ipython",
94 | "version": 3
95 | },
96 | "file_extension": ".py",
97 | "mimetype": "text/x-python",
98 | "name": "python",
99 | "nbconvert_exporter": "python",
100 | "pygments_lexer": "ipython3",
101 | "version": "3.9.12"
102 | }
103 | },
104 | "nbformat": 4,
105 | "nbformat_minor": 5
106 | }
107 |
--------------------------------------------------------------------------------
/pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/01_data_pool_mover.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Data Pool Mover using Content CLI\n",
8 | "\n",
9 | "### This tutorial shows how to copy a datapool from one team to another\n",
10 | "Note: Datamodels and data jobs contained in the datapool are moved, but not the actual data"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 1,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "source_team_url = 'INSERT TEAM URL HERE' #ex. https://your_team.celonis.cloud/\n",
20 | "source_api_key = 'API TOKEN HERE'\n",
21 | "destination_team_url = 'INSERT TEAM URL HERE' #ex. https://your_team.celonis.cloud/\n",
22 | "destination_api_key = 'API TOKEN HERE'"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "### Set environment variable for the source team"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 2,
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "# DO NOT MODIFY THIS CELL\n",
39 | "import os\n",
40 | "os.environ['TEAM_URL'] = source_team_url\n",
41 | "os.environ['API_TOKEN'] = source_api_key"
42 | ]
43 | },
44 | {
45 | "attachments": {},
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "### Pull data pool from the source team\n",
50 | "\n",
51 | "Looking for the data pool ID? Open to the pool and view the browser URL
(ex. https://team.cluster.celonis.cloud/integration/ui/pools/52f73644-9369-4604-a050-92cb493f2025)
"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 3,
57 | "metadata": {},
58 | "outputs": [
59 | {
60 | "name": "stdout",
61 | "output_type": "stream",
62 | "text": [
63 | "\u001b[32minfo\u001b[39m: File downloaded successfully. New filename: data-pool_7796633e-c2db-4524-92ec-85ae5fe65282.json\n"
64 | ]
65 | }
66 | ],
67 | "source": [
68 | "!content-cli pull data-pool --id 'DATA POOL ID HERE'"
69 | ]
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {},
74 | "source": [
75 | "### Set environment variable for the destination team\n",
76 | "Skip this step of setting up destination team if source and destination team are the same"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 4,
82 | "metadata": {},
83 | "outputs": [],
84 | "source": [
85 | "# DO NOT MODIFY THIS CELL\n",
86 | "import os\n",
87 | "os.environ['TEAM_URL'] = destination_team_url\n",
88 | "os.environ['API_TOKEN'] = destination_api_key"
89 | ]
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "metadata": {},
94 | "source": [
95 | "### Push data pool config into destination team\n",
96 | "Hint: press tab to auto complete the file name"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": 5,
102 | "metadata": {},
103 | "outputs": [
104 | {
105 | "name": "stdout",
106 | "output_type": "stream",
107 | "text": [
108 | "\u001b[32minfo\u001b[39m: Data Pool was pushed successfully. New ID: undefined\n"
109 | ]
110 | }
111 | ],
112 | "source": [
113 | "!content-cli push data-pool --file 'DOWLOADED FILE FROM THE PREVIOUS STEP HERE' #ex. data-pool_7796633e-c2db-4524-92ec-85ae5fe65282.json"
114 | ]
115 | }
116 | ],
117 | "metadata": {
118 | "kernelspec": {
119 | "display_name": "Python 3",
120 | "language": "python",
121 | "name": "python3"
122 | },
123 | "language_info": {
124 | "codemirror_mode": {
125 | "name": "ipython",
126 | "version": 3
127 | },
128 | "file_extension": ".py",
129 | "mimetype": "text/x-python",
130 | "name": "python",
131 | "nbconvert_exporter": "python",
132 | "pygments_lexer": "ipython3",
133 | "version": "3.7.6"
134 | }
135 | },
136 | "nbformat": 4,
137 | "nbformat_minor": 4
138 | }
139 |
--------------------------------------------------------------------------------
/pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/02_asset_mover.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "attachments": {},
5 | "cell_type": "markdown",
6 | "metadata": {},
7 | "source": [
8 | "## Asset Mover using Content CLI\n",
9 | "\n",
10 | "### This script moves any asset in Studio (skill, view, analysis, knowledge model etc.) from one team to another "
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 1,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "source_team_url = 'INSERT TEAM URL HERE' #ex. https://your_team.celonis.cloud/\n",
20 | "source_api_key = 'API TOKEN HERE'\n",
21 | "destination_team_url = 'INSERT TEAM URL HERE' #ex. https://your_team.celonis.cloud/\n",
22 | "destination_api_key = 'API TOKEN HERE'"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "### Set environment variable for the source team"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 2,
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "# DO NOT MODIFY THIS CELL\n",
39 | "import os\n",
40 | "os.environ['TEAM_URL'] = source_team_url\n",
41 | "os.environ['API_TOKEN'] = source_api_key"
42 | ]
43 | },
44 | {
45 | "attachments": {},
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "### Pull asset from the source team\n",
50 | "\n",
51 | "Looking for the asset key? Locate the asset, open the ... menu, and click Key
"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 3,
57 | "metadata": {},
58 | "outputs": [
59 | {
60 | "name": "stdout",
61 | "output_type": "stream",
62 | "text": [
63 | "\u001b[32minfo\u001b[39m: File downloaded successfully. New filename: asset_mykm.yml\n"
64 | ]
65 | }
66 | ],
67 | "source": [
68 | "!content-cli pull asset --key 'ASSET KEY HERE' #ex. test.testview"
69 | ]
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {},
74 | "source": [
75 | "### Set environment variable for the destination team\n",
76 | "Skip this step of setting environment variables again if source and destination are the same teams"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 4,
82 | "metadata": {},
83 | "outputs": [],
84 | "source": [
85 | "# DO NOT MODIFY THIS CELL\n",
86 | "os.environ['TEAM_URL'] = destination_team_url\n",
87 | "os.environ['API_TOKEN'] = destination_api_key"
88 | ]
89 | },
90 | {
91 | "attachments": {},
92 | "cell_type": "markdown",
93 | "metadata": {},
94 | "source": [
95 | "### Push asset to the destination team\n",
96 | "Hint: Press tab to autocomplete the file name\n",
97 | "\n",
98 | "Looking for the package key? Locate the package, open the ... menu, and click Key
"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": 5,
104 | "metadata": {},
105 | "outputs": [
106 | {
107 | "name": "stdout",
108 | "output_type": "stream",
109 | "text": [
110 | "\u001b[32minfo\u001b[39m: Asset was pushed successfully. New key: noor.mykm\n"
111 | ]
112 | }
113 | ],
114 | "source": [
115 | "!content-cli push asset --file 'DOWLOADED FILE FROM THE PREVIOUS STEP HERE' --package 'PACKAGE KEY TO PUSH THE ASSET TO HERE' #example file: asset_mykm.yml"
116 | ]
117 | }
118 | ],
119 | "metadata": {
120 | "kernelspec": {
121 | "display_name": "Python 3",
122 | "language": "python",
123 | "name": "python3"
124 | },
125 | "language_info": {
126 | "codemirror_mode": {
127 | "name": "ipython",
128 | "version": 3
129 | },
130 | "file_extension": ".py",
131 | "mimetype": "text/x-python",
132 | "name": "python",
133 | "nbconvert_exporter": "python",
134 | "pygments_lexer": "ipython3",
135 | "version": "3.7.6"
136 | }
137 | },
138 | "nbformat": 4,
139 | "nbformat_minor": 4
140 | }
141 |
--------------------------------------------------------------------------------
/pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/07_action_engine_skill_mover.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Action Engine Skill Mover using Content CLI\n",
8 | "\n",
9 | "### This tutorial shows how to copy an action engine skill from one team to another one"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "source_team_url = 'INSERT TEAM URL HERE' #ex. https://your_team.celonis.cloud/\n",
19 | "source_api_key = 'API TOKEN HERE'\n",
20 | "destination_team_url = 'INSERT TEAM URL HERE' #ex. https://your_team.celonis.cloud/\n",
21 | "destination_api_key = 'API TOKEN HERE'"
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {},
27 | "source": [
28 | "### Set environment variable for the source team"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 2,
34 | "metadata": {},
35 | "outputs": [],
36 | "source": [
37 | "# DO NOT MODIFY THIS CELL\n",
38 | "import os\n",
39 | "os.environ['TEAM_URL'] = source_team_url\n",
40 | "os.environ['API_TOKEN'] = source_api_key"
41 | ]
42 | },
43 | {
44 | "attachments": {},
45 | "cell_type": "markdown",
46 | "metadata": {},
47 | "source": [
48 | "### Pull action engine skill from the source team\n",
49 | "\n",
50 | "Looking for the action engine skill ID? Open to Action Engine, open the project from the projects tab. Select the desired skill and view the browser URL
(ex. https://team.cluster.celonis.cloud/action-engine/ui/projects/deef790a-2a2b-4637-a99b-77d271278279/skills/43ac6d44-1320-4c17-be39-b7307474ebc1)
"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": 3,
56 | "metadata": {},
57 | "outputs": [
58 | {
59 | "name": "stdout",
60 | "output_type": "stream",
61 | "text": [
62 | "\u001b[32minfo\u001b[39m: File downloaded successfully. New filename: skill_08594b68-2731-4ede-abaf-4fd7eb5720ca.json\n"
63 | ]
64 | }
65 | ],
66 | "source": [
67 | "!content-cli pull skill --skillId 'ACTION ENGINE SKILL ID HERE' --projectId 'PROJECT ID HERE'"
68 | ]
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {},
73 | "source": [
74 | "### Set environment variables again for the destination team setup\n",
75 | "No need to do this step (re-define environment variables) if source and destination teams are the same"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": 4,
81 | "metadata": {},
82 | "outputs": [],
83 | "source": [
84 | "# DO NOT MODIFY THIS CELL\n",
85 | "os.environ['TEAM_URL'] = destination_team_url\n",
86 | "os.environ['API_TOKEN'] = destination_api_key"
87 | ]
88 | },
89 | {
90 | "cell_type": "markdown",
91 | "metadata": {},
92 | "source": [
93 | "### Push action engine skill to the destination team\n",
94 | "\n",
95 | "Hint: Press tab while writing the file name to auto complete"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 5,
101 | "metadata": {},
102 | "outputs": [
103 | {
104 | "name": "stdout",
105 | "output_type": "stream",
106 | "text": [
107 | "\u001b[32minfo\u001b[39m: Skill was pushed successfully. New ID: 4639e4dd-b0ea-484f-822b-5415f2244c5d\n"
108 | ]
109 | }
110 | ],
111 | "source": [
112 | "!content-cli push skill --projectId 'NEW PROJECT ID HERE' --file 'DOWLOADED FILE FROM THE PREVIOUS STEP HERE' #ex. skill_08594b68-2731-4ede-abaf-4fd7eb5720ca.json"
113 | ]
114 | }
115 | ],
116 | "metadata": {
117 | "kernelspec": {
118 | "display_name": "Python 3",
119 | "language": "python",
120 | "name": "python3"
121 | },
122 | "language_info": {
123 | "codemirror_mode": {
124 | "name": "ipython",
125 | "version": 3
126 | },
127 | "file_extension": ".py",
128 | "mimetype": "text/x-python",
129 | "name": "python",
130 | "nbconvert_exporter": "python",
131 | "pygments_lexer": "ipython3",
132 | "version": "3.7.6"
133 | }
134 | },
135 | "nbformat": 4,
136 | "nbformat_minor": 4
137 | }
138 |
--------------------------------------------------------------------------------
/pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/04_process_analysis_to_studio_mover.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "attachments": {},
5 | "cell_type": "markdown",
6 | "metadata": {},
7 | "source": [
8 | "## Move Analysis from Process Analytics to Studio\n",
9 | "\n",
10 | "\n",
11 | " NOTE: This tutorial is intended for use with Process Analytics analyses. To pull data from Studio Analyses, see our Asset Mover Template\n",
12 | "
"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 1,
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "source_team_url = 'INSERT TEAM URL HERE' #ex. https://your_team.celonis.cloud/\n",
22 | "source_api_key = 'API TOKEN HERE'\n",
23 | "destination_team_url = 'INSERT TEAM URL HERE' #ex. https://your_team.celonis.cloud/\n",
24 | "destination_api_key = 'API TOKEN HERE'"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {},
30 | "source": [
31 | "### Set environment variables for the source team setup"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 2,
37 | "metadata": {},
38 | "outputs": [],
39 | "source": [
40 | "# DO NOT MODIFY THIS CELL\n",
41 | "import os\n",
42 | "os.environ['TEAM_URL'] = source_team_url\n",
43 | "os.environ['API_TOKEN'] = source_api_key"
44 | ]
45 | },
46 | {
47 | "attachments": {},
48 | "cell_type": "markdown",
49 | "metadata": {},
50 | "source": [
51 | "### Pull analysis from process analytics in source team as an asset\n",
52 | "\n",
53 | "Looking for the analysis ID? Open to the analysis and view the browser URL
(ex. https://team.cluster.celonis.cloud/process-mining/analysis/dd277835-1140-4f30-98ea-1b190bd21cec/)
"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": 3,
59 | "metadata": {},
60 | "outputs": [
61 | {
62 | "name": "stdout",
63 | "output_type": "stream",
64 | "text": [
65 | "\u001b[32minfo\u001b[39m: File downloaded successfully. New filename: asset_6b2166e2-0c40-43e2-b3e6-62996c7dae11.yaml\n"
66 | ]
67 | }
68 | ],
69 | "source": [
70 | "!content-cli pull analysis --id 'ANALYSIS ID HERE' --asset"
71 | ]
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "### Set environment variables again for the destination team setup\n",
78 | "No need to do this step (re-define environment variables) if source and destination teams are the same"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": 4,
84 | "metadata": {},
85 | "outputs": [],
86 | "source": [
87 | "# DO NOT MODIFY THIS CELL\n",
88 | "os.environ['TEAM_URL'] = destination_team_url\n",
89 | "os.environ['API_TOKEN'] = destination_api_key"
90 | ]
91 | },
92 | {
93 | "attachments": {},
94 | "cell_type": "markdown",
95 | "metadata": {},
96 | "source": [
97 | "### Push downloaded analysis file to the studio as asset in the source team\n",
98 | "Hint: Press tab while writing the file name to auto complete\n",
99 | "\n",
100 | "Looking for the package key? Locate the package, open the ... menu, and click Key
"
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": 5,
106 | "metadata": {},
107 | "outputs": [
108 | {
109 | "name": "stdout",
110 | "output_type": "stream",
111 | "text": [
112 | "\u001b[32minfo\u001b[39m: Asset was pushed successfully. New key: test.RCA\n"
113 | ]
114 | }
115 | ],
116 | "source": [
117 | "!content-cli push asset --file 'DOWLOADED FILE FROM THE PREVIOUS STEP HERE' --package 'PACKAGE KEY TO PUSH TO HERE' #example file: asset_6b2166e2-0c40-43e2-b3e6-62996c7dae11.yaml"
118 | ]
119 | }
120 | ],
121 | "metadata": {
122 | "kernelspec": {
123 | "display_name": "Python 3",
124 | "language": "python",
125 | "name": "python3"
126 | },
127 | "language_info": {
128 | "codemirror_mode": {
129 | "name": "ipython",
130 | "version": 3
131 | },
132 | "file_extension": ".py",
133 | "mimetype": "text/x-python",
134 | "name": "python",
135 | "nbconvert_exporter": "python",
136 | "pygments_lexer": "ipython3",
137 | "version": "3.7.6"
138 | }
139 | },
140 | "nbformat": 4,
141 | "nbformat_minor": 4
142 | }
143 |
--------------------------------------------------------------------------------
/pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/utils/utils.py:
--------------------------------------------------------------------------------
1 | from pycelonis import pql
2 |
3 | import datetime
4 | import isoweek
5 | import pandas as pd
6 |
7 | ## Loading Data
8 |
9 |
10 | def get_pql_dataframe(dm, input_columns, input_filter):
11 | """Query input columns with filters from input DM"""
12 | query = pql.PQL()
13 | for col_name, col_pretty_name in input_columns:
14 | query += pql.PQLColumn(col_name, col_pretty_name)
15 | if input_filter != '':
16 | query += pql.PQLFilter(input_filter)
17 | queried_df = dm.get_data_frame(query)
18 | return queried_df
19 |
20 |
21 | def get_subset_df(train_df, subset, subset_col_name):
22 | """Filter df for subset"""
23 |     subset_train_df = train_df[train_df[subset_col_name] == subset].copy()  # copy to avoid SettingWithCopyWarning
24 |     subset_train_df.drop(columns=[subset_col_name], inplace=True)
25 | return subset_train_df
26 |
27 |
28 | ## Pre-processing
29 |
30 |
31 | def fill_empty_dates(df):
32 | """Fill empty weeks of date Df"""
33 | my_date = datetime.datetime.now()
34 | year, week_num, day_of_week = my_date.isocalendar()
35 | d = isoweek.Week(year, week_num - 1).monday()
36 | rng = pd.date_range(df["Date"].min(), d, freq="7D")
37 | df = df.set_index("Date").reindex(rng, fill_value=0).reset_index()
38 | df.rename(columns={"index": "Date"}, inplace=True)
39 | return df
40 |
41 |
42 | def cap_outliers(df, max_outlier_value):
43 | """Clean outliers"""
44 | df.loc[df["Net Order Value"] > max_outlier_value,
45 | "Net Order Value"] = max_outlier_value
46 | return df
47 |
48 |
49 | def adjust_baseline(df, change_date, end_date):
50 | """Calculate baseline avg difference between TS before change_date vs TS between change_date and end_date"""
51 | diff_high_low = (
52 | df.loc[(change_date < df["Date"]) &
53 | (df["Date"] <= end_date), "Net Order Value"].mean() -
54 | df.loc[df["Date"] <= change_date, "Net Order Value"].mean())
55 | # Adjust lower baseline with the above avg difference
56 | df.loc[df["Date"] <= change_date, "Net Order Value"] += diff_high_low
57 | return df
58 |
59 |
60 | ## Model utils
61 |
62 |
63 | def calculate_trend(df, ts_seasonality, center=False):
64 | """Calculate Trend"""
65 | t = df.iloc[:, 1].rolling(window=ts_seasonality, center=center).mean()
66 | return t
67 |
68 |
69 | def combine_ext_data(train_df, ext_data, days_to_shift=None):
70 | """Combine External/GDP data with Y"""
71 | # Add Exo regressors (GDP) to train df
72 | train_df = train_df.set_index("Date")
73 | ext_data["DATE"] = pd.to_datetime(ext_data["DATE"])
74 | ext_data = ext_data.set_index("DATE")
75 | # Optional - Align dates of Industry GDP with Trend
76 | if days_to_shift is not None:
77 | ext_data = ext_data.shift(days_to_shift, freq="D")
78 | # Combine Train Df with GDP
79 | train_df = train_df.combine_first(ext_data)
80 | return train_df
81 |
82 |
83 | def get_trend_and_exo_for_fit(train_df, exo_col_name, trend_col_name,
84 | val_size_perc):
85 | """Create subsets for Trend Fit"""
86 | # Create X set (Exo Regressor)
87 | X = train_df.dropna()[exo_col_name].values
88 | train_size = int(len(X) * (1 - val_size_perc))
89 | X_train = X[:train_size].reshape(-1, 1)
90 | # Create Y set (Trend to fit)
91 | Y_train = train_df.dropna()[trend_col_name].values[:train_size].reshape(
92 | -1, 1)
93 | return X_train, Y_train
94 |
95 |
96 | def fill_seasonality(train_df,
97 | seas_period_days,
98 | seasonality_col_name='Seasonality'):
99 | """Fill empty seasonality dates"""
100 | delta = datetime.timedelta(days=-seas_period_days)
101 |     for i in train_df[train_df[seasonality_col_name].isnull()].index:
102 |         print(i, i + delta)
103 |         train_df.loc[i, seasonality_col_name] = train_df.loc[
104 |             i + delta, seasonality_col_name]
105 | return train_df
106 |
107 |
108 | ## Exports
109 |
110 |
111 | def prepare_export_df(train_df, output_col_names, y_pred_col_name):
112 | """Reformat results for Export to DM"""
113 | print(output_col_names)
114 | cols_to_load = list(output_col_names)
115 | cols_to_load.remove('index')
116 | print(cols_to_load)
117 | export_df = pd.DataFrame(train_df[cols_to_load])
118 | export_df.reset_index(inplace=True)
119 | export_df.rename(columns=output_col_names, inplace=True)
120 | return export_df
121 |
122 |
123 | def constitute_export_df(all_subset_exports, subset_col_name):
124 | """Create export-version Df from the export-version of subsets"""
125 | export_df = pd.DataFrame()
126 | for key in all_subset_exports:
127 | subset_df = all_subset_exports[key]
128 | subset_df[subset_col_name] = key
129 | export_df = pd.concat([export_df, subset_df], axis=0)
130 | return export_df
--------------------------------------------------------------------------------
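The helpers in `utils.py` above are meant to be chained: query the datamodel with PQL, pad missing weeks, then cap outliers. A minimal usage sketch follows; the datamodel ID and the PQL table/column names are placeholders (not part of the original module), and it assumes the module is importable as `utils`.

```python
# Hypothetical end-to-end usage of the helpers in utils.py above (sketch only).
# The datamodel ID and the PQL table/column names are placeholders.
from pycelonis import get_celonis
from utils import get_pql_dataframe, fill_empty_dates, cap_outliers

celonis = get_celonis()                                  # credentials from environment
dm = celonis.datamodels.find("YOUR DATAMODEL ID HERE")   # placeholder ID

# (PQL expression, pretty name) pairs, as expected by get_pql_dataframe
input_columns = [
    ('"ORDERS"."CREATED_AT"', "Date"),
    ('"ORDERS"."NET_ORDER_VALUE"', "Net Order Value"),
]

df = get_pql_dataframe(dm, input_columns, input_filter="")  # query the datamodel
df = fill_empty_dates(df)                                   # pad missing weeks with zeros
df = cap_outliers(df, max_outlier_value=1_000_000)          # clip extreme order values
```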
/pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/00_process_analysis_mover.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "attachments": {},
5 | "cell_type": "markdown",
6 | "metadata": {},
7 | "source": [
8 | "## Process Analytics Analysis Mover using Content CLI\n",
9 | "\n",
10 | "\n",
11 | "### This tutorial shows how to copy an analysis from one team/process analytics workspace to another one\n",
12 | "\n",
13 | "\n",
14 | " NOTE: This tutorial is intended for use with Process Analytics analyses. To pull data from Studio Analyses, see our Asset Mover Template\n",
15 | ""
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": 1,
21 | "metadata": {},
22 | "outputs": [],
23 | "source": [
24 | "source_team_url = 'INSERT TEAM URL HERE' #ex. https://your_team.celonis.cloud/\n",
25 | "source_api_key = 'API TOKEN HERE'\n",
26 | "destination_team_url = 'INSERT TEAM URL HERE' #ex. https://your_team.celonis.cloud/\n",
27 | "destination_api_key = 'API TOKEN HERE'"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "### Set environment variables for the source team setup"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 2,
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "# DO NOT MODIFY THIS CELL\n",
44 | "import os\n",
45 | "os.environ['TEAM_URL'] = source_team_url\n",
46 | "os.environ['API_TOKEN'] = source_api_key\n"
47 | ]
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {},
52 | "source": [
53 | "### Pull analysis from the source team"
54 | ]
55 | },
56 | {
57 | "attachments": {},
58 | "cell_type": "markdown",
59 | "metadata": {},
60 | "source": [
61 | "Looking for the analysis ID? Open the analysis and view the browser URL (ex. https://team.cluster.celonis.cloud/process-mining/analysis/dd277835-1140-4f30-98ea-1b190bd21cec/)"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 3,
67 | "metadata": {},
68 | "outputs": [
69 | {
70 | "name": "stdout",
71 | "output_type": "stream",
72 | "text": [
73 | "\u001b[32minfo\u001b[39m: File downloaded successfully. New filename: analysis_6b2166e2-0c40-43e2-b3e6-62996c7dae11.json\n"
74 | ]
75 | }
76 | ],
77 | "source": [
78 | "!content-cli pull analysis --id 'ANALYSIS ID HERE'"
79 | ]
80 | },
81 | {
82 | "cell_type": "markdown",
83 | "metadata": {},
84 | "source": [
85 | "### Set environment variables again for the destination team setup\n",
86 | "You can skip this step (re-defining the environment variables) if the source and destination teams are the same"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 4,
92 | "metadata": {},
93 | "outputs": [],
94 | "source": [
95 | "# DO NOT MODIFY THIS CELL\n",
96 | "os.environ['TEAM_URL'] = destination_team_url\n",
97 | "os.environ['API_TOKEN'] = destination_api_key"
98 | ]
99 | },
100 | {
101 | "attachments": {},
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "### Push analysis to the destination team\n",
106 | "\n",
107 | "Hint: Press tab while writing the file name to auto complete\n",
108 | "\n",
109 | "Looking for the workspace ID? Open Process Analytics, select the workspace, and view the browser URL (ex. https://team.cluster.celonis.cloud/process-mining/ui?workspaces=694c4a6a-af11-4fb1-90e7-ffbc609cce4f)"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": 5,
115 | "metadata": {},
116 | "outputs": [
117 | {
118 | "name": "stdout",
119 | "output_type": "stream",
120 | "text": [
121 | "\u001b[32minfo\u001b[39m: Analysis was pushed successfully. New ID: 07f700ff-20e3-4dd8-878b-c7fb6319b3b2\n"
122 | ]
123 | }
124 | ],
125 | "source": [
126 | "!content-cli push analysis --workspaceId 'WORKSPACE ID TO PUSH TO HERE' --file 'DOWNLOADED FILE FROM THE PREVIOUS STEP HERE' #ex. analysis_6b2166e2-0c40-43e2-b3e6-62996c7dae11.json"
127 | ]
128 | }
129 | ],
130 | "metadata": {
131 | "kernelspec": {
132 | "display_name": "Python 3",
133 | "language": "python",
134 | "name": "python3"
135 | },
136 | "language_info": {
137 | "codemirror_mode": {
138 | "name": "ipython",
139 | "version": 3
140 | },
141 | "file_extension": ".py",
142 | "mimetype": "text/x-python",
143 | "name": "python",
144 | "nbconvert_exporter": "python",
145 | "pygments_lexer": "ipython3",
146 | "version": "3.8.3-final"
147 | }
148 | },
149 | "nbformat": 4,
150 | "nbformat_minor": 4
151 | }
152 |
--------------------------------------------------------------------------------
/pycelonis1/00_manage_celonis/01_misc/01_use_case_version_control.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Create text-based backups of analyses and transformations (for git)\n",
8 | "\n",
9 | "\n",
10 | "### This script backs up all analyses and transformations into a backup folder; the user only needs to provide a Celonis object and a folder.\n"
11 | ]
12 | },
13 | {
14 | "source": [
15 | "### Do imports, log in to the Celonis instance, create backup folder"
16 | ],
17 | "cell_type": "markdown",
18 | "metadata": {}
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 1,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "name": "stdout",
27 | "output_type": "stream",
28 | "text": [
29 | "2019-10-08 15:36:00 - Login successful! Hello Simon Riezebos\n"
30 | ]
31 | }
32 | ],
33 | "source": [
34 | "from pycelonis import get_celonis\n",
35 | "import shutil\n",
36 | "from pathlib import Path\n",
37 | "from pycelonis.utils.api_utils import pathify\n",
38 | "\n",
39 | "celonis = get_celonis(read_only=True)\n",
40 | "backup_path = Path('IBC Backup')\n",
41 | "if not backup_path.exists():\n",
42 | " backup_path.mkdir()"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "### Create or clean analyses backup folder"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 3,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": [
58 | "analyses_path = backup_path / \"Analyses\"\n",
59 | "if analyses_path.exists():\n",
60 | " shutil.rmtree(analyses_path)\n",
61 | "analyses_path.mkdir()"
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "metadata": {},
67 | "source": [
68 | "### Create backups of all published analyses, in separate workspace directories"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": 4,
74 | "metadata": {},
75 | "outputs": [],
76 | "source": [
77 | "for workspace in celonis.workspaces:\n",
78 | " workspace_path = analyses_path / pathify(workspace.name)\n",
79 | " workspace_path.mkdir()\n",
80 | " for a in workspace.analyses:\n",
81 | " if a.data.get('lastPublishedDraftId') is not None:\n",
82 | " a.backup_content(workspace_path)"
83 | ]
84 | },
85 | {
86 | "cell_type": "markdown",
87 | "metadata": {},
88 | "source": [
89 | "### (Optional) Remove all draft files to only see published changes"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": 5,
95 | "metadata": {},
96 | "outputs": [],
97 | "source": [
98 | "for path in analyses_path.rglob('*'):\n",
99 | " if path.name.startswith(\"draft\") and path.suffix == \".json\":\n",
100 | " path.unlink()"
101 | ]
102 | },
103 | {
104 | "cell_type": "markdown",
105 | "metadata": {},
106 | "source": [
107 | "### Create or clean transformation backup folder"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 4,
113 | "metadata": {},
114 | "outputs": [],
115 | "source": [
116 | "transformation_path = backup_path / \"Transformations\"\n",
117 | "if transformation_path.exists():\n",
118 | " shutil.rmtree(transformation_path)\n",
119 | "transformation_path.mkdir()"
120 | ]
121 | },
122 | {
123 | "cell_type": "markdown",
124 | "metadata": {},
125 | "source": [
126 | "### Create backups of all transformations in separate Pool and Data Job directories"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": 5,
132 | "metadata": {},
133 | "outputs": [],
134 | "source": [
135 | "for pool in celonis.pools:\n",
136 | " pool_path = transformation_path / pathify(pool.name)\n",
137 | " pool_path.mkdir()\n",
138 | " for job in pool.data_jobs:\n",
139 | " job_path = pool_path / pathify(job.name)\n",
140 | " job_path.mkdir()\n",
141 | " for tm in job.transformations:\n",
142 | " try:\n",
143 | " tm.backup_content(job_path)\n",
144 | " except:\n",
145 | " pass"
146 | ]
147 | },
148 | {
149 | "cell_type": "markdown",
150 | "metadata": {},
151 | "source": [
152 | "### (Optional) Initialize a git repo\n",
153 | "Navigate to the backup_path on the command line and run:\n",
154 | "```\n",
155 | "git init\n",
156 | "git add .\n",
157 | "git commit -m \"Activating version control\"\n",
158 | "```\n",
159 | "When you re-run this notebook, all changes will be easy to inspect and can be committed again"
160 | ]
161 | }
162 | ],
163 | "metadata": {
164 | "jupytext": {
165 | "formats": "ipynb,py:percent"
166 | },
167 | "kernelspec": {
168 | "display_name": "Python 3",
169 | "language": "python",
170 | "name": "python3"
171 | },
172 | "language_info": {
173 | "codemirror_mode": {
174 | "name": "ipython",
175 | "version": 3
176 | },
177 | "file_extension": ".py",
178 | "mimetype": "text/x-python",
179 | "name": "python",
180 | "nbconvert_exporter": "python",
181 | "pygments_lexer": "ipython3",
182 | "version": "3.6.8"
183 | }
184 | },
185 | "nbformat": 4,
186 | "nbformat_minor": 4
187 | }
--------------------------------------------------------------------------------
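If you prefer to automate the git step from the notebook above instead of running it manually, a sketch along these lines works; it assumes the backup folder ("IBC Backup") has already been initialized once with `git init` and that git is available on the PATH.

```python
# Optional automation of the git commit step (sketch, not part of the original notebook).
# Assumes "IBC Backup" is already a git repository (git init was run once) and git is on PATH.
import subprocess
from datetime import datetime
from pathlib import Path

backup_path = Path("IBC Backup")

subprocess.run(["git", "add", "."], cwd=backup_path, check=True)
# Only commit if the backup actually changed since the last run.
diff = subprocess.run(["git", "diff", "--cached", "--quiet"], cwd=backup_path)
if diff.returncode != 0:
    message = f"Backup {datetime.now():%Y-%m-%d %H:%M}"
    subprocess.run(["git", "commit", "-m", message], cwd=backup_path, check=True)
```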
/pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/08_replacer.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Replacer\n",
8 | "\n",
9 | "### This tutorial shows how to replace any text in a whole analysis. \n",
10 | "Be careful: only replace unambiguous keywords, otherwise you might also replace words or word parts you did not mean to change.\n",
11 | "\n"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "### First connect to the analysis and indicate what should be replaced\n",
19 | "It is recommended to use the ID of the respective analysis."
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 36,
25 | "metadata": {},
26 | "outputs": [
27 | {
28 | "name": "stdout",
29 | "output_type": "stream",
30 | "text": [
31 | "2020-02-17 08:20:12 - pycelonis: Login successful! Hello s.matthaei@celonis.com\n"
32 | ]
33 | }
34 | ],
35 | "source": [
36 | "import json\n",
37 | "from pycelonis import get_celonis\n",
38 | "\n",
39 | "celonis = get_celonis(\"URL to the team in which you are working.\", \"Specify a valid API key for the cloud team.\")\n",
40 | "analysis = celonis.analyses.find(\"Name or ID of the analysis.\")\n",
41 | "\n",
42 | "# enter as many replacements as you want and separate them with a comma\n",
43 | "replacements ={\n",
44 | " 'old_word_1' : 'new_word_1',\n",
45 | " 'old_word_2' : 'new_word_2'\n",
46 | "}"
47 | ]
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {},
52 | "source": [
53 | "### Get the current published version of the analysis, the draft from edit mode, and the saved formulas"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": 40,
59 | "metadata": {},
60 | "outputs": [],
61 | "source": [
62 | "num_replacements = 0\n",
63 | "doc_p = analysis.published.data\n",
64 | "doc_d = analysis.draft.data\n",
65 | "kpis = analysis.saved_formulas"
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "metadata": {},
71 | "source": [
72 | "### Replace in the formulas"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 38,
78 | "metadata": {},
79 | "outputs": [
80 | {
81 | "name": "stdout",
82 | "output_type": "stream",
83 | "text": [
84 | "Replacements in the formulas: 78\n"
85 | ]
86 | }
87 | ],
88 | "source": [
89 | "for kpi in kpis:\n",
90 | "    name = kpi.data[\"name\"]\n",
91 | "    template = kpi.data[\"template\"]\n",
92 | "    description = kpi.data[\"description\"]\n",
93 | "    parameters = kpi.data[\"parameters\"]\n",
94 | "    for key, val in replacements.items():\n",
95 | "        num_replacements += name.count(key) + template.count(key) + description.count(key)\n",
96 | "        name = name.replace(key, val)\n",
97 | "        template = template.replace(key, val)\n",
98 | "        description = description.replace(key, val)\n",
99 | "    \n",
100 | "    # Delete and recreate each formula once, after all replacements are applied\n",
101 | "    kpi.delete()\n",
102 | "    analysis.create_saved_formula(name=name, description=description, template=template, parameters=parameters)\n",
103 | "print('Replacements in the formulas: ' + str(num_replacements))"
104 | ]
105 | },
106 | {
107 | "cell_type": "markdown",
108 | "metadata": {},
109 | "source": [
110 | "### Replace in the published and drafted analysis"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 39,
116 | "metadata": {},
117 | "outputs": [
118 | {
119 | "name": "stdout",
120 | "output_type": "stream",
121 | "text": [
122 | "Overall replacements: 143\n"
123 | ]
124 | }
125 | ],
126 | "source": [
127 | "json_doc_dump_p = json.dumps(doc_p, ensure_ascii=False)\n",
128 | "json_doc_dump_p = json_doc_dump_p.replace(\"â¬\", \"€\")\n",
129 | "\n",
130 | "json_doc_dump_d = json.dumps(doc_d, ensure_ascii=False)\n",
131 | "json_doc_dump_d = json_doc_dump_d.replace(\"â¬\", \"€\")\n",
132 | "\n",
133 | "for key, val in replacements.items():\n",
134 | " num_replacements += json_doc_dump_p.count(key)\n",
135 | " json_doc_dump_p = json_doc_dump_p.replace(key, val)\n",
136 | " json_doc_dump_d = json_doc_dump_d.replace(key, val)\n",
137 | " \n",
138 | "json_doc_dump_p = json_doc_dump_p.replace(\"â¬\", \"€\")\n",
139 | "json_doc_dump_d = json_doc_dump_d.replace(\"â¬\", \"€\")\n",
140 | "\n",
141 | "doc_p = json.loads(json_doc_dump_p)\n",
142 | "doc_d = json.loads(json_doc_dump_d)\n",
143 | "\n",
144 | "analysis.draft.data = doc_d\n",
145 | "analysis.published.data = doc_p\n",
146 | "\n",
147 | "print('Overall replacements: ' + str(num_replacements))"
148 | ]
149 | }
150 | ],
151 | "metadata": {
152 | "jupytext": {
153 | "formats": "ipynb,py:percent"
154 | },
155 | "kernelspec": {
156 | "display_name": "Python 3",
157 | "language": "python",
158 | "name": "python3"
159 | },
160 | "language_info": {
161 | "codemirror_mode": {
162 | "name": "ipython",
163 | "version": 3
164 | },
165 | "file_extension": ".py",
166 | "mimetype": "text/x-python",
167 | "name": "python",
168 | "nbconvert_exporter": "python",
169 | "pygments_lexer": "ipython3",
170 | "version": "3.7.6"
171 | }
172 | },
173 | "nbformat": 4,
174 | "nbformat_minor": 4
175 | }
--------------------------------------------------------------------------------
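Because the replacements in the notebook above are irreversible once written back, it can help to dry-run the keyword counts first. A small sketch, reusing the `analysis` and `replacements` objects already defined in the notebook; nothing is written back to Celonis.

```python
# Dry run (sketch): count keyword occurrences in the published and draft analysis documents
# before replacing anything. Reuses `analysis` and `replacements` from the notebook above.
import json

published_json = json.dumps(analysis.published.data, ensure_ascii=False)
draft_json = json.dumps(analysis.draft.data, ensure_ascii=False)
for old_word in replacements:
    hits = published_json.count(old_word) + draft_json.count(old_word)
    print(f"'{old_word}': {hits} occurrence(s)")
```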
/pycelonis1/03_Connectivity/18_EMS_Data_Consumption_Report.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "4090f679",
6 | "metadata": {},
7 | "source": [
8 | "# Pull and analyze the APC consumption report\n",
9 | "\n",
10 | "##### Recommendation\n",
11 | "Use the recent (Oct 2021) EMS feature 'Pipeline Monitoring' to easily and flexibly analyze your APC and even your Job executions within Analyses. Documentation is here: https://help.celonis.cloud/help/display/CIBC/Custom+Data+Pipeline+Monitoring\n",
12 | "##### Purpose of this script\n",
13 | "Allows you to analyze the full APC consumption report (https://TEAM.CLUSTER.celonis.cloud/integration/ui/data-consumption) within the MLW or other environments by pulling it with Python. This complements the EMS features by enabling export and APC aggregation by Data Pool.\n",
14 | "\n",
15 | "#### Inputs\n",
16 | "None if run from the ML Workbench (MLW).\n",
17 | "An API token if run outside the MLW.\n",
18 | "\n",
19 | "#### Outputs\n",
20 | "Consumption report with used GB per table and data pool:\n",
21 | "* as pandas DataFrame\n",
22 | "* as CSV file\n",
23 | "\n",
24 | "#### Steps\n",
25 | "1. Import and connect\n",
26 | "2. Fetch data\n",
27 | "3. Process date and data volume\n",
28 | "4. Analyze (group by Data Pool)\n",
29 | "5. Export as CSV"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "id": "aa8a37bb",
35 | "metadata": {},
36 | "source": [
37 | "## Import and connect"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": null,
43 | "id": "5ed198a9",
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "import pandas as pd\n",
48 | "from pycelonis import get_celonis\n",
49 | "from datetime import datetime as dt\n",
50 | "import time"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "id": "b1c1089b",
57 | "metadata": {},
58 | "outputs": [],
59 | "source": [
60 | "c = get_celonis()"
61 | ]
62 | },
63 | {
64 | "cell_type": "markdown",
65 | "id": "2e46ad2e",
66 | "metadata": {},
67 | "source": [
68 | "## Fetch data"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": null,
74 | "id": "e5a274cc",
75 | "metadata": {},
76 | "outputs": [],
77 | "source": [
78 | "def get_consumption_df(c):\n",
79 | " page = 0\n",
80 | " df=pd.DataFrame()\n",
81 | " \n",
82 | " # Iterate over pages of data consumption\n",
83 | " while True: # while true + if -> break \n",
84 | " url = f\"{c.url}/integration//api/pools/data-consumption/?limit=5000&page={page}&sort=consumptionInBytesZA\"\n",
85 | " consumption_table = c.api_request(url, message = 'None', method = 'GET', get_json = True)\n",
86 | " t_list = consumption_table[\"extendedTableConsumptionTransports\"]\n",
87 | " if len(t_list) == 0:\n",
88 | " # Reached last page: no more data\n",
89 | " break\n",
90 | " df = pd.concat([df,pd.DataFrame(t_list)])\n",
91 | " page += 1\n",
92 | " # Limit api request rate\n",
93 | " time.sleep(1)\n",
94 | " return df\n",
95 | "\n",
96 | "df_consumption_ = get_consumption_df(c)"
97 | ]
98 | },
99 | {
100 | "cell_type": "markdown",
101 | "id": "f35588f9",
102 | "metadata": {},
103 | "source": [
104 | "## Transform"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": null,
110 | "id": "97d49319",
111 | "metadata": {},
112 | "outputs": [],
113 | "source": [
114 | "df_consumption = df_consumption_.copy()\n",
115 | "df_consumption[\"rawDataSizeGB\"] = df_consumption[\"rawDataSize\"] / (1024**3)\n",
116 | "df_consumption[\"lastUpdateDt\"] = pd.to_datetime(df_consumption[\"lastUpdate\"], unit='ms')\n",
117 | "df_consumption = df_consumption[[\"dataPoolId\", \"dataPoolName\", \"tableName\", \"rawDataSizeGB\", \"lastUpdateDt\"]]\n",
118 | "df_consumption.head() "
119 | ]
120 | },
121 | {
122 | "cell_type": "markdown",
123 | "id": "de1b6e00",
124 | "metadata": {},
125 | "source": [
126 | "## Analyze"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": null,
132 | "id": "369e9c6d",
133 | "metadata": {},
134 | "outputs": [],
135 | "source": [
136 | "df_consumption_summary = df_consumption.groupby([\"dataPoolId\", \"dataPoolName\"]).agg({\"rawDataSizeGB\":sum, \"lastUpdateDt\":min}).reset_index()\n",
137 | "df_consumption_summary = df_consumption_summary.sort_values(\"rawDataSizeGB\", ascending=False)"
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": null,
143 | "id": "a108f5bb",
144 | "metadata": {},
145 | "outputs": [],
146 | "source": [
147 | "df_consumption_summary[\"rawDataSizeGB\"].sum()"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": null,
153 | "id": "2ea4a47c",
154 | "metadata": {},
155 | "outputs": [],
156 | "source": [
157 | "df_consumption_summary"
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": null,
163 | "id": "a9702873",
164 | "metadata": {},
165 | "outputs": [],
166 | "source": [
167 | "# Details per table\n",
168 | "df_consumption.sort_values(\"rawDataSizeGB\", ascending=False)"
169 | ]
170 | },
171 | {
172 | "cell_type": "markdown",
173 | "id": "bd112b97",
174 | "metadata": {},
175 | "source": [
176 | "## Export"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": null,
182 | "id": "29d728b3",
183 | "metadata": {},
184 | "outputs": [],
185 | "source": [
186 | "def to_csv(df, name):\n",
187 | " df.to_csv(f\"{name}_{dt.now().strftime('%Y-%m-%d_%Hh%M')}.csv\", sep=';', decimal=',')"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": null,
193 | "id": "ad9f9cd7",
194 | "metadata": {},
195 | "outputs": [],
196 | "source": [
197 | "to_csv(df_consumption_summary, \"consumption_summary\")\n",
198 | "to_csv(df_consumption, \"consumption_details\")"
199 | ]
200 | }
201 | ],
202 | "metadata": {
203 | "kernelspec": {
204 | "display_name": "Python 3",
205 | "language": "python",
206 | "name": "python3"
207 | },
208 | "language_info": {
209 | "codemirror_mode": {
210 | "name": "ipython",
211 | "version": 3
212 | },
213 | "file_extension": ".py",
214 | "mimetype": "text/x-python",
215 | "name": "python",
216 | "nbconvert_exporter": "python",
217 | "pygments_lexer": "ipython3",
218 | "version": "3.8.8"
219 | }
220 | },
221 | "nbformat": 4,
222 | "nbformat_minor": 5
223 | }
224 |
--------------------------------------------------------------------------------
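As an alternative to the CSV export, the summary can also be pushed back into a data pool with `push_table`, mirroring the call used in the Datadog extractor elsewhere in this repository. A sketch, reusing `c` and `df_consumption_summary` from the notebook above; the pool and table names are placeholders.

```python
# Optional (sketch): push the consumption summary into a data pool instead of exporting CSVs.
# Reuses `c` and `df_consumption_summary` from the notebook above; names below are placeholders.
data_pool = c.pools.find("YOUR DATA POOL NAME OR ID HERE")
data_pool.push_table(
    df_consumption_summary,
    "APC_CONSUMPTION_SUMMARY",   # placeholder target table name
    if_exists="upsert",          # same pattern as the Datadog extractor in this repo
    primary_keys=["dataPoolId"],
)
```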
/pycelonis1/01_use_pycelonis/00_basics/00_connecting_to_celonis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Connecting to Celonis"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "### The Celonis object\n",
15 | "\n",
16 | "Import the get_celonis function and call it. This will return either an IBC object or a CPM4 object. **Permissions are determined by the App/API key you use to log in!** Set `read_only` to `True` if you want to make sure you can't break anything."
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 1,
22 | "metadata": {
23 | "scrolled": true
24 | },
25 | "outputs": [
26 | {
27 | "name": "stdout",
28 | "output_type": "stream",
29 | "text": [
30 | "2019-10-08 12:01:08 - Login successful! Hello Simon Riezebos\n"
31 | ]
32 | }
33 | ],
34 | "source": [
35 | "from pycelonis import get_celonis\n",
36 | "\n",
37 | "celonis = get_celonis()"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "metadata": {},
43 | "source": [
44 | "### (Optional) specify login details manually\n",
45 | "By default `pycelonis` will get the login information from environment variables. See the `get_celonis` documentation for more details. You can also specify them manually."
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": null,
51 | "metadata": {},
52 | "outputs": [],
53 | "source": [
54 | "login = {\n",
55 | " \"celonis_url\": \"demo.eu-1.celonis.cloud\",\n",
56 | " \"api_token\": \"paste_here_your_api_token\",\n",
57 | " #The following 2 lines are only necessary when connecting to CPM4.5, not for IBC:\n",
58 | " #\"api_id\": \"paste_here_your_api_id\", \n",
59 | " #\"username\": \"paste_here_your_username\",\n",
60 | "}\n",
61 | "celonis_manual = get_celonis(**login)"
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "metadata": {},
67 | "source": [
68 | "### Working with (Celonis) objects in Jupyter Notebook"
69 | ]
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {},
74 | "source": [
75 | "The object returned by `get_celonis` is your portal into Celonis. **Press tab after `celonis.`** to see the available methods and attributes."
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": null,
81 | "metadata": {},
82 | "outputs": [],
83 | "source": [
84 | "celonis."
85 | ]
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {},
90 | "source": [
91 | "Objects in Celonis can be found using their ID or a (substring of their) name. **Press shift-tab inside the parentheses** to see the signature and documentation of each function."
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": 3,
97 | "metadata": {},
98 | "outputs": [
99 | {
100 | "data": {
101 | "text/plain": [
102 | ""
103 | ]
104 | },
105 | "execution_count": 3,
106 | "metadata": {},
107 | "output_type": "execute_result"
108 | }
109 | ],
110 | "source": [
111 | "analysis = celonis.analyses.find('117f7528-8504-4450-9fd6-8ebcf1749d18')\n",
112 | "analysis"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "Objects can also be accessed directly with auto-complete using the `.names[]` or `.ids[]` property of a collection."
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": [
128 | "celonis.datamodels.names[<-PRESS TAB HERE]"
129 | ]
130 | },
131 | {
132 | "cell_type": "markdown",
133 | "metadata": {},
134 | "source": [
135 | "### Advanced: access the API data of a Celonis object\n",
136 | "Almost every pycelonis object that represents an object in Celonis has a `.data` property that shows the JSON data from the Celonis API. This data is **automatically refreshed**, and if **changes are made to this data, `pycelonis` tries to make the same changes in the object in Celonis.**"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": 5,
142 | "metadata": {},
143 | "outputs": [
144 | {
145 | "data": {
146 | "text/plain": [
147 | "{'permissions': ['MOVE_TO',\n",
148 | " 'DELETE_WORKSPACE',\n",
149 | " 'CREATE_WORKSPACE',\n",
150 | " 'DELETE_ALL_WORKSPACES',\n",
151 | " 'DELETE_ALL_ANALYSES',\n",
152 | " 'EDIT_ALL_ANALYSES',\n",
153 | " 'EDIT_ALL_WORKSPACES',\n",
154 | " 'USE_ALL_ANALYSES',\n",
155 | " 'CREATE_ANALYSES',\n",
156 | " 'DELETE_ANALYSIS',\n",
157 | " 'EDIT_WORKSPACE',\n",
158 | " 'MANAGE_PERMISSIONS',\n",
159 | " 'EXPORT_CONTENT',\n",
160 | " 'USE_ANALYSIS',\n",
161 | " 'EDIT_ANALYSIS'],\n",
162 | " 'id': '117f7528-8504-4450-9fd6-8ebcf1749d18',\n",
163 | " 'tenantId': None,\n",
164 | " 'name': 'OTD Prediction Binary - Technical App (OAF) - Copy',\n",
165 | " 'description': None,\n",
166 | " 'deleted': False,\n",
167 | " 'transportId': None,\n",
168 | " 'lastPublishedDraftId': '7f82df02-b728-4ca3-acdf-1940dd7de7b0',\n",
169 | " 'autoSaveId': '2e47dccc-8cbf-400d-8404-72e1f5298d0d',\n",
170 | " 'processId': 'acb6313c-bba8-46fd-9637-24c7d5463746',\n",
171 | " 'createDate': 1556264369787,\n",
172 | " 'favourite': False,\n",
173 | " 'contentId': None,\n",
174 | " 'contentVersion': 0,\n",
175 | " 'tags': [{'name': 'WillBeDeleted'}],\n",
176 | " 'applicationId': '',\n",
177 | " 'publicLink': False,\n",
178 | " 'lastPublishedDate': 1564498481791,\n",
179 | " 'lastPublishedUser': 'Simon',\n",
180 | " 'objectId': '117f7528-8504-4450-9fd6-8ebcf1749d18',\n",
181 | " 'publishedDraftId': '7f82df02-b728-4ca3-acdf-1940dd7de7b0',\n",
182 | " 'folderId': '117f7528-8504-4450-9fd6-8ebcf1749d18',\n",
183 | " 'parentObjectId': 'acb6313c-bba8-46fd-9637-24c7d5463746'}"
184 | ]
185 | },
186 | "execution_count": 5,
187 | "metadata": {},
188 | "output_type": "execute_result"
189 | }
190 | ],
191 | "source": [
192 | "analysis.data"
193 | ]
194 | }
195 | ],
196 | "metadata": {
197 | "jupytext": {
198 | "formats": "ipynb,py:percent"
199 | },
200 | "kernelspec": {
201 | "display_name": "Python 3",
202 | "language": "python",
203 | "name": "python3"
204 | },
205 | "language_info": {
206 | "codemirror_mode": {
207 | "name": "ipython",
208 | "version": 3
209 | },
210 | "file_extension": ".py",
211 | "mimetype": "text/x-python",
212 | "name": "python",
213 | "nbconvert_exporter": "python",
214 | "pygments_lexer": "ipython3",
215 | "version": "3.7.3"
216 | }
217 | },
218 | "nbformat": 4,
219 | "nbformat_minor": 4
220 | }
--------------------------------------------------------------------------------
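To make the `.data` mechanism from the notebook above concrete, here is a short sketch that only reads from it; the analysis ID is a placeholder and the keys are taken from the sample output shown in the notebook.

```python
# Sketch: reading fields from the .data property described in the notebook above.
# Assumes `celonis` from the notebook; the analysis ID is a placeholder.
analysis = celonis.analyses.find("YOUR ANALYSIS ID HERE")
info = analysis.data                                           # JSON dict from the Celonis API
print(info["name"], info["createDate"])                        # keys taken from the sample output
is_published = info.get("lastPublishedDraftId") is not None    # same check as the backup notebook
print("published:", is_published)
```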
/pycelonis1/06_Extractors/03_Datadog_log_data_extraction.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# How to Extract Datadog Log Data Into the IBC"
8 | ]
9 | },
10 | {
11 | "cell_type": "raw",
12 | "metadata": {},
13 | "source": [
14 | "Documentation: https://confluence.celonis.com/pages/viewpage.action?pageId=105841328\n",
15 | "Placeholder for use case-specific info is XXXXX"
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "## Set up Required Packages and Settings"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": null,
28 | "metadata": {},
29 | "outputs": [],
30 | "source": [
31 | "import pandas as pd\n",
32 | "from pandas.io.json import json_normalize\n",
33 | "from pycelonis import get_celonis\n",
34 | "import requests\n",
35 | "from time import sleep\n",
36 | "\n",
37 | "pd.set_option('max_colwidth', 300)"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "metadata": {},
43 | "source": [
44 | "## Define API Requests Parameters"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": null,
50 | "metadata": {},
51 | "outputs": [],
52 | "source": [
53 | "baseUrlLogs = 'https://api.datadoghq.com/api/v1/logs-queries/list'\n",
54 | "\n",
55 | "headers = {\n",
56 | " 'content-type': 'application/json',\n",
57 | " 'DD-API-KEY': 'XXXXX',\n",
58 | " 'DD-APPLICATION-KEY': 'XXXXX'\n",
59 | "}\n",
60 | "\n",
61 | "\n",
62 | "startDate = \"XXXXX\" #ISO-8601 string, unix timestamp or relative time (such as \"now-1h\" or \"now-1d\")\n",
63 | "endDate = \"XXXXX\" #ISO-8601 string, unix timestamp or relative time (such as \"now\")\n",
64 | "\n",
65 | " \n",
66 | "bodyLogXXXXX = {\n",
67 | " \"query\": \"XXXXX\", #Datadog log explorer query, e.g.:\"@errorType:(INTERNAL OR EXTERNAL)\"\n",
68 | " \"sort\": \"asc\",\n",
69 | " \"time\": {\n",
70 | " \"from\": startDate,\n",
71 | " \"to\": endDate\n",
72 | " },\n",
73 | " \"limit\": 1,\n",
74 | "}"
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {},
80 | "source": [
81 | "## Define Required Helper Functions"
82 | ]
83 | },
84 | {
85 | "cell_type": "markdown",
86 | "metadata": {},
87 | "source": [
88 | "#### Get newest log ID"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": null,
94 | "metadata": {},
95 | "outputs": [],
96 | "source": [
97 | "def get_newestLogId_logXXXXX():\n",
98 | " print('Getting newest log ID')\n",
99 | " newestLogId = \"\"\n",
100 | " response = requests.post(baseUrlLogs, headers=headers, json=bodyLogXXXXX)\n",
101 | " newestLogId = json_normalize(response.json()['logs'])['id'] \n",
102 | " newestLogId = newestLogId.to_string(index=False)[1:]\n",
103 | " print('Newest log ID retrieved')\n",
104 | " return newestLogId"
105 | ]
106 | },
107 | {
108 | "cell_type": "markdown",
109 | "metadata": {},
110 | "source": [
111 | "## Fetch Data"
112 | ]
113 | },
114 | {
115 | "cell_type": "markdown",
116 | "metadata": {},
117 | "source": [
118 | "#### Get most current log ID as starting point for API request"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": null,
124 | "metadata": {},
125 | "outputs": [],
126 | "source": [
127 | "nextLogId_logXXXXX = get_newestLogId_logXXXXX()"
128 | ]
129 | },
130 | {
131 | "cell_type": "markdown",
132 | "metadata": {},
133 | "source": [
134 | "#### Get log data and convert it to dataframe"
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": null,
140 | "metadata": {},
141 | "outputs": [],
142 | "source": [
143 | "print('Getting log XXXXX')\n",
144 | "logXXXXX = pd.DataFrame()\n",
145 | "while nextLogId_logXXXXX is not None:\n",
146 | " response = requests.post(baseUrlLogs, headers=headers, json={\n",
147 | " \"limit\": 1000,\n",
148 | " \"query\": \"XXXXX\",\n",
149 | " \"startAt\": nextLogId_logXXXXX,\n",
150 | " \"sort\": \"asc\",\n",
151 | " \"time\": {\n",
152 | " \"from\": startDate,\n",
153 | " \"to\": endDate\n",
154 | " },\n",
155 | " }) \n",
156 | " data = json_normalize(response.json()['logs'])[['XXXXX'\n",
157 | " , 'XXXXX'\n",
158 | " , ...\n",
159 | " ]]\n",
160 | " \n",
161 | " #additional functionalities to isolate information from message string, set data types, etc. as required\n",
162 | " \n",
163 | " nextLogId_logXXXXX = response.json()['nextLogId']\n",
164 | " logXXXXX = logXXXXX.append(data, ignore_index=True, sort=False)\n",
165 | " sleep(0.1)\n",
166 | "print('Log XXXXX retrieved')"
167 | ]
168 | },
169 | {
170 | "cell_type": "markdown",
171 | "metadata": {},
172 | "source": [
173 | "## Push Data to the IBC"
174 | ]
175 | },
176 | {
177 | "cell_type": "markdown",
178 | "metadata": {},
179 | "source": [
180 | "#### Connect to IBC team and identify data pool (here: manually)"
181 | ]
182 | },
183 | {
184 | "cell_type": "code",
185 | "execution_count": null,
186 | "metadata": {},
187 | "outputs": [],
188 | "source": [
189 | "login = {\n",
190 | " 'celonis_url': 'XXXXX',\n",
191 | " 'api_token': 'XXXXX',\n",
192 | "}\n",
193 | "celonis_manual = get_celonis(**login)\n",
194 | "\n",
195 | "data_pool = celonis_manual.pools.find('XXXXX')\n",
196 | "data_pool"
197 | ]
198 | },
199 | {
200 | "cell_type": "markdown",
201 | "metadata": {},
202 | "source": [
203 | "#### Push dataframes into IBC team/data pool"
204 | ]
205 | },
206 | {
207 | "cell_type": "code",
208 | "execution_count": null,
209 | "metadata": {},
210 | "outputs": [],
211 | "source": [
212 | "print('Starting to push data to IBC')\n",
213 | "data_pool.push_table(logXXXXX, 'DD_LOG_XXXXX', if_exists = 'upsert', primary_keys = ['id'])\n",
214 | "print('Data push successful')"
215 | ]
216 | }
217 | ],
218 | "metadata": {
219 | "kernelspec": {
220 | "display_name": "Python 3",
221 | "language": "python",
222 | "name": "python3"
223 | },
224 | "language_info": {
225 | "codemirror_mode": {
226 | "name": "ipython",
227 | "version": 3
228 | },
229 | "file_extension": ".py",
230 | "mimetype": "text/x-python",
231 | "name": "python",
232 | "nbconvert_exporter": "python",
233 | "pygments_lexer": "ipython3",
234 | "version": "3.7.6"
235 | }
236 | },
237 | "nbformat": 4,
238 | "nbformat_minor": 4
239 | }
240 |
--------------------------------------------------------------------------------
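The Datadog loop above issues many sequential POST requests; for long extractions it can be worth retrying transient failures. A hedged sketch using only `requests` and `time` (both already imported in the notebook); `baseUrlLogs`, `headers` and the request body are the same objects as above.

```python
# Optional hardening (sketch): retry transient Datadog API failures with exponential backoff.
# Drop-in replacement for the plain requests.post calls in the notebook above.
import time
import requests

def post_with_retry(url, headers, body, max_retries=5):
    """POST with exponential backoff on connection errors, timeouts or HTTP errors."""
    for attempt in range(max_retries):
        try:
            response = requests.post(url, headers=headers, json=body, timeout=30)
            response.raise_for_status()
            return response
        except requests.exceptions.RequestException as exc:
            if attempt == max_retries - 1:
                raise
            wait = 2 ** attempt
            print(f"Request failed ({exc}); retrying in {wait}s")
            time.sleep(wait)

# Example: response = post_with_retry(baseUrlLogs, headers, bodyLogXXXXX)
```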
/pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/utils/plot_utils.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from statsmodels.graphics import tsaplots
3 |
4 |
5 | def plot_clean_y(df, train_df, y_max):
6 | """Plot Pre-processed Y"""
7 | fig, ax = plt.subplots(figsize=(20, 10))
8 | plt.plot(df["Date"], df["Net Order Value"], c="c", label="Y Original")
9 | plt.plot(train_df["Date"], train_df["Net Order Value"], c="b", label="Y")
10 | plt.legend(loc="upper right")
11 | plt.axis([min(train_df["Date"]), max(train_df["Date"]), 0, y_max])
12 | plt.show()
13 |
14 |
15 | def plot_gdp(ext_data, col_final):
16 | """Plot resulting Industry GDP"""
17 | fig, ax = plt.subplots(figsize=(20, 10))
18 | plt.plot(ext_data["DATE"], ext_data[col_final], c="b")
19 | plt.show()
20 |
21 |
22 | def plot_y_trend(train_df, t, y_min, y_max):
23 | """Plot Y and Trend"""
24 | fig, ax = plt.subplots(figsize=(20, 10))
25 | plt.plot(train_df["Date"], t, color="b", label="Trend")
26 | plt.plot(train_df["Date"],
27 | train_df["Net Order Value"],
28 | color="g",
29 | label="Y")
30 | plt.legend(loc="upper right")
31 | ax.set_ylim([y_min, y_max])
32 | plt.show()
33 |
34 |
35 | def plot_y_trend_ext(train_df, Y, exo_col_name, exo_pretty_name, y_min, y_max,
36 | y_min_exo, y_max_exo):
37 | """Plot Y, Trend and Exo Regressors"""
38 | fig, ax = plt.subplots(figsize=(20, 10))
39 | ax2 = ax.twinx()
40 | # Net Order Value
41 | ax.plot(train_df.index, train_df["Net Order Value"], color="g", label="Y")
42 | # External data/GDP
43 | ax2.plot(train_df.index,
44 | train_df[exo_col_name],
45 | color="c",
46 | label=exo_pretty_name)
47 | # Trend
48 | ax.plot(train_df.dropna().index[:len(Y)], Y, color="b", label="Trend")
49 | plt.legend(loc="upper right")
50 | ax.set_ylim([y_min, y_max])
51 | ax2.set_ylim([y_min_exo, y_max_exo])
52 | plt.show()
53 |
54 |
55 | def plot_y_pred_trend_ext(train_df, exo_col_name, X, Y, X_F, y_min, y_max,
56 | y_min_exo, y_max_exo):
57 | """Plot Predicted Y, Trend and Exo Regressors"""
58 | fig, ax = plt.subplots(figsize=(20, 10))
59 | ax2 = ax.twinx()
60 | # External Data/GDP
61 | ax2.plot(train_df[exo_col_name].dropna().index,
62 | train_df[exo_col_name].dropna(),
63 | color="m",
64 | label="External data (Full)")
65 | ax2.plot(train_df.dropna().index[:len(X)],
66 | X,
67 | color="c",
68 | label="External data (Train for Trend fit)")
69 | # Trend
70 | ax.plot(train_df.dropna().index[:len(Y)],
71 | Y,
72 | color="b",
73 | label="Trend (Train for Trend fit)")
74 | # Predicted Trend (through Reg)
75 | ax.plot(train_df[exo_col_name].dropna().index,
76 | train_df["Predicted Trend"][-len(X_F):],
77 | color="g",
78 | label="Trend (Predicted)")
79 | ax.legend(loc="upper right")
80 | ax2.legend(loc="lower right")
81 | ax.set_ylim([y_min, y_max])
82 | ax2.set_ylim([y_min_exo, y_max_exo])
83 | plt.show()
84 |
85 |
86 | def plot_y_t_s(train_df, trend_col_name, seasonality_col_name):
87 | """Plot Y, T and S"""
88 | fig, ax = plt.subplots(figsize=(20, 10))
89 | plt.plot(train_df.index, train_df["Net Order Value"], color="g", label="Y")
90 | plt.plot(train_df.index, train_df[trend_col_name], color="b", label="T")
91 | plt.plot(train_df.index,
92 | train_df[trend_col_name] + train_df[seasonality_col_name],
93 | color="m",
94 | label="T+S")
95 | plt.legend(loc="upper right")
96 | plt.show()
97 |
98 |
99 | def plot_y_t_s_with_pred(train_df, trend_col_name, seasonality_col_name,
100 | pred_trend_col_name):
101 | """Plot Y, T, S and Predicted T + S"""
102 | fig, ax = plt.subplots(figsize=(20, 10))
103 | plt.plot(train_df.index, train_df["Net Order Value"], color="g", label="Y")
104 | plt.plot(train_df.index, train_df[trend_col_name], color="b", label="T")
105 | plt.plot(train_df.index,
106 | train_df[pred_trend_col_name],
107 | color="c",
108 | label="T Pred")
109 |
110 | plt.plot(train_df.index,
111 | train_df[trend_col_name] + train_df[seasonality_col_name],
112 | color="m",
113 | label="T+S")
114 | plt.plot(train_df.index,
115 | train_df[pred_trend_col_name] + train_df[seasonality_col_name],
116 | color="r",
117 | label="T Pred + S")
118 | plt.legend(loc="upper right")
119 | plt.show()
120 |
121 |
122 | def plot_r(train_df, r_col_name):
123 | """Plot Residuals"""
124 | fig, ax = plt.subplots(figsize=(20, 10))
125 | plt.plot(train_df.index, train_df[r_col_name], color="y", label="R")
126 | plt.legend(loc="upper right")
127 | plt.show()
128 |
129 |
130 | def plot_acf_pacf_r(r, lags):
131 | """Plot ACF and PACF plots for Residuals"""
132 | fig, ax = plt.subplots(2, 1, figsize=(20, 10))
133 | fig = tsaplots.plot_acf(r.dropna(), lags=lags, ax=ax[0])
134 | fig = tsaplots.plot_pacf(r.dropna(), lags=lags, ax=ax[1])
135 | plt.show()
136 |
137 |
138 | def plot_results(results):
139 | """Plot all Residual sets (train, Val and Forecast)"""
140 | fig, ax = plt.subplots(figsize=(20, 10))
141 | results["Date"] = results["Date"].astype(str)
142 | plt.plot(results["Date"], results["Predicted Net Order Value"], c="b")
143 | plt.plot(results["Date"], results["Actual Net Order Value"], c="r")
144 | plt.fill_between(results["Date"],
145 | results["Conf_lower"],
146 | results["Conf_Upper"],
147 | color="k",
148 | alpha=0.15)
149 | for i, tick in enumerate(ax.get_xticklabels()):
150 | tick.set_rotation(45)
151 | tick.set_visible(False)
152 | if i % 3 == 0:
153 | tick.set_visible(True)
154 | plt.show()
155 |
156 |
157 | def plot_final(train_df, trend_col_name, seasonality_col_name, r_col_name,
158 | trend_pred_col_name, y_pred_col_name, class_col_name):
159 | """Plot Y, T, S, R and Predicted Y with intermediary components"""
160 | fig, ax = plt.subplots(figsize=(20, 10))
161 | plt.plot(train_df.index, train_df["Net Order Value"], color="g", label="Y")
162 | plt.plot(train_df.index, train_df[trend_col_name], color="b", label="T")
163 | plt.plot(train_df.index,
164 | train_df[trend_col_name] + train_df[seasonality_col_name],
165 | color="m",
166 | label="T+S")
167 | # Seasonality
168 | plt.plot(train_df.index,
169 | train_df[seasonality_col_name],
170 | color="m",
171 | label="S")
172 | # Predicted Trend
173 | plt.plot(train_df.index,
174 | train_df[trend_pred_col_name],
175 | color="y",
176 | label="T Pred")
177 | plt.plot(train_df.index,
178 | train_df[trend_pred_col_name] + train_df[seasonality_col_name],
179 | color="k",
180 | label="T Pred + S")
181 | # Predicted Y on Validation part
182 | plt.plot(
183 | train_df[train_df[class_col_name] == "test"].index,
184 | train_df[train_df[class_col_name] == "test"][y_pred_col_name],
185 | color="c",
186 | label="Y Pred (val)",
187 | )
188 | # Predicted Y on Future part
189 | plt.plot(
190 | train_df[train_df[class_col_name] == "forecast"].index,
191 | train_df[train_df[class_col_name] == "forecast"][y_pred_col_name],
192 | color="r",
193 | label="Y Pred (future)",
194 | )
195 | plt.legend(loc="upper right")
196 | plt.show()
197 |
--------------------------------------------------------------------------------
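A short usage sketch for the plotting helpers above; it assumes `df` and `train_df` are the raw and pre-processed weekly dataframes from the forecasting example (with "Date" and "Net Order Value" columns), which is an assumption about the surrounding pipeline, not part of this module.

```python
# Hypothetical usage of the plotting helpers above (sketch only).
# Assumes `df` and `train_df` are weekly dataframes with "Date" and "Net Order Value" columns.
from plot_utils import plot_clean_y, plot_acf_pacf_r

plot_clean_y(df, train_df, y_max=train_df["Net Order Value"].max() * 1.1)

# Inspect autocorrelation after removing a 52-week rolling mean (one year of weekly data).
residuals = train_df["Net Order Value"] - train_df["Net Order Value"].rolling(52).mean()
plot_acf_pacf_r(residuals, lags=26)
```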
/pycelonis1/03_Connectivity/02b_Transformation_Mover.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Transformation (DataJob) Mover"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "**This tutorial shows how to copy transformations from one team/data pool to another, independent of the cluster.**\n",
15 | "\n"
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "**To do so we first need to connect to the source data job.**"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": null,
28 | "metadata": {},
29 | "outputs": [],
30 | "source": [
31 | "from pycelonis import get_celonis\n",
32 | "\n",
33 | "c_source = get_celonis(\"URL to the team from which you want to copy.\", \"Specify a valid API key for your source cloud team.\")\n",
34 | "source_data_pool = c_source.pools.find(\"Name or ID of the source data pool.\")\n",
35 | "source_data_job = source_data_pool.data_jobs.find(\"Name or ID of the source data job.\")"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {},
41 | "source": [
42 | "**In the next step we connect to the target data job.**"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": null,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "c_target = get_celonis(\"URL to cloud team where you want to copy to.\", \"Specify a valid API key for your target cloud team.\")\n",
52 | "target_data_pool = c_target.pools.find(\"Name or ID of the target data pool.\")\n",
53 | "target_data_job = target_data_pool.data_jobs.find(\"Name or ID of the target data job.\")"
54 | ]
55 | },
56 | {
57 | "cell_type": "markdown",
58 | "metadata": {},
59 | "source": [
60 | "**In this step we save all source global parameter IDs in a dictionary and overwrite them with the target global parameter ID if the parameter exists already in the target data pool.**"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": null,
66 | "metadata": {},
67 | "outputs": [],
68 | "source": [
69 | "global_vars = {}\n",
70 | "for source_var in source_data_pool.variables: # loop through global parameters of source data pool\n",
71 | " global_vars.update({source_var.id: None}) # save ID of source global parameter\n",
72 | " for target_var in target_data_pool.variables: # loop through the global parameters of target data pool\n",
73 | " if source_var.data['placeholder'].upper() == target_var.data['placeholder'].upper(): # if the placeholder of a source global parameter and a target global parameter match\n",
74 | " global_vars.update({source_var.id: target_var.id}) # match the saved ID of source global parameter wih the target global parameter ID\n",
75 | "print(\"Global parameter configurations saved.\")"
76 | ]
77 | },
78 | {
79 | "cell_type": "markdown",
80 | "metadata": {},
81 | "source": [
82 | "**This section serves to create the target transformation, the related transformation parameters and to copy over the template settings.**"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": null,
88 | "metadata": {},
89 | "outputs": [],
90 | "source": [
91 | "for source_transformation in source_data_job.transformations.data: # loop through source transformations\n",
92 | " \n",
93 | " if source_transformation.statement is None: # if the source transformation is empty, it will not be created\n",
94 | " continue\n",
95 | " \n",
96 | " # copy transformation from source to target data job:\n",
97 | " target_transformation = target_data_job.create_transformation(name=source_transformation.name, description=source_transformation.data['description'], statement=source_transformation.statement)\n",
98 | " print(\"Transformation: '\" + target_transformation.name + \"' created.\")\n",
99 | " \n",
100 | " for source_local_var in source_transformation.variables: # loop through the source transformation parameters\n",
101 | " if source_local_var['defaultSettings'] is not None: # create the connection for the default value to the target global parameters\n",
102 | " if source_local_var['defaultSettings']['poolVariableId'] is not None: \n",
103 | " target_id = global_vars.get(source_local_var['defaultSettings']['poolVariableId'])\n",
104 | " if target_id is None:\n",
105 | " para = target_data_pool.create_pool_parameter(source_data_pool.variables.find(source_local_var['defaultSettings']['poolVariableId'])) # create the target global parameter if it does not exist yet\n",
106 | " print(\"Pool Parameter '\" + para.name + \"' created.\")\n",
107 | " target_id = para.id\n",
108 | " global_vars[source_local_var['defaultSettings']['poolVariableId']] = target_id\n",
109 | " source_local_var['defaultSettings']['poolVariableId'] = target_id\n",
110 | " if source_local_var['settings'] is not None: # create the connection for the value to the target global parameters \n",
111 | " if source_local_var['settings']['poolVariableId'] is not None:\n",
112 | " target_id = global_vars.get(source_local_var['settings']['poolVariableId'])\n",
113 | " if target_id is None:\n",
114 | " para = target_data_pool.create_pool_parameter(source_data_pool.variables.find(source_local_var['settings']['poolVariableId'])) # create the target global parameter if it does not exist yet\n",
115 | " print(\"Pool Parameter '\" + para.name + \"' created.\")\n",
116 | " target_id = para.id\n",
117 | " global_vars[source_local_var['defaultSettings']['poolVariableId']] = target_id\n",
118 | " source_local_var['settings']['poolVariableId'] = target_id\n",
119 | "\n",
120 | " target_transformation.create_transformation_parameter(source_local_var) # create the target transformation parameter\n",
121 | " print(\"Parameter '\" + source_local_var['name'] + \"' created.\")\n",
122 | " \n",
123 | " if(source_transformation.data['template']): # copy template settings to target transformation\n",
124 | " target_transformation.to_template(source_transformation.data['protectionStatus'])\n",
125 | "\n",
126 | "print(\"Congrats you copied the data job \"+ source_data_job.name + \"!\")"
127 | ]
128 | }
129 | ],
130 | "metadata": {
131 | "jupytext": {
132 | "formats": "ipynb,py:percent"
133 | },
134 | "kernelspec": {
135 | "display_name": "Python 3",
136 | "language": "python",
137 | "name": "python3"
138 | },
139 | "language_info": {
140 | "codemirror_mode": {
141 | "name": "ipython",
142 | "version": 3
143 | },
144 | "file_extension": ".py",
145 | "mimetype": "text/x-python",
146 | "name": "python",
147 | "nbconvert_exporter": "python",
148 | "pygments_lexer": "ipython3",
149 | "version": "3.8.8"
150 | }
151 | },
152 | "nbformat": 4,
153 | "nbformat_minor": 4
154 | }
155 |
--------------------------------------------------------------------------------
/pycelonis1/03_Connectivity/03_Data_Model_Mover.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Data Model Mover"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "**This tutorial shows how to copy a data model from one team/data pool to another, independent of the cluster.**\n",
15 | "\n"
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "**To do so we first need to connect to the source data model.**"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 32,
28 | "metadata": {},
29 | "outputs": [
30 | {
31 | "name": "stdout",
32 | "output_type": "stream",
33 | "text": [
34 | "2020-02-13 15:59:50 - pycelonis: Login successful! Hello s.matthaei@celonis.com\n"
35 | ]
36 | }
37 | ],
38 | "source": [
39 | "from pycelonis import get_celonis\n",
40 | "\n",
41 | "c_source = get_celonis(\"URL to the team from which you want to copy.\", \"Specify a valid API key for your source cloud team.\")\n",
42 | "source_data_model = c_source.datamodels.find(\"ID of the source data model. It can be copied from the URL.\") "
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "**In the next step we connect to the target data pool.**"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 39,
55 | "metadata": {},
56 | "outputs": [
57 | {
58 | "name": "stdout",
59 | "output_type": "stream",
60 | "text": [
61 | "2020-02-13 16:05:17 - pycelonis: Login successful! Hello s.matthaei@celonis.com\n"
62 | ]
63 | }
64 | ],
65 | "source": [
66 | "c_target = get_celonis(\"URL to cloud team where you want to copy to.\", \"Specify a valid API key for your target cloud team.\")\n",
67 | "target_data_pool = c_target.pools.find(\"Name or ID of the target data pool.\")\n",
68 | "data_source_name = \"Name of the data connection in the target data pool the target data model should refer to. Indicate an empty string to point to the global scope.\""
69 | ]
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {},
74 | "source": [
75 | "**Create the data model and add the tables.**"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": 40,
81 | "metadata": {},
82 | "outputs": [],
83 | "source": [
84 | "if source_data_model.name in target_data_pool.datamodels.names:\n",
85 | "    print('A data model with the same name already exists in the target data pool. Please rename one.')\n",
86 | "else: \n",
87 | " target_data_model = target_data_pool.create_datamodel(source_data_model.name) # create target data model\n",
88 | "\n",
89 | " connection = target_data_pool.data_connections.names[data_source_name] if data_source_name else None # choose the connection for the data model \n",
90 | "\n",
91 | " target_data_model.add_tables_from_pool(source_data_model.tables, connection) # add the tables from the connection to the data model"
92 | ]
93 | },
94 | {
95 | "cell_type": "markdown",
96 | "metadata": {},
97 | "source": [
98 | "**Copy Activity & Case table settings. If you copy to a data pool that does not contain the case and activity table in the specified connection, this step cannot be performed. Just continue with the next one.**"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": 41,
104 | "metadata": {},
105 | "outputs": [
106 | {
107 | "data": {
108 | "text/plain": [
109 | "[,]"
110 | ]
111 | },
112 | "execution_count": 41,
113 | "metadata": {},
114 | "output_type": "execute_result"
115 | }
116 | ],
117 | "source": [
118 | "target_data_model.create_process_configuration(\n",
119 | " activity_table=source_data_model.process_configurations[0].activity_table.data[\"name\"] if source_data_model.process_configurations[0].activity_table else None,\n",
120 | " case_table=source_data_model.process_configurations[0].case_table.data[\"name\"] if source_data_model.process_configurations[0].case_table else None,\n",
121 | " case_column=source_data_model.process_configurations[0].case_column if source_data_model.process_configurations[0].activity_table else None,\n",
122 | " activity_column=source_data_model.process_configurations[0].activity_column if source_data_model.process_configurations[0].activity_table else None,\n",
123 | " timestamp_column=source_data_model.process_configurations[0].timestamp_column if source_data_model.process_configurations[0].activity_table else None,\n",
124 | " sorting_column=source_data_model.process_configurations[0].sorting_column if source_data_model.process_configurations[0].activity_table else None)"
125 | ]
126 | },
127 | {
128 | "cell_type": "markdown",
129 | "metadata": {},
130 | "source": [
131 | "**Add forgein key relationships.**"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": 42,
137 | "metadata": {},
138 | "outputs": [
139 | {
140 | "name": "stdout",
141 | "output_type": "stream",
142 | "text": [
143 | "Relationships copied.\n"
144 | ]
145 | }
146 | ],
147 | "source": [
148 | "for fk in source_data_model.foreign_keys:\n",
149 | " target_data_model.create_foreign_key(fk[\"source_table\"], fk[\"target_table\"], fk[\"columns\"], from_scratch=True)\n",
150 | "print('Relationships copied.')"
151 | ]
152 | },
153 | {
154 | "cell_type": "markdown",
155 | "metadata": {},
156 | "source": [
157 | "**Add table aliases.**"
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": 38,
163 | "metadata": {},
164 | "outputs": [
165 | {
166 | "name": "stderr",
167 | "output_type": "stream",
168 | "text": [
169 | "WARNING:pycelonis:More things might have changed than requested\n",
170 | "WARNING:pycelonis:More things might have changed than requested\n",
171 | "WARNING:pycelonis:More things might have changed than requested\n",
172 | "WARNING:pycelonis:More things might have changed than requested\n"
173 | ]
174 | },
175 | {
176 | "name": "stdout",
177 | "output_type": "stream",
178 | "text": [
179 | "Please ignore the warnings. Congrats you copied the data model 'SAP ECC - Accounts Payable Data Model'!\n"
180 | ]
181 | }
182 | ],
183 | "source": [
184 | "for t in source_data_model.tables:\n",
185 | " if t.alias == t.name and isinstance(source_data_model, pycelonis.objects_ibc.Datamodel):\n",
186 | " target_t = target_data_model.tables.find(t.data[\"name\"])\n",
187 | " try:\n",
188 | " target_t.alias = t.name\n",
189 | " except ValueError:\n",
190 | " pass\n",
191 | " if t.alias != t.name and isinstance(source_data_model, pycelonis.objects_cpm4.Datamodel):\n",
192 | " target_t = target_data_model.tables.find(t.data[\"name\"])\n",
193 | " try:\n",
194 | " target_t.alias = t.alias\n",
195 | " except ValueError:\n",
196 | " pass\n",
197 | "print(\"Please ignore the warnings. Congrats you copied the data model '\"+ target_data_model.name + \"'!\")"
198 | ]
199 | }
200 | ],
201 | "metadata": {
202 | "jupytext": {
203 | "formats": "ipynb,py:percent"
204 | },
205 | "kernelspec": {
206 | "display_name": "Python 3",
207 | "language": "python",
208 | "name": "python3"
209 | },
210 | "language_info": {
211 | "codemirror_mode": {
212 | "name": "ipython",
213 | "version": 3
214 | },
215 | "file_extension": ".py",
216 | "mimetype": "text/x-python",
217 | "name": "python",
218 | "nbconvert_exporter": "python",
219 | "pygments_lexer": "ipython3",
220 | "version": "3.8.8"
221 | }
222 | },
223 | "nbformat": 4,
224 | "nbformat_minor": 4
225 | }
226 |
--------------------------------------------------------------------------------
/pycelonis1/01_use_pycelonis/00_basics/03_pushing_data.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Pushing Data\n",
8 | "\n",
9 | "### This tutorial shows how data can be pushed from Python to Celonis. The data is pushed to a Celonis Data Pool or Data Model and is ready to use within IBC.\n",
10 | "In this to Tutorial we will:\n",
11 | "1. Connect to Celonis\n",
12 | "2. Prepare the data that needs to be pushed into a dataframe.\n",
13 | "3. Push the data into Celonis\n",
14 | " 1. Push data to datapool\n",
15 | " 2. Push data directly to a specific datamodel in the datapool"
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "### Connect to Celonis"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 1,
28 | "metadata": {},
29 | "outputs": [
30 | {
31 | "name": "stdout",
32 | "output_type": "stream",
33 | "text": [
34 | "2020-10-23 11:50:39 - pycelonis: Login successful! Hello Noor\n"
35 | ]
36 | }
37 | ],
38 | "source": [
39 | "from pycelonis import get_celonis\n",
40 | "celonis = get_celonis()"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "### Prepare the data that needs to be pushed to celonis into a dataframe\n",
48 | "\n",
49 | "For the sake of this demo, we will create a dummy dataframe, but you can put any data in the dataframe as you like"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 2,
55 | "metadata": {},
56 | "outputs": [
57 | {
58 | "data": {
59 | "text/html": [
60 | "\n",
61 | "\n",
74 | "
\n",
75 | " \n",
76 | " \n",
77 | " | \n",
78 | " A | \n",
79 | " B | \n",
80 | " C | \n",
81 | "
\n",
82 | " \n",
83 | " \n",
84 | " \n",
85 | " | 0 | \n",
86 | " 2 | \n",
87 | " 2 | \n",
88 | " 10 | \n",
89 | "
\n",
90 | " \n",
91 | " | 1 | \n",
92 | " 4 | \n",
93 | " 0 | \n",
94 | " 2 | \n",
95 | "
\n",
96 | " \n",
97 | " | 2 | \n",
98 | " 8 | \n",
99 | " 0 | \n",
100 | " 1 | \n",
101 | "
\n",
102 | " \n",
103 | " | 3 | \n",
104 | " 0 | \n",
105 | " 0 | \n",
106 | " 8 | \n",
107 | "
\n",
108 | " \n",
109 | "
\n",
110 | "
"
111 | ],
112 | "text/plain": [
113 | " A B C\n",
114 | "0 2 2 10\n",
115 | "1 4 0 2\n",
116 | "2 8 0 1\n",
117 | "3 0 0 8"
118 | ]
119 | },
120 | "execution_count": 2,
121 | "metadata": {},
122 | "output_type": "execute_result"
123 | }
124 | ],
125 | "source": [
126 | "import pandas as pd\n",
127 | "\n",
128 | "df = pd.DataFrame({'A': [2, 4, 8, 0], 'B': [2, 0, 0, 0], 'C': [10, 2, 1, 8]})\n",
129 | "df.head()"
130 | ]
131 | },
132 | {
133 | "cell_type": "markdown",
134 | "metadata": {},
135 | "source": [
136 | "### Push the data from the dataframe into a table in Celonis\n"
137 | ]
138 | },
139 | {
140 | "cell_type": "markdown",
141 | "metadata": {},
142 | "source": [
143 | "#### A. Push data to the datapool\n",
144 | "\n",
145 | "##### Find the datapool\n"
146 | ]
147 | },
148 | {
149 | "cell_type": "code",
150 | "execution_count": 3,
151 | "metadata": {},
152 | "outputs": [],
153 | "source": [
154 | "data_pool = celonis.pools.find(\"id_or_name_of_data_pool\")"
155 | ]
156 | },
157 | {
158 | "cell_type": "markdown",
159 | "metadata": {},
160 | "source": [
161 | "##### If you are unsure about the name/id of your data pool you can list all the datapools available to you"
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": null,
167 | "metadata": {},
168 | "outputs": [],
169 | "source": [
170 | "celonis.pools"
171 | ]
172 | },
173 | {
174 | "cell_type": "markdown",
175 | "metadata": {},
176 | "source": [
177 | "Now we push the data frame to the data pool. For this, we use the push_table() function, which has the following properties:\n",
178 | "* df_or_path: Either the pandas data frame or the path to a data frame that should be pushed to Celonis\n",
179 | "* table_name: The name that this data frame should have in the data pool\n",
180 | "* if_exists: Specifies what happens when the table already exists in the data pool. The options are 'replace', 'append', 'upsert', 'error'\n",
181 | "\n",
182 | "The additional parameters (like the setting of primary keys and column types) can be checked by pressing SHIFT + TAB, while the curser is in the function push_table()"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": null,
188 | "metadata": {},
189 | "outputs": [],
190 | "source": [
191 | "data_pool.push_table(df,\"table_name\", if_exists = 'replace')"
192 | ]
193 | },
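{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of the additional parameters mentioned above (assuming a primary_keys keyword in your PyCelonis version; verify the exact signature with SHIFT + TAB before using it):\n",
"\n",
"    # hypothetical: upsert rows into the pool table, matching on column 'A'\n",
"    data_pool.push_table(df, \"table_name\", if_exists='upsert', primary_keys=['A'])"
]
},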
194 | {
195 | "cell_type": "markdown",
196 | "metadata": {},
197 | "source": [
198 | "The table is now in the data pool and can be added to any data model in that pool."
199 | ]
200 | },
201 | {
202 | "cell_type": "markdown",
203 | "metadata": {},
204 | "source": [
205 | "### B. Push data directly to the datamodel\n",
206 | "We could also directly push the table to the data model. We would first need to find the data model.\n",
207 | "##### Find the datamodel"
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": 5,
213 | "metadata": {},
214 | "outputs": [],
215 | "source": [
216 | "data_model = celonis.datamodels.find(\"datamodel id/name\")"
217 | ]
218 | },
219 | {
220 | "cell_type": "markdown",
221 | "metadata": {},
222 | "source": [
223 | "##### If you are unsure about the name/id of your data pool you can list all the datamodels available to you"
224 | ]
225 | },
226 | {
227 | "cell_type": "code",
228 | "execution_count": 5,
229 | "metadata": {},
230 | "outputs": [],
231 | "source": [
232 | "celonis.datamodels"
233 | ]
234 | },
235 | {
236 | "cell_type": "markdown",
237 | "metadata": {},
238 | "source": [
239 | "##### Push the dataframe as table in the datamodel\n",
240 | "\n",
241 | "Use the push_table() function again. In this example we will replace the table if it already exist, however we can also use the options: append and upsert as stated earlier.\n",
242 | "\n",
243 | "Here we have additionally the option to reload the datamodel. In this example we choose to set reload_datamodel as False. In this case, the changes will be effective with the next scheduled or manual datamodel reload."
244 | ]
245 | },
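{
"cell_type": "markdown",
"metadata": {},
"source": [
"If the new table should be queryable right away, you can either set reload_datamodel=True in the call below or trigger a reload afterwards. A minimal sketch, assuming the data model object exposes a reload() method:\n",
"\n",
"    # hypothetical: force an immediate data model reload after pushing\n",
"    data_model.reload()"
]
},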
246 | {
247 | "cell_type": "code",
248 | "execution_count": 6,
249 | "metadata": {},
250 | "outputs": [
251 | {
252 | "data": {
253 | "text/plain": [
254 | ""
255 | ]
256 | },
257 | "execution_count": 6,
258 | "metadata": {},
259 | "output_type": "execute_result"
260 | }
261 | ],
262 | "source": [
263 | "data_model.push_table(df,\"table_name\",reload_datamodel=False, if_exists = 'replace')"
264 | ]
265 | }
266 | ],
267 | "metadata": {
268 | "jupytext": {
269 | "formats": "ipynb,py:percent"
270 | },
271 | "kernelspec": {
272 | "display_name": "Python 3",
273 | "language": "python",
274 | "name": "python3"
275 | },
276 | "language_info": {
277 | "codemirror_mode": {
278 | "name": "ipython",
279 | "version": 3
280 | },
281 | "file_extension": ".py",
282 | "mimetype": "text/x-python",
283 | "name": "python",
284 | "nbconvert_exporter": "python",
285 | "pygments_lexer": "ipython3",
286 | "version": "3.7.4"
287 | }
288 | },
289 | "nbformat": 4,
290 | "nbformat_minor": 4
291 | }
292 |
--------------------------------------------------------------------------------
/pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/KPI_Mover.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "**Import packages and log in**"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import csv\n",
17 | "import os\n",
18 | "import numpy as np\n",
19 | "import pandas as pd\n",
20 | "import copy\n",
21 | "import sys\n",
22 | "import yaml\n",
23 | "import re\n",
24 | "from collections import OrderedDict\n",
25 | "from pycelonis import get_celonis, pql\n",
26 | "from pycelonis.pql import PQL, PQLColumn\n",
27 | "from pycelonis.utils import parquet_utils as pu\n",
28 | "\n",
29 | "login = {\n",
30 | " \"celonis_url\": \"\",\n",
31 | " \"api_token\": \"\",\n",
32 | " }\n",
33 | "celonis = get_celonis(**login)"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "package = celonis.packages.find('31af4c4d-4ddd-40ae-97a6-9d1146345e6f')\n",
43 | "source_analysis = package.analyses.find('0c191ff3-5ef8-47c9-92dd-f5170e342f2a')"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": null,
49 | "metadata": {},
50 | "outputs": [],
51 | "source": [
52 | "class quoted(str):\n",
53 | " pass\n",
54 | "\n",
55 | "def quoted_presenter(dumper, data):\n",
56 | " return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='\"')\n",
57 | " \n",
58 | "yaml.add_representer(quoted, quoted_presenter)\n",
59 | "\n",
60 | "class literal(str):\n",
61 | " pass\n",
62 | "\n",
63 | "def literal_presenter(dumper, data):\n",
64 | " return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='>')\n",
65 | " \n",
66 | "yaml.add_representer(literal, literal_presenter)\n",
67 | "\n",
68 | "def ordered_dict_presenter(dumper, data):\n",
69 | " return dumper.represent_dict(data.items())\n",
70 | "\n",
71 | "yaml.add_representer(OrderedDict, ordered_dict_presenter)\n",
72 | "\n",
73 | "\n",
74 | "def add_parameters(data, km_kpi):\n",
75 | " if len(data['parameters']) != 0:\n",
76 | " km_parameters = []\n",
77 | " for parameter, i in zip(data['parameters'], range(len(data['parameters']))):\n",
78 | " km_parameter = {\n",
79 | " 'id': 'p'+str(i+1),\n",
80 | " 'displayName': parameter['name'],\n",
81 | " 'defaultValue': 0\n",
82 | " }\n",
83 | " km_parameters.append(km_parameter)\n",
84 | " km_kpi['parameters'] = km_parameters\n",
85 | " return km_kpi\n",
86 | "\n",
87 | "def clean_formula_names(saved_formulas):\n",
88 | " to_return = []\n",
89 | " for data in saved_formulas:\n",
90 | " data['clean_name'] = data['name'].replace(')', '').replace('(', '')\n",
91 | " data['clean_name'] = re.sub(\"[^0-9a-zA-Z]+\", \"_\", data['clean_name'])\n",
92 | " to_return.append(data)\n",
93 | " return to_return\n",
94 | "\n",
95 | "def clean_template(clean_formulas):\n",
96 | " to_return = []\n",
97 | " for data in clean_formulas:\n",
98 | " tmp_template = data['template']\n",
99 | " for tmp_data in clean_formulas:\n",
100 | " name = tmp_data['name']\n",
101 | " clean_name = tmp_data['clean_name']\n",
102 | " tmp_template = tmp_template.replace(name, clean_name)\n",
103 | " data['clean_template'] = tmp_template\n",
104 | " to_return.append(data)\n",
105 | " return clean_formulas\n",
106 | " \n",
107 | "def clean_variables(var_list):\n",
108 | " to_return = []\n",
109 | " for y in var_list:\n",
110 | " tmp_var_query = y['value']\n",
111 | " for x in var_list: \n",
112 | " full_var_name0, full_var_name1, full_var_name2, full_var_name3 = '<%='+x['name']+'%>', '<%= '+x['name']+' %>', '<%= '+x['name']+'%>', '<%='+x['name']+' %>'\n",
113 | " tmp_var_query = tmp_var_query.replace(full_var_name0, '${'+x['name']+'}').replace(full_var_name1, '${'+x['name']+'}').replace(full_var_name2, '${'+x['name']+'}').replace(full_var_name3, '${'+x['name']+'}')\n",
114 | " var_data = {'id': y['name'],\n",
115 | " 'displayName': y['name'].replace('_', ' '),\n",
116 | " 'description':\"\",\n",
117 | " 'value': tmp_var_query\n",
118 | " }\n",
119 | " to_return.append(var_data)\n",
120 | " return to_return\n",
121 | "\n",
122 | "def replace_variables(clean_formulas, var_list):\n",
123 | " to_return = []\n",
124 | " for data in clean_formulas:\n",
125 | " tmp_data = copy.copy(data['clean_template'])\n",
126 | " for x in var_list:\n",
127 | " full_var_name0, full_var_name1, full_var_name2, full_var_name3 = '<%='+x['name']+'%>', '<%= '+x['name']+' %>', '<%= '+x['name']+'%>', '<%='+x['name']+' %>'\n",
128 | " tmp_data = tmp_data.replace(full_var_name0, '${'+x['name']+'}').replace(full_var_name1, '${'+x['name']+'}').replace(full_var_name2, '${'+x['name']+'}').replace(full_var_name3, '${'+x['name']+'}')\n",
129 | " data['clean_template'] = tmp_data\n",
130 | " to_return.append(data)\n",
131 | " return to_return\n",
132 | " \n",
133 | "def saved_formulas_to_yaml(source_analysis):\n",
134 | " \"\"\"Given a Celonis Analysis object, saves a 'data.yml' file \"\"\"\n",
135 | " \n",
136 | " saved_formulas = copy.copy(source_analysis.saved_formulas)\n",
137 | " var_list = copy.copy(source_analysis.analysis.draft.variables)\n",
138 | " km_kpis = []\n",
139 | " list_of_data = [formula.data for formula in saved_formulas] \n",
140 | " \n",
141 | " clean_data = clean_formula_names(list_of_data)\n",
142 | " clean_data = clean_template(clean_data)\n",
143 | " clean_data = replace_variables(clean_data, var_list)\n",
144 | " for data in clean_data:\n",
145 | " description = data['description'].replace('\\'', '').replace('\"', '')\n",
146 | " pql = '\\n'+data['clean_template'].replace('.id', '.\"id\"').replace('.Id', '.\"Id\"').replace('.ID', '.\"ID\"')\n",
147 | " km_kpi = OrderedDict(id=data['clean_name'],\n",
148 | " displayName=data['name'].replace('_', ' '),\n",
149 | " description=quoted(description),\n",
150 | " pql=literal(pql),\n",
151 | " )\n",
152 | " km_kpi = add_parameters(data, km_kpi)\n",
153 | " km_kpis.append(km_kpi)\n",
154 | " \n",
155 | " km_kpis = {'kpis':km_kpis}\n",
156 | " \n",
157 | " with open('kpis.yml', 'w') as outfile:\n",
158 | " yaml.dump(km_kpis, outfile, sort_keys=False)\n",
159 | " return\n",
160 | "\n",
161 | "def variables_to_yaml(source_analysis):\n",
162 | " var_list = copy.copy(source_analysis.analysis.draft.variables)\n",
163 | " var_list = clean_variables(var_list)\n",
164 | " km_vars = []\n",
165 | " for var in var_list:\n",
166 | " km_var = OrderedDict(id=var['id'],\n",
167 | " displayName=var['displayName'],\n",
168 | " description=var['description'],\n",
169 | " value=literal(var['value'].replace('\"', '') )\n",
170 | " )\n",
171 | " km_vars.append(km_var)\n",
172 | " km_vars = {'variables':km_vars}\n",
173 | " with open('variables.yml', 'w') as outfile:\n",
174 | " yaml.dump(km_vars, outfile, sort_keys=False)\n",
175 | " return"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": [
184 | "saved_formulas_to_yaml(source_analysis)"
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": null,
190 | "metadata": {},
191 | "outputs": [],
192 | "source": [
193 | "variables_to_yaml(source_analysis)"
194 | ]
195 | }
196 | ],
197 | "metadata": {
198 | "kernelspec": {
199 | "display_name": "Python 3",
200 | "language": "python",
201 | "name": "python3"
202 | },
203 | "language_info": {
204 | "codemirror_mode": {
205 | "name": "ipython",
206 | "version": 3
207 | },
208 | "file_extension": ".py",
209 | "mimetype": "text/x-python",
210 | "name": "python",
211 | "nbconvert_exporter": "python",
212 | "pygments_lexer": "ipython3",
213 | "version": "3.7.6"
214 | }
215 | },
216 | "nbformat": 4,
217 | "nbformat_minor": 4
218 | }
219 |
--------------------------------------------------------------------------------
/pycelonis1/03_Connectivity/11_Extraction_Unifier.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Extraction Unifier"
8 | ]
9 | },
10 | {
11 | "cell_type": "raw",
12 | "metadata": {},
13 | "source": [
14 | "Created by: e.vogt@celonis.com\n",
15 | "Uploaded on: 08.07.2020"
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "**This tutorial shows how to copy multiple separate extraction jobs into one extraction.**"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "**To do so we first need to connect to the source data model.**"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 1,
35 | "metadata": {},
36 | "outputs": [
37 | {
38 | "name": "stdout",
39 | "output_type": "stream",
40 | "text": [
41 | "2020-04-02 08:25:56 - pycelonis: Login successful! Hello Application Key, this key currently has access to 0 analyses.\n"
42 | ]
43 | }
44 | ],
45 | "source": [
46 | "from pycelonis import get_celonis\n",
47 | "\n",
48 | "c_source = get_celonis(\"URL to the team from which you want to copy.\", \"Specify a valid API key for your source cloud team.\")\n",
49 | "source_data_pool = c_source.pools.find(\"Name or ID of the source data pool.\")\n",
50 | "source_data_job = source_data_pool.data_jobs.find(\"Name or ID of the source data job.\")"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": [
59 | "for source_extraction in source_data_job.extractions.data: \n",
60 | " print(source_extraction.name)"
61 | ]
62 | },
63 | {
64 | "cell_type": "markdown",
65 | "metadata": {},
66 | "source": [
67 | "**In the next step we connect to the target data pool.**"
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": 4,
73 | "metadata": {},
74 | "outputs": [
75 | {
76 | "name": "stdout",
77 | "output_type": "stream",
78 | "text": [
79 | "2020-04-02 08:29:22 - pycelonis: Login successful! Hello Application Key, this key currently has access to 0 analyses.\n"
80 | ]
81 | }
82 | ],
83 | "source": [
84 | "c_target = get_celonis(\"URL to cloud team where you want to copy to.\", \"Specify a valid API key for your target cloud team.\")\n",
85 | "target_data_pool = c_target.pools.find(\"Name or ID of the target data pool.\")\n",
86 | "target_data_job = target_data_pool.data_jobs.find(\"Name or ID of the target data job.\")\n",
87 | "target_extraction = target_data_job.extractions.ids[\"Name or ID of the target data extraction.\"]"
88 | ]
89 | },
90 | {
91 | "cell_type": "markdown",
92 | "metadata": {},
93 | "source": [
94 | "**Create target extraction.**"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 6,
100 | "metadata": {},
101 | "outputs": [
102 | {
103 | "name": "stdout",
104 | "output_type": "stream",
105 | "text": [
106 | "Congrats you copied the extraction test_unified!\n",
107 | "Congrats you copied the extraction test_unified!\n",
108 | "Congrats you copied the extraction test_unified!\n",
109 | "Congrats you copied the extraction test_unified!\n",
110 | "Parameter 'Maximal Activity End Date' created.\n",
111 | "Parameter 'Maximal Activity Start Date' created.\n",
112 | "Congrats you copied the extraction test_unified!\n",
113 | "Congrats you copied the extraction test_unified!\n",
114 | "Congrats you copied the extraction test_unified!\n",
115 | "Congrats you copied the extraction test_unified!\n",
116 | "Congrats you copied the extraction test_unified!\n"
117 | ]
118 | }
119 | ],
120 | "source": [
121 | "for source_extraction in source_data_job.extractions.data: # loop through source extractions\n",
122 | "\n",
123 | " for source_ex_para in source_extraction.variables: # loop through the source extraction parameters\n",
124 | " if source_ex_para['defaultSettings'] is not None: # create the connection for the default value to the target global parameters\n",
125 | " if source_ex_para['defaultSettings']['poolVariableId'] is not None:\n",
126 | " target_id = global_vars.get(source_ex_para['defaultSettings']['poolVariableId']) \n",
127 | " if target_id is None:\n",
128 | " para = target_data_pool.create_pool_parameter(source_data_pool.variables.find(source_ex_para['defaultSettings']['poolVariableId'])) # create the target global parameter if it does not exist yet\n",
129 | " print(\"Pool Parameter '\" + para.name + \"' created.\")\n",
130 | " target_id = para.id\n",
131 | " global_vars[source_ex_para['defaultSettings']['poolVariableId']] = target_id\n",
132 | " source_ex_para['defaultSettings']['poolVariableId'] = target_id\n",
133 | " if source_ex_para['settings'] is not None: # create the connection for the value to the target global parameters\n",
134 | " if source_ex_para['settings']['poolVariableId'] is not None:\n",
135 | " target_id = global_vars.get(source_ex_para['settings']['poolVariableId'])\n",
136 | " if target_id is None:\n",
137 | " para = target_data_pool.create_pool_parameter(source_data_pool.variables.find(source_ex_para['settings']['poolVariableId'])) # create the target global parameter if it does not exist yet\n",
138 | " print(\"Pool Parameter '\" + para.name + \"' created.\")\n",
139 | " target_id = para.id\n",
140 | " global_vars[source_ex_para['settings']['poolVariableId']] = target_id\n",
141 | " source_ex_para['settings']['poolVariableId'] = target_id\n",
142 | "\n",
143 | " target_extraction.create_extraction_parameter(source_ex_para) # create the target transformation parameter\n",
144 | " print(\"Parameter '\" + source_ex_para['name'] + \"' created.\")\n",
145 | " \n",
146 | " for table in source_extraction.tables.data:\n",
147 | "\n",
148 | " if table.data['creationDateParameterStart'] is not None or table.data['creationDateParameterEnd'] is not None or table.data['changeDateOffsetParameter'] is not None:\n",
149 | "\n",
150 | " temp = table.data.copy()\n",
151 | "\n",
152 | " for para in source_extraction.variables:\n",
153 | " if para['id'] == table.data['creationDateParameterStart']:\n",
154 | " placeholder_start = para['placeholder']\n",
155 | " if para['id'] == table.data['creationDateParameterEnd']:\n",
156 | " placeholder_end = para['placeholder']\n",
157 | " if para['id'] == table.data['changeDateOffsetParameter']:\n",
158 | " placeholder_change = para['placeholder']\n",
159 | "\n",
160 | " for para in target_extraction.variables:\n",
161 | " if para['placeholder'] == placeholder_start:\n",
162 | " temp['creationDateParameterStart'] = para['id']\n",
163 | " if para['placeholder'] == placeholder_end:\n",
164 | " temp['creationDateParameterEnd'] = para['id']\n",
165 | " if para['placeholder'] == placeholder_change:\n",
166 | " temp['changeDateOffsetParameter'] = para['id']\n",
167 | "\n",
168 | " table=temp \n",
169 | "\n",
170 | " try:\n",
171 | " target_extraction.add_table(table=table)\n",
172 | " except:\n",
173 | " print(\"Filters for table: '\" + table.name + \"' are neglected as the table is not found in the indicated source connection.\")\n",
174 | "\n",
175 | " print(\"Congrats you copied the extraction \"+ target_extraction.name + \"!\") "
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": []
184 | }
185 | ],
186 | "metadata": {
187 | "kernelspec": {
188 | "display_name": "Python 3",
189 | "language": "python",
190 | "name": "python3"
191 | },
192 | "language_info": {
193 | "codemirror_mode": {
194 | "name": "ipython",
195 | "version": 3
196 | },
197 | "file_extension": ".py",
198 | "mimetype": "text/x-python",
199 | "name": "python",
200 | "nbconvert_exporter": "python",
201 | "pygments_lexer": "ipython3",
202 | "version": "3.7.6"
203 | }
204 | },
205 | "nbformat": 4,
206 | "nbformat_minor": 4
207 | }
208 |
--------------------------------------------------------------------------------
/pycelonis1/01_use_pycelonis/00_basics/02_pulling_data_from_datamodel.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Pulling data from a Data Model"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "from pycelonis import get_celonis\n",
17 | "from pycelonis.pql import PQL, PQLColumn, PQLFilter\n",
18 | "import pandas as pd"
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {},
24 | "source": [
25 | "### Connect to Celonis"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 3,
31 | "metadata": {},
32 | "outputs": [
33 | {
34 | "name": "stdout",
35 | "output_type": "stream",
36 | "text": [
37 | "2020-03-02 13:00:58 - pycelonis: Login successful! Hello Dimitris\n"
38 | ]
39 | }
40 | ],
41 | "source": [
42 | "celonis = get_celonis(\"api token\")"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "### Select Datamodel"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 5,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": [
58 | "datamodel = celonis.datamodels.find('datamodel id/name')"
59 | ]
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "metadata": {},
64 | "source": [
65 | "This is the Table in our Datamodel. It has 2 Columns with the names \"A\" and \"B\"."
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": 3,
71 | "metadata": {},
72 | "outputs": [
73 | {
74 | "data": {
75 | "text/html": [
76 | "\n",
77 | "\n",
90 | "
\n",
91 | " \n",
92 | " \n",
93 | " | \n",
94 | " A | \n",
95 | " B | \n",
96 | "
\n",
97 | " \n",
98 | " \n",
99 | " \n",
100 | " | 0 | \n",
101 | " 1 | \n",
102 | " 3 | \n",
103 | "
\n",
104 | " \n",
105 | " | 1 | \n",
106 | " 2 | \n",
107 | " 4 | \n",
108 | "
\n",
109 | " \n",
110 | " | 2 | \n",
111 | " 5 | \n",
112 | " 6 | \n",
113 | "
\n",
114 | " \n",
115 | "
\n",
116 | "
"
117 | ],
118 | "text/plain": [
119 | " A B\n",
120 | "0 1 3\n",
121 | "1 2 4\n",
122 | "2 5 6"
123 | ]
124 | },
125 | "execution_count": 3,
126 | "metadata": {},
127 | "output_type": "execute_result"
128 | }
129 | ],
130 | "source": []
131 | },
132 | {
133 | "cell_type": "markdown",
134 | "metadata": {},
135 | "source": [
136 | "### Define the query you want to pull\n",
137 | "Example 1: pull a specific Column. We select Column \"A\" from table \"Table_name\" and name this \"Name 1\""
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": 9,
143 | "metadata": {},
144 | "outputs": [
145 | {
146 | "data": {
147 | "text/html": [
148 | "\n",
149 | "\n",
162 | "
\n",
163 | " \n",
164 | " \n",
165 | " | \n",
166 | " Name 1 | \n",
167 | "
\n",
168 | " \n",
169 | " \n",
170 | " \n",
171 | " | 0 | \n",
172 | " 1 | \n",
173 | "
\n",
174 | " \n",
175 | " | 1 | \n",
176 | " 2 | \n",
177 | "
\n",
178 | " \n",
179 | " | 2 | \n",
180 | " 5 | \n",
181 | "
\n",
182 | " \n",
183 | "
\n",
184 | "
"
185 | ],
186 | "text/plain": [
187 | " Name 1\n",
188 | "0 1\n",
189 | "1 2\n",
190 | "2 5"
191 | ]
192 | },
193 | "execution_count": 9,
194 | "metadata": {},
195 | "output_type": "execute_result"
196 | }
197 | ],
198 | "source": [
199 | "query = PQLColumn(query = \"Table_name.A\", name = \"Name 1\")\n",
200 | "df = datamodel.get_data_frame(query)\n",
201 | "df"
202 | ]
203 | },
204 | {
205 | "cell_type": "markdown",
206 | "metadata": {},
207 | "source": [
208 | "Example 2: Pull a custom PQL-Statement"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": 11,
214 | "metadata": {},
215 | "outputs": [
216 | {
217 | "data": {
218 | "text/html": [
219 | "\n",
220 | "\n",
233 | "
\n",
234 | " \n",
235 | " \n",
236 | " | \n",
237 | " Name 1 | \n",
238 | "
\n",
239 | " \n",
240 | " \n",
241 | " \n",
242 | " | 0 | \n",
243 | " 5 | \n",
244 | "
\n",
245 | " \n",
246 | "
\n",
247 | "
"
248 | ],
249 | "text/plain": [
250 | " Name 1\n",
251 | "0 5"
252 | ]
253 | },
254 | "execution_count": 11,
255 | "metadata": {},
256 | "output_type": "execute_result"
257 | }
258 | ],
259 | "source": [
260 | "query = PQLColumn(query = \"MAX(Table_name.A)\", name = \"Name 1\")\n",
261 | "df = datamodel.get_data_frame(query)\n",
262 | "df"
263 | ]
264 | },
265 | {
266 | "cell_type": "markdown",
267 | "metadata": {},
268 | "source": [
269 | "Example 3: Do more things at once and add Filters"
270 | ]
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": 13,
275 | "metadata": {},
276 | "outputs": [
277 | {
278 | "data": {
279 | "text/html": [
280 | "\n",
281 | "\n",
294 | "
\n",
295 | " \n",
296 | " \n",
297 | " | \n",
298 | " Name 1 | \n",
299 | " Name 2 | \n",
300 | "
\n",
301 | " \n",
302 | " \n",
303 | " \n",
304 | " | 0 | \n",
305 | " 2 | \n",
306 | " 1 | \n",
307 | "
\n",
308 | " \n",
309 | "
\n",
310 | "
"
311 | ],
312 | "text/plain": [
313 | " Name 1 Name 2\n",
314 | "0 2 1"
315 | ]
316 | },
317 | "execution_count": 13,
318 | "metadata": {},
319 | "output_type": "execute_result"
320 | }
321 | ],
322 | "source": [
323 | "query = PQL()\n",
324 | "query += PQLColumn(\"MAX(Table_name.A)\", \"Name 1\")\n",
325 | "query += PQLColumn(\"COUNT(Table_name.B)\", \"Name 2\")\n",
326 | "query += PQLFilter(\"Filter Table_name.B < 5\")\n",
327 | "query += PQLFilter(\"Filter Table_name.B > 3\")\n",
328 | "\n",
329 | "df = datamodel.get_data_frame(query)\n",
330 | "df"
331 | ]
332 | }
333 | ],
334 | "metadata": {
335 | "kernelspec": {
336 | "display_name": "Python 3",
337 | "language": "python",
338 | "name": "python3"
339 | },
340 | "language_info": {
341 | "codemirror_mode": {
342 | "name": "ipython",
343 | "version": 3
344 | },
345 | "file_extension": ".py",
346 | "mimetype": "text/x-python",
347 | "name": "python",
348 | "nbconvert_exporter": "python",
349 | "pygments_lexer": "ipython3",
350 | "version": "3.7.4"
351 | }
352 | },
353 | "nbformat": 4,
354 | "nbformat_minor": 4
355 | }
356 |
--------------------------------------------------------------------------------
/pycelonis1/03_Connectivity/02a_Extraction_Mover.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Extraction Mover"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "**This tutorial shows how to copy a simple extraction from one team/ data job to another one, independent of the cluster.**\n",
15 | "\n"
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "**To do so we first need to connect to the source data model.**"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": null,
28 | "metadata": {},
29 | "outputs": [],
30 | "source": [
31 | "import pycelonis\n",
32 | "from pycelonis import get_celonis\n",
33 | "\n",
34 | "c_source = get_celonis(\"URL to the team from which you want to copy.\", \"Specify a valid API key for your source cloud team.\")\n",
35 | "# All IDs required can be found within the URLs when displaying the related objects in the EMS.\n",
36 | "source_data_pool = c_source.pools.find('Name or ID of the source data pool.')\n",
37 | "source_data_job = source_data_pool.data_jobs.find(\"Name or ID of the source data job.\")\n",
38 | "source_extraction = source_data_job.extractions.ids['ID of the source extraction task.']"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": null,
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "# Print source variables\n",
48 | "print(source_data_pool)\n",
49 | "print(source_data_job)\n",
50 | "print(source_extraction)"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {},
56 | "source": [
57 | "**In the next step we connect to the target data pool.**"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": null,
63 | "metadata": {},
64 | "outputs": [],
65 | "source": [
66 | "c_target = get_celonis(\"URL to cloud team where you want to copy to.\", \"Specify a valid API key for your target cloud team.\")\n",
67 | "target_data_pool = c_target.pools.find(\"Name or ID of the target data pool.\")\n",
68 | "target_data_job = target_data_pool.data_jobs.find(\"Name or ID of the target data job.\")"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": null,
74 | "metadata": {},
75 | "outputs": [],
76 | "source": [
77 | "# Print target variables\n",
78 | "print(target_data_pool)\n",
79 | "print(target_data_job)"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "**Create target extraction.**"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": null,
92 | "metadata": {},
93 | "outputs": [],
94 | "source": [
95 | "# Print source extraction name and type\n",
96 | "print(source_extraction.name)"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": null,
102 | "metadata": {},
103 | "outputs": [],
104 | "source": [
105 | "target_extraction = target_data_job.create_extraction(source_extraction.name)\n",
106 | "print(target_extraction.name)"
107 | ]
108 | },
109 | {
110 | "cell_type": "markdown",
111 | "metadata": {},
112 | "source": [
113 | "**In this step we save all source global parameter IDs in a dictionary and overwrite them with the target global parameter ID if the parameter exists already in the target data pool.**"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": null,
119 | "metadata": {},
120 | "outputs": [],
121 | "source": [
122 | "global_vars = {}\n",
123 | "for source_var in source_data_pool.variables: # loop through global parameters of source data pool\n",
124 | " global_vars.update({source_var.id: None}) # save ID of source global parameter\n",
125 | " for target_var in target_data_pool.variables: # loop through the global parameters of target data pool\n",
126 | " if source_var.data['placeholder'].upper() == target_var.data['placeholder'].upper(): # if the placeholder of a source global parameter and a target global parameter match\n",
127 | " global_vars.update({source_var.id: target_var.id}) # match the saved ID of source global parameter wih the target global parameter ID\n",
128 | "print(\"Global parameter configurations saved.\")"
129 | ]
130 | },
131 | {
132 | "cell_type": "markdown",
133 | "metadata": {},
134 | "source": [
135 | "**This section serves to create the extraction parameters.**"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": null,
141 | "metadata": {},
142 | "outputs": [],
143 | "source": [
144 | "for source_ex_para in source_extraction.variables: # loop through the source extraction parameters\n",
145 | " if source_ex_para['defaultSettings'] is not None: # create the connection for the default value to the target global parameters\n",
146 | " if source_ex_para['defaultSettings']['poolVariableId'] is not None:\n",
147 | " target_id = global_vars.get(source_ex_para['defaultSettings']['poolVariableId']) \n",
148 | " if target_id is None:\n",
149 | " para = target_data_pool.create_pool_parameter(source_data_pool.variables.find(source_ex_para['defaultSettings']['poolVariableId'])) # create the target global parameter if it does not exist yet\n",
150 | " print(\"Pool Parameter '\" + para.name + \"' created.\")\n",
151 | " target_id = para.id\n",
152 | " global_vars[source_ex_para['defaultSettings']['poolVariableId']] = target_id\n",
153 | " source_ex_para['defaultSettings']['poolVariableId'] = target_id\n",
154 | " if source_ex_para['settings'] is not None: # create the connection for the value to the target global parameters\n",
155 | " if source_ex_para['settings']['poolVariableId'] is not None:\n",
156 | " target_id = global_vars.get(source_ex_para['settings']['poolVariableId'])\n",
157 | " if target_id is None:\n",
158 | " para = target_data_pool.create_pool_parameter(source_data_pool.variables.find(source_ex_para['settings']['poolVariableId'])) # create the target global parameter if it does not exist yet\n",
159 | " print(\"Pool Parameter '\" + para.name + \"' created.\")\n",
160 | " target_id = para.id\n",
161 | " global_vars[source_ex_para['settings']['poolVariableId']] = target_id\n",
162 | " source_ex_para['settings']['poolVariableId'] = target_id\n",
163 | " \n",
164 | " target_extraction.create_extraction_parameter(source_ex_para) # create the target transformation parameter\n",
165 | " print(\"Parameter '\" + source_ex_para['name'] + \"' created.\")"
166 | ]
167 | },
168 | {
169 | "cell_type": "markdown",
170 | "metadata": {},
171 | "source": [
172 | "**Add tables in target extraction and overwrite parameter connections for time filters.**"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": null,
178 | "metadata": {},
179 | "outputs": [],
180 | "source": [
181 | "for table in source_extraction.tables.data:\n",
182 | " \n",
183 | " if table.data['creationDateParameterStart'] is not None or table.data['creationDateParameterEnd'] is not None:\n",
184 | " #None or table.data['changeDateOffsetParameter'] is not None:\n",
185 | " \n",
186 | " temp = table.data.copy()\n",
187 | " \n",
188 | " for para in source_extraction.variables:\n",
189 | " if para['id'] == table.data['creationDateParameterStart']:\n",
190 | " placeholder_start = para['placeholder']\n",
191 | "# if para['id'] == table.data['creationDateParameterEnd']:\n",
192 | "# placeholder_end = para['placeholder']\n",
193 | "# if para['id'] == table.data['changeDateOffsetParameter']:\n",
194 | "# placeholder_change = para['placeholder']\n",
195 | " \n",
196 | " for para in target_extraction.variables:\n",
197 | " if para['placeholder'] == placeholder_start:\n",
198 | " temp['creationDateParameterStart'] = para['id']\n",
199 | " # if para['placeholder'] == placeholder_end:\n",
200 | " # temp['creationDateParameterEnd'] = para['id']\n",
201 | " # if para['placeholder'] == placeholder_change:\n",
202 | " # temp['changeDateOffsetParameter'] = para['id']\n",
203 | " \n",
204 | " table=temp \n",
205 | " \n",
206 | " try:\n",
207 | " target_extraction.add_table(table=table)\n",
208 | " except:\n",
209 | " print(\"Filters for table: '\" + table.name + \"' are neglected as the table is not found in the indicated source connection.\")\n",
210 | "\n",
211 | "print(\"Congrats you copied the extraction \"+ target_extraction.name + \"!\")"
212 | ]
213 | }
214 | ],
215 | "metadata": {
216 | "jupytext": {
217 | "formats": "ipynb,py:percent"
218 | },
219 | "kernelspec": {
220 | "display_name": "Python 3",
221 | "language": "python",
222 | "name": "python3"
223 | },
224 | "language_info": {
225 | "codemirror_mode": {
226 | "name": "ipython",
227 | "version": 3
228 | },
229 | "file_extension": ".py",
230 | "mimetype": "text/x-python",
231 | "name": "python",
232 | "nbconvert_exporter": "python",
233 | "pygments_lexer": "ipython3",
234 | "version": "3.8.8"
235 | }
236 | },
237 | "nbformat": 4,
238 | "nbformat_minor": 4
239 | }
240 |
--------------------------------------------------------------------------------
/pycelonis2/01_example_use_cases/01_use_case_version_control.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Version Control Use Case\n",
8 | "This example is solely intended as a demonstration to highlight relevant pycelonis functions and properties. In this example, you will learn how to create text-based backups of analyses, knowledge models, package variables, and transformations. More specifically, you will learn:\n",
9 | "\n",
10 | "- How to connect to the EMS\n",
11 | "- How to create folders\n",
12 | "- How to create backups of all published analyses, knowledge models, package variables and transformations\n",
13 | "- Optionally, How to commit the backup folder to GitHub\n",
14 | "\n",
15 | "\n",
16 | " NOTE: Any Celonis objects with a serialized_content property can be backed up to a YAML or JSON file.\n",
17 | "
"
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "## Prerequisites\n",
25 | "To follow this tutorial, you should have PyCelonis installed and should know how to perform basic interactions with PyCelonis objects. If you don't know how to do this, please complete the **Celonis Basics** tutorial first. Further, it would be helpful to already have the previously mentioned assets inside your EMS. Please refer to the **Studio - Introduction** and **Data Integration - Data Jobs** tutorials for an overview of working with each asset type."
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {},
31 | "source": [
32 | "### 1. Import PyCelonis, connect to Celonis API, and create the backup folder"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 1,
38 | "metadata": {
39 | "collapsed": false,
40 | "jupyter": {
41 | "outputs_hidden": false
42 | },
43 | "pycharm": {
44 | "is_executing": true
45 | }
46 | },
47 | "outputs": [
48 | {
49 | "name": "stdout",
50 | "output_type": "stream",
51 | "text": [
52 | "[2023-01-30 20:53:08,556] INFO: Initial connect successful! PyCelonis Version: 2.0.1\n"
53 | ]
54 | }
55 | ],
56 | "source": [
57 | "from pycelonis import get_celonis\n",
58 | "from datetime import datetime as dt\n",
59 | "from pathlib import Path\n",
60 | "import json\n",
61 | "\n",
62 | "celonis = get_celonis()\n",
63 | "backup_path = Path('IBC Backup')\n",
64 | "now = dt.now().strftime(\"%d-%m-%Y_%H-%M\")\n",
65 | "\n",
66 | "if not backup_path.exists():\n",
67 | " backup_path.mkdir()"
68 | ]
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {},
73 | "source": [
74 | "### 2. Create new backup folders for analyses, knowledge models, variables, and transformations"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 2,
80 | "metadata": {
81 | "collapsed": false,
82 | "jupyter": {
83 | "outputs_hidden": false
84 | }
85 | },
86 | "outputs": [
87 | {
88 | "name": "stdout",
89 | "output_type": "stream",
90 | "text": [
91 | "IBC Backup/Analyses_30-01-2023_20-53\n",
92 | "IBC Backup/KnowledgeModels_30-01-2023_20-53\n",
93 | "IBC Backup/Transformations_30-01-2023_20-53\n",
94 | "IBC Backup/Variables_30-01-2023_20-53\n"
95 | ]
96 | }
97 | ],
98 | "source": [
99 | "analyses_path = backup_path / f\"Analyses_{now}\"\n",
100 | "kms_path = backup_path / f\"KnowledgeModels_{now}\"\n",
101 | "vars_path = backup_path / f\"Variables_{now}\"\n",
102 | "trans_path = backup_path / f\"Transformations_{now}\"\n",
103 | "\n",
104 | "print(analyses_path)\n",
105 | "print(kms_path)\n",
106 | "print(vars_path)\n",
107 | "print(trans_path)\n",
108 | "\n",
109 | "analyses_path.mkdir()\n",
110 | "kms_path.mkdir()\n",
111 | "vars_path.mkdir()\n",
112 | "trans_path.mkdir()"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "### 3. Create backups of all published analyses, sorted by workspace"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "outputs": [],
126 | "source": [
127 | "# Helper Function\n",
128 | "\n",
129 | "def backup_assets(assets, path):\n",
130 | " for asset in assets:\n",
131 | "\n",
132 | " # skip unpublished assets\n",
133 | " if asset.activated_draft_id is None:\n",
134 | " continue\n",
135 | "\n",
136 | " file_name = f'{asset.key}.{asset.serialization_type.lower()}'\n",
137 | " file = path / file_name\n",
138 | "\n",
139 | " file.write_text(asset.serialized_content)"
140 | ],
141 | "metadata": {
142 | "collapsed": false
143 | }
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 4,
148 | "metadata": {
149 | "collapsed": false,
150 | "jupyter": {
151 | "outputs_hidden": false
152 | }
153 | },
154 | "outputs": [
155 | {
156 | "name": "stdout",
157 | "output_type": "stream",
158 | "text": [
159 | "Analyses Backup Complete\n"
160 | ]
161 | }
162 | ],
163 | "source": [
164 | "for space in celonis.studio.get_spaces():\n",
165 | "\n",
166 | " space_folder = f\"{space.name}_{space.id}\".replace(\" \", \"_\")\n",
167 | "\n",
168 | " # Create space folders for analyses backup\n",
169 | " space_path = analyses_path / space_folder\n",
170 | " space_path.mkdir()\n",
171 | "\n",
172 | " # Create backup files\n",
173 | " for pkg in space.get_packages():\n",
174 | " backup_assets(pkg.get_analyses(), space_path)\n",
175 | "\n",
176 | "print(\"Analyses Backup Complete\")"
177 | ]
178 | },
179 | {
180 | "cell_type": "markdown",
181 | "metadata": {},
182 | "source": [
183 | "### 4. Create backups of all published knowledge models, sorted by workspace"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": 5,
189 | "metadata": {},
190 | "outputs": [
191 | {
192 | "name": "stdout",
193 | "output_type": "stream",
194 | "text": [
195 | "Knowledge Models Backup Complete\n"
196 | ]
197 | }
198 | ],
199 | "source": [
200 | "for space in celonis.studio.get_spaces():\n",
201 | "\n",
202 | " space_folder = f\"{space.name}_{space.id}\".replace(\" \", \"_\")\n",
203 | "\n",
204 | " # Create space folders for knowledge models backup\n",
205 | " space_path = kms_path / space_folder\n",
206 | " space_path.mkdir()\n",
207 | "\n",
208 | " # Create backup files\n",
209 | " for pkg in space.get_packages():\n",
210 | " backup_assets(pkg.get_knowledge_models(), space_path)\n",
211 | "\n",
212 | "print(\"Knowledge Models Backup Complete\")"
213 | ]
214 | },
215 | {
216 | "cell_type": "markdown",
217 | "metadata": {},
218 | "source": [
219 | "### 5. Create backups of all package variables, sorted by package"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": null,
225 | "outputs": [],
226 | "source": [
227 | "# Helper Function\n",
228 | "\n",
229 | "def backup_variables(variables, package_path):\n",
230 | " for variable in variables:\n",
231 | "\n",
232 | " file_name = f\"{variable.key}.json\"\n",
233 | " file = package_path / file_name\n",
234 | "\n",
235 | " content = json.dumps({\n",
236 | " 'key': variable.key,\n",
237 | " 'type_': variable.type_,\n",
238 | " 'description': variable.description,\n",
239 | " 'value': variable.value,\n",
240 | " 'package_key': variable.package_key\n",
241 | " })\n",
242 | "\n",
243 | " file.write_text(content)"
244 | ],
245 | "metadata": {
246 | "collapsed": false
247 | }
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": 6,
252 | "metadata": {},
253 | "outputs": [
254 | {
255 | "name": "stdout",
256 | "output_type": "stream",
257 | "text": [
258 | "Package Variables Backup Complete\n"
259 | ]
260 | }
261 | ],
262 | "source": [
263 | "for space in celonis.studio.get_spaces():\n",
264 | "\n",
265 | " space_folder = f\"{space.name}_{space.id}\".replace(\" \", \"_\")\n",
266 | "\n",
267 | " # Create space folders for variables backup\n",
268 | " space_path = vars_path / space_folder\n",
269 | " space_path.mkdir()\n",
270 | "\n",
271 | " # Create backup files\n",
272 | " for pkg in space.get_packages():\n",
273 | " pkg_folder = f\"{pkg.name}_{pkg.id}\".replace(\" \", \"_\")\n",
274 | " pkg_path = space_path / pkg_folder\n",
275 | " \n",
276 | " variables = pkg.get_variables()\n",
277 | " \n",
278 | " # Skip packages without variables\n",
279 | " if variables == []:\n",
280 | " continue\n",
281 | " \n",
282 | " pkg_path.mkdir()\n",
283 | " backup_variables(variables, pkg_path)\n",
284 | "\n",
285 | "print(\"Package Variables Backup Complete\")"
286 | ]
287 | },
288 | {
289 | "cell_type": "markdown",
290 | "metadata": {},
291 | "source": [
292 | "### 6. Create backups of all transformations, sorted by data job*"
293 | ]
294 | },
295 | {
296 | "cell_type": "code",
297 | "execution_count": null,
298 | "outputs": [],
299 | "source": [
300 | "# Helper Function\n",
301 | "\n",
302 | "def backup_transformations(transformations, job_path):\n",
303 | " for transformation in transformations:\n",
304 | "\n",
305 | " # Handle errors retrieving the statement\n",
306 | " try:\n",
307 | " statement = transformation.get_statement()\n",
308 | " # Skip transformations with no statement\n",
309 | " if statement is None:\n",
310 | " continue\n",
311 | " except Exception as e:\n",
312 | " print(f\"FAILED to backup: {transformation.name} with id: {transformation.id} \\n You either don't have permissions to access the asset or the transformation is proprietary to Celonis.\")\n",
313 | " continue\n",
314 | "\n",
315 | " file_name = f\"{transformation.name}.json\"\n",
316 | " file = job_path / file_name\n",
317 | "\n",
318 | " content = json.dumps({\n",
319 | " 'id': transformation.id,\n",
320 | " 'name': transformation.name,\n",
321 | " 'description': transformation.description,\n",
322 | " 'statement': statement,\n",
323 | " 'pool_id': transformation.pool_id,\n",
324 | " 'job_id': transformation.job_id\n",
325 | " })\n",
326 | "\n",
327 | " file.write_text(content)"
328 | ],
329 | "metadata": {
330 | "collapsed": false
331 | }
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 7,
336 | "metadata": {
337 | "collapsed": false,
338 | "jupyter": {
339 | "outputs_hidden": false
340 | }
341 | },
342 | "outputs": [
343 | {
344 | "name": "stdout",
345 | "output_type": "stream",
346 | "text": [
347 | "Transformations Backup Complete\n"
348 | ]
349 | }
350 | ],
351 | "source": [
352 | "for pool in celonis.data_integration.get_data_pools():\n",
353 | "\n",
354 | " # Create space folders for analyses backup\n",
355 | " pool_folder = f\"{pool.name}_{pool.id}\".replace(\" \", \"_\")\n",
356 | " pool_path = trans_path / pool_folder\n",
357 | " pool_path.mkdir()\n",
358 | "\n",
359 | " for job in pool.get_jobs():\n",
360 | " job_folder = f\"{job.name}_{job.id}\"\n",
361 | " job_path = pool_path / job_folder\n",
362 | " transformations = job.get_transformations()\n",
363 | "\n",
364 | " # Skip jobs without transformations\n",
365 | " if transformations == []:\n",
366 | " continue\n",
367 | "\n",
368 | " job_path.mkdir()\n",
369 | " backup_transformations(transformations, job_path)\n",
370 | "\n",
371 | "print(\"Transformations Backup Complete\")"
372 | ]
373 | },
374 | {
375 | "cell_type": "markdown",
376 | "metadata": {},
377 | "source": [
378 | "**Transformations downloaded from the marketplace are proprietary to Celonis and are unable to backup*"
379 | ]
380 | },
381 | {
382 | "cell_type": "markdown",
383 | "metadata": {},
384 | "source": [
385 | "### 7. (Optional) Commit the backup folder to GitHub"
386 | ]
387 | },
388 | {
389 | "cell_type": "markdown",
390 | "metadata": {},
391 | "source": [
392 | "Navigate to the backup_path on the command line using the *cd* command, then run:\n",
393 | "\n",
394 | " git init\n",
395 | " git add .\n",
396 | " git commit -m \"Activating version control\""
397 | ]
398 | },
399 | {
400 | "cell_type": "markdown",
401 | "metadata": {},
402 | "source": [
403 | "## Conclusion\n",
404 | "Congratulations! You have learned how to connect to the EMS, how to create folders, how to create backups of various assets, and how to commit the backup folder to GitHub."
405 | ]
406 | }
407 | ],
408 | "metadata": {
409 | "kernelspec": {
410 | "display_name": "Python 3 (ipykernel)",
411 | "language": "python",
412 | "name": "python3"
413 | },
414 | "language_info": {
415 | "codemirror_mode": {
416 | "name": "ipython",
417 | "version": 3
418 | },
419 | "file_extension": ".py",
420 | "mimetype": "text/x-python",
421 | "name": "python",
422 | "nbconvert_exporter": "python",
423 | "pygments_lexer": "ipython3",
424 | "version": "3.8.13"
425 | }
426 | },
427 | "nbformat": 4,
428 | "nbformat_minor": 4
429 | }
430 |
--------------------------------------------------------------------------------
/pycelonis1/06_Extractors/11_API template.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "77c1a358-1d72-43d6-91f4-5c1bed16e682",
6 | "metadata": {},
7 | "source": [
8 | "# Data Imports via API\n",
9 | "#### Historical and forecast weather data used as an example"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "id": "c467d46e-4b6d-482d-a995-557bfec50900",
15 | "metadata": {},
16 | "source": [
17 | "## Step 1: Import Required Libraries"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 2,
23 | "id": "33648981-2703-4262-b8fd-caf1c9d80048",
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "#Libraries specifically required for API imports\n",
28 | "import yaml\n",
29 | "from yaml import load, dump\n",
30 | "\n",
31 | "import requests\n",
32 | "from pandas.io.json import json_normalize\n",
33 | "\n",
34 | "#Other libraries that are always good to have\n",
35 | "import pandas as pd\n",
36 | "import numpy as np\n",
37 | "from datetime import date, timedelta, time, datetime\n",
38 | "import matplotlib.pyplot as plt\n",
39 | "\n",
40 | "\n",
41 | "#Note: if it is your first time importing a library, run a PIP install like so. Ensure you keep the exclamation point\n",
42 | "\n",
43 | "# ! pip install library_name\n",
44 | "\n"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "id": "155c6433-adda-4ddb-8afb-15d5ebdf5ffe",
50 | "metadata": {},
51 | "source": [
52 | "## Optional - Step 2: connect to Celonis\n",
53 | "#### complete only if you need to integrate the API data with Celonis data / analyses"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "id": "a5ffde80-8843-4352-b5f4-d17de82fd106",
60 | "metadata": {
61 | "tags": []
62 | },
63 | "outputs": [],
64 | "source": [
65 | "from pycelonis import get_celonis, pql\n",
66 | "\n",
67 | "celonis = get_celonis(\"team\", \n",
68 | " \"key\")\n",
69 | "\n",
70 | "#team is something like: https://berkshirehathawayenergy.us-2.celonis.cloud/\n",
71 | "#the key can be created by going to Edit Profile (under the circular button in top right) --> create API key"
72 | ]
73 | },
74 | {
75 | "cell_type": "markdown",
76 | "id": "b64b326c-7070-4532-b85b-11e434c82a10",
77 | "metadata": {},
78 | "source": [
79 | "## Optional - Step 3: load data from Celonis data model\n",
80 | "#### Complete only if you need to integrate API data with Celonis data/analyses. All code is sample code and should be adjusted to fit your data."
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": null,
86 | "id": "4cd5616b-f39c-49c0-a2ea-56ebc0b205df",
87 | "metadata": {
88 | "tags": []
89 | },
90 | "outputs": [],
91 | "source": [
92 | "\n",
93 | "#use the code below if you are querying directly from a data model\n",
94 | " # dm_id = 'data_model_id'\n",
95 | " # datamodel = celonis.datamodels.find(dm_id)\n",
96 | "\n",
97 | "#use the code below to query from studio\n",
98 | " # package = celonis.packages.find('package_id')\n",
99 | " # source_analysis = package.analyses.find('analysis_id')\n",
100 | "\n",
101 | " \n",
102 | "#use the code below to create your PQL query (examples of aggregate functions and filter statements are provided)\n",
103 | " # q1 = pql.PQL()\n",
104 | " # q1 += pql.PQLColumn(\"ROUND_DAY(table.field1)\", \"Date\")\n",
105 | " # q1 += pql.PQLColumn(\"SUM(table.field2)\", \"Actual\")\n",
106 | " # q1 += pql.PQLColumn(\"table.field3\", \"Region\")\n",
107 | " # q1 += pql.PQLFilter(\"table.field2 IS NOT NULL; \")\n",
108 | " # q1 += pql.PQLFilter(\"table.field1 > TO_DATE ( '2019-08-17 00:00:00' , FORMAT ( '%Y-%m-%d %H:%M:%S' )); \")\n",
109 | "\n",
110 | "#use the code below to create your dataframe\n",
111 | " # df1 = datamodel.get_data_frame(q1)\n",
112 | "\n"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "id": "165afba5-2e3e-41d6-803d-6b46d597cb28",
118 | "metadata": {},
119 | "source": [
120 | "## Step 4: Import Data via API\n",
121 | "#### To request your own API token for NOAA weather data, go here: https://www.ncdc.noaa.gov/cdo-web/token\n",
122 | "#### To find the dataset and station ID you need, go here: https://www.ncdc.noaa.gov/cdo-web/webservices/v2"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": null,
128 | "id": "3e8fbbf5-032c-400c-883e-2cbdfb4817be",
129 | "metadata": {
130 | "tags": []
131 | },
132 | "outputs": [],
133 | "source": [
134 | "# ADJUSTMENT REQUIRED - Define the amount of time that you will pull data for (in days)\n",
135 | "timeframe = 30\n",
136 | "\n",
137 | "#usually the max date will be today's date\n",
138 | "max_date = datetime.today().strftime(\"%Y-%m-%d\")\n",
139 | "\n",
140 | "#the min date is today's date offset by the timeframe indicated above\n",
141 | "min_date = datetime.today() - timedelta(days=timeframe)\n",
142 | "min_date = min_date.strftime(\"%Y-%m-%d\")\n",
143 | "\n",
144 | "# ADJUSTMENT REQUIRED - define the data set ID (refer to link above to find dataset IDs)\n",
145 | "datasetid = 'GHCND'\n",
146 | "\n",
147 | "# ADJUSTMENT REQUIRED - define the station ID (refer to link above to find station IDs)\n",
148 | "stationid = 'GHCND:USW00023169'\n",
149 | "\n",
150 | "# ADJUSTMENT REQUIRED - define the token\n",
151 | "token = 'xyz'\n",
152 | "\n",
153 | "# run this code to obtain the request\n",
154 | "url = 'https://www.ncdc.noaa.gov/cdo-web/api/v2/data?datasetid=' + datasetid +'&stationid=' + stationid + '&startdate=' + min_date + '&enddate=' + max_date + '&units=standard&limit=1000'\n",
155 | "payload = {}\n",
156 | "headers = {\n",
157 | " 'token': token\n",
158 | "}\n",
159 | "\n",
160 | "\n",
161 | "#run the query to get raw data, put raw data into JSON format\n",
162 | "data = requests.request(\"GET\", url, headers=headers, json = payload)\n",
163 | "data = data.json()\n",
164 | "\n",
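164a | "#Note: the NOAA CDO v2 API returns at most 1000 records per request (the 'limit' set above);\n",
164b | "#for longer timeframes you would page through the results by adding an '&offset=' query parameter.\n",
164c | "#If you want to check the HTTP status first, keep the response in its own variable before calling .json().\n",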
165 | "\n",
166 | "data"
167 | ]
168 | },
169 | {
170 | "cell_type": "markdown",
171 | "id": "86df13a1-21a5-4b01-81ac-78aa0bd83403",
172 | "metadata": {},
173 | "source": [
174 | "## Step 5: Create a dataframe from your JSON data using the json_normalize function\n",
175 | "#### 'results' should be replaced with the JSON object you need"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": 8,
181 | "id": "f6ec8959-d4ba-4237-abf4-161e5c8466cd",
182 | "metadata": {
183 | "tags": []
184 | },
185 | "outputs": [
186 | {
187 | "data": {
188 | "text/html": [
189 | "<p>[HTML preview of the resulting dataframe omitted; see the text/plain output below (284 rows × 5 columns)]</p>"
307 | ],
308 | "text/plain": [
309 | " date datatype station attributes value\n",
310 | "0 2021-08-25T00:00:00 AWND GHCND:USW00023169 ,,W, 7.4\n",
311 | "1 2021-08-25T00:00:00 PRCP GHCND:USW00023169 ,,W,2400 0.0\n",
312 | "2 2021-08-25T00:00:00 SNOW GHCND:USW00023169 ,,W, 0.0\n",
313 | "3 2021-08-25T00:00:00 SNWD GHCND:USW00023169 ,,W, 0.0\n",
314 | "4 2021-08-25T00:00:00 TAVG GHCND:USW00023169 H,,S, 92.0\n",
315 | ".. ... ... ... ... ...\n",
316 | "279 2021-09-20T00:00:00 SNOW GHCND:USW00023169 ,,D, 0.0\n",
317 | "280 2021-09-20T00:00:00 TAVG GHCND:USW00023169 H,,S, 81.0\n",
318 | "281 2021-09-20T00:00:00 TMAX GHCND:USW00023169 ,,D,2400 88.0\n",
319 | "282 2021-09-20T00:00:00 TMIN GHCND:USW00023169 ,,D,2400 73.0\n",
320 | "283 2021-09-21T00:00:00 TAVG GHCND:USW00023169 H,,S, 80.0\n",
321 | "\n",
322 | "[284 rows x 5 columns]"
323 | ]
324 | },
325 | "execution_count": 8,
326 | "metadata": {},
327 | "output_type": "execute_result"
328 | }
329 | ],
330 | "source": [
331 | "df=pd.json_normalize(data['results'])\n",
332 | "df"
333 | ]
334 | },
335 | {
336 | "cell_type": "markdown",
337 | "id": "f8851b47-4f28-47c4-a0be-d6018eae9f61",
338 | "metadata": {},
339 | "source": [
340 | "## Optional - Step 6: Helpful Formatting Functions\n",
341 | "#### This step provides example functions that can be used to manipulate your API output. These sample functions refer to a generic dataframe titled \"df\"; replace \"df\" with your own dataframe name.\n"
342 | ]
343 | },
344 | {
345 | "cell_type": "code",
346 | "execution_count": null,
347 | "id": "0747aebc-7e0f-41f3-be38-713ae624dbce",
348 | "metadata": {},
349 | "outputs": [],
350 | "source": [
351 | "#show all rows of a dataframe in the notebook output (removes the default display limit)\n",
352 | "pd.set_option('display.max_rows', None)\n",
353 | "\n",
354 | "#sort dataframe values by a column\n",
355 | "df = df.sort_values(by=[\"column_name\"])\n",
356 | "\n",
357 | "#drop the last two rows of the dataframe\n",
358 | "df.drop(df.tail(2).index,inplace = True)\n",
359 | "\n",
360 | "#reset the index of the dataframe\n",
361 | "df = df.reset_index(drop=True)\n",
362 | "\n",
363 | "#rename a column\n",
364 | "df = df.rename(columns={\"current_column_name\": \"new_column_name\"})\n",
365 | "\n",
366 | "#change datatype of a column\n",
367 | "df['date_column_name'] = pd.to_datetime(df['date_column_name'])\n",
368 | "\n",
369 | "#filter dataset\n",
370 | "df = df[df['date_column_name'] > pd.Timestamp(datetime.now())]\n",
371 | "\n",
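371a | "#(Optional) a small sketch assuming the long-format NOAA output from Step 5, where each row is one\n",
371b | "#(date, datatype) pair: pivot it so that each datatype becomes its own column\n",
371c | "# df_wide = df.pivot_table(index='date', columns='datatype', values='value')\n",
371d | "\n",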
372 | "#drop columns\n",
373 | "df = df.drop(columns=['column1', 'column2', 'column3'])"
374 | ]
375 | },
376 | {
377 | "cell_type": "markdown",
378 | "id": "ea3e2a7c-0333-49b7-82e2-68d92048857d",
379 | "metadata": {},
380 | "source": [
381 | "## Optional - Step 7: Push data to Celonis\n",
382 | "#### Only use if you need to push API data to your Celonis data pool (usually API data is just used as an input to a model in the ML Workbench and does not need to be pushed back to Celonis)"
383 | ]
384 | },
385 | {
386 | "cell_type": "code",
387 | "execution_count": null,
388 | "id": "1cd5ab28-59a7-4c3f-9163-8a26a68091eb",
389 | "metadata": {},
390 | "outputs": [],
391 | "source": [
392 | "#define the data pool\n",
393 | "data_pool = celonis.pools.find(\"data_pool_id\")\n",
394 | "\n",
395 | "#option 1 - replace existing table\n",
396 | "data_pool.push_table(df,\"table_name_in_data_pool\", if_exists = 'replace')\n",
397 | "\n",
398 | "\n",
399 | "#option 2 - upsert data (similar to a delta load) using primary key of table\n",
400 | "data_pool.upsert_table(table_name=\"table_name_in_data_pool\",\n",
401 | " df_or_path=df,\n",
402 | " primary_keys=['primary_key'])\n",
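402a | "\n",
402b | "#Note: the data pool ID can be copied from the data pool's URL in the EMS; depending on your\n",
402c | "#pycelonis version, celonis.pools.find() may also accept the pool's display name instead of the ID.\n",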
403 | "\n"
404 | ]
405 | }
406 | ],
407 | "metadata": {
408 | "kernelspec": {
409 | "display_name": "Python 3",
410 | "language": "python",
411 | "name": "python3"
412 | },
413 | "language_info": {
414 | "codemirror_mode": {
415 | "name": "ipython",
416 | "version": 3
417 | },
418 | "file_extension": ".py",
419 | "mimetype": "text/x-python",
420 | "name": "python",
421 | "nbconvert_exporter": "python",
422 | "pygments_lexer": "ipython3",
423 | "version": "3.8.6"
424 | }
425 | },
426 | "nbformat": 4,
427 | "nbformat_minor": 5
428 | }
429 |
--------------------------------------------------------------------------------
/pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/utils/model_utils.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 |
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | import pandas as pd
6 |
7 | import pmdarima
8 | import statsmodels.api as sm
9 | from sklearn import linear_model, metrics
10 | from statsmodels.tsa.statespace import sarimax
11 | from . import utils, plot_utils
12 |
13 |
14 | def run_predictions_model(df,
15 | ext_data,
16 | y_col_name,
17 | exo_col_name,
18 | val_size_perc=0.2,
19 | to_adjust_years=False):
20 | """Run Predictions Model for Train df
21 |
22 | Parameters
23 | ----------
24 | df : DataFrame
25 | Train set Dataframe containing the Y values of the Time Series to predict
26 | ext_data : DataFrame
27 | External data to use as Regressor to model and predict the TS Trend
28 | y_col_name : String
29 | Column name of the TS values column (Y column).
30 | exo_col_name : String
31 | Column name of the External Regressor values column.
32 | val_size_perc : Float
33 | Part of the df to use for Validation.
34 | Format: [0.0;1.0]
35 | to_adjust_years : Boolean
36 | True if the baseline level of the TS has changed during its timeframe and should be adjusted.
37 | By default False
38 |
39 | Returns
40 | -------
41 | DataFrame
42 | Output DataFrame with the n-step Predictions for the TS (Predict the n future Y values).
43 | n is set as the minimum between the number of future values from the External data and the predicted Residuals
44 | """
45 |
46 | # Reindex and create Train Df
47 | df = df.reset_index(drop=True)
48 | train_df = df.copy()
49 | print('train df head looks like: \n', train_df.head())
50 |
51 | # Clean data: fill empty weeks with 0 value
52 | train_df = utils.fill_empty_dates(train_df)
53 |
54 | # Cap the high outliers to a max value
55 | train_df = utils.cap_outliers(
56 | train_df,
57 | max_outlier_value=1000) # PARAM - max_outlier_value: Max value
58 |
59 | # Adjust past data if baseline changed at date change_date
60 | if to_adjust_years:
61 | train_df = utils.adjust_baseline(train_df,
62 | change_date='YYYY-MM-DD',
63 | end_date='YYYY-MM-DD')
64 | # PARAM - change_date: date at which baseline level changed, end_date: end date of new baseline level
65 |
66 | # Plot preprocessed Train Df
67 | plot_utils.plot_clean_y(df, train_df,
68 | y_max=1000 + 100) #PARAM - y axis max value
69 |
70 | #### MODEL: Y = Trend + Seasonality + Residuals
71 |
72 | ### Trend: Calculate, Model and Predict future values
73 | trend_col_name = 'Trend' # PARAM - Trend column name
74 | train_df[trend_col_name] = utils.calculate_trend(
75 | train_df,
76 | ts_seasonality=52,  # PARAM - Seasonality timeframe, e.g. 52 for weekly data with annual seasonality, 7 for a daily TS with weekly seasonality
78 | center=False)
79 | # Plot Y and Trend
80 | plot_utils.plot_y_trend(train_df,
81 | train_df[trend_col_name],
82 | y_min=0,
83 | y_max=100)
84 |
85 | # Use External data/GDP to fit and predict the Trend
86 | print('train df shape is ',
87 | train_df.dropna().shape, ', adding the external data into the df...')
88 | train_df = utils.combine_ext_data(train_df, ext_data, days_to_shift=None)
89 |
90 | # Define X=GDP and Y=Trend for Regression model
91 | exo_pretty_name = "Regressor" # PARAM - External Data/GDP column
92 | X, Y = utils.get_trend_and_exo_for_fit(train_df, exo_col_name,
93 | trend_col_name, val_size_perc)
94 | # Plot Y, Trend and Exo Regr
95 | plot_utils.plot_y_trend_ext(train_df,
96 | Y,
97 | exo_col_name,
98 | exo_pretty_name,
99 | y_min=0,
100 | y_max=1100,
101 | y_min_exo=100,
102 | y_max_exo=200)
103 |
104 | # Fit Regression of Y=Trend on X=Exogenous Regressor
105 | reg = linear_model.LinearRegression().fit(X, Y)
106 | # Predict future Trend with the fitted Regression
107 | trend_pred_col_name = "Predicted Trend"
108 | X_F, train_df = predict_trend(train_df, reg, exo_col_name,
109 | trend_pred_col_name)
110 | # Plot Trend, External data/GDP and Predicted Trend
111 | plot_utils.plot_y_pred_trend_ext(train_df,
112 | exo_col_name,
113 | X,
114 | Y,
115 | X_F,
116 | y_min=0,
117 | y_max=1100,
118 | y_min_exo=100,
119 | y_max_exo=200)
120 | print('End of Trend part, df is \n', train_df.head())
121 |
122 | ### Seasonality: Calculate S for each date of the seasonality window
123 |
124 | # Calculate Y - Trend
125 | train_df["Y - Trend"] = train_df[y_col_name] - train_df[trend_col_name]
126 |
127 | # Calculate Seasonality by moving avg on Y - T
128 | s = train_df["Y - Trend"].rolling(
129 | window=10,
130 | center=True).mean() # PARAM - window: Moving avg window to smoothen S
131 | # Avg across periods to obtain 1 S value per date of a period
132 | s = s.groupby(s.index.week).mean()
133 |
134 | # Add Seasonality to Df
135 | seasonality_col_name = "Seasonality" # PARAM - S column name
136 | train_df[seasonality_col_name] = np.nan
137 | for i in train_df.index:
138 | train_df.loc[i, seasonality_col_name] = s[i.week]
139 | # (Optional) Fix border dates with Null values
140 | # seas_period_days = 52 * 7 # PARAM - seasonsality period in days
141 | # train_df = utils.fill_seasonality(train_df, seas_period_days,
142 | # seasonality_col_name)
143 |
144 | # Plot Y, T and S
145 | plot_utils.plot_y_t_s_with_pred(train_df, trend_col_name,
146 | seasonality_col_name, trend_pred_col_name)
147 |
148 | ### Residuals: Calculate, Model and Predict future values
149 |
150 | # Calculate R = Y - Trend - Season
151 | train_df["Y - T - S"] = train_df[y_col_name] - train_df[
152 | trend_col_name] - train_df[seasonality_col_name]
153 | # Create R df
154 | r_col_name = "Y - T - S" # PARAM - R column name
155 | r = train_df[r_col_name]
156 | # Plot R
157 | plot_utils.plot_r(train_df, r_col_name)
158 | # R shape
159 | print('R df shape is ', r.dropna().shape)
160 | # Stationarity test
161 | res = sm.tsa.adfuller(r.dropna(), regression="c")
162 | print("adf test p-value is:{}".format(res[1]))
163 | # Verify that p value is low
164 | # ACF PACF on R
165 | plot_utils.plot_acf_pacf_r(r, lags=25) # PARAM - # lags for acf pacf
166 | # Deduce ARMA(p,q) model for R
167 |
168 | # Create R df for R Model
169 | columns_to_drop = [y_col_name, exo_col_name]
170 | col_to_rename = {"index": "Date"}
171 | r_df = create_r_df(train_df, columns_to_drop, col_to_rename)
172 |
173 | # Fit ARIMA Model on R for R predictions
174 | p, d, q = 3, 0, 3 # PARAM - p for AR, d for I, q for MA.
175 | P, D, Q, s = None, None, None, None # If seasonality use P,D,Q,s, if not set to None.
176 | n_pred = 5 # n_pred is # future points to forecast
177 | model = None # (Optional) model - to input an existing loaded model
178 | exo = None # (Optional) exo - to input exogenous regressors
179 | r_df = r_df.dropna()
180 | model_r, results_df_r = get_results_with_val(r_df, exo, p, d, q, P, D, Q, s,
181 | model, r_col_name,
182 | val_size_perc, n_pred)
183 | # Add Predicted R to df
184 | r_col_name = "Predicted R" # PARAM - R column name for df
185 | class_col_name = "Classification" # PARAM - classification col name (train/test/forecast)
186 | train_df = add_r(train_df, results_df_r, r_col_name, class_col_name)
187 |
188 | ### Calculate Total Y Prediction = Predicted T + S + Predicted R
189 |
190 | y_pred_col_name = "Y Prediction" # PARAM - y pred column names
191 | train_df = calc_y_pred(train_df, y_pred_col_name, trend_pred_col_name,
192 | seasonality_col_name, class_col_name)
193 | print('End of df with predictions is \n', train_df.tail(n=20))
194 |
195 | # Plot and show Final Df with predictions
196 | plot_utils.plot_final(train_df, trend_col_name, seasonality_col_name,
197 | r_col_name, trend_pred_col_name, y_pred_col_name,
198 | class_col_name)
199 |
200 | # Return Final Df with Y predictions
201 | return train_df
202 |
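202a | # Example call (hypothetical column names; assumes weekly data and an external GDP regressor):
202b | #   predictions_df = run_predictions_model(df, ext_data,
202c | #                                           y_col_name="Net Order Value",
202d | #                                           exo_col_name="GDP",
202e | #                                           val_size_perc=0.2)
202f | 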
203 |
204 | def get_results_with_val(df,
205 | exo,
206 | p,
207 | d,
208 | q,
209 | P,
210 | D,
211 | Q,
212 | s,
213 | model,
214 | y_col_name,
215 | val_size_perc,
216 | n_predictions=5):
217 | """Fit SARIMAX on input df (optional input and future exo regr) and predict validation + future values
218 | Or use param fitted model (optional input and future exo regr) to predict validation + future values
219 | Plot input and output (val+future) predictions
220 |
221 | Parameters
222 | ----------
223 | df : DataFrame
224 | R Time Series
225 | exo : DataFrame, optional
226 | Exogenous Regressors to model Y
227 | p : int
228 | AR parameter for the SARIMAX on Y
229 | d : int
230 | Integrated parameter for the SARIMAX on Y
231 | q : int
232 | MA parameter for the SARIMAX on Y
233 | P : int
234 | Seasonal AR parameter for the SARIMAX on Y
235 | D : int
236 | Seasonal Integrated parameter for the SARIMAX on Y
237 | Q : int
238 | Seasonal MA parameter for the SARIMAX on Y
239 | s : int
240 | Seasonality timeframe for Y
241 | model : SARIMAX Fitted model, optional
242 | Pre-fitted SARIMAX model to use to predict Y values
243 | y_col_name : String
244 | Column name of Y values
245 | val_size_perc : Float
246 | Part of the df to use for Validation.
247 | Format: [0.0;1.0]
248 | n_predictions : int, optional
249 | Number of future values to predict for Y, by default 5
250 |
251 | Returns
252 | -------
253 | smodel : pmdarima.arima.ARIMA
254 | Fitted SARIMAX model on Y
255 | results: DataFrame
256 | DataFrame including the train, validation and forecast values from the SARIMAX fitted model on Y Time Series
257 | """
258 |
259 | X = df[y_col_name].values
260 | Y = df["Date"].values
261 | train_size = int(len(X) * (1 - val_size_perc))
262 | train, test = X[:train_size], X[train_size:len(X)]
263 | week = Y[train_size:len(X)]
264 | exo_past, exo_future = None, None
265 |
266 | # Split Exo Regressor into past (train + val) and future (forecast) values
267 | if exo is not None:
268 | exo_past, exo_future = exo[:len(X)], exo[len(X):len(exo)]
269 |
270 | # Create SARIMAX model or use input model
271 | print("Checking model for fit...")
272 | if model is None:
273 | print("No input model, starting to fit SARIMAX ({},{},{})x({},{},{},{})".format(
274 | p, d, q, P, D, Q, s))
275 | smodel = pmdarima.arima.ARIMA(order=[p, d, q],
276 | method="lbfgs",
277 | maxiter=50,
278 | suppress_warnings=True)
279 | smodel = smodel.fit(df[y_col_name].values, exo_past)
280 | print("Finished SARIMAX fit.")
281 | else:
282 | print("Existing input model, will use it")
283 | smodel = model
284 |
285 | # Test model on the Validation set
286 | history = [x for x in train]
287 | predictions = list()
288 | for t in range(len(test)):
289 | model = sarimax.SARIMAX(history,
290 | order=smodel.order,
291 | seasonal_order=smodel.seasonal_order,
292 | enforce_stationarity=False)
293 | model_fit = model.fit(disp=0)
294 | output = model_fit.forecast()
295 | if output[0] < 0:
296 | yhat = 0
297 | else:
298 | yhat = output[0]
299 | predictions.append(yhat)
300 | obs = test[t]
301 | history.append(obs)
302 | print("predicted=%f, expected=%f" % (yhat, obs))
303 | error = metrics.mean_squared_error(test, predictions)
304 | print("Test MSE: %.3f" % error)
305 |
306 | # Add Train set to output
307 | data = pd.DataFrame()
308 | data["Date"] = Y[0:train_size]
309 | data["Predicted Net Order Value"] = None
310 | data["Actual Net Order Value"] = X[0:train_size]
311 | data["Classification"] = "train"
312 |
313 | # Add Validation set to output
314 | Tested = pd.DataFrame()
315 | Tested["Date"] = week
316 | Tested["Predicted Net Order Value"] = predictions
317 | Tested["Actual Net Order Value"] = test
318 | Tested["Classification"] = "test"
319 | Tested["Predicted Net Order Value"] = Tested[
320 | "Predicted Net Order Value"].astype(float)
321 | Tested["Date"] = pd.to_datetime(Tested["Date"])
322 |
323 | # Add Forecast set to output
324 | print("Predicting forecast values...")
325 | n_periods = n_predictions
326 | fitted, confint = smodel.predict(n_periods=n_periods,
327 | return_conf_int=True,
328 | exogenous=exo_future)
329 | print("Finished predicting forecast values.")
330 | rng = pd.date_range(df["Date"].max(), periods=n_periods, freq="7D")
331 | forecast = pd.DataFrame({
332 | "Date": rng,
333 | "Predicted Net Order Value": fitted,
334 | "Actual Net Order Value": None,
335 | "Classification": "forecast",
336 | "Conf_lower": confint[:, 0],
337 | "Conf_Upper": confint[:, 1],
338 | })
339 | forecast = forecast.drop(forecast.index[0])
340 |
341 | # Combine all sets
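341a | # Note: DataFrame.append was removed in pandas 2.0; on newer pandas use
341b | # results = pd.concat([data, Tested, forecast], ignore_index=True) instead.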
342 | results = data.append(Tested, ignore_index=True)
343 | results = results.append(forecast, ignore_index=True)
344 | # Reformat Dates to Date type
345 | results["Date"] = pd.to_datetime(results["Date"])
347 | return smodel, results
348 |
349 |
350 | def predict_trend(train_df, reg, exo_col_name, pred_trend_col_name):
351 | """Trend Regression to predict future Trend"""
352 | # Get Regressor on prediction timeframe
353 | X_F = train_df[exo_col_name].dropna().values.reshape(-1, 1)
354 | print(X_F.shape)
355 | print(reg.predict(X_F).shape)
356 | # Predict Trend using fitted Regression on Regressor
357 | t_pred = reg.predict(X_F)
358 | len_pred = t_pred.shape[0]
359 | train_df[pred_trend_col_name] = np.nan
360 | train_df.loc[train_df.index[-len_pred:], pred_trend_col_name] = t_pred.ravel()
361 | return X_F, train_df
362 |
363 |
364 | def create_r_df(train_df, columns_to_drop, col_to_rename):
365 | """Create Residuals DataFrame"""
366 | r_df = train_df.copy()
367 | r_df = r_df.drop(columns=columns_to_drop)
368 | r_df = r_df.reset_index()
369 | r_df = r_df.rename(columns=col_to_rename)
370 | return r_df
371 |
372 |
373 | def add_r(train_df, results_df_r, r_col_name, class_col_name):
374 | """Add Residuals (Train, Val and Forecast) to the Input Df"""
375 | results_df_r_idx = results_df_r.set_index("Date")
376 | train_df[r_col_name] = np.nan
377 | train_df[r_col_name] = results_df_r_idx["Predicted Net Order Value"]
378 | train_df[class_col_name] = results_df_r_idx[class_col_name]
379 | return train_df
380 |
381 |
382 | def calc_y_pred(train_df, y_pred_col_name, trend_pred_col_name,
383 | seasonality_col_name, class_col_name):
384 | """Calculate Predicted Y with Predicted T, S and Predicted R components, on Validation and Forecast sets"""
385 | train_df[y_pred_col_name] = np.nan
386 | # Validation Y values
387 | mask = train_df[class_col_name] == "test"
388 | train_df.loc[mask, y_pred_col_name] = (train_df[trend_pred_col_name] +
389 | train_df[seasonality_col_name] +
390 | train_df["Predicted R"])
391 | # Future Y values
392 | mask = train_df[class_col_name] == "forecast"
393 | train_df.loc[mask, y_pred_col_name] = (train_df[trend_pred_col_name] +
394 | train_df[seasonality_col_name] +
395 | train_df["Predicted R"])
396 | return train_df
397 |
--------------------------------------------------------------------------------