├── .gitignore
├── README.md
├── pycelonis1
    ├── 00_manage_celonis
    │   ├── 00_ibc_to_ibc_movers
    │   │   ├── 00_analysis_mover.ipynb
    │   │   ├── 01_data_pool_mover.ipynb
    │   │   ├── 02_asset_mover.ipynb
    │   │   ├── 03_workflow_mover.ipynb
    │   │   ├── 04_analysis_to_studio_mover.ipynb
    │   │   ├── 05_workflow_to_studio_skill_mover.ipynb
    │   │   ├── 06_package_mover.ipynb
    │   │   ├── 07_action_engine_skill_mover.ipynb
    │   │   ├── 08_replacer.ipynb
    │   │   └── KPI_Mover.ipynb
    │   └── 01_misc
    │   │   ├── 00_trigger_workbench_execution.ipynb
    │   │   └── 01_use_case_version_control.ipynb
    ├── 01_use_pycelonis
    │   ├── 00_basics
    │   │   ├── 00_connecting_to_celonis.ipynb
    │   │   ├── 01_pulling_data_from_analysis.ipynb
    │   │   ├── 02_pulling_data_from_datamodel.ipynb
    │   │   └── 03_pushing_data.ipynb
    │   └── 01_misc
    │   │   └── 00_working_in_r_with_celonis.ipynb
    ├── 02_try_ml_use_cases
    │   ├── 00_time_series_forecasting
    │   │   ├── 00_O2C_material_group_net_weight_forecasting.ipynb
    │   │   └── 01_ts_forecasting
    │   │   │   ├── main.py
    │   │   │   └── utils
    │   │   │       ├── __init__.py
    │   │   │       ├── ext_data_utils.py
    │   │   │       ├── model_utils.py
    │   │   │       ├── plot_utils.py
    │   │   │       └── utils.py
    │   ├── 01_Clustering_KMeans.ipynb
    │   ├── 02_NLP_Topic_modeling_LDA.ipynb
    │   └── Simple_Model_with_Snippets.ipynb
    ├── 03_Connectivity
    │   ├── 02a_Extraction_Mover.ipynb
    │   ├── 02b_Transformation_Mover.ipynb
    │   ├── 03_Data_Model_Mover.ipynb
    │   ├── 05_Transformation_Download_to_MLW.ipynb
    │   ├── 11_Extraction_Unifier.ipynb
    │   └── 18_EMS_Data_Consumption_Report.ipynb
    ├── 04_Data_Formatting
    │   ├── 00_Combine_csv_files.ipynb
    │   └── 03_Clean_csv_data.ipynb
    ├── 05_Data_Visualization
    │   └── 00_3d_plot.ipynb
    └── 06_Extractors
    │   ├── 03_Datadog_log_data_extraction.ipynb
    │   ├── 11_API template.ipynb
    │   └── 99_Extract-logs-from-EMS.ipynb
└── pycelonis2
    ├── .DS_Store
    ├── 01_example_use_cases
        └── 01_use_case_version_control.ipynb
    └── 02_pycelonis_version_migrator
        ├── Pycelonis_Migration_UI.ipynb
        ├── README.md
        ├── function_get_data_frame.txt
        └── pycelonis_migration.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # ---> Python
2 | # Byte-compiled / optimized / DLL files
3 | __pycache__/
4 | *.py[cod]
5 | *$py.class
6 | 
7 | all_scripts/
8 | 
9 | .idea/
10 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PyCelonis Examples
2 | 
3 | This repository contains demo notebooks covering popular functionalities and use cases of PyCelonis.
4 | It complements the official [tutorials](https://celonis.github.io/pycelonis/2.0.1/tutorials/executed/01_quickstart/01_installation/) for PyCelonis.
5 | 
6 | The repository contains several notebooks for both PyCelonis 1.X and 2.X that serve as examples of what you can achieve with PyCelonis.
7 | The examples are grouped by PyCelonis version and use case.
8 | 
9 | ## PyCelonis
10 | 
11 | [![License: Celonis Tools License Agreement](https://img.shields.io/badge/License-Celonis%20Tools%20License%20Agreement-brightgreen)](https://celonis.github.io/pycelonis/license.html)
12 | [![Pycelonis Documentation](https://img.shields.io/badge/Docs-pycelonis-yellow)](https://celonis.github.io/pycelonis/index.html)
13 | 
14 | PyCelonis is a Python API wrapper for the Celonis EMS.
15 | 
16 | Using this package, you can programmatically interact with Analyses, Workspaces, Data Models, Data Pools, and other Celonis objects.
17 | The package also allows pushing data to and pulling data from Data Pools and Data Models.
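For a quick impression of that workflow, here is a minimal sketch in PyCelonis 1.x syntax, mirroring the notebooks in this repository; the team URL, API token, table name, and data model ID are placeholders you would replace with your own values:

```python
from pycelonis import get_celonis
from pycelonis.pql import PQLColumn

# Placeholder credentials -- replace with your team URL and a valid API/App key.
celonis = get_celonis(celonis_url="https://your_team.celonis.cloud/", api_token="your_api_token")

# Placeholder data model ID -- find a data model and pull one column as a pandas DataFrame.
datamodel = celonis.datamodels.find("your_datamodel_id")
df = datamodel.get_data_frame(PQLColumn(query="Table_name.A", name="Name 1"))
print(df.head())
```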
18 | 19 | PyCelonis is pre-installed in all Celonis Machine Learning Workbenches by default. 20 | For more information about PyCelonis and how to set it up in your local python environment, [see the docs](https://celonis.github.io/pycelonis/). 21 | -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/00_analysis_mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Analysis Mover using Content CLI\n", 8 | "\n", 9 | "\n", 10 | "### This tutorial shows how to copy an analysis from one team/workspace to another one" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "source_team_url = 'https://your_team.celonis.cloud/'\n", 20 | "source_api_key = 'your_api_token'\n", 21 | "destination_team_url = 'https://your_team.celonis.cloud/'\n", 22 | "destination_api_key = 'your_api_token'" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "### Set environment variables for the source team setup" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "import os\n", 39 | "os.environ['TEAM_URL'] = source_team_url\n", 40 | "os.environ['API_TOKEN'] = source_api_key\n" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "### Pull analysis from the source team" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 3, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "\u001b[32minfo\u001b[39m: File downloaded successfully. New filename: analysis_6b2166e2-0c40-43e2-b3e6-62996c7dae11.json\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "!content-cli pull analysis --id 'insert analysis id here'" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "### Set environment variables again for the destination team setup\n", 72 | "(No need to do this step (re-define environment variables) if source and destination teams are the same)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 4, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "os.environ['TEAM_URL'] = destination_team_url\n", 82 | "os.environ['API_TOKEN'] = destination_api_key" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### Push analysis to the destination team\n", 90 | "\n", 91 | "Hint: Press tab while writing the file name to auto complete" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 5, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "name": "stdout", 101 | "output_type": "stream", 102 | "text": [ 103 | "\u001b[32minfo\u001b[39m: Analysis was pushed successfully. 
New ID: 07f700ff-20e3-4dd8-878b-c7fb6319b3b2\n" 104 | ] 105 | } 106 | ], 107 | "source": [ 108 | "!content-cli push analysis --workspaceId 'insert workspace id here' --file 'insert_downloaded_file_in_the_previous_step_here'" 109 | ] 110 | } 111 | ], 112 | "metadata": { 113 | "kernelspec": { 114 | "display_name": "Python 3", 115 | "language": "python", 116 | "name": "python3" 117 | }, 118 | "language_info": { 119 | "codemirror_mode": { 120 | "name": "ipython", 121 | "version": 3 122 | }, 123 | "file_extension": ".py", 124 | "mimetype": "text/x-python", 125 | "name": "python", 126 | "nbconvert_exporter": "python", 127 | "pygments_lexer": "ipython3", 128 | "version": "3.8.3-final" 129 | } 130 | }, 131 | "nbformat": 4, 132 | "nbformat_minor": 4 133 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/01_data_pool_mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Data Pool Mover using Content CLI\n", 8 | "\n", 9 | "### This tutorial shows how to copy a datapool from one team to another\n", 10 | "Note: Datamodels and data jobs contained in the datapool are moved, but not the actual data" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "source_team_url = 'https://your_team.celonis.cloud/'\n", 20 | "source_api_key = 'your_api_token'\n", 21 | "destination_team_url = 'https://your_team.celonis.cloud/'\n", 22 | "destination_api_key = 'your_api_token'" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "### Set environment variable for the source team" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "import os\n", 39 | "os.environ['TEAM_URL'] = source_team_url\n", 40 | "os.environ['API_TOKEN'] = source_api_key" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "### Pull data pool from the source team" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 3, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "\u001b[32minfo\u001b[39m: File downloaded successfully. 
New filename: data-pool_7796633e-c2db-4524-92ec-85ae5fe65282.json\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "!content-cli pull data-pool --id 'id_of_your_data_pool'" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "### Set environment variable for the destination team\n", 72 | "Skip this step of setting up destination team if source and destination team are the same" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 4, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "import os\n", 82 | "os.environ['TEAM_URL'] = destination_team_url\n", 83 | "os.environ['API_TOKEN'] = destination_api_key" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "### Push data pool config into destination team\n", 91 | "Hint: press tab to auto complete the file name" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 5, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "name": "stdout", 101 | "output_type": "stream", 102 | "text": [ 103 | "\u001b[32minfo\u001b[39m: Data Pool was pushed successfully. New ID: undefined\n" 104 | ] 105 | } 106 | ], 107 | "source": [ 108 | "!content-cli push data-pool --file 'the_file_that_got_downloaded_in_the_previous_step'" 109 | ] 110 | } 111 | ], 112 | "metadata": { 113 | "kernelspec": { 114 | "display_name": "Python 3", 115 | "language": "python", 116 | "name": "python3" 117 | }, 118 | "language_info": { 119 | "codemirror_mode": { 120 | "name": "ipython", 121 | "version": 3 122 | }, 123 | "file_extension": ".py", 124 | "mimetype": "text/x-python", 125 | "name": "python", 126 | "nbconvert_exporter": "python", 127 | "pygments_lexer": "ipython3", 128 | "version": "3.7.6" 129 | } 130 | }, 131 | "nbformat": 4, 132 | "nbformat_minor": 4 133 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/02_asset_mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Asset Mover using Content CLI\n", 8 | "\n", 9 | "### This script moves an Asset (skill, view, analysis, knowledge model etc. in the studio) from one team to another " 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "source_team_url = 'https://your_team.celonis.cloud/'\n", 19 | "source_api_key = 'your_api_token'\n", 20 | "destination_team_url = 'https://your_team.celonis.cloud/'\n", 21 | "destination_api_key = 'your_api_token'" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "### Set environment variable for the source team" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import os\n", 38 | "os.environ['TEAM_URL'] = source_team_url\n", 39 | "os.environ['API_TOKEN'] = source_api_key" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "### Pull asset from the source team" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "\u001b[32minfo\u001b[39m: File downloaded successfully. 
New filename: asset_mykm.yml\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "!content-cli pull asset --key 'insert_asset_key_here'" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Set environment variable for the destination team\n", 71 | "Skip this step of setting environment variables again if source and destination are the same teams" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "os.environ['TEAM_URL'] = destination_team_url\n", 81 | "os.environ['API_TOKEN'] = destination_api_key" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "### Push asset to the destination team\n", 89 | "Hint: Press tab to autocomplete the file name" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 5, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "\u001b[32minfo\u001b[39m: Asset was pushed successfully. New key: noor.mykm\n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "!content-cli push asset --file 'insert_downloaded_asset_file_name' --package 'package_key_to_push_the_asset_to'" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [] 115 | } 116 | ], 117 | "metadata": { 118 | "kernelspec": { 119 | "display_name": "Python 3", 120 | "language": "python", 121 | "name": "python3" 122 | }, 123 | "language_info": { 124 | "codemirror_mode": { 125 | "name": "ipython", 126 | "version": 3 127 | }, 128 | "file_extension": ".py", 129 | "mimetype": "text/x-python", 130 | "name": "python", 131 | "nbconvert_exporter": "python", 132 | "pygments_lexer": "ipython3", 133 | "version": "3.7.6" 134 | } 135 | }, 136 | "nbformat": 4, 137 | "nbformat_minor": 4 138 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/03_workflow_mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Workflow Mover using Content CLI\n", 8 | "\n", 9 | "### This tutorial shows how to copy a workflow (process automation) from one team to another one" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "source_team_url = 'https://your_team.celonis.cloud/'\n", 19 | "source_api_key = 'your_api_token'\n", 20 | "destination_team_url = 'https://your_team.celonis.cloud/'\n", 21 | "destination_api_key = 'your_api_token'" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "### Set environment variable for the source team" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import os\n", 38 | "os.environ['TEAM_URL'] = source_team_url\n", 39 | "os.environ['API_TOKEN'] = source_api_key" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "### Pull workflow from the source team" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "\u001b[32minfo\u001b[39m: File downloaded 
successfully. New filename: workflow_2b3ef876-aa47-42b6-823f-5e1bb4680e9d.yaml\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "!content-cli pull workflow --id 'insert_workflow_id_here'" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Set environment variables again for the destination team setup\n", 71 | "No need to do this step (re-define environment variables) if source and destination teams are the same" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "os.environ['TEAM_URL'] = destination_team_url\n", 81 | "os.environ['API_TOKEN'] = destination_api_key" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "### Push workflow to the destination team\n", 89 | "\n", 90 | "Hint: Press tab while writing the file name to auto complete" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 5, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | "\u001b[32minfo\u001b[39m: Workflow was pushed successfully. New Id: b5391c57-87ae-47f9-a876-2c18e304a994\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "!content-cli push workflow --file 'insert_downloaded_file_in_the_previous_step_here'" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [] 116 | } 117 | ], 118 | "metadata": { 119 | "kernelspec": { 120 | "display_name": "Python 3", 121 | "language": "python", 122 | "name": "python3" 123 | }, 124 | "language_info": { 125 | "codemirror_mode": { 126 | "name": "ipython", 127 | "version": 3 128 | }, 129 | "file_extension": ".py", 130 | "mimetype": "text/x-python", 131 | "name": "python", 132 | "nbconvert_exporter": "python", 133 | "pygments_lexer": "ipython3", 134 | "version": "3.7.6" 135 | } 136 | }, 137 | "nbformat": 4, 138 | "nbformat_minor": 4 139 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/04_analysis_to_studio_mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Move Analysis from Process Analytics to Studio" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "source_team_url = 'https://your_team.celonis.cloud/'\n", 17 | "source_api_key = 'your_api_token'\n", 18 | "destination_team_url = 'https://your_team.celonis.cloud/'\n", 19 | "destination_api_key = 'your_api_token'" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "### Set environment variables for the source team setup" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import os\n", 36 | "os.environ['TEAM_URL'] = source_team_url\n", 37 | "os.environ['API_TOKEN'] = source_api_key" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### Pull analysis from process analytics in source team as an asset" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 3, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "name": "stdout", 54 | "output_type": "stream", 55 | "text": [ 56 | 
"\u001b[32minfo\u001b[39m: File downloaded successfully. New filename: asset_6b2166e2-0c40-43e2-b3e6-62996c7dae11.yaml\n" 57 | ] 58 | } 59 | ], 60 | "source": [ 61 | "!content-cli pull analysis --id 'insert_id_of_analysis' --asset" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "### Set environment variables again for the destination team setup\n", 69 | "No need to do this step (re-define environment variables) if source and destination teams are the same" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "os.environ['TEAM_URL'] = destination_team_url\n", 79 | "os.environ['API_TOKEN'] = destination_api_key" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "### Push downloaded analysis file to the studio as asset in the source team\n", 87 | "Hint: Press tab while writing the file name to auto complete" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 5, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "name": "stdout", 97 | "output_type": "stream", 98 | "text": [ 99 | "\u001b[32minfo\u001b[39m: Asset was pushed successfully. New key: test.RCA\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "!content-cli push asset --file 'insert_downloaded_asset_file_name' --package 'package_key_to_push_the_asset_to'" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [] 113 | } 114 | ], 115 | "metadata": { 116 | "kernelspec": { 117 | "display_name": "Python 3", 118 | "language": "python", 119 | "name": "python3" 120 | }, 121 | "language_info": { 122 | "codemirror_mode": { 123 | "name": "ipython", 124 | "version": 3 125 | }, 126 | "file_extension": ".py", 127 | "mimetype": "text/x-python", 128 | "name": "python", 129 | "nbconvert_exporter": "python", 130 | "pygments_lexer": "ipython3", 131 | "version": "3.7.6" 132 | } 133 | }, 134 | "nbformat": 4, 135 | "nbformat_minor": 4 136 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/05_workflow_to_studio_skill_mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Move Workflows from Process Automation to Studio Skills" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "source_team_url = 'https://your_team.celonis.cloud/'\n", 17 | "source_api_key = 'your_api_token'\n", 18 | "destination_team_url = 'https://your_team.celonis.cloud/'\n", 19 | "destination_api_key = 'your_api_token'" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "### Set environment variables for the source team setup" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import os\n", 36 | "os.environ['TEAM_URL'] = source_team_url\n", 37 | "os.environ['API_TOKEN'] = source_api_key" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### Pull workflow from process automation in source team as an asset" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 3, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 
| "name": "stdout", 54 | "output_type": "stream", 55 | "text": [ 56 | "\u001b[32minfo\u001b[39m: File downloaded successfully. New filename: asset_2b3ef876-aa47-42b6-823f-5e1bb4680e9d.yaml\n" 57 | ] 58 | } 59 | ], 60 | "source": [ 61 | "!content-cli pull workflow --id 'insert_workflow_id_here' --asset" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "### Set environment variables again for the destination team setup\n", 69 | "No need to do this step (re-define environment variables) if source and destination teams are the same" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "os.environ['TEAM_URL'] = destination_team_url\n", 79 | "os.environ['API_TOKEN'] = destination_api_key" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "### Push downloaded workflow file to the studio as skill asset in the source team\n", 87 | "Hint: Press tab while writing the file name to auto complete" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 5, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "name": "stdout", 97 | "output_type": "stream", 98 | "text": [ 99 | "\u001b[32minfo\u001b[39m: Asset was pushed successfully. New key: test.On-time-Delivery-Prediction\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "!content-cli push asset --file 'insert_downloaded_asset_file_name' --package 'package_key_to_push_the_asset_to'" 105 | ] 106 | } 107 | ], 108 | "metadata": { 109 | "kernelspec": { 110 | "display_name": "Python 3", 111 | "language": "python", 112 | "name": "python3" 113 | }, 114 | "language_info": { 115 | "codemirror_mode": { 116 | "name": "ipython", 117 | "version": 3 118 | }, 119 | "file_extension": ".py", 120 | "mimetype": "text/x-python", 121 | "name": "python", 122 | "nbconvert_exporter": "python", 123 | "pygments_lexer": "ipython3", 124 | "version": "3.7.6" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 4 129 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/06_package_mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Package Mover using Content CLI\n", 8 | "\n", 9 | "### This script moves a package from one team/place to another " 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "source_team_url = 'https://your_team.celonis.cloud/'\n", 19 | "source_api_key = 'your_api_token'\n", 20 | "destination_team_url = 'https://your_team.celonis.cloud/'\n", 21 | "destination_api_key = 'your_api_token'" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "#### Set environment variable for the source team" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import os\n", 38 | "os.environ['TEAM_URL'] = source_team_url\n", 39 | "os.environ['API_TOKEN'] = source_api_key" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "#### Pull Package from the source team" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": { 53 | "jupyter": { 54 | "source_hidden": true 55 | } 56 | 
}, 57 | "outputs": [ 58 | { 59 | "name": "stdout", 60 | "output_type": "stream", 61 | "text": [ 62 | "\u001b[32minfo\u001b[39m: File downloaded successfully\n" 63 | ] 64 | } 65 | ], 66 | "source": [ 67 | "!content-cli pull package --key 'package_key'" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "#### Set environment variable for the destination team\n", 75 | "(Skip this step of setting environment variables again if source and destination are the same teams)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 4, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "os.environ['TEAM_URL'] = destination_team_url\n", 85 | "os.environ['API_TOKEN'] = destination_api_key" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "#### Push package to the destination team\n", 93 | "(Hint: Press tab to autocomplete the file name)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 5, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "name": "stdout", 103 | "output_type": "stream", 104 | "text": [ 105 | "\u001b[32minfo\u001b[39m: Package was pushed successfully.\n" 106 | ] 107 | } 108 | ], 109 | "source": [ 110 | "!content-cli push package --file 'downloaded_zip_file_in_previous_step'" 111 | ] 112 | } 113 | ], 114 | "metadata": { 115 | "kernelspec": { 116 | "display_name": "Python 3", 117 | "language": "python", 118 | "name": "python3" 119 | }, 120 | "language_info": { 121 | "codemirror_mode": { 122 | "name": "ipython", 123 | "version": 3 124 | }, 125 | "file_extension": ".py", 126 | "mimetype": "text/x-python", 127 | "name": "python", 128 | "nbconvert_exporter": "python", 129 | "pygments_lexer": "ipython3", 130 | "version": "3.7.6" 131 | } 132 | }, 133 | "nbformat": 4, 134 | "nbformat_minor": 4 135 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/07_action_engine_skill_mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Action Engine Skill Mover using Content CLI\n", 8 | "\n", 9 | "### This tutorial shows how to copy an action engine skill from one team to another one" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "source_team_url = 'https://your_team.celonis.cloud/'\n", 19 | "source_api_key = 'your_api_token'\n", 20 | "destination_team_url = 'https://your_team.celonis.cloud/'\n", 21 | "destination_api_key = 'your_api_token'" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "### Set environment variable for the source team" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import os\n", 38 | "os.environ['TEAM_URL'] = source_team_url\n", 39 | "os.environ['API_TOKEN'] = source_api_key" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "### Pull action engine skill from the source team" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "\u001b[32minfo\u001b[39m: File downloaded successfully. 
New filename: skill_08594b68-2731-4ede-abaf-4fd7eb5720ca.json\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "!content-cli pull skill --skillId 'insert_skill_id_here' --projectId 'insert_project_id_here'" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Set environment variables again for the destination team setup\n", 71 | "No need to do this step (re-define environment variables) if source and destination teams are the same" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "os.environ['TEAM_URL'] = destination_team_url\n", 81 | "os.environ['API_TOKEN'] = destination_api_key" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "### Push action engine skill to the destination team\n", 89 | "\n", 90 | "Hint: Press tab while writing the file name to auto complete" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 5, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | "\u001b[32minfo\u001b[39m: Skill was pushed successfully. New ID: 4639e4dd-b0ea-484f-822b-5415f2244c5d\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "!content-cli push skill --projectId 'insert_new_project_id_here' --file 'insert_downloaded_file_name_here'" 108 | ] 109 | } 110 | ], 111 | "metadata": { 112 | "kernelspec": { 113 | "display_name": "Python 3", 114 | "language": "python", 115 | "name": "python3" 116 | }, 117 | "language_info": { 118 | "codemirror_mode": { 119 | "name": "ipython", 120 | "version": 3 121 | }, 122 | "file_extension": ".py", 123 | "mimetype": "text/x-python", 124 | "name": "python", 125 | "nbconvert_exporter": "python", 126 | "pygments_lexer": "ipython3", 127 | "version": "3.7.6" 128 | } 129 | }, 130 | "nbformat": 4, 131 | "nbformat_minor": 4 132 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/08_replacer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Replacer\n", 8 | "\n", 9 | "### This tutorial shows how to replace any text in a whole analysis. \n", 10 | "Be careful, only use real key words to replace, otherwise you might replace also words or word parts you did not mean to.\n", 11 | "\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "### First connect to the analysis and indicate what should be replaced\n", 19 | "It is recommended to use the ID of the respective analysis." 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 36, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "2020-02-17 08:20:12 - pycelonis: Login successful! 
Hello s.matthaei@celonis.com\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "import json\n", 37 | "from pycelonis import get_celonis\n", 38 | "\n", 39 | "celonis = get_celonis(\"URL to the team in which you are working.\", \"Specify a valid API key for the cloud team.\")\n", 40 | "analysis = celonis.analyses.find(\"Name or ID of the analysis.\")\n", 41 | "\n", 42 | "# enter as many replacements as you want and separate them with a comma\n", 43 | "replacements ={\n", 44 | " 'old_word_1' : 'new_word_1',\n", 45 | " 'old_word_2' : 'new_word_2'\n", 46 | "}" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "### Get the current version of the published analysis, the drafted one from the edit mode and the saved formulas" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 40, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "num_replacements = 0\n", 63 | "doc_p = analysis.published.data\n", 64 | "doc_d = analysis.draft.data\n", 65 | "kpis = analysis.saved_formulas" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "### Replace in the formulas" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 38, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "name": "stdout", 82 | "output_type": "stream", 83 | "text": [ 84 | "Replacements in the formulas: 78\n" 85 | ] 86 | } 87 | ], 88 | "source": [ 89 | "for kpi in kpis:\n", 90 | " for key, val in replacements.items():\n", 91 | " num_replacements = num_replacements + kpi.data[\"name\"].count(key)\n", 92 | " num_replacements = num_replacements + kpi.data[\"template\"].count(key)\n", 93 | " num_replacements = num_replacements + kpi.data[\"description\"].count(key)\n", 94 | " \n", 95 | " name = kpi.data[\"name\"].replace(key, val)\n", 96 | " template = kpi.data[\"template\"].replace(key, val)\n", 97 | " description = kpi.data[\"description\"].replace(key, val)\n", 98 | " parameters = kpi.data[\"parameters\"]\n", 99 | " \n", 100 | " kpi.delete()\n", 101 | " analysis.create_saved_formula(name=name, description=description, template=template, parameters=parameters) \n", 102 | " \n", 103 | "print('Replacements in the formulas: ' + str(num_replacements))" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "### Replace in the published and drafted analysis" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 39, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "name": "stdout", 120 | "output_type": "stream", 121 | "text": [ 122 | "Overall replacements: 143\n" 123 | ] 124 | } 125 | ], 126 | "source": [ 127 | "json_doc_dump_p = json.dumps(doc_p, ensure_ascii=False)\n", 128 | "json_doc_dump_p = json_doc_dump_p.replace(\"€\", \"€\")\n", 129 | "\n", 130 | "json_doc_dump_d = json.dumps(doc_d, ensure_ascii=False)\n", 131 | "json_doc_dump_d = json_doc_dump_d.replace(\"€\", \"€\")\n", 132 | "\n", 133 | "for key, val in replacements.items():\n", 134 | " num_replacements += json_doc_dump_p.count(key)\n", 135 | " json_doc_dump_p = json_doc_dump_p.replace(key, val)\n", 136 | " json_doc_dump_d = json_doc_dump_d.replace(key, val)\n", 137 | " \n", 138 | "json_doc_dump_p = json_doc_dump_p.replace(\"€\", \"€\")\n", 139 | "json_doc_dump_d = json_doc_dump_d.replace(\"€\", \"€\")\n", 140 | "\n", 141 | "doc_p = json.loads(json_doc_dump_p)\n", 142 | "doc_d = json.loads(json_doc_dump_d)\n", 143 | "\n", 144 | "analysis.draft.data = doc_d\n", 145 | 
"analysis.published.data = doc_p\n", 146 | "\n", 147 | "print('Overall replacements: ' + str(num_replacements))" 148 | ] 149 | } 150 | ], 151 | "metadata": { 152 | "jupytext": { 153 | "formats": "ipynb,py:percent" 154 | }, 155 | "kernelspec": { 156 | "display_name": "Python 3", 157 | "language": "python", 158 | "name": "python3" 159 | }, 160 | "language_info": { 161 | "codemirror_mode": { 162 | "name": "ipython", 163 | "version": 3 164 | }, 165 | "file_extension": ".py", 166 | "mimetype": "text/x-python", 167 | "name": "python", 168 | "nbconvert_exporter": "python", 169 | "pygments_lexer": "ipython3", 170 | "version": "3.7.6" 171 | } 172 | }, 173 | "nbformat": 4, 174 | "nbformat_minor": 4 175 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/KPI_Mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "**Import packages and log in**" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import csv\n", 17 | "import os\n", 18 | "import numpy as np\n", 19 | "import pandas as pd\n", 20 | "import copy\n", 21 | "import sys\n", 22 | "import yaml\n", 23 | "import re\n", 24 | "from collections import OrderedDict\n", 25 | "from pycelonis import get_celonis, pql\n", 26 | "from pycelonis.pql import PQL, PQLColumn\n", 27 | "from pycelonis.utils import parquet_utils as pu\n", 28 | "\n", 29 | "login = {\n", 30 | " \"celonis_url\": \"\",\n", 31 | " \"api_token\": \"\",\n", 32 | " }\n", 33 | "celonis = get_celonis(**login)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "package = celonis.packages.find('31af4c4d-4ddd-40ae-97a6-9d1146345e6f')\n", 43 | "source_analysis = package.analyses.find('0c191ff3-5ef8-47c9-92dd-f5170e342f2a')" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "class quoted(str):\n", 53 | " pass\n", 54 | "\n", 55 | "def quoted_presenter(dumper, data):\n", 56 | " return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='\"')\n", 57 | " \n", 58 | "yaml.add_representer(quoted, quoted_presenter)\n", 59 | "\n", 60 | "class literal(str):\n", 61 | " pass\n", 62 | "\n", 63 | "def literal_presenter(dumper, data):\n", 64 | " return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='>')\n", 65 | " \n", 66 | "yaml.add_representer(literal, literal_presenter)\n", 67 | "\n", 68 | "def ordered_dict_presenter(dumper, data):\n", 69 | " return dumper.represent_dict(data.items())\n", 70 | "\n", 71 | "yaml.add_representer(OrderedDict, ordered_dict_presenter)\n", 72 | "\n", 73 | "\n", 74 | "def add_parameters(data, km_kpi):\n", 75 | " if len(data['parameters']) != 0:\n", 76 | " km_parameters = []\n", 77 | " for parameter, i in zip(data['parameters'], range(len(data['parameters']))):\n", 78 | " km_parameter = {\n", 79 | " 'id': 'p'+str(i+1),\n", 80 | " 'displayName': parameter['name'],\n", 81 | " 'defaultValue': 0\n", 82 | " }\n", 83 | " km_parameters.append(km_parameter)\n", 84 | " km_kpi['parameters'] = km_parameters\n", 85 | " return km_kpi\n", 86 | "\n", 87 | "def clean_formula_names(saved_formulas):\n", 88 | " to_return = []\n", 89 | " for data in saved_formulas:\n", 90 | " data['clean_name'] = 
data['name'].replace(')', '').replace('(', '')\n", 91 | " data['clean_name'] = re.sub(\"[^0-9a-zA-Z]+\", \"_\", data['clean_name'])\n", 92 | " to_return.append(data)\n", 93 | " return to_return\n", 94 | "\n", 95 | "def clean_template(clean_formulas):\n", 96 | " to_return = []\n", 97 | " for data in clean_formulas:\n", 98 | " tmp_template = data['template']\n", 99 | " for tmp_data in clean_formulas:\n", 100 | " name = tmp_data['name']\n", 101 | " clean_name = tmp_data['clean_name']\n", 102 | " tmp_template = tmp_template.replace(name, clean_name)\n", 103 | " data['clean_template'] = tmp_template\n", 104 | " to_return.append(data)\n", 105 | " return clean_formulas\n", 106 | " \n", 107 | "def clean_variables(var_list):\n", 108 | " to_return = []\n", 109 | " for y in var_list:\n", 110 | " tmp_var_query = y['value']\n", 111 | " for x in var_list: \n", 112 | " full_var_name0, full_var_name1, full_var_name2, full_var_name3 = '<%='+x['name']+'%>', '<%= '+x['name']+' %>', '<%= '+x['name']+'%>', '<%='+x['name']+' %>'\n", 113 | " tmp_var_query = tmp_var_query.replace(full_var_name0, '${'+x['name']+'}').replace(full_var_name1, '${'+x['name']+'}').replace(full_var_name2, '${'+x['name']+'}').replace(full_var_name3, '${'+x['name']+'}')\n", 114 | " var_data = {'id': y['name'],\n", 115 | " 'displayName': y['name'].replace('_', ' '),\n", 116 | " 'description':\"\",\n", 117 | " 'value': tmp_var_query\n", 118 | " }\n", 119 | " to_return.append(var_data)\n", 120 | " return to_return\n", 121 | "\n", 122 | "def replace_variables(clean_formulas, var_list):\n", 123 | " to_return = []\n", 124 | " for data in clean_formulas:\n", 125 | " tmp_data = copy.copy(data['clean_template'])\n", 126 | " for x in var_list:\n", 127 | " full_var_name0, full_var_name1, full_var_name2, full_var_name3 = '<%='+x['name']+'%>', '<%= '+x['name']+' %>', '<%= '+x['name']+'%>', '<%='+x['name']+' %>'\n", 128 | " tmp_data = tmp_data.replace(full_var_name0, '${'+x['name']+'}').replace(full_var_name1, '${'+x['name']+'}').replace(full_var_name2, '${'+x['name']+'}').replace(full_var_name3, '${'+x['name']+'}')\n", 129 | " data['clean_template'] = tmp_data\n", 130 | " to_return.append(data)\n", 131 | " return to_return\n", 132 | " \n", 133 | "def saved_formulas_to_yaml(source_analysis):\n", 134 | " \"\"\"Given a Celonis Analysis object, saves a 'data.yml' file \"\"\"\n", 135 | " \n", 136 | " saved_formulas = copy.copy(source_analysis.saved_formulas)\n", 137 | " var_list = copy.copy(source_analysis.analysis.draft.variables)\n", 138 | " km_kpis = []\n", 139 | " list_of_data = [formula.data for formula in saved_formulas] \n", 140 | " \n", 141 | " clean_data = clean_formula_names(list_of_data)\n", 142 | " clean_data = clean_template(clean_data)\n", 143 | " clean_data = replace_variables(clean_data, var_list)\n", 144 | " for data in clean_data:\n", 145 | " description = data['description'].replace('\\'', '').replace('\"', '')\n", 146 | " pql = '\\n'+data['clean_template'].replace('.id', '.\"id\"').replace('.Id', '.\"Id\"').replace('.ID', '.\"ID\"')\n", 147 | " km_kpi = OrderedDict(id=data['clean_name'],\n", 148 | " displayName=data['name'].replace('_', ' '),\n", 149 | " description=quoted(description),\n", 150 | " pql=literal(pql),\n", 151 | " )\n", 152 | " km_kpi = add_parameters(data, km_kpi)\n", 153 | " km_kpis.append(km_kpi)\n", 154 | " \n", 155 | " km_kpis = {'kpis':km_kpis}\n", 156 | " \n", 157 | " with open('kpis.yml', 'w') as outfile:\n", 158 | " yaml.dump(km_kpis, outfile, sort_keys=False)\n", 159 | " return\n", 160 | "\n", 161 | "def 
variables_to_yaml(source_analysis):\n", 162 | " var_list = copy.copy(source_analysis.analysis.draft.variables)\n", 163 | " var_list = clean_variables(var_list)\n", 164 | " km_vars = []\n", 165 | " for var in var_list:\n", 166 | " km_var = OrderedDict(id=var['id'],\n", 167 | " displayName=var['displayName'],\n", 168 | " description=var['description'],\n", 169 | " value=literal(var['value'].replace('\"', '') )\n", 170 | " )\n", 171 | " km_vars.append(km_var)\n", 172 | " km_vars = {'variables':km_vars}\n", 173 | " with open('variables.yml', 'w') as outfile:\n", 174 | " yaml.dump(km_vars, outfile, sort_keys=False)\n", 175 | " return" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "saved_formulas_to_yaml(source_analysis)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "variables_to_yaml(source_analysis)" 194 | ] 195 | } 196 | ], 197 | "metadata": { 198 | "kernelspec": { 199 | "display_name": "Python 3", 200 | "language": "python", 201 | "name": "python3" 202 | }, 203 | "language_info": { 204 | "codemirror_mode": { 205 | "name": "ipython", 206 | "version": 3 207 | }, 208 | "file_extension": ".py", 209 | "mimetype": "text/x-python", 210 | "name": "python", 211 | "nbconvert_exporter": "python", 212 | "pygments_lexer": "ipython3", 213 | "version": "3.7.6" 214 | } 215 | }, 216 | "nbformat": 4, 217 | "nbformat_minor": 4 218 | } 219 | -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/01_misc/00_trigger_workbench_execution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Trigger a Notebook from outside the Workbench (e.g. from an external automation software)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import requests\n", 17 | "import json\n", 18 | "\n", 19 | "url = \"https://TEAMNAME_HERE.REALMHERE.celonis.cloud/machine-learning/api/executions\"\n", 20 | "\n", 21 | "\n", 22 | "# specify ID of Workbench (find this in the URL of a specific Workbench you want to trigger)\n", 23 | "# specify filename, if your file is not stored in the root you should include the foldername in the path, e.g. \"executionFileName\": \"Foldername/Run quickly.ipynb\"\n", 24 | "# specify any params you want to pass. 
To see how they are received in the notebook, check https://papermill.readthedocs.io/en/latest/\n", 25 | "payload = {\n", 26 | " \"notebookId\": \"4417517d-55ae-482b-8f56-ac6d1d864e68\",\n", 27 | " \"executionFileName\": \"Run quickly.ipynb\",\n", 28 | " \"params\":{\n", 29 | " \"param1\":\"value1\"\n", 30 | " }\n", 31 | "}\n", 32 | "\n", 33 | "payload = json.dumps(payload)\n", 34 | "\n", 35 | "# specify API Key (Applicationkeys do not work as of June 2020)\n", 36 | "headers = {\n", 37 | " 'content-type': 'application/json;charset=UTF-8',\n", 38 | " 'authorization': 'Bearer API_KEY_HERE'\n", 39 | "}\n", 40 | "\n", 41 | "response = requests.request(\"POST\", url, headers=headers, data = payload)\n", 42 | "\n", 43 | "print(response.text.encode('utf8'))\n" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "# Get status of execution" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "import json\n", 60 | "resp = json.loads(response.text)\n", 61 | "\n", 62 | "url = url + resp['id']\n", 63 | "response = requests.request(\"GET\", url, headers=headers)\n", 64 | "print(response.text.encode('utf8'))" 65 | ] 66 | } 67 | ], 68 | "metadata": { 69 | "kernelspec": { 70 | "display_name": "Python 3", 71 | "language": "python", 72 | "name": "python3" 73 | }, 74 | "language_info": { 75 | "codemirror_mode": { 76 | "name": "ipython", 77 | "version": 3 78 | }, 79 | "file_extension": ".py", 80 | "mimetype": "text/x-python", 81 | "name": "python", 82 | "nbconvert_exporter": "python", 83 | "pygments_lexer": "ipython3", 84 | "version": "3.7.3" 85 | } 86 | }, 87 | "nbformat": 4, 88 | "nbformat_minor": 4 89 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/01_misc/01_use_case_version_control.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Create text-based backups of analyses and transformations (for git)\n", 8 | "\n", 9 | "\n", 10 | "### This script backs up all analyses and transformations into a backup folder, the user only needs to provide a Celonis object and a folder.\n" 11 | ] 12 | }, 13 | { 14 | "source": [ 15 | "### Do imports, log in to the Celonis instance, create backup folder" 16 | ], 17 | "cell_type": "markdown", 18 | "metadata": {} 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "name": "stdout", 27 | "output_type": "stream", 28 | "text": [ 29 | "2019-10-08 15:36:00 - Login successful! 
Hello Simon Riezebos\n" 30 | ] 31 | } 32 | ], 33 | "source": [ 34 | "from pycelonis import get_celonis\n", 35 | "import shutil\n", 36 | "from pathlib import Path\n", 37 | "from pycelonis.utils.api_utils import pathify\n", 38 | "\n", 39 | "celonis = get_celonis(read_only=True)\n", 40 | "backup_path = Path('IBC Backup')\n", 41 | "if not backup_path.exists():\n", 42 | " backup_path.mkdir()" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "### Create or clean analyses backup folder" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 3, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "analyses_path = backup_path / \"Analyses\"\n", 59 | "if analyses_path.exists():\n", 60 | " shutil.rmtree(analyses_path)\n", 61 | "analyses_path.mkdir()" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "### Create backups of all analyses that are published in separate workspace directories" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 4, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "for workspace in celonis.workspaces:\n", 78 | " workspace_path = analyses_path / pathify(workspace.name)\n", 79 | " workspace_path.mkdir()\n", 80 | " for a in workspace.analyses:\n", 81 | " if a.data.get('lastPublishedDraftId') is not None:\n", 82 | " a.backup_content(workspace_path)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### (Optional) Remove all draft files to only see published changes" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 5, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "for path in analyses_path.rglob('*'):\n", 99 | " if path.name.startswith(\"draft\") and path.suffix == \".json\":\n", 100 | " path.unlink()" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "### Create or clean transformation backup folder" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 4, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "transformation_path = backup_path / \"Transformations\"\n", 117 | "if transformation_path.exists():\n", 118 | " shutil.rmtree(transformation_path)\n", 119 | "transformation_path.mkdir()" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "### Create backups of all transformations in separate Pool and Data Job directories" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 5, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "for pool in celonis.pools:\n", 136 | " pool_path = transformation_path / pathify(pool.name)\n", 137 | " pool_path.mkdir()\n", 138 | " for job in pool.data_jobs:\n", 139 | " job_path = pool_path / pathify(job.name)\n", 140 | " job_path.mkdir()\n", 141 | " for tm in job.transformations:\n", 142 | " try:\n", 143 | " tm.backup_content(job_path)\n", 144 | " except:\n", 145 | " pass" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "### (Optional) Inititate git repo\n", 153 | "Navigate to the back_path on the command line, run:\n", 154 | "```\n", 155 | "git init\n", 156 | "git add .\n", 157 | "git commit -m \"Activating version control\"\n", 158 | "```\n", 159 | "When you re-run this notebook all changes will be easy to inspect, and can be committed again" 160 | ] 161 | } 162 | ], 163 | 
"metadata": { 164 | "jupytext": { 165 | "formats": "ipynb,py:percent" 166 | }, 167 | "kernelspec": { 168 | "display_name": "Python 3", 169 | "language": "python", 170 | "name": "python3" 171 | }, 172 | "language_info": { 173 | "codemirror_mode": { 174 | "name": "ipython", 175 | "version": 3 176 | }, 177 | "file_extension": ".py", 178 | "mimetype": "text/x-python", 179 | "name": "python", 180 | "nbconvert_exporter": "python", 181 | "pygments_lexer": "ipython3", 182 | "version": "3.6.8" 183 | } 184 | }, 185 | "nbformat": 4, 186 | "nbformat_minor": 4 187 | } -------------------------------------------------------------------------------- /pycelonis1/01_use_pycelonis/00_basics/00_connecting_to_celonis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Connecting to Celonis" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### The Celonis object\n", 15 | "\n", 16 | "Import the get_celonis function and call it. This will return either an IBC object or a CPM4 object. **Permissions are determined by the App/API key you use to log in!** Set `read_only` to `True` if you want to make sure you can't break anything." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "scrolled": true 24 | }, 25 | "outputs": [ 26 | { 27 | "name": "stdout", 28 | "output_type": "stream", 29 | "text": [ 30 | "2019-10-08 12:01:08 - Login successful! Hello Simon Riezebos\n" 31 | ] 32 | } 33 | ], 34 | "source": [ 35 | "from pycelonis import get_celonis\n", 36 | "\n", 37 | "celonis = get_celonis()" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### (Optional) specify login details manually\n", 45 | "By default `pycelonis` will get the login information from environment variables. See :meth:`get_celonis` for more details. You can also specify them manually." 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "login = {\n", 55 | " \"celonis_url\": \"demo.eu-1.celonis.cloud\",\n", 56 | " \"api_token\": \"paste_here_your_api_token\",\n", 57 | " #The following 2 lines are only necessary when connecting to CPM4.5, not for IBC:\n", 58 | " #\"api_id\": \"paste_here_your_api_id\", \n", 59 | " #\"username\": \"paste_here_your_username\",\n", 60 | "}\n", 61 | "celonis_manual = get_celonis(**login)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "### Working with (Celonis) objects in Jupyter Notebook" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "The object returned by `get_celonis` is your portal into celonis, **press tab after `c.`** to see the available methods and attributes." 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "celonis." 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "Objects in celonis can be found using their ID or (substring of) name. **Press shift-tab inside the parentheses** to see the signature and documentation of each function." 
92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 3, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "" 103 | ] 104 | }, 105 | "execution_count": 3, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "analysis = celonis.analyses.find('117f7528-8504-4450-9fd6-8ebcf1749d18')\n", 112 | "analysis" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "Objects can also be accessed directly with auto-complete using the `.names[]` or `.ids[]` property of a collection." 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "celonis.datamodels.names[<-PRESS TAB HERE]" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "### Advanced: access the API data of a Celonis object\n", 136 | "Almost every object that represents an object in Celonis contains a `.data` property that shows the JSON data from the Celonis API. This data is **automatically refreshed** and if **changes are made to this data, `pycelonis` tries to make the same changes in the object in Celonis.**" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 5, 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "data": { 146 | "text/plain": [ 147 | "{'permissions': ['MOVE_TO',\n", 148 | " 'DELETE_WORKSPACE',\n", 149 | " 'CREATE_WORKSPACE',\n", 150 | " 'DELETE_ALL_WORKSPACES',\n", 151 | " 'DELETE_ALL_ANALYSES',\n", 152 | " 'EDIT_ALL_ANALYSES',\n", 153 | " 'EDIT_ALL_WORKSPACES',\n", 154 | " 'USE_ALL_ANALYSES',\n", 155 | " 'CREATE_ANALYSES',\n", 156 | " 'DELETE_ANALYSIS',\n", 157 | " 'EDIT_WORKSPACE',\n", 158 | " 'MANAGE_PERMISSIONS',\n", 159 | " 'EXPORT_CONTENT',\n", 160 | " 'USE_ANALYSIS',\n", 161 | " 'EDIT_ANALYSIS'],\n", 162 | " 'id': '117f7528-8504-4450-9fd6-8ebcf1749d18',\n", 163 | " 'tenantId': None,\n", 164 | " 'name': 'OTD Prediction Binary - Technical App (OAF) - Copy',\n", 165 | " 'description': None,\n", 166 | " 'deleted': False,\n", 167 | " 'transportId': None,\n", 168 | " 'lastPublishedDraftId': '7f82df02-b728-4ca3-acdf-1940dd7de7b0',\n", 169 | " 'autoSaveId': '2e47dccc-8cbf-400d-8404-72e1f5298d0d',\n", 170 | " 'processId': 'acb6313c-bba8-46fd-9637-24c7d5463746',\n", 171 | " 'createDate': 1556264369787,\n", 172 | " 'favourite': False,\n", 173 | " 'contentId': None,\n", 174 | " 'contentVersion': 0,\n", 175 | " 'tags': [{'name': 'WillBeDeleted'}],\n", 176 | " 'applicationId': '',\n", 177 | " 'publicLink': False,\n", 178 | " 'lastPublishedDate': 1564498481791,\n", 179 | " 'lastPublishedUser': 'Simon',\n", 180 | " 'objectId': '117f7528-8504-4450-9fd6-8ebcf1749d18',\n", 181 | " 'publishedDraftId': '7f82df02-b728-4ca3-acdf-1940dd7de7b0',\n", 182 | " 'folderId': '117f7528-8504-4450-9fd6-8ebcf1749d18',\n", 183 | " 'parentObjectId': 'acb6313c-bba8-46fd-9637-24c7d5463746'}" 184 | ] 185 | }, 186 | "execution_count": 5, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "analysis.data" 193 | ] 194 | } 195 | ], 196 | "metadata": { 197 | "jupytext": { 198 | "formats": "ipynb,py:percent" 199 | }, 200 | "kernelspec": { 201 | "display_name": "Python 3", 202 | "language": "python", 203 | "name": "python3" 204 | }, 205 | "language_info": { 206 | "codemirror_mode": { 207 | "name": "ipython", 208 | "version": 3 209 | }, 210 | "file_extension": ".py", 211 | "mimetype": 
"text/x-python", 212 | "name": "python", 213 | "nbconvert_exporter": "python", 214 | "pygments_lexer": "ipython3", 215 | "version": "3.7.3" 216 | } 217 | }, 218 | "nbformat": 4, 219 | "nbformat_minor": 4 220 | } -------------------------------------------------------------------------------- /pycelonis1/01_use_pycelonis/00_basics/02_pulling_data_from_datamodel.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Pulling data from a Data Model" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from pycelonis import get_celonis\n", 17 | "from pycelonis.pql import PQL, PQLColumn, PQLFilter\n", 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "### Connect to Celonis" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "2020-03-02 13:00:58 - pycelonis: Login successful! Hello Dimitris\n" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "celonis = get_celonis(\"api token\")" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "### Select Datamodel" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 5, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "datamodel = celonis.datamodels.find('datamodel id/name')" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "This is the Table in our Datamodel. It has 2 Columns with the names \"A\" and \"B\"." 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 3, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "data": { 75 | "text/html": [ 76 | "
" 117 | ], 118 | "text/plain": [ 119 | " A B\n", 120 | "0 1 3\n", 121 | "1 2 4\n", 122 | "2 5 6" 123 | ] 124 | }, 125 | "execution_count": 3, 126 | "metadata": {}, 127 | "output_type": "execute_result" 128 | } 129 | ], 130 | "source": [] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "### Define the query you want to pull\n", 137 | "Example 1: pull a specific Column. We select Column \"A\" from table \"Table_name\" and name this \"Name 1\"" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 9, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/html": [ 148 | "
" 185 | ], 186 | "text/plain": [ 187 | " Name 1\n", 188 | "0 1\n", 189 | "1 2\n", 190 | "2 5" 191 | ] 192 | }, 193 | "execution_count": 9, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "query = PQLColumn(query = \"Table_name.A\", name = \"Name 1\")\n", 200 | "df = datamodel.get_data_frame(query)\n", 201 | "df" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "Example 2: Pull a custom PQL-Statement" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 11, 214 | "metadata": {}, 215 | "outputs": [ 216 | { 217 | "data": { 218 | "text/html": [ 219 | "
" 248 | ], 249 | "text/plain": [ 250 | " Name 1\n", 251 | "0 5" 252 | ] 253 | }, 254 | "execution_count": 11, 255 | "metadata": {}, 256 | "output_type": "execute_result" 257 | } 258 | ], 259 | "source": [ 260 | "query = PQLColumn(query = \"MAX(Table_name.A)\", name = \"Name 1\")\n", 261 | "df = datamodel.get_data_frame(query)\n", 262 | "df" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "Example 3: Do more things at once and add Filters" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 13, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "data": { 279 | "text/html": [ 280 | "
" 311 | ], 312 | "text/plain": [ 313 | " Name 1 Name 2\n", 314 | "0 2 1" 315 | ] 316 | }, 317 | "execution_count": 13, 318 | "metadata": {}, 319 | "output_type": "execute_result" 320 | } 321 | ], 322 | "source": [ 323 | "query = PQL()\n", 324 | "query += PQLColumn(\"MAX(Table_name.A)\", \"Name 1\")\n", 325 | "query += PQLColumn(\"COUNT(Table_name.B)\", \"Name 2\")\n", 326 | "query += PQLFilter(\"Filter Table_name.B < 5\")\n", 327 | "query += PQLFilter(\"Filter Table_name.B > 3\")\n", 328 | "\n", 329 | "df = datamodel.get_data_frame(query)\n", 330 | "df" 331 | ] 332 | } 333 | ], 334 | "metadata": { 335 | "kernelspec": { 336 | "display_name": "Python 3", 337 | "language": "python", 338 | "name": "python3" 339 | }, 340 | "language_info": { 341 | "codemirror_mode": { 342 | "name": "ipython", 343 | "version": 3 344 | }, 345 | "file_extension": ".py", 346 | "mimetype": "text/x-python", 347 | "name": "python", 348 | "nbconvert_exporter": "python", 349 | "pygments_lexer": "ipython3", 350 | "version": "3.7.4" 351 | } 352 | }, 353 | "nbformat": 4, 354 | "nbformat_minor": 4 355 | } 356 | -------------------------------------------------------------------------------- /pycelonis1/01_use_pycelonis/00_basics/03_pushing_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Pushing Data\n", 8 | "\n", 9 | "### This tutorial shows how data can be pushed from Python to Celonis. The data is pushed to a Celonis Data Pool or Data Model and is ready to use within IBC.\n", 10 | "In this to Tutorial we will:\n", 11 | "1. Connect to Celonis\n", 12 | "2. Prepare the data that needs to be pushed into a dataframe.\n", 13 | "3. Push the data into Celonis\n", 14 | " 1. Push data to datapool\n", 15 | " 2. Push data directly to a specific datamodel in the datapool" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "### Connect to Celonis" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 1, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "name": "stdout", 32 | "output_type": "stream", 33 | "text": [ 34 | "2020-10-23 11:50:39 - pycelonis: Login successful! Hello Noor\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "from pycelonis import get_celonis\n", 40 | "celonis = get_celonis()" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "### Prepare the data that needs to be pushed to celonis into a dataframe\n", 48 | "\n", 49 | "For the sake of this demo, we will create a dummy dataframe, but you can put any data in the dataframe as you like" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 2, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "data": { 59 | "text/html": [ 60 | "
" 111 | ], 112 | "text/plain": [ 113 | " A B C\n", 114 | "0 2 2 10\n", 115 | "1 4 0 2\n", 116 | "2 8 0 1\n", 117 | "3 0 0 8" 118 | ] 119 | }, 120 | "execution_count": 2, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "import pandas as pd\n", 127 | "\n", 128 | "df = pd.DataFrame({'A': [2, 4, 8, 0], 'B': [2, 0, 0, 0], 'C': [10, 2, 1, 8]})\n", 129 | "df.head()" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "### Push the data from the dataframe into a table in Celonis\n" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "#### A. Push data to the datapool\n", 144 | "\n", 145 | "##### Find the datapool\n" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 3, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "data_pool = celonis.pools.find(\"id_or_name_of_data_pool\")" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "##### If you are unsure about the name/id of your data pool you can list all the datapools available to you" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "celonis.pools" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "Now we push the data frame to the data pool. For this, we use the push_table() function, which has the following properties:\n", 178 | "* df_or_path: Either the pandas data frame or the path to a data frame that should be pushed to Celonis\n", 179 | "* table_name: The name that this data frame should have in the data pool\n", 180 | "* if_exists: Specifies what happens when the table already exists in the data pool. The options are 'replace', 'append', 'upsert', 'error'\n", 181 | "\n", 182 | "The additional parameters (like the setting of primary keys and column types) can be checked by pressing SHIFT + TAB, while the curser is in the function push_table()" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "data_pool.push_table(df,\"table_name\", if_exists = 'replace')" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "The table is now in the data pool and can be added to any data model in that pool." 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "### B. Push data directly to the datamodel\n", 206 | "We could also directly push the table to the data model. 
We would first need to find the data model.\n", 207 | "##### Find the datamodel" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 5, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "data_model = celonis.datamodels.find(\"datamodel id/name\")" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "##### If you are unsure about the name/id of your data pool you can list all the datamodels available to you" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 5, 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [ 232 | "celonis.datamodels" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "##### Push the dataframe as table in the datamodel\n", 240 | "\n", 241 | "Use the push_table() function again. In this example we will replace the table if it already exist, however we can also use the options: append and upsert as stated earlier.\n", 242 | "\n", 243 | "Here we have additionally the option to reload the datamodel. In this example we choose to set reload_datamodel as False. In this case, the changes will be effective with the next scheduled or manual datamodel reload." 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 6, 249 | "metadata": {}, 250 | "outputs": [ 251 | { 252 | "data": { 253 | "text/plain": [ 254 | "" 255 | ] 256 | }, 257 | "execution_count": 6, 258 | "metadata": {}, 259 | "output_type": "execute_result" 260 | } 261 | ], 262 | "source": [ 263 | "data_model.push_table(df,\"table_name\",reload_datamodel=False, if_exists = 'replace')" 264 | ] 265 | } 266 | ], 267 | "metadata": { 268 | "jupytext": { 269 | "formats": "ipynb,py:percent" 270 | }, 271 | "kernelspec": { 272 | "display_name": "Python 3", 273 | "language": "python", 274 | "name": "python3" 275 | }, 276 | "language_info": { 277 | "codemirror_mode": { 278 | "name": "ipython", 279 | "version": 3 280 | }, 281 | "file_extension": ".py", 282 | "mimetype": "text/x-python", 283 | "name": "python", 284 | "nbconvert_exporter": "python", 285 | "pygments_lexer": "ipython3", 286 | "version": "3.7.4" 287 | } 288 | }, 289 | "nbformat": 4, 290 | "nbformat_minor": 4 291 | } 292 | -------------------------------------------------------------------------------- /pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/main.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pycelonis import get_celonis, pql 3 | 4 | from utils import ext_data_utils, model_utils, utils 5 | 6 | # Load input data 7 | celonis = get_celonis() 8 | dm_id = 'TBD' 9 | datamodel = celonis.datamodels.find(dm_id) 10 | input_columns = [("col_name", "pretty_name"), ("col_name_2", "pretty_name_2")] 11 | input_filter = "FILTER TBD" 12 | 13 | train_df = utils.get_pql_dataframe(datamodel, input_columns, input_filter) 14 | 15 | # Import External Data for n-step Predictions (such as GDP below) 16 | ext_data = ext_data_utils.load_external_data( 17 | overall_gdp_csv="US_GDP.csv", 18 | industry_gdp_perc_csv="US_MANUF_GDP_PERC.csv", 19 | csv_col_1="GDP", 20 | csv_col_2="VAPGDPMA", 21 | csv_col_2_new="IND_PERC", 22 | col_final="IND_GDP", 23 | ) 24 | 25 | # INPUTS 26 | subsets = ['subset1', 'subset2'] # PARAM 27 | subset_needs_adjusts = ['subset2' 28 | ] # PARAM Subsets which need a baseline adjustment 29 | subset_col_name = 'subset_filtering_column' # PARAM 30 | input_y_col_name = "Y_column" # 
PARAM 31 | input_exo_col_name = 'ext_data_column' # PARAM 32 | model_class_col_name = 'classification_naming' # PARAM Column to flag train vs test vs forecast timeframes 33 | model_y_pred_col_name = 'Y_prediction_column' # PARAM 34 | val_size_perc = 0.2 35 | 36 | # OUTPUTS, for Exported Predictions to DM 37 | all_subset_results = {} 38 | all_subset_exports = {} 39 | output_col_names = { 40 | "index": "Date", # PARAM 41 | input_y_col_name: "Actual Y Value", # PARAM 42 | model_y_pred_col_name: "Predicted Y Value", # PARAM 43 | model_class_col_name: "Classification", # PARAM 44 | } 45 | 46 | # Run Predictions for each selected subset 47 | for subset in subsets: 48 | # Check if subset needs baseline adjustment 49 | to_adjust = False 50 | if subset in subset_needs_adjusts: 51 | to_adjust = True 52 | 53 | # Filter train df for subset 54 | subset_train_df = utils.get_subset_df(train_df, subset, subset_col_name) 55 | 56 | # Run Predictions model for this subset 57 | print('Run TS Predictions model for subset train df \n', 58 | subset_train_df.head()) 59 | subset_results = model_utils.run_predictions_model(subset_train_df, 60 | ext_data, 61 | input_y_col_name, 62 | input_exo_col_name, 63 | val_size_perc, to_adjust) 64 | # Store Output (subset Predictions) 65 | all_subset_results[subset] = subset_results 66 | print('subset ', subset, ' Prediction outputs have shape ', 67 | all_subset_results[subset].shape) 68 | # Store export-version of the Output (subset Predictions) 69 | all_subset_exports[subset] = utils.prepare_export_df( 70 | subset_results, output_col_names, model_y_pred_col_name) 71 | 72 | print("Finished running predictions for all subsets, total output shape is ", 73 | all_subset_results[subset].shape) 74 | print("Subsets are ", all_subset_exports.keys()) 75 | 76 | # Combine Results into single Export table 77 | # Add new 'subset name' column to the export-version of Predictions 78 | export_df = utils.constitute_export_df(all_subset_exports, subset_col_name) 79 | 80 | # Export table to DM 81 | export_table_name = "Predictions_Output" 82 | print('Export df shape is ', export_df.shape) 83 | print('Export df head is ') 84 | print(export_df.head(10)) 85 | print('Export df tail is ') 86 | print(export_df.tail(10)) 87 | datamodel.push_table(export_df, 88 | export_table_name, 89 | reload_datamodel=False, 90 | if_exists="replace") 91 | -------------------------------------------------------------------------------- /pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/celonis/pycelonis-examples/6a51c8eaf84d59e7b69d457d5748ea7348659d6f/pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/utils/__init__.py -------------------------------------------------------------------------------- /pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/utils/ext_data_utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from . 
import plot_utils 3 | 4 | 5 | def load_external_data( 6 | overall_gdp_csv, 7 | industry_gdp_perc_csv, 8 | csv_col_1, 9 | csv_col_2, 10 | csv_col_2_new, 11 | col_final, 12 | ): 13 | """Load External/GDP data""" 14 | 15 | # Load National GDP data (need to create/upload external csv) 16 | all_gdp_csv = pd.read_csv(overall_gdp_csv) 17 | 18 | # Load Industry GDP % csv (need to create/upload external csv) 19 | all_gdp_ind_perc_csv = pd.read_csv(industry_gdp_perc_csv) 20 | # Rename col 21 | all_gdp_ind_perc_csv = all_gdp_ind_perc_csv.rename( 22 | columns={csv_col_2: csv_col_2_new}) 23 | 24 | # Manually estimate GDP values for future quarters (CORE for TS Predictions) 25 | all_gdp = all_gdp_csv.copy() 26 | all_gdp = all_gdp.append([ 27 | { 28 | "DATE": "7/1/2020", 29 | csv_col_1: 20200.0 30 | }, 31 | { 32 | "DATE": "10/1/2020", 33 | csv_col_1: 21000.0 34 | }, 35 | { 36 | "DATE": "1/1/2021", 37 | csv_col_1: 21000.0 38 | }, 39 | ]) 40 | all_gdp = all_gdp.reset_index(drop=True) 41 | 42 | # Manually estimate Industry GDP % values for future quarters (CORE for TS Predictions) 43 | all_gdp_ind_perc = all_gdp_ind_perc_csv.append([ 44 | { 45 | "DATE": "4/1/2020", 46 | csv_col_2_new: 11.0 47 | }, 48 | { 49 | "DATE": "7/1/2020", 50 | csv_col_2_new: 11.0 51 | }, 52 | { 53 | "DATE": "10/1/2020", 54 | csv_col_2_new: 11.0 55 | }, 56 | { 57 | "DATE": "1/1/2021", 58 | csv_col_2_new: 11.0 59 | }, 60 | ]) 61 | # Convert to % 62 | all_gdp_ind_perc[csv_col_2_new] = all_gdp_ind_perc[csv_col_2_new] / 100.0 63 | all_gdp_ind_perc = all_gdp_ind_perc.reset_index(drop=True) 64 | all_gdp_ind_perc.head() 65 | 66 | # Calculate Industry GDP 67 | all_gdp[col_final] = all_gdp[csv_col_1] * all_gdp_ind_perc[csv_col_2_new] 68 | 69 | # Resample to weekly GDP data 70 | all_gdp["DATE"] = pd.to_datetime(all_gdp["DATE"], format="%m/%d/%Y") 71 | all_gdp_weekly = all_gdp.copy() 72 | all_gdp_weekly = all_gdp_weekly.drop(columns=csv_col_1) 73 | all_gdp_weekly = all_gdp_weekly.set_index("DATE").resample( 74 | "W").ffill().reset_index() 75 | all_gdp_weekly[col_final] = all_gdp_weekly[col_final] * 4 / 52 76 | # Plot resampled external data 77 | plot_utils.plot_gdp(all_gdp_weekly, col_final) 78 | 79 | # Smoothen the weekly GDP data 80 | ext_data = all_gdp_weekly.copy() 81 | ext_data[col_final] = ext_data.iloc[:, 1].rolling(window=12, 82 | center=False, 83 | min_periods=1).mean() 84 | # Plot final external data 85 | plot_utils.plot_gdp(ext_data, col_final) 86 | return ext_data 87 | -------------------------------------------------------------------------------- /pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/utils/model_utils.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import pandas as pd 6 | 7 | import pmdarima 8 | import statsmodels.api as sm 9 | from sklearn import linear_model, metrics 10 | from statsmodels.tsa.statespace import sarimax 11 | from . import utils, plot_utils 12 | 13 | 14 | def run_predictions_model(df, 15 | ext_data, 16 | y_col_name, 17 | exo_col_name, 18 | val_size_perc=0.2, 19 | to_adjust_years=False): 20 | """Run Predictions Model for Train df 21 | 22 | Parameters 23 | ---------- 24 | df : DataFrame 25 | Train set Dataframe containing the Y values of the Time Series to predict 26 | ext_data : DataFrame 27 | External data to use as Regressor to model and predict the TS Trend 28 | val_size_perc : Float 29 | Part of the df to use for Validation. 
30 | Format: [0.0;1.0] 31 | to_adjust_years : Boolean 32 | True if baseline level of the TS has changed during its timeframe and should be adjusted 33 | By default False 34 | y_col_name : String 35 | Column name of the TS values column (Y column). 36 | exo_col_name : String 37 | Column name of the External Regressor values column. 38 | 39 | Returns 40 | ------- 41 | DataFrame 42 | Output DataFrame with the n-step Predictions for the TS (Predict the n future Y values). 43 | n is set as the minimum between the number of future values from the External data and the predicted Residuals 44 | """ 45 | 46 | # Reindex and create Train Df 47 | df = df.reset_index(drop=True) 48 | train_df = df.copy() 49 | print('train df head looks like: \n', train_df.head()) 50 | 51 | # Clean data: fill empty weeks with 0 value 52 | train_df = utils.fill_empty_dates(train_df) 53 | 54 | # Cap the high outliers to a max value 55 | train_df = utils.cap_outliers( 56 | train_df, 57 | max_outlier_value=1000) # PARAM - max_outlier_value: Max value 58 | 59 | # Adjust past data if baseline changed at date change_date 60 | if to_adjust_years: 61 | train_df = utils.adjust_baseline(train_df, 62 | change_date='YYYY-MM-DD', 63 | end_date='YYYY-MM-DD') 64 | # PARAM - change_date: date at which baseline level changed, end_date: end date of new baseline level 65 | 66 | # Plot preprocessed Train Df 67 | plot_utils.plot_clean_y(df, train_df, 68 | y_max=1000 + 100) #PARAM - y axis max value 69 | 70 | #### MODEL: Y = Trend + Seasonality + Residuals 71 | 72 | ### Trend: Calculate, Model and Predict future values 73 | trend_col_name = 'Trend' # PARAM - Trend column name 74 | train_df[trend_col_name] = utils.calculate_trend( 75 | train_df, 76 | ts_seasonality= 77 | 52, # PARAM - Seasonality timeframe e.g. 52 if weekly data with annual seasonality. 
7 if daily TS with weekly seasonality 78 | center=False) 79 | # Plot Y and Trend 80 | plot_utils.plot_y_trend(train_df, 81 | train_df[trend_col_name], 82 | y_min=0, 83 | y_max=100) 84 | 85 | # Use External data/GDP to fit and predict the Trend 86 | print('train df shape is ', 87 | train_df.dropna().shape, ', adding the external data into the df...') 88 | train_df = utils.combine_ext_data(train_df, ext_data, days_to_shift=None) 89 | 90 | # Define X=GDP and Y=Trend for Regression model 91 | exo_pretty_name = "Regressor" # PARAM - External Data/GDP column 92 | X, Y = utils.get_trend_and_exo_for_fit(train_df, exo_col_name, 93 | trend_col_name, val_size_perc) 94 | # Plot Y, Trend and Exo Regr 95 | plot_utils.plot_y_trend_ext(train_df, 96 | Y, 97 | exo_col_name, 98 | exo_pretty_name, 99 | y_min=0, 100 | y_max=1100, 101 | y_min_exo=100, 102 | y_max_exo=200) 103 | 104 | # Fit Regression of Y=Trend on X=Exogenous Regressor 105 | reg = linear_model.LinearRegression().fit(X, Y) 106 | # Predict future Trend with the fitted Regression 107 | trend_pred_col_name = "Predicted Trend" 108 | X_F, train_df = predict_trend(train_df, reg, exo_col_name, 109 | trend_pred_col_name) 110 | # Plot Trend, External data/GDP and Predicted Trend 111 | plot_utils.plot_y_pred_trend_ext(train_df, 112 | exo_col_name, 113 | X, 114 | Y, 115 | X_F, 116 | y_min=0, 117 | y_max=1100, 118 | y_min_exo=100, 119 | y_max_exo=200) 120 | print('End of Trend part, df is \n', train_df.head()) 121 | 122 | ### Seasonality: Calculate S for each date of the seasonality window 123 | 124 | # Calculate Y - Trend 125 | train_df["Y - Trend"] = train_df[y_col_name] - train_df[trend_col_name] 126 | 127 | # Calculate Seasonality by moving avg on Y - T 128 | s = train_df["Y - Trend"].rolling( 129 | window=10, 130 | center=True).mean() # PARAM - window: Moving avg window to smoothen S 131 | # Avg across periods to obtain 1 S value per date of a period 132 | s = s.groupby(s.index.week).mean() 133 | 134 | # Add Seasonality to Df 135 | seasonality_col_name = "Seasonality" # PARAM - S column name 136 | train_df[seasonality_col_name] = np.nan 137 | for i in train_df.index: 138 | train_df.loc[i][seasonality_col_name] = s[i.week] 139 | # (Optional) Fix border dates with Null values 140 | # seas_period_days = 52 * 7 # PARAM - seasonsality period in days 141 | # train_df = utils.fill_seasonality(train_df, seas_period_days, 142 | # seasonality_col_name) 143 | 144 | # Plot Y, T and S 145 | plot_utils.plot_y_t_s_with_pred(train_df, trend_col_name, 146 | seasonality_col_name, trend_pred_col_name) 147 | 148 | ### Residuals: Calculate, Model and Predict future values 149 | 150 | # Calculate R = Y - Trend - Season 151 | train_df["Y - T - S"] = train_df[y_col_name] - train_df[ 152 | trend_col_name] - train_df[seasonality_col_name] 153 | # Create R df 154 | r_col_name = "Y - T - S" # PARAM - R column name 155 | r = train_df[r_col_name] 156 | # Plot R 157 | plot_utils.plot_r(train_df, r_col_name) 158 | # R shape 159 | print('R df shape is ', r.dropna().shape) 160 | # Stationarity test 161 | res = sm.tsa.adfuller(r.dropna(), regression="c") 162 | print("adf test p-value is:{}".format(res[1])) 163 | # Verify that p value is low 164 | # ACF PACF on R 165 | plot_utils.plot_acf_pacf_r(r, lags=25) # PARAM - # lags for acf pacf 166 | # Deduce ARMA(p,q) model for R 167 | 168 | # Create R df for R Model 169 | columns_to_drop = [y_col_name, exo_col_name] 170 | col_to_rename = {"index": "Date"} 171 | r_df = create_r_df(train_df, columns_to_drop, col_to_rename) 172 | 173 | # Fit 
ARIMA Model on R for R predictions 174 | p, d, q = 3, 0, 3 # PARAM - p for AR, d for I, q for MA. 175 | P, D, Q, s = None, None, None, None # If seasonality use P,D,Q,s, if not set to None. 176 | n_pred = 5 # n_pred is # future points to forecast 177 | model = None # (Optional) model - to input an existing loaded model 178 | exo = None # (Optional) exo - to input exogenous regressors 179 | r_df = r_df.dropna() 180 | model_r, results_df_r = get_results_with_val(r_df, exo, p, d, q, P, D, Q, s, 181 | model, r_col_name, 182 | val_size_perc, n_pred) 183 | # Add Predicted R to df 184 | r_col_name = "Predicted R" # PARAM - R column name for df 185 | class_col_name = "Classification" # PARAM - classification col name (train/test/forecast) 186 | train_df = add_r(train_df, results_df_r, r_col_name, class_col_name) 187 | 188 | ### Calculate Total Y Prediction = Predicted T + S + Predicted R 189 | 190 | y_pred_col_name = "Y Prediction" # PARAM - y pred column names 191 | train_df = calc_y_pred(train_df, y_pred_col_name, trend_pred_col_name, 192 | seasonality_col_name, class_col_name) 193 | print('End of df with predictions is \n', train_df.tail(n=20)) 194 | 195 | # Plot and show Final Df with predictions 196 | plot_utils.plot_final(train_df, trend_col_name, seasonality_col_name, 197 | r_col_name, trend_pred_col_name, y_pred_col_name, 198 | class_col_name) 199 | 200 | # Return Final Df with Y predictions 201 | return train_df 202 | 203 | 204 | def get_results_with_val(df, 205 | exo, 206 | p, 207 | d, 208 | q, 209 | P, 210 | D, 211 | Q, 212 | s, 213 | model, 214 | y_col_name, 215 | val_size_perc, 216 | n_predictions=5): 217 | """Fit SARIMAX on input df (optional input and future exo regr) and predict validation + future values 218 | Or use param fitted model (optional input and future exo regr) to predict validation + future values 219 | Plot input and output (val+future) predictions 220 | 221 | Parameters 222 | ---------- 223 | df : DataFrame 224 | R Time Series 225 | exo : DataFrame, optional 226 | Exogenous Regressors to model Y 227 | p : int 228 | AR parameter for the SARIMAX on Y 229 | d : int 230 | Integrated parameter for the SARIMAX on Y 231 | q : int 232 | MA parameter for the SARIMAX on Y 233 | P : int 234 | Seasonal AR parameter for the SARIMAX on Y 235 | D : int 236 | Seasonal Integrated parameter for the SARIMAX on Y 237 | Q : int 238 | Seasonal MA parameter for the SARIMAX on Y 239 | s : int 240 | Seasonality timeframe for Y 241 | model : SARIMAX Fitted model, optional 242 | Pre-fitted SARIMAX model to use to predict Y values 243 | y_col_name : String 244 | Column name of Y values 245 | val_size_perc : Float 246 | Part of the df to use for Validation. 
247 | Format: [0.0;1.0] 248 | n_predictions : int, optional 249 | Number of future values to predict for Y, by default 5 250 | 251 | Returns 252 | ------- 253 | smodel: json 254 | Fitted SARIMAX model on Y 255 | results: DataFrame 256 | DataFrame including the train, validation and forecast values from the SARIMAX fitted model on Y Time Series 257 | """ 258 | 259 | X = df[y_col_name].values 260 | Y = df["Date"].values 261 | train_size = int(len(X) * (1 - val_size_perc)) 262 | train, test = X[:train_size], X[train_size:len(X)] 263 | week = Y[train_size:len(X)] 264 | exo_past, exo_future = None, None 265 | 266 | # Split Exo Regressor into past (train + val) and future (forecast) values 267 | if exo is not None: 268 | exo_past, exo_future = exo[:len(X)], exo[len(X):len(exo)] 269 | 270 | # Create SARIMAX model or use input model 271 | print("Checking model for fit...") 272 | if model is None: 273 | print("No input model, starting to fit SARIMAX" + str(p) + str(d) + 274 | str(q) + str(P) + str(D) + str(Q) + str(s)) 275 | smodel = pmdarima.arima.ARIMA(order=[p, d, q], 276 | method="lbfgs", 277 | maxiter=50, 278 | suppress_warnings=True) 279 | smodel = smodel.fit(df[y_col_name].values, exo_past) 280 | print("Finished SARIMAX fit.") 281 | else: 282 | print("Existing input model, will use it") 283 | smodel = model 284 | 285 | # Test model on the Validation set 286 | history = [x for x in train] 287 | predictions = list() 288 | for t in range(len(test)): 289 | model = sarimax.SARIMAX(history, 290 | order=smodel.order, 291 | seasonal_order=smodel.seasonal_order, 292 | enforce_stationarity=False) 293 | model_fit = model.fit(disp=0) 294 | output = model_fit.forecast() 295 | if output[0] < 0: 296 | yhat = 0 297 | else: 298 | yhat = output[0] 299 | predictions.append(yhat) 300 | obs = test[t] 301 | history.append(obs) 302 | print("predicted=%f, expected=%f" % (yhat, obs)) 303 | error = metrics.mean_squared_error(test, predictions) 304 | print("Test MSE: %.3f" % error) 305 | 306 | # Add Train set to output 307 | data = pd.DataFrame() 308 | data["Date"] = Y[0:train_size] 309 | data["Predicted Net Order Value"] = None 310 | data["Actual Net Order Value"] = X[0:train_size] 311 | data["Classification"] = "train" 312 | 313 | # Add Validation set to output 314 | Tested = pd.DataFrame() 315 | Tested["Date"] = week 316 | Tested["Predicted Net Order Value"] = predictions 317 | Tested["Actual Net Order Value"] = test 318 | Tested["Classification"] = "test" 319 | Tested["Predicted Net Order Value"] = Tested[ 320 | "Predicted Net Order Value"].astype(float) 321 | Tested["Date"] = pd.to_datetime(Tested["Date"]) 322 | 323 | # Add Forecast set to output 324 | print("Predicting forecast values...") 325 | n_periods = n_predictions 326 | fitted, confint = smodel.predict(n_periods=n_periods, 327 | return_conf_int=True, 328 | exogenous=exo_future) 329 | print("Finished predicting forecast values.") 330 | rng = pd.date_range(df["Date"].max(), periods=n_periods, freq="7D") 331 | forecast = pd.DataFrame({ 332 | "Date": rng, 333 | "Predicted Net Order Value": fitted, 334 | "Actual Net Order Value": None, 335 | "Classification": "forecast", 336 | "Conf_lower": confint[:, 0], 337 | "Conf_Upper": confint[:, 1], 338 | }) 339 | forecast = forecast.drop(forecast.index[0]) 340 | 341 | # Combine all sets 342 | results = data.append(Tested, ignore_index=True) 343 | results = results.append(forecast, ignore_index=True) 344 | results["Date"] = pd.to_datetime(results["Date"]) 345 | # Reformat Dates to Date type 346 | results["Date"] = 
pd.to_datetime(results["Date"]) 347 | return smodel, results 348 | 349 | 350 | def predict_trend(train_df, reg, exo_col_name, pred_trend_col_name): 351 | """Trend Regression to predict future Trend""" 352 | # Get Regressor on prediction timeframe 353 | X_F = train_df[exo_col_name].dropna().values.reshape(-1, 1) 354 | print(X_F.shape) 355 | print(reg.predict(X_F).shape) 356 | # Predict Trend using fitted Regression on Regressor 357 | t_pred = reg.predict(X_F) 358 | len_pred = t_pred.shape[0] 359 | train_df["Predicted Trend"] = np.nan 360 | train_df["Predicted Trend"][-len_pred:] = t_pred.ravel() 361 | return X_F, train_df 362 | 363 | 364 | def create_r_df(train_df, columns_to_drop, col_to_rename): 365 | """Create Residuals DataFrame""" 366 | r_df = train_df.copy() 367 | r_df = r_df.drop(columns=columns_to_drop) 368 | r_df = r_df.reset_index() 369 | r_df = r_df.rename(columns=col_to_rename) 370 | return r_df 371 | 372 | 373 | def add_r(train_df, results_df_r, r_col_name, class_col_name): 374 | """Add Residuals (Train, Val and Forecast) to the Input Df""" 375 | results_df_r_idx = results_df_r.set_index("Date") 376 | train_df[r_col_name] = np.nan 377 | train_df[r_col_name] = results_df_r_idx["Predicted Net Order Value"] 378 | train_df[class_col_name] = results_df_r_idx[class_col_name] 379 | return train_df 380 | 381 | 382 | def calc_y_pred(train_df, y_pred_col_name, trend_pred_col_name, 383 | seasonality_col_name, class_col_name): 384 | """Calculate Predicted Y with Predicted T, S and Predicted R components, on Validation and Forecast sets""" 385 | train_df[y_pred_col_name] = np.nan 386 | # Validation Y values 387 | mask = train_df[class_col_name] == "test" 388 | train_df.loc[mask, y_pred_col_name] = (train_df[trend_pred_col_name] + 389 | train_df[seasonality_col_name] + 390 | train_df["Predicted R"]) 391 | # Future Y values 392 | mask = train_df[class_col_name] == "forecast" 393 | train_df.loc[mask, y_pred_col_name] = (train_df[trend_pred_col_name] + 394 | train_df[seasonality_col_name] + 395 | train_df["Predicted R"]) 396 | return train_df 397 | -------------------------------------------------------------------------------- /pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/utils/plot_utils.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from statsmodels.graphics import tsaplots 3 | 4 | 5 | def plot_clean_y(df, train_df, y_max): 6 | """Plot Pre-processed Y""" 7 | fig, ax = plt.subplots(figsize=(20, 10)) 8 | plt.plot(df["Date"], df["Net Order Value"], c="c", label="Y Original") 9 | plt.plot(train_df["Date"], train_df["Net Order Value"], c="b", label="Y") 10 | plt.legend(loc="upper right") 11 | plt.axis([min(train_df["Date"]), max(train_df["Date"]), 0, y_max]) 12 | plt.show() 13 | 14 | 15 | def plot_gdp(ext_data, col_final): 16 | """Plot resulting Industry GDP""" 17 | fig, ax = plt.subplots(figsize=(20, 10)) 18 | plt.plot(ext_data["DATE"], ext_data[col_final], c="b") 19 | plt.show() 20 | 21 | 22 | def plot_y_trend(train_df, t, y_min, y_max): 23 | """Plot Y and Trend""" 24 | fig, ax = plt.subplots(figsize=(20, 10)) 25 | plt.plot(train_df["Date"], t, color="b", label="Trend") 26 | plt.plot(train_df["Date"], 27 | train_df["Net Order Value"], 28 | color="g", 29 | label="Y") 30 | plt.legend(loc="upper right") 31 | ax.set_ylim([y_min, y_max]) 32 | plt.show() 33 | 34 | 35 | def plot_y_trend_ext(train_df, Y, exo_col_name, exo_pretty_name, y_min, y_max, 36 | y_min_exo, y_max_exo): 37 | """Plot Y, 
Trend and Exo Regressors""" 38 | fig, ax = plt.subplots(figsize=(20, 10)) 39 | ax2 = ax.twinx() 40 | # Net Order Value 41 | ax.plot(train_df.index, train_df["Net Order Value"], color="g", label="Y") 42 | # External data/GDP 43 | ax2.plot(train_df.index, 44 | train_df[exo_col_name], 45 | color="c", 46 | label=exo_pretty_name) 47 | # Trend 48 | ax.plot(train_df.dropna().index[:len(Y)], Y, color="b", label="Trend") 49 | plt.legend(loc="upper right") 50 | ax.set_ylim([y_min, y_max]) 51 | ax2.set_ylim([y_min_exo, y_max_exo]) 52 | plt.show() 53 | 54 | 55 | def plot_y_pred_trend_ext(train_df, exo_col_name, X, Y, X_F, y_min, y_max, 56 | y_min_exo, y_max_exo): 57 | """Plot Predicted Y, Trend and Exo Regressors""" 58 | fig, ax = plt.subplots(figsize=(20, 10)) 59 | ax2 = ax.twinx() 60 | # External Data/GDP 61 | ax2.plot(train_df[exo_col_name].dropna().index, 62 | train_df[exo_col_name].dropna(), 63 | color="m", 64 | label="External data (Full)") 65 | ax2.plot(train_df.dropna().index[:len(X)], 66 | X, 67 | color="c", 68 | label="External data (Train for Trend fit)") 69 | # Trend 70 | ax.plot(train_df.dropna().index[:len(Y)], 71 | Y, 72 | color="b", 73 | label="Trend (Train for Trend fit)") 74 | # Predicted Trend (through Reg) 75 | ax.plot(train_df[exo_col_name].dropna().index, 76 | train_df["Predicted Trend"][-len(X_F):], 77 | color="g", 78 | label="Trend (Predicted)") 79 | ax.legend(loc="upper right") 80 | ax2.legend(loc="lower right") 81 | ax.set_ylim([y_min, y_max]) 82 | ax2.set_ylim([y_min_exo, y_max_exo]) 83 | plt.show() 84 | 85 | 86 | def plot_y_t_s(train_df, trend_col_name, seasonality_col_name): 87 | """Plot Y, T and S""" 88 | fig, ax = plt.subplots(figsize=(20, 10)) 89 | plt.plot(train_df.index, train_df["Net Order Value"], color="g", label="Y") 90 | plt.plot(train_df.index, train_df[trend_col_name], color="b", label="T") 91 | plt.plot(train_df.index, 92 | train_df[trend_col_name] + train_df[seasonality_col_name], 93 | color="m", 94 | label="T+S") 95 | plt.legend(loc="upper right") 96 | plt.show() 97 | 98 | 99 | def plot_y_t_s_with_pred(train_df, trend_col_name, seasonality_col_name, 100 | pred_trend_col_name): 101 | """Plot Y, T, S and Predicted T + S""" 102 | fig, ax = plt.subplots(figsize=(20, 10)) 103 | plt.plot(train_df.index, train_df["Net Order Value"], color="g", label="Y") 104 | plt.plot(train_df.index, train_df[trend_col_name], color="b", label="T") 105 | plt.plot(train_df.index, 106 | train_df[pred_trend_col_name], 107 | color="c", 108 | label="T Pred") 109 | 110 | plt.plot(train_df.index, 111 | train_df[trend_col_name] + train_df[seasonality_col_name], 112 | color="m", 113 | label="T+S") 114 | plt.plot(train_df.index, 115 | train_df[pred_trend_col_name] + train_df[seasonality_col_name], 116 | color="r", 117 | label="T Pred + S") 118 | plt.legend(loc="upper right") 119 | plt.show() 120 | 121 | 122 | def plot_r(train_df, r_col_name): 123 | """Plot Residuals""" 124 | fig, ax = plt.subplots(figsize=(20, 10)) 125 | plt.plot(train_df.index, train_df[r_col_name], color="y", label="R") 126 | plt.legend(loc="upper right") 127 | plt.show() 128 | 129 | 130 | def plot_acf_pacf_r(r, lags): 131 | """Plot ACF and PACF plots for Residuals""" 132 | fig, ax = plt.subplots(2, 1, figsize=(20, 10)) 133 | fig = tsaplots.plot_acf(r.dropna(), lags=lags, ax=ax[0]) 134 | fig = tsaplots.plot_pacf(r.dropna(), lags=lags, ax=ax[1]) 135 | plt.show() 136 | 137 | 138 | def plot_results(results): 139 | """Plot all Residual sets (train, Val and Forecast)""" 140 | fig, ax = plt.subplots(figsize=(20, 10)) 141 | 
results["Date"] = results["Date"].astype(str) 142 | plt.plot(results["Date"], results["Predicted Net Order Value"], c="b") 143 | plt.plot(results["Date"], results["Actual Net Order Value"], c="r") 144 | plt.fill_between(results["Date"], 145 | results["Conf_lower"], 146 | results["Conf_Upper"], 147 | color="k", 148 | alpha=0.15) 149 | for i, tick in enumerate(ax.get_xticklabels()): 150 | tick.set_rotation(45) 151 | tick.set_visible(False) 152 | if i % 3 == 0: 153 | tick.set_visible(True) 154 | plt.show() 155 | 156 | 157 | def plot_final(train_df, trend_col_name, seasonality_col_name, r_col_name, 158 | trend_pred_col_name, y_pred_col_name, class_col_name): 159 | """Plot Y, T, S, R and Predicted Y with intermediary components""" 160 | fig, ax = plt.subplots(figsize=(20, 10)) 161 | plt.plot(train_df.index, train_df["Net Order Value"], color="g", label="Y") 162 | plt.plot(train_df.index, train_df[trend_col_name], color="b", label="T") 163 | plt.plot(train_df.index, 164 | train_df[trend_col_name] + train_df[seasonality_col_name], 165 | color="m", 166 | label="T+S") 167 | # Seasonality 168 | plt.plot(train_df.index, 169 | train_df[seasonality_col_name], 170 | color="m", 171 | label="S") 172 | # Predicted Trend 173 | plt.plot(train_df.index, 174 | train_df[trend_pred_col_name], 175 | color="y", 176 | label="T Pred") 177 | plt.plot(train_df.index, 178 | train_df[trend_pred_col_name] + train_df[seasonality_col_name], 179 | color="k", 180 | label="T Pred + S") 181 | # Predicted Y on Validation part 182 | plt.plot( 183 | train_df[train_df[class_col_name] == "test"].index, 184 | train_df[train_df[class_col_name] == "test"][y_pred_col_name], 185 | color="c", 186 | label="Y Pred (val)", 187 | ) 188 | # Predicted Y on Future part 189 | plt.plot( 190 | train_df[train_df[class_col_name] == "forecast"].index, 191 | train_df[train_df[class_col_name] == "forecast"][y_pred_col_name], 192 | color="r", 193 | label="Y Pred (future)", 194 | ) 195 | plt.legend(loc="upper right") 196 | plt.show() 197 | -------------------------------------------------------------------------------- /pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/utils/utils.py: -------------------------------------------------------------------------------- 1 | from pycelonis import pql 2 | 3 | import datetime 4 | import isoweek 5 | import pandas as pd 6 | 7 | ## Loading Data 8 | 9 | 10 | def get_pql_dataframe(dm, input_columns, input_filter): 11 | """Query input columns with filters from input DM""" 12 | query = pql.PQL() 13 | for col_name, col_pretty_name in input_columns: 14 | query += pql.PQLColumn(col_name, col_pretty_name) 15 | if input_filter != '': 16 | query += pql.PQLFilter(input_filter) 17 | queried_df = dm.get_data_frame(query) 18 | return queried_df 19 | 20 | 21 | def get_subset_df(train_df, subset, subset_col_name): 22 | """Filter df for subset""" 23 | subset_train_df = train_df[train_df[subset_col_name] == subset] 24 | subset_train_df.drop(columns=[subset_col_name], inplace=True) 25 | return subset_train_df 26 | 27 | 28 | ## Pre-processing 29 | 30 | 31 | def fill_empty_dates(df): 32 | """Fill empty weeks of date Df""" 33 | my_date = datetime.datetime.now() 34 | year, week_num, day_of_week = my_date.isocalendar() 35 | d = isoweek.Week(year, week_num - 1).monday() 36 | rng = pd.date_range(df["Date"].min(), d, freq="7D") 37 | df = df.set_index("Date").reindex(rng, fill_value=0).reset_index() 38 | df.rename(columns={"index": "Date"}, inplace=True) 39 | return df 40 | 41 | 42 | def cap_outliers(df, 
max_outlier_value): 43 | """Clean outliers""" 44 | df.loc[df["Net Order Value"] > max_outlier_value, 45 | "Net Order Value"] = max_outlier_value 46 | return df 47 | 48 | 49 | def adjust_baseline(df, change_date, end_date): 50 | """Calculate baseline avg difference between TS before change_date vs TS between change_date and end_date""" 51 | diff_high_low = ( 52 | df.loc[(change_date < df["Date"]) & 53 | (df["Date"] <= end_date), "Net Order Value"].mean() - 54 | df.loc[df["Date"] <= change_date, "Net Order Value"].mean()) 55 | # Adjust lower baseline with the above avg difference 56 | df.loc[df["Date"] <= change_date, "Net Order Value"] += diff_high_low 57 | return df 58 | 59 | 60 | ## Model utils 61 | 62 | 63 | def calculate_trend(df, ts_seasonality, center=False): 64 | """Calculate Trend""" 65 | t = df.iloc[:, 1].rolling(window=ts_seasonality, center=center).mean() 66 | return t 67 | 68 | 69 | def combine_ext_data(train_df, ext_data, days_to_shift=None): 70 | """Combine External/GDP data with Y""" 71 | # Add Exo regressors (GDP) to train df 72 | train_df = train_df.set_index("Date") 73 | ext_data["DATE"] = pd.to_datetime(ext_data["DATE"]) 74 | ext_data = ext_data.set_index("DATE") 75 | # Optional - Align dates of Industry GDP with Trend 76 | if days_to_shift is not None: 77 | ext_data = ext_data.shift(days_to_shift, freq="D") 78 | # Combine Train Df with GDP 79 | train_df = train_df.combine_first(ext_data) 80 | return train_df 81 | 82 | 83 | def get_trend_and_exo_for_fit(train_df, exo_col_name, trend_col_name, 84 | val_size_perc): 85 | """Create subsets for Trend Fit""" 86 | # Create X set (Exo Regressor) 87 | X = train_df.dropna()[exo_col_name].values 88 | train_size = int(len(X) * (1 - val_size_perc)) 89 | X_train = X[:train_size].reshape(-1, 1) 90 | # Create Y set (Trend to fit) 91 | Y_train = train_df.dropna()[trend_col_name].values[:train_size].reshape( 92 | -1, 1) 93 | return X_train, Y_train 94 | 95 | 96 | def fill_seasonality(train_df, 97 | seas_period_days, 98 | seasonality_col_name='Seasonality'): 99 | """Fill empty seasonality dates""" 100 | delta = datetime.timedelta(days=-seas_period_days) 101 | for i in train_df[train_df[seasonality_col_name].isnull() == True].index: 102 | print(i, i + delta) 103 | train_df.loc[i][seasonality_col_name] = train_df.loc[ 104 | i + delta][seasonality_col_name] 105 | return train_df 106 | 107 | 108 | ## Exports 109 | 110 | 111 | def prepare_export_df(train_df, output_col_names, y_pred_col_name): 112 | """Reformat results for Export to DM""" 113 | print(output_col_names) 114 | cols_to_load = list(output_col_names) 115 | cols_to_load.remove('index') 116 | print(cols_to_load) 117 | export_df = pd.DataFrame(train_df[cols_to_load]) 118 | export_df.reset_index(inplace=True) 119 | export_df.rename(columns=output_col_names, inplace=True) 120 | return export_df 121 | 122 | 123 | def constitute_export_df(all_subset_exports, subset_col_name): 124 | """Create export-version Df from the export-version of subsets""" 125 | export_df = pd.DataFrame() 126 | for key in all_subset_exports: 127 | subset_df = all_subset_exports[key] 128 | subset_df[subset_col_name] = key 129 | export_df = pd.concat([export_df, subset_df], axis=0) 130 | return export_df -------------------------------------------------------------------------------- /pycelonis1/03_Connectivity/02a_Extraction_Mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 
Extraction Mover" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**This tutorial shows how to copy a simple extraction from one team/ data job to another one, independent of the cluster.**\n", 15 | "\n" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "**To do so we first need to connect to the source data model.**" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "import pycelonis\n", 32 | "from pycelonis import get_celonis\n", 33 | "\n", 34 | "c_source = get_celonis(\"URL to the team from which you want to copy.\", \"Specify a valid API key for your source cloud team.\")\n", 35 | "# All IDs required can be found within the URLs when displaying the related objects in the EMS.\n", 36 | "source_data_pool = c_source.pools.find('Name or ID of the source data pool.')\n", 37 | "source_data_job = source_data_pool.data_jobs.find(\"Name or ID of the source data job.\")\n", 38 | "source_extraction = source_data_job.extractions.ids['ID of the source extraction task.']" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# Print source variables\n", 48 | "print(source_data_pool)\n", 49 | "print(source_data_job)\n", 50 | "print(source_extraction)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "**In the next step we connect to the target data pool.**" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "c_target = get_celonis(\"URL to cloud team where you want to copy to.\", \"Specify a valid API key for your target cloud team.\")\n", 67 | "target_data_pool = c_target.pools.find(\"Name or ID of the target data pool.\")\n", 68 | "target_data_job = target_data_pool.data_jobs.find(\"Name or ID of the target data job.\")" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "# Print target variables\n", 78 | "print(target_data_pool)\n", 79 | "print(target_data_job)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "**Create target extraction.**" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "# Print source extraction name and type\n", 96 | "print(source_extraction.name)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "target_extraction = target_data_job.create_extraction(source_extraction.name)\n", 106 | "print(target_extraction.name)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "**In this step we save all source global parameter IDs in a dictionary and overwrite them with the target global parameter ID if the parameter exists already in the target data pool.**" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "global_vars = {}\n", 123 | "for source_var in source_data_pool.variables: # loop through global parameters of source data pool\n", 124 | " global_vars.update({source_var.id: None}) # save ID of source global 
parameter\n", 125 | " for target_var in target_data_pool.variables: # loop through the global parameters of target data pool\n", 126 | " if source_var.data['placeholder'].upper() == target_var.data['placeholder'].upper(): # if the placeholder of a source global parameter and a target global parameter match\n", 127 | " global_vars.update({source_var.id: target_var.id}) # match the saved ID of source global parameter wih the target global parameter ID\n", 128 | "print(\"Global parameter configurations saved.\")" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "**This section serves to create the extraction parameters.**" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "for source_ex_para in source_extraction.variables: # loop through the source extraction parameters\n", 145 | " if source_ex_para['defaultSettings'] is not None: # create the connection for the default value to the target global parameters\n", 146 | " if source_ex_para['defaultSettings']['poolVariableId'] is not None:\n", 147 | " target_id = global_vars.get(source_ex_para['defaultSettings']['poolVariableId']) \n", 148 | " if target_id is None:\n", 149 | " para = target_data_pool.create_pool_parameter(source_data_pool.variables.find(source_ex_para['defaultSettings']['poolVariableId'])) # create the target global parameter if it does not exist yet\n", 150 | " print(\"Pool Parameter '\" + para.name + \"' created.\")\n", 151 | " target_id = para.id\n", 152 | " global_vars[source_ex_para['defaultSettings']['poolVariableId']] = target_id\n", 153 | " source_ex_para['defaultSettings']['poolVariableId'] = target_id\n", 154 | " if source_ex_para['settings'] is not None: # create the connection for the value to the target global parameters\n", 155 | " if source_ex_para['settings']['poolVariableId'] is not None:\n", 156 | " target_id = global_vars.get(source_ex_para['settings']['poolVariableId'])\n", 157 | " if target_id is None:\n", 158 | " para = target_data_pool.create_pool_parameter(source_data_pool.variables.find(source_ex_para['settings']['poolVariableId'])) # create the target global parameter if it does not exist yet\n", 159 | " print(\"Pool Parameter '\" + para.name + \"' created.\")\n", 160 | " target_id = para.id\n", 161 | " global_vars[source_ex_para['settings']['poolVariableId']] = target_id\n", 162 | " source_ex_para['settings']['poolVariableId'] = target_id\n", 163 | " \n", 164 | " target_extraction.create_extraction_parameter(source_ex_para) # create the target transformation parameter\n", 165 | " print(\"Parameter '\" + source_ex_para['name'] + \"' created.\")" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "**Add tables in target extraction and overwrite parameter connections for time filters.**" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "for table in source_extraction.tables.data:\n", 182 | " \n", 183 | " if table.data['creationDateParameterStart'] is not None or table.data['creationDateParameterEnd'] is not None:\n", 184 | " #None or table.data['changeDateOffsetParameter'] is not None:\n", 185 | " \n", 186 | " temp = table.data.copy()\n", 187 | " \n", 188 | " for para in source_extraction.variables:\n", 189 | " if para['id'] == table.data['creationDateParameterStart']:\n", 190 | " placeholder_start = 
para['placeholder']\n", 191 | "# if para['id'] == table.data['creationDateParameterEnd']:\n", 192 | "# placeholder_end = para['placeholder']\n", 193 | "# if para['id'] == table.data['changeDateOffsetParameter']:\n", 194 | "# placeholder_change = para['placeholder']\n", 195 | " \n", 196 | " for para in target_extraction.variables:\n", 197 | " if para['placeholder'] == placeholder_start:\n", 198 | " temp['creationDateParameterStart'] = para['id']\n", 199 | " # if para['placeholder'] == placeholder_end:\n", 200 | " # temp['creationDateParameterEnd'] = para['id']\n", 201 | " # if para['placeholder'] == placeholder_change:\n", 202 | " # temp['changeDateOffsetParameter'] = para['id']\n", 203 | " \n", 204 | " table=temp \n", 205 | " \n", 206 | " try:\n", 207 | " target_extraction.add_table(table=table)\n", 208 | " except:\n", 209 | " print(\"Filters for table: '\" + table.name + \"' are neglected as the table is not found in the indicated source connection.\")\n", 210 | "\n", 211 | "print(\"Congrats you copied the extraction \"+ target_extraction.name + \"!\")" 212 | ] 213 | } 214 | ], 215 | "metadata": { 216 | "jupytext": { 217 | "formats": "ipynb,py:percent" 218 | }, 219 | "kernelspec": { 220 | "display_name": "Python 3", 221 | "language": "python", 222 | "name": "python3" 223 | }, 224 | "language_info": { 225 | "codemirror_mode": { 226 | "name": "ipython", 227 | "version": 3 228 | }, 229 | "file_extension": ".py", 230 | "mimetype": "text/x-python", 231 | "name": "python", 232 | "nbconvert_exporter": "python", 233 | "pygments_lexer": "ipython3", 234 | "version": "3.8.8" 235 | } 236 | }, 237 | "nbformat": 4, 238 | "nbformat_minor": 4 239 | } 240 | -------------------------------------------------------------------------------- /pycelonis1/03_Connectivity/02b_Transformation_Mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Transformation (DataJob) Mover" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**This tutorial shows how to copy transformations from one team/ data pool to another one, independent of the cluster.**\n", 15 | "\n" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "**To do so we first need to connect to the source data job.**" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "from pycelonis import get_celonis\n", 32 | "\n", 33 | "c_source = get_celonis(\"URL to the team from which you want to copy.\", \"Specify a valid API key for your source cloud team.\")\n", 34 | "source_data_pool = c_source.pools.find(\"Name or ID of the source data pool.\")\n", 35 | "source_data_job = source_data_pool.data_jobs.find(\"Name or ID of the source data job.\")" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "**In the next step we connect to the target data job.**" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "c_target = get_celonis(\"URL to cloud team where you want to copy to.\", \"Specify a valid API key for your target cloud team.\")\n", 52 | "target_data_pool = c_target.pools.find(\"Name or ID of the target data pool.\")\n", 53 | "target_data_job = target_data_pool.data_jobs.find(\"Name or ID of the target data job.\")" 54 | ] 
55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "**In this step we save all source global parameter IDs in a dictionary and overwrite them with the target global parameter ID if the parameter exists already in the target data pool.**" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "global_vars = {}\n", 70 | "for source_var in source_data_pool.variables: # loop through global parameters of source data pool\n", 71 | " global_vars.update({source_var.id: None}) # save ID of source global parameter\n", 72 | " for target_var in target_data_pool.variables: # loop through the global parameters of target data pool\n", 73 | " if source_var.data['placeholder'].upper() == target_var.data['placeholder'].upper(): # if the placeholder of a source global parameter and a target global parameter match\n", 74 | " global_vars.update({source_var.id: target_var.id}) # match the saved ID of source global parameter with the target global parameter ID\n", 75 | "print(\"Global parameter configurations saved.\")" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "**This section serves to create the target transformation, the related transformation parameters and to copy over the template settings.**" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "for source_transformation in source_data_job.transformations.data: # loop through source transformations\n", 92 | " \n", 93 | " if source_transformation.statement is None: # if the source transformation is empty, it will not be created\n", 94 | " continue\n", 95 | " \n", 96 | " # copy transformation from source to target data job:\n", 97 | " target_transformation = target_data_job.create_transformation(name=source_transformation.name, description=source_transformation.data['description'], statement=source_transformation.statement)\n", 98 | " print(\"Transformation: '\" + target_transformation.name + \"' created.\")\n", 99 | " \n", 100 | " for source_local_var in source_transformation.variables: # loop through the source transformation parameters\n", 101 | " if source_local_var['defaultSettings'] is not None: # create the connection for the default value to the target global parameters\n", 102 | " if source_local_var['defaultSettings']['poolVariableId'] is not None: \n", 103 | " target_id = global_vars.get(source_local_var['defaultSettings']['poolVariableId'])\n", 104 | " if target_id is None:\n", 105 | " para = target_data_pool.create_pool_parameter(source_data_pool.variables.find(source_local_var['defaultSettings']['poolVariableId'])) # create the target global parameter if it does not exist yet\n", 106 | " print(\"Pool Parameter '\" + para.name + \"' created.\")\n", 107 | " target_id = para.id\n", 108 | " global_vars[source_local_var['defaultSettings']['poolVariableId']] = target_id\n", 109 | " source_local_var['defaultSettings']['poolVariableId'] = target_id\n", 110 | " if source_local_var['settings'] is not None: # create the connection for the value to the target global parameters \n", 111 | " if source_local_var['settings']['poolVariableId'] is not None:\n", 112 | " target_id = global_vars.get(source_local_var['settings']['poolVariableId'])\n", 113 | " if target_id is None:\n", 114 | " para = 
target_data_pool.create_pool_parameter(source_data_pool.variables.find(source_local_var['settings']['poolVariableId'])) # create the target global parameter if it does not exist yet\n", 115 | " print(\"Pool Parameter '\" + para.name + \"' created.\")\n", 116 | " target_id = para.id\n", 117 | " global_vars[source_local_var['settings']['poolVariableId']] = target_id\n", 118 | " source_local_var['settings']['poolVariableId'] = target_id\n", 119 | "\n", 120 | " target_transformation.create_transformation_parameter(source_local_var) # create the target transformation parameter\n", 121 | " print(\"Parameter '\" + source_local_var['name'] + \"' created.\")\n", 122 | " \n", 123 | " if(source_transformation.data['template']): # copy template settings to target transformation\n", 124 | " target_transformation.to_template(source_transformation.data['protectionStatus'])\n", 125 | "\n", 126 | "print(\"Congrats you copied the data job \"+ source_data_job.name + \"!\")" 127 | ] 128 | } 129 | ], 130 | "metadata": { 131 | "jupytext": { 132 | "formats": "ipynb,py:percent" 133 | }, 134 | "kernelspec": { 135 | "display_name": "Python 3", 136 | "language": "python", 137 | "name": "python3" 138 | }, 139 | "language_info": { 140 | "codemirror_mode": { 141 | "name": "ipython", 142 | "version": 3 143 | }, 144 | "file_extension": ".py", 145 | "mimetype": "text/x-python", 146 | "name": "python", 147 | "nbconvert_exporter": "python", 148 | "pygments_lexer": "ipython3", 149 | "version": "3.8.8" 150 | } 151 | }, 152 | "nbformat": 4, 153 | "nbformat_minor": 4 154 | } 155 | -------------------------------------------------------------------------------- /pycelonis1/03_Connectivity/03_Data_Model_Mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Data Model Mover" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**This tutorial shows how to copy a data model from one team/ data pool to another one, independent of the cluster.**\n", 15 | "\n" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "**To do so we first need to connect to the source data model.**" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 32, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "name": "stdout", 32 | "output_type": "stream", 33 | "text": [ 34 | "2020-02-13 15:59:50 - pycelonis: Login successful! Hello s.matthaei@celonis.com\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "import pycelonis\n", "from pycelonis import get_celonis\n", 40 | "\n", 41 | "c_source = get_celonis(\"URL to the team from which you want to copy.\", \"Specify a valid API key for your source cloud team.\")\n", 42 | "source_data_model = c_source.datamodels.find(\"ID of the source data model. It can be copied from the URL.\") " 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "**In the next step we connect to the target data pool.**" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 39, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "2020-02-13 16:05:17 - pycelonis: Login successful! 
Hello s.matthaei@celonis.com\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "c_target = get_celonis(\"URL to cloud team where you want to copy to.\", \"Specify a valid API key for your target cloud team.\")\n", 67 | "target_data_pool = c_target.pools.find(\"Name or ID of the target data pool.\")\n", 68 | "data_source_name = \"Name of the data connection in the target data pool the target data model should refer to. Indicate an empty string to point to the global scope.\"" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "**Create the data model and add the tables.**" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 40, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "if source_data_model.name in target_data_pool.datamodels.names:\n", 85 | " print('A data model with the same name does already exist in the target data pool. Please rename one.')\n", 86 | "else: \n", 87 | " target_data_model = target_data_pool.create_datamodel(source_data_model.name) # create target data model\n", 88 | "\n", 89 | " connection = target_data_pool.data_connections.names[data_source_name] if data_source_name else None # choose the connection for the data model \n", 90 | "\n", 91 | " target_data_model.add_tables_from_pool(source_data_model.tables, connection) # add the tables from the connection to the data model" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "**Copy Activity & Case table settings. If you copy to a data pool that does not contain the case and activity table in the specified connection, this step cannot be performed. Just continue with the next one.**" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 41, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/plain": [ 109 | "[,]" 110 | ] 111 | }, 112 | "execution_count": 41, 113 | "metadata": {}, 114 | "output_type": "execute_result" 115 | } 116 | ], 117 | "source": [ 118 | "target_data_model.create_process_configuration(\n", 119 | " activity_table=source_data_model.process_configurations[0].activity_table.data[\"name\"] if source_data_model.process_configurations[0].activity_table else None,\n", 120 | " case_table=source_data_model.process_configurations[0].case_table.data[\"name\"] if source_data_model.process_configurations[0].case_table else None,\n", 121 | " case_column=source_data_model.process_configurations[0].case_column if source_data_model.process_configurations[0].activity_table else None,\n", 122 | " activity_column=source_data_model.process_configurations[0].activity_column if source_data_model.process_configurations[0].activity_table else None,\n", 123 | " timestamp_column=source_data_model.process_configurations[0].timestamp_column if source_data_model.process_configurations[0].activity_table else None,\n", 124 | " sorting_column=source_data_model.process_configurations[0].sorting_column if source_data_model.process_configurations[0].activity_table else None)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "**Add foreign key relationships.**" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 42, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "name": "stdout", 141 | "output_type": "stream", 142 | "text": [ 143 | "Relationships copied.\n" 144 | ] 145 | } 146 | ], 147 | "source": [ 148 | "for fk in source_data_model.foreign_keys:\n", 149 | " 
target_data_model.create_foreign_key(fk[\"source_table\"], fk[\"target_table\"], fk[\"columns\"], from_scratch=True)\n", 150 | "print('Relationships copied.')" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "**Add table aliases.**" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 38, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "name": "stderr", 167 | "output_type": "stream", 168 | "text": [ 169 | "WARNING:pycelonis:More things might have changed than requested\n", 170 | "WARNING:pycelonis:More things might have changed than requested\n", 171 | "WARNING:pycelonis:More things might have changed than requested\n", 172 | "WARNING:pycelonis:More things might have changed than requested\n" 173 | ] 174 | }, 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "Please ignore the warnings. Congrats you copied the data model 'SAP ECC - Accounts Payable Data Model'!\n" 180 | ] 181 | } 182 | ], 183 | "source": [ 184 | "for t in source_data_model.tables:\n", 185 | " if t.alias == t.name and isinstance(source_data_model, pycelonis.objects_ibc.Datamodel):\n", 186 | " target_t = target_data_model.tables.find(t.data[\"name\"])\n", 187 | " try:\n", 188 | " target_t.alias = t.name\n", 189 | " except ValueError:\n", 190 | " pass\n", 191 | " if t.alias != t.name and isinstance(source_data_model, pycelonis.objects_cpm4.Datamodel):\n", 192 | " target_t = target_data_model.tables.find(t.data[\"name\"])\n", 193 | " try:\n", 194 | " target_t.alias = t.alias\n", 195 | " except ValueError:\n", 196 | " pass\n", 197 | "print(\"Please ignore the warnings. Congrats you copied the data model '\"+ target_data_model.name + \"'!\")" 198 | ] 199 | } 200 | ], 201 | "metadata": { 202 | "jupytext": { 203 | "formats": "ipynb,py:percent" 204 | }, 205 | "kernelspec": { 206 | "display_name": "Python 3", 207 | "language": "python", 208 | "name": "python3" 209 | }, 210 | "language_info": { 211 | "codemirror_mode": { 212 | "name": "ipython", 213 | "version": 3 214 | }, 215 | "file_extension": ".py", 216 | "mimetype": "text/x-python", 217 | "name": "python", 218 | "nbconvert_exporter": "python", 219 | "pygments_lexer": "ipython3", 220 | "version": "3.8.8" 221 | } 222 | }, 223 | "nbformat": 4, 224 | "nbformat_minor": 4 225 | } 226 | -------------------------------------------------------------------------------- /pycelonis1/03_Connectivity/05_Transformation_Download_to_MLW.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Script Downloader" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**This tutorial shows how to download transformations of one data job.**\n", 15 | "\n", 16 | "**The installation of ftfy is a requirement for this script and needs to be installed once per workbench.**" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 24, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "Looking in indexes: https://pypi.celonis.cloud, https://pypi.org/simple\n", 29 | "Requirement already satisfied: ftfy in /home/jovyan/.local/lib/python3.7/site-packages (5.6)\n", 30 | "Requirement already satisfied: wcwidth in /opt/conda/lib/python3.7/site-packages (from ftfy) (0.1.8)\n", 31 | "Note: you may need to restart the kernel to use updated 
packages.\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "pip install ftfy" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "**In the first step we need to connect to the data job.**" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 23, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "from pycelonis import get_celonis\n", 53 | "import os\n", 54 | "from ftfy import fix_text #requires pip install\n", 55 | "\n", 56 | "c = get_celonis(\"URL to the team from which you want to copy.\", \"Specify a valid API key for your source cloud team.\")\n", 57 | "data_pool = c.pools.find(\"Name or ID of the source data pool.\")\n", 58 | "data_job = data_pool.data_jobs.find(\"Name or ID of the source data job.\")" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "**In the next step we create the SQL for the respective data job.**" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 4, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "sql = ''\n", 75 | "for transformation in data_job.transformations.data:\n", 76 | " if transformation.statement is None:\n", 77 | " continue\n", 78 | " sql = sql + '--###' + transformation.name + '###--' + '\\n\\n' + transformation.statement.strip() + '\\n\\n'" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "**In this step the SQL statement is saved as file.**" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 14, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "filepath = os.path.join('', data_job.name + '.sql')\n", 95 | "with open(filepath, 'w') as f:\n", 96 | " f.write(fix_text(sql))" 97 | ] 98 | } 99 | ], 100 | "metadata": { 101 | "jupytext": { 102 | "formats": "ipynb,py:percent" 103 | }, 104 | "kernelspec": { 105 | "display_name": "Python 3", 106 | "language": "python", 107 | "name": "python3" 108 | }, 109 | "language_info": { 110 | "codemirror_mode": { 111 | "name": "ipython", 112 | "version": 3 113 | }, 114 | "file_extension": ".py", 115 | "mimetype": "text/x-python", 116 | "name": "python", 117 | "nbconvert_exporter": "python", 118 | "pygments_lexer": "ipython3", 119 | "version": "3.7.6" 120 | } 121 | }, 122 | "nbformat": 4, 123 | "nbformat_minor": 4 124 | } 125 | -------------------------------------------------------------------------------- /pycelonis1/03_Connectivity/11_Extraction_Unifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Extraction Unifier" 8 | ] 9 | }, 10 | { 11 | "cell_type": "raw", 12 | "metadata": {}, 13 | "source": [ 14 | "Created by: e.vogt@celonis.com\n", 15 | "Uploaded on: 08.07.2020" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "**This tutorial shows how to copy multiple separate extraction jobs into one extraction.**" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "**To do so we first need to connect to the source data model.**" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 1, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | "2020-04-02 08:25:56 - pycelonis: Login successful! 
Hello Application Key, this key currently has access to 0 analyses.\n" 42 | ] 43 | } 44 | ], 45 | "source": [ 46 | "from pycelonis import get_celonis\n", 47 | "\n", 48 | "c_source = get_celonis(\"URL to the team from which you want to copy.\", \"Specify a valid API key for your source cloud team.\")\n", 49 | "source_data_pool = c_source.pools.find(\"Name or ID of the source data pool.\")\n", 50 | "source_data_job = source_data_pool.data_jobs.find(\"Name or ID of the source data job.\")" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "for source_extraction in source_data_job.extractions.data: \n", 60 | " print(source_extraction.name)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "**In the next step we connect to the target data pool.**" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 4, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "2020-04-02 08:29:22 - pycelonis: Login successful! Hello Application Key, this key currently has access to 0 analyses.\n" 80 | ] 81 | } 82 | ], 83 | "source": [ 84 | "c_target = get_celonis(\"URL to cloud team where you want to copy to.\", \"Specify a valid API key for your target cloud team.\")\n", 85 | "target_data_pool = c_target.pools.find(\"Name or ID of the target data pool.\")\n", 86 | "target_data_job = target_data_pool.data_jobs.find(\"Name or ID of the target data job.\")\n", 87 | "target_extraction = target_data_job.extractions.ids[\"Name or ID of the target data extraction.\"]" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "**Copy the parameters and tables of all source extractions into the target extraction.**" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 6, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "Congrats you copied the extraction test_unified!\n", 107 | "Congrats you copied the extraction test_unified!\n", 108 | "Congrats you copied the extraction test_unified!\n", 109 | "Congrats you copied the extraction test_unified!\n", 110 | "Parameter 'Maximal Activity End Date' created.\n", 111 | "Parameter 'Maximal Activity Start Date' created.\n", 112 | "Congrats you copied the extraction test_unified!\n", 113 | "Congrats you copied the extraction test_unified!\n", 114 | "Congrats you copied the extraction test_unified!\n", 115 | "Congrats you copied the extraction test_unified!\n", 116 | "Congrats you copied the extraction test_unified!\n" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "global_vars = {} # maps source global parameter IDs to target ones; pre-populate it by matching placeholders (as in 02a_Extraction_Mover) to avoid creating duplicate pool parameters\n", "\n", "for source_extraction in source_data_job.extractions.data: # loop through source extractions\n", 122 | "\n", 123 | " for source_ex_para in source_extraction.variables: # loop through the source extraction parameters\n", 124 | " if source_ex_para['defaultSettings'] is not None: # create the connection for the default value to the target global parameters\n", 125 | " if source_ex_para['defaultSettings']['poolVariableId'] is not None:\n", 126 | " target_id = global_vars.get(source_ex_para['defaultSettings']['poolVariableId']) \n", 127 | " if target_id is None:\n", 128 | " para = target_data_pool.create_pool_parameter(source_data_pool.variables.find(source_ex_para['defaultSettings']['poolVariableId'])) # create the target global parameter if it does not exist yet\n", 129 | " print(\"Pool Parameter '\" + para.name + \"' created.\")\n", 
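" # reuse the ID of the pool parameter that was just created so the copied extraction parameter references it in the target pool\n",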
130 | " target_id = para.id\n", 131 | " global_vars[source_ex_para['defaultSettings']['poolVariableId']] = target_id\n", 132 | " source_ex_para['defaultSettings']['poolVariableId'] = target_id\n", 133 | " if source_ex_para['settings'] is not None: # create the connection for the value to the target global parameters\n", 134 | " if source_ex_para['settings']['poolVariableId'] is not None:\n", 135 | " target_id = global_vars.get(source_ex_para['settings']['poolVariableId'])\n", 136 | " if target_id is None:\n", 137 | " para = target_data_pool.create_pool_parameter(source_data_pool.variables.find(source_ex_para['settings']['poolVariableId'])) # create the target global parameter if it does not exist yet\n", 138 | " print(\"Pool Parameter '\" + para.name + \"' created.\")\n", 139 | " target_id = para.id\n", 140 | " global_vars[source_ex_para['settings']['poolVariableId']] = target_id\n", 141 | " source_ex_para['settings']['poolVariableId'] = target_id\n", 142 | "\n", 143 | " target_extraction.create_extraction_parameter(source_ex_para) # create the target transformation parameter\n", 144 | " print(\"Parameter '\" + source_ex_para['name'] + \"' created.\")\n", 145 | " \n", 146 | " for table in source_extraction.tables.data:\n", 147 | "\n", 148 | " if table.data['creationDateParameterStart'] is not None or table.data['creationDateParameterEnd'] is not None or table.data['changeDateOffsetParameter'] is not None:\n", 149 | "\n", 150 | " temp = table.data.copy()\n", 151 | "\n", 152 | " for para in source_extraction.variables:\n", 153 | " if para['id'] == table.data['creationDateParameterStart']:\n", 154 | " placeholder_start = para['placeholder']\n", 155 | " if para['id'] == table.data['creationDateParameterEnd']:\n", 156 | " placeholder_end = para['placeholder']\n", 157 | " if para['id'] == table.data['changeDateOffsetParameter']:\n", 158 | " placeholder_change = para['placeholder']\n", 159 | "\n", 160 | " for para in target_extraction.variables:\n", 161 | " if para['placeholder'] == placeholder_start:\n", 162 | " temp['creationDateParameterStart'] = para['id']\n", 163 | " if para['placeholder'] == placeholder_end:\n", 164 | " temp['creationDateParameterEnd'] = para['id']\n", 165 | " if para['placeholder'] == placeholder_change:\n", 166 | " temp['changeDateOffsetParameter'] = para['id']\n", 167 | "\n", 168 | " table=temp \n", 169 | "\n", 170 | " try:\n", 171 | " target_extraction.add_table(table=table)\n", 172 | " except:\n", 173 | " print(\"Filters for table: '\" + table.name + \"' are neglected as the table is not found in the indicated source connection.\")\n", 174 | "\n", 175 | " print(\"Congrats you copied the extraction \"+ target_extraction.name + \"!\") " 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [] 184 | } 185 | ], 186 | "metadata": { 187 | "kernelspec": { 188 | "display_name": "Python 3", 189 | "language": "python", 190 | "name": "python3" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 3 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython3", 202 | "version": "3.7.6" 203 | } 204 | }, 205 | "nbformat": 4, 206 | "nbformat_minor": 4 207 | } 208 | -------------------------------------------------------------------------------- /pycelonis1/03_Connectivity/18_EMS_Data_Consumption_Report.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "4090f679", 6 | "metadata": {}, 7 | "source": [ 8 | "# Pull and analyze the APC consumption report\n", 9 | "\n", 10 | "##### Recommendation\n", 11 | "Use the recent (Oct 2021) EMS feature 'Pipeline Monitoring' to easily and flexibly analyze your APC and even your Job executions within Analyses. Documentation is here: https://help.celonis.cloud/help/display/CIBC/Custom+Data+Pipeline+Monitoring\n", 12 | "##### Purpose of this script\n", 13 | "Allows to analyze the full APC consumption report (https://TEAM.CLUSTER.celonis.cloud/integration/ui/data-consumption) within the MLW or other environments, by pulling it in python. This can serve as a complement to the EMS features as this allows export and APC aggregation by Data Pool.\n", 14 | "\n", 15 | "#### Inputs\n", 16 | "None if run from the MLWB.\n", 17 | "Token if run out of the MLW.\n", 18 | "\n", 19 | "#### Outputs\n", 20 | "Consumption report with used GB per table and data pool:\n", 21 | "* as pandas DataFrame\n", 22 | "* as CSV file\n", 23 | "\n", 24 | "#### Steps\n", 25 | "1. Import and connect\n", 26 | "2. Fetch data\n", 27 | "3. Process date and data volume\n", 28 | "4. Analyze (group by Data Pool)\n", 29 | "5. Export as CSV" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "id": "aa8a37bb", 35 | "metadata": {}, 36 | "source": [ 37 | "## Import and connect" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "id": "5ed198a9", 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "import pandas as pd\n", 48 | "from pycelonis import get_celonis\n", 49 | "from datetime import datetime as dt\n", 50 | "import time" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "id": "b1c1089b", 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "c = get_celonis()" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "id": "2e46ad2e", 66 | "metadata": {}, 67 | "source": [ 68 | "## Fetch data" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "id": "e5a274cc", 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "def get_consumption_df(c):\n", 79 | " page = 0\n", 80 | " df=pd.DataFrame()\n", 81 | " \n", 82 | " # Iterate over pages of data consumption\n", 83 | " while True: # while true + if -> break \n", 84 | " url = f\"{c.url}/integration//api/pools/data-consumption/?limit=5000&page={page}&sort=consumptionInBytesZA\"\n", 85 | " consumption_table = c.api_request(url, message = 'None', method = 'GET', get_json = True)\n", 86 | " t_list = consumption_table[\"extendedTableConsumptionTransports\"]\n", 87 | " if len(t_list) == 0:\n", 88 | " # Reached last page: no more data\n", 89 | " break\n", 90 | " df = pd.concat([df,pd.DataFrame(t_list)])\n", 91 | " page += 1\n", 92 | " # Limit api request rate\n", 93 | " time.sleep(1)\n", 94 | " return df\n", 95 | "\n", 96 | "df_consumption_ = get_consumption_df(c)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "id": "f35588f9", 102 | "metadata": {}, 103 | "source": [ 104 | "## Transform" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "id": "97d49319", 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "df_consumption = df_consumption_.copy()\n", 115 | "df_consumption[\"rawDataSizeGB\"] = df_consumption[\"rawDataSize\"] / (1024**3)\n", 116 | 
"df_consumption[\"lastUpdateDt\"] = pd.to_datetime(df_consumption[\"lastUpdate\"], unit='ms')\n", 117 | "df_consumption = df_consumption[[\"dataPoolId\", \"dataPoolName\", \"tableName\", \"rawDataSizeGB\", \"lastUpdateDt\"]]\n", 118 | "df_consumption.head() " 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "id": "de1b6e00", 124 | "metadata": {}, 125 | "source": [ 126 | "## Analyse" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "id": "369e9c6d", 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "df_consumption_summary = df_consumption.groupby([\"dataPoolId\", \"dataPoolName\"]).agg({\"rawDataSizeGB\":sum, \"lastUpdateDt\":min}).reset_index()\n", 137 | "df_consumption_summary = df_consumption_summary.sort_values(\"rawDataSizeGB\", ascending=False)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "id": "a108f5bb", 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "df_consumption_summary[\"rawDataSizeGB\"].sum()" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "id": "2ea4a47c", 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "df_consumption_summary" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "id": "a9702873", 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "# Details per table\n", 168 | "df_consumption.sort_values(\"rawDataSizeGB\", ascending=False)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "id": "bd112b97", 174 | "metadata": {}, 175 | "source": [ 176 | "## Export" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "id": "29d728b3", 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "def to_csv(df, name):\n", 187 | " df.to_csv(f\"{name}_{dt.now().strftime('%Y-%m-%d_%Hh%M')}.csv\", sep=';', decimal=',')" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "id": "ad9f9cd7", 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "to_csv(df_consumption_summary, \"consumption_summary\")\n", 198 | "to_csv(df_consumption, \"consumption_details\")" 199 | ] 200 | } 201 | ], 202 | "metadata": { 203 | "kernelspec": { 204 | "display_name": "Python 3", 205 | "language": "python", 206 | "name": "python3" 207 | }, 208 | "language_info": { 209 | "codemirror_mode": { 210 | "name": "ipython", 211 | "version": 3 212 | }, 213 | "file_extension": ".py", 214 | "mimetype": "text/x-python", 215 | "name": "python", 216 | "nbconvert_exporter": "python", 217 | "pygments_lexer": "ipython3", 218 | "version": "3.8.8" 219 | } 220 | }, 221 | "nbformat": 4, 222 | "nbformat_minor": 5 223 | } 224 | -------------------------------------------------------------------------------- /pycelonis1/04_Data_Formatting/00_Combine_csv_files.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import glob\n", 11 | "\n", 12 | "path = r'D:\\Customer Data\\Testfile_join' # use your path\n", 13 | "all_files = glob.glob(path + \"/*.csv\")\n", 14 | "\n", 15 | "li = []\n", 16 | "\n", 17 | "for filename in all_files:\n", 18 | " df = pd.read_csv(filename, index_col=None, header=0)\n", 19 | " li.append(df)\n", 20 | "\n", 21 | "frame = pd.concat(li, axis=0, 
ignore_index=True)\n", 22 | "\n", 23 | "df.to_csv('D:\\Customer Data\\Testfile_join\\out.csv')" 24 | ] 25 | } 26 | ], 27 | "metadata": { 28 | "kernelspec": { 29 | "display_name": "Python 3", 30 | "language": "python", 31 | "name": "python3" 32 | }, 33 | "language_info": { 34 | "codemirror_mode": { 35 | "name": "ipython", 36 | "version": 3 37 | }, 38 | "file_extension": ".py", 39 | "mimetype": "text/x-python", 40 | "name": "python", 41 | "nbconvert_exporter": "python", 42 | "pygments_lexer": "ipython3", 43 | "version": "3.7.6" 44 | } 45 | }, 46 | "nbformat": 4, 47 | "nbformat_minor": 4 48 | } 49 | -------------------------------------------------------------------------------- /pycelonis1/06_Extractors/03_Datadog_log_data_extraction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# How to Extract Datadog Log Data Into the IBC" 8 | ] 9 | }, 10 | { 11 | "cell_type": "raw", 12 | "metadata": {}, 13 | "source": [ 14 | "Documentation: https://confluence.celonis.com/pages/viewpage.action?pageId=105841328\n", 15 | "Placeholder for use case-specific info is XXXXX" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## Set up Required Packages and Settings" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "import pandas as pd\n", 32 | "from pandas.io.json import json_normalize\n", 33 | "from pycelonis import get_celonis\n", 34 | "import requests\n", 35 | "from time import sleep\n", 36 | "\n", 37 | "pd.set_option('max_colwidth', 300)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "## Define API Requests Parameters" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "baseUrlLogs = 'https://api.datadoghq.com/api/v1/logs-queries/list'\n", 54 | "\n", 55 | "headers = {\n", 56 | " 'content-type': 'application/json',\n", 57 | " 'DD-API-KEY': 'XXXXX',\n", 58 | " 'DD-APPLICATION-KEY': 'XXXXX'\n", 59 | "}\n", 60 | "\n", 61 | "\n", 62 | "startDate = \"XXXXX\" #ISO-8601 string, unix timestamp or relative time (such as \"now-1h\" or \"now-1d\")\n", 63 | "endDate = \"XXXXX\" #ISO-8601 string, unix timestamp or relative time (such as \"now\")\n", 64 | "\n", 65 | " \n", 66 | "bodyLogXXXXX = {\n", 67 | " \"query\": \"XXXXX\", #Datadog log explorer query, e.g.:\"@errorType:(INTERNAL OR EXTERNAL)\"\n", 68 | " \"sort\": \"asc\",\n", 69 | " \"time\": {\n", 70 | " \"from\": startDate,\n", 71 | " \"to\": endDate\n", 72 | " },\n", 73 | " \"limit\": 1,\n", 74 | "}" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## Define Required Helper Functions" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "#### Get newest log ID" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "def get_newestLogId_logXXXXX():\n", 98 | " print('Getting newest log ID')\n", 99 | " newestLogId = \"\"\n", 100 | " response = requests.post(baseUrlLogs, headers=headers, json=bodyLogXXXXX)\n", 101 | " newestLogId = json_normalize(response.json()['logs'])['id'] \n", 102 | " newestLogId = newestLogId.to_string(index=False)[1:]\n", 103 | " print('Newest log ID retrieved')\n", 
104 | " return newestLogId" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "## Fetch Data" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "#### Get most current log ID as starting point for API request" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "nextLogId_logXXXXX = get_newestLogId_logXXXXX()" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "#### Get log data and convert it to dataframe" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "print('Getting log XXXXX')\n", 144 | "logXXXXX = pd.DataFrame()\n", 145 | "while nextLogId_logXXXXX != None:\n", 146 | " response = requests.post(baseUrlLogs, headers=headers, json={\n", 147 | " \"limit\": 1000,\n", 148 | " \"query\": \"XXXXX\",\n", 149 | " \"startAt\": nextLogId_logXXXXX,\n", 150 | " \"sort\": \"asc\",\n", 151 | " \"time\": {\n", 152 | " \"from\": startDate,\n", 153 | " \"to\": endDate\n", 154 | " },\n", 155 | " }) \n", 156 | " data = json_normalize(response.json()['logs'])[['XXXXX'\n", 157 | " , 'XXXXX'\n", 158 | " , ...\n", 159 | " ]]\n", 160 | " \n", 161 | " #additional functionalities to isolate information from message string, set data types, etc. as required\n", 162 | " \n", 163 | " nextLogId_logXXXXX = response.json()['nextLogId']\n", 164 | " logXXXXX = logXXXXX.append(data, ignore_index=True, sort=False)\n", 165 | " sleep(0.1)\n", 166 | "print('Log XXXXX retrieved')" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "## Push Data to the IBC" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "#### Connect to IBC team and identify data pool (here: manually)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "login = {\n", 190 | " 'celonis_url': 'XXXXX',\n", 191 | " 'api_token': 'XXXXX',\n", 192 | "}\n", 193 | "celonis_manual = get_celonis(**login)\n", 194 | "\n", 195 | "data_pool = celonis_manual.pools.find('XXXXX')\n", 196 | "data_pool" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "#### Push dataframes into IBC team/data pool" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "print('Starting to push data to IBC')\n", 213 | "data_pool.push_table(logXXXXX, 'DD_LOG_XXXXX', if_exists = 'upsert', primary_keys = ['id'])\n", 214 | "print('Data push successful')" 215 | ] 216 | } 217 | ], 218 | "metadata": { 219 | "kernelspec": { 220 | "display_name": "Python 3", 221 | "language": "python", 222 | "name": "python3" 223 | }, 224 | "language_info": { 225 | "codemirror_mode": { 226 | "name": "ipython", 227 | "version": 3 228 | }, 229 | "file_extension": ".py", 230 | "mimetype": "text/x-python", 231 | "name": "python", 232 | "nbconvert_exporter": "python", 233 | "pygments_lexer": "ipython3", 234 | "version": "3.7.6" 235 | } 236 | }, 237 | "nbformat": 4, 238 | "nbformat_minor": 4 239 | } 240 | -------------------------------------------------------------------------------- /pycelonis1/06_Extractors/11_API template.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "77c1a358-1d72-43d6-91f4-5c1bed16e682", 6 | "metadata": {}, 7 | "source": [ 8 | "# Data Imports via API\n", 9 | "#### Historical and forecast weather data used as an example" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "id": "c467d46e-4b6d-482d-a995-557bfec50900", 15 | "metadata": {}, 16 | "source": [ 17 | "## Step 1: Import Required Libraries" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "id": "33648981-2703-4262-b8fd-caf1c9d80048", 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "#Libraries specifically required for API imports\n", 28 | "import yaml\n", 29 | "from yaml import load, dump\n", 30 | "\n", 31 | "import requests\n", 32 | "from pandas.io.json import json_normalize\n", 33 | "\n", 34 | "#Other libraries that are always good to have\n", 35 | "import pandas as pd\n", 36 | "import numpy as np\n", 37 | "from datetime import date, timedelta, time, datetime\n", 38 | "import matplotlib.pyplot as plt\n", 39 | "\n", 40 | "\n", 41 | "#Note: if it is your first time importing a library, run a PIP install like so. Ensure you keep the exclamation point\n", 42 | "\n", 43 | "# ! pip install library_name\n", 44 | "\n" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "id": "155c6433-adda-4ddb-8afb-15d5ebdf5ffe", 50 | "metadata": {}, 51 | "source": [ 52 | "## Optional - Step 2: connect to Celonis\n", 53 | "#### complete only if you need to integrate the API data with Celonis data / analyses" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "id": "a5ffde80-8843-4352-b5f4-d17de82fd106", 60 | "metadata": { 61 | "tags": [] 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "from pycelonis import get_celonis, pql\n", 66 | "\n", 67 | "celonis = get_celonis(\"team\", \n", 68 | " \"key\")\n", 69 | "\n", 70 | "#team is something like: https://berkshirehathawayenergy.us-2.celonis.cloud/\n", 71 | "#the key can be created by going to Edit Profile (under the circular button in top right) --> create API key" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "id": "b64b326c-7070-4532-b85b-11e434c82a10", 77 | "metadata": {}, 78 | "source": [ 79 | "## Optional - Step 3: load data from Celonis data model\n", 80 | "#### Complete only if you need to integrate API data with Celonis data/analyses. All code is sample code and should be adjusted to fit your data." 
81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "id": "4cd5616b-f39c-49c0-a2ea-56ebc0b205df", 87 | "metadata": { 88 | "tags": [] 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "\n", 93 | "#use the code below if you are querying directly from a data model\n", 94 | " # dm_id = 'data_model_id'\n", 95 | " # datamodel = celonis.datamodels.find(dm_id)\n", 96 | "\n", 97 | "#use the code below to query from studio\n", 98 | " # package = celonis.packages.find('package_id')\n", 99 | " # source_analysis = package.analyses.find('analysis_id')\n", 100 | "\n", 101 | " \n", 102 | "#use the code below to create your PQL query (examples of aggregate functions and filter statements are provided)\n", 103 | " # q1 = pql.PQL()\n", 104 | " # q1 += pql.PQLColumn(\"ROUND_DAY(table.field1)\", \"Date\")\n", 105 | " # q1 += pql.PQLColumn(\"SUM(table.field2)\", \"Actual\")\n", 106 | " # q1 += pql.PQLColumn(\"table.field3\", \"Region\")\n", 107 | " # q1 += pql.PQLFilter(\"table.field2 IS NOT NULL; \")\n", 108 | " # q1 += pql.PQLFilter(\"table.field1 > TO_DATE ( '2019-08-17 00:00:00' , FORMAT ( '%Y-%m-%d %H:%M:%S' )); \")\n", 109 | "\n", 110 | "#use the code below to create your dataframe\n", 111 | " # df1 = datamodel.get_data_frame(q1)\n", 112 | "\n" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "id": "165afba5-2e3e-41d6-803d-6b46d597cb28", 118 | "metadata": {}, 119 | "source": [ 120 | "## Step 4: Import Data via API\n", 121 | "#### To request your own API token for NOAA weather data, go here: https://www.ncdc.noaa.gov/cdo-web/token\n", 122 | "#### To find the dataset and station ID you need, go here: https://www.ncdc.noaa.gov/cdo-web/webservices/v2" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "id": "3e8fbbf5-032c-400c-883e-2cbdfb4817be", 129 | "metadata": { 130 | "tags": [] 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "# ADJUSTMENT REQUIRED - Define the amount of time that you will pull data for (in days)\n", 135 | "timeframe = 30\n", 136 | "\n", 137 | "#usually the max date will be Today's date\n", 138 | "max_date = datetime.today().strftime(\"%Y-%m-%d\")\n", 139 | "\n", 140 | "#the min date will be the Today's date offset by the timeframe indicated above\n", 141 | "min_date = datetime.today() + timedelta(days=timeframe*(-1))\n", 142 | "min_date = min_date.strftime(\"%Y-%m-%d\")\n", 143 | "\n", 144 | "# ADJUSTMENT REQUIRED - define the data set ID (refer to link above to find dataset IDs)\n", 145 | "datasetid = 'GHCND'\n", 146 | "\n", 147 | "# ADJUSTMENT REQUIRED - define the station ID (refer to link above to find station IDs)\n", 148 | "stationid = 'GHCND:USW00023169'\n", 149 | "\n", 150 | "# ADJUSTMENT REQUIRED - define the token\n", 151 | "token = 'xyz'\n", 152 | "\n", 153 | "# run this code to obtain the request\n", 154 | "url = 'https://www.ncdc.noaa.gov/cdo-web/api/v2/data?datasetid=' + datasetid +'&stationid=' + stationid + '&startdate=' + min_date + '&enddate=' + max_date + '&units=standard&limit=1000'\n", 155 | "payload = {}\n", 156 | "headers = {\n", 157 | " 'token': token\n", 158 | "}\n", 159 | "\n", 160 | "\n", 161 | "#run the query to get raw data, put raw data into JSON format\n", 162 | "data = requests.request(\"GET\", url, headers=headers, json = payload)\n", 163 | "data = data.json()\n", 164 | "\n", 165 | "\n", 166 | "data" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "id": "86df13a1-21a5-4b01-81ac-78aa0bd83403", 172 | "metadata": {}, 173 | "source": [ 174 | "## Step 5: 
create a dataframe from your JSON data using json_normalize function\n", 175 | "#### 'results' should be replaced with the JSON object you need" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 8, 181 | "id": "f6ec8959-d4ba-4237-abf4-161e5c8466cd", 182 | "metadata": { 183 | "tags": [] 184 | }, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "text/html": [ 189 | "
\n", 190 | "\n", 203 | "\n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | "
datedatatypestationattributesvalue
02021-08-25T00:00:00AWNDGHCND:USW00023169,,W,7.4
12021-08-25T00:00:00PRCPGHCND:USW00023169,,W,24000.0
22021-08-25T00:00:00SNOWGHCND:USW00023169,,W,0.0
32021-08-25T00:00:00SNWDGHCND:USW00023169,,W,0.0
42021-08-25T00:00:00TAVGGHCND:USW00023169H,,S,92.0
..................
2792021-09-20T00:00:00SNOWGHCND:USW00023169,,D,0.0
2802021-09-20T00:00:00TAVGGHCND:USW00023169H,,S,81.0
2812021-09-20T00:00:00TMAXGHCND:USW00023169,,D,240088.0
2822021-09-20T00:00:00TMINGHCND:USW00023169,,D,240073.0
2832021-09-21T00:00:00TAVGGHCND:USW00023169H,,S,80.0
\n", 305 | "

284 rows × 5 columns

\n", 306 | "
" 307 | ], 308 | "text/plain": [ 309 | " date datatype station attributes value\n", 310 | "0 2021-08-25T00:00:00 AWND GHCND:USW00023169 ,,W, 7.4\n", 311 | "1 2021-08-25T00:00:00 PRCP GHCND:USW00023169 ,,W,2400 0.0\n", 312 | "2 2021-08-25T00:00:00 SNOW GHCND:USW00023169 ,,W, 0.0\n", 313 | "3 2021-08-25T00:00:00 SNWD GHCND:USW00023169 ,,W, 0.0\n", 314 | "4 2021-08-25T00:00:00 TAVG GHCND:USW00023169 H,,S, 92.0\n", 315 | ".. ... ... ... ... ...\n", 316 | "279 2021-09-20T00:00:00 SNOW GHCND:USW00023169 ,,D, 0.0\n", 317 | "280 2021-09-20T00:00:00 TAVG GHCND:USW00023169 H,,S, 81.0\n", 318 | "281 2021-09-20T00:00:00 TMAX GHCND:USW00023169 ,,D,2400 88.0\n", 319 | "282 2021-09-20T00:00:00 TMIN GHCND:USW00023169 ,,D,2400 73.0\n", 320 | "283 2021-09-21T00:00:00 TAVG GHCND:USW00023169 H,,S, 80.0\n", 321 | "\n", 322 | "[284 rows x 5 columns]" 323 | ] 324 | }, 325 | "execution_count": 8, 326 | "metadata": {}, 327 | "output_type": "execute_result" 328 | } 329 | ], 330 | "source": [ 331 | "df=pd.json_normalize(data['results'])\n", 332 | "df" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "id": "f8851b47-4f28-47c4-a0be-d6018eae9f61", 338 | "metadata": {}, 339 | "source": [ 340 | "## Optional - Step 6: Helpful Formatting Functions\n", 341 | "#### This step provides examples functions that can be used to manipulate your API output. These sample functions refer to a generic dataframe titled \"df\". \"df\" should be replaced with your dataframe name.\n" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "id": "0747aebc-7e0f-41f3-be38-713ae624dbce", 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "#limit how much output ALL functions in the workbook will show\n", 352 | "pd.set_option('display.max_rows', None)\n", 353 | "\n", 354 | "#sort dataframe values by Date\n", 355 | "df = df.sort_values(by=[\"column_name\"])\n", 356 | "\n", 357 | "#drop the last two rows of the dataframe\n", 358 | "df.drop(df.tail(2).index,inplace = True)\n", 359 | "\n", 360 | "#reset the index of the dataframe\n", 361 | "df = df.reset_index(drop=True)\n", 362 | "\n", 363 | "#rename a column\n", 364 | "df = df.rename(columns={\"current_column_name\": \"new_column_name\"})\n", 365 | "\n", 366 | "#change datatype of a column\n", 367 | "df['date_column_name'] = pd.to_datetime(df['date_column_name'])\n", 368 | "\n", 369 | "#filter dataset\n", 370 | "df = df[df['date_column_name'] > pd.Timestamp(datetime.now())]\n", 371 | "\n", 372 | "#drop columns\n", 373 | "df = df.drop(columns=['column1', 'column2', 'column3'])" 374 | ] 375 | }, 376 | { 377 | "cell_type": "markdown", 378 | "id": "ea3e2a7c-0333-49b7-82e2-68d92048857d", 379 | "metadata": {}, 380 | "source": [ 381 | "## Optional - Step 7: push data to Celonis\n", 382 | "#### Only use if you need to push API data to your Celonis data pool (usually API data is just used as an input to a model in MLWB and does not need to be pushed to the Celonis data)" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "id": "1cd5ab28-59a7-4c3f-9163-8a26a68091eb", 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [ 392 | "#define the data pool\n", 393 | "data_pool = celonis.pools.find(\"data_pool_id\")\n", 394 | "\n", 395 | "#option 1 - replace existing table\n", 396 | "data_pool.push_table(df,\"table_name_in_data_pool\", if_exists = 'replace')\n", 397 | "\n", 398 | "\n", 399 | "#option 2 - upsert data (similar to a delta load) using primary key of table\n", 400 | 
"data_pool.upsert_table(table_name=\"table_name_in_data_pool\",\n", 401 | " df_or_path=df,\n", 402 | " primary_keys=['primary_key'])\n", 403 | "\n" 404 | ] 405 | } 406 | ], 407 | "metadata": { 408 | "kernelspec": { 409 | "display_name": "Python 3", 410 | "language": "python", 411 | "name": "python3" 412 | }, 413 | "language_info": { 414 | "codemirror_mode": { 415 | "name": "ipython", 416 | "version": 3 417 | }, 418 | "file_extension": ".py", 419 | "mimetype": "text/x-python", 420 | "name": "python", 421 | "nbconvert_exporter": "python", 422 | "pygments_lexer": "ipython3", 423 | "version": "3.8.6" 424 | } 425 | }, 426 | "nbformat": 4, 427 | "nbformat_minor": 5 428 | } 429 | -------------------------------------------------------------------------------- /pycelonis1/06_Extractors/99_Extract-logs-from-EMS.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import requests\n", 10 | "import pandas as pd\n", 11 | "import pycelonis as py" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "api_key = ''\n", 21 | "team_url = 'https://XXXXXX.celonis.cloud/'\n", 22 | "\n", 23 | "instance = py.get_celonis(team_url, api_key)" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "audit_log = instance.api_request('https://XXXXX.celonis.cloud/api/team/logs/audit/csv')\n", 33 | "audit_log" 34 | ] 35 | } 36 | ], 37 | "metadata": { 38 | "kernelspec": { 39 | "display_name": "Python 3", 40 | "language": "python", 41 | "name": "python3" 42 | }, 43 | "language_info": { 44 | "codemirror_mode": { 45 | "name": "ipython", 46 | "version": 3 47 | }, 48 | "file_extension": ".py", 49 | "mimetype": "text/x-python", 50 | "name": "python", 51 | "nbconvert_exporter": "python", 52 | "pygments_lexer": "ipython3", 53 | "version": "3.7.3" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 4 58 | } 59 | -------------------------------------------------------------------------------- /pycelonis2/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/celonis/pycelonis-examples/6a51c8eaf84d59e7b69d457d5748ea7348659d6f/pycelonis2/.DS_Store -------------------------------------------------------------------------------- /pycelonis2/01_example_use_cases/01_use_case_version_control.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Version Control Use Case\n", 8 | "This example is solely intended as a demonstration to highlight relevant pycelonis functions and properties. In this example, you will learn how to create text-based backups of analyses, knowledge models, package variables, and transformations. More specifically, you will learn:\n", 9 | "\n", 10 | "- How to connect to the EMS\n", 11 | "- How to create folders\n", 12 | "- How to create backups of all published analyses, knowledge models, package variables and transformations\n", 13 | "- Optionally, How to commit the backup folder to GitHub\n", 14 | "\n", 15 | "
\n", 16 | " NOTE: Any Celonis objects with a serialized_content property can be backed up to a YAML or JSON file.\n", 17 | "
" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Prerequisites\n", 25 | "To follow this tutorial, you should have PyCelonis installed and should know how to perform basic interactions with PyCelonis objects. If you don't know how to do this, please complete the **Celonis Basics** tutorial first. Further, it would be helpful to already have the previously mentioned assets inside your EMS. Please refer to the **Studio - Introduction** and **Data Integration - Data Jobs** tutorials for an overview of working with each asset type." 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "### 1. Import PyCelonis, connect to Celonis API, and create the backup folder" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 1, 38 | "metadata": { 39 | "collapsed": false, 40 | "jupyter": { 41 | "outputs_hidden": false 42 | }, 43 | "pycharm": { 44 | "is_executing": true 45 | } 46 | }, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "[2023-01-30 20:53:08,556] INFO: Initial connect successful! PyCelonis Version: 2.0.1\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "from pycelonis import get_celonis\n", 58 | "from datetime import datetime as dt\n", 59 | "from pathlib import Path\n", 60 | "import json\n", 61 | "\n", 62 | "celonis = get_celonis()\n", 63 | "backup_path = Path('IBC Backup')\n", 64 | "now = dt.now().strftime(\"%d-%m-%Y_%H-%M\")\n", 65 | "\n", 66 | "if not backup_path.exists():\n", 67 | " backup_path.mkdir()" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "### 2. Create new backup folders for analyses, knowledge models, variables, and transformations" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 2, 80 | "metadata": { 81 | "collapsed": false, 82 | "jupyter": { 83 | "outputs_hidden": false 84 | } 85 | }, 86 | "outputs": [ 87 | { 88 | "name": "stdout", 89 | "output_type": "stream", 90 | "text": [ 91 | "IBC Backup/Analyses_30-01-2023_20-53\n", 92 | "IBC Backup/KnowledgeModels_30-01-2023_20-53\n", 93 | "IBC Backup/Transformations_30-01-2023_20-53\n", 94 | "IBC Backup/Variables_30-01-2023_20-53\n" 95 | ] 96 | } 97 | ], 98 | "source": [ 99 | "analyses_path = backup_path / f\"Analyses_{now}\"\n", 100 | "kms_path = backup_path / f\"KnowledgeModels_{now}\"\n", 101 | "vars_path = backup_path / f\"Variables_{now}\"\n", 102 | "trans_path = backup_path / f\"Transformations_{now}\"\n", 103 | "\n", 104 | "print(analyses_path)\n", 105 | "print(kms_path)\n", 106 | "print(vars_path)\n", 107 | "print(trans_path)\n", 108 | "\n", 109 | "analyses_path.mkdir()\n", 110 | "kms_path.mkdir()\n", 111 | "vars_path.mkdir()\n", 112 | "trans_path.mkdir()" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "### 3. 
Create backups of all published analyses, sorted by workspace" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "outputs": [], 126 | "source": [ 127 | "# Helper Function\n", 128 | "\n", 129 | "def backup_assets(assets, path):\n", 130 | " for asset in assets:\n", 131 | "\n", 132 | " # skip unpublished assets\n", 133 | " if asset.activated_draft_id is None:\n", 134 | " continue\n", 135 | "\n", 136 | " file_name = f'{asset.key}.{asset.serialization_type.lower()}'\n", 137 | " file = path / file_name\n", 138 | "\n", 139 | " file.write_text(asset.serialized_content)" 140 | ], 141 | "metadata": { 142 | "collapsed": false 143 | } 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 4, 148 | "metadata": { 149 | "collapsed": false, 150 | "jupyter": { 151 | "outputs_hidden": false 152 | } 153 | }, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "Analyses Backup Complete\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "for space in celonis.studio.get_spaces():\n", 165 | "\n", 166 | " space_folder = f\"{space.name}_{space.id}\".replace(\" \", \"_\")\n", 167 | "\n", 168 | " # Create space folders for analyses backup\n", 169 | " space_path = analyses_path / space_folder\n", 170 | " space_path.mkdir()\n", 171 | "\n", 172 | " # Create backup files\n", 173 | " for pkg in space.get_packages():\n", 174 | " backup_assets(pkg.get_analyses(), space_path)\n", 175 | "\n", 176 | "print(\"Analyses Backup Complete\")" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "### 4. Create backups of all published knowledge models, sorted by workspace" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 5, 189 | "metadata": {}, 190 | "outputs": [ 191 | { 192 | "name": "stdout", 193 | "output_type": "stream", 194 | "text": [ 195 | "Knowledge Models Backup Complete\n" 196 | ] 197 | } 198 | ], 199 | "source": [ 200 | "for space in celonis.studio.get_spaces():\n", 201 | "\n", 202 | " space_folder = f\"{space.name}_{space.id}\".replace(\" \", \"_\")\n", 203 | "\n", 204 | " # Create space folders for knowledge models backup\n", 205 | " space_path = kms_path / space_folder\n", 206 | " space_path.mkdir()\n", 207 | "\n", 208 | " # Create backup files\n", 209 | " for pkg in space.get_packages():\n", 210 | " backup_assets(pkg.get_knowledge_models(), space_path)\n", 211 | "\n", 212 | "print(\"Knowledge Models Backup Complete\")" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "### 5. 
Create backups of all package variables, sorted by package" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "outputs": [], 226 | "source": [ 227 | "# Helper Function\n", 228 | "\n", 229 | "def backup_variables(variables, package_path):\n", 230 | " for variable in variables:\n", 231 | "\n", 232 | " file_name = f\"{variable.key}.json\"\n", 233 | " file = package_path / file_name\n", 234 | "\n", 235 | " content = json.dumps({\n", 236 | " 'key': variable.key,\n", 237 | " 'type_': variable.type_,\n", 238 | " 'description': variable.description,\n", 239 | " 'value': variable.value,\n", 240 | " 'package_key': variable.package_key\n", 241 | " })\n", 242 | "\n", 243 | " file.write_text(content)" 244 | ], 245 | "metadata": { 246 | "collapsed": false 247 | } 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 6, 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "name": "stdout", 256 | "output_type": "stream", 257 | "text": [ 258 | "Package Variables Backup Complete\n" 259 | ] 260 | } 261 | ], 262 | "source": [ 263 | "for space in celonis.studio.get_spaces():\n", 264 | "\n", 265 | " space_folder = f\"{space.name}_{space.id}\".replace(\" \", \"_\")\n", 266 | "\n", 267 | " # Create space folders for variables backup\n", 268 | " space_path = vars_path / space_folder\n", 269 | " space_path.mkdir()\n", 270 | "\n", 271 | " # Create backup files\n", 272 | " for pkg in space.get_packages():\n", 273 | " pkg_folder = f\"{pkg.name}_{pkg.id}\".replace(\" \", \"_\")\n", 274 | " pkg_path = space_path / pkg_folder\n", 275 | " \n", 276 | " variables = pkg.get_variables()\n", 277 | " \n", 278 | " # Skip packages without variables\n", 279 | " if variables == []:\n", 280 | " continue\n", 281 | " \n", 282 | " pkg_path.mkdir()\n", 283 | " backup_variables(variables, pkg_path)\n", 284 | "\n", 285 | "print(\"Package Variables Backup Complete\")" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": {}, 291 | "source": [ 292 | "### 6. 
Create backups of all transformations, sorted by data job*" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "outputs": [], 299 | "source": [ 300 | "# Helper Function\n", 301 | "\n", 302 | "def backup_transformations(transformations, job_path):\n", 303 | " for transformation in transformations:\n", 304 | "\n", 305 | " # Handle errors retrieving the statement\n", 306 | " try:\n", 307 | " statement = transformation.get_statement()\n", 308 | " # Skip transformations with no statement\n", 309 | " if statement is None:\n", 310 | " continue\n", 311 | " except Exception as e:\n", 312 | " print(f\"FAILED to backup: {transformation.name} with id: {transformation.id} \\n You either don't have permissions to access the asset or the transformation is proprietary to Celonis.\")\n", 313 | " continue\n", 314 | "\n", 315 | " file_name = f\"{transformation.name}.json\"\n", 316 | " file = job_path / file_name\n", 317 | "\n", 318 | " content = json.dumps({\n", 319 | " 'id': transformation.id,\n", 320 | " 'name': transformation.name,\n", 321 | " 'description': transformation.description,\n", 322 | " 'statement': statement,\n", 323 | " 'pool_id': transformation.pool_id,\n", 324 | " 'job_id': transformation.job_id\n", 325 | " })\n", 326 | "\n", 327 | " file.write_text(content)" 328 | ], 329 | "metadata": { 330 | "collapsed": false 331 | } 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 7, 336 | "metadata": { 337 | "collapsed": false, 338 | "jupyter": { 339 | "outputs_hidden": false 340 | } 341 | }, 342 | "outputs": [ 343 | { 344 | "name": "stdout", 345 | "output_type": "stream", 346 | "text": [ 347 | "Transformations Backup Complete\n" 348 | ] 349 | } 350 | ], 351 | "source": [ 352 | "for pool in celonis.data_integration.get_data_pools():\n", 353 | "\n", 354 | " # Create space folders for analyses backup\n", 355 | " pool_folder = f\"{pool.name}_{pool.id}\".replace(\" \", \"_\")\n", 356 | " pool_path = trans_path / pool_folder\n", 357 | " pool_path.mkdir()\n", 358 | "\n", 359 | " for job in pool.get_jobs():\n", 360 | " job_folder = f\"{job.name}_{job.id}\"\n", 361 | " job_path = pool_path / job_folder\n", 362 | " transformations = job.get_transformations()\n", 363 | "\n", 364 | " # Skip jobs without transformations\n", 365 | " if transformations == []:\n", 366 | " continue\n", 367 | "\n", 368 | " job_path.mkdir()\n", 369 | " backup_transformations(transformations, job_path)\n", 370 | "\n", 371 | "print(\"Transformations Backup Complete\")" 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": {}, 377 | "source": [ 378 | "**Transformations downloaded from the marketplace are proprietary to Celonis and are unable to backup*" 379 | ] 380 | }, 381 | { 382 | "cell_type": "markdown", 383 | "metadata": {}, 384 | "source": [ 385 | "### 7. (Optional) Commit the backup folder to GitHub" 386 | ] 387 | }, 388 | { 389 | "cell_type": "markdown", 390 | "metadata": {}, 391 | "source": [ 392 | "Navigate to the backup_path on the command line using the *cd* command, then run:\n", 393 | "\n", 394 | " git init\n", 395 | " git add .\n", 396 | " git commit -m \"Activating version control\"" 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "metadata": {}, 402 | "source": [ 403 | "## Conclusion\n", 404 | "Congratulations! You have learned how to connect to the EMS, how to create folders, how to create backups of various assets, and how to commit the backup folder to GitHub." 
405 | ] 406 | } 407 | ], 408 | "metadata": { 409 | "kernelspec": { 410 | "display_name": "Python 3 (ipykernel)", 411 | "language": "python", 412 | "name": "python3" 413 | }, 414 | "language_info": { 415 | "codemirror_mode": { 416 | "name": "ipython", 417 | "version": 3 418 | }, 419 | "file_extension": ".py", 420 | "mimetype": "text/x-python", 421 | "name": "python", 422 | "nbconvert_exporter": "python", 423 | "pygments_lexer": "ipython3", 424 | "version": "3.8.13" 425 | } 426 | }, 427 | "nbformat": 4, 428 | "nbformat_minor": 4 429 | } 430 | -------------------------------------------------------------------------------- /pycelonis2/02_pycelonis_version_migrator/Pycelonis_Migration_UI.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "15ffaabc", 6 | "metadata": {}, 7 | "source": [ 8 | "# User Interface for the Pycelonis migration script" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "f505a141", 14 | "metadata": {}, 15 | "source": [ 16 | "- **PROJECT DESCRIPTION:** This project migrates a given code (either .py or .ipynb) to pycelonis 2.0. It consists of two scripts, this one you are reading (**Pycelonis_Migration_UI.ipynb**) which is the **ONLY ONE THE USER SHOULD OPEN**; and the pycelonis_migration.py, which can be regarded as the backend of this project. The former should only be modified if you are planning on collaborating in enhancing this project.
\n", 17 | "
\n", 18 | " This whole project creates a modified copy of the original code (Inside the project's folder) and it afterwards produces a diff HTML file to easily visualize the changes. The modified code has the same name as the original one inputed in code_path, but with \"_migrated_automatically\" between the original name and the file extension. If you open the new outputted file, you can also see every line that was changed because it will have a comment with either # CHANGED or # CHECK MANUALLY.
\n", 19 | "
\n", 20 | "- **UI Overview**: The overall structure of this code (**Pycelonis_Migration_UI.ipynb**) is:
\n", 21 | " - Inputs: You just need to input a working path as a string in the variable code_path
\n", 22 | " - Outputs: You are going to get another code file with the migrated version, as well as an HTML file to visualize the output easily.
\n", 23 | "\n", 24 | "The whole aim is for you to test the modified code (_migrated_automatically) and look for the changes to see if they are properly working. This project doesn't cover all the use cases, so **you should manually check the output afterwards**. Altough it is not perfect, it can definetely help you save a lot of time." 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "id": "523df5c4", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import pycelonis_migration\n", 35 | "from IPython.display import IFrame" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "id": "90823215", 41 | "metadata": {}, 42 | "source": [ 43 | "Fullfill the next cell with the path to the code you want to migrate. It can either be a notebook or a plain .py file" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "id": "f9be9117", 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "code_path = \"/Users/tests/Downloads/test_code.ipynb\"" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "id": "cfd4531a", 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "pycelonis_migration.main(path=code_path)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "id": "88476ecb", 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "IFrame(\"diff.html\", width=1000, height=600)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "id": "00d11c77", 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [] 83 | } 84 | ], 85 | "metadata": { 86 | "kernelspec": { 87 | "display_name": "Projects", 88 | "language": "python", 89 | "name": "projects" 90 | }, 91 | "language_info": { 92 | "codemirror_mode": { 93 | "name": "ipython", 94 | "version": 3 95 | }, 96 | "file_extension": ".py", 97 | "mimetype": "text/x-python", 98 | "name": "python", 99 | "nbconvert_exporter": "python", 100 | "pygments_lexer": "ipython3", 101 | "version": "3.9.12" 102 | } 103 | }, 104 | "nbformat": 4, 105 | "nbformat_minor": 5 106 | } 107 | -------------------------------------------------------------------------------- /pycelonis2/02_pycelonis_version_migrator/README.md: -------------------------------------------------------------------------------- 1 | # Pycelonis Version Migrator 2 | This code serves the purpose of automatically migrating a given code from pycelonis 1 to pycelonis 2 3 | The code is not perfect (It doesn't cover every single use case), but serves as a tool to save time for 4 | people having to migrate codes to pycelonis 2 5 | 6 | # Project Description 7 | ## Input 8 | The UI Script only needs the path to the location of the .py / .ipynb archive you want to migrate. 9 | 10 | ## Output 11 | Once the code is run, it generates another .py or .ipynb file with the same name as the one you inputted but with 12 | "_migrated_automatically" after. In every line of code that is changed there will appear a comment # CHANGED or # MANUALLY CHECK. 13 | For more clarity on what has actually changed, the code generates a diff.html file that can be easily opened with any 14 | browser and highlights all the changes made in either, red, green or yellow. Nevertheless, the UI script enables a 15 | visualization of this HTML within the very UI notebook. 
16 | 
17 | ## Check the output once it has run
18 | After the migration has run, you should manually check the result for any remaining mistakes and fix them by reading the pycelonis documentation
19 | or [this article](https://celonis.github.io/pycelonis/2.0.0/tutorials/executed/04_migration/01_migration_guide/)
20 | on how to migrate the biggest changes. As stated before, the code is not perfect,
21 | but it will definitely save you time.
22 | 
23 | The backend script relies on a class called PycelonisMigrator, which applies a set of regex patterns (mainly via the
24 | regex library) to rewrite the text; a minimal sketch of such a rule is shown right after the Scripts list below.
25 | 
26 | # Scripts
27 | This project consists of two different scripts:
28 | 
29 | - **Pycelonis_Migration_UI.ipynb**: This is the script the final user should use. It contains a brief description of the
30 | overall project and the call to the backend that performs the migration.
31 | - **pycelonis_migration.py**: This is the code that performs all of the operations. It is based on a class called
32 | PycelonisMigrator and several helper functions that define the output of the regex substitution patterns.
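
To make the regex-based approach more concrete, here is a minimal sketch of the kind of rewrite rule the migrator applies. The specific pattern (rewriting the PyCelonis 1.x `celonis.pools` accessor to the 2.x `celonis.data_integration.get_data_pools()` call) and the helper name `migrate_line` are illustrative assumptions, not the actual rules from pycelonis_migration.py; only the `# CHANGED` tagging mirrors the documented behaviour. The sketch also uses Python's built-in `re` module, whereas the project itself relies on the third-party `regex` library.

```python
import re

# Illustrative rule only -- the real rules live in pycelonis_migration.py.
# Hypothetical mapping: PyCelonis 1.x pool access -> PyCelonis 2.x accessor.
PATTERN = re.compile(r"\bcelonis\.pools\b")
REPLACEMENT = "celonis.data_integration.get_data_pools()"

def migrate_line(line: str) -> str:
    """Apply one rewrite rule and tag the line if it was modified."""
    new_line, n_subs = PATTERN.subn(REPLACEMENT, line)
    if n_subs > 0:
        new_line += "  # CHANGED"
    return new_line

# Example:
#   migrate_line("pools = celonis.pools")
#   -> "pools = celonis.data_integration.get_data_pools()  # CHANGED"
```

Keeping each rule as a (pattern, replacement) pair makes the rule set easy to extend with additional PyCelonis 1.x to 2.x rewrites.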
33 | 
34 | 
35 | # Additional Information
36 | ## Pycelonis version change
37 | This project only migrates the script you provide, but bear in mind that you also need to update the Python packages
38 | so you can test the outcome of the updated script. You are strongly encouraged to check which version of pycelonis you
39 | are currently using. For this, you can either:
40 | - Type the following command in a newly opened terminal and search for the pycelonis version number:
41 | > pip list
42 | - Inside any notebook, run the following piece of code:
43 | > import pycelonis
44 | > pycelonis.\__version\__
45 | 
46 | This way you can revert to the older version in case you run into problems. Once you have done this, you can safely update
47 | pycelonis to the latest version. To update the pycelonis Python package, follow [these guidelines.](https://celonis.github.io/pycelonis/2.0.1/tutorials/executed/01_quickstart/01_installation/)
48 | You will need to run this command in a terminal:
49 | > pip install --extra-index-url=https://pypi.celonis.cloud/ pycelonis 50 | 51 | Note that you can select the version you want to install by adding it at the end of the command:
52 | > pip install --extra-index-url=https://pypi.celonis.cloud/ pycelonis=="2.0.1"
53 | 
54 | ## Friendly reminder
55 | - This project **never changes the original script** provided for migration. You can safely use it since it only reads
56 | the original and never writes back to it.
57 | 
58 | 
-------------------------------------------------------------------------------- /pycelonis2/02_pycelonis_version_migrator/function_get_data_frame.txt: --------------------------------------------------------------------------------
1 | import pycelonis
2 | import pandas as pd
3 | from pycelonis.pql import PQL, PQLColumn
4 | from pycelonis.ems.data_integration.data_model import DataModel
5 | def extract_table_from_data_pool(
6 |     celonis_object: pycelonis.celonis.Celonis,
7 |     data_pool: pycelonis.ems.data_integration.data_pool.DataPool,
8 |     data_model: pycelonis.ems.data_integration.data_model.DataModel,
9 |     table: pycelonis.ems.data_integration.data_pool_table.DataPoolTable,
10 | ) -> pd.DataFrame:
11 | 
12 |     """This function creates the PQL query to extract a whole
13 |     table from a data model. If the table is too big, the kernel might
14 |     shut down; make sure enough RAM is available.
15 | 
16 |     Input:
17 |         celonis_object: Celonis object already instantiated
18 |         data_pool: Data Pool object already instantiated
19 |         data_model: Data Model object already instantiated
20 |         table: Table object already instantiated
21 | 
22 |     Returns:
23 |         df: DataFrame containing the target table.
24 |     """
25 | 
26 |     # Instantiate query object
27 |     query = PQL(distinct=False, limit=None, offset=None)
28 | 
29 |     # Populate query with all the columns from the table
30 |     for PQL_column in table.get_columns():
31 |         query += PQLColumn(name=PQL_column.name, query=f""" "{table.name}"."{PQL_column.name}" """)
32 | 
33 |     try:
34 |         # If you have USE permissions on the data model this method can be used
35 |         df = DataModel.export_data_frame_from(
36 |             celonis_object.client,
37 |             pool_id=data_model.pool_id,
38 |             data_model_id=data_model.id,
39 |             query=query)
40 |         print("You have USE permissions")
41 |     except Exception:
42 |         # Should work if you don't have USE permissions
43 |         df = data_model.export_data_frame(query)
44 |         print("You might not have USE permissions")
45 | 
46 |     return df
47 | 
48 | 
--------------------------------------------------------------------------------
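
For reference, here is a minimal usage sketch for the helper above. The pool, data model, and table names are placeholders, and the `get_data_models()` / `get_tables()` accessors are assumed to be available on the PyCelonis 2.x objects; adjust everything to match your own EMS setup.

```python
from pycelonis import get_celonis

# Connect (inside the ML Workbench the URL and API token are picked up automatically).
celonis = get_celonis()

# Placeholder object names -- replace with pools/models/tables from your own team.
pool = next(p for p in celonis.data_integration.get_data_pools() if p.name == "My Data Pool")
data_model = next(m for m in pool.get_data_models() if m.name == "My Data Model")
table = next(t for t in pool.get_tables() if t.name == "ACTIVITIES")

# Pull the whole table into a pandas DataFrame using the helper defined above.
df = extract_table_from_data_pool(celonis, pool, data_model, table)
print(df.shape)
```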