├── .gitignore
├── README.md
├── pycelonis1
    ├── 00_manage_celonis
    │   ├── 00_ibc_to_ibc_movers
    │   │   ├── 00_analysis_mover.ipynb
    │   │   ├── 01_data_pool_mover.ipynb
    │   │   ├── 02_asset_mover.ipynb
    │   │   ├── 03_workflow_mover.ipynb
    │   │   ├── 04_analysis_to_studio_mover.ipynb
    │   │   ├── 05_workflow_to_studio_skill_mover.ipynb
    │   │   ├── 06_package_mover.ipynb
    │   │   ├── 07_action_engine_skill_mover.ipynb
    │   │   ├── 08_replacer.ipynb
    │   │   └── KPI_Mover.ipynb
    │   └── 01_misc
    │   │   ├── 00_trigger_workbench_execution.ipynb
    │   │   └── 01_use_case_version_control.ipynb
    ├── 01_use_pycelonis
    │   ├── 00_basics
    │   │   ├── 00_connecting_to_celonis.ipynb
    │   │   ├── 01_pulling_data_from_analysis.ipynb
    │   │   ├── 02_pulling_data_from_datamodel.ipynb
    │   │   └── 03_pushing_data.ipynb
    │   └── 01_misc
    │   │   └── 00_working_in_r_with_celonis.ipynb
    ├── 02_try_ml_use_cases
    │   ├── 00_time_series_forecasting
    │   │   ├── 00_O2C_material_group_net_weight_forecasting.ipynb
    │   │   └── 01_ts_forecasting
    │   │   │   ├── main.py
    │   │   │   └── utils
    │   │   │       ├── __init__.py
    │   │   │       ├── ext_data_utils.py
    │   │   │       ├── model_utils.py
    │   │   │       ├── plot_utils.py
    │   │   │       └── utils.py
    │   ├── 01_Clustering_KMeans.ipynb
    │   ├── 02_NLP_Topic_modeling_LDA.ipynb
    │   └── Simple_Model_with_Snippets.ipynb
    ├── 03_Connectivity
    │   ├── 02a_Extraction_Mover.ipynb
    │   ├── 02b_Transformation_Mover.ipynb
    │   ├── 03_Data_Model_Mover.ipynb
    │   ├── 05_Transformation_Download_to_MLW.ipynb
    │   ├── 11_Extraction_Unifier.ipynb
    │   └── 18_EMS_Data_Consumption_Report.ipynb
    ├── 04_Data_Formatting
    │   ├── 00_Combine_csv_files.ipynb
    │   └── 03_Clean_csv_data.ipynb
    ├── 05_Data_Visualization
    │   └── 00_3d_plot.ipynb
    └── 06_Extractors
    │   ├── 03_Datadog_log_data_extraction.ipynb
    │   ├── 11_API template.ipynb
    │   └── 99_Extract-logs-from-EMS.ipynb
└── pycelonis2
    ├── .DS_Store
    ├── 01_example_use_cases
        └── 01_use_case_version_control.ipynb
    └── 02_pycelonis_version_migrator
        ├── Pycelonis_Migration_UI.ipynb
        ├── README.md
        ├── function_get_data_frame.txt
        └── pycelonis_migration.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # ---> Python
2 | # Byte-compiled / optimized / DLL files
3 | __pycache__/
4 | *.py[cod]
5 | *$py.class
6 | 
7 | all_scripts/
8 | 
9 | .idea/
10 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PyCelonis Examples
2 | 
3 | This repository contains demo notebooks covering popular functionalities and use cases of PyCelonis.
4 | It complements the official [tutorials](https://celonis.github.io/pycelonis/2.0.1/tutorials/executed/01_quickstart/01_installation/) for PyCelonis.
5 | 
6 | The repository contains several notebooks for both PyCelonis 1.X and 2.X that serve as examples of what you can achieve with PyCelonis.
7 | The examples are grouped by PyCelonis version and use case.
8 | 
9 | ## PyCelonis
10 | 
11 | [![License: Celonis Tools License Agreement](https://img.shields.io/badge/License-Celonis%20Tools%20License%20Agreement-brightgreen)](https://celonis.github.io/pycelonis/license.html)
12 | [![Pycelonis Documentation](https://img.shields.io/badge/Docs-pycelonis-yellow)](https://celonis.github.io/pycelonis/index.html)
13 | 
14 | PyCelonis is a Python API wrapper for the Celonis EMS.
15 | 
16 | Using this package, you can programmatically interact with Analyses, Workspaces, Data Models, Data Pools, and other Celonis objects.
17 | The package also allows pushing data to and pulling data from Data Pools and Data Models.
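For a quick impression of that workflow, here is a minimal sketch in PyCelonis 1.x syntax, mirroring the notebooks in this repository; the team URL, API token, table name, and data model ID are placeholders you would replace with your own values:

```python
from pycelonis import get_celonis
from pycelonis.pql import PQLColumn

# Placeholder credentials -- replace with your team URL and a valid API/App key.
celonis = get_celonis(celonis_url="https://your_team.celonis.cloud/", api_token="your_api_token")

# Placeholder data model ID -- find a data model and pull one column as a pandas DataFrame.
datamodel = celonis.datamodels.find("your_datamodel_id")
df = datamodel.get_data_frame(PQLColumn(query="Table_name.A", name="Name 1"))
print(df.head())
```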
18 | 19 | PyCelonis is pre-installed in all Celonis Machine Learning Workbenches by default. 20 | For more information about PyCelonis and how to set it up in your local python environment, [see the docs](https://celonis.github.io/pycelonis/). 21 | -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/00_analysis_mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Analysis Mover using Content CLI\n", 8 | "\n", 9 | "\n", 10 | "### This tutorial shows how to copy an analysis from one team/workspace to another one" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "source_team_url = 'https://your_team.celonis.cloud/'\n", 20 | "source_api_key = 'your_api_token'\n", 21 | "destination_team_url = 'https://your_team.celonis.cloud/'\n", 22 | "destination_api_key = 'your_api_token'" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "### Set environment variables for the source team setup" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "import os\n", 39 | "os.environ['TEAM_URL'] = source_team_url\n", 40 | "os.environ['API_TOKEN'] = source_api_key\n" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "### Pull analysis from the source team" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 3, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "\u001b[32minfo\u001b[39m: File downloaded successfully. New filename: analysis_6b2166e2-0c40-43e2-b3e6-62996c7dae11.json\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "!content-cli pull analysis --id 'insert analysis id here'" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "### Set environment variables again for the destination team setup\n", 72 | "(No need to do this step (re-define environment variables) if source and destination teams are the same)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 4, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "os.environ['TEAM_URL'] = destination_team_url\n", 82 | "os.environ['API_TOKEN'] = destination_api_key" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### Push analysis to the destination team\n", 90 | "\n", 91 | "Hint: Press tab while writing the file name to auto complete" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 5, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "name": "stdout", 101 | "output_type": "stream", 102 | "text": [ 103 | "\u001b[32minfo\u001b[39m: Analysis was pushed successfully. 
New ID: 07f700ff-20e3-4dd8-878b-c7fb6319b3b2\n" 104 | ] 105 | } 106 | ], 107 | "source": [ 108 | "!content-cli push analysis --workspaceId 'insert workspace id here' --file 'insert_downloaded_file_in_the_previous_step_here'" 109 | ] 110 | } 111 | ], 112 | "metadata": { 113 | "kernelspec": { 114 | "display_name": "Python 3", 115 | "language": "python", 116 | "name": "python3" 117 | }, 118 | "language_info": { 119 | "codemirror_mode": { 120 | "name": "ipython", 121 | "version": 3 122 | }, 123 | "file_extension": ".py", 124 | "mimetype": "text/x-python", 125 | "name": "python", 126 | "nbconvert_exporter": "python", 127 | "pygments_lexer": "ipython3", 128 | "version": "3.8.3-final" 129 | } 130 | }, 131 | "nbformat": 4, 132 | "nbformat_minor": 4 133 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/01_data_pool_mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Data Pool Mover using Content CLI\n", 8 | "\n", 9 | "### This tutorial shows how to copy a datapool from one team to another\n", 10 | "Note: Datamodels and data jobs contained in the datapool are moved, but not the actual data" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "source_team_url = 'https://your_team.celonis.cloud/'\n", 20 | "source_api_key = 'your_api_token'\n", 21 | "destination_team_url = 'https://your_team.celonis.cloud/'\n", 22 | "destination_api_key = 'your_api_token'" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "### Set environment variable for the source team" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "import os\n", 39 | "os.environ['TEAM_URL'] = source_team_url\n", 40 | "os.environ['API_TOKEN'] = source_api_key" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "### Pull data pool from the source team" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 3, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "\u001b[32minfo\u001b[39m: File downloaded successfully. 
New filename: data-pool_7796633e-c2db-4524-92ec-85ae5fe65282.json\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "!content-cli pull data-pool --id 'id_of_your_data_pool'" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "### Set environment variable for the destination team\n", 72 | "Skip this step of setting up destination team if source and destination team are the same" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 4, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "import os\n", 82 | "os.environ['TEAM_URL'] = destination_team_url\n", 83 | "os.environ['API_TOKEN'] = destination_api_key" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "### Push data pool config into destination team\n", 91 | "Hint: press tab to auto complete the file name" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 5, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "name": "stdout", 101 | "output_type": "stream", 102 | "text": [ 103 | "\u001b[32minfo\u001b[39m: Data Pool was pushed successfully. New ID: undefined\n" 104 | ] 105 | } 106 | ], 107 | "source": [ 108 | "!content-cli push data-pool --file 'the_file_that_got_downloaded_in_the_previous_step'" 109 | ] 110 | } 111 | ], 112 | "metadata": { 113 | "kernelspec": { 114 | "display_name": "Python 3", 115 | "language": "python", 116 | "name": "python3" 117 | }, 118 | "language_info": { 119 | "codemirror_mode": { 120 | "name": "ipython", 121 | "version": 3 122 | }, 123 | "file_extension": ".py", 124 | "mimetype": "text/x-python", 125 | "name": "python", 126 | "nbconvert_exporter": "python", 127 | "pygments_lexer": "ipython3", 128 | "version": "3.7.6" 129 | } 130 | }, 131 | "nbformat": 4, 132 | "nbformat_minor": 4 133 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/02_asset_mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Asset Mover using Content CLI\n", 8 | "\n", 9 | "### This script moves an Asset (skill, view, analysis, knowledge model etc. in the studio) from one team to another " 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "source_team_url = 'https://your_team.celonis.cloud/'\n", 19 | "source_api_key = 'your_api_token'\n", 20 | "destination_team_url = 'https://your_team.celonis.cloud/'\n", 21 | "destination_api_key = 'your_api_token'" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "### Set environment variable for the source team" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import os\n", 38 | "os.environ['TEAM_URL'] = source_team_url\n", 39 | "os.environ['API_TOKEN'] = source_api_key" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "### Pull asset from the source team" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "\u001b[32minfo\u001b[39m: File downloaded successfully. 
New filename: asset_mykm.yml\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "!content-cli pull asset --key 'insert_asset_key_here'" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Set environment variable for the destination team\n", 71 | "Skip this step of setting environment variables again if source and destination are the same teams" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "os.environ['TEAM_URL'] = destination_team_url\n", 81 | "os.environ['API_TOKEN'] = destination_api_key" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "### Push asset to the destination team\n", 89 | "Hint: Press tab to autocomplete the file name" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 5, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "\u001b[32minfo\u001b[39m: Asset was pushed successfully. New key: noor.mykm\n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "!content-cli push asset --file 'insert_downloaded_asset_file_name' --package 'package_key_to_push_the_asset_to'" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [] 115 | } 116 | ], 117 | "metadata": { 118 | "kernelspec": { 119 | "display_name": "Python 3", 120 | "language": "python", 121 | "name": "python3" 122 | }, 123 | "language_info": { 124 | "codemirror_mode": { 125 | "name": "ipython", 126 | "version": 3 127 | }, 128 | "file_extension": ".py", 129 | "mimetype": "text/x-python", 130 | "name": "python", 131 | "nbconvert_exporter": "python", 132 | "pygments_lexer": "ipython3", 133 | "version": "3.7.6" 134 | } 135 | }, 136 | "nbformat": 4, 137 | "nbformat_minor": 4 138 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/03_workflow_mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Workflow Mover using Content CLI\n", 8 | "\n", 9 | "### This tutorial shows how to copy a workflow (process automation) from one team to another one" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "source_team_url = 'https://your_team.celonis.cloud/'\n", 19 | "source_api_key = 'your_api_token'\n", 20 | "destination_team_url = 'https://your_team.celonis.cloud/'\n", 21 | "destination_api_key = 'your_api_token'" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "### Set environment variable for the source team" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import os\n", 38 | "os.environ['TEAM_URL'] = source_team_url\n", 39 | "os.environ['API_TOKEN'] = source_api_key" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "### Pull workflow from the source team" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "\u001b[32minfo\u001b[39m: File downloaded 
successfully. New filename: workflow_2b3ef876-aa47-42b6-823f-5e1bb4680e9d.yaml\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "!content-cli pull workflow --id 'insert_workflow_id_here'" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Set environment variables again for the destination team setup\n", 71 | "No need to do this step (re-define environment variables) if source and destination teams are the same" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "os.environ['TEAM_URL'] = destination_team_url\n", 81 | "os.environ['API_TOKEN'] = destination_api_key" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "### Push workflow to the destination team\n", 89 | "\n", 90 | "Hint: Press tab while writing the file name to auto complete" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 5, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | "\u001b[32minfo\u001b[39m: Workflow was pushed successfully. New Id: b5391c57-87ae-47f9-a876-2c18e304a994\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "!content-cli push workflow --file 'insert_downloaded_file_in_the_previous_step_here'" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [] 116 | } 117 | ], 118 | "metadata": { 119 | "kernelspec": { 120 | "display_name": "Python 3", 121 | "language": "python", 122 | "name": "python3" 123 | }, 124 | "language_info": { 125 | "codemirror_mode": { 126 | "name": "ipython", 127 | "version": 3 128 | }, 129 | "file_extension": ".py", 130 | "mimetype": "text/x-python", 131 | "name": "python", 132 | "nbconvert_exporter": "python", 133 | "pygments_lexer": "ipython3", 134 | "version": "3.7.6" 135 | } 136 | }, 137 | "nbformat": 4, 138 | "nbformat_minor": 4 139 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/04_analysis_to_studio_mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Move Analysis from Process Analytics to Studio" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "source_team_url = 'https://your_team.celonis.cloud/'\n", 17 | "source_api_key = 'your_api_token'\n", 18 | "destination_team_url = 'https://your_team.celonis.cloud/'\n", 19 | "destination_api_key = 'your_api_token'" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "### Set environment variables for the source team setup" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import os\n", 36 | "os.environ['TEAM_URL'] = source_team_url\n", 37 | "os.environ['API_TOKEN'] = source_api_key" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### Pull analysis from process analytics in source team as an asset" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 3, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "name": "stdout", 54 | "output_type": "stream", 55 | "text": [ 56 | 
"\u001b[32minfo\u001b[39m: File downloaded successfully. New filename: asset_6b2166e2-0c40-43e2-b3e6-62996c7dae11.yaml\n" 57 | ] 58 | } 59 | ], 60 | "source": [ 61 | "!content-cli pull analysis --id 'insert_id_of_analysis' --asset" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "### Set environment variables again for the destination team setup\n", 69 | "No need to do this step (re-define environment variables) if source and destination teams are the same" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "os.environ['TEAM_URL'] = destination_team_url\n", 79 | "os.environ['API_TOKEN'] = destination_api_key" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "### Push downloaded analysis file to the studio as asset in the source team\n", 87 | "Hint: Press tab while writing the file name to auto complete" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 5, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "name": "stdout", 97 | "output_type": "stream", 98 | "text": [ 99 | "\u001b[32minfo\u001b[39m: Asset was pushed successfully. New key: test.RCA\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "!content-cli push asset --file 'insert_downloaded_asset_file_name' --package 'package_key_to_push_the_asset_to'" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [] 113 | } 114 | ], 115 | "metadata": { 116 | "kernelspec": { 117 | "display_name": "Python 3", 118 | "language": "python", 119 | "name": "python3" 120 | }, 121 | "language_info": { 122 | "codemirror_mode": { 123 | "name": "ipython", 124 | "version": 3 125 | }, 126 | "file_extension": ".py", 127 | "mimetype": "text/x-python", 128 | "name": "python", 129 | "nbconvert_exporter": "python", 130 | "pygments_lexer": "ipython3", 131 | "version": "3.7.6" 132 | } 133 | }, 134 | "nbformat": 4, 135 | "nbformat_minor": 4 136 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/05_workflow_to_studio_skill_mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Move Workflows from Process Automation to Studio Skills" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "source_team_url = 'https://your_team.celonis.cloud/'\n", 17 | "source_api_key = 'your_api_token'\n", 18 | "destination_team_url = 'https://your_team.celonis.cloud/'\n", 19 | "destination_api_key = 'your_api_token'" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "### Set environment variables for the source team setup" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import os\n", 36 | "os.environ['TEAM_URL'] = source_team_url\n", 37 | "os.environ['API_TOKEN'] = source_api_key" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### Pull workflow from process automation in source team as an asset" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 3, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 
| "name": "stdout", 54 | "output_type": "stream", 55 | "text": [ 56 | "\u001b[32minfo\u001b[39m: File downloaded successfully. New filename: asset_2b3ef876-aa47-42b6-823f-5e1bb4680e9d.yaml\n" 57 | ] 58 | } 59 | ], 60 | "source": [ 61 | "!content-cli pull workflow --id 'insert_workflow_id_here' --asset" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "### Set environment variables again for the destination team setup\n", 69 | "No need to do this step (re-define environment variables) if source and destination teams are the same" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "os.environ['TEAM_URL'] = destination_team_url\n", 79 | "os.environ['API_TOKEN'] = destination_api_key" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "### Push downloaded workflow file to the studio as skill asset in the source team\n", 87 | "Hint: Press tab while writing the file name to auto complete" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 5, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "name": "stdout", 97 | "output_type": "stream", 98 | "text": [ 99 | "\u001b[32minfo\u001b[39m: Asset was pushed successfully. New key: test.On-time-Delivery-Prediction\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "!content-cli push asset --file 'insert_downloaded_asset_file_name' --package 'package_key_to_push_the_asset_to'" 105 | ] 106 | } 107 | ], 108 | "metadata": { 109 | "kernelspec": { 110 | "display_name": "Python 3", 111 | "language": "python", 112 | "name": "python3" 113 | }, 114 | "language_info": { 115 | "codemirror_mode": { 116 | "name": "ipython", 117 | "version": 3 118 | }, 119 | "file_extension": ".py", 120 | "mimetype": "text/x-python", 121 | "name": "python", 122 | "nbconvert_exporter": "python", 123 | "pygments_lexer": "ipython3", 124 | "version": "3.7.6" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 4 129 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/06_package_mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Package Mover using Content CLI\n", 8 | "\n", 9 | "### This script moves a package from one team/place to another " 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "source_team_url = 'https://your_team.celonis.cloud/'\n", 19 | "source_api_key = 'your_api_token'\n", 20 | "destination_team_url = 'https://your_team.celonis.cloud/'\n", 21 | "destination_api_key = 'your_api_token'" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "#### Set environment variable for the source team" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import os\n", 38 | "os.environ['TEAM_URL'] = source_team_url\n", 39 | "os.environ['API_TOKEN'] = source_api_key" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "#### Pull Package from the source team" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": { 53 | "jupyter": { 54 | "source_hidden": true 55 | } 56 | 
}, 57 | "outputs": [ 58 | { 59 | "name": "stdout", 60 | "output_type": "stream", 61 | "text": [ 62 | "\u001b[32minfo\u001b[39m: File downloaded successfully\n" 63 | ] 64 | } 65 | ], 66 | "source": [ 67 | "!content-cli pull package --key 'package_key'" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "#### Set environment variable for the destination team\n", 75 | "(Skip this step of setting environment variables again if source and destination are the same teams)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 4, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "os.environ['TEAM_URL'] = destination_team_url\n", 85 | "os.environ['API_TOKEN'] = destination_api_key" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "#### Push package to the destination team\n", 93 | "(Hint: Press tab to autocomplete the file name)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 5, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "name": "stdout", 103 | "output_type": "stream", 104 | "text": [ 105 | "\u001b[32minfo\u001b[39m: Package was pushed successfully.\n" 106 | ] 107 | } 108 | ], 109 | "source": [ 110 | "!content-cli push package --file 'downloaded_zip_file_in_previous_step'" 111 | ] 112 | } 113 | ], 114 | "metadata": { 115 | "kernelspec": { 116 | "display_name": "Python 3", 117 | "language": "python", 118 | "name": "python3" 119 | }, 120 | "language_info": { 121 | "codemirror_mode": { 122 | "name": "ipython", 123 | "version": 3 124 | }, 125 | "file_extension": ".py", 126 | "mimetype": "text/x-python", 127 | "name": "python", 128 | "nbconvert_exporter": "python", 129 | "pygments_lexer": "ipython3", 130 | "version": "3.7.6" 131 | } 132 | }, 133 | "nbformat": 4, 134 | "nbformat_minor": 4 135 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/07_action_engine_skill_mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Action Engine Skill Mover using Content CLI\n", 8 | "\n", 9 | "### This tutorial shows how to copy an action engine skill from one team to another one" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "source_team_url = 'https://your_team.celonis.cloud/'\n", 19 | "source_api_key = 'your_api_token'\n", 20 | "destination_team_url = 'https://your_team.celonis.cloud/'\n", 21 | "destination_api_key = 'your_api_token'" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "### Set environment variable for the source team" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import os\n", 38 | "os.environ['TEAM_URL'] = source_team_url\n", 39 | "os.environ['API_TOKEN'] = source_api_key" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "### Pull action engine skill from the source team" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "\u001b[32minfo\u001b[39m: File downloaded successfully. 
New filename: skill_08594b68-2731-4ede-abaf-4fd7eb5720ca.json\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "!content-cli pull skill --skillId 'insert_skill_id_here' --projectId 'insert_project_id_here'" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Set environment variables again for the destination team setup\n", 71 | "No need to do this step (re-define environment variables) if source and destination teams are the same" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "os.environ['TEAM_URL'] = destination_team_url\n", 81 | "os.environ['API_TOKEN'] = destination_api_key" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "### Push action engine skill to the destination team\n", 89 | "\n", 90 | "Hint: Press tab while writing the file name to auto complete" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 5, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | "\u001b[32minfo\u001b[39m: Skill was pushed successfully. New ID: 4639e4dd-b0ea-484f-822b-5415f2244c5d\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "!content-cli push skill --projectId 'insert_new_project_id_here' --file 'insert_downloaded_file_name_here'" 108 | ] 109 | } 110 | ], 111 | "metadata": { 112 | "kernelspec": { 113 | "display_name": "Python 3", 114 | "language": "python", 115 | "name": "python3" 116 | }, 117 | "language_info": { 118 | "codemirror_mode": { 119 | "name": "ipython", 120 | "version": 3 121 | }, 122 | "file_extension": ".py", 123 | "mimetype": "text/x-python", 124 | "name": "python", 125 | "nbconvert_exporter": "python", 126 | "pygments_lexer": "ipython3", 127 | "version": "3.7.6" 128 | } 129 | }, 130 | "nbformat": 4, 131 | "nbformat_minor": 4 132 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/08_replacer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Replacer\n", 8 | "\n", 9 | "### This tutorial shows how to replace any text in a whole analysis. \n", 10 | "Be careful, only use real key words to replace, otherwise you might replace also words or word parts you did not mean to.\n", 11 | "\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "### First connect to the analysis and indicate what should be replaced\n", 19 | "It is recommended to use the ID of the respective analysis." 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 36, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "2020-02-17 08:20:12 - pycelonis: Login successful! 
Hello s.matthaei@celonis.com\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "import json\n", 37 | "from pycelonis import get_celonis\n", 38 | "\n", 39 | "celonis = get_celonis(\"URL to the team in which you are working.\", \"Specify a valid API key for the cloud team.\")\n", 40 | "analysis = celonis.analyses.find(\"Name or ID of the analysis.\")\n", 41 | "\n", 42 | "# enter as many replacements as you want and separate them with a comma\n", 43 | "replacements ={\n", 44 | " 'old_word_1' : 'new_word_1',\n", 45 | " 'old_word_2' : 'new_word_2'\n", 46 | "}" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "### Get the current version of the published analysis, the drafted one from the edit mode and the saved formulas" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 40, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "num_replacements = 0\n", 63 | "doc_p = analysis.published.data\n", 64 | "doc_d = analysis.draft.data\n", 65 | "kpis = analysis.saved_formulas" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "### Replace in the formulas" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 38, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "name": "stdout", 82 | "output_type": "stream", 83 | "text": [ 84 | "Replacements in the formulas: 78\n" 85 | ] 86 | } 87 | ], 88 | "source": [ 89 | "for kpi in kpis:\n", 90 | " for key, val in replacements.items():\n", 91 | " num_replacements = num_replacements + kpi.data[\"name\"].count(key)\n", 92 | " num_replacements = num_replacements + kpi.data[\"template\"].count(key)\n", 93 | " num_replacements = num_replacements + kpi.data[\"description\"].count(key)\n", 94 | " \n", 95 | " name = kpi.data[\"name\"].replace(key, val)\n", 96 | " template = kpi.data[\"template\"].replace(key, val)\n", 97 | " description = kpi.data[\"description\"].replace(key, val)\n", 98 | " parameters = kpi.data[\"parameters\"]\n", 99 | " \n", 100 | " kpi.delete()\n", 101 | " analysis.create_saved_formula(name=name, description=description, template=template, parameters=parameters) \n", 102 | " \n", 103 | "print('Replacements in the formulas: ' + str(num_replacements))" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "### Replace in the published and drafted analysis" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 39, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "name": "stdout", 120 | "output_type": "stream", 121 | "text": [ 122 | "Overall replacements: 143\n" 123 | ] 124 | } 125 | ], 126 | "source": [ 127 | "json_doc_dump_p = json.dumps(doc_p, ensure_ascii=False)\n", 128 | "json_doc_dump_p = json_doc_dump_p.replace(\"€\", \"€\")\n", 129 | "\n", 130 | "json_doc_dump_d = json.dumps(doc_d, ensure_ascii=False)\n", 131 | "json_doc_dump_d = json_doc_dump_d.replace(\"€\", \"€\")\n", 132 | "\n", 133 | "for key, val in replacements.items():\n", 134 | " num_replacements += json_doc_dump_p.count(key)\n", 135 | " json_doc_dump_p = json_doc_dump_p.replace(key, val)\n", 136 | " json_doc_dump_d = json_doc_dump_d.replace(key, val)\n", 137 | " \n", 138 | "json_doc_dump_p = json_doc_dump_p.replace(\"€\", \"€\")\n", 139 | "json_doc_dump_d = json_doc_dump_d.replace(\"€\", \"€\")\n", 140 | "\n", 141 | "doc_p = json.loads(json_doc_dump_p)\n", 142 | "doc_d = json.loads(json_doc_dump_d)\n", 143 | "\n", 144 | "analysis.draft.data = doc_d\n", 145 | 
"analysis.published.data = doc_p\n", 146 | "\n", 147 | "print('Overall replacements: ' + str(num_replacements))" 148 | ] 149 | } 150 | ], 151 | "metadata": { 152 | "jupytext": { 153 | "formats": "ipynb,py:percent" 154 | }, 155 | "kernelspec": { 156 | "display_name": "Python 3", 157 | "language": "python", 158 | "name": "python3" 159 | }, 160 | "language_info": { 161 | "codemirror_mode": { 162 | "name": "ipython", 163 | "version": 3 164 | }, 165 | "file_extension": ".py", 166 | "mimetype": "text/x-python", 167 | "name": "python", 168 | "nbconvert_exporter": "python", 169 | "pygments_lexer": "ipython3", 170 | "version": "3.7.6" 171 | } 172 | }, 173 | "nbformat": 4, 174 | "nbformat_minor": 4 175 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/00_ibc_to_ibc_movers/KPI_Mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "**Import packages and log in**" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import csv\n", 17 | "import os\n", 18 | "import numpy as np\n", 19 | "import pandas as pd\n", 20 | "import copy\n", 21 | "import sys\n", 22 | "import yaml\n", 23 | "import re\n", 24 | "from collections import OrderedDict\n", 25 | "from pycelonis import get_celonis, pql\n", 26 | "from pycelonis.pql import PQL, PQLColumn\n", 27 | "from pycelonis.utils import parquet_utils as pu\n", 28 | "\n", 29 | "login = {\n", 30 | " \"celonis_url\": \"\",\n", 31 | " \"api_token\": \"\",\n", 32 | " }\n", 33 | "celonis = get_celonis(**login)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "package = celonis.packages.find('31af4c4d-4ddd-40ae-97a6-9d1146345e6f')\n", 43 | "source_analysis = package.analyses.find('0c191ff3-5ef8-47c9-92dd-f5170e342f2a')" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "class quoted(str):\n", 53 | " pass\n", 54 | "\n", 55 | "def quoted_presenter(dumper, data):\n", 56 | " return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='\"')\n", 57 | " \n", 58 | "yaml.add_representer(quoted, quoted_presenter)\n", 59 | "\n", 60 | "class literal(str):\n", 61 | " pass\n", 62 | "\n", 63 | "def literal_presenter(dumper, data):\n", 64 | " return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='>')\n", 65 | " \n", 66 | "yaml.add_representer(literal, literal_presenter)\n", 67 | "\n", 68 | "def ordered_dict_presenter(dumper, data):\n", 69 | " return dumper.represent_dict(data.items())\n", 70 | "\n", 71 | "yaml.add_representer(OrderedDict, ordered_dict_presenter)\n", 72 | "\n", 73 | "\n", 74 | "def add_parameters(data, km_kpi):\n", 75 | " if len(data['parameters']) != 0:\n", 76 | " km_parameters = []\n", 77 | " for parameter, i in zip(data['parameters'], range(len(data['parameters']))):\n", 78 | " km_parameter = {\n", 79 | " 'id': 'p'+str(i+1),\n", 80 | " 'displayName': parameter['name'],\n", 81 | " 'defaultValue': 0\n", 82 | " }\n", 83 | " km_parameters.append(km_parameter)\n", 84 | " km_kpi['parameters'] = km_parameters\n", 85 | " return km_kpi\n", 86 | "\n", 87 | "def clean_formula_names(saved_formulas):\n", 88 | " to_return = []\n", 89 | " for data in saved_formulas:\n", 90 | " data['clean_name'] = 
data['name'].replace(')', '').replace('(', '')\n", 91 | " data['clean_name'] = re.sub(\"[^0-9a-zA-Z]+\", \"_\", data['clean_name'])\n", 92 | " to_return.append(data)\n", 93 | " return to_return\n", 94 | "\n", 95 | "def clean_template(clean_formulas):\n", 96 | " to_return = []\n", 97 | " for data in clean_formulas:\n", 98 | " tmp_template = data['template']\n", 99 | " for tmp_data in clean_formulas:\n", 100 | " name = tmp_data['name']\n", 101 | " clean_name = tmp_data['clean_name']\n", 102 | " tmp_template = tmp_template.replace(name, clean_name)\n", 103 | " data['clean_template'] = tmp_template\n", 104 | " to_return.append(data)\n", 105 | " return clean_formulas\n", 106 | " \n", 107 | "def clean_variables(var_list):\n", 108 | " to_return = []\n", 109 | " for y in var_list:\n", 110 | " tmp_var_query = y['value']\n", 111 | " for x in var_list: \n", 112 | " full_var_name0, full_var_name1, full_var_name2, full_var_name3 = '<%='+x['name']+'%>', '<%= '+x['name']+' %>', '<%= '+x['name']+'%>', '<%='+x['name']+' %>'\n", 113 | " tmp_var_query = tmp_var_query.replace(full_var_name0, '${'+x['name']+'}').replace(full_var_name1, '${'+x['name']+'}').replace(full_var_name2, '${'+x['name']+'}').replace(full_var_name3, '${'+x['name']+'}')\n", 114 | " var_data = {'id': y['name'],\n", 115 | " 'displayName': y['name'].replace('_', ' '),\n", 116 | " 'description':\"\",\n", 117 | " 'value': tmp_var_query\n", 118 | " }\n", 119 | " to_return.append(var_data)\n", 120 | " return to_return\n", 121 | "\n", 122 | "def replace_variables(clean_formulas, var_list):\n", 123 | " to_return = []\n", 124 | " for data in clean_formulas:\n", 125 | " tmp_data = copy.copy(data['clean_template'])\n", 126 | " for x in var_list:\n", 127 | " full_var_name0, full_var_name1, full_var_name2, full_var_name3 = '<%='+x['name']+'%>', '<%= '+x['name']+' %>', '<%= '+x['name']+'%>', '<%='+x['name']+' %>'\n", 128 | " tmp_data = tmp_data.replace(full_var_name0, '${'+x['name']+'}').replace(full_var_name1, '${'+x['name']+'}').replace(full_var_name2, '${'+x['name']+'}').replace(full_var_name3, '${'+x['name']+'}')\n", 129 | " data['clean_template'] = tmp_data\n", 130 | " to_return.append(data)\n", 131 | " return to_return\n", 132 | " \n", 133 | "def saved_formulas_to_yaml(source_analysis):\n", 134 | " \"\"\"Given a Celonis Analysis object, saves a 'data.yml' file \"\"\"\n", 135 | " \n", 136 | " saved_formulas = copy.copy(source_analysis.saved_formulas)\n", 137 | " var_list = copy.copy(source_analysis.analysis.draft.variables)\n", 138 | " km_kpis = []\n", 139 | " list_of_data = [formula.data for formula in saved_formulas] \n", 140 | " \n", 141 | " clean_data = clean_formula_names(list_of_data)\n", 142 | " clean_data = clean_template(clean_data)\n", 143 | " clean_data = replace_variables(clean_data, var_list)\n", 144 | " for data in clean_data:\n", 145 | " description = data['description'].replace('\\'', '').replace('\"', '')\n", 146 | " pql = '\\n'+data['clean_template'].replace('.id', '.\"id\"').replace('.Id', '.\"Id\"').replace('.ID', '.\"ID\"')\n", 147 | " km_kpi = OrderedDict(id=data['clean_name'],\n", 148 | " displayName=data['name'].replace('_', ' '),\n", 149 | " description=quoted(description),\n", 150 | " pql=literal(pql),\n", 151 | " )\n", 152 | " km_kpi = add_parameters(data, km_kpi)\n", 153 | " km_kpis.append(km_kpi)\n", 154 | " \n", 155 | " km_kpis = {'kpis':km_kpis}\n", 156 | " \n", 157 | " with open('kpis.yml', 'w') as outfile:\n", 158 | " yaml.dump(km_kpis, outfile, sort_keys=False)\n", 159 | " return\n", 160 | "\n", 161 | "def 
variables_to_yaml(source_analysis):\n", 162 | " var_list = copy.copy(source_analysis.analysis.draft.variables)\n", 163 | " var_list = clean_variables(var_list)\n", 164 | " km_vars = []\n", 165 | " for var in var_list:\n", 166 | " km_var = OrderedDict(id=var['id'],\n", 167 | " displayName=var['displayName'],\n", 168 | " description=var['description'],\n", 169 | " value=literal(var['value'].replace('\"', '') )\n", 170 | " )\n", 171 | " km_vars.append(km_var)\n", 172 | " km_vars = {'variables':km_vars}\n", 173 | " with open('variables.yml', 'w') as outfile:\n", 174 | " yaml.dump(km_vars, outfile, sort_keys=False)\n", 175 | " return" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "saved_formulas_to_yaml(source_analysis)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "variables_to_yaml(source_analysis)" 194 | ] 195 | } 196 | ], 197 | "metadata": { 198 | "kernelspec": { 199 | "display_name": "Python 3", 200 | "language": "python", 201 | "name": "python3" 202 | }, 203 | "language_info": { 204 | "codemirror_mode": { 205 | "name": "ipython", 206 | "version": 3 207 | }, 208 | "file_extension": ".py", 209 | "mimetype": "text/x-python", 210 | "name": "python", 211 | "nbconvert_exporter": "python", 212 | "pygments_lexer": "ipython3", 213 | "version": "3.7.6" 214 | } 215 | }, 216 | "nbformat": 4, 217 | "nbformat_minor": 4 218 | } 219 | -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/01_misc/00_trigger_workbench_execution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Trigger a Notebook from outside the Workbench (e.g. from an external automation software)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import requests\n", 17 | "import json\n", 18 | "\n", 19 | "url = \"https://TEAMNAME_HERE.REALMHERE.celonis.cloud/machine-learning/api/executions\"\n", 20 | "\n", 21 | "\n", 22 | "# specify ID of Workbench (find this in the URL of a specific Workbench you want to trigger)\n", 23 | "# specify filename, if your file is not stored in the root you should include the foldername in the path, e.g. \"executionFileName\": \"Foldername/Run quickly.ipynb\"\n", 24 | "# specify any params you want to pass. 
To see how they are received in the notebook, check https://papermill.readthedocs.io/en/latest/\n", 25 | "payload = {\n", 26 | " \"notebookId\": \"4417517d-55ae-482b-8f56-ac6d1d864e68\",\n", 27 | " \"executionFileName\": \"Run quickly.ipynb\",\n", 28 | " \"params\":{\n", 29 | " \"param1\":\"value1\"\n", 30 | " }\n", 31 | "}\n", 32 | "\n", 33 | "payload = json.dumps(payload)\n", 34 | "\n", 35 | "# specify API Key (Applicationkeys do not work as of June 2020)\n", 36 | "headers = {\n", 37 | " 'content-type': 'application/json;charset=UTF-8',\n", 38 | " 'authorization': 'Bearer API_KEY_HERE'\n", 39 | "}\n", 40 | "\n", 41 | "response = requests.request(\"POST\", url, headers=headers, data = payload)\n", 42 | "\n", 43 | "print(response.text.encode('utf8'))\n" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "# Get status of execution" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "import json\n", 60 | "resp = json.loads(response.text)\n", 61 | "\n", 62 | "url = url + resp['id']\n", 63 | "response = requests.request(\"GET\", url, headers=headers)\n", 64 | "print(response.text.encode('utf8'))" 65 | ] 66 | } 67 | ], 68 | "metadata": { 69 | "kernelspec": { 70 | "display_name": "Python 3", 71 | "language": "python", 72 | "name": "python3" 73 | }, 74 | "language_info": { 75 | "codemirror_mode": { 76 | "name": "ipython", 77 | "version": 3 78 | }, 79 | "file_extension": ".py", 80 | "mimetype": "text/x-python", 81 | "name": "python", 82 | "nbconvert_exporter": "python", 83 | "pygments_lexer": "ipython3", 84 | "version": "3.7.3" 85 | } 86 | }, 87 | "nbformat": 4, 88 | "nbformat_minor": 4 89 | } -------------------------------------------------------------------------------- /pycelonis1/00_manage_celonis/01_misc/01_use_case_version_control.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Create text-based backups of analyses and transformations (for git)\n", 8 | "\n", 9 | "\n", 10 | "### This script backs up all analyses and transformations into a backup folder, the user only needs to provide a Celonis object and a folder.\n" 11 | ] 12 | }, 13 | { 14 | "source": [ 15 | "### Do imports, log in to the Celonis instance, create backup folder" 16 | ], 17 | "cell_type": "markdown", 18 | "metadata": {} 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "name": "stdout", 27 | "output_type": "stream", 28 | "text": [ 29 | "2019-10-08 15:36:00 - Login successful! 
Hello Simon Riezebos\n" 30 | ] 31 | } 32 | ], 33 | "source": [ 34 | "from pycelonis import get_celonis\n", 35 | "import shutil\n", 36 | "from pathlib import Path\n", 37 | "from pycelonis.utils.api_utils import pathify\n", 38 | "\n", 39 | "celonis = get_celonis(read_only=True)\n", 40 | "backup_path = Path('IBC Backup')\n", 41 | "if not backup_path.exists():\n", 42 | " backup_path.mkdir()" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "### Create or clean analyses backup folder" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 3, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "analyses_path = backup_path / \"Analyses\"\n", 59 | "if analyses_path.exists():\n", 60 | " shutil.rmtree(analyses_path)\n", 61 | "analyses_path.mkdir()" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "### Create backups of all analyses that are published in separate workspace directories" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 4, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "for workspace in celonis.workspaces:\n", 78 | " workspace_path = analyses_path / pathify(workspace.name)\n", 79 | " workspace_path.mkdir()\n", 80 | " for a in workspace.analyses:\n", 81 | " if a.data.get('lastPublishedDraftId') is not None:\n", 82 | " a.backup_content(workspace_path)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### (Optional) Remove all draft files to only see published changes" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 5, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "for path in analyses_path.rglob('*'):\n", 99 | " if path.name.startswith(\"draft\") and path.suffix == \".json\":\n", 100 | " path.unlink()" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "### Create or clean transformation backup folder" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 4, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "transformation_path = backup_path / \"Transformations\"\n", 117 | "if transformation_path.exists():\n", 118 | " shutil.rmtree(transformation_path)\n", 119 | "transformation_path.mkdir()" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "### Create backups of all transformations in separate Pool and Data Job directories" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 5, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "for pool in celonis.pools:\n", 136 | " pool_path = transformation_path / pathify(pool.name)\n", 137 | " pool_path.mkdir()\n", 138 | " for job in pool.data_jobs:\n", 139 | " job_path = pool_path / pathify(job.name)\n", 140 | " job_path.mkdir()\n", 141 | " for tm in job.transformations:\n", 142 | " try:\n", 143 | " tm.backup_content(job_path)\n", 144 | " except:\n", 145 | " pass" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "### (Optional) Inititate git repo\n", 153 | "Navigate to the back_path on the command line, run:\n", 154 | "```\n", 155 | "git init\n", 156 | "git add .\n", 157 | "git commit -m \"Activating version control\"\n", 158 | "```\n", 159 | "When you re-run this notebook all changes will be easy to inspect, and can be committed again" 160 | ] 161 | } 162 | ], 163 | 
"metadata": { 164 | "jupytext": { 165 | "formats": "ipynb,py:percent" 166 | }, 167 | "kernelspec": { 168 | "display_name": "Python 3", 169 | "language": "python", 170 | "name": "python3" 171 | }, 172 | "language_info": { 173 | "codemirror_mode": { 174 | "name": "ipython", 175 | "version": 3 176 | }, 177 | "file_extension": ".py", 178 | "mimetype": "text/x-python", 179 | "name": "python", 180 | "nbconvert_exporter": "python", 181 | "pygments_lexer": "ipython3", 182 | "version": "3.6.8" 183 | } 184 | }, 185 | "nbformat": 4, 186 | "nbformat_minor": 4 187 | } -------------------------------------------------------------------------------- /pycelonis1/01_use_pycelonis/00_basics/00_connecting_to_celonis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Connecting to Celonis" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### The Celonis object\n", 15 | "\n", 16 | "Import the get_celonis function and call it. This will return either an IBC object or a CPM4 object. **Permissions are determined by the App/API key you use to log in!** Set `read_only` to `True` if you want to make sure you can't break anything." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "scrolled": true 24 | }, 25 | "outputs": [ 26 | { 27 | "name": "stdout", 28 | "output_type": "stream", 29 | "text": [ 30 | "2019-10-08 12:01:08 - Login successful! Hello Simon Riezebos\n" 31 | ] 32 | } 33 | ], 34 | "source": [ 35 | "from pycelonis import get_celonis\n", 36 | "\n", 37 | "celonis = get_celonis()" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### (Optional) specify login details manually\n", 45 | "By default `pycelonis` will get the login information from environment variables. See :meth:`get_celonis` for more details. You can also specify them manually." 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "login = {\n", 55 | " \"celonis_url\": \"demo.eu-1.celonis.cloud\",\n", 56 | " \"api_token\": \"paste_here_your_api_token\",\n", 57 | " #The following 2 lines are only necessary when connecting to CPM4.5, not for IBC:\n", 58 | " #\"api_id\": \"paste_here_your_api_id\", \n", 59 | " #\"username\": \"paste_here_your_username\",\n", 60 | "}\n", 61 | "celonis_manual = get_celonis(**login)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "### Working with (Celonis) objects in Jupyter Notebook" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "The object returned by `get_celonis` is your portal into celonis, **press tab after `c.`** to see the available methods and attributes." 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "celonis." 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "Objects in celonis can be found using their ID or (substring of) name. **Press shift-tab inside the parentheses** to see the signature and documentation of each function." 
92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 3, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "" 103 | ] 104 | }, 105 | "execution_count": 3, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "analysis = celonis.analyses.find('117f7528-8504-4450-9fd6-8ebcf1749d18')\n", 112 | "analysis" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "Objects can also be accessed directly with auto-complete using the `.names[]` or `.ids[]` property of a collection." 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "celonis.datamodels.names[<-PRESS TAB HERE]" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "### Advanced: access the API data of a Celonis object\n", 136 | "Almost every object that represents an object in Celonis contains a `.data` property that shows the JSON data from the Celonis API. This data is **automatically refreshed** and if **changes are made to this data, `pycelonis` tries to make the same changes in the object in Celonis.**" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 5, 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "data": { 146 | "text/plain": [ 147 | "{'permissions': ['MOVE_TO',\n", 148 | " 'DELETE_WORKSPACE',\n", 149 | " 'CREATE_WORKSPACE',\n", 150 | " 'DELETE_ALL_WORKSPACES',\n", 151 | " 'DELETE_ALL_ANALYSES',\n", 152 | " 'EDIT_ALL_ANALYSES',\n", 153 | " 'EDIT_ALL_WORKSPACES',\n", 154 | " 'USE_ALL_ANALYSES',\n", 155 | " 'CREATE_ANALYSES',\n", 156 | " 'DELETE_ANALYSIS',\n", 157 | " 'EDIT_WORKSPACE',\n", 158 | " 'MANAGE_PERMISSIONS',\n", 159 | " 'EXPORT_CONTENT',\n", 160 | " 'USE_ANALYSIS',\n", 161 | " 'EDIT_ANALYSIS'],\n", 162 | " 'id': '117f7528-8504-4450-9fd6-8ebcf1749d18',\n", 163 | " 'tenantId': None,\n", 164 | " 'name': 'OTD Prediction Binary - Technical App (OAF) - Copy',\n", 165 | " 'description': None,\n", 166 | " 'deleted': False,\n", 167 | " 'transportId': None,\n", 168 | " 'lastPublishedDraftId': '7f82df02-b728-4ca3-acdf-1940dd7de7b0',\n", 169 | " 'autoSaveId': '2e47dccc-8cbf-400d-8404-72e1f5298d0d',\n", 170 | " 'processId': 'acb6313c-bba8-46fd-9637-24c7d5463746',\n", 171 | " 'createDate': 1556264369787,\n", 172 | " 'favourite': False,\n", 173 | " 'contentId': None,\n", 174 | " 'contentVersion': 0,\n", 175 | " 'tags': [{'name': 'WillBeDeleted'}],\n", 176 | " 'applicationId': '',\n", 177 | " 'publicLink': False,\n", 178 | " 'lastPublishedDate': 1564498481791,\n", 179 | " 'lastPublishedUser': 'Simon',\n", 180 | " 'objectId': '117f7528-8504-4450-9fd6-8ebcf1749d18',\n", 181 | " 'publishedDraftId': '7f82df02-b728-4ca3-acdf-1940dd7de7b0',\n", 182 | " 'folderId': '117f7528-8504-4450-9fd6-8ebcf1749d18',\n", 183 | " 'parentObjectId': 'acb6313c-bba8-46fd-9637-24c7d5463746'}" 184 | ] 185 | }, 186 | "execution_count": 5, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "analysis.data" 193 | ] 194 | } 195 | ], 196 | "metadata": { 197 | "jupytext": { 198 | "formats": "ipynb,py:percent" 199 | }, 200 | "kernelspec": { 201 | "display_name": "Python 3", 202 | "language": "python", 203 | "name": "python3" 204 | }, 205 | "language_info": { 206 | "codemirror_mode": { 207 | "name": "ipython", 208 | "version": 3 209 | }, 210 | "file_extension": ".py", 211 | "mimetype": 
"text/x-python", 212 | "name": "python", 213 | "nbconvert_exporter": "python", 214 | "pygments_lexer": "ipython3", 215 | "version": "3.7.3" 216 | } 217 | }, 218 | "nbformat": 4, 219 | "nbformat_minor": 4 220 | } -------------------------------------------------------------------------------- /pycelonis1/01_use_pycelonis/00_basics/02_pulling_data_from_datamodel.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Pulling data from a Data Model" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from pycelonis import get_celonis\n", 17 | "from pycelonis.pql import PQL, PQLColumn, PQLFilter\n", 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "### Connect to Celonis" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "2020-03-02 13:00:58 - pycelonis: Login successful! Hello Dimitris\n" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "celonis = get_celonis(\"api token\")" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "### Select Datamodel" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 5, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "datamodel = celonis.datamodels.find('datamodel id/name')" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "This is the Table in our Datamodel. It has 2 Columns with the names \"A\" and \"B\"." 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 3, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "data": { 75 | "text/html": [ 76 | "
" 117 | ], 118 | "text/plain": [ 119 | " A B\n", 120 | "0 1 3\n", 121 | "1 2 4\n", 122 | "2 5 6" 123 | ] 124 | }, 125 | "execution_count": 3, 126 | "metadata": {}, 127 | "output_type": "execute_result" 128 | } 129 | ], 130 | "source": [] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "### Define the query you want to pull\n", 137 | "Example 1: pull a specific Column. We select Column \"A\" from table \"Table_name\" and name this \"Name 1\"" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 9, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/html": [ 148 | "
" 185 | ], 186 | "text/plain": [ 187 | " Name 1\n", 188 | "0 1\n", 189 | "1 2\n", 190 | "2 5" 191 | ] 192 | }, 193 | "execution_count": 9, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "query = PQLColumn(query = \"Table_name.A\", name = \"Name 1\")\n", 200 | "df = datamodel.get_data_frame(query)\n", 201 | "df" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "Example 2: Pull a custom PQL-Statement" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 11, 214 | "metadata": {}, 215 | "outputs": [ 216 | { 217 | "data": { 218 | "text/html": [ 219 | "
" 248 | ], 249 | "text/plain": [ 250 | " Name 1\n", 251 | "0 5" 252 | ] 253 | }, 254 | "execution_count": 11, 255 | "metadata": {}, 256 | "output_type": "execute_result" 257 | } 258 | ], 259 | "source": [ 260 | "query = PQLColumn(query = \"MAX(Table_name.A)\", name = \"Name 1\")\n", 261 | "df = datamodel.get_data_frame(query)\n", 262 | "df" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "Example 3: Do more things at once and add Filters" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 13, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "data": { 279 | "text/html": [ 280 | "
" 311 | ], 312 | "text/plain": [ 313 | " Name 1 Name 2\n", 314 | "0 2 1" 315 | ] 316 | }, 317 | "execution_count": 13, 318 | "metadata": {}, 319 | "output_type": "execute_result" 320 | } 321 | ], 322 | "source": [ 323 | "query = PQL()\n", 324 | "query += PQLColumn(\"MAX(Table_name.A)\", \"Name 1\")\n", 325 | "query += PQLColumn(\"COUNT(Table_name.B)\", \"Name 2\")\n", 326 | "query += PQLFilter(\"Filter Table_name.B < 5\")\n", 327 | "query += PQLFilter(\"Filter Table_name.B > 3\")\n", 328 | "\n", 329 | "df = datamodel.get_data_frame(query)\n", 330 | "df" 331 | ] 332 | } 333 | ], 334 | "metadata": { 335 | "kernelspec": { 336 | "display_name": "Python 3", 337 | "language": "python", 338 | "name": "python3" 339 | }, 340 | "language_info": { 341 | "codemirror_mode": { 342 | "name": "ipython", 343 | "version": 3 344 | }, 345 | "file_extension": ".py", 346 | "mimetype": "text/x-python", 347 | "name": "python", 348 | "nbconvert_exporter": "python", 349 | "pygments_lexer": "ipython3", 350 | "version": "3.7.4" 351 | } 352 | }, 353 | "nbformat": 4, 354 | "nbformat_minor": 4 355 | } 356 | -------------------------------------------------------------------------------- /pycelonis1/01_use_pycelonis/00_basics/03_pushing_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Pushing Data\n", 8 | "\n", 9 | "### This tutorial shows how data can be pushed from Python to Celonis. The data is pushed to a Celonis Data Pool or Data Model and is ready to use within IBC.\n", 10 | "In this to Tutorial we will:\n", 11 | "1. Connect to Celonis\n", 12 | "2. Prepare the data that needs to be pushed into a dataframe.\n", 13 | "3. Push the data into Celonis\n", 14 | " 1. Push data to datapool\n", 15 | " 2. Push data directly to a specific datamodel in the datapool" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "### Connect to Celonis" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 1, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "name": "stdout", 32 | "output_type": "stream", 33 | "text": [ 34 | "2020-10-23 11:50:39 - pycelonis: Login successful! Hello Noor\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "from pycelonis import get_celonis\n", 40 | "celonis = get_celonis()" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "### Prepare the data that needs to be pushed to celonis into a dataframe\n", 48 | "\n", 49 | "For the sake of this demo, we will create a dummy dataframe, but you can put any data in the dataframe as you like" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 2, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "data": { 59 | "text/html": [ 60 | "
" 111 | ], 112 | "text/plain": [ 113 | " A B C\n", 114 | "0 2 2 10\n", 115 | "1 4 0 2\n", 116 | "2 8 0 1\n", 117 | "3 0 0 8" 118 | ] 119 | }, 120 | "execution_count": 2, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "import pandas as pd\n", 127 | "\n", 128 | "df = pd.DataFrame({'A': [2, 4, 8, 0], 'B': [2, 0, 0, 0], 'C': [10, 2, 1, 8]})\n", 129 | "df.head()" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "### Push the data from the dataframe into a table in Celonis\n" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "#### A. Push data to the datapool\n", 144 | "\n", 145 | "##### Find the datapool\n" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 3, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "data_pool = celonis.pools.find(\"id_or_name_of_data_pool\")" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "##### If you are unsure about the name/id of your data pool you can list all the datapools available to you" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "celonis.pools" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "Now we push the data frame to the data pool. For this, we use the push_table() function, which has the following properties:\n", 178 | "* df_or_path: Either the pandas data frame or the path to a data frame that should be pushed to Celonis\n", 179 | "* table_name: The name that this data frame should have in the data pool\n", 180 | "* if_exists: Specifies what happens when the table already exists in the data pool. The options are 'replace', 'append', 'upsert', 'error'\n", 181 | "\n", 182 | "The additional parameters (like the setting of primary keys and column types) can be checked by pressing SHIFT + TAB, while the curser is in the function push_table()" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "data_pool.push_table(df,\"table_name\", if_exists = 'replace')" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "The table is now in the data pool and can be added to any data model in that pool." 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "### B. Push data directly to the datamodel\n", 206 | "We could also directly push the table to the data model. 
We would first need to find the data model.\n", 207 | "##### Find the datamodel" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 5, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "data_model = celonis.datamodels.find(\"datamodel id/name\")" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "##### If you are unsure about the name/id of your data pool you can list all the datamodels available to you" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 5, 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [ 232 | "celonis.datamodels" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "##### Push the dataframe as table in the datamodel\n", 240 | "\n", 241 | "Use the push_table() function again. In this example we will replace the table if it already exist, however we can also use the options: append and upsert as stated earlier.\n", 242 | "\n", 243 | "Here we have additionally the option to reload the datamodel. In this example we choose to set reload_datamodel as False. In this case, the changes will be effective with the next scheduled or manual datamodel reload." 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 6, 249 | "metadata": {}, 250 | "outputs": [ 251 | { 252 | "data": { 253 | "text/plain": [ 254 | "" 255 | ] 256 | }, 257 | "execution_count": 6, 258 | "metadata": {}, 259 | "output_type": "execute_result" 260 | } 261 | ], 262 | "source": [ 263 | "data_model.push_table(df,\"table_name\",reload_datamodel=False, if_exists = 'replace')" 264 | ] 265 | } 266 | ], 267 | "metadata": { 268 | "jupytext": { 269 | "formats": "ipynb,py:percent" 270 | }, 271 | "kernelspec": { 272 | "display_name": "Python 3", 273 | "language": "python", 274 | "name": "python3" 275 | }, 276 | "language_info": { 277 | "codemirror_mode": { 278 | "name": "ipython", 279 | "version": 3 280 | }, 281 | "file_extension": ".py", 282 | "mimetype": "text/x-python", 283 | "name": "python", 284 | "nbconvert_exporter": "python", 285 | "pygments_lexer": "ipython3", 286 | "version": "3.7.4" 287 | } 288 | }, 289 | "nbformat": 4, 290 | "nbformat_minor": 4 291 | } 292 | -------------------------------------------------------------------------------- /pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/main.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pycelonis import get_celonis, pql 3 | 4 | from utils import ext_data_utils, model_utils, utils 5 | 6 | # Load input data 7 | celonis = get_celonis() 8 | dm_id = 'TBD' 9 | datamodel = celonis.datamodels.find(dm_id) 10 | input_columns = [("col_name", "pretty_name"), ("col_name_2", "pretty_name_2")] 11 | input_filter = "FILTER TBD" 12 | 13 | train_df = utils.get_pql_dataframe(datamodel, input_columns, input_filter) 14 | 15 | # Import External Data for n-step Predictions (such as GDP below) 16 | ext_data = ext_data_utils.load_external_data( 17 | overall_gdp_csv="US_GDP.csv", 18 | industry_gdp_perc_csv="US_MANUF_GDP_PERC.csv", 19 | csv_col_1="GDP", 20 | csv_col_2="VAPGDPMA", 21 | csv_col_2_new="IND_PERC", 22 | col_final="IND_GDP", 23 | ) 24 | 25 | # INPUTS 26 | subsets = ['subset1', 'subset2'] # PARAM 27 | subset_needs_adjusts = ['subset2' 28 | ] # PARAM Subsets which need a baseline adjustment 29 | subset_col_name = 'subset_filtering_column' # PARAM 30 | input_y_col_name = "Y_column" # 
PARAM 31 | input_exo_col_name = 'ext_data_column' # PARAM 32 | model_class_col_name = 'classification_naming' # PARAM Column to flag train vs test vs forecast timeframes 33 | model_y_pred_col_name = 'Y_prediction_column' # PARAM 34 | val_size_perc = 0.2 35 | 36 | # OUTPUTS, for Exported Predictions to DM 37 | all_subset_results = {} 38 | all_subset_exports = {} 39 | output_col_names = { 40 | "index": "Date", # PARAM 41 | input_y_col_name: "Actual Y Value", # PARAM 42 | model_y_pred_col_name: "Predicted Y Value", # PARAM 43 | model_class_col_name: "Classification", # PARAM 44 | } 45 | 46 | # Run Predictions for each selected subset 47 | for subset in subsets: 48 | # Check if subset needs baseline adjustment 49 | to_adjust = False 50 | if subset in subset_needs_adjusts: 51 | to_adjust = True 52 | 53 | # Filter train df for subset 54 | subset_train_df = utils.get_subset_df(train_df, subset, subset_col_name) 55 | 56 | # Run Predictions model for this subset 57 | print('Run TS Predictions model for subset train df \n', 58 | subset_train_df.head()) 59 | subset_results = model_utils.run_predictions_model(subset_train_df, 60 | ext_data, 61 | input_y_col_name, 62 | input_exo_col_name, 63 | val_size_perc, to_adjust) 64 | # Store Output (subset Predictions) 65 | all_subset_results[subset] = subset_results 66 | print('subset ', subset, ' Prediction outputs have shape ', 67 | all_subset_results[subset].shape) 68 | # Store export-version of the Output (subset Predictions) 69 | all_subset_exports[subset] = utils.prepare_export_df( 70 | subset_results, output_col_names, model_y_pred_col_name) 71 | 72 | print("Finished running predictions for all subsets, total output shape is ", 73 | all_subset_results[subset].shape) 74 | print("Subsets are ", all_subset_exports.keys()) 75 | 76 | # Combine Results into single Export table 77 | # Add new 'subset name' column to the export-version of Predictions 78 | export_df = utils.constitute_export_df(all_subset_exports, subset_col_name) 79 | 80 | # Export table to DM 81 | export_table_name = "Predictions_Output" 82 | print('Export df shape is ', export_df.shape) 83 | print('Export df head is ') 84 | print(export_df.head(10)) 85 | print('Export df tail is ') 86 | print(export_df.tail(10)) 87 | datamodel.push_table(export_df, 88 | export_table_name, 89 | reload_datamodel=False, 90 | if_exists="replace") 91 | -------------------------------------------------------------------------------- /pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/celonis/pycelonis-examples/6a51c8eaf84d59e7b69d457d5748ea7348659d6f/pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/utils/__init__.py -------------------------------------------------------------------------------- /pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/utils/ext_data_utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from . 
import plot_utils 3 | 4 | 5 | def load_external_data( 6 | overall_gdp_csv, 7 | industry_gdp_perc_csv, 8 | csv_col_1, 9 | csv_col_2, 10 | csv_col_2_new, 11 | col_final, 12 | ): 13 | """Load External/GDP data""" 14 | 15 | # Load National GDP data (need to create/upload external csv) 16 | all_gdp_csv = pd.read_csv(overall_gdp_csv) 17 | 18 | # Load Industry GDP % csv (need to create/upload external csv) 19 | all_gdp_ind_perc_csv = pd.read_csv(industry_gdp_perc_csv) 20 | # Rename col 21 | all_gdp_ind_perc_csv = all_gdp_ind_perc_csv.rename( 22 | columns={csv_col_2: csv_col_2_new}) 23 | 24 | # Manually estimate GDP values for future quarters (CORE for TS Predictions) 25 | all_gdp = all_gdp_csv.copy() 26 | all_gdp = all_gdp.append([ 27 | { 28 | "DATE": "7/1/2020", 29 | csv_col_1: 20200.0 30 | }, 31 | { 32 | "DATE": "10/1/2020", 33 | csv_col_1: 21000.0 34 | }, 35 | { 36 | "DATE": "1/1/2021", 37 | csv_col_1: 21000.0 38 | }, 39 | ]) 40 | all_gdp = all_gdp.reset_index(drop=True) 41 | 42 | # Manually estimate Industry GDP % values for future quarters (CORE for TS Predictions) 43 | all_gdp_ind_perc = all_gdp_ind_perc_csv.append([ 44 | { 45 | "DATE": "4/1/2020", 46 | csv_col_2_new: 11.0 47 | }, 48 | { 49 | "DATE": "7/1/2020", 50 | csv_col_2_new: 11.0 51 | }, 52 | { 53 | "DATE": "10/1/2020", 54 | csv_col_2_new: 11.0 55 | }, 56 | { 57 | "DATE": "1/1/2021", 58 | csv_col_2_new: 11.0 59 | }, 60 | ]) 61 | # Convert to % 62 | all_gdp_ind_perc[csv_col_2_new] = all_gdp_ind_perc[csv_col_2_new] / 100.0 63 | all_gdp_ind_perc = all_gdp_ind_perc.reset_index(drop=True) 64 | all_gdp_ind_perc.head() 65 | 66 | # Calculate Industry GDP 67 | all_gdp[col_final] = all_gdp[csv_col_1] * all_gdp_ind_perc[csv_col_2_new] 68 | 69 | # Resample to weekly GDP data 70 | all_gdp["DATE"] = pd.to_datetime(all_gdp["DATE"], format="%m/%d/%Y") 71 | all_gdp_weekly = all_gdp.copy() 72 | all_gdp_weekly = all_gdp_weekly.drop(columns=csv_col_1) 73 | all_gdp_weekly = all_gdp_weekly.set_index("DATE").resample( 74 | "W").ffill().reset_index() 75 | all_gdp_weekly[col_final] = all_gdp_weekly[col_final] * 4 / 52 76 | # Plot resampled external data 77 | plot_utils.plot_gdp(all_gdp_weekly, col_final) 78 | 79 | # Smoothen the weekly GDP data 80 | ext_data = all_gdp_weekly.copy() 81 | ext_data[col_final] = ext_data.iloc[:, 1].rolling(window=12, 82 | center=False, 83 | min_periods=1).mean() 84 | # Plot final external data 85 | plot_utils.plot_gdp(ext_data, col_final) 86 | return ext_data 87 | -------------------------------------------------------------------------------- /pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/utils/model_utils.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import pandas as pd 6 | 7 | import pmdarima 8 | import statsmodels.api as sm 9 | from sklearn import linear_model, metrics 10 | from statsmodels.tsa.statespace import sarimax 11 | from . import utils, plot_utils 12 | 13 | 14 | def run_predictions_model(df, 15 | ext_data, 16 | y_col_name, 17 | exo_col_name, 18 | val_size_perc=0.2, 19 | to_adjust_years=False): 20 | """Run Predictions Model for Train df 21 | 22 | Parameters 23 | ---------- 24 | df : DataFrame 25 | Train set Dataframe containing the Y values of the Time Series to predict 26 | ext_data : DataFrame 27 | External data to use as Regressor to model and predict the TS Trend 28 | val_size_perc : Float 29 | Part of the df to use for Validation. 
30 | Format: [0.0;1.0] 31 | to_adjust_years : Boolean 32 | True if baseline level of the TS has changed during its timeframe and should be adjusted 33 | By default False 34 | y_col_name : String 35 | Column name of the TS values column (Y column). 36 | exo_col_name : String 37 | Column name of the External Regressor values column. 38 | 39 | Returns 40 | ------- 41 | DataFrame 42 | Output DataFrame with the n-step Predictions for the TS (Predict the n future Y values). 43 | n is set as the minimum between the number of future values from the External data and the predicted Residuals 44 | """ 45 | 46 | # Reindex and create Train Df 47 | df = df.reset_index(drop=True) 48 | train_df = df.copy() 49 | print('train df head looks like: \n', train_df.head()) 50 | 51 | # Clean data: fill empty weeks with 0 value 52 | train_df = utils.fill_empty_dates(train_df) 53 | 54 | # Cap the high outliers to a max value 55 | train_df = utils.cap_outliers( 56 | train_df, 57 | max_outlier_value=1000) # PARAM - max_outlier_value: Max value 58 | 59 | # Adjust past data if baseline changed at date change_date 60 | if to_adjust_years: 61 | train_df = utils.adjust_baseline(train_df, 62 | change_date='YYYY-MM-DD', 63 | end_date='YYYY-MM-DD') 64 | # PARAM - change_date: date at which baseline level changed, end_date: end date of new baseline level 65 | 66 | # Plot preprocessed Train Df 67 | plot_utils.plot_clean_y(df, train_df, 68 | y_max=1000 + 100) #PARAM - y axis max value 69 | 70 | #### MODEL: Y = Trend + Seasonality + Residuals 71 | 72 | ### Trend: Calculate, Model and Predict future values 73 | trend_col_name = 'Trend' # PARAM - Trend column name 74 | train_df[trend_col_name] = utils.calculate_trend( 75 | train_df, 76 | ts_seasonality= 77 | 52, # PARAM - Seasonality timeframe e.g. 52 if weekly data with annual seasonality. 
7 if daily TS with weekly seasonality 78 | center=False) 79 | # Plot Y and Trend 80 | plot_utils.plot_y_trend(train_df, 81 | train_df[trend_col_name], 82 | y_min=0, 83 | y_max=100) 84 | 85 | # Use External data/GDP to fit and predict the Trend 86 | print('train df shape is ', 87 | train_df.dropna().shape, ', adding the external data into the df...') 88 | train_df = utils.combine_ext_data(train_df, ext_data, days_to_shift=None) 89 | 90 | # Define X=GDP and Y=Trend for Regression model 91 | exo_pretty_name = "Regressor" # PARAM - External Data/GDP column 92 | X, Y = utils.get_trend_and_exo_for_fit(train_df, exo_col_name, 93 | trend_col_name, val_size_perc) 94 | # Plot Y, Trend and Exo Regr 95 | plot_utils.plot_y_trend_ext(train_df, 96 | Y, 97 | exo_col_name, 98 | exo_pretty_name, 99 | y_min=0, 100 | y_max=1100, 101 | y_min_exo=100, 102 | y_max_exo=200) 103 | 104 | # Fit Regression of Y=Trend on X=Exogenous Regressor 105 | reg = linear_model.LinearRegression().fit(X, Y) 106 | # Predict future Trend with the fitted Regression 107 | trend_pred_col_name = "Predicted Trend" 108 | X_F, train_df = predict_trend(train_df, reg, exo_col_name, 109 | trend_pred_col_name) 110 | # Plot Trend, External data/GDP and Predicted Trend 111 | plot_utils.plot_y_pred_trend_ext(train_df, 112 | exo_col_name, 113 | X, 114 | Y, 115 | X_F, 116 | y_min=0, 117 | y_max=1100, 118 | y_min_exo=100, 119 | y_max_exo=200) 120 | print('End of Trend part, df is \n', train_df.head()) 121 | 122 | ### Seasonality: Calculate S for each date of the seasonality window 123 | 124 | # Calculate Y - Trend 125 | train_df["Y - Trend"] = train_df[y_col_name] - train_df[trend_col_name] 126 | 127 | # Calculate Seasonality by moving avg on Y - T 128 | s = train_df["Y - Trend"].rolling( 129 | window=10, 130 | center=True).mean() # PARAM - window: Moving avg window to smoothen S 131 | # Avg across periods to obtain 1 S value per date of a period 132 | s = s.groupby(s.index.week).mean() 133 | 134 | # Add Seasonality to Df 135 | seasonality_col_name = "Seasonality" # PARAM - S column name 136 | train_df[seasonality_col_name] = np.nan 137 | for i in train_df.index: 138 | train_df.loc[i][seasonality_col_name] = s[i.week] 139 | # (Optional) Fix border dates with Null values 140 | # seas_period_days = 52 * 7 # PARAM - seasonsality period in days 141 | # train_df = utils.fill_seasonality(train_df, seas_period_days, 142 | # seasonality_col_name) 143 | 144 | # Plot Y, T and S 145 | plot_utils.plot_y_t_s_with_pred(train_df, trend_col_name, 146 | seasonality_col_name, trend_pred_col_name) 147 | 148 | ### Residuals: Calculate, Model and Predict future values 149 | 150 | # Calculate R = Y - Trend - Season 151 | train_df["Y - T - S"] = train_df[y_col_name] - train_df[ 152 | trend_col_name] - train_df[seasonality_col_name] 153 | # Create R df 154 | r_col_name = "Y - T - S" # PARAM - R column name 155 | r = train_df[r_col_name] 156 | # Plot R 157 | plot_utils.plot_r(train_df, r_col_name) 158 | # R shape 159 | print('R df shape is ', r.dropna().shape) 160 | # Stationarity test 161 | res = sm.tsa.adfuller(r.dropna(), regression="c") 162 | print("adf test p-value is:{}".format(res[1])) 163 | # Verify that p value is low 164 | # ACF PACF on R 165 | plot_utils.plot_acf_pacf_r(r, lags=25) # PARAM - # lags for acf pacf 166 | # Deduce ARMA(p,q) model for R 167 | 168 | # Create R df for R Model 169 | columns_to_drop = [y_col_name, exo_col_name] 170 | col_to_rename = {"index": "Date"} 171 | r_df = create_r_df(train_df, columns_to_drop, col_to_rename) 172 | 173 | # Fit 
ARIMA Model on R for R predictions 174 | p, d, q = 3, 0, 3 # PARAM - p for AR, d for I, q for MA. 175 | P, D, Q, s = None, None, None, None # If seasonality use P,D,Q,s, if not set to None. 176 | n_pred = 5 # n_pred is # future points to forecast 177 | model = None # (Optional) model - to input an existing loaded model 178 | exo = None # (Optional) exo - to input exogenous regressors 179 | r_df = r_df.dropna() 180 | model_r, results_df_r = get_results_with_val(r_df, exo, p, d, q, P, D, Q, s, 181 | model, r_col_name, 182 | val_size_perc, n_pred) 183 | # Add Predicted R to df 184 | r_col_name = "Predicted R" # PARAM - R column name for df 185 | class_col_name = "Classification" # PARAM - classification col name (train/test/forecast) 186 | train_df = add_r(train_df, results_df_r, r_col_name, class_col_name) 187 | 188 | ### Calculate Total Y Prediction = Predicted T + S + Predicted R 189 | 190 | y_pred_col_name = "Y Prediction" # PARAM - y pred column names 191 | train_df = calc_y_pred(train_df, y_pred_col_name, trend_pred_col_name, 192 | seasonality_col_name, class_col_name) 193 | print('End of df with predictions is \n', train_df.tail(n=20)) 194 | 195 | # Plot and show Final Df with predictions 196 | plot_utils.plot_final(train_df, trend_col_name, seasonality_col_name, 197 | r_col_name, trend_pred_col_name, y_pred_col_name, 198 | class_col_name) 199 | 200 | # Return Final Df with Y predictions 201 | return train_df 202 | 203 | 204 | def get_results_with_val(df, 205 | exo, 206 | p, 207 | d, 208 | q, 209 | P, 210 | D, 211 | Q, 212 | s, 213 | model, 214 | y_col_name, 215 | val_size_perc, 216 | n_predictions=5): 217 | """Fit SARIMAX on input df (optional input and future exo regr) and predict validation + future values 218 | Or use param fitted model (optional input and future exo regr) to predict validation + future values 219 | Plot input and output (val+future) predictions 220 | 221 | Parameters 222 | ---------- 223 | df : DataFrame 224 | R Time Series 225 | exo : DataFrame, optional 226 | Exogenous Regressors to model Y 227 | p : int 228 | AR parameter for the SARIMAX on Y 229 | d : int 230 | Integrated parameter for the SARIMAX on Y 231 | q : int 232 | MA parameter for the SARIMAX on Y 233 | P : int 234 | Seasonal AR parameter for the SARIMAX on Y 235 | D : int 236 | Seasonal Integrated parameter for the SARIMAX on Y 237 | Q : int 238 | Seasonal MA parameter for the SARIMAX on Y 239 | s : int 240 | Seasonality timeframe for Y 241 | model : SARIMAX Fitted model, optional 242 | Pre-fitted SARIMAX model to use to predict Y values 243 | y_col_name : String 244 | Column name of Y values 245 | val_size_perc : Float 246 | Part of the df to use for Validation. 
247 | Format: [0.0;1.0] 248 | n_predictions : int, optional 249 | Number of future values to predict for Y, by default 5 250 | 251 | Returns 252 | ------- 253 | smodel: json 254 | Fitted SARIMAX model on Y 255 | results: DataFrame 256 | DataFrame including the train, validation and forecast values from the SARIMAX fitted model on Y Time Series 257 | """ 258 | 259 | X = df[y_col_name].values 260 | Y = df["Date"].values 261 | train_size = int(len(X) * (1 - val_size_perc)) 262 | train, test = X[:train_size], X[train_size:len(X)] 263 | week = Y[train_size:len(X)] 264 | exo_past, exo_future = None, None 265 | 266 | # Split Exo Regressor into past (train + val) and future (forecast) values 267 | if exo is not None: 268 | exo_past, exo_future = exo[:len(X)], exo[len(X):len(exo)] 269 | 270 | # Create SARIMAX model or use input model 271 | print("Checking model for fit...") 272 | if model is None: 273 | print("No input model, starting to fit SARIMAX" + str(p) + str(d) + 274 | str(q) + str(P) + str(D) + str(Q) + str(s)) 275 | smodel = pmdarima.arima.ARIMA(order=[p, d, q], 276 | method="lbfgs", 277 | maxiter=50, 278 | suppress_warnings=True) 279 | smodel = smodel.fit(df[y_col_name].values, exo_past) 280 | print("Finished SARIMAX fit.") 281 | else: 282 | print("Existing input model, will use it") 283 | smodel = model 284 | 285 | # Test model on the Validation set 286 | history = [x for x in train] 287 | predictions = list() 288 | for t in range(len(test)): 289 | model = sarimax.SARIMAX(history, 290 | order=smodel.order, 291 | seasonal_order=smodel.seasonal_order, 292 | enforce_stationarity=False) 293 | model_fit = model.fit(disp=0) 294 | output = model_fit.forecast() 295 | if output[0] < 0: 296 | yhat = 0 297 | else: 298 | yhat = output[0] 299 | predictions.append(yhat) 300 | obs = test[t] 301 | history.append(obs) 302 | print("predicted=%f, expected=%f" % (yhat, obs)) 303 | error = metrics.mean_squared_error(test, predictions) 304 | print("Test MSE: %.3f" % error) 305 | 306 | # Add Train set to output 307 | data = pd.DataFrame() 308 | data["Date"] = Y[0:train_size] 309 | data["Predicted Net Order Value"] = None 310 | data["Actual Net Order Value"] = X[0:train_size] 311 | data["Classification"] = "train" 312 | 313 | # Add Validation set to output 314 | Tested = pd.DataFrame() 315 | Tested["Date"] = week 316 | Tested["Predicted Net Order Value"] = predictions 317 | Tested["Actual Net Order Value"] = test 318 | Tested["Classification"] = "test" 319 | Tested["Predicted Net Order Value"] = Tested[ 320 | "Predicted Net Order Value"].astype(float) 321 | Tested["Date"] = pd.to_datetime(Tested["Date"]) 322 | 323 | # Add Forecast set to output 324 | print("Predicting forecast values...") 325 | n_periods = n_predictions 326 | fitted, confint = smodel.predict(n_periods=n_periods, 327 | return_conf_int=True, 328 | exogenous=exo_future) 329 | print("Finished predicting forecast values.") 330 | rng = pd.date_range(df["Date"].max(), periods=n_periods, freq="7D") 331 | forecast = pd.DataFrame({ 332 | "Date": rng, 333 | "Predicted Net Order Value": fitted, 334 | "Actual Net Order Value": None, 335 | "Classification": "forecast", 336 | "Conf_lower": confint[:, 0], 337 | "Conf_Upper": confint[:, 1], 338 | }) 339 | forecast = forecast.drop(forecast.index[0]) 340 | 341 | # Combine all sets 342 | results = data.append(Tested, ignore_index=True) 343 | results = results.append(forecast, ignore_index=True) 344 | results["Date"] = pd.to_datetime(results["Date"]) 345 | # Reformat Dates to Date type 346 | results["Date"] = 
pd.to_datetime(results["Date"]) 347 | return smodel, results 348 | 349 | 350 | def predict_trend(train_df, reg, exo_col_name, pred_trend_col_name): 351 | """Trend Regression to predict future Trend""" 352 | # Get Regressor on prediction timeframe 353 | X_F = train_df[exo_col_name].dropna().values.reshape(-1, 1) 354 | print(X_F.shape) 355 | print(reg.predict(X_F).shape) 356 | # Predict Trend using fitted Regression on Regressor 357 | t_pred = reg.predict(X_F) 358 | len_pred = t_pred.shape[0] 359 | train_df["Predicted Trend"] = np.nan 360 | train_df["Predicted Trend"][-len_pred:] = t_pred.ravel() 361 | return X_F, train_df 362 | 363 | 364 | def create_r_df(train_df, columns_to_drop, col_to_rename): 365 | """Create Residuals DataFrame""" 366 | r_df = train_df.copy() 367 | r_df = r_df.drop(columns=columns_to_drop) 368 | r_df = r_df.reset_index() 369 | r_df = r_df.rename(columns=col_to_rename) 370 | return r_df 371 | 372 | 373 | def add_r(train_df, results_df_r, r_col_name, class_col_name): 374 | """Add Residuals (Train, Val and Forecast) to the Input Df""" 375 | results_df_r_idx = results_df_r.set_index("Date") 376 | train_df[r_col_name] = np.nan 377 | train_df[r_col_name] = results_df_r_idx["Predicted Net Order Value"] 378 | train_df[class_col_name] = results_df_r_idx[class_col_name] 379 | return train_df 380 | 381 | 382 | def calc_y_pred(train_df, y_pred_col_name, trend_pred_col_name, 383 | seasonality_col_name, class_col_name): 384 | """Calculate Predicted Y with Predicted T, S and Predicted R components, on Validation and Forecast sets""" 385 | train_df[y_pred_col_name] = np.nan 386 | # Validation Y values 387 | mask = train_df[class_col_name] == "test" 388 | train_df.loc[mask, y_pred_col_name] = (train_df[trend_pred_col_name] + 389 | train_df[seasonality_col_name] + 390 | train_df["Predicted R"]) 391 | # Future Y values 392 | mask = train_df[class_col_name] == "forecast" 393 | train_df.loc[mask, y_pred_col_name] = (train_df[trend_pred_col_name] + 394 | train_df[seasonality_col_name] + 395 | train_df["Predicted R"]) 396 | return train_df 397 | -------------------------------------------------------------------------------- /pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/utils/plot_utils.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from statsmodels.graphics import tsaplots 3 | 4 | 5 | def plot_clean_y(df, train_df, y_max): 6 | """Plot Pre-processed Y""" 7 | fig, ax = plt.subplots(figsize=(20, 10)) 8 | plt.plot(df["Date"], df["Net Order Value"], c="c", label="Y Original") 9 | plt.plot(train_df["Date"], train_df["Net Order Value"], c="b", label="Y") 10 | plt.legend(loc="upper right") 11 | plt.axis([min(train_df["Date"]), max(train_df["Date"]), 0, y_max]) 12 | plt.show() 13 | 14 | 15 | def plot_gdp(ext_data, col_final): 16 | """Plot resulting Industry GDP""" 17 | fig, ax = plt.subplots(figsize=(20, 10)) 18 | plt.plot(ext_data["DATE"], ext_data[col_final], c="b") 19 | plt.show() 20 | 21 | 22 | def plot_y_trend(train_df, t, y_min, y_max): 23 | """Plot Y and Trend""" 24 | fig, ax = plt.subplots(figsize=(20, 10)) 25 | plt.plot(train_df["Date"], t, color="b", label="Trend") 26 | plt.plot(train_df["Date"], 27 | train_df["Net Order Value"], 28 | color="g", 29 | label="Y") 30 | plt.legend(loc="upper right") 31 | ax.set_ylim([y_min, y_max]) 32 | plt.show() 33 | 34 | 35 | def plot_y_trend_ext(train_df, Y, exo_col_name, exo_pretty_name, y_min, y_max, 36 | y_min_exo, y_max_exo): 37 | """Plot Y, 
Trend and Exo Regressors""" 38 | fig, ax = plt.subplots(figsize=(20, 10)) 39 | ax2 = ax.twinx() 40 | # Net Order Value 41 | ax.plot(train_df.index, train_df["Net Order Value"], color="g", label="Y") 42 | # External data/GDP 43 | ax2.plot(train_df.index, 44 | train_df[exo_col_name], 45 | color="c", 46 | label=exo_pretty_name) 47 | # Trend 48 | ax.plot(train_df.dropna().index[:len(Y)], Y, color="b", label="Trend") 49 | plt.legend(loc="upper right") 50 | ax.set_ylim([y_min, y_max]) 51 | ax2.set_ylim([y_min_exo, y_max_exo]) 52 | plt.show() 53 | 54 | 55 | def plot_y_pred_trend_ext(train_df, exo_col_name, X, Y, X_F, y_min, y_max, 56 | y_min_exo, y_max_exo): 57 | """Plot Predicted Y, Trend and Exo Regressors""" 58 | fig, ax = plt.subplots(figsize=(20, 10)) 59 | ax2 = ax.twinx() 60 | # External Data/GDP 61 | ax2.plot(train_df[exo_col_name].dropna().index, 62 | train_df[exo_col_name].dropna(), 63 | color="m", 64 | label="External data (Full)") 65 | ax2.plot(train_df.dropna().index[:len(X)], 66 | X, 67 | color="c", 68 | label="External data (Train for Trend fit)") 69 | # Trend 70 | ax.plot(train_df.dropna().index[:len(Y)], 71 | Y, 72 | color="b", 73 | label="Trend (Train for Trend fit)") 74 | # Predicted Trend (through Reg) 75 | ax.plot(train_df[exo_col_name].dropna().index, 76 | train_df["Predicted Trend"][-len(X_F):], 77 | color="g", 78 | label="Trend (Predicted)") 79 | ax.legend(loc="upper right") 80 | ax2.legend(loc="lower right") 81 | ax.set_ylim([y_min, y_max]) 82 | ax2.set_ylim([y_min_exo, y_max_exo]) 83 | plt.show() 84 | 85 | 86 | def plot_y_t_s(train_df, trend_col_name, seasonality_col_name): 87 | """Plot Y, T and S""" 88 | fig, ax = plt.subplots(figsize=(20, 10)) 89 | plt.plot(train_df.index, train_df["Net Order Value"], color="g", label="Y") 90 | plt.plot(train_df.index, train_df[trend_col_name], color="b", label="T") 91 | plt.plot(train_df.index, 92 | train_df[trend_col_name] + train_df[seasonality_col_name], 93 | color="m", 94 | label="T+S") 95 | plt.legend(loc="upper right") 96 | plt.show() 97 | 98 | 99 | def plot_y_t_s_with_pred(train_df, trend_col_name, seasonality_col_name, 100 | pred_trend_col_name): 101 | """Plot Y, T, S and Predicted T + S""" 102 | fig, ax = plt.subplots(figsize=(20, 10)) 103 | plt.plot(train_df.index, train_df["Net Order Value"], color="g", label="Y") 104 | plt.plot(train_df.index, train_df[trend_col_name], color="b", label="T") 105 | plt.plot(train_df.index, 106 | train_df[pred_trend_col_name], 107 | color="c", 108 | label="T Pred") 109 | 110 | plt.plot(train_df.index, 111 | train_df[trend_col_name] + train_df[seasonality_col_name], 112 | color="m", 113 | label="T+S") 114 | plt.plot(train_df.index, 115 | train_df[pred_trend_col_name] + train_df[seasonality_col_name], 116 | color="r", 117 | label="T Pred + S") 118 | plt.legend(loc="upper right") 119 | plt.show() 120 | 121 | 122 | def plot_r(train_df, r_col_name): 123 | """Plot Residuals""" 124 | fig, ax = plt.subplots(figsize=(20, 10)) 125 | plt.plot(train_df.index, train_df[r_col_name], color="y", label="R") 126 | plt.legend(loc="upper right") 127 | plt.show() 128 | 129 | 130 | def plot_acf_pacf_r(r, lags): 131 | """Plot ACF and PACF plots for Residuals""" 132 | fig, ax = plt.subplots(2, 1, figsize=(20, 10)) 133 | fig = tsaplots.plot_acf(r.dropna(), lags=lags, ax=ax[0]) 134 | fig = tsaplots.plot_pacf(r.dropna(), lags=lags, ax=ax[1]) 135 | plt.show() 136 | 137 | 138 | def plot_results(results): 139 | """Plot all Residual sets (train, Val and Forecast)""" 140 | fig, ax = plt.subplots(figsize=(20, 10)) 141 | 
results["Date"] = results["Date"].astype(str) 142 | plt.plot(results["Date"], results["Predicted Net Order Value"], c="b") 143 | plt.plot(results["Date"], results["Actual Net Order Value"], c="r") 144 | plt.fill_between(results["Date"], 145 | results["Conf_lower"], 146 | results["Conf_Upper"], 147 | color="k", 148 | alpha=0.15) 149 | for i, tick in enumerate(ax.get_xticklabels()): 150 | tick.set_rotation(45) 151 | tick.set_visible(False) 152 | if i % 3 == 0: 153 | tick.set_visible(True) 154 | plt.show() 155 | 156 | 157 | def plot_final(train_df, trend_col_name, seasonality_col_name, r_col_name, 158 | trend_pred_col_name, y_pred_col_name, class_col_name): 159 | """Plot Y, T, S, R and Predicted Y with intermediary components""" 160 | fig, ax = plt.subplots(figsize=(20, 10)) 161 | plt.plot(train_df.index, train_df["Net Order Value"], color="g", label="Y") 162 | plt.plot(train_df.index, train_df[trend_col_name], color="b", label="T") 163 | plt.plot(train_df.index, 164 | train_df[trend_col_name] + train_df[seasonality_col_name], 165 | color="m", 166 | label="T+S") 167 | # Seasonality 168 | plt.plot(train_df.index, 169 | train_df[seasonality_col_name], 170 | color="m", 171 | label="S") 172 | # Predicted Trend 173 | plt.plot(train_df.index, 174 | train_df[trend_pred_col_name], 175 | color="y", 176 | label="T Pred") 177 | plt.plot(train_df.index, 178 | train_df[trend_pred_col_name] + train_df[seasonality_col_name], 179 | color="k", 180 | label="T Pred + S") 181 | # Predicted Y on Validation part 182 | plt.plot( 183 | train_df[train_df[class_col_name] == "test"].index, 184 | train_df[train_df[class_col_name] == "test"][y_pred_col_name], 185 | color="c", 186 | label="Y Pred (val)", 187 | ) 188 | # Predicted Y on Future part 189 | plt.plot( 190 | train_df[train_df[class_col_name] == "forecast"].index, 191 | train_df[train_df[class_col_name] == "forecast"][y_pred_col_name], 192 | color="r", 193 | label="Y Pred (future)", 194 | ) 195 | plt.legend(loc="upper right") 196 | plt.show() 197 | -------------------------------------------------------------------------------- /pycelonis1/02_try_ml_use_cases/00_time_series_forecasting/01_ts_forecasting/utils/utils.py: -------------------------------------------------------------------------------- 1 | from pycelonis import pql 2 | 3 | import datetime 4 | import isoweek 5 | import pandas as pd 6 | 7 | ## Loading Data 8 | 9 | 10 | def get_pql_dataframe(dm, input_columns, input_filter): 11 | """Query input columns with filters from input DM""" 12 | query = pql.PQL() 13 | for col_name, col_pretty_name in input_columns: 14 | query += pql.PQLColumn(col_name, col_pretty_name) 15 | if input_filter != '': 16 | query += pql.PQLFilter(input_filter) 17 | queried_df = dm.get_data_frame(query) 18 | return queried_df 19 | 20 | 21 | def get_subset_df(train_df, subset, subset_col_name): 22 | """Filter df for subset""" 23 | subset_train_df = train_df[train_df[subset_col_name] == subset] 24 | subset_train_df.drop(columns=[subset_col_name], inplace=True) 25 | return subset_train_df 26 | 27 | 28 | ## Pre-processing 29 | 30 | 31 | def fill_empty_dates(df): 32 | """Fill empty weeks of date Df""" 33 | my_date = datetime.datetime.now() 34 | year, week_num, day_of_week = my_date.isocalendar() 35 | d = isoweek.Week(year, week_num - 1).monday() 36 | rng = pd.date_range(df["Date"].min(), d, freq="7D") 37 | df = df.set_index("Date").reindex(rng, fill_value=0).reset_index() 38 | df.rename(columns={"index": "Date"}, inplace=True) 39 | return df 40 | 41 | 42 | def cap_outliers(df, 
max_outlier_value): 43 | """Clean outliers""" 44 | df.loc[df["Net Order Value"] > max_outlier_value, 45 | "Net Order Value"] = max_outlier_value 46 | return df 47 | 48 | 49 | def adjust_baseline(df, change_date, end_date): 50 | """Calculate baseline avg difference between TS before change_date vs TS between change_date and end_date""" 51 | diff_high_low = ( 52 | df.loc[(change_date < df["Date"]) & 53 | (df["Date"] <= end_date), "Net Order Value"].mean() - 54 | df.loc[df["Date"] <= change_date, "Net Order Value"].mean()) 55 | # Adjust lower baseline with the above avg difference 56 | df.loc[df["Date"] <= change_date, "Net Order Value"] += diff_high_low 57 | return df 58 | 59 | 60 | ## Model utils 61 | 62 | 63 | def calculate_trend(df, ts_seasonality, center=False): 64 | """Calculate Trend""" 65 | t = df.iloc[:, 1].rolling(window=ts_seasonality, center=center).mean() 66 | return t 67 | 68 | 69 | def combine_ext_data(train_df, ext_data, days_to_shift=None): 70 | """Combine External/GDP data with Y""" 71 | # Add Exo regressors (GDP) to train df 72 | train_df = train_df.set_index("Date") 73 | ext_data["DATE"] = pd.to_datetime(ext_data["DATE"]) 74 | ext_data = ext_data.set_index("DATE") 75 | # Optional - Align dates of Industry GDP with Trend 76 | if days_to_shift is not None: 77 | ext_data = ext_data.shift(days_to_shift, freq="D") 78 | # Combine Train Df with GDP 79 | train_df = train_df.combine_first(ext_data) 80 | return train_df 81 | 82 | 83 | def get_trend_and_exo_for_fit(train_df, exo_col_name, trend_col_name, 84 | val_size_perc): 85 | """Create subsets for Trend Fit""" 86 | # Create X set (Exo Regressor) 87 | X = train_df.dropna()[exo_col_name].values 88 | train_size = int(len(X) * (1 - val_size_perc)) 89 | X_train = X[:train_size].reshape(-1, 1) 90 | # Create Y set (Trend to fit) 91 | Y_train = train_df.dropna()[trend_col_name].values[:train_size].reshape( 92 | -1, 1) 93 | return X_train, Y_train 94 | 95 | 96 | def fill_seasonality(train_df, 97 | seas_period_days, 98 | seasonality_col_name='Seasonality'): 99 | """Fill empty seasonality dates""" 100 | delta = datetime.timedelta(days=-seas_period_days) 101 | for i in train_df[train_df[seasonality_col_name].isnull() == True].index: 102 | print(i, i + delta) 103 | train_df.loc[i][seasonality_col_name] = train_df.loc[ 104 | i + delta][seasonality_col_name] 105 | return train_df 106 | 107 | 108 | ## Exports 109 | 110 | 111 | def prepare_export_df(train_df, output_col_names, y_pred_col_name): 112 | """Reformat results for Export to DM""" 113 | print(output_col_names) 114 | cols_to_load = list(output_col_names) 115 | cols_to_load.remove('index') 116 | print(cols_to_load) 117 | export_df = pd.DataFrame(train_df[cols_to_load]) 118 | export_df.reset_index(inplace=True) 119 | export_df.rename(columns=output_col_names, inplace=True) 120 | return export_df 121 | 122 | 123 | def constitute_export_df(all_subset_exports, subset_col_name): 124 | """Create export-version Df from the export-version of subsets""" 125 | export_df = pd.DataFrame() 126 | for key in all_subset_exports: 127 | subset_df = all_subset_exports[key] 128 | subset_df[subset_col_name] = key 129 | export_df = pd.concat([export_df, subset_df], axis=0) 130 | return export_df -------------------------------------------------------------------------------- /pycelonis1/03_Connectivity/02a_Extraction_Mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 
Extraction Mover" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**This tutorial shows how to copy a simple extraction from one team/ data job to another one, independent of the cluster.**\n", 15 | "\n" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "**To do so we first need to connect to the source data model.**" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "import pycelonis\n", 32 | "from pycelonis import get_celonis\n", 33 | "\n", 34 | "c_source = get_celonis(\"URL to the team from which you want to copy.\", \"Specify a valid API key for your source cloud team.\")\n", 35 | "# All IDs required can be found within the URLs when displaying the related objects in the EMS.\n", 36 | "source_data_pool = c_source.pools.find('Name or ID of the source data pool.')\n", 37 | "source_data_job = source_data_pool.data_jobs.find(\"Name or ID of the source data job.\")\n", 38 | "source_extraction = source_data_job.extractions.ids['ID of the source extraction task.']" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# Print source variables\n", 48 | "print(source_data_pool)\n", 49 | "print(source_data_job)\n", 50 | "print(source_extraction)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "**In the next step we connect to the target data pool.**" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "c_target = get_celonis(\"URL to cloud team where you want to copy to.\", \"Specify a valid API key for your target cloud team.\")\n", 67 | "target_data_pool = c_target.pools.find(\"Name or ID of the target data pool.\")\n", 68 | "target_data_job = target_data_pool.data_jobs.find(\"Name or ID of the target data job.\")" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "# Print target variables\n", 78 | "print(target_data_pool)\n", 79 | "print(target_data_job)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "**Create target extraction.**" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "# Print source extraction name and type\n", 96 | "print(source_extraction.name)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "target_extraction = target_data_job.create_extraction(source_extraction.name)\n", 106 | "print(target_extraction.name)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "**In this step we save all source global parameter IDs in a dictionary and overwrite them with the target global parameter ID if the parameter exists already in the target data pool.**" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "global_vars = {}\n", 123 | "for source_var in source_data_pool.variables: # loop through global parameters of source data pool\n", 124 | " global_vars.update({source_var.id: None}) # save ID of source global 
parameter\n", 125 | " for target_var in target_data_pool.variables: # loop through the global parameters of target data pool\n", 126 | " if source_var.data['placeholder'].upper() == target_var.data['placeholder'].upper(): # if the placeholder of a source global parameter and a target global parameter match\n", 127 | " global_vars.update({source_var.id: target_var.id}) # match the saved ID of source global parameter wih the target global parameter ID\n", 128 | "print(\"Global parameter configurations saved.\")" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "**This section serves to create the extraction parameters.**" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "for source_ex_para in source_extraction.variables: # loop through the source extraction parameters\n", 145 | " if source_ex_para['defaultSettings'] is not None: # create the connection for the default value to the target global parameters\n", 146 | " if source_ex_para['defaultSettings']['poolVariableId'] is not None:\n", 147 | " target_id = global_vars.get(source_ex_para['defaultSettings']['poolVariableId']) \n", 148 | " if target_id is None:\n", 149 | " para = target_data_pool.create_pool_parameter(source_data_pool.variables.find(source_ex_para['defaultSettings']['poolVariableId'])) # create the target global parameter if it does not exist yet\n", 150 | " print(\"Pool Parameter '\" + para.name + \"' created.\")\n", 151 | " target_id = para.id\n", 152 | " global_vars[source_ex_para['defaultSettings']['poolVariableId']] = target_id\n", 153 | " source_ex_para['defaultSettings']['poolVariableId'] = target_id\n", 154 | " if source_ex_para['settings'] is not None: # create the connection for the value to the target global parameters\n", 155 | " if source_ex_para['settings']['poolVariableId'] is not None:\n", 156 | " target_id = global_vars.get(source_ex_para['settings']['poolVariableId'])\n", 157 | " if target_id is None:\n", 158 | " para = target_data_pool.create_pool_parameter(source_data_pool.variables.find(source_ex_para['settings']['poolVariableId'])) # create the target global parameter if it does not exist yet\n", 159 | " print(\"Pool Parameter '\" + para.name + \"' created.\")\n", 160 | " target_id = para.id\n", 161 | " global_vars[source_ex_para['settings']['poolVariableId']] = target_id\n", 162 | " source_ex_para['settings']['poolVariableId'] = target_id\n", 163 | " \n", 164 | " target_extraction.create_extraction_parameter(source_ex_para) # create the target transformation parameter\n", 165 | " print(\"Parameter '\" + source_ex_para['name'] + \"' created.\")" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "**Add tables in target extraction and overwrite parameter connections for time filters.**" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "for table in source_extraction.tables.data:\n", 182 | " \n", 183 | " if table.data['creationDateParameterStart'] is not None or table.data['creationDateParameterEnd'] is not None:\n", 184 | " #None or table.data['changeDateOffsetParameter'] is not None:\n", 185 | " \n", 186 | " temp = table.data.copy()\n", 187 | " \n", 188 | " for para in source_extraction.variables:\n", 189 | " if para['id'] == table.data['creationDateParameterStart']:\n", 190 | " placeholder_start = 
para['placeholder']\n", 191 | "# if para['id'] == table.data['creationDateParameterEnd']:\n", 192 | "# placeholder_end = para['placeholder']\n", 193 | "# if para['id'] == table.data['changeDateOffsetParameter']:\n", 194 | "# placeholder_change = para['placeholder']\n", 195 | " \n", 196 | " for para in target_extraction.variables:\n", 197 | " if para['placeholder'] == placeholder_start:\n", 198 | " temp['creationDateParameterStart'] = para['id']\n", 199 | " # if para['placeholder'] == placeholder_end:\n", 200 | " # temp['creationDateParameterEnd'] = para['id']\n", 201 | " # if para['placeholder'] == placeholder_change:\n", 202 | " # temp['changeDateOffsetParameter'] = para['id']\n", 203 | " \n", 204 | " table=temp \n", 205 | " \n", 206 | " try:\n", 207 | " target_extraction.add_table(table=table)\n", 208 | " except:\n", 209 | " print(\"Filters for table: '\" + table.name + \"' are neglected as the table is not found in the indicated source connection.\")\n", 210 | "\n", 211 | "print(\"Congrats you copied the extraction \"+ target_extraction.name + \"!\")" 212 | ] 213 | } 214 | ], 215 | "metadata": { 216 | "jupytext": { 217 | "formats": "ipynb,py:percent" 218 | }, 219 | "kernelspec": { 220 | "display_name": "Python 3", 221 | "language": "python", 222 | "name": "python3" 223 | }, 224 | "language_info": { 225 | "codemirror_mode": { 226 | "name": "ipython", 227 | "version": 3 228 | }, 229 | "file_extension": ".py", 230 | "mimetype": "text/x-python", 231 | "name": "python", 232 | "nbconvert_exporter": "python", 233 | "pygments_lexer": "ipython3", 234 | "version": "3.8.8" 235 | } 236 | }, 237 | "nbformat": 4, 238 | "nbformat_minor": 4 239 | } 240 | -------------------------------------------------------------------------------- /pycelonis1/03_Connectivity/02b_Transformation_Mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Transformation (DataJob) Mover" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**This tutorial shows how to copy transformations from one team/ data pool to another one, independent of the cluster.**\n", 15 | "\n" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "**To do so we first need to connect to the source data job.**" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "from pycelonis import get_celonis\n", 32 | "\n", 33 | "c_source = get_celonis(\"URL to the team from which you want to copy.\", \"Specify a valid API key for your source cloud team.\")\n", 34 | "source_data_pool = c_source.pools.find(\"Name or ID of the source data pool.\")\n", 35 | "source_data_job = source_data_pool.data_jobs.find(\"Name or ID of the source data job.\")" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "**In the next step we connect to the target data job.**" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "c_target = get_celonis(\"URL to cloud team where you want to copy to.\", \"Specify a valid API key for your target cloud team.\")\n", 52 | "target_data_pool = c_target.pools.find(\"Name or ID of the target data pool.\")\n", 53 | "target_data_job = target_data_pool.data_jobs.find(\"Name or ID of the target data job.\")" 54 | ] 
55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "**In this step we save all source global parameter IDs in a dictionary and overwrite them with the target global parameter ID if the parameter exists already in the target data pool.**" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "global_vars = {}\n", 70 | "for source_var in source_data_pool.variables: # loop through global parameters of source data pool\n", 71 | " global_vars.update({source_var.id: None}) # save ID of source global parameter\n", 72 | " for target_var in target_data_pool.variables: # loop through the global parameters of target data pool\n", 73 | " if source_var.data['placeholder'].upper() == target_var.data['placeholder'].upper(): # if the placeholder of a source global parameter and a target global parameter match\n", 74 | " global_vars.update({source_var.id: target_var.id}) # match the saved ID of source global parameter with the target global parameter ID\n", 75 | "print(\"Global parameter configurations saved.\")" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "**This section serves to create the target transformation, the related transformation parameters and to copy over the template settings.**" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "for source_transformation in source_data_job.transformations.data: # loop through source transformations\n", 92 | " \n", 93 | " if source_transformation.statement is None: # if the source transformation is empty, it will not be created\n", 94 | " continue\n", 95 | " \n", 96 | " # copy transformation from source to target data job:\n", 97 | " target_transformation = target_data_job.create_transformation(name=source_transformation.name, description=source_transformation.data['description'], statement=source_transformation.statement)\n", 98 | " print(\"Transformation: '\" + target_transformation.name + \"' created.\")\n", 99 | " \n", 100 | " for source_local_var in source_transformation.variables: # loop through the source transformation parameters\n", 101 | " if source_local_var['defaultSettings'] is not None: # create the connection for the default value to the target global parameters\n", 102 | " if source_local_var['defaultSettings']['poolVariableId'] is not None: \n", 103 | " target_id = global_vars.get(source_local_var['defaultSettings']['poolVariableId'])\n", 104 | " if target_id is None:\n", 105 | " para = target_data_pool.create_pool_parameter(source_data_pool.variables.find(source_local_var['defaultSettings']['poolVariableId'])) # create the target global parameter if it does not exist yet\n", 106 | " print(\"Pool Parameter '\" + para.name + \"' created.\")\n", 107 | " target_id = para.id\n", 108 | " global_vars[source_local_var['defaultSettings']['poolVariableId']] = target_id\n", 109 | " source_local_var['defaultSettings']['poolVariableId'] = target_id\n", 110 | " if source_local_var['settings'] is not None: # create the connection for the value to the target global parameters \n", 111 | " if source_local_var['settings']['poolVariableId'] is not None:\n", 112 | " target_id = global_vars.get(source_local_var['settings']['poolVariableId'])\n", 113 | " if target_id is None:\n", 114 | " para = 
target_data_pool.create_pool_parameter(source_data_pool.variables.find(source_local_var['settings']['poolVariableId'])) # create the target global parameter if it does not exist yet\n", 115 | " print(\"Pool Parameter '\" + para.name + \"' created.\")\n", 116 | " target_id = para.id\n", 117 | " global_vars[source_local_var['settings']['poolVariableId']] = target_id\n", 118 | " source_local_var['settings']['poolVariableId'] = target_id\n", 119 | "\n", 120 | " target_transformation.create_transformation_parameter(source_local_var) # create the target transformation parameter\n", 121 | " print(\"Parameter '\" + source_local_var['name'] + \"' created.\")\n", 122 | " \n", 123 | " if(source_transformation.data['template']): # copy template settings to target transformation\n", 124 | " target_transformation.to_template(source_transformation.data['protectionStatus'])\n", 125 | "\n", 126 | "print(\"Congrats you copied the data job \"+ source_data_job.name + \"!\")" 127 | ] 128 | } 129 | ], 130 | "metadata": { 131 | "jupytext": { 132 | "formats": "ipynb,py:percent" 133 | }, 134 | "kernelspec": { 135 | "display_name": "Python 3", 136 | "language": "python", 137 | "name": "python3" 138 | }, 139 | "language_info": { 140 | "codemirror_mode": { 141 | "name": "ipython", 142 | "version": 3 143 | }, 144 | "file_extension": ".py", 145 | "mimetype": "text/x-python", 146 | "name": "python", 147 | "nbconvert_exporter": "python", 148 | "pygments_lexer": "ipython3", 149 | "version": "3.8.8" 150 | } 151 | }, 152 | "nbformat": 4, 153 | "nbformat_minor": 4 154 | } 155 | -------------------------------------------------------------------------------- /pycelonis1/03_Connectivity/03_Data_Model_Mover.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Data Model Mover" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**This tutorial shows how to copy a data model from one team/ data pool to another one, independent of the cluster.**\n", 15 | "\n" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "**To do so we first need to connect to the source data model.**" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 32, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "name": "stdout", 32 | "output_type": "stream", 33 | "text": [ 34 | "2020-02-13 15:59:50 - pycelonis: Login successful! Hello s.matthaei@celonis.com\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "import pycelonis\n", "from pycelonis import get_celonis\n", 40 | "\n", 41 | "c_source = get_celonis(\"URL to the team from which you want to copy.\", \"Specify a valid API key for your source cloud team.\")\n", 42 | "source_data_model = c_source.datamodels.find(\"ID of the source data model. It can be copied from the URL.\") " 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "**In the next step we connect to the target data pool.**" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 39, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "2020-02-13 16:05:17 - pycelonis: Login successful! 
Hello s.matthaei@celonis.com\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "c_target = get_celonis(\"URL to cloud team where you want to copy to.\", \"Specify a valid API key for your target cloud team.\")\n", 67 | "target_data_pool = c_target.pools.find(\"Name or ID of the target data pool.\")\n", 68 | "data_source_name = \"Name of the data connection in the target data pool the target data model should refer to. Indicate an empty string to point to the global scope.\"" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "**Create the data model and add the tables.**" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 40, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "if source_data_model.name in target_data_pool.datamodels.names:\n", 85 | " print('A data model with the same name does already exist in the target data pool. Please rename one.')\n", 86 | "else: \n", 87 | " target_data_model = target_data_pool.create_datamodel(source_data_model.name) # create target data model\n", 88 | "\n", 89 | " connection = target_data_pool.data_connections.names[data_source_name] if data_source_name else None # choose the connection for the data model \n", 90 | "\n", 91 | " target_data_model.add_tables_from_pool(source_data_model.tables, connection) # add the tables from the connection to the data model" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "**Copy Activity & Case table settings. If you copy to a data pool that does not contain the case and activity table in the specified connection, this step cannot be performed. Just continue with the next one.**" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 41, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/plain": [ 109 | "[,]" 110 | ] 111 | }, 112 | "execution_count": 41, 113 | "metadata": {}, 114 | "output_type": "execute_result" 115 | } 116 | ], 117 | "source": [ 118 | "target_data_model.create_process_configuration(\n", 119 | " activity_table=source_data_model.process_configurations[0].activity_table.data[\"name\"] if source_data_model.process_configurations[0].activity_table else None,\n", 120 | " case_table=source_data_model.process_configurations[0].case_table.data[\"name\"] if source_data_model.process_configurations[0].case_table else None,\n", 121 | " case_column=source_data_model.process_configurations[0].case_column if source_data_model.process_configurations[0].activity_table else None,\n", 122 | " activity_column=source_data_model.process_configurations[0].activity_column if source_data_model.process_configurations[0].activity_table else None,\n", 123 | " timestamp_column=source_data_model.process_configurations[0].timestamp_column if source_data_model.process_configurations[0].activity_table else None,\n", 124 | " sorting_column=source_data_model.process_configurations[0].sorting_column if source_data_model.process_configurations[0].activity_table else None)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "**Add foreign key relationships.**" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 42, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "name": "stdout", 141 | "output_type": "stream", 142 | "text": [ 143 | "Relationships copied.\n" 144 | ] 145 | } 146 | ], 147 | "source": [ 148 | "for fk in source_data_model.foreign_keys:\n", 149 | " 
target_data_model.create_foreign_key(fk[\"source_table\"], fk[\"target_table\"], fk[\"columns\"], from_scratch=True)\n", 150 | "print('Relationships copied.')" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "**Add table aliases.**" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 38, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "name": "stderr", 167 | "output_type": "stream", 168 | "text": [ 169 | "WARNING:pycelonis:More things might have changed than requested\n", 170 | "WARNING:pycelonis:More things might have changed than requested\n", 171 | "WARNING:pycelonis:More things might have changed than requested\n", 172 | "WARNING:pycelonis:More things might have changed than requested\n" 173 | ] 174 | }, 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "Please ignore the warnings. Congrats you copied the data model 'SAP ECC - Accounts Payable Data Model'!\n" 180 | ] 181 | } 182 | ], 183 | "source": [ 184 | "for t in source_data_model.tables:\n", 185 | " if t.alias == t.name and isinstance(source_data_model, pycelonis.objects_ibc.Datamodel):\n", 186 | " target_t = target_data_model.tables.find(t.data[\"name\"])\n", 187 | " try:\n", 188 | " target_t.alias = t.name\n", 189 | " except ValueError:\n", 190 | " pass\n", 191 | " if t.alias != t.name and isinstance(source_data_model, pycelonis.objects_cpm4.Datamodel):\n", 192 | " target_t = target_data_model.tables.find(t.data[\"name\"])\n", 193 | " try:\n", 194 | " target_t.alias = t.alias\n", 195 | " except ValueError:\n", 196 | " pass\n", 197 | "print(\"Please ignore the warnings. Congrats you copied the data model '\"+ target_data_model.name + \"'!\")" 198 | ] 199 | } 200 | ], 201 | "metadata": { 202 | "jupytext": { 203 | "formats": "ipynb,py:percent" 204 | }, 205 | "kernelspec": { 206 | "display_name": "Python 3", 207 | "language": "python", 208 | "name": "python3" 209 | }, 210 | "language_info": { 211 | "codemirror_mode": { 212 | "name": "ipython", 213 | "version": 3 214 | }, 215 | "file_extension": ".py", 216 | "mimetype": "text/x-python", 217 | "name": "python", 218 | "nbconvert_exporter": "python", 219 | "pygments_lexer": "ipython3", 220 | "version": "3.8.8" 221 | } 222 | }, 223 | "nbformat": 4, 224 | "nbformat_minor": 4 225 | } 226 | -------------------------------------------------------------------------------- /pycelonis1/03_Connectivity/05_Transformation_Download_to_MLW.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Script Downloader" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**This tutorial shows how to download transformations of one data job.**\n", 15 | "\n", 16 | "**The installation of ftfy is a requirement for this script and needs to be installed once per workbench.**" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 24, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "Looking in indexes: https://pypi.celonis.cloud, https://pypi.org/simple\n", 29 | "Requirement already satisfied: ftfy in /home/jovyan/.local/lib/python3.7/site-packages (5.6)\n", 30 | "Requirement already satisfied: wcwidth in /opt/conda/lib/python3.7/site-packages (from ftfy) (0.1.8)\n", 31 | "Note: you may need to restart the kernel to use updated 
packages.\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "pip install ftfy" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "**In the first step we need to connect to the data job.**" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 23, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "from pycelonis import get_celonis\n", 53 | "import os\n", 54 | "from ftfy import fix_text #requires pip install\n", 55 | "\n", 56 | "c = get_celonis(\"URL to the team from which you want to copy.\", \"Specify a valid API key for your source cloud team.\")\n", 57 | "data_pool = c.pools.find(\"Name or ID of the source data pool.\")\n", 58 | "data_job = data_pool.data_jobs.find(\"Name or ID of the source data job.\")" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "**In the next step we create the SQL for the respective data job.**" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 4, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "sql = ''\n", 75 | "for transformation in data_job.transformations.data:\n", 76 | " if transformation.statement is None:\n", 77 | " continue\n", 78 | " sql = sql + '--###' + transformation.name + '###--' + '\\n\\n' + transformation.statement.strip() + '\\n\\n'" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "**In this step the SQL statement is saved as file.**" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 14, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "filepath = os.path.join('', data_job.name + '.sql')\n", 95 | "with open(filepath, 'w') as f:\n", 96 | " f.write(fix_text(sql))" 97 | ] 98 | } 99 | ], 100 | "metadata": { 101 | "jupytext": { 102 | "formats": "ipynb,py:percent" 103 | }, 104 | "kernelspec": { 105 | "display_name": "Python 3", 106 | "language": "python", 107 | "name": "python3" 108 | }, 109 | "language_info": { 110 | "codemirror_mode": { 111 | "name": "ipython", 112 | "version": 3 113 | }, 114 | "file_extension": ".py", 115 | "mimetype": "text/x-python", 116 | "name": "python", 117 | "nbconvert_exporter": "python", 118 | "pygments_lexer": "ipython3", 119 | "version": "3.7.6" 120 | } 121 | }, 122 | "nbformat": 4, 123 | "nbformat_minor": 4 124 | } 125 | -------------------------------------------------------------------------------- /pycelonis1/03_Connectivity/11_Extraction_Unifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Extraction Unifier" 8 | ] 9 | }, 10 | { 11 | "cell_type": "raw", 12 | "metadata": {}, 13 | "source": [ 14 | "Created by: e.vogt@celonis.com\n", 15 | "Uploaded on: 08.07.2020" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "**This tutorial shows how to copy multiple separate extraction jobs into one extraction.**" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "**To do so we first need to connect to the source data model.**" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 1, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | "2020-04-02 08:25:56 - pycelonis: Login successful! 
Hello Application Key, this key currently has access to 0 analyses.\n" 42 | ] 43 | } 44 | ], 45 | "source": [ 46 | "from pycelonis import get_celonis\n", 47 | "\n", 48 | "c_source = get_celonis(\"URL to the team from which you want to copy.\", \"Specify a valid API key for your source cloud team.\")\n", 49 | "source_data_pool = c_source.pools.find(\"Name or ID of the source data pool.\")\n", 50 | "source_data_job = source_data_pool.data_jobs.find(\"Name or ID of the source data job.\")" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "for source_extraction in source_data_job.extractions.data: \n", 60 | " print(source_extraction.name)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "**In the next step we connect to the target data pool.**" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 4, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "2020-04-02 08:29:22 - pycelonis: Login successful! Hello Application Key, this key currently has access to 0 analyses.\n" 80 | ] 81 | } 82 | ], 83 | "source": [ 84 | "c_target = get_celonis(\"URL to cloud team where you want to copy to.\", \"Specify a valid API key for your target cloud team.\")\n", 85 | "target_data_pool = c_target.pools.find(\"Name or ID of the target data pool.\")\n", 86 | "target_data_job = target_data_pool.data_jobs.find(\"Name or ID of the target data job.\")\n", 87 | "target_extraction = target_data_job.extractions.ids[\"Name or ID of the target data extraction.\"]" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "**Copy the parameters and tables of all source extractions into the target extraction.**" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 6, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "Congrats you copied the extraction test_unified!\n", 107 | "Congrats you copied the extraction test_unified!\n", 108 | "Congrats you copied the extraction test_unified!\n", 109 | "Congrats you copied the extraction test_unified!\n", 110 | "Parameter 'Maximal Activity End Date' created.\n", 111 | "Parameter 'Maximal Activity Start Date' created.\n", 112 | "Congrats you copied the extraction test_unified!\n", 113 | "Congrats you copied the extraction test_unified!\n", 114 | "Congrats you copied the extraction test_unified!\n", 115 | "Congrats you copied the extraction test_unified!\n", 116 | "Congrats you copied the extraction test_unified!\n" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "global_vars = {} # maps source global parameter IDs to target ones; pre-populate it by matching placeholders (as in 02a_Extraction_Mover) to avoid creating duplicate pool parameters\n", "\n", "for source_extraction in source_data_job.extractions.data: # loop through source extractions\n", 122 | "\n", 123 | " for source_ex_para in source_extraction.variables: # loop through the source extraction parameters\n", 124 | " if source_ex_para['defaultSettings'] is not None: # create the connection for the default value to the target global parameters\n", 125 | " if source_ex_para['defaultSettings']['poolVariableId'] is not None:\n", 126 | " target_id = global_vars.get(source_ex_para['defaultSettings']['poolVariableId']) \n", 127 | " if target_id is None:\n", 128 | " para = target_data_pool.create_pool_parameter(source_data_pool.variables.find(source_ex_para['defaultSettings']['poolVariableId'])) # create the target global parameter if it does not exist yet\n", 129 | " print(\"Pool Parameter '\" + para.name + \"' created.\")\n", 
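" # reuse the ID of the pool parameter that was just created so the copied extraction parameter references it in the target pool\n",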
130 | " target_id = para.id\n", 131 | " global_vars[source_ex_para['defaultSettings']['poolVariableId']] = target_id\n", 132 | " source_ex_para['defaultSettings']['poolVariableId'] = target_id\n", 133 | " if source_ex_para['settings'] is not None: # create the connection for the value to the target global parameters\n", 134 | " if source_ex_para['settings']['poolVariableId'] is not None:\n", 135 | " target_id = global_vars.get(source_ex_para['settings']['poolVariableId'])\n", 136 | " if target_id is None:\n", 137 | " para = target_data_pool.create_pool_parameter(source_data_pool.variables.find(source_ex_para['settings']['poolVariableId'])) # create the target global parameter if it does not exist yet\n", 138 | " print(\"Pool Parameter '\" + para.name + \"' created.\")\n", 139 | " target_id = para.id\n", 140 | " global_vars[source_ex_para['settings']['poolVariableId']] = target_id\n", 141 | " source_ex_para['settings']['poolVariableId'] = target_id\n", 142 | "\n", 143 | " target_extraction.create_extraction_parameter(source_ex_para) # create the target transformation parameter\n", 144 | " print(\"Parameter '\" + source_ex_para['name'] + \"' created.\")\n", 145 | " \n", 146 | " for table in source_extraction.tables.data:\n", 147 | "\n", 148 | " if table.data['creationDateParameterStart'] is not None or table.data['creationDateParameterEnd'] is not None or table.data['changeDateOffsetParameter'] is not None:\n", 149 | "\n", 150 | " temp = table.data.copy()\n", 151 | "\n", 152 | " for para in source_extraction.variables:\n", 153 | " if para['id'] == table.data['creationDateParameterStart']:\n", 154 | " placeholder_start = para['placeholder']\n", 155 | " if para['id'] == table.data['creationDateParameterEnd']:\n", 156 | " placeholder_end = para['placeholder']\n", 157 | " if para['id'] == table.data['changeDateOffsetParameter']:\n", 158 | " placeholder_change = para['placeholder']\n", 159 | "\n", 160 | " for para in target_extraction.variables:\n", 161 | " if para['placeholder'] == placeholder_start:\n", 162 | " temp['creationDateParameterStart'] = para['id']\n", 163 | " if para['placeholder'] == placeholder_end:\n", 164 | " temp['creationDateParameterEnd'] = para['id']\n", 165 | " if para['placeholder'] == placeholder_change:\n", 166 | " temp['changeDateOffsetParameter'] = para['id']\n", 167 | "\n", 168 | " table=temp \n", 169 | "\n", 170 | " try:\n", 171 | " target_extraction.add_table(table=table)\n", 172 | " except:\n", 173 | " print(\"Filters for table: '\" + table.name + \"' are neglected as the table is not found in the indicated source connection.\")\n", 174 | "\n", 175 | " print(\"Congrats you copied the extraction \"+ target_extraction.name + \"!\") " 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [] 184 | } 185 | ], 186 | "metadata": { 187 | "kernelspec": { 188 | "display_name": "Python 3", 189 | "language": "python", 190 | "name": "python3" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 3 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython3", 202 | "version": "3.7.6" 203 | } 204 | }, 205 | "nbformat": 4, 206 | "nbformat_minor": 4 207 | } 208 | -------------------------------------------------------------------------------- /pycelonis1/03_Connectivity/18_EMS_Data_Consumption_Report.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "4090f679", 6 | "metadata": {}, 7 | "source": [ 8 | "# Pull and analyze the APC consumption report\n", 9 | "\n", 10 | "##### Recommendation\n", 11 | "Use the recent (Oct 2021) EMS feature 'Pipeline Monitoring' to easily and flexibly analyze your APC and even your Job executions within Analyses. Documentation is here: https://help.celonis.cloud/help/display/CIBC/Custom+Data+Pipeline+Monitoring\n", 12 | "##### Purpose of this script\n", 13 | "Allows to analyze the full APC consumption report (https://TEAM.CLUSTER.celonis.cloud/integration/ui/data-consumption) within the MLW or other environments, by pulling it in python. This can serve as a complement to the EMS features as this allows export and APC aggregation by Data Pool.\n", 14 | "\n", 15 | "#### Inputs\n", 16 | "None if run from the MLWB.\n", 17 | "Token if run out of the MLW.\n", 18 | "\n", 19 | "#### Outputs\n", 20 | "Consumption report with used GB per table and data pool:\n", 21 | "* as pandas DataFrame\n", 22 | "* as CSV file\n", 23 | "\n", 24 | "#### Steps\n", 25 | "1. Import and connect\n", 26 | "2. Fetch data\n", 27 | "3. Process date and data volume\n", 28 | "4. Analyze (group by Data Pool)\n", 29 | "5. Export as CSV" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "id": "aa8a37bb", 35 | "metadata": {}, 36 | "source": [ 37 | "## Import and connect" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "id": "5ed198a9", 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "import pandas as pd\n", 48 | "from pycelonis import get_celonis\n", 49 | "from datetime import datetime as dt\n", 50 | "import time" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "id": "b1c1089b", 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "c = get_celonis()" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "id": "2e46ad2e", 66 | "metadata": {}, 67 | "source": [ 68 | "## Fetch data" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "id": "e5a274cc", 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "def get_consumption_df(c):\n", 79 | " page = 0\n", 80 | " df=pd.DataFrame()\n", 81 | " \n", 82 | " # Iterate over pages of data consumption\n", 83 | " while True: # while true + if -> break \n", 84 | " url = f\"{c.url}/integration//api/pools/data-consumption/?limit=5000&page={page}&sort=consumptionInBytesZA\"\n", 85 | " consumption_table = c.api_request(url, message = 'None', method = 'GET', get_json = True)\n", 86 | " t_list = consumption_table[\"extendedTableConsumptionTransports\"]\n", 87 | " if len(t_list) == 0:\n", 88 | " # Reached last page: no more data\n", 89 | " break\n", 90 | " df = pd.concat([df,pd.DataFrame(t_list)])\n", 91 | " page += 1\n", 92 | " # Limit api request rate\n", 93 | " time.sleep(1)\n", 94 | " return df\n", 95 | "\n", 96 | "df_consumption_ = get_consumption_df(c)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "id": "f35588f9", 102 | "metadata": {}, 103 | "source": [ 104 | "## Transform" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "id": "97d49319", 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "df_consumption = df_consumption_.copy()\n", 115 | "df_consumption[\"rawDataSizeGB\"] = df_consumption[\"rawDataSize\"] / (1024**3)\n", 116 | 
"df_consumption[\"lastUpdateDt\"] = pd.to_datetime(df_consumption[\"lastUpdate\"], unit='ms')\n", 117 | "df_consumption = df_consumption[[\"dataPoolId\", \"dataPoolName\", \"tableName\", \"rawDataSizeGB\", \"lastUpdateDt\"]]\n", 118 | "df_consumption.head() " 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "id": "de1b6e00", 124 | "metadata": {}, 125 | "source": [ 126 | "## Analyse" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "id": "369e9c6d", 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "df_consumption_summary = df_consumption.groupby([\"dataPoolId\", \"dataPoolName\"]).agg({\"rawDataSizeGB\":sum, \"lastUpdateDt\":min}).reset_index()\n", 137 | "df_consumption_summary = df_consumption_summary.sort_values(\"rawDataSizeGB\", ascending=False)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "id": "a108f5bb", 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "df_consumption_summary[\"rawDataSizeGB\"].sum()" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "id": "2ea4a47c", 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "df_consumption_summary" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "id": "a9702873", 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "# Details per table\n", 168 | "df_consumption.sort_values(\"rawDataSizeGB\", ascending=False)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "id": "bd112b97", 174 | "metadata": {}, 175 | "source": [ 176 | "## Export" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "id": "29d728b3", 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "def to_csv(df, name):\n", 187 | " df.to_csv(f\"{name}_{dt.now().strftime('%Y-%m-%d_%Hh%M')}.csv\", sep=';', decimal=',')" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "id": "ad9f9cd7", 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "to_csv(df_consumption_summary, \"consumption_summary\")\n", 198 | "to_csv(df_consumption, \"consumption_details\")" 199 | ] 200 | } 201 | ], 202 | "metadata": { 203 | "kernelspec": { 204 | "display_name": "Python 3", 205 | "language": "python", 206 | "name": "python3" 207 | }, 208 | "language_info": { 209 | "codemirror_mode": { 210 | "name": "ipython", 211 | "version": 3 212 | }, 213 | "file_extension": ".py", 214 | "mimetype": "text/x-python", 215 | "name": "python", 216 | "nbconvert_exporter": "python", 217 | "pygments_lexer": "ipython3", 218 | "version": "3.8.8" 219 | } 220 | }, 221 | "nbformat": 4, 222 | "nbformat_minor": 5 223 | } 224 | -------------------------------------------------------------------------------- /pycelonis1/04_Data_Formatting/00_Combine_csv_files.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import glob\n", 11 | "\n", 12 | "path = r'D:\\Customer Data\\Testfile_join' # use your path\n", 13 | "all_files = glob.glob(path + \"/*.csv\")\n", 14 | "\n", 15 | "li = []\n", 16 | "\n", 17 | "for filename in all_files:\n", 18 | " df = pd.read_csv(filename, index_col=None, header=0)\n", 19 | " li.append(df)\n", 20 | "\n", 21 | "frame = pd.concat(li, axis=0, 
ignore_index=True)\n", 22 | "\n", 23 | "df.to_csv('D:\\Customer Data\\Testfile_join\\out.csv')" 24 | ] 25 | } 26 | ], 27 | "metadata": { 28 | "kernelspec": { 29 | "display_name": "Python 3", 30 | "language": "python", 31 | "name": "python3" 32 | }, 33 | "language_info": { 34 | "codemirror_mode": { 35 | "name": "ipython", 36 | "version": 3 37 | }, 38 | "file_extension": ".py", 39 | "mimetype": "text/x-python", 40 | "name": "python", 41 | "nbconvert_exporter": "python", 42 | "pygments_lexer": "ipython3", 43 | "version": "3.7.6" 44 | } 45 | }, 46 | "nbformat": 4, 47 | "nbformat_minor": 4 48 | } 49 | -------------------------------------------------------------------------------- /pycelonis1/06_Extractors/03_Datadog_log_data_extraction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# How to Extract Datadog Log Data Into the IBC" 8 | ] 9 | }, 10 | { 11 | "cell_type": "raw", 12 | "metadata": {}, 13 | "source": [ 14 | "Documentation: https://confluence.celonis.com/pages/viewpage.action?pageId=105841328\n", 15 | "Placeholder for use case-specific info is XXXXX" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## Set up Required Packages and Settings" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "import pandas as pd\n", 32 | "from pandas.io.json import json_normalize\n", 33 | "from pycelonis import get_celonis\n", 34 | "import requests\n", 35 | "from time import sleep\n", 36 | "\n", 37 | "pd.set_option('max_colwidth', 300)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "## Define API Requests Parameters" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "baseUrlLogs = 'https://api.datadoghq.com/api/v1/logs-queries/list'\n", 54 | "\n", 55 | "headers = {\n", 56 | " 'content-type': 'application/json',\n", 57 | " 'DD-API-KEY': 'XXXXX',\n", 58 | " 'DD-APPLICATION-KEY': 'XXXXX'\n", 59 | "}\n", 60 | "\n", 61 | "\n", 62 | "startDate = \"XXXXX\" #ISO-8601 string, unix timestamp or relative time (such as \"now-1h\" or \"now-1d\")\n", 63 | "endDate = \"XXXXX\" #ISO-8601 string, unix timestamp or relative time (such as \"now\")\n", 64 | "\n", 65 | " \n", 66 | "bodyLogXXXXX = {\n", 67 | " \"query\": \"XXXXX\", #Datadog log explorer query, e.g.:\"@errorType:(INTERNAL OR EXTERNAL)\"\n", 68 | " \"sort\": \"asc\",\n", 69 | " \"time\": {\n", 70 | " \"from\": startDate,\n", 71 | " \"to\": endDate\n", 72 | " },\n", 73 | " \"limit\": 1,\n", 74 | "}" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## Define Required Helper Functions" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "#### Get newest log ID" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "def get_newestLogId_logXXXXX():\n", 98 | " print('Getting newest log ID')\n", 99 | " newestLogId = \"\"\n", 100 | " response = requests.post(baseUrlLogs, headers=headers, json=bodyLogXXXXX)\n", 101 | " newestLogId = json_normalize(response.json()['logs'])['id'] \n", 102 | " newestLogId = newestLogId.to_string(index=False)[1:]\n", 103 | " print('Newest log ID retrieved')\n", 
104 | " return newestLogId" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "## Fetch Data" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "#### Get most current log ID as starting point for API request" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "nextLogId_logXXXXX = get_newestLogId_logXXXXX()" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "#### Get log data and convert it to dataframe" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "print('Getting log XXXXX')\n", 144 | "logXXXXX = pd.DataFrame()\n", 145 | "while nextLogId_logXXXXX != None:\n", 146 | " response = requests.post(baseUrlLogs, headers=headers, json={\n", 147 | " \"limit\": 1000,\n", 148 | " \"query\": \"XXXXX\",\n", 149 | " \"startAt\": nextLogId_logXXXXX,\n", 150 | " \"sort\": \"asc\",\n", 151 | " \"time\": {\n", 152 | " \"from\": startDate,\n", 153 | " \"to\": endDate\n", 154 | " },\n", 155 | " }) \n", 156 | " data = json_normalize(response.json()['logs'])[['XXXXX'\n", 157 | " , 'XXXXX'\n", 158 | " , ...\n", 159 | " ]]\n", 160 | " \n", 161 | " #additional functionalities to isolate information from message string, set data types, etc. as required\n", 162 | " \n", 163 | " nextLogId_logXXXXX = response.json()['nextLogId']\n", 164 | " logXXXXX = logXXXXX.append(data, ignore_index=True, sort=False)\n", 165 | " sleep(0.1)\n", 166 | "print('Log XXXXX retrieved')" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "## Push Data to the IBC" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "#### Connect to IBC team and identify data pool (here: manually)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "login = {\n", 190 | " 'celonis_url': 'XXXXX',\n", 191 | " 'api_token': 'XXXXX',\n", 192 | "}\n", 193 | "celonis_manual = get_celonis(**login)\n", 194 | "\n", 195 | "data_pool = celonis_manual.pools.find('XXXXX')\n", 196 | "data_pool" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "#### Push dataframes into IBC team/data pool" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "print('Starting to push data to IBC')\n", 213 | "data_pool.push_table(logXXXXX, 'DD_LOG_XXXXX', if_exists = 'upsert', primary_keys = ['id'])\n", 214 | "print('Data push successful')" 215 | ] 216 | } 217 | ], 218 | "metadata": { 219 | "kernelspec": { 220 | "display_name": "Python 3", 221 | "language": "python", 222 | "name": "python3" 223 | }, 224 | "language_info": { 225 | "codemirror_mode": { 226 | "name": "ipython", 227 | "version": 3 228 | }, 229 | "file_extension": ".py", 230 | "mimetype": "text/x-python", 231 | "name": "python", 232 | "nbconvert_exporter": "python", 233 | "pygments_lexer": "ipython3", 234 | "version": "3.7.6" 235 | } 236 | }, 237 | "nbformat": 4, 238 | "nbformat_minor": 4 239 | } 240 | -------------------------------------------------------------------------------- /pycelonis1/06_Extractors/11_API template.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "77c1a358-1d72-43d6-91f4-5c1bed16e682", 6 | "metadata": {}, 7 | "source": [ 8 | "# Data Imports via API\n", 9 | "#### Historical and forecast weather data used as an example" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "id": "c467d46e-4b6d-482d-a995-557bfec50900", 15 | "metadata": {}, 16 | "source": [ 17 | "## Step 1: Import Required Libraries" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "id": "33648981-2703-4262-b8fd-caf1c9d80048", 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "#Libraries specifically required for API imports\n", 28 | "import yaml\n", 29 | "from yaml import load, dump\n", 30 | "\n", 31 | "import requests\n", 32 | "from pandas.io.json import json_normalize\n", 33 | "\n", 34 | "#Other libraries that are always good to have\n", 35 | "import pandas as pd\n", 36 | "import numpy as np\n", 37 | "from datetime import date, timedelta, time, datetime\n", 38 | "import matplotlib.pyplot as plt\n", 39 | "\n", 40 | "\n", 41 | "#Note: if it is your first time importing a library, run a PIP install like so. Ensure you keep the exclamation point\n", 42 | "\n", 43 | "# ! pip install library_name\n", 44 | "\n" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "id": "155c6433-adda-4ddb-8afb-15d5ebdf5ffe", 50 | "metadata": {}, 51 | "source": [ 52 | "## Optional - Step 2: connect to Celonis\n", 53 | "#### complete only if you need to integrate the API data with Celonis data / analyses" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "id": "a5ffde80-8843-4352-b5f4-d17de82fd106", 60 | "metadata": { 61 | "tags": [] 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "from pycelonis import get_celonis, pql\n", 66 | "\n", 67 | "celonis = get_celonis(\"team\", \n", 68 | " \"key\")\n", 69 | "\n", 70 | "#team is something like: https://berkshirehathawayenergy.us-2.celonis.cloud/\n", 71 | "#the key can be created by going to Edit Profile (under the circular button in top right) --> create API key" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "id": "b64b326c-7070-4532-b85b-11e434c82a10", 77 | "metadata": {}, 78 | "source": [ 79 | "## Optional - Step 3: load data from Celonis data model\n", 80 | "#### Complete only if you need to integrate API data with Celonis data/analyses. All code is sample code and should be adjusted to fit your data." 
81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "id": "4cd5616b-f39c-49c0-a2ea-56ebc0b205df", 87 | "metadata": { 88 | "tags": [] 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "\n", 93 | "#use the code below if you are querying directly from a data model\n", 94 | " # dm_id = 'data_model_id'\n", 95 | " # datamodel = celonis.datamodels.find(dm_id)\n", 96 | "\n", 97 | "#use the code below to query from studio\n", 98 | " # package = celonis.packages.find('package_id')\n", 99 | " # source_analysis = package.analyses.find('analysis_id')\n", 100 | "\n", 101 | " \n", 102 | "#use the code below to create your PQL query (examples of aggregate functions and filter statements are provided)\n", 103 | " # q1 = pql.PQL()\n", 104 | " # q1 += pql.PQLColumn(\"ROUND_DAY(table.field1)\", \"Date\")\n", 105 | " # q1 += pql.PQLColumn(\"SUM(table.field2)\", \"Actual\")\n", 106 | " # q1 += pql.PQLColumn(\"table.field3\", \"Region\")\n", 107 | " # q1 += pql.PQLFilter(\"table.field2 IS NOT NULL; \")\n", 108 | " # q1 += pql.PQLFilter(\"table.field1 > TO_DATE ( '2019-08-17 00:00:00' , FORMAT ( '%Y-%m-%d %H:%M:%S' )); \")\n", 109 | "\n", 110 | "#use the code below to create your dataframe\n", 111 | " # df1 = datamodel.get_data_frame(q1)\n", 112 | "\n" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "id": "165afba5-2e3e-41d6-803d-6b46d597cb28", 118 | "metadata": {}, 119 | "source": [ 120 | "## Step 4: Import Data via API\n", 121 | "#### To request your own API token for NOAA weather data, go here: https://www.ncdc.noaa.gov/cdo-web/token\n", 122 | "#### To find the dataset and station ID you need, go here: https://www.ncdc.noaa.gov/cdo-web/webservices/v2" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "id": "3e8fbbf5-032c-400c-883e-2cbdfb4817be", 129 | "metadata": { 130 | "tags": [] 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "# ADJUSTMENT REQUIRED - Define the amount of time that you will pull data for (in days)\n", 135 | "timeframe = 30\n", 136 | "\n", 137 | "#usually the max date will be Today's date\n", 138 | "max_date = datetime.today().strftime(\"%Y-%m-%d\")\n", 139 | "\n", 140 | "#the min date will be the Today's date offset by the timeframe indicated above\n", 141 | "min_date = datetime.today() + timedelta(days=timeframe*(-1))\n", 142 | "min_date = min_date.strftime(\"%Y-%m-%d\")\n", 143 | "\n", 144 | "# ADJUSTMENT REQUIRED - define the data set ID (refer to link above to find dataset IDs)\n", 145 | "datasetid = 'GHCND'\n", 146 | "\n", 147 | "# ADJUSTMENT REQUIRED - define the station ID (refer to link above to find station IDs)\n", 148 | "stationid = 'GHCND:USW00023169'\n", 149 | "\n", 150 | "# ADJUSTMENT REQUIRED - define the token\n", 151 | "token = 'xyz'\n", 152 | "\n", 153 | "# run this code to obtain the request\n", 154 | "url = 'https://www.ncdc.noaa.gov/cdo-web/api/v2/data?datasetid=' + datasetid +'&stationid=' + stationid + '&startdate=' + min_date + '&enddate=' + max_date + '&units=standard&limit=1000'\n", 155 | "payload = {}\n", 156 | "headers = {\n", 157 | " 'token': token\n", 158 | "}\n", 159 | "\n", 160 | "\n", 161 | "#run the query to get raw data, put raw data into JSON format\n", 162 | "data = requests.request(\"GET\", url, headers=headers, json = payload)\n", 163 | "data = data.json()\n", 164 | "\n", 165 | "\n", 166 | "data" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "id": "86df13a1-21a5-4b01-81ac-78aa0bd83403", 172 | "metadata": {}, 173 | "source": [ 174 | "## Step 5: 
create a dataframe from your JSON data using json_normalize function\n", 175 | "#### 'results' should be replaced with the JSON object you need" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 8, 181 | "id": "f6ec8959-d4ba-4237-abf4-161e5c8466cd", 182 | "metadata": { 183 | "tags": [] 184 | }, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "text/html": [ 189 | "
\n", 190 | "\n", 203 | "\n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | "
datedatatypestationattributesvalue
02021-08-25T00:00:00AWNDGHCND:USW00023169,,W,7.4
12021-08-25T00:00:00PRCPGHCND:USW00023169,,W,24000.0
22021-08-25T00:00:00SNOWGHCND:USW00023169,,W,0.0
32021-08-25T00:00:00SNWDGHCND:USW00023169,,W,0.0
42021-08-25T00:00:00TAVGGHCND:USW00023169H,,S,92.0
..................
2792021-09-20T00:00:00SNOWGHCND:USW00023169,,D,0.0
2802021-09-20T00:00:00TAVGGHCND:USW00023169H,,S,81.0
2812021-09-20T00:00:00TMAXGHCND:USW00023169,,D,240088.0
2822021-09-20T00:00:00TMINGHCND:USW00023169,,D,240073.0
2832021-09-21T00:00:00TAVGGHCND:USW00023169H,,S,80.0
\n", 305 | "

284 rows × 5 columns

\n", 306 | "
" 307 | ], 308 | "text/plain": [ 309 | " date datatype station attributes value\n", 310 | "0 2021-08-25T00:00:00 AWND GHCND:USW00023169 ,,W, 7.4\n", 311 | "1 2021-08-25T00:00:00 PRCP GHCND:USW00023169 ,,W,2400 0.0\n", 312 | "2 2021-08-25T00:00:00 SNOW GHCND:USW00023169 ,,W, 0.0\n", 313 | "3 2021-08-25T00:00:00 SNWD GHCND:USW00023169 ,,W, 0.0\n", 314 | "4 2021-08-25T00:00:00 TAVG GHCND:USW00023169 H,,S, 92.0\n", 315 | ".. ... ... ... ... ...\n", 316 | "279 2021-09-20T00:00:00 SNOW GHCND:USW00023169 ,,D, 0.0\n", 317 | "280 2021-09-20T00:00:00 TAVG GHCND:USW00023169 H,,S, 81.0\n", 318 | "281 2021-09-20T00:00:00 TMAX GHCND:USW00023169 ,,D,2400 88.0\n", 319 | "282 2021-09-20T00:00:00 TMIN GHCND:USW00023169 ,,D,2400 73.0\n", 320 | "283 2021-09-21T00:00:00 TAVG GHCND:USW00023169 H,,S, 80.0\n", 321 | "\n", 322 | "[284 rows x 5 columns]" 323 | ] 324 | }, 325 | "execution_count": 8, 326 | "metadata": {}, 327 | "output_type": "execute_result" 328 | } 329 | ], 330 | "source": [ 331 | "df=pd.json_normalize(data['results'])\n", 332 | "df" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "id": "f8851b47-4f28-47c4-a0be-d6018eae9f61", 338 | "metadata": {}, 339 | "source": [ 340 | "## Optional - Step 6: Helpful Formatting Functions\n", 341 | "#### This step provides examples functions that can be used to manipulate your API output. These sample functions refer to a generic dataframe titled \"df\". \"df\" should be replaced with your dataframe name.\n" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "id": "0747aebc-7e0f-41f3-be38-713ae624dbce", 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "#limit how much output ALL functions in the workbook will show\n", 352 | "pd.set_option('display.max_rows', None)\n", 353 | "\n", 354 | "#sort dataframe values by Date\n", 355 | "df = df.sort_values(by=[\"column_name\"])\n", 356 | "\n", 357 | "#drop the last two rows of the dataframe\n", 358 | "df.drop(df.tail(2).index,inplace = True)\n", 359 | "\n", 360 | "#reset the index of the dataframe\n", 361 | "df = df.reset_index(drop=True)\n", 362 | "\n", 363 | "#rename a column\n", 364 | "df = df.rename(columns={\"current_column_name\": \"new_column_name\"})\n", 365 | "\n", 366 | "#change datatype of a column\n", 367 | "df['date_column_name'] = pd.to_datetime(df['date_column_name'])\n", 368 | "\n", 369 | "#filter dataset\n", 370 | "df = df[df['date_column_name'] > pd.Timestamp(datetime.now())]\n", 371 | "\n", 372 | "#drop columns\n", 373 | "df = df.drop(columns=['column1', 'column2', 'column3'])" 374 | ] 375 | }, 376 | { 377 | "cell_type": "markdown", 378 | "id": "ea3e2a7c-0333-49b7-82e2-68d92048857d", 379 | "metadata": {}, 380 | "source": [ 381 | "## Optional - Step 7: push data to Celonis\n", 382 | "#### Only use if you need to push API data to your Celonis data pool (usually API data is just used as an input to a model in MLWB and does not need to be pushed to the Celonis data)" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "id": "1cd5ab28-59a7-4c3f-9163-8a26a68091eb", 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [ 392 | "#define the data pool\n", 393 | "data_pool = celonis.pools.find(\"data_pool_id\")\n", 394 | "\n", 395 | "#option 1 - replace existing table\n", 396 | "data_pool.push_table(df,\"table_name_in_data_pool\", if_exists = 'replace')\n", 397 | "\n", 398 | "\n", 399 | "#option 2 - upsert data (similar to a delta load) using primary key of table\n", 400 | 
"data_pool.upsert_table(table_name=\"table_name_in_data_pool\",\n", 401 | " df_or_path=df,\n", 402 | " primary_keys=['primary_key'])\n", 403 | "\n" 404 | ] 405 | } 406 | ], 407 | "metadata": { 408 | "kernelspec": { 409 | "display_name": "Python 3", 410 | "language": "python", 411 | "name": "python3" 412 | }, 413 | "language_info": { 414 | "codemirror_mode": { 415 | "name": "ipython", 416 | "version": 3 417 | }, 418 | "file_extension": ".py", 419 | "mimetype": "text/x-python", 420 | "name": "python", 421 | "nbconvert_exporter": "python", 422 | "pygments_lexer": "ipython3", 423 | "version": "3.8.6" 424 | } 425 | }, 426 | "nbformat": 4, 427 | "nbformat_minor": 5 428 | } 429 | -------------------------------------------------------------------------------- /pycelonis1/06_Extractors/99_Extract-logs-from-EMS.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import requests\n", 10 | "import pandas as pd\n", 11 | "import pycelonis as py" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "api_key = ''\n", 21 | "team_url = 'https://XXXXXX.celonis.cloud/'\n", 22 | "\n", 23 | "instance = py.get_celonis(team_url, api_key)" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "audit_log = instance.api_request('https://XXXXX.celonis.cloud/api/team/logs/audit/csv')\n", 33 | "audit_log" 34 | ] 35 | } 36 | ], 37 | "metadata": { 38 | "kernelspec": { 39 | "display_name": "Python 3", 40 | "language": "python", 41 | "name": "python3" 42 | }, 43 | "language_info": { 44 | "codemirror_mode": { 45 | "name": "ipython", 46 | "version": 3 47 | }, 48 | "file_extension": ".py", 49 | "mimetype": "text/x-python", 50 | "name": "python", 51 | "nbconvert_exporter": "python", 52 | "pygments_lexer": "ipython3", 53 | "version": "3.7.3" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 4 58 | } 59 | -------------------------------------------------------------------------------- /pycelonis2/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/celonis/pycelonis-examples/6a51c8eaf84d59e7b69d457d5748ea7348659d6f/pycelonis2/.DS_Store -------------------------------------------------------------------------------- /pycelonis2/01_example_use_cases/01_use_case_version_control.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Version Control Use Case\n", 8 | "This example is solely intended as a demonstration to highlight relevant pycelonis functions and properties. In this example, you will learn how to create text-based backups of analyses, knowledge models, package variables, and transformations. More specifically, you will learn:\n", 9 | "\n", 10 | "- How to connect to the EMS\n", 11 | "- How to create folders\n", 12 | "- How to create backups of all published analyses, knowledge models, package variables and transformations\n", 13 | "- Optionally, How to commit the backup folder to GitHub\n", 14 | "\n", 15 | "
\n", 16 | " NOTE: Any Celonis objects with a serialized_content property can be backed up to a YAML or JSON file.\n", 17 | "
" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Prerequisites\n", 25 | "To follow this tutorial, you should have PyCelonis installed and should know how to perform basic interactions with PyCelonis objects. If you don't know how to do this, please complete the **Celonis Basics** tutorial first. Further, it would be helpful to already have the previously mentioned assets inside your EMS. Please refer to the **Studio - Introduction** and **Data Integration - Data Jobs** tutorials for an overview of working with each asset type." 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "### 1. Import PyCelonis, connect to Celonis API, and create the backup folder" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 1, 38 | "metadata": { 39 | "collapsed": false, 40 | "jupyter": { 41 | "outputs_hidden": false 42 | }, 43 | "pycharm": { 44 | "is_executing": true 45 | } 46 | }, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "[2023-01-30 20:53:08,556] INFO: Initial connect successful! PyCelonis Version: 2.0.1\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "from pycelonis import get_celonis\n", 58 | "from datetime import datetime as dt\n", 59 | "from pathlib import Path\n", 60 | "import json\n", 61 | "\n", 62 | "celonis = get_celonis()\n", 63 | "backup_path = Path('IBC Backup')\n", 64 | "now = dt.now().strftime(\"%d-%m-%Y_%H-%M\")\n", 65 | "\n", 66 | "if not backup_path.exists():\n", 67 | " backup_path.mkdir()" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "### 2. Create new backup folders for analyses, knowledge models, variables, and transformations" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 2, 80 | "metadata": { 81 | "collapsed": false, 82 | "jupyter": { 83 | "outputs_hidden": false 84 | } 85 | }, 86 | "outputs": [ 87 | { 88 | "name": "stdout", 89 | "output_type": "stream", 90 | "text": [ 91 | "IBC Backup/Analyses_30-01-2023_20-53\n", 92 | "IBC Backup/KnowledgeModels_30-01-2023_20-53\n", 93 | "IBC Backup/Transformations_30-01-2023_20-53\n", 94 | "IBC Backup/Variables_30-01-2023_20-53\n" 95 | ] 96 | } 97 | ], 98 | "source": [ 99 | "analyses_path = backup_path / f\"Analyses_{now}\"\n", 100 | "kms_path = backup_path / f\"KnowledgeModels_{now}\"\n", 101 | "vars_path = backup_path / f\"Variables_{now}\"\n", 102 | "trans_path = backup_path / f\"Transformations_{now}\"\n", 103 | "\n", 104 | "print(analyses_path)\n", 105 | "print(kms_path)\n", 106 | "print(vars_path)\n", 107 | "print(trans_path)\n", 108 | "\n", 109 | "analyses_path.mkdir()\n", 110 | "kms_path.mkdir()\n", 111 | "vars_path.mkdir()\n", 112 | "trans_path.mkdir()" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "### 3. 
Create backups of all published analyses, sorted by workspace" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "outputs": [], 126 | "source": [ 127 | "# Helper Function\n", 128 | "\n", 129 | "def backup_assets(assets, path):\n", 130 | " for asset in assets:\n", 131 | "\n", 132 | " # skip unpublished assets\n", 133 | " if asset.activated_draft_id is None:\n", 134 | " continue\n", 135 | "\n", 136 | " file_name = f'{asset.key}.{asset.serialization_type.lower()}'\n", 137 | " file = path / file_name\n", 138 | "\n", 139 | " file.write_text(asset.serialized_content)" 140 | ], 141 | "metadata": { 142 | "collapsed": false 143 | } 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 4, 148 | "metadata": { 149 | "collapsed": false, 150 | "jupyter": { 151 | "outputs_hidden": false 152 | } 153 | }, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "Analyses Backup Complete\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "for space in celonis.studio.get_spaces():\n", 165 | "\n", 166 | " space_folder = f\"{space.name}_{space.id}\".replace(\" \", \"_\")\n", 167 | "\n", 168 | " # Create space folders for analyses backup\n", 169 | " space_path = analyses_path / space_folder\n", 170 | " space_path.mkdir()\n", 171 | "\n", 172 | " # Create backup files\n", 173 | " for pkg in space.get_packages():\n", 174 | " backup_assets(pkg.get_analyses(), space_path)\n", 175 | "\n", 176 | "print(\"Analyses Backup Complete\")" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "### 4. Create backups of all published knowledge models, sorted by workspace" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 5, 189 | "metadata": {}, 190 | "outputs": [ 191 | { 192 | "name": "stdout", 193 | "output_type": "stream", 194 | "text": [ 195 | "Knowledge Models Backup Complete\n" 196 | ] 197 | } 198 | ], 199 | "source": [ 200 | "for space in celonis.studio.get_spaces():\n", 201 | "\n", 202 | " space_folder = f\"{space.name}_{space.id}\".replace(\" \", \"_\")\n", 203 | "\n", 204 | " # Create space folders for knowledge models backup\n", 205 | " space_path = kms_path / space_folder\n", 206 | " space_path.mkdir()\n", 207 | "\n", 208 | " # Create backup files\n", 209 | " for pkg in space.get_packages():\n", 210 | " backup_assets(pkg.get_knowledge_models(), space_path)\n", 211 | "\n", 212 | "print(\"Knowledge Models Backup Complete\")" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "### 5. 
Create backups of all package variables, sorted by package" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "outputs": [], 226 | "source": [ 227 | "# Helper Function\n", 228 | "\n", 229 | "def backup_variables(variables, package_path):\n", 230 | " for variable in variables:\n", 231 | "\n", 232 | " file_name = f\"{variable.key}.json\"\n", 233 | " file = package_path / file_name\n", 234 | "\n", 235 | " content = json.dumps({\n", 236 | " 'key': variable.key,\n", 237 | " 'type_': variable.type_,\n", 238 | " 'description': variable.description,\n", 239 | " 'value': variable.value,\n", 240 | " 'package_key': variable.package_key\n", 241 | " })\n", 242 | "\n", 243 | " file.write_text(content)" 244 | ], 245 | "metadata": { 246 | "collapsed": false 247 | } 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 6, 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "name": "stdout", 256 | "output_type": "stream", 257 | "text": [ 258 | "Package Variables Backup Complete\n" 259 | ] 260 | } 261 | ], 262 | "source": [ 263 | "for space in celonis.studio.get_spaces():\n", 264 | "\n", 265 | " space_folder = f\"{space.name}_{space.id}\".replace(\" \", \"_\")\n", 266 | "\n", 267 | " # Create space folders for variables backup\n", 268 | " space_path = vars_path / space_folder\n", 269 | " space_path.mkdir()\n", 270 | "\n", 271 | " # Create backup files\n", 272 | " for pkg in space.get_packages():\n", 273 | " pkg_folder = f\"{pkg.name}_{pkg.id}\".replace(\" \", \"_\")\n", 274 | " pkg_path = space_path / pkg_folder\n", 275 | " \n", 276 | " variables = pkg.get_variables()\n", 277 | " \n", 278 | " # Skip packages without variables\n", 279 | " if variables == []:\n", 280 | " continue\n", 281 | " \n", 282 | " pkg_path.mkdir()\n", 283 | " backup_variables(variables, pkg_path)\n", 284 | "\n", 285 | "print(\"Package Variables Backup Complete\")" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": {}, 291 | "source": [ 292 | "### 6. 
Create backups of all transformations, sorted by data job*" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "outputs": [], 299 | "source": [ 300 | "# Helper Function\n", 301 | "\n", 302 | "def backup_transformations(transformations, job_path):\n", 303 | " for transformation in transformations:\n", 304 | "\n", 305 | " # Handle errors retrieving the statement\n", 306 | " try:\n", 307 | " statement = transformation.get_statement()\n", 308 | " # Skip transformations with no statement\n", 309 | " if statement is None:\n", 310 | " continue\n", 311 | " except Exception as e:\n", 312 | " print(f\"FAILED to backup: {transformation.name} with id: {transformation.id} \\n You either don't have permissions to access the asset or the transformation is proprietary to Celonis.\")\n", 313 | " continue\n", 314 | "\n", 315 | " file_name = f\"{transformation.name}.json\"\n", 316 | " file = job_path / file_name\n", 317 | "\n", 318 | " content = json.dumps({\n", 319 | " 'id': transformation.id,\n", 320 | " 'name': transformation.name,\n", 321 | " 'description': transformation.description,\n", 322 | " 'statement': statement,\n", 323 | " 'pool_id': transformation.pool_id,\n", 324 | " 'job_id': transformation.job_id\n", 325 | " })\n", 326 | "\n", 327 | " file.write_text(content)" 328 | ], 329 | "metadata": { 330 | "collapsed": false 331 | } 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 7, 336 | "metadata": { 337 | "collapsed": false, 338 | "jupyter": { 339 | "outputs_hidden": false 340 | } 341 | }, 342 | "outputs": [ 343 | { 344 | "name": "stdout", 345 | "output_type": "stream", 346 | "text": [ 347 | "Transformations Backup Complete\n" 348 | ] 349 | } 350 | ], 351 | "source": [ 352 | "for pool in celonis.data_integration.get_data_pools():\n", 353 | "\n", 354 | " # Create space folders for analyses backup\n", 355 | " pool_folder = f\"{pool.name}_{pool.id}\".replace(\" \", \"_\")\n", 356 | " pool_path = trans_path / pool_folder\n", 357 | " pool_path.mkdir()\n", 358 | "\n", 359 | " for job in pool.get_jobs():\n", 360 | " job_folder = f\"{job.name}_{job.id}\"\n", 361 | " job_path = pool_path / job_folder\n", 362 | " transformations = job.get_transformations()\n", 363 | "\n", 364 | " # Skip jobs without transformations\n", 365 | " if transformations == []:\n", 366 | " continue\n", 367 | "\n", 368 | " job_path.mkdir()\n", 369 | " backup_transformations(transformations, job_path)\n", 370 | "\n", 371 | "print(\"Transformations Backup Complete\")" 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": {}, 377 | "source": [ 378 | "**Transformations downloaded from the marketplace are proprietary to Celonis and are unable to backup*" 379 | ] 380 | }, 381 | { 382 | "cell_type": "markdown", 383 | "metadata": {}, 384 | "source": [ 385 | "### 7. (Optional) Commit the backup folder to GitHub" 386 | ] 387 | }, 388 | { 389 | "cell_type": "markdown", 390 | "metadata": {}, 391 | "source": [ 392 | "Navigate to the backup_path on the command line using the *cd* command, then run:\n", 393 | "\n", 394 | " git init\n", 395 | " git add .\n", 396 | " git commit -m \"Activating version control\"" 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "metadata": {}, 402 | "source": [ 403 | "## Conclusion\n", 404 | "Congratulations! You have learned how to connect to the EMS, how to create folders, how to create backups of various assets, and how to commit the backup folder to GitHub." 
405 | ] 406 | } 407 | ], 408 | "metadata": { 409 | "kernelspec": { 410 | "display_name": "Python 3 (ipykernel)", 411 | "language": "python", 412 | "name": "python3" 413 | }, 414 | "language_info": { 415 | "codemirror_mode": { 416 | "name": "ipython", 417 | "version": 3 418 | }, 419 | "file_extension": ".py", 420 | "mimetype": "text/x-python", 421 | "name": "python", 422 | "nbconvert_exporter": "python", 423 | "pygments_lexer": "ipython3", 424 | "version": "3.8.13" 425 | } 426 | }, 427 | "nbformat": 4, 428 | "nbformat_minor": 4 429 | } 430 | -------------------------------------------------------------------------------- /pycelonis2/02_pycelonis_version_migrator/Pycelonis_Migration_UI.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "15ffaabc", 6 | "metadata": {}, 7 | "source": [ 8 | "# User Interface for the Pycelonis migration script" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "f505a141", 14 | "metadata": {}, 15 | "source": [ 16 | "- **PROJECT DESCRIPTION:** This project migrates a given code (either .py or .ipynb) to pycelonis 2.0. It consists of two scripts, this one you are reading (**Pycelonis_Migration_UI.ipynb**) which is the **ONLY ONE THE USER SHOULD OPEN**; and the pycelonis_migration.py, which can be regarded as the backend of this project. The former should only be modified if you are planning on collaborating in enhancing this project.
\n", 17 | "
\n", 18 | " This whole project creates a modified copy of the original code (Inside the project's folder) and it afterwards produces a diff HTML file to easily visualize the changes. The modified code has the same name as the original one inputed in code_path, but with \"_migrated_automatically\" between the original name and the file extension. If you open the new outputted file, you can also see every line that was changed because it will have a comment with either # CHANGED or # CHECK MANUALLY.
\n", 19 | "
\n", 20 | "- **UI Overview**: The overall structure of this code (**Pycelonis_Migration_UI.ipynb**) is:
\n", 21 | " - Inputs: You just need to input a working path as a string in the variable code_path
\n", 22 | " - Outputs: You are going to get another code file with the migrated version, as well as an HTML file to visualize the output easily.
\n", 23 | "\n", 24 | "The whole aim is for you to test the modified code (_migrated_automatically) and look for the changes to see if they are properly working. This project doesn't cover all the use cases, so **you should manually check the output afterwards**. Altough it is not perfect, it can definetely help you save a lot of time." 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "id": "523df5c4", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import pycelonis_migration\n", 35 | "from IPython.display import IFrame" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "id": "90823215", 41 | "metadata": {}, 42 | "source": [ 43 | "Fullfill the next cell with the path to the code you want to migrate. It can either be a notebook or a plain .py file" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "id": "f9be9117", 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "code_path = \"/Users/tests/Downloads/test_code.ipynb\"" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "id": "cfd4531a", 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "pycelonis_migration.main(path=code_path)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "id": "88476ecb", 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "IFrame(\"diff.html\", width=1000, height=600)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "id": "00d11c77", 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [] 83 | } 84 | ], 85 | "metadata": { 86 | "kernelspec": { 87 | "display_name": "Projects", 88 | "language": "python", 89 | "name": "projects" 90 | }, 91 | "language_info": { 92 | "codemirror_mode": { 93 | "name": "ipython", 94 | "version": 3 95 | }, 96 | "file_extension": ".py", 97 | "mimetype": "text/x-python", 98 | "name": "python", 99 | "nbconvert_exporter": "python", 100 | "pygments_lexer": "ipython3", 101 | "version": "3.9.12" 102 | } 103 | }, 104 | "nbformat": 4, 105 | "nbformat_minor": 5 106 | } 107 | -------------------------------------------------------------------------------- /pycelonis2/02_pycelonis_version_migrator/README.md: -------------------------------------------------------------------------------- 1 | # Pycelonis Version Migrator 2 | This code serves the purpose of automatically migrating a given code from pycelonis 1 to pycelonis 2 3 | The code is not perfect (It doesn't cover every single use case), but serves as a tool to save time for 4 | people having to migrate codes to pycelonis 2 5 | 6 | # Project Description 7 | ## Input 8 | The UI Script only needs the path to the location of the .py / .ipynb archive you want to migrate. 9 | 10 | ## Output 11 | Once the code is run, it generates another .py or .ipynb file with the same name as the one you inputted but with 12 | "_migrated_automatically" after. In every line of code that is changed there will appear a comment # CHANGED or # MANUALLY CHECK. 13 | For more clarity on what has actually changed, the code generates a diff.html file that can be easily opened with any 14 | browser and highlights all the changes made in either, red, green or yellow. Nevertheless, the UI script enables a 15 | visualization of this HTML within the very UI notebook. 
16 | 
17 | ## Check the output once it has run
18 | After the migration has run, you should manually check the result for any remaining mistakes and fix them by reading the pycelonis documentation
19 | or [this article](https://celonis.github.io/pycelonis/2.0.0/tutorials/executed/04_migration/01_migration_guide/)
20 | on how to migrate the biggest changes. As stated before, the code is not perfect,
21 | but it will definitely save you time.
22 | 
23 | The backend script relies on a class called PycelonisMigrator, which applies a set of regex patterns (mainly via the
24 | regex library) to rewrite the text; a minimal sketch of such a rule is shown right after the Scripts list below.
25 | 
26 | # Scripts
27 | This project consists of two different scripts:
28 | 
29 | - **Pycelonis_Migration_UI.ipynb**: This is the script the final user should use. It contains a brief description of the
30 | overall project and the call to the backend that performs the migration.
31 | - **pycelonis_migration.py**: This is the code that performs all of the operations. It is based on a class called
32 | PycelonisMigrator and several helper functions that define the output of the regex substitution patterns.
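
To make the regex-based approach more concrete, here is a minimal sketch of the kind of rewrite rule the migrator applies. The specific pattern (rewriting the PyCelonis 1.x `celonis.pools` accessor to the 2.x `celonis.data_integration.get_data_pools()` call) and the helper name `migrate_line` are illustrative assumptions, not the actual rules from pycelonis_migration.py; only the `# CHANGED` tagging mirrors the documented behaviour. The sketch also uses Python's built-in `re` module, whereas the project itself relies on the third-party `regex` library.

```python
import re

# Illustrative rule only -- the real rules live in pycelonis_migration.py.
# Hypothetical mapping: PyCelonis 1.x pool access -> PyCelonis 2.x accessor.
PATTERN = re.compile(r"\bcelonis\.pools\b")
REPLACEMENT = "celonis.data_integration.get_data_pools()"

def migrate_line(line: str) -> str:
    """Apply one rewrite rule and tag the line if it was modified."""
    new_line, n_subs = PATTERN.subn(REPLACEMENT, line)
    if n_subs > 0:
        new_line += "  # CHANGED"
    return new_line

# Example:
#   migrate_line("pools = celonis.pools")
#   -> "pools = celonis.data_integration.get_data_pools()  # CHANGED"
```

Keeping each rule as a (pattern, replacement) pair makes the rule set easy to extend with additional PyCelonis 1.x to 2.x rewrites.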
33 | 
34 | 
35 | # Additional Information
36 | ## Pycelonis version change
37 | This project only migrates the script you provide, but bear in mind that you also need to update the Python packages
38 | so you can test the outcome of the updated script. You are strongly encouraged to check which version of pycelonis you
39 | are currently using. For this, you can either:
40 | - Type the following command in a newly opened terminal and search for the pycelonis version number:
41 | > pip list
42 | - Inside any notebook, run the following piece of code:
43 | > import pycelonis
44 | > pycelonis.\__version\__
45 | 
46 | This way you can revert to the older version in case you run into problems. Once you have done this, you can safely update
47 | pycelonis to the latest version. To update the pycelonis Python package, follow [these guidelines.](https://celonis.github.io/pycelonis/2.0.1/tutorials/executed/01_quickstart/01_installation/)
48 | You will need to run this command in a terminal:
49 | > pip install --extra-index-url=https://pypi.celonis.cloud/ pycelonis 50 | 51 | Note that you can select the version you want to install by adding it at the end of the command:
52 | > pip install --extra-index-url=https://pypi.celonis.cloud/ pycelonis=="2.0.1"
53 | 
54 | ## Friendly reminder
55 | - This project **never changes the original script** provided for migration. You can safely use it since it only reads
56 | the original and never writes back to it.
57 | 
58 | 
-------------------------------------------------------------------------------- /pycelonis2/02_pycelonis_version_migrator/function_get_data_frame.txt: --------------------------------------------------------------------------------
1 | import pycelonis
2 | import pandas as pd
3 | from pycelonis.pql import PQL, PQLColumn
4 | from pycelonis.ems.data_integration.data_model import DataModel
5 | def extract_table_from_data_pool(
6 |     celonis_object: pycelonis.celonis.Celonis,
7 |     data_pool: pycelonis.ems.data_integration.data_pool.DataPool,
8 |     data_model: pycelonis.ems.data_integration.data_model.DataModel,
9 |     table: pycelonis.ems.data_integration.data_pool_table.DataPoolTable,
10 | ) -> pd.DataFrame:
11 | 
12 |     """This function creates the PQL query to extract a whole
13 |     table from a data model. If the table is too big, the kernel might
14 |     shut down; make sure enough RAM is available.
15 | 
16 |     Input:
17 |         celonis_object: Celonis object already instantiated
18 |         data_pool: Data Pool object already instantiated
19 |         data_model: Data Model object already instantiated
20 |         table: Table object already instantiated
21 | 
22 |     Returns:
23 |         df: DataFrame containing the target table.
24 |     """
25 | 
26 |     # Instantiate query object
27 |     query = PQL(distinct=False, limit=None, offset=None)
28 | 
29 |     # Populate query with all the columns from the table
30 |     for PQL_column in table.get_columns():
31 |         query += PQLColumn(name=PQL_column.name, query=f""" "{table.name}"."{PQL_column.name}" """)
32 | 
33 |     try:
34 |         # If you have USE permissions on the data model this method can be used
35 |         df = DataModel.export_data_frame_from(
36 |             celonis_object.client,
37 |             pool_id=data_model.pool_id,
38 |             data_model_id=data_model.id,
39 |             query=query)
40 |         print("You have USE permissions")
41 |     except Exception:
42 |         # Should work if you don't have USE permissions
43 |         df = data_model.export_data_frame(query)
44 |         print("You might not have USE permissions")
45 | 
46 |     return df
47 | 
48 | 
--------------------------------------------------------------------------------
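
For reference, here is a minimal usage sketch for the helper above. The pool, data model, and table names are placeholders, and the `get_data_models()` / `get_tables()` accessors are assumed to be available on the PyCelonis 2.x objects; adjust everything to match your own EMS setup.

```python
from pycelonis import get_celonis

# Connect (inside the ML Workbench the URL and API token are picked up automatically).
celonis = get_celonis()

# Placeholder object names -- replace with pools/models/tables from your own team.
pool = next(p for p in celonis.data_integration.get_data_pools() if p.name == "My Data Pool")
data_model = next(m for m in pool.get_data_models() if m.name == "My Data Model")
table = next(t for t in pool.get_tables() if t.name == "ACTIVITIES")

# Pull the whole table into a pandas DataFrame using the helper defined above.
df = extract_table_from_data_pool(celonis, pool, data_model, table)
print(df.shape)
```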