├── .gitignore ├── LICENSE ├── README.md ├── admin ├── __init__.py ├── bulk_dataset_tag_importer.py ├── bulk_field_tag_importer.py ├── bulk_set_fh_sensitivity.py ├── bulk_set_volume_sensitivity.py ├── bulk_table_view_tag_importer.py ├── bulk_tag_exporter.py ├── bulk_tag_importer.py ├── field_description_importer.py ├── get_audit_log_by_day.py ├── key_asset_tagger.py ├── mute_non_domain_tables.py ├── requirements.txt ├── table_description_importer.py ├── unmute_tables.py └── user_role_exporter.py ├── app ├── __init__.py ├── __main__.py ├── app.py ├── categories.py ├── controls.py ├── executor.py ├── header.py ├── readme.py ├── themes.py └── utilities.py ├── code.png ├── configs └── configs.ini ├── insights ├── bigquery_insights_importer.py ├── extract_mc_insights_dbx.py └── requirements.txt ├── landing.png ├── lib ├── __init__.py ├── auth │ ├── __init__.py │ └── mc_auth.py ├── helpers │ ├── __init__.py │ ├── constants.py │ ├── encryption.py │ ├── logs.py │ ├── parser_config.json │ └── sdk_helpers.py └── util.py ├── lineage ├── assets_downstream_from_asset.py ├── field_lineage_retrieval.py ├── incidents_upstream_from_report.py ├── insertLineageFromCSV.py ├── lineage.py ├── lineage_graph_retrieval.py ├── reports_by_schema.py ├── requirements.txt └── tables_upstream_from_report.py ├── mcsdksamplerunner.py ├── monitors ├── __init__.py ├── add_remove_monitoring_rules.py ├── bulk_export_monitors.py ├── bulk_set_freshness_sensitivity.py ├── bulk_set_unchanged_size_sensitivity.py ├── deduplicate_metric_monitors.py ├── delete_monitors_by_audience.py ├── delete_monitors_without_tag.py ├── enable_monitored_table_volume_queries.py ├── field_health_resource_migration.py ├── migration_monitors.py ├── monitor_migration_util.py ├── monitors_stats.py ├── overlapped_monitor_schedules.py ├── requirements.txt ├── run_monitors_by_tag.py └── track_unmonitored_tables_in_data_products.py ├── notifications ├── README.md ├── google_chat_lambda.py ├── service_now_lambda.py └── webex_lambda.py ├── requirements.txt ├── tables ├── __init__.py └── link_tables_via_descriptions.py └── utility.png /.gitignore: -------------------------------------------------------------------------------- 1 | *.xlsx 2 | *.csv 3 | *.json 4 | !lib/helpers/parser_config.json 5 | *.log 6 | *.bkp 7 | *.yml 8 | *.yaml 9 | .env 10 | .idea 11 | venv 12 | .DS_Store 13 | output 14 | keys 15 | __pycache__ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | # Monte Carlo Python SDK Examples 5 | 6 | These examples use [Pycarlo](https://github.com/monte-carlo-data/python-sdk), Monte Carlo's Python SDK, and Monte Carlo's [CLI](https://pypi.org/project/montecarlodata/). 7 | 8 | 9 | - [Utility Setup](#utility-setup) 10 | - [Quick Start](#quick-start) 11 | - [MC SDK Samples App](#1-mc-sdk-samples-app) 12 | - [MC SDK Sample Runner](#2-mc-sdk-sample-runner-wrapper) 13 | - [Standalone Scripts](#3-standalone-script) 14 | 15 | ## Utility Setup 16 | Some of the scripts in this repository can be run from the CLI app, from the utility wrapper runner, or as standalone scripts. 17 | 1. Navigate to a desired directory where the repository will reside 18 | 2. Clone or download the git repository 19 | ```bash 20 | git clone https://github.com/monte-carlo-data/monte-carlo-python-sdk-examples.git 21 | ``` 22 | 3. You can choose an existing or new virtual environment, or use the base python installation as the interpreter. 23 | In either case, make sure to use python3.12 as the base interpreter (see the example after this list) 24 | 4. Install all python modules: 25 | ```bash 26 | python -m pip install -r requirements.txt 27 | ``` 28 |
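For example, assuming `python3.12` is available on your PATH, a new virtual environment can be created and the requirements installed like this (the activation command varies by shell/OS):

```bash
python3.12 -m venv venv
source venv/bin/activate
python -m pip install -r requirements.txt
```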


29 | 30 | ## Quick Start 31 | 32 | ### 1. MC SDK Samples App 33 | 34 | An app with a nice-looking interface for running scripts from the terminal. If you are not too familiar with Python and command-line utilities in general, this option is best suited for you. To launch the app, execute the command below: 35 | 36 | ```bash 37 | python -m app 38 | ``` 39 | 40 | ![app](landing.png) ![code](code.png) ![utility](utility.png) 41 | 42 | **Note:** The navigation keys will be shown in the footer. 43 | 44 |


45 | 46 | ### 2. MC SDK Sample Runner (Wrapper) 47 | 48 | CLI that collects different types of utilities into commands and subcommands. Use the ```--help/-h``` flag for details on the commands/utilities available. 49 | 50 | ```bash 51 | python mcsdksamplerunner.py -h 52 | ``` 53 | 54 | If the Monte Carlo CLI has not been configured before, running any utility will prompt for Monte Carlo credentials to 55 | generate new tokens. This only applies for accounts not using SSO. 56 | 57 | ### Example: 58 | 59 | ```bash 60 | (venv) python3.12 mcsdksamplerunner.py monitors bulk-set-freshness-sensitivity -p demo -i /Users/hjarrin/Downloads/freshness_thresholds_auto.csv -w aaaa7777-7777-a7a7-a7a7a-aaaa7777 61 | 62 | 63 | ███╗ ███╗ ██████╗ ███╗ ██╗████████╗███████╗ ██████╗ █████╗ ██████╗ ██╗ ██████╗ 64 | ████╗ ████║██╔═══██╗████╗ ██║╚══██╔══╝██╔════╝ ██╔════╝██╔══██╗██╔══██╗██║ ██╔═══██╗ 65 | ██╔████╔██║██║ ██║██╔██╗ ██║ ██║ █████╗ ██║ ███████║██████╔╝██║ ██║ ██║ 66 | ██║╚██╔╝██║██║ ██║██║╚██╗██║ ██║ ██╔══╝ ██║ ██╔══██║██╔══██╗██║ ██║ ██║ 67 | ██║ ╚═╝ ██║╚██████╔╝██║ ╚████║ ██║ ███████╗ ╚██████╗██║ ██║██║ ██║███████╗╚██████╔╝ 68 | ╚═╝ ╚═╝ ╚═════╝ ╚═╝ ╚═══╝ ╚═╝ ╚══════╝ ╚═════╝╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝ ╚═════╝ 69 | 70 | 71 | 2024-08-13 16:15:28 INFO - running utility using 'demo' profile 72 | 2024-08-13 16:15:28 INFO - checking montecarlo test version... 73 | 2024-08-13 16:15:28 INFO - montecarlo test present 74 | 2024-08-13 16:15:28 INFO - validating montecarlo test connection... 75 | 2024-08-13 16:15:29 ERROR - unable to validate token 76 | 2024-08-13 16:15:29 INFO - creating new token 77 | MC Username: testuser@testdomain.com 78 | MC Password: 79 | 2024-08-13 16:15:46 INFO - token stored successfully 80 | 2024-08-13 16:15:46 INFO - starting input file validation... 81 | 2024-08-13 16:15:46 INFO - updating freshness rules... 82 | 2024-08-13 16:15:48 INFO - freshness threshold updated successfully for table hxe:dev_schema.offer 83 | 2024-08-13 16:15:49 INFO - freshness threshold updated successfully for table hxe:dev_schema.subscription 84 | 2024-08-13 16:15:49 INFO - freshness threshold updated successfully for table hxe:dev_schema.zuora_invoice 85 | [COMPLETE] ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:00:00 86 | ``` 87 | 88 | **Note:** If your account is using SSO, generate the token manually from the UI and store them in ```~/.mcd/profiles.ini 89 | ``` or run the ```montecarlo configure``` command by following the onscreen prompts. 90 | 91 |


92 | 93 | ### 3. Standalone Script 94 | 95 | Scripts are organized into different folders depending on their functionality. You still have the option to run a given python file as a regular script. For example: 96 | 97 | ```bash 98 | python monitors/monitor_migration_util.py -h 99 | ``` 100 | 101 | or 102 | 103 | ```bash 104 | cd monitors 105 | python monitor_migration_util.py -h 106 | ``` 107 | 108 |
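Note that most of the scripts under ```admin/``` do not take command-line flags; they prompt interactively for an API key ID and token (generated in Settings -> API within the MC UI) and any other inputs they need. For example (the values shown are placeholders):

```bash
python admin/bulk_tag_importer.py
MCD ID: <your API key id>
MCD Token: <your API token>
DW ID: <warehouse uuid, or press enter to list the available warehouses>
CSV Filename: tags_to_import.csv
```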


-------------------------------------------------------------------------------- /admin/__init__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging.config 3 | import subprocess 4 | import textwrap 5 | import traceback 6 | import shutil 7 | import yaml 8 | import lib.helpers.constants as const 9 | from contextlib import nullcontext 10 | from lib.util import Monitors, Tables, Admin 11 | from pathlib import Path 12 | from lib.helpers.logs import LoggingConfigs, LogHelper, LogRotater, LOGGER 13 | from lib.helpers import sdk_helpers 14 | from pycarlo.core import Mutation 15 | from rich.progress import Progress 16 | from rich import print 17 | -------------------------------------------------------------------------------- /admin/bulk_dataset_tag_importer.py: -------------------------------------------------------------------------------- 1 | #Instructions: 2 | #1. Create a CSV with 3 columns in the following order: dataset, tag key, tag value 3 | # dataset must be lowercase 4 | #2. Run this script, input your API Key ID, Token (generated in Settings -> API within MC UI) 5 | #3. Input the Data Warehouse ID in which the datasets to import tags exist (will check and ignore tables in other warehouses) 6 | #Note: If you do not know the Data Warehouse ID, you can skip by pressing enter and the script will give you the options to choose from. You'll need to rerun the script after this. 7 | #4. Input the name of the CSV with the tags 8 | #5. This script creates an "import_log.txt" file with some logging details such as datasets that were not found in the dwId or total volume of tags imported 9 | #Note: If you have a list of tags for tables in multiple warehouses, run again for each data warehouse ID 10 | 11 | from pycarlo.core import Client, Query, Mutation, Session 12 | import csv 13 | import json 14 | from typing import Optional 15 | from datetime import datetime 16 | 17 | def getDefaultWarehouse(mcdId,mcdToken): 18 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 19 | query=Query() 20 | query.get_user().account.warehouses.__fields__("name","connection_type","uuid") 21 | warehouses=client(query).get_user.account.warehouses 22 | if len(warehouses) == 1: 23 | return warehouses[0].uuid 24 | elif len(warehouses) > 1: 25 | for val in warehouses: 26 | print("Name: " + val.name + ", Connection Type: " + val.connection_type + ", UUID: " + val.uuid) 27 | print("Error: More than one warehouse, please re-run with UUID value") 28 | quit() 29 | 30 | def get_dataset_query(dwId,first: Optional[int] = 1000, after: Optional[str] = None) -> Query: 31 | query = Query() 32 | get_datasets = query.get_datasets(first=first, dw_id=dwId, **(dict(after=after) if after else {})) 33 | get_datasets.edges.node.__fields__("project","dataset","mcon") 34 | get_datasets.page_info.__fields__(end_cursor=True) 35 | get_datasets.page_info.__fields__("has_next_page") 36 | return query 37 | 38 | def getMcons(mcdId,mcdToken,dwId): 39 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 40 | dataset_mcon_dict={} 41 | next_token=None 42 | while True: 43 | response = client(get_dataset_query(dwId=dwId,after=next_token)).get_datasets 44 | # print(response) 45 | for dataset in response.edges: 46 | dataset_mcon_dict[dataset.node.dataset.lower()] = dataset.node.mcon 47 | if response.page_info.has_next_page: 48 | next_token = response.page_info.end_cursor 49 | else: 50 | break 51 | return dataset_mcon_dict 52 | 53 | def 
bulkImportTagsFromCSV(mcdId,mcdToken,csvFileName, mconDict): 54 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 55 | tags_list=[] 56 | bulk_tag_query = """ 57 | mutation bulkCreateOrUpdateObjectProperties($inputObjectProperties:[InputObjectProperty]!) { 58 | bulkCreateOrUpdateObjectProperties(inputObjectProperties:$inputObjectProperties) { 59 | objectProperties { 60 | mconId 61 | } 62 | } 63 | } 64 | """ 65 | with open(csvFileName,"r") as tags_to_import: 66 | tags=csv.reader(tags_to_import, delimiter=",") 67 | total_tags=0 68 | imported_tag_counter = 0 69 | incremental_tags = 0 70 | print("Import time: " + str(datetime.now()), file=open('import_log.txt', 'a')) 71 | for row in tags: 72 | print(', '.join(row)) 73 | total_tags += 1 74 | if row[0].lower() not in mconDict.keys(): 75 | # print a failure message if the dataset in the csv does not exist on the dwId/project: 76 | # print("dataset check failed: " + row[0].lower()) 77 | print(("dataset check failed: " + row[0].lower()), file=open('import_log.txt', 'a')) 78 | continue 79 | if mconDict[row[0].lower()]: 80 | # print a success message if the dataset in the csv exists on the dwId/project: 81 | # print("dataset check succeeded: " + row[0].lower()) 82 | print(("dataset check succeeded: " + row[0].lower()), file=open('import_log.txt', 'a')) 83 | temp_obj=dict(mconId=mconDict[row[0].lower()],propertyName=row[1],propertyValue=row[2]) 84 | print((temp_obj), file=open('import_log.txt', 'a')) 85 | print(("\n"), file=open('import_log.txt', 'a')) 86 | tags_list.append(temp_obj) 87 | imported_tag_counter += 1 88 | incremental_tags += 1 89 | # Uncomment next 2 rows to print the tag counter on each iteration: 90 | # print("Tag count: " + str(incremental_tags)) 91 | # print(("Tag count: " + str(incremental_tags)), file=open('import_log.txt', 'a')) 92 | if incremental_tags == 99: 93 | mutation=Mutation() 94 | client(bulk_tag_query, variables=dict(inputObjectProperties=tags_list)) 95 | print(("100 tags uploaded!" 
+ "\n"), file=open('import_log.txt', 'a')) 96 | tags_list.clear() 97 | incremental_tags = 0 98 | if incremental_tags > 0: 99 | mutation=Mutation() 100 | client(bulk_tag_query, variables=dict(inputObjectProperties=tags_list)) 101 | print("Last tag group count: " + str(incremental_tags), file=open('import_log.txt', 'a')) 102 | print(str(incremental_tags) + " tags uploaded in the last batch!", file=open('import_log.txt', 'a')) 103 | # print("Successfully Imported " + str(imported_tag_counter) + " Tags") 104 | # print("Tags list: " + str(tags_list)) 105 | print("END OF EXECUTION: Successfully Imported " + str(imported_tag_counter) + " Tags" + "\n", file=open('import_log.txt', 'a')) 106 | 107 | if __name__ == '__main__': 108 | #-------------------INPUT VARIABLES--------------------- 109 | mcd_id = input("MCD ID: ") 110 | mcd_token = input("MCD Token: ") 111 | dw_id = input("DW ID: ") 112 | csv_file = input("CSV Filename: ") 113 | 114 | #------------------------------------------------------- 115 | if dw_id and csv_file: 116 | mcon_dict = getMcons(mcd_id,mcd_token,dw_id) 117 | bulkImportTagsFromCSV(mcd_id,mcd_token,csv_file,mcon_dict) 118 | elif csv_file and not dw_id: 119 | warehouse_id = getDefaultWarehouse(mcd_id,mcd_token) 120 | mcon_dict = getMcons(mcd_id,mcd_token,warehouse_id) 121 | bulkImportTagsFromCSV(mcd_id,mcd_token,csv_file,mcon_dict) 122 | -------------------------------------------------------------------------------- /admin/bulk_field_tag_importer.py: -------------------------------------------------------------------------------- 1 | #Instructions: 2 | #1. Create a CSV with 4 columns in the following order: full_table_id, field_name, tag key, tag value 3 | # full_table_is must be lowercase in the format database:schema.table, field_name must also be lowercase 4 | #2. Run this script, input your API Key ID, Token (generated in Settings -> API within MC UI) 5 | #3. Input the Data Warehouse ID in which the tables to import tags exist (will check and ignore tables in other warehouses) 6 | #Note: If you do not know the Data Warehouse ID, you can skip by pressing enter and the script will give you the options to choose from. You'll need to rerun the script after this. 7 | #4. 
Input the name of the CSV with the tags 8 | #Note: If you have a list of tags for tables in multiple warehouses, run again for each data warehouse ID 9 | 10 | from pycarlo.core import Client, Query, Mutation, Session 11 | import csv 12 | import json 13 | from typing import Optional 14 | 15 | def getDefaultWarehouse(mcdId,mcdToken): 16 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 17 | query=Query() 18 | query.get_user().account.warehouses.__fields__("name","connection_type","uuid") 19 | warehouses=client(query).get_user.account.warehouses 20 | if len(warehouses) == 1: 21 | return warehouses[0].uuid 22 | elif len(warehouses) > 1: 23 | for val in warehouses: 24 | print("Name: " + val.name + ", Connection Type: " + val.connection_type + ", UUID: " + val.uuid) 25 | print("Error: More than one warehouse, please re-run with UUID value") 26 | quit() 27 | 28 | def get_table_query(dwId,first: Optional[int] = 1000, after: Optional[str] = None) -> Query: 29 | query = Query() 30 | get_tables = query.get_tables(first=first, dw_id=dwId, is_deleted=False, **(dict(after=after) if after else {})) 31 | get_tables.edges.node.__fields__("full_table_id","mcon") 32 | get_tables.page_info.__fields__(end_cursor=True) 33 | get_tables.page_info.__fields__("has_next_page") 34 | return query 35 | 36 | def getMcons(mcdId,mcdToken,dwId): 37 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 38 | table_mcon_dict={} 39 | next_token=None 40 | while True: 41 | response = client(get_table_query(dwId=dwId,after=next_token)).get_tables 42 | print(response) 43 | for table in response.edges: 44 | table_mcon_dict[table.node.full_table_id] = table.node.mcon 45 | if response.page_info.has_next_page: 46 | next_token = response.page_info.end_cursor 47 | else: 48 | break 49 | return table_mcon_dict 50 | 51 | def bulkImportTagsFromCSV(mcdId,mcdToken,csvFileName, mconDict): 52 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 53 | tags_list=[] 54 | bulk_tag_query = """ 55 | mutation bulkCreateOrUpdateObjectProperties($inputObjectProperties:[InputObjectProperty]!) 
{ 56 | bulkCreateOrUpdateObjectProperties(inputObjectProperties:$inputObjectProperties) { 57 | objectProperties { 58 | mconId 59 | } 60 | } 61 | } 62 | """ 63 | with open(csvFileName,"r") as tags_to_import: 64 | tags=csv.reader(tags_to_import, delimiter=",") 65 | total_tags=0 66 | imported_tag_counter = 0 67 | incremental_tags = 0 68 | for row in tags: 69 | total_tags += 1 70 | if row[0] not in mconDict.keys(): 71 | print("check failed: " + row[0]) 72 | continue 73 | if mconDict[row[0]]: 74 | print("check succeeded: " + row[0]) 75 | temp_obj=dict(mconId=str(mconDict[row[0]]+ '+++' + row[1].lower()).replace('++table++','++field++'),propertyName=row[2],propertyValue=row[3]) 76 | print(temp_obj) 77 | tags_list.append(temp_obj) 78 | imported_tag_counter += 1 79 | incremental_tags += 1 80 | if incremental_tags == 99: 81 | mutation=Mutation() 82 | print(client(bulk_tag_query, variables=dict(inputObjectProperties=tags_list))) 83 | tags_list.clear() 84 | incremental_tags = 0 85 | if incremental_tags > 0: 86 | mutation=Mutation() 87 | print(client(bulk_tag_query, variables=dict(inputObjectProperties=tags_list))) 88 | print("Successfully Imported " + str(imported_tag_counter) + " Tags") 89 | 90 | if __name__ == '__main__': 91 | #-------------------INPUT VARIABLES--------------------- 92 | mcd_id = input("MCD ID: ") 93 | mcd_token = input("MCD Token: ") 94 | dw_id = input("DW ID: ") 95 | csv_file = input("CSV Filename: ") 96 | #------------------------------------------------------- 97 | if dw_id and csv_file: 98 | mcon_dict = getMcons(mcd_id,mcd_token,dw_id) 99 | bulkImportTagsFromCSV(mcd_id,mcd_token,csv_file,mcon_dict) 100 | elif csv_file and not dw_id: 101 | warehouse_id = getDefaultWarehouse(mcd_id,mcd_token) 102 | mcon_dict = getMcons(mcd_id,mcd_token,warehouse_id) 103 | bulkImportTagsFromCSV(mcd_id,mcd_token,csv_file,mcon_dict) 104 | -------------------------------------------------------------------------------- /admin/bulk_set_fh_sensitivity.py: -------------------------------------------------------------------------------- 1 | #INSTRUCTIONS: 2 | #1.Create a CSV with 2 columns: [full_table_id, minimum sensitivity delay in seconds] 3 | #2. 
Run this script, providing the mcdId, mcdToken, DWId,and CSV 4 | #Limitation: 5 | #This will make 1 request per table, so 10,000/day request limit via API is still a consideration 6 | #If there are multiple FH monitors on a single table, it will only update for the first one returned by MC APIs 7 | 8 | from pycarlo.core import Client, Query, Mutation, Session 9 | import csv 10 | from typing import Optional 11 | 12 | def getDefaultWarehouse(mcdId,mcdToken): 13 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 14 | query=Query() 15 | query.get_user().account.warehouses.__fields__("name","connection_type","uuid") 16 | warehouses=client(query).get_user.account.warehouses 17 | if len(warehouses) == 1: 18 | return warehouses[0].uuid 19 | elif len(warehouses) > 1: 20 | for val in warehouses: 21 | print("Name: " + val.name + ", Connection Type: " + val.connection_type + ", UUID: " + val.uuid) 22 | print("Error: More than one warehouse, please re-run with UUID value") 23 | quit() 24 | 25 | def get_table_query(dwId,first: Optional[int] = 1000, after: Optional[str] = None) -> Query: 26 | query = Query() 27 | get_tables = query.get_tables(first=first, dw_id=dwId, is_deleted=False, **(dict(after=after) if after else {})) 28 | get_tables.edges.node.__fields__("full_table_id","mcon") 29 | get_tables.page_info.__fields__(end_cursor=True) 30 | get_tables.page_info.__fields__("has_next_page") 31 | return query 32 | 33 | def getMcons(mcdId,mcdToken,dwId): 34 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 35 | table_mcon_dict={} 36 | next_token=None 37 | while True: 38 | response = client(get_table_query(dwId=dwId,after=next_token)).get_tables 39 | print(response) 40 | for table in response.edges: 41 | table_mcon_dict[table.node.full_table_id] = table.node.mcon 42 | if response.page_info.has_next_page: 43 | next_token = response.page_info.end_cursor 44 | else: 45 | break 46 | return table_mcon_dict 47 | 48 | def getFieldHealthMonitors(mcdId,mcdToken): 49 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 50 | get_monitors_query = "query{getMonitors(monitorTypes:[STATS]){monitorType,entities,uuid}}" 51 | monitor_response = client(get_monitors_query) 52 | fh_table_dict={} 53 | for val in monitor_response.get_monitors: 54 | table_name = val.entities[0] 55 | fh_table_dict[table_name] = val.uuid 56 | return fh_table_dict 57 | 58 | def bulkSetFieldHealthSensitivity(mcdId,mcdToken,csvFileName,fieldHealthDict): 59 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 60 | imported_sensitivity_counter=0 61 | with open(csvFileName,"r") as sensitivitiesToImport: 62 | sensitivities=csv.reader(sensitivitiesToImport,delimiter=",") 63 | for row in sensitivities: 64 | if row[0] not in fieldHealthDict.keys(): 65 | print("check failed: " +row[0]) 66 | continue 67 | if fieldHealthDict[row[0]]: 68 | imported_sensitivity_counter+=1 69 | print("check succeeded " + row[0]) 70 | print(fieldHealthDict[row[0]]) 71 | mutation=Mutation() 72 | mutation.set_sensitivity(event_type="metric",monitor_uuid=fieldHealthDict[row[0]],threshold=dict(level=row[1].upper())).__fields__("success") 73 | print(mutation) 74 | print(row[0],client(mutation).set_sensitivity,row[1]) 75 | print("Successfully imported freshness for " + str(imported_sensitivity_counter) + " Tables") 76 | 77 | if __name__ == '__main__': 78 | #-------------------INPUT VARIABLES--------------------- 79 | mcd_id = input("MCD ID: ") 80 | mcd_token = input("MCD Token: ") 81 | csv_file = input("CSV Filename: ") 82 | 
#------------------------------------------------------- 83 | if csv_file: 84 | fh_monitors = getFieldHealthMonitors(mcd_id,mcd_token) 85 | bulkSetFieldHealthSensitivity(mcd_id,mcd_token,csv_file,fh_monitors) 86 | -------------------------------------------------------------------------------- /admin/bulk_set_volume_sensitivity.py: -------------------------------------------------------------------------------- 1 | #INSTRUCTIONS: 2 | #1.Create a CSV with 2 columns: [full_table_id, sensitivity (must be upper case with the following values: LOW, MEDIUM, HIGH)] 3 | #2. Run this script, providing the mcdId, mcdToken, DWId, and CSV 4 | #Limitation: 5 | #This will make 1 request per table, so 10,000/day request limit via API is still a consideration 6 | 7 | from pycarlo.core import Client, Query, Mutation, Session 8 | import csv 9 | from typing import Optional 10 | 11 | def getDefaultWarehouse(mcdId,mcdToken): 12 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 13 | query=Query() 14 | query.get_user().account.warehouses.__fields__("name","connection_type","uuid") 15 | warehouses=client(query).get_user.account.warehouses 16 | if len(warehouses) == 1: 17 | return warehouses[0].uuid 18 | elif len(warehouses) > 1: 19 | for val in warehouses: 20 | print("Name: " + val.name + ", Connection Type: " + val.connection_type + ", UUID: " + val.uuid) 21 | print("Error: More than one warehouse, please re-run with UUID value") 22 | quit() 23 | 24 | def get_table_query(dwId,first: Optional[int] = 1000, after: Optional[str] = None) -> Query: 25 | query = Query() 26 | get_tables = query.get_tables(first=first, dw_id=dwId, is_deleted=False, **(dict(after=after) if after else {})) 27 | get_tables.edges.node.__fields__("full_table_id","mcon") 28 | get_tables.page_info.__fields__(end_cursor=True) 29 | get_tables.page_info.__fields__("has_next_page") 30 | return query 31 | 32 | def getMcons(mcdId,mcdToken,dwId): 33 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 34 | table_mcon_dict={} 35 | next_token=None 36 | while True: 37 | response = client(get_table_query(dwId=dwId,after=next_token)).get_tables 38 | print(response) 39 | for table in response.edges: 40 | table_mcon_dict[table.node.full_table_id] = table.node.mcon 41 | if response.page_info.has_next_page: 42 | next_token = response.page_info.end_cursor 43 | else: 44 | break 45 | return table_mcon_dict 46 | 47 | def bulkSetFreshnessSensitivity(mcdId,mcdToken,csvFileName,mconDict): 48 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 49 | imported_sensitivity_counter=0 50 | with open(csvFileName,"r") as sensitivitiesToImport: 51 | sensitivities=csv.reader(sensitivitiesToImport,delimiter=",") 52 | for row in sensitivities: 53 | if row[0] not in mconDict.keys(): 54 | print("check failed: " +row[0]) 55 | continue 56 | if mconDict[row[0]]: 57 | imported_sensitivity_counter+=1 58 | print("check succeeded " + row[0]) 59 | mutation=Mutation() 60 | mutation.set_sensitivity(event_type="size_diff",mcon=mconDict[row[0]],threshold=dict(level=str(row[1]))).__fields__("success") 61 | print(row[0],client(mutation).set_sensitivity,row[1]) 62 | print("Successfully imported freshness for " + str(imported_sensitivity_counter) + " tables") 63 | 64 | if __name__ == '__main__': 65 | #-------------------INPUT VARIABLES--------------------- 66 | mcd_id = input("MCD ID: ") 67 | mcd_token = input("MCD Token: ") 68 | dw_id = input("DW ID: ") 69 | csv_file = input("CSV Filename: ") 70 | #------------------------------------------------------- 
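# Example input CSV for this script (table names below are hypothetical; column 1 is full_table_id,
# column 2 is the sensitivity level and must be upper case LOW, MEDIUM, or HIGH):
#   analytics:prod.orders,HIGH
#   analytics:prod.customers,MEDIUM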
71 | if dw_id and csv_file: 72 | mcon_dict=getMcons(mcd_id,mcd_token,dw_id) 73 | bulkSetFreshnessSensitivity(mcd_id,mcd_token,csv_file,mcon_dict) 74 | if csv_file and not dw_id: 75 | warehouse_id = getDefaultWarehouse(mcd_id,mcd_token) 76 | mcon_dict = getMcons(mcd_id,mcd_token,warehouse_id) 77 | bulkSetFreshnessSensitivity(mcd_id,mcd_token,csv_file,mcon_dict) -------------------------------------------------------------------------------- /admin/bulk_table_view_tag_importer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 4 | import csv 5 | from tables import * 6 | from lib.helpers import sdk_helpers 7 | 8 | # Initialize logger 9 | util_name = os.path.basename(__file__).split('.')[0] 10 | logging.config.dictConfig(LoggingConfigs.logging_configs(util_name)) 11 | 12 | 13 | class BulkTableViewTagImporter(Tables): 14 | 15 | def __init__(self, profile, config_file: str = None, progress: Progress = None): 16 | """Creates an instance of BulkTableTagImporter. 17 | 18 | Args: 19 | profile(str): Profile to use stored in montecarlo test. 20 | config_file (str): Path to the Configuration File. 21 | progress(Progress): Progress bar. 22 | """ 23 | 24 | super().__init__(profile, config_file, progress) 25 | self.progress_bar = progress 26 | 27 | @staticmethod 28 | def validate_input_file(input_file: str) -> any: 29 | """Ensure path given exists. 30 | 31 | Args: 32 | input_file(str): Input file. 33 | 34 | Returns: 35 | Path: Full path to input file. 36 | """ 37 | 38 | file = Path(input_file) 39 | 40 | if file.is_file(): 41 | asset_ids = [] 42 | tag_set = [] 43 | with open(input_file, "r") as input_csv: 44 | reader = csv.reader(input_csv, delimiter=",") 45 | for row in reader: 46 | if len(row) == 2: 47 | tag_set.append(row[1]) 48 | elif len(row) > 2: 49 | LOGGER.error(f"1 or 2 column(s) expected in input file, received {len(row)}") 50 | sys.exit(1) 51 | asset_ids.append(row[0]) 52 | if len(asset_ids) == 0: 53 | LOGGER.error("No rows present in input file") 54 | sys.exit(1) 55 | return asset_ids, tag_set 56 | else: 57 | LOGGER.error("invalid input file") 58 | sys.exit(1) 59 | 60 | def generate_mcons(self, asset_ids: list, warehouse_name: str, asset_type: str): 61 | """Running one call per asset to obtain the MCON can be expensive, instead, the MCON can be predicted 62 | and this method will use the asset type from the input file to generate it. 63 | 64 | Args: 65 | asset_ids(list): list of asset ids 66 | warehouse_name(str): name of warehouse as it appears in MC 67 | asset_type(str): table or view 68 | """ 69 | 70 | _, raw = self.get_warehouses() 71 | account, warehouse = None, None 72 | for acct in raw: 73 | account = raw[acct].uuid 74 | for wh in raw[acct].warehouses: 75 | if wh.name == warehouse_name: 76 | warehouse = wh.uuid 77 | break 78 | 79 | if None in (warehouse, account): 80 | LOGGER.error("unable to locate account/warehouse. 
Ensure the warehouse provided is spelled correctly") 81 | sys.exit(1) 82 | 83 | return [f"MCON++{account}++{warehouse}++{asset_type}++{asset}" for asset in asset_ids] 84 | 85 | @staticmethod 86 | def process_tags(mcon, tag_string, properties): 87 | """Helper function to process a tag string and append to properties list.""" 88 | try: 89 | for tag in tag_string.split(','): 90 | k, v = tag.split(':', 1) # Avoids ValueError for unexpected input 91 | properties.append({ 92 | 'mcon_id': mcon, 93 | 'property_name': k.strip(), 94 | 'property_value': v.strip() 95 | }) 96 | except ValueError: 97 | LOGGER.debug(f"Skipping invalid tag format: {tag_string}") 98 | 99 | def import_tags(self, assets: list, tags: str): 100 | """ """ 101 | 102 | properties = [] 103 | 104 | LOGGER.debug(f"generating payload for {len(assets)} assets") 105 | for index, mcon in enumerate(assets): 106 | if isinstance(tags, list) and index < len(tags): 107 | self.process_tags(mcon, tags[index], properties) 108 | elif isinstance(tags, str): 109 | self.process_tags(mcon, tags, properties) 110 | 111 | batches = [properties[i:i + 100] for i in range(0, len(properties), 100)] 112 | LOGGER.info(f"splitting {len(properties)} properties in batches of 100") 113 | for batch in batches: 114 | response = self.auth.client(self.bulk_create_or_update_object_properties(batch)).bulk_create_or_update_object_properties 115 | if not response: 116 | LOGGER.error(f"unable to set tags") 117 | else: 118 | LOGGER.info(f"tag(s) set successfully") 119 | 120 | 121 | def main(*args, **kwargs): 122 | 123 | # Capture Command Line Arguments 124 | parser = sdk_helpers.generate_arg_parser(os.path.basename(os.path.dirname(os.path.abspath(__file__))), 125 | os.path.basename(__file__)) 126 | 127 | if not args: 128 | args = parser.parse_args(*args, **kwargs) 129 | else: 130 | sdk_helpers.dump_help(parser, main, *args) 131 | args = parser.parse_args(*args, **kwargs) 132 | 133 | @sdk_helpers.ensure_progress 134 | def run_utility(progress, util, args): 135 | util.progress_bar = progress 136 | assets, tags = util.validate_input_file(args.input_file) 137 | if args.tag: 138 | tags = args.tag 139 | util.import_tags(util.generate_mcons(assets, args.warehouse, args.asset_type), tags) 140 | 141 | util = BulkTableViewTagImporter(args.profile) 142 | run_utility(util, args) 143 | 144 | 145 | if __name__ == '__main__': 146 | main() 147 | -------------------------------------------------------------------------------- /admin/bulk_tag_exporter.py: -------------------------------------------------------------------------------- 1 | #Instructions: 2 | #2. Run this script, input your API Key ID, Token (generated in Settings -> API within MC UI) 3 | #3. Input the Data Warehouse ID in which the tables to import tags exist (will check and ignore tables in other warehouses) 4 | #Note: If you do not know the Data Warehouse ID, you can skip by pressing enter and the script will give you the options to choose from. You'll need to rerun the script after this. 5 | #4. Input the name of the CSV you would like to create. 6 | #Note: If you would like to get tags for other warehouse connections, run this again and export to a new CSV filename. 
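# Example of the CSV this script exports (the header row is written by bulkExportTagsToCSV below;
# the table and tag values shown here are hypothetical):
#   full_table_id,tag_key,tag_value
#   analytics:prod.orders,owner,data_engineering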
7 | 8 | from pycarlo.core import Client, Query, Mutation, Session 9 | import csv 10 | import json 11 | from typing import Optional 12 | 13 | def getDefaultWarehouse(mcdId,mcdToken): 14 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 15 | query=Query() 16 | query.get_user().account.warehouses.__fields__("name","connection_type","uuid") 17 | warehouses=client(query).get_user.account.warehouses 18 | if len(warehouses) == 1: 19 | return warehouses[0].uuid 20 | elif len(warehouses) > 1: 21 | for val in warehouses: 22 | print("Name: " + val.name + ", Connection Type: " + val.connection_type + ", UUID: " + val.uuid) 23 | print("Error: More than one warehouse, please re-run with UUID value") 24 | quit() 25 | 26 | def getTableQuery(dwId,first: Optional[int] = 1000, after: Optional[str] = None) -> Query: 27 | query = Query() 28 | get_tables = query.get_tables(first=first, dw_id=dwId, is_deleted=False, **(dict(after=after) if after else {})) 29 | get_tables.edges.node.__fields__("full_table_id","mcon") 30 | get_tables.edges.node.object_properties.__fields__("property_name","property_value") 31 | get_tables.page_info.__fields__(end_cursor=True) 32 | get_tables.page_info.__fields__("has_next_page") 33 | return query 34 | 35 | def getMcons(mcdId,mcdToken,dwId): 36 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 37 | table_mcon_dict={} 38 | next_token=None 39 | while True: 40 | response = client(getTableQuery(dwId=dwId,after=next_token)).get_tables 41 | for table in response.edges: 42 | if len(table.node.object_properties) > 0: 43 | temp_dict={} 44 | # temp_dict["table_name"] = table.node.full_table_id 45 | temp_dict["mcon"] = table.node.mcon 46 | temp_dict["tags"] = [] 47 | for tag in table.node.object_properties: 48 | prop_dict={} 49 | prop_dict["property_name"] = tag["property_name"] 50 | prop_dict["property_value"] = tag["property_value"] 51 | temp_dict["tags"].append(prop_dict) 52 | table_mcon_dict[table.node.full_table_id] = temp_dict 53 | if response.page_info.has_next_page: 54 | next_token = response.page_info.end_cursor 55 | else: 56 | break 57 | return table_mcon_dict 58 | 59 | def bulkExportTagsToCSV(mcdId,mcdToken,csvFileName,mconDict): 60 | with open(csvFileName,"w") as tags_to_export: 61 | writer=csv.writer(tags_to_export) 62 | writer.writerow(["full_table_id","tag_key","tag_value"]) 63 | for table_name in mconDict: 64 | for tag in mconDict[table_name]["tags"]: 65 | writer.writerow([table_name,tag["property_name"],tag["property_value"]]) 66 | 67 | if __name__ == '__main__': 68 | #-------------------INPUT VARIABLES--------------------- 69 | mcd_id = input("MCD ID: ") 70 | mcd_token = input("MCD Token: ") 71 | dw_id = input("DW ID: ") 72 | csv_file = input("CSV Export Filename: ") 73 | #------------------------------------------------------- 74 | if dw_id and csv_file: 75 | mcon_dict = getMcons(mcd_id,mcd_token,dw_id) 76 | bulkExportTagsToCSV(mcd_id,mcd_token,csv_file,mcon_dict) 77 | elif csv_file and not dw_id: 78 | warehouse_id = getDefaultWarehouse(mcd_id,mcd_token) 79 | mcon_dict = getMcons(mcd_id,mcd_token,warehouse_id) 80 | bulkExportTagsToCSV(mcd_id,mcd_token,csv_file,mcon_dict) 81 | elif not csv_file: 82 | print("CSV Export Filename Required.") 83 | -------------------------------------------------------------------------------- /admin/bulk_tag_importer.py: -------------------------------------------------------------------------------- 1 | #Instructions: 2 | #1. 
Create a CSV with 3 columns in the following order: full_table_id, tag key, tag value 3 | # full_table_is must be lowercase in the format database:schema.table 4 | #2. Run this script, input your API Key ID, Token (generated in Settings -> API within MC UI) 5 | #3. Input the Data Warehouse ID in which the tables to import tags exist (will check and ignore tables in other warehouses) 6 | #Note: If you do not know the Data Warehouse ID, you can skip by pressing enter and the script will give you the options to choose from. You'll need to rerun the script after this. 7 | #4. Input the name of the CSV with the tags 8 | #Note: If you have a list of tags for tables in multiple warehouses, run again for each data warehouse ID 9 | 10 | from pycarlo.core import Client, Query, Mutation, Session 11 | import csv 12 | import json 13 | from typing import Optional 14 | 15 | def getDefaultWarehouse(mcdId,mcdToken): 16 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 17 | query=Query() 18 | query.get_user().account.warehouses.__fields__("name","connection_type","uuid") 19 | warehouses=client(query).get_user.account.warehouses 20 | if len(warehouses) == 1: 21 | return warehouses[0].uuid 22 | elif len(warehouses) > 1: 23 | for val in warehouses: 24 | print("Name: " + val.name + ", Connection Type: " + val.connection_type + ", UUID: " + val.uuid) 25 | print("Error: More than one warehouse, please re-run with UUID value") 26 | quit() 27 | 28 | def get_table_query(dwId,first: Optional[int] = 1000, after: Optional[str] = None) -> Query: 29 | query = Query() 30 | get_tables = query.get_tables(first=first, dw_id=dwId, is_deleted=False, **(dict(after=after) if after else {})) 31 | get_tables.edges.node.__fields__("full_table_id","mcon") 32 | get_tables.page_info.__fields__(end_cursor=True) 33 | get_tables.page_info.__fields__("has_next_page") 34 | return query 35 | 36 | def getMcons(mcdId,mcdToken,dwId): 37 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 38 | table_mcon_dict={} 39 | next_token=None 40 | while True: 41 | response = client(get_table_query(dwId=dwId,after=next_token)).get_tables 42 | print(response) 43 | for table in response.edges: 44 | table_mcon_dict[table.node.full_table_id.lower()] = table.node.mcon 45 | if response.page_info.has_next_page: 46 | next_token = response.page_info.end_cursor 47 | else: 48 | break 49 | return table_mcon_dict 50 | 51 | def bulkImportTagsFromCSV(mcdId,mcdToken,csvFileName, mconDict): 52 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 53 | tags_list=[] 54 | bulk_tag_query = """ 55 | mutation bulkCreateOrUpdateObjectProperties($inputObjectProperties:[InputObjectProperty]!) 
{ 56 | bulkCreateOrUpdateObjectProperties(inputObjectProperties:$inputObjectProperties) { 57 | objectProperties { 58 | mconId 59 | } 60 | } 61 | } 62 | """ 63 | with open(csvFileName,"r") as tags_to_import: 64 | tags=csv.reader(tags_to_import, delimiter=",") 65 | total_tags=0 66 | imported_tag_counter = 0 67 | incremental_tags = 0 68 | for row in tags: 69 | total_tags += 1 70 | if row[0].lower() not in mconDict.keys(): 71 | print("check failed: " + row[0].lower()) 72 | continue 73 | if mconDict[row[0].lower()]: 74 | print("check succeeded: " + row[0].lower()) 75 | temp_obj=dict(mconId=mconDict[row[0].lower()],propertyName=row[1],propertyValue=row[2]) 76 | print(temp_obj) 77 | tags_list.append(temp_obj) 78 | imported_tag_counter += 1 79 | incremental_tags += 1 80 | if incremental_tags == 99: 81 | mutation=Mutation() 82 | print(client(bulk_tag_query, variables=dict(inputObjectProperties=tags_list))) 83 | tags_list.clear() 84 | incremental_tags = 0 85 | if incremental_tags > 0: 86 | mutation=Mutation() 87 | print(client(bulk_tag_query, variables=dict(inputObjectProperties=tags_list))) 88 | print("Successfully Imported " + str(imported_tag_counter) + " Tags") 89 | 90 | if __name__ == '__main__': 91 | #-------------------INPUT VARIABLES--------------------- 92 | mcd_id = input("MCD ID: ") 93 | mcd_token = input("MCD Token: ") 94 | dw_id = input("DW ID: ") 95 | csv_file = input("CSV Filename: ") 96 | #------------------------------------------------------- 97 | if dw_id and csv_file: 98 | mcon_dict = getMcons(mcd_id,mcd_token,dw_id) 99 | bulkImportTagsFromCSV(mcd_id,mcd_token,csv_file,mcon_dict) 100 | elif csv_file and not dw_id: 101 | warehouse_id = getDefaultWarehouse(mcd_id,mcd_token) 102 | mcon_dict = getMcons(mcd_id,mcd_token,warehouse_id) 103 | bulkImportTagsFromCSV(mcd_id,mcd_token,csv_file,mcon_dict) 104 | -------------------------------------------------------------------------------- /admin/field_description_importer.py: -------------------------------------------------------------------------------- 1 | ######## 2 | # WARNING: For every line in the CSV file, this script makes one API request. Typically, the API limit per day is 10k. 3 | # If you are updating thousands of field descriptions, please consider spreading the effort across multiple days, or 4 | # you can request a temporary increase in your API request limit. 5 | # Instructions: 6 | # 1. Create a CSV with 3 columns in the following order: full_table_id, field name, desired description 7 | # full_table_id must be lowercase in the format database:schema.table 8 | # 2. Run this script, input your API Key ID, Token (generated in Settings -> API within MC UI) 9 | # 3. Input the Data Warehouse ID in which the tables to import descriptions exist (will check and ignore tables in other warehouses) 10 | # Note: If you do not know the Data Warehouse ID, you can skip by pressing enter and the script will give you the options to choose from. You'll need to rerun the script after this. 11 | # 4. 
Input the name of the CSV with the descriptions 12 | # Note: If you have a list of descriptions for table fields in multiple warehouses, run again for each data warehouse ID 13 | ######## 14 | 15 | from pycarlo.core import Client, Query, Mutation, Session 16 | import csv 17 | import json 18 | from typing import Optional 19 | 20 | 21 | def getDefaultWarehouse(mcdId,mcdToken): 22 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 23 | query=Query() 24 | query.get_user().account.warehouses.__fields__("name","connection_type","uuid") 25 | warehouses=client(query).get_user.account.warehouses 26 | if len(warehouses) == 1: 27 | return warehouses[0].uuid 28 | elif len(warehouses) > 1: 29 | for val in warehouses: 30 | print("Name: " + val.name + ", Connection Type: " + val.connection_type + ", UUID: " + val.uuid) 31 | print("Error: More than one warehouse, please re-run with UUID value") 32 | quit() 33 | 34 | def get_table_query(dwId,first: Optional[int] = 1000, after: Optional[str] = None) -> Query: 35 | query = Query() 36 | get_tables = query.get_tables(first=first, dw_id=dwId, is_deleted=False, **(dict(after=after) if after else {})) 37 | get_tables.edges.node.__fields__("full_table_id","mcon") 38 | get_tables.page_info.__fields__(end_cursor=True) 39 | get_tables.page_info.__fields__("has_next_page") 40 | return query 41 | 42 | def getMcons(mcdId,mcdToken,dwId): 43 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 44 | table_mcon_dict={} 45 | next_token=None 46 | while True: 47 | response = client(get_table_query(dwId=dwId,after=next_token)).get_tables 48 | for table in response.edges: 49 | table_mcon_dict[table.node.full_table_id.lower()] = table.node.mcon 50 | if response.page_info.has_next_page: 51 | next_token = response.page_info.end_cursor 52 | else: 53 | break 54 | return table_mcon_dict 55 | 56 | def importDescriptionsFromCSV(mcdId,mcdToken,csvFileName, mconDict): 57 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 58 | field_description_update_query = """ 59 | mutation createOrUpdateCatalogObjectMetadata($mcon: String!, $description: String!) { 60 | createOrUpdateCatalogObjectMetadata(mcon: $mcon, description: $description) { 61 | catalogObjectMetadata { 62 | mcon 63 | } 64 | } 65 | } 66 | """ 67 | 68 | with open(csvFileName,"r") as field_descriptions_to_import: 69 | descriptions=csv.reader(field_descriptions_to_import, delimiter=",") 70 | total_desc=0 71 | imported_desc_counter = 0 72 | for row in descriptions: 73 | total_desc += 1 74 | if row[0].lower() not in mconDict.keys(): 75 | print("check failed: " + row[0].lower()) 76 | continue 77 | if mconDict[row[0].lower()]: 78 | print("check succeeded: " + row[0].lower()) 79 | if "++view++" in mconDict[row[0].lower()]: 80 | field_mcon = mconDict[row[0].lower()].replace("++view++", "++field++") + "+++" + row[1].lower() 81 | else: 82 | field_mcon = mconDict[row[0].lower()].replace("++table++", "++field++") + "+++" + row[1].lower() 83 | 84 | temp_obj=dict(mcon=field_mcon, description=row[2]) 85 | 86 | mutation = Mutation() 87 | print(client(field_description_update_query, variables=temp_obj)) 88 | 89 | imported_desc_counter += 1 90 | 91 | print("Successfully Imported " + str(imported_desc_counter) + " of " + str(total_desc) + " Field Descriptions") 92 | 93 | if __name__ == '__main__': 94 | print(''' 95 | WARNING: For every line in the CSV file, this script makes one API request. 96 | Typically, the API limit per day is 10k. 
If you are updating thousands of field 97 | descriptions, please consider spreading the effort across multiple days, or you 98 | can request a temporary increase in your API request limit. 99 | ''') 100 | ack = input("Proceed? (y/n) ") 101 | if ack.lower() == 'y': 102 | #-------------------INPUT VARIABLES--------------------- 103 | mcd_id = input("MCD ID: ") 104 | mcd_token = input("MCD Token: ") 105 | dw_id = input("DW ID: ") 106 | csv_file = input("CSV Filename: ") 107 | #------------------------------------------------------- 108 | if dw_id and csv_file: 109 | mcon_dict = getMcons(mcd_id,mcd_token,dw_id) 110 | importDescriptionsFromCSV(mcd_id,mcd_token,csv_file,mcon_dict) 111 | elif csv_file and not dw_id: 112 | warehouse_id = getDefaultWarehouse(mcd_id,mcd_token) 113 | mcon_dict = getMcons(mcd_id,mcd_token,warehouse_id) 114 | importDescriptionsFromCSV(mcd_id,mcd_token,csv_file,mcon_dict) 115 | else: 116 | print("Do not proceed acknowledged. Exiting.") 117 | quit() -------------------------------------------------------------------------------- /admin/get_audit_log_by_day.py: -------------------------------------------------------------------------------- 1 | from pycarlo.core import Client, Session 2 | from datetime import datetime 3 | import json 4 | 5 | 6 | class Log: 7 | def __init__(self, client, output_path) -> None: 8 | self.client = client 9 | self.output_path = output_path 10 | self.responses = {} 11 | self.set_start_variables() 12 | 13 | def set_start_variables(self): 14 | self.has_next_page = True 15 | self.end_cursor = '' 16 | self.variables = '' 17 | self.query_executions = 0 18 | 19 | def runQuery(self, query, api_signature): 20 | if self.query_executions == 0: 21 | self.responses[api_signature] = [] 22 | response = self.client(query) 23 | self.responses[api_signature].extend(response[api_signature]['records']) 24 | self.has_next_page = response[api_signature]['page_info']['has_next_page'] 25 | self.end_cursor = response[api_signature]['page_info']['end_cursor'] 26 | self.query_executions += 1 27 | 28 | 29 | def getAccountAuditLogs(self, start_time): 30 | while self.has_next_page: 31 | 32 | if self.query_executions == 0: 33 | self.variables = 'startTime: "{0}"'.format(datetime.isoformat(start_time)) 34 | else: 35 | self.variables = 'startTime: "{0}" after: "{1}"'.format(datetime.isoformat(start_time), self.end_cursor) 36 | 37 | query = ''' 38 | query GetAccountAuditLogs { 39 | getAccountAuditLogs(''' + self.variables + ''') { 40 | pageInfo { 41 | endCursor 42 | hasNextPage 43 | hasPreviousPage 44 | startCursor 45 | } 46 | records { 47 | accountName 48 | accountUuid 49 | apiCallReferences 50 | apiCallSource 51 | apiIsQuery 52 | apiName 53 | clientIp 54 | email 55 | eventType 56 | firstName 57 | lastName 58 | timestamp 59 | url 60 | } 61 | } 62 | }''' 63 | 64 | self.runQuery(query, 'get_account_audit_logs') 65 | 66 | self.set_start_variables() 67 | 68 | def write_logs(self, start_time): 69 | file_name = 'audit_logs_' + start_time.strftime('%Y_%m_%d_%H%M%S') + '.json' 70 | with open(self.output_path + file_name, 'w') as outfile: 71 | json.dump(self.responses, outfile) 72 | 73 | 74 | if __name__ == '__main__': 75 | #-------------------INPUT VARIABLES--------------------- 76 | mcdId = '{Insert key here}' 77 | mcdToken = '{Insert token here}' 78 | output_path = '{Insert path here}' 79 | # Select date to export 80 | start_time = datetime(2024,8,6,0,0,0) 81 | #------------------------------------------------------- 82 | print("Creating session and getting audit log") 83 | 
client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 84 | 85 | 86 | 87 | audit_log = Log(client, output_path) 88 | audit_log.getAccountAuditLogs(start_time) 89 | 90 | audit_log.write_logs(start_time) 91 | print("Writing logs complete") 92 | -------------------------------------------------------------------------------- /admin/key_asset_tagger.py: -------------------------------------------------------------------------------- 1 | #Change line 11 to filter for however many tables they have 2 | 3 | from pycarlo.core import Client, Query, Mutation, Session 4 | import requests 5 | import csv 6 | 7 | mcd_profile="mc_prod" 8 | client = Client(session=Session(mcd_profile=mcd_profile)) 9 | query1=Query() 10 | query2=Query() 11 | query1.get_tables(first=3000).edges.node.__fields__("mcon","full_table_id") 12 | query2.get_report_url(insight_name="key_assets",report_name="key_assets.csv").__fields__('url') 13 | table_list=client(query1).get_tables.edges 14 | report_url=client(query2).get_report_url.url 15 | r = requests.get(report_url) 16 | key_assets = r.content.decode('utf-8') 17 | reader = csv.reader(key_assets.splitlines(),delimiter=",") 18 | key_asset_list = list(reader) 19 | table_mcon_object={} 20 | 21 | for val in table_list: 22 | table_mcon_object[val.node.full_table_id] = val.node.mcon 23 | 24 | count=1 25 | for row in key_asset_list: 26 | table_id = str(row[1]) 27 | if table_id == "FULL_TABLE_ID": 28 | continue 29 | key_asset_score = str(round(float(row[7]),1)) 30 | if table_id in table_mcon_object.keys(): 31 | mcon_id = str(table_mcon_object[table_id]) 32 | else: 33 | continue 34 | 35 | print(count, mcon_id, key_asset_score) 36 | mutation=Mutation() 37 | mutation.create_or_update_object_property(mcon_id=mcon_id,property_name="Key Asset Score",property_value=key_asset_score).object_property.__fields__('id') 38 | print(client(mutation).create_or_update_object_property.object_property.id) 39 | count += 1 40 | -------------------------------------------------------------------------------- /admin/mute_non_domain_tables.py: -------------------------------------------------------------------------------- 1 | #Instructions: 2 | #1. Run this script locally, input API Key and Token when Prompted 3 | #2. The script will print out the count of tables from each warehouse connection that are not in a domain (to be muted), in a domain, and in a domain but muted (to be unmuted) 4 | #3. The script will prompt you to confirm the counts of tables to be muted/unmuted per warehouse connection within Monte Carlo (Y/N) 5 | #4. 
Once you pass a Y response, the muting of those tables will begin 6 | 7 | from pycarlo.core import Client, Query, Mutation, Session 8 | import csv 9 | import json 10 | from typing import Optional 11 | 12 | def getAllWarehouses(mcdId,mcdToken): 13 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 14 | query=Query() 15 | query.get_user().account.warehouses.__fields__("name","connection_type","uuid") 16 | warehouses=client(query).get_user.account.warehouses 17 | warehouse_list=[] 18 | if len(warehouses) > 0: 19 | for val in warehouses: 20 | warehouse_list.append(val.uuid) 21 | else: 22 | print("Error: no warehouses connected") 23 | return warehouse_list 24 | 25 | def getAllDomains(mcdId,mcdToken): 26 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 27 | query=Query() 28 | get_all_domains = query.get_all_domains().__fields__("name","uuid","assignments") 29 | domains=client(query).get_all_domains 30 | domain_list = [] 31 | for domain in domains: 32 | domain_list.append(domain["uuid"]) 33 | return domain_list 34 | 35 | def get_table_query(dwId,first: Optional[int] = 1000, after: Optional[str] = None) -> Query: 36 | query = Query() 37 | get_tables = query.get_tables(first=first, dw_id=dwId, is_deleted=False, **(dict(after=after) if after else {})) 38 | get_tables.edges.node.__fields__("full_table_id","mcon","is_muted") 39 | get_tables.page_info.__fields__(end_cursor=True) 40 | get_tables.page_info.__fields__("has_next_page") 41 | return query 42 | 43 | def get_tables_for_domain_query(domainId,first: Optional[int] = 1000, after: Optional[str] = None) -> Query: 44 | query = Query() 45 | get_tables = query.get_tables(first=first, is_deleted=False, domain_id=domainId, **(dict(after=after) if after else {})) 46 | get_tables.edges.node.__fields__("full_table_id","mcon","is_muted") 47 | get_tables.edges.node.warehouse.__fields__("uuid") 48 | get_tables.page_info.__fields__(end_cursor=True) 49 | get_tables.page_info.__fields__("has_next_page") 50 | return query 51 | 52 | def getMcons(mcdId,mcdToken,warehouses,domains): 53 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 54 | table_mcon_dict={} 55 | domain_mcon_dict={} 56 | tables_not_in_domain={} 57 | tables_to_unmute={} 58 | for warehouse in warehouses: 59 | print("Warehouse check: " + str(warehouse)) 60 | table_mcon_dict[warehouse] = {} 61 | domain_mcon_dict[warehouse] = {} 62 | tables_not_in_domain[warehouse] = {} 63 | tables_to_unmute[warehouse] = {} 64 | next_token=None 65 | while True: 66 | response = client(get_table_query(dwId=warehouse,after=next_token)).get_tables 67 | for table in response.edges: 68 | if table.node.is_muted == False: 69 | table_mcon_dict[warehouse][table.node.full_table_id] = table.node.mcon 70 | tables_not_in_domain[warehouse][table.node.full_table_id] = table.node.mcon 71 | if response.page_info.has_next_page: 72 | next_token = response.page_info.end_cursor 73 | else: 74 | break 75 | for domain in domains: 76 | print("Domain check: " + str(domain)) 77 | next_token=None 78 | while True: 79 | response = client(get_tables_for_domain_query(domainId=domain,after=next_token)).get_tables 80 | if len(response.edges) > 100: 81 | print(domain) 82 | for table in response.edges: 83 | warehouse = table.node.warehouse.uuid 84 | if table.node.is_muted == False: 85 | domain_mcon_dict[warehouse][table.node.full_table_id] = table.node.mcon 86 | else: 87 | #get list of muted tables within Domain to unmute 88 | tables_to_unmute[warehouse][table.node.full_table_id] = table.node.mcon 89 | if 
response.page_info.has_next_page: 90 | next_token = response.page_info.end_cursor 91 | else: 92 | break 93 | 94 | # identify tables not in a domain 95 | for warehouse in warehouses: 96 | for table_name in table_mcon_dict[warehouse]: 97 | if table_name in domain_mcon_dict[warehouse].keys(): 98 | del tables_not_in_domain[warehouse][table_name] 99 | else: 100 | continue 101 | for warehouse in warehouses: 102 | print("For warehouse: " + str(warehouse)) 103 | print("forMuting: "+str(len(tables_not_in_domain[warehouse]))) 104 | print("inDomain: "+str(len(domain_mcon_dict[warehouse]))) 105 | print("Total: "+str(len(table_mcon_dict[warehouse]))) 106 | print("forUnMuting: "+str(len(tables_to_unmute[warehouse]))) 107 | return [tables_not_in_domain,tables_to_unmute] 108 | 109 | 110 | def bulkMuteTablesByDomain(mcdId,mcdToken,mconDict): 111 | tables_not_in_domain = mconDict[0] 112 | tables_to_unmute = mconDict[1] 113 | bulkMuteTables(mcdId,mcdToken,tables_not_in_domain,True) 114 | counter=0 115 | for warehouse in tables_to_unmute: 116 | counter += len(tables_to_unmute[warehouse]) 117 | if counter > 0: 118 | bulkMuteTables(mcdId,mcdToken,tables_to_unmute,False) 119 | 120 | def bulkMuteTables(mcdId,mcdToken,mconDict,muteBoolean): 121 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 122 | temp_list=[] 123 | for warehouse in mconDict: 124 | counter=0 125 | for item in mconDict[warehouse]: 126 | temp_payload={} 127 | temp_payload["mcon"]=mconDict[warehouse][item] 128 | temp_payload["fullTableId"]=item 129 | temp_payload["dwId"]=warehouse 130 | temp_list.append(temp_payload) 131 | counter+=1 132 | if len(temp_list) > 9: 133 | mutation=Mutation() 134 | mutation.toggle_mute_tables(input=dict(mute=muteBoolean,tables=temp_list)).muted.__fields__("id") 135 | print(client(mutation).toggle_mute_tables) 136 | temp_list=[] 137 | if counter == len(mconDict[warehouse]): 138 | mutation=Mutation() 139 | mutation.toggle_mute_tables(input=dict(mute=muteBoolean,tables=temp_list)).muted.__fields__("id") 140 | print(client(mutation).toggle_mute_tables) 141 | break 142 | else: 143 | continue 144 | print("Tables muted("+str(muteBoolean)+") for " + str(warehouse) + ": " + str(counter)) 145 | 146 | if __name__ == '__main__': 147 | #-------------------INPUT VARIABLES--------------------- 148 | mcd_id = input("MCD ID: ") 149 | mcd_token = input("MCD Token: ") 150 | #------------------------------------------------------- 151 | warehouses = getAllWarehouses(mcd_id,mcd_token) 152 | domains = getAllDomains(mcd_id,mcd_token) 153 | mcon_dict = getMcons(mcd_id,mcd_token,warehouses,domains) 154 | mute = input("Mute? (Y/N): ") 155 | if mute == "Y": 156 | bulkMuteTablesByDomain(mcd_id,mcd_token,mcon_dict) 157 | -------------------------------------------------------------------------------- /admin/requirements.txt: -------------------------------------------------------------------------------- 1 | pycarlo==0.0.8 2 | requests==2.32.0 3 | dotenv==1.0.1 4 | cryptography~=42.0.7 5 | pycryptodome~=3.20.0 6 | -------------------------------------------------------------------------------- /admin/table_description_importer.py: -------------------------------------------------------------------------------- 1 | ######## 2 | # Instructions: 3 | # 1. Create a CSV with 2 columns in the following order: full_table_id, desired description 4 | # full_table_id must be lowercase in the format database:schema.table 5 | # 2. Run this script, input your API Key ID, Token (generated in Settings -> API within MC UI) 6 | # 3. 
Input the Data Warehouse ID in which the tables to import descriptions exist (will check and ignore tables in other warehouses) 7 | # Note: If you do not know the Data Warehouse ID, you can skip by pressing enter and the script will give you the options to choose from. You'll need to rerun the script after this. 8 | # 4. Input the name of the CSV with the descriptions 9 | # Note: If you have a list of descriptions for tables in multiple warehouses, run again for each data warehouse ID 10 | ######## 11 | 12 | from pycarlo.core import Client, Query, Mutation, Session 13 | import csv 14 | import json 15 | from typing import Optional 16 | import requests 17 | 18 | mcd_gql_api = "https://api.getmontecarlo.com/graphql" 19 | 20 | def getDefaultWarehouse(mcdId,mcdToken): 21 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 22 | query=Query() 23 | query.get_user().account.warehouses.__fields__("name","connection_type","uuid") 24 | warehouses=client(query).get_user.account.warehouses 25 | if len(warehouses) == 1: 26 | return warehouses[0].uuid 27 | elif len(warehouses) > 1: 28 | for val in warehouses: 29 | print("Name: " + val.name + ", Connection Type: " + val.connection_type + ", UUID: " + val.uuid) 30 | print("Error: More than one warehouse, please re-run with UUID value") 31 | quit() 32 | 33 | def get_table_query(dwId,first: Optional[int] = 1000, after: Optional[str] = None) -> Query: 34 | query = Query() 35 | get_tables = query.get_tables(first=first, dw_id=dwId, is_deleted=False, **(dict(after=after) if after else {})) 36 | get_tables.edges.node.__fields__("full_table_id","mcon") 37 | get_tables.page_info.__fields__(end_cursor=True) 38 | get_tables.page_info.__fields__("has_next_page") 39 | return query 40 | 41 | def getMcons(mcdId,mcdToken,dwId): 42 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 43 | table_mcon_dict={} 44 | next_token=None 45 | while True: 46 | response = client(get_table_query(dwId=dwId,after=next_token)).get_tables 47 | for table in response.edges: 48 | table_mcon_dict[table.node.full_table_id.lower()] = table.node.mcon 49 | if response.page_info.has_next_page: 50 | next_token = response.page_info.end_cursor 51 | else: 52 | break 53 | return table_mcon_dict 54 | 55 | def getHeaders(mcdId, mcdToken): 56 | return { 57 | 'Content-Type': 'application/json', 58 | 'x-mcd-id': mcdId, 59 | 'x-mcd-token': mcdToken 60 | } 61 | 62 | def getPayload(query, variables): 63 | data = { 64 | 'query': query, 65 | 'variables': variables 66 | } 67 | payload = json.dumps(data).replace("\\\\", "\\") 68 | 69 | return payload 70 | 71 | def importDescriptionsFromCSV(mcdId,mcdToken,csvFileName, mconDict): 72 | description_update_query = """ 73 | mutation createOrUpdateCatalogObjectMetadata($mcon: String!, $description: String!) 
{ 74 | createOrUpdateCatalogObjectMetadata(mcon: $mcon, description: $description) { 75 | catalogObjectMetadata { 76 | mcon 77 | } 78 | } 79 | } 80 | """ 81 | 82 | headers = getHeaders(mcdId, mcdToken) 83 | 84 | with open(csvFileName,"r") as descriptions_to_import: 85 | descriptions=csv.reader(descriptions_to_import, delimiter=",") 86 | total_desc=0 87 | imported_desc_counter = 0 88 | for row in descriptions: 89 | total_desc += 1 90 | if row[0].lower() not in mconDict.keys(): 91 | print("check failed: " + row[0].lower()) 92 | continue 93 | if mconDict[row[0].lower()]: 94 | print("check succeeded: " + row[0].lower()) 95 | 96 | query_variables = { 97 | "mcon": mconDict[row[0].lower()], 98 | "description": row[1] 99 | } 100 | 101 | payload = getPayload(description_update_query, query_variables) 102 | 103 | response = requests.post(mcd_gql_api, data=payload, headers=headers) 104 | print(response.text) 105 | 106 | imported_desc_counter += 1 107 | 108 | print("Successfully Imported " + str(imported_desc_counter) + " of " + str(total_desc) + " Table Descriptions") 109 | 110 | if __name__ == '__main__': 111 | #-------------------INPUT VARIABLES--------------------- 112 | mcd_id = input("MCD ID: ") 113 | mcd_token = input("MCD Token: ") 114 | dw_id = input("DW ID: ") 115 | csv_file = input("CSV Filename: ") 116 | #------------------------------------------------------- 117 | if dw_id and csv_file: 118 | mcon_dict = getMcons(mcd_id,mcd_token,dw_id) 119 | importDescriptionsFromCSV(mcd_id,mcd_token,csv_file,mcon_dict) 120 | elif csv_file and not dw_id: 121 | warehouse_id = getDefaultWarehouse(mcd_id,mcd_token) 122 | mcon_dict = getMcons(mcd_id,mcd_token,warehouse_id) 123 | importDescriptionsFromCSV(mcd_id,mcd_token,csv_file,mcon_dict) 124 | -------------------------------------------------------------------------------- /admin/unmute_tables.py: -------------------------------------------------------------------------------- 1 | ##### 2 | # About: 3 | # This script is intended to be used to UNMUTE ALL currently muted tables within a specified warehouse 4 | # Instructions: 5 | # 1. Run this script, input your API Key ID, Token (generated in Settings -> API within MC UI) 6 | # 2. If applicable, copy/paste the UUID of the warehouse you would like to target to unmute tables 7 | # Note: the script must be run for one warehouse at a time, run multiple times for multiple warehouses 8 | # 3. Review the list of tables to be unmuted in the .csv file provided by the prompt 9 | # RECOMMENDATION: Keep this CSV file as a means to audit which tables were unmuted by this script 10 | # 4. Proceed to unmute the list of tables 11 | ##### 12 | 13 | from pycarlo.core import Client, Query, Session 14 | import csv 15 | import json 16 | from typing import Optional 17 | from datetime import datetime 18 | 19 | def getWarehouses(mcdId,mcdToken): 20 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 21 | warehousesQuery = """ 22 | query getUser { 23 | getUser { 24 | account { 25 | warehouses { 26 | name 27 | connectionType 28 | uuid 29 | } 30 | } 31 | } 32 | } 33 | """ 34 | 35 | warehouses=client(warehousesQuery).get_user.account.warehouses 36 | 37 | if len(warehouses) == 1: 38 | print(f"Found one warehouse - Name: {warehouses[0].name} - UUID: {warehouses[0].uuid}") 39 | return warehouses[0].uuid 40 | elif len(warehouses) > 1: 41 | print("Found multiple warehouses... 
") 42 | for val in warehouses: 43 | print("Name: " + val.name + ", Connection Type: " + val.connection_type + ", UUID: " + val.uuid) 44 | dwId = input("Please copy/paste the full UUID of the warehouse you would like to target: ") 45 | return dwId 46 | 47 | def get_table_query(dwId,first: Optional[int] = 1000, after: Optional[str] = None) -> Query: 48 | query = Query() 49 | get_tables = query.get_tables(first=first, dw_id=dwId, is_deleted=False, **(dict(after=after) if after else {})) 50 | get_tables.edges.node.__fields__("full_table_id","mcon","is_muted") 51 | get_tables.page_info.__fields__(end_cursor=True) 52 | get_tables.page_info.__fields__("has_next_page") 53 | return query 54 | 55 | def getMcons(mcdId,mcdToken,dwId): 56 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 57 | table_mcon_dict={} 58 | next_token=None 59 | while True: 60 | response = client(get_table_query(dwId=dwId,after=next_token)).get_tables 61 | for table in response.edges: 62 | if table.node.is_muted: 63 | table_mcon_dict[table.node.full_table_id.lower()] = table.node.mcon 64 | if response.page_info.has_next_page: 65 | next_token = response.page_info.end_cursor 66 | else: 67 | break 68 | return table_mcon_dict 69 | 70 | def get_date(): 71 | return datetime.today().strftime('%Y-%m-%d_%H:%M:%S') 72 | 73 | def userReview(mcon_dict, dw_id): 74 | if not mcon_dict: 75 | print(f"No muted tables found in selected warehouse id {dw_id}. Exiting") 76 | quit() 77 | 78 | fname = f"tables_to_mute_{get_date()}.csv" 79 | header = ['fullTableName', 'MCON'] 80 | with open(fname, 'w') as csvfile: 81 | writer = csv.writer(csvfile) 82 | writer.writerow(header) 83 | for table, mcon in mcon_dict.items(): 84 | writer.writerow([table, mcon]) 85 | userReview = input(f'Tables to unmute written to file {fname} for your review. OK to proceed? (y/n) ').lower() 86 | 87 | if userReview == 'y': 88 | return 89 | else: 90 | print("Acknowledged do not proceed. Exiting.") 91 | quit() 92 | 93 | def generateVarsInput(mcon_list): 94 | vars_input = { 95 | "input": { 96 | "tables": mcon_list, 97 | "mute": False 98 | } 99 | } 100 | return vars_input 101 | 102 | def unmute_tables(mcdId,mcdToken,mconDict): 103 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 104 | mcon_list=[] 105 | unmute_tables_query = """ 106 | mutation toggleMuteTables($input: ToggleMuteTablesInput!) 
{ 107 | toggleMuteTables(input: $input) { 108 | muted { 109 | mcon 110 | isMuted 111 | } 112 | } 113 | } 114 | """ 115 | 116 | unmuted_table_counter = 0 117 | incremental_tables = 0 118 | for mcon in mconDict.values(): 119 | temp_obj=dict(mcon=mcon) 120 | print(temp_obj) 121 | mcon_list.append(temp_obj) 122 | unmuted_table_counter += 1 123 | incremental_tables += 1 124 | if incremental_tables == 99: 125 | vars_input = generateVarsInput(mcon_list) 126 | print(client(unmute_tables_query, variables=vars_input)) 127 | mcon_list.clear() 128 | incremental_tables = 0 129 | if incremental_tables > 0: 130 | vars_input = generateVarsInput(mcon_list) 131 | print(client(unmute_tables_query, variables=vars_input)) 132 | print("Successfully Unmuted " + str(unmuted_table_counter) + " Tables") 133 | 134 | if __name__ == '__main__': 135 | #-------------------INPUT VARIABLES--------------------- 136 | mcd_id = input("MCD ID: ") 137 | mcd_token = input("MCD Token: ") 138 | #------------------------------------------------------- 139 | print("Getting warehouses...") 140 | dw_id = getWarehouses(mcd_id, mcd_token) 141 | print("Getting tables...") 142 | mcon_dict = getMcons(mcd_id,mcd_token,dw_id) 143 | userReview(mcon_dict, dw_id) 144 | unmute_tables(mcd_id,mcd_token,mcon_dict) -------------------------------------------------------------------------------- /admin/user_role_exporter.py: -------------------------------------------------------------------------------- 1 | from pycarlo.core import Client, Query, Mutation, Session 2 | import csv 3 | 4 | def userRoleExporter(file_name): 5 | with open(file_name, "w") as roles: 6 | csv_writer=csv.writer(roles) 7 | first_row=["Email","Role"] 8 | csv_writer.writerow(first_row) 9 | 10 | user_query = ''' 11 | query { 12 | getUsersInAccount(first: 1000) { 13 | pageInfo { 14 | hasNextPage 15 | endCursor 16 | } 17 | edges { 18 | node { 19 | email 20 | auth { 21 | groups 22 | } 23 | } 24 | } 25 | } 26 | } 27 | ''' 28 | query=Query() 29 | response= client(user_query).get_users_in_account.edges 30 | 31 | for user in response: 32 | print(user.node.email) 33 | csv_writer.writerow([user.node.email,str(user.node.auth.groups)]) 34 | 35 | if __name__ == '__main__': 36 | mcd_id = input("MCD ID: ") 37 | mcd_token = input("MCD Token: ") 38 | csv_name = input("CSV Name: ") 39 | client = Client(session=Session(mcd_id=mcd_id,mcd_token=mcd_token)) 40 | userRoleExporter(csv_name) 41 | -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /app/__main__.py: -------------------------------------------------------------------------------- 1 | from app.app import MCSDKApp 2 | 3 | if __name__ == "__main__": 4 | app = MCSDKApp() 5 | app.run() 6 | -------------------------------------------------------------------------------- /app/app.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from textual.app import App 4 | from textual.binding import Binding 5 | from app.readme import ReadmeScreen 6 | from app.categories import CategoriesScreen 7 | from app.themes import ThemesScreen 8 | from pathlib import Path 9 | from textual.theme import BUILTIN_THEMES 10 | import json 11 | 12 | # Define the path for the theme configuration file 13 | CONFIG_PATH = Path.home() / ".mc_sdk_app" / "config.json" 14 | 15 | # Ensure 
directory exists 16 | CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True) 17 | 18 | 19 | class MCSDKApp(App): 20 | """The demo app defines the modes and sets a few bindings.""" 21 | 22 | def __init__(self, **kwargs): 23 | super().__init__(**kwargs) 24 | 25 | CSS = """ 26 | .column { 27 | align: center top; 28 | &>*{ max-width: 100; } 29 | } 30 | Screen .-maximized { 31 | margin: 1 2; 32 | max-width: 100%; 33 | &.column { margin: 1 2; padding: 1 2; } 34 | &.column > * { 35 | max-width: 100%; 36 | } 37 | } 38 | """ 39 | 40 | ENABLE_COMMAND_PALETTE = False 41 | 42 | MODES = { 43 | "readme": ReadmeScreen, 44 | "categories": CategoriesScreen, 45 | "themes": ThemesScreen 46 | } 47 | DEFAULT_MODE = "categories" 48 | BINDINGS = [ 49 | Binding( 50 | "r", 51 | "app.switch_mode('readme')", 52 | "ReadMe", 53 | tooltip="Show the readme screen", 54 | ), 55 | Binding( 56 | "c", 57 | "app.switch_mode('categories')", 58 | "Categories", 59 | tooltip="Show utilities categories", 60 | ), 61 | Binding( 62 | "t", 63 | "app.switch_mode('themes')", 64 | "Themes", 65 | tooltip="Change app theme", 66 | ), 67 | Binding( 68 | "ctrl+s", 69 | "app.screenshot", 70 | "Screenshot", 71 | tooltip="Save an SVG 'screenshot' of the current screen", 72 | ) 73 | ] 74 | 75 | def check_action(self, action: str, parameters: tuple[object, ...]) -> bool | None: 76 | """Disable switching to a mode we are already on.""" 77 | if ( 78 | action == "switch_mode" 79 | and parameters 80 | and self.current_mode == parameters[0] 81 | ): 82 | return None 83 | return True 84 | 85 | def on_mount(self) -> None: 86 | """Set initial theme when the app is mounted.""" 87 | 88 | for theme in BUILTIN_THEMES: 89 | if theme == self.load_theme(): 90 | self.app.theme = theme 91 | 92 | def load_theme(self) -> str: 93 | """Load theme from local file.""" 94 | if CONFIG_PATH.exists(): 95 | try: 96 | with open(CONFIG_PATH, "r") as file: 97 | config = json.load(file) 98 | return config.get("theme", "dark") 99 | except json.JSONDecodeError: 100 | pass 101 | return "dark" 102 | 103 | 104 | -------------------------------------------------------------------------------- /app/categories.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from dataclasses import dataclass 4 | from textual import events, on 5 | from textual.app import ComposeResult 6 | from textual.binding import Binding 7 | from textual.containers import Center, Horizontal, ItemGrid, Vertical, VerticalScroll 8 | from textual.widgets import Footer, Label, Markdown, Static 9 | from app.controls import Controls 10 | from app.header import MCHeader 11 | 12 | 13 | @dataclass 14 | class CategoryInfo: 15 | """Dataclass for storing category information.""" 16 | 17 | title: str 18 | description: str 19 | 20 | 21 | CATEGORIES_MD = """\ 22 | # Categories 23 | """ 24 | 25 | CATEGORIES = [CategoryInfo('Admin', '\nAdmin related operations and utilities.'), 26 | CategoryInfo('Tables', '\nCollection of actions and utilities around tables/views.'), 27 | CategoryInfo('Monitors', '\nCollection of actions and utilities for MC monitors.'), 28 | CategoryInfo('Lineage', '\nCollection of actions and utilities around lineage.'),] 29 | 30 | 31 | class Category(Vertical, can_focus=True, can_focus_children=False): 32 | """Display category information and show utilities within""" 33 | 34 | ALLOW_MAXIMIZE = True 35 | DEFAULT_CSS = """ 36 | Category { 37 | width: 1fr; 38 | height: auto; 39 | padding: 0 1; 40 | border: tall transparent; 41 | 
box-sizing: border-box; 42 | &:focus { 43 | border: tall $text-primary; 44 | background: $primary 20%; 45 | &.link { 46 | color: red !important; 47 | } 48 | } 49 | #title { text-style: bold italic; width: 1fr; color: #33acff; } 50 | .header { height: 1; } 51 | .link { 52 | color: #0c5395; 53 | text-style: underline; 54 | } 55 | .description { color: $text-muted; } 56 | &.-hover { opacity: 1; } 57 | } 58 | """ 59 | 60 | def __init__(self, category_info: CategoryInfo) -> None: 61 | self.category_info = category_info 62 | super().__init__() 63 | 64 | def compose(self) -> ComposeResult: 65 | info = self.category_info 66 | with Horizontal(classes="header"): 67 | yield Label(info.title, id="title") 68 | yield Static(info.description, classes="description") 69 | 70 | 71 | class CategoriesScreen(Controls): 72 | AUTO_FOCUS = None 73 | CSS = """ 74 | CategoriesScreen { 75 | align-horizontal: center; 76 | ItemGrid { 77 | margin: 2 4; 78 | padding: 1 2; 79 | background: $boost; 80 | width: 1fr; 81 | height: auto; 82 | grid-gutter: 1 1; 83 | grid-rows: auto; 84 | keyline:thin $foreground 30%; 85 | } 86 | Markdown { margin: 0; padding: 0 2; max-width: 100; background: transparent; } 87 | } 88 | """ 89 | 90 | BINDINGS = [ 91 | Binding("enter", "open_category", "Open Category", tooltip="Open the category"), 92 | ] 93 | 94 | def __init__( 95 | self, 96 | name: str | None = None, 97 | id: str | None = None, 98 | classes: str | None = None, 99 | ): 100 | super().__init__(name, id, classes) 101 | self.category_info = None 102 | self.categories = None 103 | self.grid = None 104 | 105 | def compose(self) -> ComposeResult: 106 | self.grid = ItemGrid(min_column_width=40) 107 | yield MCHeader() 108 | with VerticalScroll(): 109 | with Center(): 110 | yield Markdown(CATEGORIES_MD) 111 | yield self.grid # Ensure ItemGrid is mounted first 112 | yield Footer() 113 | 114 | def on_mount(self) -> None: 115 | """Mount categories after the grid is ready and focus the first item.""" 116 | self.categories = [Category(category) for category in CATEGORIES] 117 | self.grid.mount(*self.categories) 118 | if self.categories: 119 | self.categories[0].focus() 120 | 121 | @on(events.Enter) 122 | @on(events.Leave) 123 | def on_enter(self, event: events.Enter): 124 | event.stop() 125 | self.set_class(self.is_mouse_over, "-hover") 126 | 127 | def action_open_category(self) -> None: 128 | current_focus = self.app.focused 129 | if isinstance(current_focus, Category): # Ensure it's a Category instance 130 | # self.notify(f"Opening category: {current_focus.category_info.title}", severity="info") 131 | from app.utilities import UtilitiesScreen 132 | self.app.push_screen(UtilitiesScreen(current_focus.category_info.title)) 133 | 134 | def action_move(self, direction: str) -> None: 135 | """Move focus within the utility grid.""" 136 | self.action_move_grid(direction, Category) 137 | 138 | def action_go_readme(self) -> None: 139 | """Return to CategoriesScreen when Escape is pressed.""" 140 | from app.readme import ReadmeScreen 141 | self.app.push_screen(ReadmeScreen()) 142 | -------------------------------------------------------------------------------- /app/controls.py: -------------------------------------------------------------------------------- 1 | from textual.screen import Screen 2 | from textual.binding import Binding 3 | 4 | 5 | class Controls(Screen): 6 | 7 | def __init__(self, name: str | None = None, id: str | None = None, classes: str | None = None,): 8 | super().__init__(name, id, classes) 9 | self.grid = None 10 | 11 | BINDINGS 
= [ 12 | Binding("up", "move('up')", "Move Up", priority=True), 13 | Binding("down", "move('down')", "Move Down", priority=True), 14 | Binding("left", "move('left')", "Move Left", priority=True), 15 | Binding("right", "move('right')", "Move Right", priority=True), 16 | ] 17 | 18 | def action_move_grid(self, direction: str, cls) -> None: 19 | """Move focus between utilities in the grid based on arrow keys.""" 20 | if not self.grid: 21 | return 22 | 23 | focusables = list(self.grid.query(cls)) # Convert generator to list 24 | if not focusables: 25 | return 26 | 27 | current_focus = self.app.focused 28 | if current_focus not in focusables: 29 | focusables[0].focus() 30 | return 31 | 32 | current_index = focusables.index(current_focus) 33 | # Dynamically determine row length based on terminal width 34 | terminal_width = self.app.size.width # Get terminal width 35 | min_column_width = 40 # Must match ItemGrid's min_column_width 36 | row_length = max(1, terminal_width // min_column_width) # Ensure at least 1 37 | 38 | # Calculate new focus index 39 | if direction == "up": 40 | new_index = max(0, current_index - row_length) 41 | elif direction == "down": 42 | new_index = min(len(focusables) - 1, current_index + row_length) 43 | elif direction == "left": 44 | new_index = max(0, current_index - 1) 45 | elif direction == "right": 46 | new_index = min(len(focusables) - 1, current_index + 1) 47 | else: 48 | return 49 | 50 | focusables[new_index].focus() 51 | -------------------------------------------------------------------------------- /app/header.py: -------------------------------------------------------------------------------- 1 | from textual.reactive import reactive 2 | from textual.containers import Horizontal, Vertical 3 | from textual.app import ComposeResult 4 | from textual.widgets import Static 5 | 6 | 7 | class MCHeader(Vertical): 8 | """Widget to get and display GitHub star count.""" 9 | 10 | DEFAULT_CSS = """ 11 | MCHeader { 12 | dock: top; 13 | height: 6; 14 | border-bottom: hkey $background; 15 | border-top: hkey $background; 16 | layout: horizontal; 17 | background: #0c5395; 18 | padding: 0 0; 19 | color: $text-warning; 20 | #logo { align: center top; text-style: bold; color: $foreground; padding: 1 0 0 35;} 21 | Label { text-style: bold; color: $foreground; } 22 | LoadingIndicator { background: transparent !important; } 23 | Digits { width: auto; margin-right: 1; } 24 | Label { margin-right: 1; } 25 | align: center top; 26 | &>Horizontal { max-width: 100;} 27 | } 28 | """ 29 | stars = reactive(25251, recompose=True) 30 | forks = reactive(776, recompose=True) 31 | 32 | def compose(self) -> ComposeResult: 33 | with Horizontal(): 34 | with Vertical(id="logo"): 35 | yield Static("┳┳┓┏┓ ┏┓┳┓┓┏┓ ┏┓┏┓┳┳┓┏┓┓ ┏┓┏┓\n" 36 | "┃┃┃┃ ┗┓┃┃┃┫ ┗┓┣┫┃┃┃┃┃┃ ┣ ┗┓\n" 37 | "┛ ┗┗┛ ┗┛┻┛┛┗┛ ┗┛┛┗┛ ┗┣┛┗┛┗┛┗┛") 38 | 39 | def on_mount(self) -> None: 40 | print("") -------------------------------------------------------------------------------- /app/readme.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from app.categories import CategoriesScreen 4 | from app.header import MCHeader 5 | from textual.app import ComposeResult 6 | from textual.containers import VerticalScroll 7 | from textual.screen import Screen 8 | from textual.widgets import Collapsible, Footer, Markdown 9 | from pathlib import Path 10 | 11 | import re 12 | import os 13 | 14 | WHAT_IS_TEXTUAL_MD = """\ 15 | # What is MC SDK SAMPLES? 
16 | 17 | Set of utilities around MC operations that run in the terminal. 18 | 19 | 🐍 All you need is Python! 20 | 21 | """ 22 | 23 | 24 | def read_md_by_sections(file_path): 25 | """ 26 | Reads a markdown file and splits it into sections based on headers. 27 | 28 | Args: 29 | file_path (str): The path to the markdown file. 30 | 31 | Returns: 32 | dict: A dictionary where keys are section headers and values are the 33 | corresponding section content. 34 | """ 35 | sections = {} 36 | stack = [] # Stack to maintain header hierarchy 37 | 38 | with open(file_path, 'r', encoding='utf-8') as file: 39 | for line in file: 40 | header_match = re.match(r'^(#+)\s+(.*)$', line) 41 | if header_match: 42 | level = len(header_match.group(1)) 43 | header_text = header_match.group(2).strip() 44 | 45 | # Create a new section for the header 46 | new_section = {"content": "", "subsections": {}} 47 | 48 | # Adjust stack based on header level 49 | while stack and stack[-1][0] >= level: 50 | stack.pop() 51 | 52 | if stack: 53 | parent_section = stack[-1][1]["subsections"] 54 | parent_section[header_text] = new_section 55 | else: 56 | sections[header_text] = new_section 57 | 58 | stack.append((level, new_section)) 59 | elif stack: 60 | stack[-1][1]["content"] += line 61 | 62 | return sections 63 | 64 | 65 | MD_SECTIONS = read_md_by_sections(os.path.join(str(Path(os.path.abspath(__file__)).parent.parent), 'README.md')) 66 | 67 | 68 | class Content(VerticalScroll, can_focus=False): 69 | """Non focusable vertical scroll.""" 70 | 71 | 72 | class ReadmeScreen(Screen): 73 | DEFAULT_CSS = """ 74 | ReadmeScreen { 75 | 76 | Content { 77 | align-horizontal: center; 78 | & > * { 79 | max-width: 100; 80 | } 81 | margin: 0 1; 82 | overflow-y: auto; 83 | height: 1fr; 84 | scrollbar-gutter: stable; 85 | MarkdownFence { 86 | height: auto; 87 | max-height: initial; 88 | } 89 | Collapsible { 90 | padding-right: 0; 91 | &.-collapsed { padding-bottom: 1; } 92 | } 93 | Markdown { 94 | margin-right: 1; 95 | padding-right: 1; 96 | background: transparent; 97 | } 98 | } 99 | } 100 | """ 101 | 102 | def render_markdown_sections(self, sections, collapsed_level=1): 103 | for i, (title, content) in enumerate(sections.items()): 104 | with Collapsible(title=title, collapsed=False if i < collapsed_level else True): 105 | yield Markdown(content['content']) 106 | if 'subsections' in content: 107 | yield from self.render_markdown_sections(content['subsections'], collapsed_level) 108 | 109 | def compose(self) -> ComposeResult: 110 | yield MCHeader() 111 | with Content(): 112 | yield Markdown(WHAT_IS_TEXTUAL_MD) 113 | yield from self.render_markdown_sections(MD_SECTIONS) 114 | yield Footer() 115 | 116 | def action_go_back(self) -> None: 117 | """Return to CategoriesScreen when Escape is pressed.""" 118 | if len(self.app.screen_stack) > 1: 119 | self.app.pop_screen() 120 | else: 121 | self.app.push_screen(CategoriesScreen()) 122 | 123 | def action_go_categories(self) -> None: 124 | """Return to CategoriesScreen when Escape is pressed.""" 125 | from app.categories import CategoriesScreen 126 | self.app.push_screen(CategoriesScreen()) 127 | -------------------------------------------------------------------------------- /app/themes.py: -------------------------------------------------------------------------------- 1 | from textual.screen import Screen 2 | from textual import events, on 3 | from textual.containers import Vertical, VerticalScroll, ItemGrid, HorizontalGroup, Center 4 | from textual.widgets import Switch, Label, Footer 5 | from 
textual.app import ComposeResult 6 | from textual.theme import BUILTIN_THEMES 7 | from app.header import MCHeader 8 | 9 | import json 10 | from pathlib import Path 11 | 12 | # Define the path for the theme configuration file 13 | CONFIG_PATH = Path.home() / ".mc_sdk_app" / "config.json" 14 | 15 | 16 | class Themes(Vertical): 17 | """Switch themes.""" 18 | 19 | ALLOW_MAXIMIZE = True 20 | DEFAULT_CLASSES = "column" 21 | 22 | DEFAULT_CSS = """\ 23 | Switches { 24 | Label { 25 | padding: 1; 26 | &:hover {text-style:underline; } 27 | } 28 | } 29 | """ 30 | 31 | def compose(self) -> ComposeResult: 32 | with ItemGrid(min_column_width=32): 33 | for theme in BUILTIN_THEMES: 34 | if theme.endswith("-ansi"): 35 | continue 36 | with HorizontalGroup(): 37 | yield Switch(id=theme) 38 | yield Label(theme, name=theme) 39 | 40 | @on(events.Click, "Label") 41 | def on_click(self, event: events.Click) -> None: 42 | """Make the label toggle the switch.""" 43 | event.stop() 44 | if event.widget is not None: 45 | self.query_one(f"#{event.widget.name}", Switch).toggle() 46 | 47 | def on_mount(self): 48 | self.query_one(f"#{self.app.theme}", Switch).value = True 49 | 50 | def on_switch_changed(self, event: Switch.Changed) -> None: 51 | # Don't issue more Changed events 52 | if not event.value: 53 | self.query_one("#textual-dark", Switch).value = True 54 | return 55 | 56 | with self.prevent(Switch.Changed): 57 | # Reset all other switches 58 | for switch in self.query("Switch").results(Switch): 59 | if switch.id != event.switch.id: 60 | switch.value = False 61 | assert event.switch.id is not None 62 | theme_id = event.switch.id 63 | 64 | def switch_theme() -> None: 65 | """Callback to switch the theme.""" 66 | self.app.theme = theme_id 67 | self.save_theme() 68 | 69 | # Call after a short delay, so we see the Switch animation 70 | self.set_timer(0.3, switch_theme) 71 | 72 | def save_theme(self) -> None: 73 | """Save theme to local file.""" 74 | with open(CONFIG_PATH, "w") as file: 75 | json.dump({"theme": self.app.theme}, file, indent=4) 76 | 77 | 78 | class ThemesScreen(Screen): 79 | AUTO_FOCUS = None 80 | CSS = """ 81 | ThemesScreen { 82 | align-horizontal: center; 83 | ItemGrid { 84 | margin: 2 4; 85 | padding: 1 2; 86 | background: $boost; 87 | width: 1fr; 88 | height: auto; 89 | grid-gutter: 1 1; 90 | grid-rows: auto; 91 | keyline:thin $foreground 30%; 92 | } 93 | Markdown { margin: 0; padding: 0 2; max-width: 100; background: transparent; } 94 | } 95 | """ 96 | 97 | def __init__(self): 98 | super().__init__() 99 | 100 | def compose(self) -> ComposeResult: 101 | yield MCHeader() 102 | with VerticalScroll(): 103 | with Center(): 104 | yield Themes() 105 | yield Footer() 106 | -------------------------------------------------------------------------------- /app/utilities.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from app.categories import CATEGORIES 4 | from dataclasses import dataclass 5 | from textual.app import ComposeResult 6 | from textual.binding import Binding 7 | from textual import events, on 8 | from textual.containers import Center, Horizontal, ItemGrid, Vertical, VerticalScroll 9 | from textual.widgets import Footer, Label, Markdown, Static 10 | from app.controls import Controls 11 | from pathlib import Path 12 | from app.header import MCHeader 13 | import os 14 | import json 15 | 16 | PARSER_CONFIG = f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}/lib/helpers/parser_config.json' 17 | 18 | 19 | 
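# Illustrative sketch of the parser_config.json layout that parse_utility_help() below expects:
# entries are keyed by category directory, then by script filename; the category, script, and
# argument names shown here are hypothetical examples, not taken from the real config file.
# {
#   "admin": {
#     "unmute_tables.py": {
#       "description": "Unmute all currently muted tables in a warehouse",
#       "arguments": {"profile": {"required": false, "help": "MC CLI profile name"}},
#       "subparsers": {}
#     }
#   }
# }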
def parse_utility_help(): 20 | with open(PARSER_CONFIG, 'r') as file: 21 | config = json.load(file) 22 | 23 | utils = {} 24 | for category in CATEGORIES: 25 | category.title = category.title.lower() 26 | path = os.path.join(os.getcwd(), category.title) 27 | subpaths = sorted(Path(f'{path}').glob('[!__]*.py')) 28 | for path in subpaths: 29 | utility_exec = str(path).split('/')[-1] 30 | utility_title = utility_exec.replace('.py', '').replace('_', ' ').title() 31 | if config.get(category.title): 32 | if config.get(category.title).get(utility_exec): 33 | if config.get(category.title).get(utility_exec).get('description'): 34 | utility_desc = config[category.title][utility_exec]['description'] 35 | utility_args = config[category.title][utility_exec].get('arguments', {}) 36 | utility_subparsers = config[category.title][utility_exec].get('subparsers', {}) 37 | utils[utility_title] = UtilityInfo(utility_title, path, truncate_string(utility_desc), utility_desc, 38 | category.title, utility_args, utility_subparsers) 39 | 40 | return utils 41 | 42 | 43 | def truncate_string(text, max_length=60): 44 | if len(text) <= max_length: 45 | return text 46 | else: 47 | truncated_text = text[:max_length] 48 | last_space_index = truncated_text.rfind(' ') 49 | if last_space_index == -1: 50 | return "" 51 | else: 52 | return truncated_text[:last_space_index] + "... \[more]" 53 | 54 | 55 | @dataclass 56 | class UtilityInfo: 57 | """Dataclass for storing utility information.""" 58 | 59 | title: str 60 | executable: str 61 | short_description: str 62 | description: str 63 | parent: str 64 | arguments: dict 65 | subparsers: dict 66 | 67 | 68 | UTILITIES = parse_utility_help() 69 | 70 | 71 | class Utility(Vertical, can_focus=True, can_focus_children=False): 72 | """Display all utilities from a category""" 73 | 74 | ALLOW_MAXIMIZE = True 75 | DEFAULT_CSS = """ 76 | Utility { 77 | width: 1fr; 78 | height: auto; 79 | padding: 0 1; 80 | border: tall transparent; 81 | box-sizing: border-box; 82 | &:focus { 83 | border: tall $text-primary; 84 | background: $primary 20%; 85 | &.link { 86 | color: red !important; 87 | } 88 | } 89 | #title { text-style: bold italic; width: 1fr; color: #ffffff;} 90 | .header { height: 1; } 91 | .link { 92 | color: #0c5395; 93 | text-style: underline; 94 | } 95 | .description { color: $text-muted; } 96 | &.-hover { opacity: 1; } 97 | } 98 | """ 99 | 100 | def __init__(self, utility_info: UtilityInfo) -> None: 101 | self.utility_info = utility_info 102 | super().__init__() 103 | 104 | def compose(self) -> ComposeResult: 105 | info = self.utility_info 106 | with Horizontal(classes="header"): 107 | yield Label(info.title, id="title") 108 | yield Static(info.short_description, classes="description") 109 | 110 | 111 | class UtilitiesScreen(Controls): 112 | AUTO_FOCUS = None 113 | CSS = """ 114 | UtilitiesScreen { 115 | align-horizontal: center; 116 | ItemGrid { 117 | margin: 2 4; 118 | padding: 1 2; 119 | background: $boost; 120 | width: 1fr; 121 | height: auto; 122 | grid-gutter: 1 1; 123 | grid-rows: auto; 124 | keyline:thin $foreground 30%; 125 | } 126 | Markdown { margin: 0; padding: 0 2; max-width: 100; background: transparent; } 127 | } 128 | """ 129 | 130 | BINDINGS = [ 131 | Binding("escape", "go_back", "Categories", tooltip="Go to previous screen"), 132 | Binding("enter", "open_utility", "Open Utility", tooltip="Open the utility"), 133 | ] 134 | 135 | def __init__( 136 | self, 137 | category: str, 138 | name: str | None = None, 139 | id: str | None = None, 140 | classes: str | None = None, 
141 | ): 142 | super().__init__(name, id, classes) 143 | self.category = category 144 | self.utility_info = None 145 | self.utilities = None 146 | self.grid = None 147 | 148 | def compose(self) -> ComposeResult: 149 | self.app.bind("r", "void") 150 | self.grid = ItemGrid(min_column_width=40) 151 | yield MCHeader() 152 | with VerticalScroll(): 153 | with Center(): 154 | utilities_md = f"# {self.category.title()}" 155 | yield Markdown(utilities_md) 156 | yield self.grid # Ensure ItemGrid is mounted first 157 | yield Footer() 158 | 159 | def on_mount(self) -> None: 160 | """Mount utilities after the grid is ready and focus the first item.""" 161 | self.utilities = [Utility(utility) for title, utility in UTILITIES.items() if utility.parent.lower() == self.category.lower()] 162 | self.grid.mount(*self.utilities) 163 | if self.utilities: 164 | self.utilities[0].focus() 165 | 166 | @on(events.Enter) 167 | @on(events.Leave) 168 | def on_enter(self, event: events.Enter): 169 | event.stop() 170 | self.set_class(self.is_mouse_over, "-hover") 171 | 172 | def action_open_utility(self) -> None: 173 | current_focus = self.app.focused 174 | if isinstance(current_focus, Utility): 175 | # self.notify(f"Opening utility: {current_focus.utility_info.title}", severity="info") 176 | from app.executor import ExecutorScreen 177 | self.app.push_screen(ExecutorScreen(current_focus.utility_info)) 178 | 179 | def action_move(self, direction: str) -> None: 180 | """Move focus within the utility grid.""" 181 | self.action_move_grid(direction, Utility) 182 | 183 | def action_go_back(self) -> None: 184 | """Return to CategoriesScreen when Escape is pressed.""" 185 | self.app.pop_screen() 186 | -------------------------------------------------------------------------------- /code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monte-carlo-data/monte-carlo-python-sdk-examples/074216990a85ea21b1e1fd0a6f6d35a6e6c428da/code.png -------------------------------------------------------------------------------- /configs/configs.ini: -------------------------------------------------------------------------------- 1 | [global] 2 | BATCH = 300 3 | TOKEN_DURATION = 14 -------------------------------------------------------------------------------- /insights/bigquery_insights_importer.py: -------------------------------------------------------------------------------- 1 | #Usage Instructions: 2 | #1. Create a Service Account within BigQuery with Owner-level permissions 3 | #2. Create a Key (JSON) from this Service Account, save locally and specify the path below under "key_path" 4 | #3. Input your Bigquery Project ID under "bq_project_id" 5 | #4. Configure the Monte Carlo CLI with a --profile-name to reference in variable mcd_profile (https://docs.getmontecarlo.com/docs/using-the-cli#setting-up-the-cli) 6 | #5. 
Update the insight_names and insight_report_names for the specific reports you want to include (associated names should be in same index) 7 | #NOTES: 8 | # - This will create local CSV files for all data imported to BigQuery 9 | # - Running this script will overwrite existing dataset and tables with same names in BQ project 10 | 11 | from pycarlo.core import Client, Query, Mutation, Session 12 | import csv 13 | import requests 14 | from google.cloud import bigquery 15 | from google.oauth2 import service_account 16 | 17 | #-------------------INPUT VARIABLES--------------------- 18 | key_path = "" 19 | bq_project_id="" 20 | mcd_profile="" 21 | insight_names = ["key_assets","monitors","cleanup_suggestions","events","table_read_write_stats","incident_history"] 22 | insight_report_names = ["key_assets.csv","monitors.csv","cleanup_suggestions.csv","events.csv","table_read_write_stats.csv","incident_history.csv"] 23 | #------------------------------------------------------- 24 | 25 | credentials = service_account.Credentials.from_service_account_file(key_path, scopes=["https://www.googleapis.com/auth/cloud-platform"]) 26 | bq_client = bigquery.Client(credentials=credentials, project=bq_project_id, location="US") 27 | dataset_id = "monte_carlo_insights" 28 | client = Client(session=Session(mcd_profile=mcd_profile)) 29 | 30 | 31 | bq_client.create_dataset("monte_carlo_insights", exists_ok = True) 32 | for report in insight_names: 33 | bq_client.create_table(bq_project_id+".monte_carlo_insights."+report, exists_ok = True) 34 | print("Created {} Table in {} dataset".format(report,dataset_id)) 35 | 36 | for i in range(len(insight_report_names)): 37 | query=Query() 38 | query.get_report_url(insight_name=insight_names[i],report_name=insight_report_names[i]).__fields__('url') 39 | report_url=client(query).get_report_url.url 40 | r = requests.get(report_url) 41 | url_content = r.content 42 | with open(insight_report_names[i],"wb") as report_csv: 43 | report_csv.write(url_content) 44 | report_csv.close() 45 | 46 | table_id = insight_names[i] 47 | filename = insight_report_names[i] 48 | dataset_ref = bq_client.dataset(dataset_id) 49 | table_ref = dataset_ref.table(table_id) 50 | job_config = bigquery.LoadJobConfig() 51 | job_config.write_disposition = 'WRITE_TRUNCATE' 52 | job_config.allow_quoted_newlines = True 53 | job_config.source_format = bigquery.SourceFormat.CSV 54 | job_config.autodetect = True 55 | 56 | with open(filename,"rb") as source_file: 57 | job = bq_client.load_table_from_file(source_file, table_ref, job_config=job_config) 58 | job.result() 59 | print("Loaded {} rows into {}:{}.".format(job.output_rows, dataset_id, table_id)) 60 | source_file.close() 61 | -------------------------------------------------------------------------------- /insights/extract_mc_insights_dbx.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md # Instructions 3 | # MAGIC ## What is this? 4 | # MAGIC This notebook will download insights from Monte Carlo using the Monte Carlo API and then load them into Delta Table(s). Each insight will be loaded to its own Delta Table. This script will create / replace the Delta Table each time it is run. The table names will be "mcd_insight_insightname" 5 | # MAGIC 6 | # MAGIC ## Prerequisites 7 | # MAGIC * Through the Monte Carlo UI create an API token. 
8 | # MAGIC * Store the Token ID and Token Value in a DBX Secret key repo named 'monte-carlo-creds' with the keys 'mcd-id' and 'mcd-token' 9 | # MAGIC * Alternatively you can set the ID and Token in this notebook directly by editing the cell of this notebook named 'Find/Set API Credentials' 10 | # MAGIC * This script will not create a _schema_ for you. It is assumed that the schema you provide already exists. 11 | # MAGIC 12 | # MAGIC ## Running the notebook 13 | # MAGIC * After the 'Create User Input Widgets' command is run, there will be two drop-down widgets at the top of the notebook 14 | # MAGIC * INSIGHTS TO DOWNLOAD: Lets you select which insight(s) you want to download. The default will be ALL. If you want to only download a set of specific insights, de-select ALL and select the insights you want. 15 | # MAGIC * SCHEMA TO WRITE TO: The schema under which the Delta Tables will be created/replaced. 16 | # MAGIC * Run the rest of the commands to download the insights from Monte Carlo and import them to Databricks 17 | 18 | # COMMAND ---------- 19 | 20 | # MAGIC %md # Environment Setup 21 | 22 | # COMMAND ---------- 23 | 24 | #Install the Monte Carlo Python Library (Notebook scoped) 25 | #More info here: https://docs.databricks.com/libraries/notebooks-python-libraries.html#install-a-library-with-pip 26 | %pip install pycarlo 27 | 28 | # COMMAND ---------- 29 | 30 | # DBTITLE 1,Find/Set API Credentials 31 | # Monte Carlo Credentials stored in DBX Secret Key Repo called "monte-carlo-creds": 32 | mcd_id = dbutils.secrets.get(scope="monte-carlo-creds", key="mcd-id") 33 | mcd_token = dbutils.secrets.get(scope="monte-carlo-creds", key="mcd-token") 34 | 35 | # Other variables which you can customize: 36 | mcd_profile = "" 37 | 38 | # COMMAND ---------- 39 | 40 | # DBTITLE 1,Build a List of Available Reports 41 | from pycarlo.core import Client, Query, Session 42 | 43 | client = Client(session=Session(mcd_id=mcd_id, mcd_token=mcd_token, mcd_profile=mcd_profile)) 44 | query = Query() 45 | query.get_insights().__fields__('name', 'reports') 46 | 47 | response = client(query).get_insights 48 | 49 | insight_name_to_report_mapping = {} 50 | for insight in response: 51 | name = insight.name 52 | 53 | for report in insight.reports: 54 | # Some Insights have a .html report as well, we want to filter for just the .csv reports 55 | if report.name.endswith('.csv'): 56 | insight_name_to_report_mapping[name] = report.name 57 | 58 | # COMMAND ---------- 59 | 60 | # DBTITLE 1,Create User Input Widgets 61 | dbutils.widgets.multiselect( 62 | 'INSIGHTS TO DOWNLOAD', 63 | defaultValue='ALL', 64 | choices=['ALL'] + list(insight_name_to_report_mapping.keys()) 65 | ) 66 | dbutils.widgets.text("SCHEMA TO WRITE TO", "mcd_insights") 67 | 68 | # COMMAND ---------- 69 | 70 | # DBTITLE 1,Runtime Variables (Pulled From Input Widgets) 71 | insight_names = dbutils.widgets.get("INSIGHTS TO DOWNLOAD").split(',') 72 | 73 | # Download every insight only when 'ALL' is the sole selection; mixing 'ALL' with individual insights raises an error below 74 | if insight_names == ['ALL']: 75 | insight_report_names = [(insight, insight_name_to_report_mapping[insight]) for insight in 76 | list(insight_name_to_report_mapping.keys())] 77 | elif 'ALL' in insight_names: 78 | raise Exception("De-select 'ALL' from Insights to Download if you want to pick individual insights to download.") 79 | else: 80 | insight_report_names = [(insight, insight_name_to_report_mapping[insight]) for insight in insight_names] 81 | table_schema = 
dbutils.widgets.get("SCHEMA TO WRITE TO") 82 | 83 | # COMMAND ---------- 84 | 85 | # MAGIC %md # Load Insights to DBX 86 | 87 | # COMMAND ---------- 88 | 89 | from pycarlo.core import Client, Query, Mutation, Session 90 | import requests 91 | from pyspark.sql.functions import * 92 | import io 93 | import pandas as pd 94 | from datetime import * 95 | 96 | client = Client(session=Session(mcd_id=mcd_id, mcd_token=mcd_token,mcd_profile=mcd_profile)) 97 | today = datetime.today() 98 | 99 | for insight, report in insight_report_names: 100 | print("Looking for Insight Report: {}".format(insight)) 101 | query=Query() 102 | query.get_report_url(insight_name=insight,report_name=report).__fields__('url') 103 | report_url=client(query).get_report_url.url 104 | if not report_url: 105 | print("Insight Report {} is not available right now.".format(insight)) 106 | print("\n") 107 | continue 108 | r = requests.get(report_url).content 109 | 110 | # Customize the naming scheme of the loaded tables here: 111 | table_name = "mcd_insight_" + insight 112 | filename = report 113 | 114 | #Read data into pandas to convert to csv 115 | df=pd.read_csv(io.StringIO(r.decode('utf-8'))) 116 | #display(df) #Uncomment to see the data before it is loaded to a table 117 | 118 | #changing column spaces to underscores (if there are any) 119 | df.columns = df.columns.str.replace(' ','_') 120 | print('Creating Spark Data Frame') 121 | DF = spark.createDataFrame(df).withColumn("load_date", lit(date(today.year, today.month, today.day))) 122 | 123 | #Load Data to Databricks DELTA lake 124 | DF.write.mode("overwrite").option("mergeSchema", "true").saveAsTable(f"{table_schema}.{table_name}") 125 | print("Created table: {}.{}".format(table_schema,table_name)) 126 | print("\n") 127 | 128 | # COMMAND ---------- 129 | 130 | df = spark.sql("SHOW TABLES IN {} like 'mcd_insight_*'".format(table_schema)) 131 | display(df) 132 | -------------------------------------------------------------------------------- /insights/requirements.txt: -------------------------------------------------------------------------------- 1 | protobuf==3.20.2 2 | pycarlo==0.0.8 3 | requests==2.32.0 4 | -------------------------------------------------------------------------------- /landing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monte-carlo-data/monte-carlo-python-sdk-examples/074216990a85ea21b1e1fd0a6f6d35a6e6c428da/landing.png -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monte-carlo-data/monte-carlo-python-sdk-examples/074216990a85ea21b1e1fd0a6f6d35a6e6c428da/lib/__init__.py -------------------------------------------------------------------------------- /lib/auth/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monte-carlo-data/monte-carlo-python-sdk-examples/074216990a85ea21b1e1fd0a6f6d35a6e6c428da/lib/auth/__init__.py -------------------------------------------------------------------------------- /lib/auth/mc_auth.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import subprocess 3 | import configparser 4 | import os 5 | import boto3 6 | import requests 7 | import lib.helpers.constants as const 8 | from pycognito import aws_srp 9 | from botocore.exceptions import ClientError 10 
| from contextlib import nullcontext 11 | from pathlib import Path 12 | from pycarlo.core import Client, Session, Query, Mutation 13 | from rich.prompt import Confirm, Prompt 14 | from lib.helpers import sdk_helpers 15 | from rich.progress import Progress 16 | from lib.helpers.logs import LOGGER 17 | 18 | 19 | class MCAuth(object): 20 | 21 | def __init__(self, configs: configparser.ConfigParser, profile: str = None, progress: Progress = None): 22 | 23 | self.profile = "default" if not profile else profile 24 | self.profile_file = os.path.expanduser("~/.mcd/profiles.ini") 25 | self.progress = progress 26 | self._configs = configs 27 | self._ini = self.__read_ini() 28 | 29 | if self._ini: 30 | if self._ini.has_section(self.profile): 31 | self.mcd_id_current = self._ini[self.profile].get('mcd_id') 32 | self._mcd_token_current = self._ini[self.profile].get('mcd_token') 33 | 34 | if not self.mcd_id_current or not self._mcd_token_current: 35 | LOGGER.error("authentication id/token missing") 36 | exit(1) 37 | 38 | self.client = Client(session=Session(mcd_id=self.mcd_id_current, mcd_token=self._mcd_token_current)) 39 | self.validate_cli() 40 | else: 41 | LOGGER.error(f"profile '{self.profile}' does not exist") 42 | exit(1) 43 | 44 | def __read_ini(self): 45 | """ """ 46 | 47 | configs = None 48 | if Path(self.profile_file).is_file(): 49 | configs = configparser.ConfigParser() 50 | configs.read(self.profile_file) 51 | 52 | return configs 53 | 54 | def validate_cli(self): 55 | 56 | LOGGER.info("checking montecarlo version...") 57 | proc = subprocess.run(["montecarlo", "--version"], capture_output=True, text=True) 58 | if proc.returncode != 0: 59 | LOGGER.info("montecarlo is not installed") 60 | exit(proc.returncode) 61 | else: 62 | LOGGER.info(f"montecarlo present") 63 | 64 | LOGGER.info("validating montecarlo connection...") 65 | proc = subprocess.run( 66 | ["montecarlo", "--profile", self.profile, "validate"], capture_output=True, text=True 67 | ) 68 | if proc.returncode != 0: 69 | LOGGER.error("unable to validate token") 70 | self.__mc_create_token() 71 | else: 72 | LOGGER.info(f"validation complete") 73 | self.get_token_status() 74 | 75 | def get_token_status(self): 76 | """ """ 77 | 78 | query = Query() 79 | get_token_metadata = query.get_token_metadata(index="user") 80 | get_token_metadata.__fields__("id", "expiration_time") 81 | res = self.client(query).get_token_metadata 82 | 83 | threshold = 7 84 | token_info = [token for token in res if token.id == self.mcd_id_current] 85 | token_expiration = token_info[0].expiration_time.astimezone(datetime.UTC) if len(token_info) > 0 else datetime.datetime.now(datetime.UTC) 86 | expires_in_seconds = (token_expiration - datetime.datetime.now(datetime.UTC)).total_seconds() 87 | 88 | # Ask user (threshold) days before expiration if the token should be regenerated 89 | if expires_in_seconds <= (86400 * threshold): 90 | with sdk_helpers.PauseProgress(self.progress) if self.progress else nullcontext(): 91 | regenerate = Confirm.ask(f"The token associated with '{self.profile}' will expire in " 92 | f"{int(expires_in_seconds/3600)} hours. 
Do you want to create a new one?") 93 | if regenerate: 94 | self.delete_token(self.create_token()) 95 | 96 | def create_token(self): 97 | """ """ 98 | 99 | try: 100 | mcd_id_old = self.mcd_id_current 101 | mutation = Mutation() 102 | (mutation.create_access_token(comment="MC-SDK-Utils", 103 | expiration_in_days=int(self._configs['global'] 104 | .get('TOKEN_DURATION', "14"))) 105 | .access_token.__fields__("id", "token")) 106 | client = Client(session=Session(mcd_id=self.mcd_id_current, mcd_token=self._mcd_token_current)) 107 | res = client(mutation).create_access_token 108 | self.mcd_id_current = res.access_token.id 109 | self._mcd_token_current = res.access_token.token 110 | LOGGER.info("token created successfully") 111 | self.__store_token() 112 | self.client = Client(session=Session(mcd_id=self.mcd_id_current, mcd_token=self._mcd_token_current)) 113 | return mcd_id_old 114 | except: 115 | LOGGER.error("unable to create token") 116 | exit(1) 117 | 118 | def delete_token(self, token_id: str): 119 | """ """ 120 | 121 | try: 122 | mutation = Mutation() 123 | mutation.delete_access_token(token_id=token_id) 124 | client = Client(session=Session(mcd_id=self.mcd_id_current, mcd_token=self._mcd_token_current)) 125 | _ = client(mutation).delete_access_token 126 | LOGGER.info("old token deleted successfully") 127 | except: 128 | LOGGER.error("unable to delete old token") 129 | exit(1) 130 | 131 | def __store_token(self): 132 | """ """ 133 | 134 | try: 135 | self._ini.set(self.profile, 'mcd_id', self.mcd_id_current) 136 | self._ini.set(self.profile, 'mcd_token', self._mcd_token_current) 137 | with open(self.profile_file, 'w') as configfile: 138 | self._ini.write(configfile) 139 | LOGGER.info("token stored successfully") 140 | except Exception as e: 141 | LOGGER.error(f"unable to store token - {e}") 142 | exit(1) 143 | 144 | def __mc_create_token(self): 145 | """ """ 146 | 147 | username = self._configs['global'].get('USERNAME') 148 | password = self._configs['global'].get('PASSWORD') 149 | 150 | if None in [username, password]: 151 | LOGGER.debug("USERNAME/PASSWORD missing in configuration file") 152 | with sdk_helpers.PauseProgress(self.progress) if self.progress else nullcontext(): 153 | LOGGER.info("creating new token") 154 | username = Prompt.ask("[dodger_blue2]MC Username") 155 | password = Prompt.ask("[dodger_blue2]MC Password", password=True) 156 | 157 | bc = boto3.client("cognito-idp", "us-east-1") 158 | srp_helper = aws_srp.AWSSRP( 159 | username=username, 160 | password=password, 161 | pool_id=const.POOL_ID, 162 | client_id=const.CLIENT_ID, 163 | client_secret=None, 164 | client=bc 165 | ) 166 | 167 | try: 168 | auth_tokens = srp_helper.authenticate_user() 169 | except ClientError: 170 | LOGGER.error("unable to authenticate user. Ensure username/password is correct") 171 | exit(1) 172 | 173 | headers = {"Authorization": f"Bearer {auth_tokens['AuthenticationResult']['IdToken']}"} 174 | payload = f"""mutation createAccessToken($comment: String!, $expirationInDays: Int!) 
{{ 175 | createAccessToken(expirationInDays: $expirationInDays, comment: $comment) {{ 176 | accessToken {{ 177 | id 178 | token 179 | }} 180 | }} 181 | }}""" 182 | variables = {"comment": "MC-SDK-Utils", 183 | "expirationInDays": int(self._configs['global'].get('TOKEN_DURATION', "14"))} 184 | response = requests.post("https://graphql.getmontecarlo.com/graphql", verify=True, 185 | json={'query': payload, 'variables': variables}, headers=headers) 186 | res_json = response.json() 187 | self.mcd_id_current = res_json['data']['createAccessToken']['accessToken']['id'] 188 | self._mcd_token_current = res_json['data']['createAccessToken']['accessToken']['token'] 189 | self.__store_token() 190 | self.client = Client(session=Session(mcd_id=self.mcd_id_current, mcd_token=self._mcd_token_current)) 191 | -------------------------------------------------------------------------------- /lib/helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monte-carlo-data/monte-carlo-python-sdk-examples/074216990a85ea21b1e1fd0a6f6d35a6e6c428da/lib/helpers/__init__.py -------------------------------------------------------------------------------- /lib/helpers/constants.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class MonitorTypes(str, Enum): 5 | CATEGORIES = 'CATEGORIES' 6 | STATS = 'STATS' 7 | JSON_SCHEMA = 'JSON_SCHEMA' 8 | CUSTOM_SQL = 'CUSTOM_SQL' 9 | FIELD_QUALITY = 'FIELD_QUALITY' 10 | FRESHNESS = 'FRESHNESS' 11 | TABLE_METRIC = 'TABLE_METRIC' 12 | VOLUME = 'VOLUME' 13 | QUERY_PERF = 'QUERY_PERF' 14 | COMPARISON = 'COMPARISON' 15 | VALIDATION = 'VALIDATION' 16 | SCHEMA = 'SCHEMA' 17 | 18 | 19 | CLIENT_ID = "7om30cblkad8fb19c4hdjkmme9" 20 | POOL_ID = "us-east-1_OQBptzZme" 21 | -------------------------------------------------------------------------------- /lib/helpers/logs.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | import os 4 | import time 5 | from rich import print 6 | from pathlib import Path 7 | 8 | LOGGER = logging.getLogger() 9 | LOGS_DIR = Path(str(Path(os.path.abspath(__file__)).parent.parent.parent) + "/logs") 10 | 11 | 12 | class CustomFormatter(logging.Formatter): 13 | 14 | format = '%(message)s' 15 | 16 | FORMATS = { 17 | logging.DEBUG: "[grey]" + format, 18 | logging.INFO: "[steel_blue]" + format, 19 | logging.WARNING: "[orange3 bold]" + format, 20 | logging.ERROR: "[red3 bold]" + format, 21 | logging.CRITICAL: "[deep_pink2 bold]" + format 22 | } 23 | 24 | def format(self, record): 25 | log_fmt = self.FORMATS.get(record.levelno) 26 | formatter = logging.Formatter(log_fmt) 27 | try: 28 | record.msg = record.msg[0].upper() + record.msg[1:] 29 | except: 30 | record.msg = record.msg 31 | return formatter.format(record) 32 | 33 | 34 | class LoggingConfigs(object): 35 | 36 | @staticmethod 37 | def logging_configs(util_name) -> dict: 38 | """Return the Python Logging Configuration Dictionary. 39 | 40 | Returns: 41 | dict: Python Logging Configurations. 
42 | 43 | """ 44 | 45 | LOGS_DIR.mkdir(parents=True, exist_ok=True) 46 | 47 | logging_config = dict( 48 | version=1, 49 | formatters={ 50 | 'standard': {'format': '%(asctime)s - %(levelname)s - %(message)s'}, 51 | 'console': {'()': 'lib.helpers.logs.CustomFormatter', 52 | 'format': '%(message)s'} 53 | }, 54 | handlers={ 55 | 'file': {'class': 'logging.FileHandler', 56 | 'formatter': 'standard', 57 | 'level': logging.DEBUG, 58 | 'filename': f"{LOGS_DIR}/{util_name}-{datetime.date.today()}.log", 59 | 'encoding': "utf-8"}, 60 | 'console': {'class': 'rich.logging.RichHandler', 61 | 'show_path': False, 62 | 'omit_repeated_times': False, 63 | 'markup': True, 64 | 'rich_tracebacks': True, 65 | 'formatter': 'console', 66 | 'level': logging.INFO, 67 | } 68 | }, 69 | root={'handlers': ['file', 'console'], 70 | 'level': logging.NOTSET}, 71 | ) 72 | 73 | return logging_config 74 | 75 | 76 | class LogHelper(object): 77 | """Formatted Log Messages""" 78 | 79 | @staticmethod 80 | def banner(): 81 | font = "┳┳┓┏┓ ┏┓┳┓┓┏┓ ┏┓┏┓┳┳┓┏┓┓ ┏┓┏┓\n"\ 82 | "┃┃┃┃ ┗┓┃┃┃┫ ┗┓┣┫┃┃┃┃┃┃ ┣ ┗┓\n"\ 83 | "┛ ┗┗┛ ┗┛┻┛┛┗┛ ┗┛┛┗┛ ┗┣┛┗┛┗┛┗┛" 84 | print(f"[dodger_blue2]{font}") 85 | 86 | @staticmethod 87 | def split_message(message: str, level: [logging.ERROR, logging.INFO] = logging.INFO): 88 | """Writes message from stderr/stdout to individual lines. 89 | 90 | Args: 91 | message(str): Output from stdout or stderr. 92 | level(LOGGER): Logging level in which the lines are printed. 93 | 94 | """ 95 | for line in message.split('\n'): 96 | if line != '': 97 | LOGGER.log(level, line) 98 | 99 | 100 | class LogRotater(object): 101 | """Rotate Logs Every N Days.""" 102 | 103 | @staticmethod 104 | def rotate_logs(retention_period: int): 105 | """Delete log files older than the retention period. 106 | 107 | Args: 108 | retention_period (int): Number of Days of Logs to retain. 
109 | 110 | """ 111 | now = time.time() 112 | 113 | for log_file in os.listdir(LOGS_DIR): 114 | log = os.path.join(LOGS_DIR, log_file) 115 | if os.stat(log).st_mtime < now - retention_period * 86400: 116 | if os.path.isfile(log): 117 | os.remove(log) 118 | -------------------------------------------------------------------------------- /lib/helpers/sdk_helpers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import pytz 4 | import argparse 5 | import os 6 | import json 7 | import traceback 8 | from lib.helpers.logs import LogRotater 9 | from datetime import datetime, timedelta 10 | from lib.helpers.logs import LOGGER 11 | from rich.progress import Progress 12 | from cronsim import CronSim 13 | 14 | PARSER_CONFIG = f'{os.path.dirname(os.path.abspath(__file__))}/parser_config.json' 15 | 16 | 17 | def ensure_progress(func): 18 | def wrapper(*args, **kwargs): 19 | with Progress() as progress: 20 | task = progress.add_task("[yellow][RUNNING]...", total=100) 21 | LogRotater.rotate_logs(retention_period=7) 22 | 23 | try: 24 | LOGGER.info(f"running utility using '{args[1].profile}' profile") 25 | result = func(progress, *args, **kwargs) 26 | progress.update(task, description="[dodger_blue2][COMPLETE]", advance=100) 27 | return result 28 | except Exception as e: 29 | LOGGER.error(e, exc_info=False) 30 | print(traceback.format_exc()) 31 | finally: 32 | progress.update(task, description="[dodger_blue2 bold][COMPLETE]", advance=100) 33 | 34 | return wrapper 35 | 36 | 37 | def hour_rounder(t): 38 | # Rounds to nearest hour by adding a timedelta hour if minute >= 30 39 | return t.replace(second=0, microsecond=0, minute=0, hour=t.hour) + timedelta(hours=t.minute // 30) 40 | 41 | 42 | def calculate_interval_minutes(cron: str): 43 | """Return interval in minutes for a crontab string""" 44 | 45 | it = CronSim(cron, datetime.now(pytz.UTC)) 46 | a = next(it) 47 | b = next(it) 48 | delta = b - a 49 | return int(delta.total_seconds() / 60) 50 | 51 | 52 | def link(uri, label=None): 53 | """Create clickable link inside terminal""" 54 | if label is None: 55 | label = uri 56 | parameters = '' 57 | 58 | # OSC 8 ; params ; URI ST OSC 8 ;; ST 59 | escape_mask = '\033]8;{};{}\033\\{}\033]8;;\033\\' 60 | 61 | return escape_mask.format(parameters, uri, label) 62 | 63 | 64 | def generate_arg_parser(type, executable): 65 | """Fill in util description parser from JSON file""" 66 | 67 | with open(PARSER_CONFIG, 'r') as file: 68 | config = json.load(file) 69 | 70 | formatter = lambda prog: argparse.RawTextHelpFormatter(prog, max_help_position=120) 71 | try: 72 | parser = argparse.ArgumentParser(description=config[type][executable]['description'].expandtabs(4), formatter_class=formatter) 73 | parser._optionals.title = "Options" 74 | parser._positionals.title = None 75 | 76 | if config.get(type).get(executable).get('subparsers'): 77 | subparsers = parser.add_subparsers(dest='commands', required=True, metavar=" ") 78 | parser._positionals.title = "Commands" 79 | d = {} 80 | for subparser in config[type][executable]['subparsers']: 81 | d[f"{subparser}_parser"] = subparsers.add_parser(subparser, 82 | description=config[type][executable]['subparsers'][subparser]['description'], 83 | help=config[type][executable]['subparsers'][subparser]['help']) 84 | for argument in config[type][executable]['subparsers'][subparser]['arguments']: 85 | args = {'required': config[type][executable]['subparsers'][subparser]['arguments'][argument].get('required', None), 86 | 'default': 
config[type][executable]['subparsers'][subparser]['arguments'][argument].get('default', None), 87 | 'help': config[type][executable]['subparsers'][subparser]['arguments'][argument].get('help', None), 88 | 'choices': config[type][executable]['subparsers'][subparser]['arguments'][argument].get('choices', None)} 89 | not_none_params = {k: v for k, v in args.items() if v is not None} 90 | d[f"{subparser}_parser"].add_argument(f"--{argument}", f"-{argument[0]}", 91 | **not_none_params) 92 | return parser, subparsers 93 | else: 94 | for argument in config[type][executable]['arguments']: 95 | args = {'required': config[type][executable]['arguments'][argument].get('required', None), 96 | 'default': config[type][executable]['arguments'][argument].get('default', None), 97 | 'help': config[type][executable]['arguments'][argument].get('help', None), 98 | 'choices': config[type][executable]['arguments'][argument].get('choices', None)} 99 | not_none_params = {k: v for k, v in args.items() if v is not None} 100 | parser.add_argument(f"--{argument}", f"-{argument[0]}", 101 | **not_none_params) 102 | 103 | return parser 104 | 105 | except KeyError as key: 106 | LOGGER.error(f"Key {key} missing in {PARSER_CONFIG}") 107 | sys.exit(1) 108 | 109 | 110 | def dump_help(parser, func, *args): 111 | if len(*args) == 0: 112 | parser.print_help(sys.stderr) 113 | sys.exit(1) 114 | elif len(*args) == 1: 115 | if '-h' not in args[0] and '--help' not in args[0]: 116 | args[0].append("-h") 117 | func(*args) 118 | sys.exit(1) 119 | elif len(*args) == 2 and args[0][1] in ["-h", "--help"]: 120 | if parser._subparsers._group_actions[0].choices.get(args[0][0]): 121 | parser._subparsers._group_actions[0].choices[args[0][0]].print_help(sys.stderr) 122 | sys.exit(1) 123 | 124 | 125 | def batch_objects(objects: list, batch_size: int) -> list: 126 | """Batch Objects into sublists. 127 | 128 | Args: 129 | objects (list): Objects to be batched. 130 | batch_size (int): number of elements inside each batch 131 | 132 | Returns: 133 | list: List of Lists. 134 | 135 | """ 136 | LOGGER.info(f"batching the {len(objects)} into lists of {batch_size}") 137 | batches = [objects[x:x + batch_size] for x in range(0, len(objects), batch_size)] 138 | LOGGER.info(f"batching complete. 
{len(batches)} batch lists created") 139 | 140 | return batches 141 | 142 | 143 | def parse_input(input_value,delimiter): 144 | parsed_list = input_value.split(delimiter) 145 | final_list = [] 146 | for val in parsed_list: 147 | while " " == val[0]: 148 | val = val[1:] 149 | while " " == val[-1]: 150 | val = val[:-1] 151 | final_list.append(val) 152 | return final_list 153 | 154 | 155 | class PauseProgress: 156 | def __init__(self, progress: Progress) -> None: 157 | self._progress = progress 158 | 159 | def _clear_line(self) -> None: 160 | UP = "\x1b[1A" 161 | CLEAR = "\x1b[2K" 162 | 163 | for _ in self._progress.tasks: 164 | print(UP + CLEAR + UP) 165 | 166 | def __enter__(self): 167 | self._progress.stop() 168 | self._clear_line() 169 | return self._progress 170 | 171 | def __exit__(self, exc_type, exc_value, exc_traceback): 172 | self._progress.start() 173 | -------------------------------------------------------------------------------- /lineage/assets_downstream_from_asset.py: -------------------------------------------------------------------------------- 1 | 2 | from pycarlo.core import Client, Query, Session 3 | import requests 4 | import csv 5 | import networkx as nx 6 | 7 | mcd_profile='dev' 8 | asset_id = 'warehouse:schema.table' 9 | 10 | ######################################################################## 11 | # Fetches all downstream assets from a specified asset. 12 | ######################################################################## 13 | 14 | # start a Monte Carlo API session 15 | client = Client(session=Session(mcd_profile=mcd_profile)) 16 | 17 | # set column positions in csv 18 | row_position = 0 19 | type_position = 2 20 | asset_id_position = 3 21 | 22 | # get MC lineage directed graph 23 | get_digraph = Query() 24 | get_digraph.get_digraph(metadata_version="v2") 25 | digraph = client(get_digraph).get_digraph 26 | 27 | # get a list of nodes 28 | download_vertices = requests.get(digraph.vertices) 29 | decoded_vertices = download_vertices.content.decode('utf-8') 30 | vertices_csv = csv.reader(decoded_vertices.splitlines(), delimiter=',') 31 | vertices = list(vertices_csv) 32 | 33 | # get a list of edges in Monte Carlo lineage 34 | download_edges = requests.get(digraph.edges) 35 | decoded_edges = download_edges.content.decode('utf-8') 36 | 37 | # create a networkx directed graph 38 | G = nx.DiGraph() 39 | G = nx.read_edgelist(decoded_edges.splitlines(), delimiter=',', nodetype=str, create_using=nx.DiGraph) 40 | 41 | assets_affected = [] 42 | 43 | # loop throuh nodes 44 | for find in vertices: 45 | 46 | # find node of interest 47 | if find[asset_id_position] == asset_id: 48 | node_id = f'"{find[row_position]}"' 49 | try: 50 | # find downstream nodes 51 | downstream_nodes = [n for n in nx.traversal.bfs_tree(G, node_id) if n != node_id] 52 | 53 | # create list of downstream node metadata 54 | for downstream_node in downstream_nodes: 55 | downstream_node_id = int(downstream_node.replace('"', '')) 56 | assets_affected.append(vertices[downstream_node_id]) 57 | except: 58 | continue 59 | 60 | # write affected objects to a csv 61 | asset_file_name = asset_id.replace(':','.') 62 | with open(f'assets_downstream_from_{asset_file_name}.csv', 'w') as f: 63 | write = csv.writer(f) 64 | write.writerow(vertices[0]) 65 | write.writerows(assets_affected) -------------------------------------------------------------------------------- /lineage/incidents_upstream_from_report.py: -------------------------------------------------------------------------------- 1 | from email.policy 
import default 2 | from pycarlo.core import Client, Query, Session 3 | import requests 4 | import csv 5 | import networkx as nx 6 | 7 | mcd_profile='dev' 8 | bi_report_id = '123' # ID of object in Looker / Tableau 9 | exclude_incidents_with_status = ['FIXED', 'FALSE_POSITIVE', 'EXPECTED', 'NO_ACTION_NEEDED'] 10 | incident_types_to_include = ['CUSTOM_RULE_ANOMALIES','DELETED_TABLES'] 11 | incident_sub_types_to_include = ['dimension_anomaly','field_metrics_anomaly','freshness_anomaly','volume_anomaly'] 12 | 13 | ######################################################################## 14 | # Fetches any recent incidents from an upstream BI report. 15 | ######################################################################## 16 | 17 | def get_report_quality_status(mcd_profile, bi_report_id, exclude_incidents_with_status, incident_types_to_include, incident_sub_types_to_include): 18 | # start a Monte Carlo API session 19 | client = Client(session=Session(mcd_profile=mcd_profile)) 20 | 21 | # set column positions in csv 22 | row_position = 0 23 | type_position = 2 24 | name_position = 3 25 | dataset_id_position = 5 26 | mcon_position = 9 27 | 28 | # get MC lineage directed graph 29 | get_digraph = Query() 30 | get_digraph.get_digraph(metadata_version="v2") 31 | digraph = client(get_digraph).get_digraph 32 | 33 | # get a list of nodes 34 | download_vertices = requests.get(digraph.vertices) 35 | decoded_vertices = download_vertices.content.decode('utf-8') 36 | vertices_csv = csv.reader(decoded_vertices.splitlines(), delimiter=',') 37 | vertices = list(vertices_csv) 38 | 39 | # get a list of edges in Monte Carlo lineage 40 | download_edges = requests.get(digraph.edges) 41 | decoded_edges = download_edges.content.decode('utf-8') 42 | 43 | # create a networkx directed graph 44 | G = nx.DiGraph() 45 | G = nx.read_edgelist(decoded_edges.splitlines(), delimiter=',', nodetype=str, create_using=nx.DiGraph) 46 | 47 | for index, sublist in enumerate(vertices): 48 | if sublist[mcon_position] != 'mcon' and sublist[mcon_position].split('++')[4] == bi_report_id: 49 | bi_report_id = f'"{index}"' 50 | break 51 | 52 | # find upstream nodes 53 | upstream_nodes = [n for n in nx.traversal.bfs_tree(G, bi_report_id, reverse=True) if n != bi_report_id] 54 | 55 | # create a list of tables upstream 56 | tables_upstream = [] 57 | for upstream_node in upstream_nodes: 58 | node_id = int(upstream_node.strip('"')) 59 | if vertices[node_id][type_position] == 'table': 60 | tables_upstream.append(vertices[node_id][name_position]) 61 | 62 | # get incidents 63 | tables_with_incidents = [] 64 | has_unresolved_incident = False 65 | get_recent_incidents = Query() 66 | get_recent_incidents.get_incidents( 67 | first=100, 68 | exclude_feedback=exclude_incidents_with_status, 69 | incident_types=incident_types_to_include, 70 | incident_sub_types=incident_sub_types_to_include 71 | ).edges.node.events(first=100).edges.node.table.__fields__('full_table_id') 72 | incidents = client(get_recent_incidents).get_incidents.edges 73 | 74 | for incident in incidents: 75 | for event_edge in incident.node.events.edges: 76 | table_with_incident = event_edge.node.table.full_table_id 77 | if table_with_incident in tables_upstream: 78 | has_unresolved_incident = True 79 | tables_with_incidents.append(table_with_incident) 80 | print(f'Recent unresolved incident upstream on {table_with_incident}.') 81 | 82 | return has_unresolved_incident, tables_with_incidents 83 | 84 | 85 | has_unresolved_incident, tables_with_incidents = 
get_report_quality_status(mcd_profile, bi_report_id, exclude_incidents_with_status, incident_types_to_include, incident_sub_types_to_include) 86 | print(has_unresolved_incident) -------------------------------------------------------------------------------- /lineage/insertLineageFromCSV.py: -------------------------------------------------------------------------------- 1 | ####################################################################################################################################### 2 | ## 3 | ## I want a CSV with five fields, but only two are required. Order doesn't matter, case does. 4 | ## All five columns need to be in the CSV 5 | ## 6 | ## - source (required) 7 | ## database:schema.object referring to the upstream object 8 | ## 9 | ## - destination (required) 10 | ## database:schema.object referring to the downstream object 11 | ## 12 | ## - source_type 13 | ## Type of object (table,view,external,etc). If missing, a lookup will be run against the API and the first result used. 14 | ## 15 | ## - destination_type 16 | ## Type of object (table,view,external,etc). If missing, a lookup will be run against the API and the first result used. 17 | ## 18 | ## - dwid 19 | ## UUID assigned to the DW by MC. If not provided, a search will be run and the first result used. Assumption being you have one DW. 20 | ## 21 | ####################################################################################################################################### 22 | 23 | import argparse 24 | import csv 25 | from pycarlo.core import Client, Query, Mutation 26 | 27 | client = Client() 28 | query = Query() 29 | 30 | header_list = ("source","destination","source_type","destination_type","dwid") 31 | dw_id = "" 32 | sType = "" 33 | dType = "" 34 | 35 | 36 | def getDWID(): 37 | get_dwID_query = """ 38 | query getUser { 39 | getUser { 40 | account { 41 | warehouses { 42 | uuid 43 | connectionType 44 | } 45 | } 46 | } 47 | } 48 | """ 49 | response = client(get_dwID_query) 50 | return response['get_user']['account']['warehouses'][0]['uuid'] 51 | 52 | 53 | def getObjType (objName): 54 | get_objType = """ 55 | query search { 56 | search(query: " """ + objName + """ ", limit: 1) { 57 | results { 58 | objectType 59 | } 60 | } 61 | } 62 | """ 63 | response = client(get_objType) 64 | return response['search']['results'][0]['objectType'] 65 | 66 | 67 | def insertLineage(fsource,fsType,fdestination,fdType,fdwID): 68 | insert_lineage_query = """ 69 | mutation{ 70 | createOrUpdateLineageEdge( 71 | destination: { 72 | objectId: \"""" + fdestination + """\" 73 | objectType: \"""" + fdType + """\" #table,view,external,report, others? 74 | resourceId: \"3c989167-8e51-4a04-8ac0-2c0b2ed8f0b9\" # warehouseID 75 | } 76 | source: { 77 | objectId: \"""" + fsource + """\" 78 | objectType: \"""" + fsType + """\" #table,view,external,report, others? 
79 | resourceId: \"3c989167-8e51-4a04-8ac0-2c0b2ed8f0b9\" # warehouseID 80 | } 81 | ){ 82 | edge{ 83 | expireAt 84 | isCustom 85 | jobTs 86 | } 87 | } 88 | } 89 | """ 90 | try: 91 | response = client(insert_lineage_query) 92 | return(response) 93 | except: 94 | return("failed insert: " + fsource + " -> " + fdestination) 95 | print(insert_lineage_query) 96 | 97 | 98 | ############################################################################ 99 | ## ## 100 | ########################################################################### 101 | parser = argparse.ArgumentParser() 102 | parser.add_argument("-f", "--file", help = "Input file, csv") 103 | parser.add_argument("-dw", "--warehouse", help = "Get the associated DW ID(S)") 104 | args = parser.parse_args() 105 | 106 | if args.file: 107 | with open(args.file, mode='r') as file: 108 | reader = csv.DictReader(file) 109 | ################################# 110 | ## validate the column headers ## 111 | ################################# 112 | if all (key in reader.fieldnames for key in header_list): 113 | ######################################## 114 | ## Headers are good, build the fields ## 115 | ######################################## 116 | for r in reader: 117 | ######################################################################## 118 | ## Want object type to be optional, but also want to see if it's set. ## 119 | ## Only hit the API if object type is blank ## 120 | ######################################################################## 121 | if r['source_type'] == "": 122 | sType = getObjType(r['source']) 123 | else: 124 | sType = r['source_type'] 125 | 126 | if r['destination_type'] == "": 127 | dType = getObjType(r['destination']) 128 | else: 129 | dType = r['destination_type'] 130 | ########################################################################## 131 | ## if dwID is blank, assume that means there's only one so look it up. ## 132 | ## but only want to hit the API once, no need to do it over and over. 
## 133 | ########################################################################## 134 | if r['dwid'] == "": 135 | if dw_id == "": 136 | dw_id = getDWID() 137 | insertLineage(r['source'],sType,r['destination'],dType,dw_id) 138 | else: 139 | insertLineage(r['source'],sType,r['destination'],dType,r['dwid']) 140 | else: 141 | print("Missing Column (case sensitive, order doesn't matter)") 142 | print("Expected: ", header_list) 143 | print("Found: ", reader.fieldnames) 144 | elif args.warehouse: 145 | print(getDWID()) 146 | else: 147 | print("Argument required - either '-f ' or '-dw x'") 148 | -------------------------------------------------------------------------------- /lineage/lineage.py: -------------------------------------------------------------------------------- 1 | from pycarlo.core import Client, Mutation, Session 2 | 3 | ######################################################################## 4 | # Class to simplify adding Lineage to Monte Carlo for ML Models 5 | ######################################################################## 6 | 7 | class lineage: 8 | def __init__(self, customer: None): 9 | self.client = Client(session=Session(mcd_profile=customer)) 10 | 11 | def add_downstream_node(self, node_name, node_id, warehouse_id, tags, source_nodes): 12 | ''' Arguments: 13 | * `node_name` (`String`): Object name (table name, report name, etc) 14 | * `node_id` (`String!`): Object identifier 15 | * `warehouse_id` (`UUID`): The id of the resource containing the node 16 | ''' 17 | node_id = self.add_node(node_name, node_id, warehouse_id, tags) 18 | for source_node in source_nodes: 19 | self.add_edge(source_node, node_id, warehouse_id) 20 | return node_id 21 | 22 | def add_node(self, node_name, node_id, resource_id, tags): 23 | ''' Arguments: 24 | * `name` (`String`): Object name (table name, report name, etc) 25 | * `object_id` (`String!`): Object identifier 26 | * `object_type` (`String!`): Object type 27 | * `properties` (`[ObjectPropertyInput]`): A list of object properties to be indexed by the search service 28 | * `resource_id` (`UUID`): The id of the resource containing the node 29 | ''' 30 | 31 | put = Mutation() 32 | put.create_or_update_lineage_node( 33 | name=node_name 34 | ,object_id=node_id 35 | ,object_type='ML Model' 36 | ,resource_id=resource_id # ID of Monte Carlo Warehouse to place the node under (Warehouse is a parent object) 37 | # ,tags=tags 38 | ) 39 | response = self.client(put) 40 | return response.create_or_update_lineage_node.node.node_id 41 | 42 | 43 | def add_edge(self, source_node, destination_node_id, warehouse_id): 44 | ''' Arguments: 45 | * `source` (`NodeInput!`): The destination node 46 | * object_id 47 | * object_type 48 | * tags (optional) 49 | * `destination` (`NodeInput!`): The destination node 50 | * object_id 51 | * object_type 52 | * tags (optional) 53 | * `expire_at` (`DateTime`): When the edge will expire 54 | * `source` (`NodeInput!`): The source node 55 | ''' 56 | put = Mutation() 57 | put.create_or_update_lineage_edge( 58 | source = dict( 59 | object_id=source_node['object_id'] 60 | ,object_type=source_node['object_type'] 61 | ,resouce_id=warehouse_id 62 | ), 63 | destination = dict( 64 | object_id=destination_node_id 65 | ,object_type="ML Model" 66 | ,resouce_id=warehouse_id 67 | ) 68 | ) 69 | response = self.client(put) 70 | return response.create_or_update_lineage_edge.edge.edge_id 71 | 72 | ######################################################################## 73 | # Execution example of adding lineage for a new node and a source 
74 | # table to Monte Carlo 75 | ######################################################################## 76 | 77 | # Initialize lineage class in my dev enviornment 78 | l = lineage('dev') 79 | 80 | # add details of the new node and edges to upstream nodes 81 | new_node = l.add_downstream_node( 82 | node_name='My Machine Learning Model' # Name of asset in Monte Carlo 83 | ,node_id='prod_ml_models.my_machine_learning_model' # ID of asset in Monte Carlo 84 | ,warehouse_id='6110e6-b92-48f-a71-84b421f32' # ID of Monte Carlo Warehouse to place the node under (Warehouse is a parent object) 85 | ,source_nodes = [ 86 | dict({ 87 | 'object_id': 'prod:ml_data.table_for_ml_model' 88 | , 'object_type': 'table' 89 | }) 90 | ] 91 | ) 92 | 93 | print(f'Created new node: {new_node}!') -------------------------------------------------------------------------------- /lineage/lineage_graph_retrieval.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from typing import List, Dict 3 | from pycarlo.core import Client, Query, Mutation 4 | import time 5 | 6 | 7 | def get_all_tables(client: Client, batch_size: int = 1000, selected_table_fields: List[str] = None, sleep_in_seconds: float = 0.5): 8 | """ 9 | Retrieves all tables via getTable API: https://apidocs.getmontecarlo.com/#query-getTable 10 | Tweak batch_size to increase throughput 11 | Tweak sleep_in_seconds to stay within API limits 12 | """ 13 | selected_table_fields = selected_table_fields or [ 14 | 'mcon', 'project_name', 'dataset', 'table_id', 'table_type', 'full_table_id' 15 | ] 16 | current_cursor = None 17 | tables = [] 18 | 19 | while (True): 20 | params = { 21 | 'first': batch_size, 22 | 'is_deleted': False 23 | } 24 | if current_cursor: 25 | params['after'] = current_cursor 26 | 27 | query = Query() 28 | get_tables_query = query.get_tables(**params) 29 | get_tables_query.page_info() 30 | get_tables_query.edges.node.__fields__(*selected_table_fields) 31 | 32 | print(get_tables_query) 33 | 34 | response = client(query) 35 | for table in response.get_tables.edges: 36 | tables.append({ 37 | field: table.node[field] 38 | for field in selected_table_fields 39 | }) 40 | 41 | has_next_page = response.get_tables.page_info.has_next_page 42 | if not has_next_page: 43 | break 44 | current_cursor = response.get_tables.page_info.end_cursor 45 | 46 | time.sleep(sleep_in_seconds) 47 | 48 | return tables 49 | 50 | 51 | def chunker(seq, size): 52 | return (seq[pos:pos + size] for pos in range(0, len(seq), size)) 53 | 54 | 55 | def get_lineage_graph_for_tables(client: Client, tables: List[Dict], direction: str = 'downstream', 56 | batch_size: int = 20, sleep_in_seconds: float = 0.5): 57 | """ 58 | Retrieve all lineage edges for tables. 
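    Returns a set of (source_mcon, destination_mcon) tuples built from the flattened lineage edges.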
59 | Tweak batch_size to control throughput 60 | Tweak sleep_in_seconds to stay within API limits 61 | """ 62 | if batch_size > 20: 63 | raise ValueError('batch_size must be between 0 and 20') 64 | 65 | edges = set() 66 | count = 0 67 | for chunk in chunker(tables, batch_size): 68 | query = Query() 69 | query.get_table_lineage( 70 | mcons=[table['mcon'] for table in chunk], 71 | direction=direction 72 | ).flattened_edges.__fields__('mcon', 'directly_connected_mcons') 73 | 74 | response = client(query) 75 | 76 | if hasattr(response.get_table_lineage, 'flattened_edges'): 77 | for flattened_edge in response.get_table_lineage.flattened_edges: 78 | for destination_mcon in flattened_edge.directly_connected_mcons: 79 | edges.add((flattened_edge.mcon, destination_mcon)) 80 | 81 | count += len(chunk) 82 | print(f"Fetched lineage for {count} nodes") 83 | time.sleep(sleep_in_seconds) 84 | return edges 85 | 86 | 87 | if __name__ == '__main__': 88 | client = Client() 89 | tables = get_all_tables(client) 90 | print(f"Retrieved {len(tables)} tables") 91 | edges = get_lineage_graph_for_tables(client, tables) 92 | print(f"Retrieved {len(edges)} edges") 93 | 94 | tables_df = pd.DataFrame(tables) 95 | tables_df.to_csv('/tmp/tables.csv') 96 | 97 | edges_df = pd.DataFrame(edges) 98 | edges_df.to_csv('/tmp/edges.csv') 99 | -------------------------------------------------------------------------------- /lineage/reports_by_schema.py: -------------------------------------------------------------------------------- 1 | 2 | from pycarlo.core import Client, Query, Session 3 | import requests 4 | import csv 5 | import networkx as nx 6 | 7 | mcd_profile='dev' 8 | schema = 'my_dataset' 9 | 10 | ######################################################################## 11 | # Fetches all reports that are downstream of assets in a schema. 
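# Usage sketch (illustrative): set mcd_profile and schema above, then run
#   python lineage/reports_by_schema.py
# De-duplicated results are written to looker_dashboards_affected.csv.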
12 | ######################################################################## 13 | 14 | # start a Monte Carlo API session 15 | client = Client(session=Session(mcd_profile=mcd_profile)) 16 | 17 | # set column positions in csv 18 | row_position = 0 19 | type_position = 2 20 | dataset_id_position = 5 21 | 22 | # get MC lineage directed graph 23 | get_digraph = Query() 24 | get_digraph.get_digraph(metadata_version="v2") 25 | digraph = client(get_digraph).get_digraph 26 | 27 | # get a list of nodes 28 | download_vertices = requests.get(digraph.vertices) 29 | decoded_vertices = download_vertices.content.decode('utf-8') 30 | vertices_csv = csv.reader(decoded_vertices.splitlines(), delimiter=',') 31 | vertices = list(vertices_csv) 32 | 33 | # create a list of nodes that removes non-looker-nodes 34 | looker_nodes = [] 35 | for node in vertices: 36 | if node[type_position] in ['looker-dashboard', 'looker-explore', 'looker-view', 'looker-look']: 37 | looker_nodes.append(node) 38 | 39 | 40 | # get a list of edges in Monte Carlo lineage 41 | download_edges = requests.get(digraph.edges) 42 | decoded_edges = download_edges.content.decode('utf-8') 43 | 44 | # create a networkx directed graph 45 | G = nx.DiGraph() 46 | G = nx.read_edgelist(decoded_edges.splitlines(), delimiter=',', nodetype=str, create_using=nx.DiGraph) 47 | 48 | looker_dashboards_affected = [] 49 | 50 | # loop throuh nodes 51 | for node in vertices: 52 | 53 | # if node is in the schema we are interested in 54 | if node[dataset_id_position] == schema: 55 | node_id = f'"{node[row_position]}"' 56 | try: 57 | # find downstream nodes 58 | downstream_nodes = [n for n in nx.traversal.bfs_tree(G, node_id) if n != node_id] 59 | 60 | # loop through downstream nodes against looker nodes and add to a dependency list if in looker 61 | for downstream_node in downstream_nodes: 62 | for looker_node in looker_nodes: 63 | if f'"{looker_node[row_position]}"' == downstream_node: 64 | looker_dashboards_affected.append(looker_node) 65 | except: 66 | continue 67 | 68 | # write affected looker objects to a csv - contains all, including duplicates (should handle this earlier but shrug) 69 | with open('looker_dashboards_affected_dups.csv', 'w') as f: 70 | write = csv.writer(f) 71 | write.writerow(vertices[0]) 72 | write.writerows(looker_dashboards_affected) 73 | 74 | # remove duplicates 75 | with open('looker_dashboards_affected_dups.csv','r') as in_file, open('looker_dashboards_affected.csv','w') as out_file: 76 | seen = set() 77 | for line in in_file: 78 | if line in seen: continue 79 | seen.add(line) 80 | out_file.write(line) 81 | -------------------------------------------------------------------------------- /lineage/requirements.txt: -------------------------------------------------------------------------------- 1 | networkx==2.6.3 2 | pycarlo>=0.8.1 3 | requests==2.32.0 4 | pandas==2.0.3 5 | -------------------------------------------------------------------------------- /lineage/tables_upstream_from_report.py: -------------------------------------------------------------------------------- 1 | 2 | from pycarlo.core import Client, Query, Session 3 | import requests 4 | import csv 5 | import networkx as nx 6 | 7 | mcd_profile='dev' 8 | bi_report_id = '123' 9 | 10 | ######################################################################## 11 | # Fetches all tables upstream from a BI Report. 
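# Usage sketch (illustrative): set mcd_profile and bi_report_id above, then run
#   python lineage/tables_upstream_from_report.py
# Results are written to tables_upstream.csv.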
12 | ######################################################################## 13 | 14 | # start a Monte Carlo API session 15 | client = Client(session=Session(mcd_profile=mcd_profile)) 16 | 17 | # set column positions in csv 18 | row_position = 0 19 | type_position = 2 20 | dataset_id_position = 5 21 | mcon_position = 9 22 | 23 | # get MC lineage directed graph 24 | get_digraph = Query() 25 | get_digraph.get_digraph(metadata_version="v2") 26 | digraph = client(get_digraph).get_digraph 27 | 28 | # get a list of nodes 29 | download_vertices = requests.get(digraph.vertices) 30 | decoded_vertices = download_vertices.content.decode('utf-8') 31 | vertices_csv = csv.reader(decoded_vertices.splitlines(), delimiter=',') 32 | vertices = list(vertices_csv) 33 | 34 | # create a list of table nodes 35 | table_nodes = [] 36 | for node in vertices: 37 | if node[type_position] in ['table']: 38 | table_nodes.append(node) 39 | 40 | 41 | # get a list of edges in Monte Carlo lineage 42 | download_edges = requests.get(digraph.edges) 43 | decoded_edges = download_edges.content.decode('utf-8') 44 | 45 | # create a networkx directed graph 46 | G = nx.DiGraph() 47 | G = nx.read_edgelist(decoded_edges.splitlines(), delimiter=',', nodetype=str, create_using=nx.DiGraph) 48 | 49 | for index, sublist in enumerate(vertices): 50 | if sublist[mcon_position] != 'mcon' and sublist[mcon_position].split('++')[4] == bi_report_id: 51 | bi_report_id = f'"{index}"' 52 | break 53 | 54 | # find downstream nodes 55 | upstream_nodes = [n for n in nx.traversal.bfs_tree(G, bi_report_id, reverse=True) if n != bi_report_id] 56 | 57 | tables_upstream = [] 58 | for upstream_node in upstream_nodes: 59 | node_id = int(upstream_node.strip('"')) 60 | if vertices[node_id][type_position] == 'table': 61 | tables_upstream.append(vertices[node_id]) 62 | 63 | # write affected looker objects to a csv - contains all, including duplicates (should handle this earlier but shrug) 64 | with open('tables_upstream.csv', 'w') as f: 65 | write = csv.writer(f) 66 | write.writerow(vertices[0]) 67 | write.writerows(tables_upstream) -------------------------------------------------------------------------------- /mcsdksamplerunner.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import click 3 | import sys 4 | from lib.helpers.logs import LogHelper 5 | from pathlib import Path 6 | 7 | 8 | @click.group(help='MC SDK SAMPLE CATALOG', context_settings=dict(help_option_names=["-h", "--help"])) 9 | @click.pass_context 10 | def main_module(cmd): 11 | pass 12 | 13 | 14 | def bind_function(name): 15 | 16 | def func(args): 17 | call = importlib.import_module(f"{module}.{submodule.replace('-', '_')}") 18 | try: 19 | call.main(args) 20 | except AttributeError as e: 21 | click.echo(click.style(f"The '{submodule}' command is not supported yet", fg='red')) 22 | 23 | func.__name__ = name 24 | return func 25 | 26 | 27 | if __name__ == '__main__': 28 | 29 | # Define folders to track 30 | modules = {'admin': {'description': 'Admin related operations and utilities.'}, 31 | 'tables': {'description': 'Collection of actions and utilities around tables/views'}, 32 | 'monitors': {'description': 'Collection of actions and utilities for MC monitors.'}, 33 | 'lineage': {'description': 'Collection of actions and utilities around lineage'},} 34 | 35 | for module in modules: 36 | subpaths = sorted(Path(module).glob('[!__]*.py')) 37 | 38 | @click.command(name=module, help=modules[module]['description'], 39 | 
context_settings=dict(help_option_names=["-h", "--help"])) 40 | def command(): 41 | pass 42 | 43 | main_module.add_command(command) 44 | 45 | if len(sys.argv) > 1: 46 | if module == sys.argv[1]: 47 | 48 | @click.group(name=module, help=modules[module]['description'], 49 | context_settings=dict(help_option_names=["-h", "--help"])) 50 | def main_submodule(): 51 | pass 52 | 53 | 54 | #LogHelper.banner() 55 | for path in subpaths: 56 | submodule = str(path).split('/')[-1].replace('.py', '').replace('_', '-') 57 | script = bind_function(f'_{submodule}') 58 | 59 | @click.command(name=submodule, context_settings=dict(help_option_names=["-h", "--help"])) 60 | def subcommand(): 61 | pass 62 | 63 | if len(sys.argv) >= 3: 64 | if submodule == sys.argv[2]: 65 | script(sys.argv[3:]) 66 | exit(0) 67 | 68 | main_submodule.add_command(subcommand) 69 | 70 | main_module.add_command(main_submodule) 71 | main_module(max_content_width=120) 72 | 73 | else: 74 | continue 75 | 76 | LogHelper.banner() 77 | main_module(max_content_width=120) 78 | -------------------------------------------------------------------------------- /monitors/__init__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging.config 3 | import subprocess 4 | import textwrap 5 | import traceback 6 | import shutil 7 | import yaml 8 | import lib.helpers.constants as const 9 | from contextlib import nullcontext 10 | from lib.util import Monitors, Tables, Admin 11 | from pathlib import Path 12 | from lib.helpers.logs import LoggingConfigs, LogHelper, LogRotater, LOGGER 13 | from lib.helpers import sdk_helpers 14 | from pycarlo.core import Mutation 15 | from rich.progress import Progress 16 | from rich import print 17 | -------------------------------------------------------------------------------- /monitors/add_remove_monitoring_rules.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 4 | from monitors import * 5 | 6 | # Initialize logger 7 | util_name = __file__.split('/')[-1].split('.')[0] 8 | logging.config.dictConfig(LoggingConfigs.logging_configs(util_name)) 9 | LOGGER = logging.getLogger() 10 | 11 | 12 | class MonitoringRules(Monitors, Tables, Admin): 13 | 14 | def __init__(self, profile, config_file: str = None): 15 | """Creates an instance of RowCountMonitoring. 16 | 17 | Args: 18 | config_file (str): Path to the Configuration File. 19 | """ 20 | 21 | super().__init__(profile, config_file) 22 | self.enabled = True 23 | 24 | def create_rules(self, input_file: str) -> dict: 25 | """ Reads input file and creates rules dictionary 26 | 27 | Args: 28 | input_file(str): Path of the file containing asset entries. 29 | 30 | Returns: 31 | dict: Rule dictionary configuration. 
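        Example input line (illustrative, based on the parsing below):
            project:dataset.table_name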
32 | 33 | """ 34 | 35 | if Path(input_file).is_file(): 36 | mapping = {} 37 | with open(input_file, 'r') as input_tables: 38 | for table in input_tables: 39 | table_filter, table_name = table.split('.') 40 | content = {'project': table_filter[:table_filter.index(":")], 41 | 'dataset': table_filter[table_filter.index(":") + 1:], 42 | 'rules': []} 43 | if not mapping.get(table_filter): 44 | mapping[table_filter] = content 45 | 46 | if self.enabled: 47 | rule = { 48 | "isExclude": False, 49 | "ruleType": "wildcard_pattern", 50 | "tableRuleAttribute": "table_id", 51 | "tableRuleText": table_name 52 | } 53 | mapping[table_filter]['rules'].append(rule) 54 | 55 | return mapping 56 | 57 | else: 58 | LOGGER.error(f"unable to locate input file: {input_file}") 59 | sys.exit(1) 60 | 61 | def apply_rules(self, operation: str, warehouse_id: str, rule_configs: dict): 62 | """ Submits rules for processing only if # of rules is <= 100 63 | 64 | Args: 65 | operation(str): Enable or Disable. 66 | warehouse_id(str): UUID of warehouse. 67 | rule_configs(dict): Dictionary containing rule configuration. 68 | """ 69 | 70 | for db_schema in rule_configs: 71 | project = rule_configs[db_schema]['project'] 72 | dataset = rule_configs[db_schema]['dataset'] 73 | rules = rule_configs[db_schema]['rules'] 74 | if len(rules) > 100: 75 | LOGGER.error("monitor rules allow at most 100 entries. Use a different method to filter out tables i.e." 76 | " pattern match") 77 | exit(0) 78 | LOGGER.info(f"{operation.title()} usage for database/schema combination " 79 | f"[{project}:{dataset}] and warehouse [{warehouse_id}]...") 80 | response = (self.auth.client(self.enable_schema_usage(dw_id=warehouse_id, project=project, 81 | dataset=dataset, rules=rules)) 82 | .update_monitored_table_rule_list) 83 | if isinstance(response, list): 84 | LOGGER.info(f"monitor rule {operation}d\n") 85 | else: 86 | LOGGER.error("an error occurred") 87 | exit(1) 88 | 89 | 90 | def main(*args, **kwargs): 91 | 92 | # Capture Command Line Arguments 93 | parser, subparsers = sdk_helpers.generate_arg_parser(os.path.basename(os.path.dirname(os.path.abspath(__file__))) 94 | , os.path.basename(__file__)) 95 | 96 | if not args: 97 | args = parser.parse_args(*args, **kwargs) 98 | else: 99 | sdk_helpers.dump_help(parser, main, *args) 100 | args = parser.parse_args(*args, **kwargs) 101 | 102 | @sdk_helpers.ensure_progress 103 | def run_utility(progress, util, args): 104 | util.progress_bar = progress 105 | if args.operation == 'disable': 106 | util.enabled = False 107 | else: 108 | util.enabled = True 109 | util.apply_rules(args.operation, args.warehouse, util.create_rules(args.input)) 110 | 111 | util = MonitoringRules(args.profile) 112 | run_utility(util, args) 113 | 114 | 115 | if __name__ == '__main__': 116 | main() 117 | -------------------------------------------------------------------------------- /monitors/bulk_export_monitors.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 4 | from monitors import * 5 | 6 | # Initialize logger 7 | util_name = __file__.split('/')[-1].split('.')[0] 8 | logging.config.dictConfig(LoggingConfigs.logging_configs(util_name)) 9 | 10 | 11 | class BulkExportMonitors(Monitors): 12 | 13 | def __init__(self, profile, config_file: str = None, progress: Progress = None): 14 | """Creates an instance of BulkExportMonitors. 15 | 16 | Args: 17 | config_file (str): Path to the Configuration File. 
18 | progress(Progress): Progress bar. 19 | """ 20 | 21 | super().__init__(profile, config_file) 22 | self.OUTPUT_FILE = "monitors.yaml" 23 | self.progress_bar = progress 24 | 25 | def bulk_export_yaml(self, export_name): 26 | 27 | monitor_list, _ = self.get_ui_monitors() 28 | # Split list of monitors in batches of 500 29 | batches = sdk_helpers.batch_objects(monitor_list, 500) 30 | file_path = self.OUTPUT_DIR / util_name 31 | file_path.mkdir(parents=True, exist_ok=True) 32 | with open(file_path / self.OUTPUT_FILE, "w") as yaml_file: 33 | yaml_file.write("montecarlo:\n") 34 | for batch in batches: 35 | monitor_yaml = self.export_yaml_template(batch, export_name) 36 | yaml_file.write(textwrap.indent(monitor_yaml["config_template_as_yaml"], prefix=" ")) 37 | 38 | LOGGER.info(f"exported ui monitors to yaml templates successfully") 39 | 40 | 41 | def main(*args, **kwargs): 42 | 43 | # Capture Command Line Arguments 44 | parser = sdk_helpers.generate_arg_parser(os.path.basename(os.path.dirname(os.path.abspath(__file__))), 45 | os.path.basename(__file__)) 46 | 47 | if not args: 48 | args = parser.parse_args(*args, **kwargs) 49 | else: 50 | sdk_helpers.dump_help(parser, main, *args) 51 | args = parser.parse_args(*args, **kwargs) 52 | 53 | @sdk_helpers.ensure_progress 54 | def run_utility(progress, util, args): 55 | util.progress_bar = progress 56 | export_name = False 57 | if args.export_name == 'y': 58 | export_name = True 59 | util.bulk_export_yaml(export_name) 60 | 61 | util = BulkExportMonitors(args.profile) 62 | run_utility(util, args) 63 | 64 | 65 | if __name__ == '__main__': 66 | main() 67 | -------------------------------------------------------------------------------- /monitors/bulk_set_freshness_sensitivity.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import csv 4 | import datetime 5 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 6 | from monitors import * 7 | from cron_validator import CronValidator 8 | 9 | # Initialize logger 10 | util_name = __file__.split('/')[-1].split('.')[0] 11 | logging.config.dictConfig(LoggingConfigs.logging_configs(util_name)) 12 | 13 | 14 | class SetFreshnessSensitivity(Monitors, Tables): 15 | 16 | def __init__(self, profile, config_file: str = None, progress: Progress = None): 17 | """Creates an instance of SetFreshnessSensitivity. 18 | 19 | Args: 20 | profile(str): Profile to use stored in montecarlo test. 21 | config_file (str): Path to the Configuration File. 22 | progress(Progress): Progress bar. 23 | """ 24 | 25 | super().__init__(profile, config_file, progress) 26 | self.progress_bar = progress 27 | self.rule_operator_type = None 28 | 29 | def validate_input_file(self, input_file: str) -> any: 30 | """Ensure contents of input file satisfy requirements. 31 | 32 | Args: 33 | input_file(str): Path to input file. 34 | 35 | Returns: 36 | Any: Dictionary with mappings or None. 
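        Expected CSV columns (illustrative, matching the validation below):
            full_table_id,sensitivity                      -> AUTO thresholds (LOW, MEDIUM or HIGH)
            full_table_id,updated_in_last_minutes,cron     -> EXPLICIT thresholds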
37 | 38 | """ 39 | # TODO 40 | # should fail if input file does not exist 41 | 42 | file_path = Path(input_file) 43 | LOGGER.info(f"starting input file validation...") 44 | if file_path.is_file(): 45 | input_tables = None 46 | auto_required_cols = ['full_table_id', 'sensitivity'] 47 | explicit_required_cols = ['full_table_id', 'updated_in_last_minutes', 'cron'] 48 | try: 49 | with open(file_path, 'r') as file: 50 | reader = csv.DictReader(file) 51 | input_tables = {} 52 | for index, row in enumerate(reader): 53 | col_count = len(row) 54 | if col_count == 3: 55 | self.rule_operator_type = 'EXPLICIT' 56 | for col in explicit_required_cols: 57 | if not row.get(col): 58 | raise ValueError(f"value for '{col}' is missing: line {index + 1}") 59 | try: 60 | int(row["updated_in_last_minutes"]) 61 | try: 62 | CronValidator.parse(row["cron"]) 63 | input_tables[row["full_table_id"]] = row 64 | except ValueError: 65 | raise ValueError( 66 | f"value under 'cron' is invalid: line {index + 1}") 67 | except ValueError: 68 | raise ValueError( 69 | f"value under 'updated_in_last_minutes' must be an integer: line {index + 1}") 70 | elif col_count == 2: 71 | self.rule_operator_type = 'AUTO' 72 | for col in auto_required_cols: 73 | if not row.get(col): 74 | raise ValueError(f"value for '{col}' is missing: line {index + 1}") 75 | if row["sensitivity"].upper() not in ['LOW', 'MEDIUM', 'HIGH']: 76 | raise ValueError(f"sensitivity must be LOW, MEDIUM or HIGH: line {index + 1}") 77 | input_tables[row["full_table_id"]] = row 78 | else: 79 | raise ValueError(f"{col_count} columns present in CSV, either {explicit_required_cols} OR " 80 | f"{auto_required_cols} are required") 81 | 82 | except ValueError as e: 83 | LOGGER.error(f"errors found in file: {e}") 84 | 85 | return input_tables 86 | 87 | def update_freshness_thresholds(self, input_dict: dict, warehouse_id: str): 88 | 89 | if input_dict: 90 | LOGGER.info(f"updating freshness rules...") 91 | input_fulltableids = [item['full_table_id'] for item in input_dict.values()] 92 | input_mcons, _ = self.get_mcons_by_fulltableid(warehouse_id, input_fulltableids) 93 | monitor_ids, response = self.get_monitors_by_type(warehouse_id, [const.MonitorTypes.FRESHNESS], True, input_mcons) 94 | for index, full_table_id in enumerate(input_fulltableids): 95 | try: 96 | input_mcons[index] 97 | except IndexError: 98 | LOGGER.warning(f"skipping {full_table_id} - asset not found") 99 | continue 100 | 101 | payload = { 102 | "dw_id": warehouse_id, 103 | "replaces_ootb": True, 104 | "event_rollup_until_changed": True, 105 | "timezone": "UTC", 106 | "schedule_config": { 107 | "schedule_type": "FIXED", 108 | "start_time": datetime.datetime.strftime(sdk_helpers.hour_rounder(datetime.datetime.now()), 109 | "%Y-%m-%dT%H:%M:%S.%fZ"), 110 | }, 111 | "comparisons": [ 112 | { 113 | "full_table_id": input_mcons[index], 114 | "comparison_type": "FRESHNESS", 115 | } 116 | ] 117 | } 118 | 119 | if self.rule_operator_type == 'EXPLICIT': 120 | payload["schedule_config"]["interval_crontab"] = [input_dict[full_table_id]['cron']] 121 | payload["comparisons"][0]["operator"] = 'GT' 122 | payload["comparisons"][0]["threshold"] = float(input_dict[full_table_id]['updated_in_last_minutes']) 123 | else: 124 | payload["schedule_config"]["interval_minutes"] = 60 125 | payload["comparisons"][0]["operator"] = 'AUTO' 126 | payload["comparisons"][0]["threshold_sensitivity"] = input_dict[full_table_id]['sensitivity'].upper() 127 | 128 | for monitor in response: 129 | if monitor.rule_comparisons[0].full_table_id == 
full_table_id: 130 | payload["description"] = monitor.description 131 | payload["custom_rule_uuid"] = monitor.uuid 132 | break 133 | 134 | if not payload.get("description"): 135 | payload["description"] = f"Freshness rule for {full_table_id}" 136 | 137 | mutation = Mutation() 138 | mutation.create_or_update_freshness_custom_rule(**payload) 139 | try: 140 | self.progress_bar.update(self.progress_bar.tasks[0].id, advance=75 / len(input_fulltableids)) 141 | _ = self.auth.client(mutation).create_or_update_freshness_custom_rule 142 | LOGGER.info(f"freshness threshold updated successfully for table {full_table_id}") 143 | except Exception as e: 144 | LOGGER.error(f"unable to update freshness threshold for table {full_table_id}") 145 | LOGGER.debug(e) 146 | continue 147 | 148 | 149 | def main(*args, **kwargs): 150 | 151 | # Capture Command Line Arguments 152 | parser = sdk_helpers.generate_arg_parser(os.path.basename(os.path.dirname(os.path.abspath(__file__))), 153 | os.path.basename(__file__)) 154 | 155 | if not args: 156 | args = parser.parse_args(*args, **kwargs) 157 | else: 158 | sdk_helpers.dump_help(parser, main, *args) 159 | args = parser.parse_args(*args, **kwargs) 160 | 161 | 162 | @sdk_helpers.ensure_progress 163 | def run_utility(progress, util, args): 164 | util.progress_bar = progress 165 | util.update_freshness_thresholds(util.validate_input_file(args.input_file), args.warehouse) 166 | 167 | util = SetFreshnessSensitivity(args.profile) 168 | run_utility(util, args) 169 | 170 | 171 | if __name__ == '__main__': 172 | main() -------------------------------------------------------------------------------- /monitors/bulk_set_unchanged_size_sensitivity.py: -------------------------------------------------------------------------------- 1 | #INSTRUCTIONS: 2 | #1.Create a CSV with 2 columns: [full_table_id, sensitivity (must be upper case with the following values: LOW, MEDIUM, HIGH)] 3 | #2. 
Run this script, providing the mcdId, mcdToken, DWId, and CSV 4 | #Limitation: 5 | #This will make 1 request per table, so 10,000/day request limit via API is still a consideration 6 | 7 | from pycarlo.core import Client, Query, Mutation, Session 8 | import csv 9 | from typing import Optional 10 | 11 | def getDefaultWarehouse(mcdId,mcdToken): 12 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 13 | query=Query() 14 | query.get_user().account.warehouses.__fields__("name","connection_type","uuid") 15 | warehouses=client(query).get_user.account.warehouses 16 | if len(warehouses) == 1: 17 | return warehouses[0].uuid 18 | elif len(warehouses) > 1: 19 | for val in warehouses: 20 | print("Name: " + val.name + ", Connection Type: " + val.connection_type + ", UUID: " + val.uuid) 21 | print("Error: More than one warehouse, please re-run with UUID value") 22 | quit() 23 | 24 | def get_table_query(dwId,first: Optional[int] = 1000, after: Optional[str] = None) -> Query: 25 | query = Query() 26 | get_tables = query.get_tables(first=first, dw_id=dwId, is_deleted=False, **(dict(after=after) if after else {})) 27 | get_tables.edges.node.__fields__("full_table_id","mcon") 28 | get_tables.page_info.__fields__(end_cursor=True) 29 | get_tables.page_info.__fields__("has_next_page") 30 | return query 31 | 32 | def getMcons(mcdId,mcdToken,dwId): 33 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 34 | table_mcon_dict={} 35 | next_token=None 36 | while True: 37 | response = client(get_table_query(dwId=dwId,after=next_token)).get_tables 38 | print(response) 39 | for table in response.edges: 40 | table_mcon_dict[table.node.full_table_id] = table.node.mcon 41 | if response.page_info.has_next_page: 42 | next_token = response.page_info.end_cursor 43 | else: 44 | break 45 | return table_mcon_dict 46 | 47 | def bulkSetFreshnessSensitivity(mcdId,mcdToken,csvFileName,mconDict): 48 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 49 | imported_sensitivity_counter=0 50 | with open(csvFileName,"r") as sensitivitiesToImport: 51 | sensitivities=csv.reader(sensitivitiesToImport,delimiter=",") 52 | for row in sensitivities: 53 | if row[0] not in mconDict.keys(): 54 | print("check failed: " +row[0]) 55 | continue 56 | if mconDict[row[0]]: 57 | imported_sensitivity_counter+=1 58 | print("check succeeded " + row[0]) 59 | mutation=Mutation() 60 | mutation.set_sensitivity(event_type="unchanged_size",mcon=mconDict[row[0]],threshold=dict(level=str(row[1]))).__fields__("success") 61 | print(row[0],client(mutation).set_sensitivity,row[1]) 62 | print("Successfully imported freshness for " + str(imported_sensitivity_counter) + " tables") 63 | 64 | if __name__ == '__main__': 65 | #-------------------INPUT VARIABLES--------------------- 66 | mcd_id = input("MCD ID: ") 67 | mcd_token = input("MCD Token: ") 68 | dw_id = input("DW ID: ") 69 | csv_file = input("CSV Filename: ") 70 | #------------------------------------------------------- 71 | if dw_id and csv_file: 72 | mcon_dict=getMcons(mcd_id,mcd_token,dw_id) 73 | bulkSetFreshnessSensitivity(mcd_id,mcd_token,csv_file,mcon_dict) 74 | if csv_file and not dw_id: 75 | warehouse_id = getDefaultWarehouse(mcd_id,mcd_token) 76 | mcon_dict = getMcons(mcd_id,mcd_token,warehouse_id) 77 | bulkSetFreshnessSensitivity(mcd_id,mcd_token,csv_file,mcon_dict) -------------------------------------------------------------------------------- /monitors/deduplicate_metric_monitors.py: -------------------------------------------------------------------------------- 1 
| import os 2 | import sys 3 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 4 | from monitors import * 5 | 6 | # Initialize logger 7 | util_name = __file__.split('/')[-1].split('.')[0] 8 | logging.config.dictConfig(LoggingConfigs.logging_configs(util_name)) 9 | LOGGER = logging.getLogger() 10 | 11 | 12 | class DeduplicateMetricMonitors(Monitors): 13 | 14 | def __init__(self, profile, config_file: str = None): 15 | """Creates an instance of DeduplicateMonitors. 16 | 17 | Args: 18 | config_file (str): Path to the Configuration File. 19 | """ 20 | 21 | super().__init__(profile, config_file) 22 | self.OUTPUT_FILE = None 23 | 24 | def deduplicate(self, input_file: str, namespace: str): 25 | """ 26 | 27 | """ 28 | 29 | self.OUTPUT_DIR = Path(''.join(input_file.split('/')[:-1])) 30 | self.OUTPUT_FILE = ''.join(input_file.split('/')[-1]) 31 | file_path = None 32 | 33 | if self.OUTPUT_DIR.is_dir(): 34 | file_path = self.OUTPUT_DIR / self.OUTPUT_FILE 35 | 36 | if file_path and Path(input_file).is_file(): 37 | LOGGER.info("backing up input file...") 38 | shutil.copyfile(input_file, f"{file_path}.bkp") 39 | with open(input_file, 'r') as file: 40 | yaml_dict = yaml.safe_load(file) 41 | metric_monitors = yaml_dict.get("montecarlo").get("field_health") 42 | 43 | # Initializing compare keys 44 | comp_keys = ['table', 'timestamp_field', 'lookback_days', 'aggregation_time_interval', 'connection_name', 45 | 'use_important_fields', 'use_partition_clause', 'metric'] 46 | 47 | # Compare each monitor with the rest to find possible duplicates 48 | duplicate_indexes = [] 49 | for i in range(len(metric_monitors) - 1): 50 | for j in range(i + 1, len(metric_monitors)): 51 | if all(metric_monitors[i].get(key) == metric_monitors[j].get(key) for key in comp_keys): 52 | LOGGER.debug(f"possible duplicate monitors in [{i} " 53 | f"{metric_monitors[i].get('table')}] <=> [{j} - {metric_monitors[j].get('table')}]") 54 | duplicate_indexes.append(i) 55 | 56 | # Remove duplicates 57 | LOGGER.info("removing duplicate metric monitors...") 58 | for index in duplicate_indexes: 59 | del metric_monitors[index] 60 | 61 | # Save as new file 62 | with open(file_path, 'w') as outfile: 63 | yaml.safe_dump(yaml_dict, outfile, sort_keys=False) 64 | 65 | LOGGER.info("validating updated YML configuration...") 66 | if not namespace: 67 | cmd = subprocess.run(["montecarlo", "--profile", self.profile, "monitors", "apply", "--project-dir", 68 | self.OUTPUT_DIR, "--option-file", self.OUTPUT_FILE, "--dry-run"], 69 | capture_output=True, text=True) 70 | else: 71 | cmd = subprocess.run(["montecarlo", "--profile", self.profile, "monitors", "apply", "--project-dir", 72 | self.OUTPUT_DIR, "--option-file", self.OUTPUT_FILE, "--namespace", namespace, 73 | "--dry-run"], capture_output=True, text=True) 74 | if cmd.returncode != 0: 75 | LogHelper.split_message(cmd.stdout, logging.ERROR) 76 | LOGGER.error("an error occurred") 77 | LogHelper.split_message(cmd.stderr, logging.ERROR) 78 | exit(cmd.returncode) 79 | else: 80 | LOGGER.info(f"export completed") 81 | LogHelper.split_message(cmd.stdout) 82 | else: 83 | LOGGER.error(f"unable to locate input file: {input_file}") 84 | 85 | 86 | def main(*args, **kwargs): 87 | 88 | # Capture Command Line Arguments 89 | parser = sdk_helpers.generate_arg_parser(os.path.basename(os.path.dirname(os.path.abspath(__file__))), 90 | os.path.basename(__file__)) 91 | 92 | if not args: 93 | args = parser.parse_args(*args, **kwargs) 94 | else: 95 | sdk_helpers.dump_help(parser, main, *args) 96 | args = 
parser.parse_args(*args, **kwargs) 97 | 98 | @sdk_helpers.ensure_progress 99 | def run_utility(progress, util, args): 100 | util.progress_bar = progress 101 | util.deduplicate(args.input, args.namespace) 102 | 103 | util = DeduplicateMetricMonitors(args.profile) 104 | run_utility(util, args) 105 | 106 | 107 | if __name__ == '__main__': 108 | main() 109 | -------------------------------------------------------------------------------- /monitors/delete_monitors_by_audience.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 4 | from monitors import * 5 | 6 | # Initialize logger 7 | util_name = __file__.split('/')[-1].split('.')[0] 8 | logging.config.dictConfig(LoggingConfigs.logging_configs(util_name)) 9 | 10 | 11 | class DeleteMonitorsByAudience(Monitors): 12 | def __init__(self, profile,config_file: str = None, progress: Progress = None): 13 | """Creates an instance of DeleteMonitorsByAudience. 14 | 15 | Args: 16 | profile(str): Profile to use stored in montecarlo test. 17 | config_file (str): Path to the Configuration File. 18 | progress(Progress): Progress bar. 19 | """ 20 | 21 | super().__init__(profile, config_file, progress) 22 | self.progress_bar = progress 23 | self.rule_operator_type = None 24 | 25 | def delete_custom_monitors(self,audiences): 26 | _, monitors = self.get_monitors_by_audience(audiences) 27 | if len(monitors) == 0: 28 | LOGGER.error("No monitors exist for given audience(s)") 29 | sys.exit(1) 30 | LOGGER.info(monitors) 31 | rules = [const.MonitorTypes.VOLUME,const.MonitorTypes.CUSTOM_SQL,const.MonitorTypes.FRESHNESS, 32 | const.MonitorTypes.FIELD_QUALITY, const.MonitorTypes.COMPARISON,const.MonitorTypes.VALIDATION] 33 | for monitor in monitors: 34 | self.progress_bar.update(self.progress_bar.tasks[0].id, advance=100 / len(monitors)) 35 | error = False 36 | if monitor["monitor_type"] in rules: 37 | response = self.auth.client(self.delete_custom_rule(monitor["uuid"])).delete_custom_rule 38 | if not response.uuid: 39 | error = True 40 | else: 41 | response = self.auth.client(self.delete_monitor(monitor["uuid"])).delete_monitor 42 | if not response.success: 43 | error = True 44 | if error: 45 | LOGGER.info(f"Deletion Not Successful for: {monitor.uuid}") 46 | else: 47 | LOGGER.info(f"Deletion Successful for: {monitor.uuid}") 48 | 49 | 50 | def main(*args, **kwargs): 51 | 52 | # Capture Command Line Arguments 53 | parser = sdk_helpers.generate_arg_parser(os.path.basename(os.path.dirname(os.path.abspath(__file__))), 54 | os.path.basename(__file__)) 55 | 56 | if not args: 57 | args = parser.parse_args(*args, **kwargs) 58 | else: 59 | sdk_helpers.dump_help(parser, main, *args) 60 | args = parser.parse_args(*args, **kwargs) 61 | 62 | @sdk_helpers.ensure_progress 63 | def run_utility(progress, util, args): 64 | util.progress_bar = progress 65 | audiences = sdk_helpers.parse_input(args.audience, ',') 66 | util.delete_custom_monitors(audiences) 67 | 68 | util = DeleteMonitorsByAudience(args.profile) 69 | run_utility(util, args) 70 | 71 | 72 | if __name__ == '__main__': 73 | main() 74 | -------------------------------------------------------------------------------- /monitors/delete_monitors_without_tag.py: -------------------------------------------------------------------------------- 1 | #INSTRUCTIONS: 2 | #1. Add your api key and token 3 | #2. Enter the tag key and value for the monitors you'd like to keep 4 | #3. 
Run the script 5 | 6 | from pycarlo.core import Client, Query, Mutation, Session 7 | 8 | # Retrieve monitors with designated tag 9 | def get_pref_monitors(client): 10 | query = Query() 11 | #Input tags for monitors you'd like to keep 12 | tag_key = '{Enter Tag Key Here}' 13 | tag_value = '{Enter Tag Value Here}' 14 | filter = [{'name': tag_key,'value': tag_value}] 15 | query.get_monitors(tags=filter).__fields__('uuid','tags','entities') 16 | search_results = client(query).get_monitors 17 | monitors_to_keep = [] 18 | # Adding tagged monitors to a list 19 | for mon in search_results: 20 | monitors_to_keep.append(mon.uuid) 21 | return monitors_to_keep 22 | 23 | # Get all custom monitors in MC 24 | def get_all_monitors(client, limit): 25 | query = Query() 26 | query.get_monitors(limit=limit).__fields__('uuid', 'monitor_type') 27 | search_results = client(query).get_monitors 28 | return search_results 29 | 30 | # Delete a single monitor 31 | def delete_monitor(client, uuid, type): 32 | mutation = Mutation() 33 | # defining monitor types that can only be deleted with delete_monitor call 34 | monitor_types = ['STATS', 'FIELD_QUALITY', 'CATEGORIES', 'JSON_SCHEMA'] 35 | error = False 36 | if type in monitor_types: 37 | mutation.delete_monitor(monitor_id=uuid) 38 | response = client(mutation).delete_monitor 39 | if not response.success: 40 | error = True 41 | else: 42 | mutation.delete_custom_rule(uuid=uuid) 43 | response = client(mutation).delete_custom_rule 44 | if not response.uuid: 45 | error = True 46 | if error: 47 | print(f"Deletion not Successful for: {uuid}") 48 | else: 49 | print(f"Successfully deleted monitor {uuid}") 50 | 51 | 52 | # Compiles preferred monitors and deletes every other custom monitor 53 | def bulk_delete_monitors(client): 54 | pref_monitors = get_pref_monitors(client) 55 | print(f"Found {len(pref_monitors)} tagged monitors: {pref_monitors}") 56 | all_monitors = get_all_monitors(client,limit=200) 57 | count = 0 58 | for mon in all_monitors: 59 | if mon.uuid not in pref_monitors: 60 | mon_id = mon.uuid 61 | print(mon.uuid) 62 | mon_type = mon.monitor_type 63 | delete_monitor(client=client, uuid=mon_id, type=mon_type) 64 | count+=1 65 | print(f"Deleted {count} monitors") 66 | 67 | 68 | if __name__ == '__main__': 69 | #-------------------INPUT VARIABLES--------------------- 70 | mcdId = '{Add Key Here}' 71 | mcdToken = '{Add Token Here}' 72 | #------------------------------------------------------- 73 | client= Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 74 | bulk_delete_monitors(client) 75 | 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /monitors/enable_monitored_table_volume_queries.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 4 | from monitors import * 5 | 6 | # Initialize logger 7 | util_name = __file__.split('/')[-1].split('.')[0] 8 | logging.config.dictConfig(LoggingConfigs.logging_configs(util_name)) 9 | LOGGER = logging.getLogger() 10 | 11 | 12 | class RowCountMonitoring(Monitors, Tables, Admin): 13 | 14 | def __init__(self, profile, config_file: str = None): 15 | """Creates an instance of RowCountMonitoring. 
16 | 17 | Args: 18 | operation(str): Enable/Disable operation 19 | """ 20 | 21 | super().__init__(profile, config_file) 22 | self.enabled = True 23 | 24 | def enable_monitored_table_volume_queries(self,operation): 25 | """ Enables query-based volume monitoring for tables that have monitoring enabled.""" 26 | 27 | tables_to_enable = [] 28 | next_token=None 29 | while True: 30 | tables = self.auth.client(self.get_tables(is_monitored=True, after=next_token)).get_tables 31 | for table in tables.edges: 32 | if not table.node.table_capabilities.has_non_metadata_size_collection: 33 | tables_to_enable.append(table.node.mcon) 34 | if tables.page_info.has_next_page: 35 | next_token = tables.page_info.end_cursor 36 | else: 37 | break 38 | 39 | for table in tables_to_enable: 40 | operation = operation 41 | op = "" 42 | if operation == "enable": 43 | op = True 44 | if operation == "disable": 45 | op = False 46 | response = self.auth.client(self.toggle_size_collection(mcon=table,enabled=op)).__fields__("enabled") 47 | if response.toggle_size_collection.enabled: 48 | LOGGER.info(f"row count {operation} for mcon[{table}]") 49 | else: 50 | LOGGER.error(f"unable to apply {operation.lower()} action on mcon[{table}]") 51 | exit(1) 52 | 53 | 54 | def main(*args, **kwargs): 55 | 56 | # Capture Command Line Arguments 57 | parser, subparsers = sdk_helpers.generate_arg_parser(os.path.basename(os.path.dirname(os.path.abspath(__file__))) 58 | , os.path.basename(__file__)) 59 | 60 | if not args: 61 | args = parser.parse_args(*args, **kwargs) 62 | else: 63 | sdk_helpers.dump_help(parser, main, *args) 64 | args = parser.parse_args(*args, **kwargs) 65 | 66 | @sdk_helpers.ensure_progress 67 | def run_utility(progress, util, args): 68 | util.progress_bar = progress 69 | if args.operation == 'disable': 70 | util.enabled = False 71 | else: 72 | util.enabled = True 73 | util.enable_monitored_table_volume_queries(args.operation) 74 | 75 | util = RowCountMonitoring(args.profile) 76 | run_utility(util, args) 77 | 78 | 79 | if __name__ == '__main__': 80 | main() 81 | -------------------------------------------------------------------------------- /monitors/field_health_resource_migration.py: -------------------------------------------------------------------------------- 1 | from pycarlo.core import Client, Query, Mutation, Session 2 | import csv 3 | import datetime 4 | 5 | def monitorConverter(mcdProfile,newResourceId,newTimeAxis,newTimeAxisName,newScheduleType,numMonitorsToConvert,parseLogic): 6 | client = Client(session=Session(mcd_profile=mcdProfile)) 7 | query=Query() 8 | query.get_all_user_defined_monitors_v2(first=5000,user_defined_monitor_types=["stats"]).edges.node.__fields__('uuid','resource_id','next_execution_time','monitor_time_axis_field_type','monitor_time_axis_field_name','entities') 9 | count = 1 10 | old_table_list=[] 11 | new_table_list=[] 12 | for val in client(query).get_all_user_defined_monitors_v2.edges: 13 | x=Query() 14 | y=Query() 15 | mutation=Mutation() 16 | 17 | if val.node.resource_id == newResourceId: 18 | continue 19 | 20 | x.get_monitor(resource_id=val.node.resource_id,uuid=val.node.uuid).__fields__('uuid','type','full_table_id','schedule_config','agg_time_interval','history_days') 21 | response=client(x).get_monitor 22 | 23 | time_offset = count*3 24 | first_run_time = datetime.datetime.utcnow() + datetime.timedelta(minutes=time_offset) 25 | external_table_id = parseLogic 26 | old_table_list.append(val.node.entities[0]) 27 | new_table_list.append(external_table_id) 28 | 29 | 
y.get_table(dw_id=newResourceId,full_table_id=external_table_id).__fields__('full_table_id') 30 | verified_table=client(y).get_table 31 | 32 | mutation.create_or_update_monitor( 33 | resource_id=newResourceId, 34 | monitor_type=response.type.lower(), 35 | time_axis_type=newTimeAxis, 36 | time_axis_name=newTimeAxisName, 37 | agg_time_interval=response.agg_time_interval, 38 | lookback_days=response.history_days, 39 | full_table_id=external_table_id, 40 | schedule_config={ 41 | "schedule_type":newScheduleType, 42 | "interval_minutes":response.schedule_config.interval_minutes, 43 | "start_time": first_run_time 44 | } 45 | ).monitor.__fields__('uuid','monitor_type') 46 | mutation_response=client(mutation).create_or_update_monitor.monitor.uuid 47 | print(mutation_response) 48 | with open("completed_monitors.csv","a") as complete_monitors: 49 | writer = csv.writer(complete_monitors) 50 | writer.writerow([response.uuid,mutation_response,val.node.resource_id,val.node.next_execution_time,newTimeAxis,newTimeAxisName,]) 51 | complete_monitors.close() 52 | print(count) 53 | print(first_run_time) 54 | if count == numMonitorsToConvert: 55 | break 56 | count += 1 57 | 58 | print(new_table_list) 59 | print(old_table_list) 60 | return old_table_list 61 | 62 | def monitorDeleter(mcdProfile,listToDelete): 63 | client = Client(session=Session(mcd_profile=mcdProfile)) 64 | count = 1 65 | for table_name in listToDelete: 66 | print(table_name, count) 67 | query=Query() 68 | query.get_monitor(monitor_type="stats",full_table_id=table_name).__fields__('uuid') 69 | value = client(query).get_monitor.uuid 70 | mutation=Mutation() 71 | mutation.stop_monitor(monitor_id=value).__fields__('success') 72 | response=client(mutation).stop_monitor.success 73 | print(response) 74 | count += 1 75 | 76 | print("Deletions Complete") 77 | 78 | def existingMonitorCSV(mcdProfile, csvName): 79 | client = Client(session=Session(mcd_profile=mcdProfile)) 80 | query=Query() 81 | query.get_all_user_defined_monitors_v2(first=5000,user_defined_monitor_types=["stats"]).edges.node.__fields__('uuid','resource_id','next_execution_time','monitor_time_axis_field_type','monitor_time_axis_field_name','entities') 82 | with open(csvName,'w') as monitor_list: 83 | writer = csv.writer(monitor_list) 84 | writer.writerow(['uuid','full_table_id','resource_id','next_execution_time','monitor_time_axis_field_type','monitor_time_axis_field_name']) 85 | for val in client(query).get_all_user_defined_monitors_v2.edges: 86 | writer.writerow([val.node.uuid,val.node.entities[0],val.node.resource_id,val.node.next_execution_time,val.node.monitor_time_axis_field_type,val.node.monitor_time_axis_field_name]) 87 | monitor_list.close() 88 | 89 | if __name__ == '__main__': 90 | ##################--VARIABLES--######################## 91 | new_resource_id="" 92 | new_time_axis="custom" 93 | new_time_axis_name="" #Enter custom SQL Expression 94 | new_schedule_type="FIXED" 95 | num_monitors_to_convert=15 96 | parse_logic="new_project_name:" + val.node.entities[0].split(":")[1] #Current Table Name is val.node.entities[0] 97 | mcd_profile = "dev_testing" 98 | ####################################################### 99 | 100 | existingMonitorCSV(mcd_profile,"monitor_list_before_deletion.csv") 101 | old_list= monitorConverter(mcd_profile,new_resource_id,new_time_axis,new_time_axis_name,new_schedule_type,num_monitors_to_convert,parse_logic) 102 | monitorDeleter(mcd_profile,old_list) 103 | existingMonitorCSV(mcd_profile,"monitor_list_after_deletion.csv") 104 | 
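# NOTE: `parse_logic` in the VARIABLES block above is evaluated at module level but
# references `val`, which only exists inside monitorConverter's loop, so the script
# raises a NameError as written. A hedged sketch (hypothetical helper name, assuming
# the "project:dataset.table" entity format used above) is to pass the parse logic
# as a callable and evaluate it once per monitor instead:

def build_external_table_id(entity: str) -> str:
    # e.g. "old_project:dataset.table" -> "new_project_name:dataset.table"
    return "new_project_name:" + entity.split(":")[1]

# ...then call monitorConverter(..., parseLogic=build_external_table_id) and, inside
# the loop, set external_table_id = parseLogic(val.node.entities[0]).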
-------------------------------------------------------------------------------- /monitors/migration_monitors.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | from pathlib import Path 5 | from pycarlo.core import Client,Session, Query 6 | 7 | OUTPUT_FILE = 'monitors.csv' 8 | MONITORS_FILE_WORKSPACE = '/dbfs/FileStore/temp/montecarlo' 9 | 10 | def get_monitors(client, source_tag_value, destination_tag_value, tag_key): 11 | 12 | # Get list of tables that are tagged with tag_name 13 | print("Getting list of source tables") 14 | source_table_query = Query() 15 | filter = [{'tag_name': tag_key,'tag_values': [source_tag_value]}] 16 | source_tables = {} 17 | 18 | source_table_query.search(query='',tag_filters=filter) 19 | search_results = client(source_table_query).search 20 | for result in search_results.results: 21 | print(result) 22 | source_tables[result.table_id] = {"mcon": result.mcon, 23 | 'monitors': [], 24 | 'object_id': result.object_id} 25 | if source_tables: 26 | print("Getting the list of source monitors") 27 | for table in source_tables: 28 | source_table_monitors = Query() 29 | source_table_monitors.get_monitors(mcons=[source_tables[table]["mcon"]]).__fields__('uuid','namespace') 30 | response = client(source_table_monitors).get_monitors 31 | for monitor in response: 32 | if monitor.namespace == 'ui': 33 | source_tables[table]["monitors"].append(monitor.uuid) 34 | 35 | print("Getting destination tables") 36 | destination_table_query = Query() 37 | destination_filter = [{'tag_name': tag_key,'tag_values': [destination_tag_value]}] 38 | destination_table_query.search(query='',tag_filters=destination_filter) 39 | search_results = client(destination_table_query).search 40 | for result in search_results.results: 41 | print(result) 42 | if result.table_id in source_tables: 43 | source_tables[result.table_id]['destination'] = {"mcon": result.mcon, 44 | 'object_id': result.object_id, 45 | 'resource_id': result.resource_id} 46 | return source_tables 47 | 48 | def write_csv_file(source_tables): 49 | print("Writing CSV file") 50 | monitors_to_write = [] 51 | monitors_file_name = '' 52 | if len(source_tables) > 0: 53 | for table in source_tables: 54 | for monitor in source_tables[table]['monitors']: 55 | if monitor not in monitors_to_write: 56 | monitors_to_write.append(monitor) 57 | if monitors_to_write: 58 | print("Found monitors to write") 59 | file_path = Path(os.path.abspath(MONITORS_FILE_WORKSPACE)) 60 | file_path.mkdir(parents=True, exist_ok=True) 61 | monitors_file_name = file_path / OUTPUT_FILE 62 | with open(monitors_file_name, 'w') as csvfile: 63 | for monitor_id in monitors_to_write: 64 | csvfile.write(f"{monitor_id}\n") 65 | return monitors_file_name 66 | 67 | def export_monitors(monitors_file_path, namespace, warehouse_id): 68 | print("Exporting monitors") 69 | mc_monitors_path = MONITORS_FILE_WORKSPACE + "/test" 70 | cmd_args = ["montecarlo", "monitors", "convert-to-mac", 71 | "--namespace", namespace, "--project-dir", mc_monitors_path, 72 | "--monitors-file", monitors_file_path, "--dry-run"] 73 | cmd = subprocess.run(cmd_args, 74 | capture_output=True, text=True) 75 | print(cmd.stderr) 76 | print(cmd.stdout) 77 | print("Adding default_resource") 78 | with open(mc_monitors_path + '/montecarlo.yml', 'r') as montecarlo_yml: 79 | file_data = montecarlo_yml.read() 80 | file_data = file_data + 'default_resource: %s' % warehouse_id 81 | with open(mc_monitors_path + '/montecarlo.yml', 'w') as 
new_montecarlo_yml: 82 | new_montecarlo_yml.write(file_data) 83 | montecarlo_yml.close() 84 | new_montecarlo_yml.close() 85 | print("Wrote the file") 86 | return mc_monitors_path 87 | 88 | def modify_monitors_file_ids(monitor_path, source_tables, source_warehouse_uuid, destination_warehouse_uuid): 89 | print("Modifying the monitors file") 90 | monitors_file_yml = monitor_path + '/montecarlo/monitors.yml' 91 | 92 | destination_warehouse_query = Query() 93 | destination_warehouse_query.get_warehouse(uuid=destination_warehouse_uuid).__fields__('name') 94 | destination_warehouse = client(destination_warehouse_query).get_warehouse 95 | 96 | source_warehouse_query = Query() 97 | source_warehouse_query.get_warehouse(uuid=source_warehouse_uuid).__fields__('name') 98 | source_warehouse = client(source_warehouse_query).get_warehouse 99 | with open(monitors_file_yml, 'r') as monitors_yml: 100 | file_data = monitors_yml.read() 101 | file_data = file_data.replace(source_warehouse.name, destination_warehouse.name) 102 | for table in source_tables: 103 | if "destination" in source_tables[table]: 104 | file_data = file_data.replace(source_tables[table]['object_id'], source_tables[table]['destination']['object_id']) 105 | with open(monitors_file_yml, 'w') as new_monitors_yml: 106 | new_monitors_yml.write(file_data) 107 | monitors_yml.close() 108 | new_monitors_yml.close() 109 | print("Completed modifying the monitors file") 110 | 111 | def move_monitors(namespace, monitors_workspace_dir): 112 | print("Moving the monitors") 113 | cmd_args = ["montecarlo", "monitors", "apply", "--namespace", namespace, "--project-dir", monitors_workspace_dir, "--auto-yes"] 114 | cmd = subprocess.run(cmd_args, capture_output=True, text=True) 115 | print(cmd.stdout) 116 | print(cmd.stderr) 117 | print("Movement complete") 118 | 119 | def clean_up_files(): 120 | print("Cleaning up files") 121 | cmd_args = ['rm', '-rf', MONITORS_FILE_WORKSPACE] 122 | cmd = subprocess.run(cmd_args, capture_output=True, text=True) 123 | 124 | if __name__ == '__main__': 125 | #-------------------INPUT VARIABLES--------------------- 126 | mcdId = '{Add Key here}' 127 | mcdToken = '{Add Token Here}' 128 | tag_key = '{Add tag key here}' 129 | source_tag_value = '{Add source tage here}' 130 | destination_tag_value = '{Add destination tag here}' 131 | # UUID for source Warehouse found via API 132 | source_warehouse_uuid = '{Source Warehouse UUID}' 133 | #UUID for destination Warehouse found via API 134 | destination_warehouse_uuid = '{Destination Warehouse UUID}' 135 | #------------------------------------------------------- 136 | print("Preparing to move monitors from '%s' to '%s'" %(source_tag_value, destination_tag_value)) 137 | # Environment setup 138 | os.environ['MCD_DEFAULT_API_ID'] = mcdId 139 | os.environ['MCD_DEFAULT_API_TOKEN'] = mcdToken 140 | client=Client(session=Session(mcd_id=mcdId,mcd_token=mcdToken)) 141 | namespace = destination_tag_value.replace(' ', '_') 142 | 143 | #Get monitors to move 144 | source_tables = get_monitors(client=client, source_tag_value=source_tag_value, destination_tag_value=destination_tag_value, tag_key=tag_key) 145 | if source_tables: 146 | # Write UUIDs to csv file 147 | csv_file_name = write_csv_file(source_tables=source_tables) 148 | # Export using the csv file from above 149 | monitors_path = export_monitors(monitors_file_path=csv_file_name, namespace=namespace, warehouse_id=destination_warehouse_uuid) 150 | # Modify exported files contents to new paths 151 | modify_monitors_file_ids(monitors_path, 
source_tables, source_warehouse_uuid=source_warehouse_uuid, destination_warehouse_uuid=destination_warehouse_uuid) 152 | # Re import the file 153 | move_monitors(namespace=namespace, monitors_workspace_dir=monitors_path) 154 | else: 155 | print("No monitors found to migrate") 156 | #Clean up files on system 157 | clean_up_files() 158 | -------------------------------------------------------------------------------- /monitors/monitors_stats.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 4 | from monitors import * 5 | import csv 6 | 7 | # Initialize LOGGER 8 | util_name = __file__.split('/')[-1].split('.')[0] 9 | logging.config.dictConfig(LoggingConfigs.logging_configs(util_name)) 10 | 11 | 12 | class GetMonitorStats(Monitors): 13 | 14 | def __init__(self, profile, config_file: str = None, progress: Progress = None): 15 | """Creates an instance of BulkExportMonitors. 16 | 17 | Args: 18 | config_file (str): Path to the Configuration File. 19 | progress(Progress): Progress bar. 20 | """ 21 | 22 | super().__init__(profile, config_file) 23 | self.OUTPUT_FILE = "monitors_stats.csv" 24 | self.progress_bar = progress 25 | 26 | def generate_stats_file(self, warehouses): 27 | 28 | cursor = None 29 | monitors = {} 30 | LOGGER.info(f"- Retrieving monitors...") 31 | LOGGER.debug("Retrieving monitors using [GetCustomRules]") 32 | for warehouse in warehouses: 33 | while True: 34 | response = self.auth.client(self.get_custom_rules(warehouse_id=warehouse, after=cursor)).get_custom_rules 35 | if len(response.edges) > 0: 36 | for edge in response.edges: 37 | node = edge.node 38 | if not node.is_deleted and not monitors.get(node.uuid): 39 | LOGGER.debug(f"{node.uuid} added to list") 40 | monitors[node.uuid] = [warehouse, node.uuid, node.rule_type, node.rule_name, 41 | node.description, 42 | node.prev_execution_time, node.next_execution_time, "UNAVAILABLE", 43 | f"https://getmontecarlo.com/monitors/{node.uuid}"] 44 | if response.page_info.has_next_page: 45 | cursor = response.page_info.end_cursor 46 | else: 47 | break 48 | 49 | LOGGER.debug("Retrieving monitors using [GetMonitors]") 50 | _, raw = self.auth.client(self.get_monitors_by_entities()).get_monitors 51 | if len(raw) > 0: 52 | for monitor in raw: 53 | if monitor.monitor_status != "PAUSED": 54 | if len(warehouses) == 1 and monitor.resource_id != warehouses[0]: 55 | continue 56 | 57 | if not monitors.get(monitor.uuid): 58 | LOGGER.debug(f"{monitor.uuid} added to list") 59 | monitors[monitor.uuid] = [monitor.resource_id, monitor.uuid, monitor.monitor_type, 60 | monitor.name, monitor.description, monitor.prev_execution_time, 61 | monitor.next_execution_time, monitor.monitor_run_status, 62 | f"https://getmontecarlo.com/alerts/{monitor.uuid}"] 63 | 64 | if len(monitors) > 0: 65 | LOGGER.info(f"- Retrieving last run status and incidents...") 66 | for monitor in monitors: 67 | res = self.auth.client(self.get_job_execution_history_logs).get_job_execution_history_logs 68 | if len(res) > 0: 69 | LOGGER.debug(f"Updating last run status for {monitor}") 70 | monitors[monitor].pop() 71 | monitors[monitor].append(res[0].status) 72 | res = self.auth.client(self.get_latest_incident(monitor)).get_incidents 73 | if len(res.edges) > 0: 74 | edge = res.edges[0] 75 | monitors[monitor].append(f"https://getmontecarlo.com/alerts/{edge.node.uuid}") 76 | monitors[monitor].append(edge.node.incident_time) 77 | 78 | LOGGER.info(f"- {len(monitors)} 
monitors found") 79 | # Write stats to CSV 80 | file_path = Path(os.path.abspath(__file__)).parent 81 | file_path.mkdir(parents=True, exist_ok=True) 82 | filename = file_path / self.OUTPUT_FILE 83 | fields = ['Warehouse UUID', 'Monitor UUID', 'Type', 'Name', 'Description', 'Previous Run', 'Next Run', 84 | 'Run Status', 'Monitor URL', 'Last Incident URL', 'Last Incident Time'] 85 | with open(filename, 'w') as csvfile: 86 | csvwriter = csv.writer(csvfile) 87 | csvwriter.writerow(fields) 88 | csvwriter.writerows(list(monitors.values())) 89 | LOGGER.info(f"- monitor stats generated\n") 90 | 91 | 92 | def main(*args, **kwargs): 93 | # Capture Command Line Arguments 94 | parser = argparse.ArgumentParser(description='\n\tMonitor Stats Utility') 95 | parser._optionals.title = "Options" 96 | parser._positionals.title = "Commands" 97 | parser.add_argument('--profile', '-p', required=False, default="default", 98 | help='Specify an MCD profile name. Uses default otherwise') 99 | parser.add_argument('--warehouse', '-w', required=False, 100 | help='Warehouse ID') 101 | 102 | # Capture Command Line Arguments 103 | parser = sdk_helpers.generate_arg_parser(os.path.basename(os.path.dirname(os.path.abspath(__file__))), 104 | os.path.basename(__file__)) 105 | 106 | if not args: 107 | args = parser.parse_args(*args, **kwargs) 108 | else: 109 | sdk_helpers.dump_help(parser, main, *args) 110 | args = parser.parse_args(*args, **kwargs) 111 | 112 | @sdk_helpers.ensure_progress 113 | def run_utility(progress, util, args): 114 | util.progress_bar = progress 115 | if args.warehouse: 116 | warehouses = list(args.warehouse.split(" ")) 117 | else: 118 | warehouses, _ = util.get_warehouses 119 | util.generate_stats_file(warehouses) 120 | 121 | util = GetMonitorStats(args.profile) 122 | run_utility(util, args) 123 | 124 | 125 | if __name__ == '__main__': 126 | main() 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /monitors/requirements.txt: -------------------------------------------------------------------------------- 1 | pycarlo==0.8.12 2 | PyYAML==6.0.1 3 | -------------------------------------------------------------------------------- /monitors/run_monitors_by_tag.py: -------------------------------------------------------------------------------- 1 | from pycarlo.core import Client, Mutation, Session, Query 2 | import time 3 | 4 | def run_monitors(client, tag_value, tag_key, monitor_group): 5 | # Get list of tables that are tagged with tag_name 6 | print("getting list of monitors") 7 | query = Query() 8 | complete_breakers = [] 9 | 10 | description_prefix = '%s | %s' % (tag_value, monitor_group) 11 | mcons = [] 12 | monitors_to_run = [] 13 | filter = [{'tag_name': tag_key,'tag_values': [tag_value]}] 14 | mcons = [] 15 | query.search(query='',tag_filters=filter) 16 | search_results = client(query).search 17 | for result in search_results.results: 18 | print(result) 19 | mcons.append(result.mcon) 20 | if mcons: 21 | print('Getting Monitors') 22 | for mcon in mcons: 23 | query = Query() 24 | query.get_monitors(mcons=[mcon],monitor_types=['CUSTOM_SQL']).__fields__('uuid','description') 25 | response = client(query).get_monitors 26 | for monitor in response: 27 | if monitor.uuid not in monitors_to_run and monitor.description.startswith(description_prefix): 28 | monitors_to_run.append(monitor.uuid) 29 | breakers_triggered = [] 30 | for monitor in monitors_to_run: 31 | print(monitor) 32 | breakers_triggered.append(trigger_circuit_breaker(client=client, 
uuid=monitor)) 33 | if breakers_triggered: 34 | breaker_status = {} 35 | status_tries = 5 36 | try_count = 0 37 | while (len(complete_breakers) < len(breakers_triggered)) and try_count Environment Variables. 14 | 8. Create any variables that are referenced in the lambda function. See each service section for more details. 15 | 9. Create a notification within Monte Carlo, selecting Webhook as the channel. 16 | 10. Once you have completed the routing logic, pass the Function URL from step 4 as the Webhook URL, and the SHARED_SIGNING_SECRET from step 8 as the Secret. 17 | 18 | ## Services 19 | ### Google Chat 20 | 21 | #### Overview 22 | 23 | `google_chat_lambda.py` code for a Lambda function that can receive webhooks from Monte Carlo and post messages to a Google Chat. 24 | 25 | #### Environment Variables 26 | 27 | In the **Environment variables** panel, add the following variables: 28 | * `GOOGLE_ENDPOINT`: The Google Chat API endpoint. 29 | * `SHARED_SIGNING_SECRET`: The shared secret that you configured in the Monte Carlo webhook configuration. 30 | 31 | ### Webex Teams 32 | 33 | #### Overview 34 | 35 | `webex_lambda.py` code for a Lambda function that can receive webhooks from Monte Carlo and post messages to a Webex Teams room. 36 | 37 | For more information about Webex webhooks, see the [Webex Teams Webhooks Guide](https://developer.webex.com/docs/api/guides/webhooks). 38 | 39 | For more information about webhooks from Monte Carlo, see the [Monte Carlo Webhooks Guide](https://docs.getmontecarlo.com/docs/webhooks). 40 | 41 | #### Webex Prerequisites 42 | 43 | 1. Create a Webex Bot and obtain an access token. For more information, see the [Webex Teams Bot Guide](https://developer.webex.com/docs/bots). 44 | 2. Create a Webex Teams room where you want to post messages. For more information, see the [Webex Teams Rooms Guide](https://developer.webex.com/docs/api/v1/rooms). 45 | 3. Invite the Webex Bot to the room where you want to post messages. 46 | 4. Get the room ID for the room where you want to post messages.This is available in the Webex Teams room URL. For example, if the room link is `webexteams://im?space=5ee4bc50-6a0a-11ee-8c80-19de48a0d1a2`, the room ID is `5ee4bc50-6a0a-11ee-8c80-19de48a0d1a2`. 47 | 48 | #### Environment Variables 49 | 50 | In the **Environment variables** panel, add the following variables: 51 | * `WEBEX_ACCESS_TOKEN`: The access token for the Webex Teams bot. 52 | * `WEBEX_ROOM_ID`: The room ID for the Webex Teams room where you want to post messages. 53 | * `WEBEX_ENDPOINT`: The Webex Teams API endpoint. This is always `https://webexapis.com/v1/messages`. 54 | * `SHARED_SIGNING_SECRET`: The shared secret that you configured in the Monte Carlo webhook configuration. 
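
To confirm the Lambda wiring before pointing a Monte Carlo notification at it, the minimal sketch below sends a signed test event to the Function URL. The URL, secret, and payload shape are placeholders (the payload only carries the `type` and `payload.url` fields that `google_chat_lambda.py` and `webex_lambda.py` read; `service_now_lambda.py` expects additional fields). The signature mirrors the `verify_signature` logic in these handlers: an HMAC-SHA512 of the URL-encoded body, passed in the `x-mcd-signature` header.

```python
# Hypothetical local test for the webhook lambdas; FUNCTION_URL, the secret and
# the payload below are placeholders, not values from this repository.
import hashlib
import hmac
import json
import urllib.parse
import urllib3

FUNCTION_URL = "https://<your-function-url>.lambda-url.<region>.on.aws/"
SHARED_SIGNING_SECRET = b"<same secret configured on the Lambda and in Monte Carlo>"

# Minimal illustrative payload: only the fields the Google Chat / Webex handlers read.
body = {"type": "incidents", "payload": {"url": "https://getmontecarlo.com/alerts/example"}}

# HMAC-SHA512 over the URL-encoded body, matching verify_signature() in the handlers.
signature = hmac.new(SHARED_SIGNING_SECRET,
                     urllib.parse.urlencode(body).encode("utf8"),
                     hashlib.sha512).hexdigest()

response = urllib3.PoolManager().request(
    "POST",
    FUNCTION_URL,
    headers={"Content-Type": "application/json", "x-mcd-signature": signature},
    body=json.dumps(body),
)
print(response.status, response.data.decode("utf-8"))
```

A `200` response means the signature matched and the handler attempted delivery; a `403` means the secret on the Lambda does not match the one used to sign the request.
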
55 | 56 | ### ServiceNow 57 | 58 | `service_now_lambda.py` **[DEPRECATED]** 59 | 60 | Refer to the built-in 61 | [ServiceNow Integration](https://docs.getmontecarlo.com/docs/servicenow) -------------------------------------------------------------------------------- /notifications/google_chat_lambda.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example MCD Webhook to Google Chat Lambda 3 | """ 4 | 5 | import hashlib 6 | import hmac 7 | import json 8 | import urllib.parse 9 | import urllib3 10 | from typing import Dict 11 | import os 12 | 13 | SHARED_SIGNING_SECRET = str.encode(os.environ.get('SHARED_SIGNING_SECRET')) # This should be an environment variable 14 | 15 | def lambda_handler(event: Dict, context: Dict) -> Dict: 16 | mcd_signature = event['headers'].get('x-mcd-signature') 17 | body = json.loads(event['body']) if isinstance(event['body'], str) else event['body'] 18 | 19 | if verify_signature(mcd_signature=mcd_signature, body=body): 20 | print('Signature Verified!') 21 | google_webhook(body) 22 | return {'statusCode': 200} 23 | return {'statusCode': 403} 24 | 25 | 26 | def verify_signature(mcd_signature: str, body: Dict) -> bool: 27 | body_as_byes = urllib.parse.urlencode(body).encode('utf8') 28 | computed_signature = hmac.new(SHARED_SIGNING_SECRET, body_as_byes, hashlib.sha512).hexdigest() 29 | return hmac.compare_digest(computed_signature, mcd_signature) 30 | 31 | def google_webhook(body): 32 | """Hangouts Chat incoming webhook quickstart.""" 33 | url = os.environ.get('google_endpoint') 34 | type = body['type'] 35 | http = urllib3.PoolManager() 36 | message_text = ''' 37 | New Incident 38 | Type: {} 39 | URL: {} 40 | '''.format(type,body['payload']['url']) 41 | message_headers = {'Content-Type': 'application/json; charset=UTF-8'} 42 | bot_message = { 43 | 'text': message_text 44 | } 45 | encoded_message = json.dumps(bot_message) 46 | response = http.request("POST",url,headers = message_headers, body = encoded_message) 47 | print(response.data.decode('utf-8')) 48 | -------------------------------------------------------------------------------- /notifications/service_now_lambda.py: -------------------------------------------------------------------------------- 1 | import json 2 | import urllib3 3 | import urllib.parse 4 | import hmac 5 | import hashlib 6 | import os 7 | from typing import Dict 8 | import boto3 9 | from datetime import date 10 | 11 | 12 | # Parameters - These are set in the Lambda Environment Variables 13 | instance = os.environ.get('instance') 14 | serviceNowUser = os.environ.get('serviceNowUser') 15 | serviceNowPassword = os.environ.get('serviceNowPassword') 16 | SHARED_SIGNING_SECRET = str.encode(os.environ.get('SHARED_SIGNING_SECRET')) 17 | 18 | 19 | def create_incident(url, table, incident_type, incident_id) : 20 | http = urllib3.PoolManager() 21 | endpoint = "https://"+instance +".service-now.com/api/now/table/incident" 22 | payload = json.dumps({ 23 | "short_description": incident_type+ " " + table, 24 | "urgency": "3", 25 | "work_notes": "[code]Monte Carlo Incident", 26 | "correlation_id" : "MC-"+incident_id 27 | }) 28 | 29 | authHeaders = urllib3.make_headers(basic_auth='{}:{}'.format(serviceNowUser, serviceNowPassword)) 30 | 31 | basicHeaders = { 32 | 'Content-Type': 'application/json', 33 | 'Accept': 'application/json'} 34 | headers = {**authHeaders, **basicHeaders} # Merge the two header dictionaries 35 | 36 | response = http.request('POST', endpoint, headers=headers, body=payload) 37 | 38 | return( 
response.data.decode('utf-8')) 39 | 40 | def lambda_handler(event, context): 41 | 42 | mcd_signature = event['headers'].get('x-mcd-signature') 43 | body = json.loads(event['body']) if isinstance(event['body'], str) else event['body'] 44 | 45 | 46 | if verify_signature(mcd_signature=mcd_signature, body=body): 47 | print('Signature Verified!') 48 | 49 | #TODO null check vars below 50 | #Add description (Monitor Name) // Important for SQL RULE / Custom Monitors generally 51 | # Group ID is schema, would be good to add 52 | # Event details when available 53 | # Look into number of tables effected (value?) 54 | # Language like 50 affected tables in this schema 55 | incident_url = body['payload'].get('url') 56 | incident_type = body['type'] 57 | table_name = body['payload']["event_list"][0]["table_name"] 58 | incident_id = body['payload'].get('incident_id') 59 | 60 | r = create_incident(incident_url, table_name, incident_type, incident_id) 61 | return {'statusCode': 200} 62 | return {'statusCode': 403} 63 | 64 | def verify_signature(mcd_signature: str, body: Dict) -> bool: 65 | body_as_byes = urllib.parse.urlencode(body).encode('utf8') 66 | computed_signature = hmac.new(SHARED_SIGNING_SECRET, body_as_byes, hashlib.sha512).hexdigest() 67 | print("Computed Signature: " + computed_signature) 68 | return hmac.compare_digest(computed_signature, mcd_signature) 69 | 70 | ## This is only a debugging method for logging requests to S3 71 | def logToS3(log): 72 | encoded_string = log.encode("utf-8") 73 | bucket_name = "s3bucket" 74 | file_name = "request"+ date.today() + ".txt" 75 | s3_path = file_name 76 | s3 = boto3.resource("s3") 77 | s3.Bucket(bucket_name).put_object(Key=s3_path, Body=encoded_string) -------------------------------------------------------------------------------- /notifications/webex_lambda.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import hmac 3 | import json 4 | import urllib.parse 5 | import urllib3 6 | from typing import Dict 7 | import os 8 | 9 | SHARED_SIGNING_SECRET = os.environ.get('SHARED_SIGNING_SECRET') # any secret used to validate MC incoming webhooks 10 | WEBEX_ENDPOINT = os.environ.get('WEBEX_ENDPOINT') # always 'https://webexapis.com/v1/messages' 11 | WEBEX_ROOM_ID = os.environ.get('WEBEX_ROOM_ID') # room id of the channel where MC will deliver messages. 
get this from webex 12 | WEBEX_TOKEN = os.environ.get('WEBEX_TOKEN') # create a new bot here to retrieve token: https://developer.webex.com/my-apps/new 13 | 14 | def lambda_handler(event: Dict, context: Dict) -> Dict: 15 | mcd_signature = event['headers'].get('x-mcd-signature') 16 | body = json.loads(event['body']) 17 | 18 | if verify_signature(mcd_signature=mcd_signature, body=body): 19 | print('Success!') 20 | webex_webhook(body) 21 | return {'statusCode': 200} 22 | return {'statusCode': 403} 23 | 24 | def verify_signature(mcd_signature: str, body: Dict) -> bool: 25 | body_as_bytes = urllib.parse.urlencode(body).encode('utf8') 26 | computed_signature = hmac.new(SHARED_SIGNING_SECRET.encode(), body_as_bytes, hashlib.sha512).hexdigest() 27 | return hmac.compare_digest(computed_signature, mcd_signature) 28 | 29 | def webex_webhook(body): 30 | """Hangouts Chat incoming webhook quickstart.""" 31 | url = os.environ.get('WEBEX_ENDPOINT') 32 | type = body['type'] 33 | http = urllib3.PoolManager() 34 | message_text = ''' 35 | New Incident 36 | Type: {} 37 | URL: {} 38 | '''.format(type,body['payload']['url']) 39 | message_headers = {'Authorization': f'Bearer {WEBEX_TOKEN}', 40 | 'Content-Type': 'application/json'} 41 | bot_message = { 42 | 'text': message_text, 43 | 'roomId': f'{WEBEX_ROOM_ID}' 44 | } 45 | encoded_message = json.dumps(bot_message) 46 | response = http.request("POST",url,headers = message_headers, body = encoded_message) 47 | print(response.data.decode('utf-8')) 48 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohappyeyeballs==2.4.4 2 | aiohttp==3.10.11 3 | aiosignal==1.3.2 4 | airflow_mcd==0.3.4 5 | alembic==1.14.0 6 | anyio==4.7.0 7 | apache-airflow==2.10.4 8 | apache-airflow-providers-common-compat==1.2.2 9 | apache-airflow-providers-common-io==1.4.2 10 | apache-airflow-providers-common-sql==1.20.0 11 | apache-airflow-providers-fab==1.5.1 12 | apache-airflow-providers-ftp==3.11.1 13 | apache-airflow-providers-http==4.13.3 14 | apache-airflow-providers-imap==3.7.0 15 | apache-airflow-providers-smtp==1.8.1 16 | apache-airflow-providers-sqlite==3.9.1 17 | apispec==6.8.0 18 | argcomplete==3.4.0 19 | arghandler==1.3.1 20 | asgiref==3.8.1 21 | attrs==24.3.0 22 | babel==2.16.0 23 | backoff==2.2.1 24 | black==24.4.2 25 | blinker==1.9.0 26 | boto3==1.34.143 27 | botocore==1.34.143 28 | cachelib==0.9.0 29 | certifi==2024.12.14 30 | cffi==1.16.0 31 | chardet==5.2.0 32 | charset-normalizer==3.3.2 33 | click==8.1.7 34 | click-config-file==0.6.0 35 | click-help-colors==0.9.4 36 | clickclick==20.10.2 37 | colorama==0.4.6 38 | colorlog==6.9.0 39 | configobj==5.0.9 40 | ConfigUpdater==3.2 41 | connexion==2.14.2 42 | cron-descriptor==1.4.5 43 | cron-validator==1.0.8 44 | croniter==5.0.1 45 | cronsim==2.5 46 | cryptography==44.0.0 47 | dataclasses-json==0.6.6 48 | decorator==5.1.1 49 | Deprecated==1.2.15 50 | dill==0.3.9 51 | dnspython==2.7.0 52 | email_validator==2.2.0 53 | envs==1.4 54 | et-xmlfile==1.1.0 55 | Flask==2.2.5 56 | Flask-AppBuilder==4.5.2 57 | Flask-Babel==2.0.0 58 | Flask-Caching==2.3.0 59 | Flask-JWT-Extended==4.7.1 60 | Flask-Limiter==3.9.2 61 | Flask-Login==0.6.3 62 | Flask-Session==0.5.0 63 | Flask-SQLAlchemy==2.5.1 64 | Flask-WTF==1.2.2 65 | frozenlist==1.5.0 66 | fsspec==2024.10.0 67 | gcloud-aio-auth==5.3.2 68 | google-re2==1.1.20240702 69 | googleapis-common-protos==1.66.0 70 | graphql-core==3.2.3 71 | grpcio==1.68.1 72 | 
gunicorn==23.0.0 73 | h11==0.14.0 74 | httpcore==1.0.7 75 | httpx==0.28.1 76 | humanfriendly==10.0 77 | idna==3.7 78 | importlib_metadata==8.5.0 79 | inflection==0.5.1 80 | itsdangerous==2.2.0 81 | Jinja2==3.1.4 82 | jmespath==1.0.1 83 | jsonschema==4.23.0 84 | jsonschema-specifications==2024.10.1 85 | lazy-object-proxy==1.10.0 86 | limits==3.14.1 87 | linkify-it-py==2.0.3 88 | lockfile==0.12.2 89 | Mako==1.3.8 90 | markdown-it-py==3.0.0 91 | MarkupSafe==2.1.5 92 | marshmallow==3.21.2 93 | marshmallow-oneofschema==3.1.1 94 | marshmallow-sqlalchemy==0.28.2 95 | mdit-py-plugins==0.4.2 96 | mdurl==0.1.2 97 | methodtools==0.4.7 98 | montecarlodata==0.100.2 99 | more-itertools==10.5.0 100 | multidict==6.1.0 101 | mypy-extensions==1.0.0 102 | numpy==2.0.0 103 | openpyxl==3.1.5 104 | opentelemetry-api==1.29.0 105 | opentelemetry-exporter-otlp==1.29.0 106 | opentelemetry-exporter-otlp-proto-common==1.29.0 107 | opentelemetry-exporter-otlp-proto-grpc==1.29.0 108 | opentelemetry-exporter-otlp-proto-http==1.29.0 109 | opentelemetry-proto==1.29.0 110 | opentelemetry-sdk==1.29.0 111 | opentelemetry-semantic-conventions==0.50b0 112 | ordered-set==4.1.0 113 | packaging==24.0 114 | pandas==2.2.2 115 | pathspec==0.12.1 116 | pendulum==3.0.0 117 | pip3-autoremove==1.2.2 118 | platformdirs==4.2.2 119 | plotext==5.2.8 120 | pluggy==1.5.0 121 | prettytable==3.11.0 122 | prison==0.2.1 123 | prompt_toolkit==3.0.47 124 | propcache==0.2.1 125 | protobuf==5.29.1 126 | psutil==6.1.0 127 | py==1.11.0 128 | pycarlo==0.9.15 129 | pycognito==2024.5.1 130 | pycparser==2.22 131 | pycryptodome==3.20.0 132 | Pygments==2.18.0 133 | PyJWT==2.8.0 134 | python-box==7.1.1 135 | python-daemon==3.1.2 136 | python-dateutil==2.9.0.post0 137 | python-dotenv==1.0.1 138 | python-nvd3==0.16.0 139 | python-slugify==8.0.4 140 | pytz==2024.1 141 | PyYAML==6.0.1 142 | questionary==1.10.0 143 | referencing==0.35.1 144 | requests==2.32.3 145 | requests-toolbelt==1.0.0 146 | responses==0.25.3 147 | retry==0.9.2 148 | rfc3339-validator==0.1.4 149 | rich==13.8.0 150 | rich-argparse==1.6.0 151 | rpds-py==0.22.3 152 | s3transfer==0.10.2 153 | setproctitle==1.3.4 154 | setuptools==75.8.0 155 | sgqlc==16.3 156 | six==1.16.0 157 | sniffio==1.3.1 158 | SQLAlchemy==1.4.54 159 | SQLAlchemy-JSONField==1.0.2 160 | SQLAlchemy-Utils==0.41.2 161 | sqlparse==0.5.3 162 | tabulate==0.9.0 163 | tenacity==9.0.0 164 | termcolor==2.5.0 165 | text-unidecode==1.3 166 | time-machine==2.16.0 167 | typing-inspect==0.9.0 168 | typing_extensions==4.11.0 169 | tzdata==2024.1 170 | uc-micro-py==1.0.3 171 | universal_pathlib==0.2.6 172 | urllib3==2.3.0 173 | wcwidth==0.2.13 174 | Werkzeug==2.2.3 175 | wirerope==0.4.8 176 | wrapt==1.17.0 177 | WTForms==3.2.1 178 | yarl==1.18.3 179 | zipp==3.21.0 180 | -------------------------------------------------------------------------------- /tables/__init__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging.config 3 | import subprocess 4 | import textwrap 5 | import traceback 6 | import shutil 7 | import yaml 8 | import lib.helpers.constants as const 9 | from contextlib import nullcontext 10 | from lib.util import Monitors, Tables, Admin 11 | from pathlib import Path 12 | from lib.helpers.logs import LoggingConfigs, LogHelper, LogRotater, LOGGER 13 | from lib.helpers import sdk_helpers 14 | from pycarlo.core import Mutation 15 | from rich.progress import Progress 16 | from rich import print 17 | 
-------------------------------------------------------------------------------- /tables/link_tables_via_descriptions.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 4 | from tables import * 5 | from lib.helpers import sdk_helpers 6 | 7 | # Initialize logger 8 | util_name = os.path.basename(__file__).split('.')[0] 9 | logging.config.dictConfig(LoggingConfigs.logging_configs(util_name)) 10 | 11 | 12 | class LinkTableViewUtility(Tables): 13 | 14 | def __init__(self, profile, config_file: str = None, progress: Progress = None): 15 | """Creates an instance of LinkTableViewUtility. 16 | 17 | Args: 18 | profile(str): Profile to use stored in montecarlo test. 19 | config_file (str): Path to the Configuration File. 20 | progress(Progress): Progress bar. 21 | """ 22 | 23 | super().__init__(profile, config_file, progress) 24 | self.progress_bar = progress 25 | 26 | def retrieve_assets(self, source): 27 | """Helper function to retrieve assets for a given source. 28 | 29 | Args: 30 | source(str): db:schema 31 | """ 32 | 33 | assets = None 34 | if ':' not in source: 35 | LOGGER.error("source must be in db:schema format") 36 | else: 37 | db, schema = source.split(':') 38 | LOGGER.info(f"retrieving assets under {source}...") 39 | assets, _ = self.get_tables_in_db_schema(db, schema) 40 | if not assets: 41 | LOGGER.error(f"no assets found in {source}") 42 | 43 | return assets 44 | 45 | def map_assets(self, source_a: str, source_b: str) -> dict: 46 | """Maps assets from a schema to another 47 | 48 | Args: 49 | source_a(str): db:schema 50 | source_b(str): db:schema 51 | """ 52 | 53 | asset_map = {} 54 | 55 | source_a_assets = self.retrieve_assets(source_a) 56 | if source_a_assets is None: 57 | return asset_map 58 | 59 | source_b_assets = self.retrieve_assets(source_b) 60 | if source_b_assets is None: 61 | return asset_map 62 | 63 | for mcon_a in source_a_assets: 64 | asset_a = mcon_a.split('++')[-1].split('.')[-1] 65 | for mcon_b in source_b_assets: 66 | asset_b = mcon_b.split('++')[-1].split('.')[-1] 67 | if asset_a == asset_b: 68 | LOGGER.debug(f"asset with name {asset_a} matched") 69 | asset_map[mcon_a] = mcon_b 70 | break 71 | 72 | LOGGER.info(f"{len(asset_map)} assets mapped") 73 | 74 | return asset_map 75 | 76 | def set_table_descriptions(self, assets: dict): 77 | """ """ 78 | 79 | LOGGER.info(f"setting descriptions for the {len(assets)} matches") 80 | for k, v in assets.items(): 81 | description = f"### Mapping: https://getmontecarlo.com/assets/{v}" 82 | LOGGER.debug(f"updating asset - {k.split('++')[-1].split('.')[-1]}") 83 | response = self.auth.client(self.update_asset_description(k, description)).create_or_update_catalog_object_metadata 84 | if not response: 85 | LOGGER.error(f"unable to set description on {k}") 86 | description = f"### Mapping: https://getmontecarlo.com/assets/{k}" 87 | LOGGER.debug(f"updating asset - {v.split('++')[-1].split('.')[-1]}") 88 | response = self.auth.client(self.update_asset_description(v, description)).create_or_update_catalog_object_metadata 89 | if not response: 90 | LOGGER.error(f"unable to set description on {k}") 91 | 92 | 93 | def main(*args, **kwargs): 94 | 95 | # Capture Command Line Arguments 96 | parser = sdk_helpers.generate_arg_parser(os.path.basename(os.path.dirname(os.path.abspath(__file__))), 97 | os.path.basename(__file__)) 98 | 99 | if not args: 100 | args = parser.parse_args(*args, **kwargs) 101 | else: 102 | 
sdk_helpers.dump_help(parser, main, *args) 103 | args = parser.parse_args(*args, **kwargs) 104 | 105 | @sdk_helpers.ensure_progress 106 | def run_utility(progress, util, args): 107 | util.progress_bar = progress 108 | util.set_table_descriptions(util.map_assets(args.a_source, args.b_source)) 109 | 110 | util = LinkTableViewUtility(args.profile) 111 | run_utility(util, args) 112 | 113 | 114 | if __name__ == '__main__': 115 | main() 116 | -------------------------------------------------------------------------------- /utility.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monte-carlo-data/monte-carlo-python-sdk-examples/074216990a85ea21b1e1fd0a6f6d35a6e6c428da/utility.png --------------------------------------------------------------------------------